Mirai's Miscellaneous Misadventures
M39 / core / text-ualyze.c
1
2
3
4
5
6
7
8
9#define char16_t uint16_t
10
11#include <stdlib.h>
12#include <ualyze.h>
13
14#include <mimimi/text.h>
15
/*
 * Encodes the code point found at *text as UTF-16 into ints, and advances
 * *text past that code point (using mimimi_skip_code_point).
 *
 * ints must have room for two units. Returns the number of units written:
 * 1 for a code point below 0x10000, otherwise 2 (high surrogate, then low).
 */
static int mimimi_utf16(uint16_t *ints, unsigned char **text)
{
    unsigned char *start = *text;
    unsigned long int code = mimimi_code_point(start);
    *text = mimimi_skip_code_point(start);

    if (code >= 0x10000)
    {
        // Split the offset above 0x10000 into two 10-bit halves, one per surrogate.
        unsigned long int offset = code - 0x10000;
        ints[0] = 0xD800 + (unsigned int) (offset >> 10);
        ints[1] = 0xDC00 + (unsigned int) (offset & 0x3FF);
        return 2;
    }

    // Fits in a single unit.
    ints[0] = code;
    return 1;
}
38
/*
 * Returns how many UTF-16 units mimimi_utf16 produces for the code point
 * at text (1 or 2).
 *
 * The encoded units are discarded. Only the local copy of text is advanced,
 * so the caller's pointer is unaffected.
 */
static int mimimi_utf16_count(unsigned char *text)
{
    uint16_t scratch[2];
    return mimimi_utf16(scratch, &text);
}
45
/*
 * Returns the total number of UTF-16 units needed for the zero-terminated
 * string text, not counting a terminator.
 */
static int mimimi_utf16_count_text(unsigned char *text)
{
    int total = 0;
    for (unsigned char *cursor = text; *cursor != 0; cursor = mimimi_skip_code_point(cursor))
    {
        total += mimimi_utf16_count(cursor);
    }
    return total;
}
54
55static int mimimi_ual_buffer(ual_buffer **buffer, unsigned char *text)
56{
57 int count = mimimi_utf16_count_text(text);
58 uint16_t *utf16 = NULL;
59 if (count > 0)
60 {
61 utf16 = malloc((count + 1) * sizeof *utf16);
62 if (utf16 == NULL) exit(1);
63
64 unsigned char *text1 = text;
65 uint16_t *utf16x = utf16;
66 while (*text1 != 0) utf16x += mimimi_utf16(utf16x, &text1);
67 *utf16x = 0;
68 }
69
70 *buffer = ual_buffer_create();
71 if (*buffer == 0) exit(1);
72
73 int count2 = ual_analyze_paragraph(*buffer, utf16, count);
74 int i = 0;
75 while (count2 > 0)
76 {
77 count2 -= mimimi_utf16_count(text);
78 text = mimimi_skip_code_point(text);
79 i++;
80 }
81 return i;
82}
83
84static void mimimi_finish_ual_buffer(ual_buffer *buffer)
85{
86 uint16_t *utf16 = ual_buffer_text(buffer);
87 ual_buffer_release(buffer);
88 if (utf16 != NULL) free(utf16);
89}
90
/*
 * Counts the code points at the start of text that come before the first
 * line or paragraph terminator, or before the end of the string.
 *
 * The terminator itself is not counted.
 */
int mimimi_count_paragraph(unsigned char *text)
{
    int length = 0;
    for (;; length++)
    {
        switch (mimimi_code_point(text))
        {
        case 0:         // end of string
        case 0x0A:      // line feed
        case 0x0B:      // vertical tab
        case 0x0C:      // form feed
        case 0x0D:      // carriage return
        case 0x0085:    // next line
        case 0x2028:    // line separator
        case 0x2029:    // paragraph separator
            return length;
        default:
            break;
        }
        text = mimimi_skip_code_point(text);
    }
}
105
106unsigned char *mimimi_skip_paragraph(unsigned char *text)
107{
108 ual_buffer *buffer;
109 text += mimimi_ual_buffer(&buffer, text);
110 mimimi_finish_ual_buffer(buffer);
111 return text;
112}
113
114static unsigned char *mimimi_ual_until(unsigned char *text, int flags, int n)
115{
116 ual_buffer *buffer;
117 mimimi_ual_buffer(&buffer, text);
118 ual_analyze_breaks(buffer);
119
120 ual_char *chars = ual_buffer_chars(buffer);
121
122 if (*text != 0 && n != 0)
123 chars++,
124 text = mimimi_skip_code_point(text);
125
126 while (*text != 0 && (chars->bc & flags) == 0)
127 chars++,
128 text = mimimi_skip_code_point(text);
129
130 mimimi_finish_ual_buffer(buffer);
131 return text;
132}
133
134unsigned char *mimimi_skip_grapheme(unsigned char *text)
135{
136 return mimimi_ual_until(text, UAL_BREAK_CLUSTER, 1);
137}
138
/*
 * Returns the distance, in unsigned char elements, between text and the
 * position returned by mimimi_skip_grapheme(text).
 */
int mimimi_count_grapheme(unsigned char *text)
{
    unsigned char *end = mimimi_skip_grapheme(text);
    return end - text;
}
143
144unsigned char *mimimi_skip_word(unsigned char *text)
145{
146 return mimimi_ual_until(text, UAL_BREAK_LINE, 1);
147}
148
149int mimimi_count_word(unsigned char *text)
150{
151 return mimimi_ual_until(text, UAL_BREAK_LINE | UAL_BREAK_SPACES, 0) - text;
152}