Mirai's Miscellaneous Misadventures

M39 / core / text-ualyze.c

1// copyright 2023 zamfofex
2// license: AGPLv3 or later
3
// todo: this is really slow!
// every call converts the text to UTF-16 and re-runs the analysis from scratch,
// because nothing is kept between calls.
// this is necessary to fit the current API, though.
7
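// todo: aliasing 'char16_t' to 'uint16_t' is currently necessary before including <ualyze.h>
// (presumably because 'char16_t' is not otherwise defined at that point; remove once it is).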
9#define char16_t uint16_t
10
11#include <stdlib.h>
12#include <ualyze.h>
13
14#include <mimimi/text.h>
15
// encodes the code point at the start of '*text' as UTF-16 into 'ints',
// and advances '*text' past that code point.
// returns the number of code units written: 1 for code points below 0x10000,
// or 2 (a high surrogate followed by a low surrogate) otherwise.
// 'ints' needs room for up to two code units.
static int mimimi_utf16(uint16_t *ints, unsigned char **text)
{
	unsigned long int code = mimimi_code_point(*text);
	*text = mimimi_skip_code_point(*text);
	
	if (code >= 0x10000)
	{
		// split the 20-bit offset into two 10-bit halves, one per surrogate
		unsigned long int offset = code - 0x10000;
		unsigned int lead = 0xD800 + (offset >> 10);
		unsigned int trail = 0xDC00 + (offset & 0x3FF);
		ints[0] = lead;
		ints[1] = trail;
		return 2;
	}
	
	ints[0] = code;
	return 1;
}
38
// returns how many UTF-16 code units (1 or 2) the code point at the start of 'text' needs.
// only the local copy of 'text' is advanced; the caller's pointer is unaffected.
static int mimimi_utf16_count(unsigned char *text)
{
	// scratch space: the encoded units themselves are discarded
	uint16_t units[2];
	return mimimi_utf16(units, &text);
}
45
// returns the total number of UTF-16 code units needed to encode the zero-terminated 'text',
// not counting a terminator.
static int mimimi_utf16_count_text(unsigned char *text)
{
	int total = 0;
	for (unsigned char *cursor = text; *cursor != 0; cursor = mimimi_skip_code_point(cursor))
	{
		total += mimimi_utf16_count(cursor);
	}
	return total;
}
54
55static int mimimi_ual_buffer(ual_buffer **buffer, unsigned char *text)
56{
57	int count = mimimi_utf16_count_text(text);
58	uint16_t *utf16 = NULL;
59	if (count > 0)
60	{
61		utf16 = malloc((count + 1) * sizeof *utf16);
62		if (utf16 == NULL) exit(1);
63		
64		unsigned char *text1 = text;
65		uint16_t *utf16x = utf16;
66		while (*text1 != 0) utf16x += mimimi_utf16(utf16x, &text1);
67		*utf16x = 0;
68	}
69	
70	*buffer = ual_buffer_create();
71	if (*buffer == 0) exit(1);
72	
73	int count2 = ual_analyze_paragraph(*buffer, utf16, count);
74	int i = 0;
75	while (count2 > 0)
76	{
77		count2 -= mimimi_utf16_count(text);
78		text = mimimi_skip_code_point(text);
79		i++;
80	}
81	return i;
82}
83
84static void mimimi_finish_ual_buffer(ual_buffer *buffer)
85{
86	uint16_t *utf16 = ual_buffer_text(buffer);
87	ual_buffer_release(buffer);
88	if (utf16 != NULL) free(utf16);
89}
90
// counts the code points at the start of 'text' that come before the first terminator.
// a terminator is the end of the text (0) or one of:
// U+000A, U+000B, U+000C, U+000D, U+0085, U+2028, U+2029.
// the terminator itself is not counted.
int mimimi_count_paragraph(unsigned char *text)
{
	int count = 0;
	for (;; count++)
	{
		unsigned long int code = mimimi_code_point(text);
		switch (code)
		{
			case 0:
			case 0x0A:
			case 0x0B:
			case 0x0C:
			case 0x0D:
			case 0x0085:
			case 0x2028:
			case 0x2029:
				return count;
			default:
				break;
		}
		text = mimimi_skip_code_point(text);
	}
}
105
106unsigned char *mimimi_skip_paragraph(unsigned char *text)
107{
108	ual_buffer *buffer;
109	text += mimimi_ual_buffer(&buffer, text);
110	mimimi_finish_ual_buffer(buffer);
111	return text;
112}
113
114static unsigned char *mimimi_ual_until(unsigned char *text, int flags, int n)
115{
116	ual_buffer *buffer;
117	mimimi_ual_buffer(&buffer, text);
118	ual_analyze_breaks(buffer);
119	
120	ual_char *chars = ual_buffer_chars(buffer);
121	
122	if (*text != 0 && n != 0)
123		chars++,
124		text = mimimi_skip_code_point(text);
125	
126	while (*text != 0 && (chars->bc & flags) == 0)
127		chars++,
128		text = mimimi_skip_code_point(text);
129	
130	mimimi_finish_ual_buffer(buffer);
131	return text;
132}
133
134unsigned char *mimimi_skip_grapheme(unsigned char *text)
135{
136	return mimimi_ual_until(text, UAL_BREAK_CLUSTER, 1);
137}
138
// returns the distance, in bytes, from 'text' to the pointer returned by 'mimimi_skip_grapheme'.
// NOTE(review): this is a byte count, whereas 'mimimi_count_paragraph' counts code points;
// confirm which unit callers expect.
int mimimi_count_grapheme(unsigned char *text)
{
	unsigned char *end = mimimi_skip_grapheme(text);
	return end - text;
}
143
144unsigned char *mimimi_skip_word(unsigned char *text)
145{
146	return mimimi_ual_until(text, UAL_BREAK_LINE, 1);
147}
148
149int mimimi_count_word(unsigned char *text)
150{
151	return mimimi_ual_until(text, UAL_BREAK_LINE | UAL_BREAK_SPACES, 0) - text;
152}