// Mirai's Miscellaneous Misadventures
// M39 / core / text-ualyze.c
#define char16_t uint16_t
#include <stdlib.h>
#include <ualyze.h>
#include <mimimi/text.h>
// Encodes the code point at *text as UTF-16 and steps *text past it.
//
// ints: output array with room for at least two units.
// text: in/out cursor into the encoded text; decoding and advancing are
//       delegated to mimimi_code_point / mimimi_skip_code_point.
//
// Returns the number of units written: 1 for a code point below 0x10000,
// 2 (high surrogate, then low surrogate) otherwise.
static int mimimi_utf16(uint16_t *ints, unsigned char **text)
{
    unsigned long int code_point = mimimi_code_point(*text);
    *text = mimimi_skip_code_point(*text);

    if (code_point >= 0x10000)
    {
        // Split the 20-bit offset above the BMP into two 10-bit halves.
        unsigned long int offset = code_point - 0x10000;
        ints[0] = 0xD800 + (unsigned int) (offset >> 10);
        ints[1] = 0xDC00 + (unsigned int) (offset & 0x3FF);
        return 2;
    }

    ints[0] = code_point;
    return 1;
}
// Returns how many UTF-16 units (1 or 2) the code point at text encodes to.
//
// The encoding itself is thrown away; only the unit count reported by
// mimimi_utf16 is kept. text is passed by value, so the caller's cursor is
// not moved.
static int mimimi_utf16_count(unsigned char *text)
{
    uint16_t scratch[2];
    return mimimi_utf16(scratch, &text);
}
// Returns the total number of UTF-16 units needed to encode text, walking
// it one code point at a time until a zero byte is reached. The terminator
// itself is not counted.
static int mimimi_utf16_count_text(unsigned char *text)
{
    int total = 0;
    for (; *text != 0; text = mimimi_skip_code_point(text))
        total += mimimi_utf16_count(text);
    return total;
}
// Creates a ualyze buffer, stores it in *buffer and runs paragraph analysis
// on a UTF-16 copy of text.
//
// The UTF-16 copy is heap-allocated here (zero-terminated, one extra unit)
// and handed to ual_analyze_paragraph; for empty text no copy is made and
// NULL with a length of 0 is passed instead. mimimi_finish_ual_buffer is
// the counterpart that releases the buffer and frees the copy.
//
// Returns the number of code points of text covered by the value
// ual_analyze_paragraph returned (presumably the paragraph length in
// UTF-16 units). Note that this is a code-point count, not a byte count.
//
// Terminates the process with exit(1) if either allocation fails.
static int mimimi_ual_buffer(ual_buffer **buffer, unsigned char *text)
{
    int units = mimimi_utf16_count_text(text);
    uint16_t *encoded = NULL;

    if (units > 0)
    {
        encoded = malloc((units + 1) * sizeof *encoded);
        if (encoded == NULL)
            exit(1);

        uint16_t *out = encoded;
        for (unsigned char *in = text; *in != 0;)
            out += mimimi_utf16(out, &in);
        *out = 0;
    }

    *buffer = ual_buffer_create();
    if (*buffer == NULL)
        exit(1);

    // Convert the analyzed length from UTF-16 units back to code points by
    // replaying the text and subtracting each code point's unit count.
    int code_points = 0;
    for (int left = ual_analyze_paragraph(*buffer, encoded, units); left > 0; code_points++)
    {
        left -= mimimi_utf16_count(text);
        text = mimimi_skip_code_point(text);
    }
    return code_points;
}
// Releases a buffer set up by mimimi_ual_buffer and frees the UTF-16 text
// that was attached to it.
static void mimimi_finish_ual_buffer(ual_buffer *buffer)
{
    // Read the text pointer before giving up the buffer handle.
    uint16_t *encoded = ual_buffer_text(buffer);
    ual_buffer_release(buffer);

    // free(NULL) is a no-op, so the empty-text case needs no special handling.
    free(encoded);
}
// Counts the code points in text that precede the first paragraph
// terminator. The terminator itself is not counted.
//
// Scanning stops at the end of the text (code point 0) or at any of
// U+000A, U+000B, U+000C, U+000D, U+0085, U+2028, U+2029.
//
// NOTE(review): the result is a number of code points, whereas
// mimimi_count_grapheme and mimimi_count_word in this file return pointer
// (byte) distances -- confirm which unit callers expect.
int mimimi_count_paragraph(unsigned char *text)
{
    int code_points = 0;
    for (;;)
    {
        switch (mimimi_code_point(text))
        {
        case 0:
        case 0x0A:
        case 0x0B:
        case 0x0C:
        case 0x0D:
        case 0x0085:
        case 0x2028:
        case 0x2029:
            return code_points;
        default:
            break;
        }
        text = mimimi_skip_code_point(text);
        code_points++;
    }
}
// Returns a pointer just past the first paragraph of text, as delimited by
// ualyze's paragraph analysis.
//
// mimimi_ual_buffer reports the paragraph length in code points, so the
// cursor is advanced one code point at a time. Adding that count directly
// to the byte pointer is only correct when every code point occupies a
// single byte, and lands in the wrong place for multi-byte text.
unsigned char *mimimi_skip_paragraph(unsigned char *text)
{
    ual_buffer *buffer;
    int code_points = mimimi_ual_buffer(&buffer, text);
    mimimi_finish_ual_buffer(buffer);

    for (int i = 0; i < code_points; i++)
        text = mimimi_skip_code_point(text);
    return text;
}
// Scans forward from text until ualyze reports a break whose class matches
// flags, and returns a pointer to that position.
//
// text:  start of the encoded text; only its first paragraph is analyzed.
// flags: bit mask tested against the `bc` field of each analyzed char.
// n:     when non-zero, the first code point is always stepped over before
//        any break test is made; when zero, the first code point itself is
//        tested.
//
// The scan also stops at the end of the text or at the end of the analyzed
// paragraph, whichever comes first, so the char array is never read beyond
// the range that was analyzed.
//
// NOTE(review): assumes ual_buffer_chars yields one ual_char per UTF-16
// unit of the analyzed text, so the char cursor is advanced by each code
// point's UTF-16 length (2 for surrogate pairs) -- confirm against ualyze.h.
static unsigned char *mimimi_ual_until(unsigned char *text, int flags, int n)
{
    ual_buffer *buffer;

    // Number of code points covered by the paragraph analysis.
    int remaining = mimimi_ual_buffer(&buffer, text);
    ual_analyze_breaks(buffer);
    ual_char *chars = ual_buffer_chars(buffer);

    if (*text != 0 && remaining > 0 && n != 0)
    {
        chars += mimimi_utf16_count(text);
        text = mimimi_skip_code_point(text);
        remaining--;
    }

    while (*text != 0 && remaining > 0 && (chars->bc & flags) == 0)
    {
        chars += mimimi_utf16_count(text);
        text = mimimi_skip_code_point(text);
        remaining--;
    }

    mimimi_finish_ual_buffer(buffer);
    return text;
}
// Returns a pointer to the next cluster break after the first code point of
// text (the first code point is always skipped), or to the end of the text
// if no such break is found.
unsigned char *mimimi_skip_grapheme(unsigned char *text)
{
    unsigned char *next = mimimi_ual_until(text, UAL_BREAK_CLUSTER, 1);
    return next;
}
// Returns the distance, in bytes, from text to the position reported by
// mimimi_skip_grapheme.
int mimimi_count_grapheme(unsigned char *text)
{
    unsigned char *next = mimimi_skip_grapheme(text);
    return next - text;
}
// Returns a pointer to the next line-break opportunity after the first code
// point of text (the first code point is always skipped), or to the end of
// the text if no such break is found.
unsigned char *mimimi_skip_word(unsigned char *text)
{
    unsigned char *next = mimimi_ual_until(text, UAL_BREAK_LINE, 1);
    return next;
}
// Returns the distance, in bytes, from text to the first position flagged
// as either a line break or a spaces break. Unlike mimimi_skip_word, the
// first code point is itself tested, so the result can be 0.
int mimimi_count_word(unsigned char *text)
{
    int stop_at = UAL_BREAK_LINE | UAL_BREAK_SPACES;
    unsigned char *end = mimimi_ual_until(text, stop_at, 0);
    return end - text;
}