blob: 8a154243b4aa900e7187b1a30a9e7bdf2223baf4 [file] [log] [blame] [edit]
/* See LICENSE file for copyright and license details. */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "character.h"
#include "util.h"
int
main(int argc, char *argv[])
{
const struct codepoint_property *match;
struct codepoint_property_set *properties_dcp, *properties_emoji,
*properties_grapheme;
uint_least64_t *properties;
uint_least32_t cp;
(void)argc;
/* parse properties from the Unicode data files */
properties_dcp = parse_property_file("data/DerivedCoreProperties.txt");
properties_emoji = parse_property_file("data/emoji-data.txt");
properties_grapheme = parse_property_file("data/GraphemeBreakProperty.txt");
/* allocate property array and initialise to zero */
if (!(properties = calloc(UINT32_C(0x110000), sizeof(*properties)))) {
fprintf(stderr, "%s: malloc: %s\n", argv[0], strerror(errno));
exit(1);
}
for (cp = 0; cp <= UINT32_C(0x10FFFF); cp++) {
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "Control", 0)) {
properties[cp] |= CHAR_PROP_CONTROL;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "Extend", 0)) {
properties[cp] |= CHAR_PROP_EXTEND;
}
if (match_in_codepoint_property_set(
&(properties_emoji[cp]), "Extended_Pictographic", 0)) {
properties[cp] |= CHAR_PROP_EXTENDED_PICTOGRAPHIC;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "L", 0)) {
properties[cp] |= CHAR_PROP_HANGUL_L;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "V", 0)) {
properties[cp] |= CHAR_PROP_HANGUL_V;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "T", 0)) {
properties[cp] |= CHAR_PROP_HANGUL_T;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "LV", 0)) {
properties[cp] |= CHAR_PROP_HANGUL_LV;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "LVT", 0)) {
properties[cp] |= CHAR_PROP_HANGUL_LVT;
}
if ((match = match_in_codepoint_property_set(
&(properties_dcp[cp]), "InCB", 0))) {
if (strcmp(match->fields[1], "Consonant") == 0) {
properties[cp] |= CHAR_PROP_ICB_CONSONANT;
} else if (strcmp(match->fields[1], "Extend") == 0) {
properties[cp] |= CHAR_PROP_ICB_EXTEND;
} else if (strcmp(match->fields[1], "Linker") == 0) {
properties[cp] |= CHAR_PROP_ICB_LINKER;
}
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "Prepend", 0)) {
properties[cp] |= CHAR_PROP_PREPEND;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "Regional_Indicator", 0)) {
properties[cp] |= CHAR_PROP_REGIONAL_INDICATOR;
}
if (match_in_codepoint_property_set(
&(properties_grapheme[cp]), "SpacingMark", 0)) {
properties[cp] |= CHAR_PROP_SPACINGMARK;
}
}
/* generate code */
compress_and_output(properties, "character");
/* cleanup */
free_codepoint_property_set_array(properties_dcp);
free_codepoint_property_set_array(properties_emoji);
free_codepoint_property_set_array(properties_grapheme);
free(properties);
return 0;
}