| /* See LICENSE file for copyright and license details. */ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "util.h" |
| |
| #define FILE_EMOJI "data/emoji-data.txt" |
| #define FILE_WORD "data/WordBreakProperty.txt" |
| |
| static const struct property_spec word_break_property[] = { |
| { |
| .enumname = "OTHER", |
| .file = NULL, |
| .ucdname = NULL, |
| }, |
| { |
| .enumname = "ALETTER", |
| .file = FILE_WORD, |
| .ucdname = "ALetter", |
| }, |
| { |
| .enumname = "BOTH_ALETTER_EXTPICT", |
| .file = NULL, |
| .ucdname = NULL, |
| }, |
| { |
| .enumname = "CR", |
| .file = FILE_WORD, |
| .ucdname = "CR", |
| }, |
| { |
| .enumname = "DOUBLE_QUOTE", |
| .file = FILE_WORD, |
| .ucdname = "Double_Quote", |
| }, |
| { |
| .enumname = "EXTEND", |
| .file = FILE_WORD, |
| .ucdname = "Extend", |
| }, |
| { |
| .enumname = "EXTENDED_PICTOGRAPHIC", |
| .file = FILE_EMOJI, |
| .ucdname = "Extended_Pictographic", |
| }, |
| { |
| .enumname = "EXTENDNUMLET", |
| .file = FILE_WORD, |
| .ucdname = "ExtendNumLet", |
| }, |
| { |
| .enumname = "FORMAT", |
| .file = FILE_WORD, |
| .ucdname = "Format", |
| }, |
| { |
| .enumname = "HEBREW_LETTER", |
| .file = FILE_WORD, |
| .ucdname = "Hebrew_Letter", |
| }, |
| { |
| .enumname = "KATAKANA", |
| .file = FILE_WORD, |
| .ucdname = "Katakana", |
| }, |
| { |
| .enumname = "LF", |
| .file = FILE_WORD, |
| .ucdname = "LF", |
| }, |
| { |
| .enumname = "MIDLETTER", |
| .file = FILE_WORD, |
| .ucdname = "MidLetter", |
| }, |
| { |
| .enumname = "MIDNUM", |
| .file = FILE_WORD, |
| .ucdname = "MidNum", |
| }, |
| { |
| .enumname = "MIDNUMLET", |
| .file = FILE_WORD, |
| .ucdname = "MidNumLet", |
| }, |
| { |
| .enumname = "NEWLINE", |
| .file = FILE_WORD, |
| .ucdname = "Newline", |
| }, |
| { |
| .enumname = "NUMERIC", |
| .file = FILE_WORD, |
| .ucdname = "Numeric", |
| }, |
| { |
| .enumname = "REGIONAL_INDICATOR", |
| .file = FILE_WORD, |
| .ucdname = "Regional_Indicator", |
| }, |
| { |
| .enumname = "SINGLE_QUOTE", |
| .file = FILE_WORD, |
| .ucdname = "Single_Quote", |
| }, |
| { |
| .enumname = "WSEGSPACE", |
| .file = FILE_WORD, |
| .ucdname = "WSegSpace", |
| }, |
| { |
| .enumname = "ZWJ", |
| .file = FILE_WORD, |
| .ucdname = "ZWJ", |
| }, |
| }; |
| |
| static uint_least8_t |
| handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) |
| { |
| uint_least8_t result; |
| |
| (void)cp; |
| |
| if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") && |
| !strcmp(word_break_property[prop2].enumname, |
| "EXTENDED_PICTOGRAPHIC")) || |
| (!strcmp(word_break_property[prop1].enumname, |
| "EXTENDED_PICTOGRAPHIC") && |
| !strcmp(word_break_property[prop2].enumname, "ALETTER"))) { |
| for (result = 0; result < LEN(word_break_property); result++) { |
| if (!strcmp(word_break_property[result].enumname, |
| "BOTH_ALETTER_EXTPICT")) { |
| break; |
| } |
| } |
| if (result == LEN(word_break_property)) { |
| fprintf(stderr, "handle_conflict: Internal error.\n"); |
| exit(1); |
| } |
| } else { |
| fprintf(stderr, "handle_conflict: Cannot handle conflict.\n"); |
| exit(1); |
| } |
| |
| return result; |
| } |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| (void)argc; |
| |
| properties_generate_break_property( |
| word_break_property, LEN(word_break_property), NULL, |
| handle_conflict, NULL, "word_break", argv[0]); |
| |
| return 0; |
| } |