| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 1999-2001, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * created on: April 03 2001 |
| * created by: Syn Wee Quek |
| * |
| * This program reads the FCDCheck text file, parses it and builds compact |
| * binary tables for random-access lookup in a checkFCD() API function. |
| * |
| * fcdcheck.dat file format (after UDataInfo header etc. - see udata.c) |
| * (all data is static const) |
| * |
| * UDataInfo fields: |
| * dataFormat "fchk" |
| * formatVersion 1.0 |
| * dataVersion = Unicode version from -u or --unicode command line option, |
| * defaults to 3.0.0 |
| * |
| * Data generated is a trie of normalization form corresponding to the index |
| * code point. |
| * Hence codepoint 0xABCD will have normalization form |
| * <code> |
| * fcdcheck[codepoint] = |
| * STAGE_3_[STAGE_2_[STAGE_1_[codepoint >> STAGE_1_SHIFT_] + |
| * ((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] + |
| * (codepoint & STAGE_3_MASK_)]; |
| * </code> |
| * value is 2 byte containing 2 sets of 8 bits information.<br> |
| * 1st byte : combining class of the first character in the NFD form of the |
| * codepoint |
| * 2nd byte : combining class of the last character in the NFD form of the |
| * codepoint |
| * |
| * Output file format |
| * - Header |
| * - Stage 1 index in memory set of uint16_t |
| * - Stage 2 index in memory set of uint16_t |
| * - Stage 3 index in memory set of uint16_t |
| * - Stage 1 |
| * - Stage 2 |
| * - Stage 3 |
| */ |
| |
| #include <stdio.h> |
| #include "unicode/utypes.h" |
| #include "unicode/putil.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "unewdata.h" |
| #include "uoptions.h" |
| #include "filestrm.h" |
| |
| #define INPUT_FILE_NAME_ "FCDCheck.txt" |
| #define DATA_NAME_ "fchk" |
| #define DATA_TYPE_ "dat" |
| #define DATA_BUFFER_SIZE_ 100 |
| #define VERSION_STRING_ "fchk" |
| |
| /* UDataInfo cf. udata.h */ |
| static UDataInfo DATA_INFO_ = { |
| sizeof(UDataInfo), |
| 0, |
| |
| U_IS_BIG_ENDIAN, |
| U_CHARSET_FAMILY, |
| sizeof(UChar), |
| 0, |
| |
| {0x66, 0x63, 0x68, 0x6b}, /* dataFormat="qchk" */ |
| {1, 0, 0, 0}, /* formatVersion */ |
| {3, 0, 0, 0} /* dataVersion */ |
| }; |
| |
| static UBool BE_VERBOSE_ = FALSE, |
| BE_QUIET_ = FALSE, |
| HAVE_COPYRIGHT_ =TRUE; |
| |
| static UOption OPTIONS_[] = { |
| UOPTION_HELP_H, |
| UOPTION_HELP_QUESTION_MARK, |
| UOPTION_VERBOSE, |
| UOPTION_QUIET, |
| UOPTION_COPYRIGHT, |
| UOPTION_DESTDIR, |
| UOPTION_SOURCEDIR, |
| { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 } |
| }; |
| |
| /* Stage 1 values for Trie */ |
| static uint16_t STAGE_1_[0x800]; |
| static uint16_t STAGE_1_SIZE_; |
| |
| /* Stage 2 values for Trie */ |
| static uint16_t STAGE_2_[0xFFFF]; |
| static uint16_t STAGE_2_SIZE_; |
| |
| /* Stage 3 values for Trie */ |
| static uint16_t STAGE_3_[0xFFFF]; |
| static uint16_t STAGE_3_SIZE_; |
| |
| /* generate output data ----------------------------------------------------- */ |
| |
| static UBool |
| parseTrieStage(char *pline, UBool *passflag, uint16_t *pstage, |
| uint16_t *psize, UErrorCode *perror) |
| { |
| char *pend; |
| |
| /* gets the first block of code points */ |
| while (!(*passflag) && *pline != '{' && *pline != 0) { |
| ++ pline; |
| } |
| |
| /* error in a field function? */ |
| if (*pline == '\n') { |
| *perror = U_PARSE_ERROR; |
| return FALSE; |
| } |
| |
| /* first line is just declarations */ |
| if (!(*passflag)) { |
| *passflag = TRUE; |
| return TRUE; |
| } |
| |
| /* proceeding with the real block of data */ |
| while (*pline != '\n') { |
| if (*pline == '}') { |
| return FALSE; |
| } |
| |
| /* read one value by the default base*/ |
| pstage[*psize] = (uint16_t)uprv_strtoul(pline, &pend, 0); |
| |
| (*psize) ++; |
| |
| if (*pend == '\n') |
| return TRUE; |
| |
| if (pend <= pline || (*pend != ',')) { |
| fprintf(stderr, "genqchk: syntax error parsing trie at %s\n", |
| pline); |
| *perror = U_PARSE_ERROR; |
| return FALSE; |
| } |
| |
| pline = pend + 1; |
| /* getting rid of space */ |
| while (*pline == ' ') { |
| pline ++; |
| } |
| } |
| |
| return TRUE; |
| } |
| |
| static void |
| parseDB(const char *filename) { |
| char line[DATA_BUFFER_SIZE_]; |
| UErrorCode error = U_ZERO_ERROR; |
| FileStream *file = T_FileStream_open(filename, "r"); |
| UBool stage1 = TRUE; |
| UBool stage2 = TRUE; |
| UBool stage3 = TRUE; |
| UBool stage1pass = FALSE; |
| UBool stage2pass = FALSE; |
| UBool stage3pass = FALSE; |
| |
| if (file == NULL) { |
| fprintf(stderr, "*** unable to open input file %s ***\n", filename); |
| error = U_FILE_ACCESS_ERROR; |
| return; |
| } |
| |
| /* initializing variables */ |
| STAGE_1_SIZE_ = 0; |
| STAGE_2_SIZE_ = 0; |
| STAGE_3_SIZE_ = 0; |
| |
| while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
| /* skip this line if it is empty or a comment or is a return value */ |
| if(line[0] == 0 || line[0] == '#' || line[0] == '\n') { |
| continue; |
| } |
| |
| if (stage1) { |
| stage1 = parseTrieStage(line, &stage1pass, STAGE_1_, |
| &STAGE_1_SIZE_, &error); |
| } |
| else if (stage2) { |
| stage2 = parseTrieStage(line, &stage2pass, STAGE_2_, |
| &STAGE_2_SIZE_, &error); |
| } |
| else if (stage3) { |
| stage3 = parseTrieStage(line, &stage3pass, STAGE_3_, |
| &STAGE_3_SIZE_, &error); |
| } |
| } |
| |
| if (filename != NULL) { |
| T_FileStream_close(file); |
| } |
| } |
| |
| static void |
| generateData(const char *dataDir) { |
| UNewDataMemory *pData; |
| UErrorCode error = U_ZERO_ERROR; |
| uint16_t index = 0; |
| |
| pData=udata_create(dataDir, DATA_TYPE_, DATA_NAME_, &DATA_INFO_, |
| HAVE_COPYRIGHT_ ? U_COPYRIGHT_STRING : NULL, &error); |
| if(U_FAILURE(error)) { |
| fprintf(stderr, |
| "genfchk: unable to create data memory, error %d\n", |
| error); |
| exit(error); |
| } |
| |
| /* stage bit size */ |
| udata_write16(pData, 6); |
| udata_write16(pData, 4); |
| /* offsets in number of uint16_t*/ |
| /* stage 1 */ |
| index = 0; |
| udata_write16(pData, index); |
| /* stage 2 */ |
| index += STAGE_1_SIZE_; |
| udata_write16(pData, index); |
| /* stage 3 */ |
| index += STAGE_2_SIZE_; |
| udata_write16(pData, index); |
| udata_write16(pData, 0); |
| udata_write16(pData, 0); |
| udata_write16(pData, 0); |
| |
| udata_writeBlock(pData, STAGE_1_, STAGE_1_SIZE_ * sizeof(uint16_t)); |
| udata_writeBlock(pData, STAGE_2_, STAGE_2_SIZE_ * sizeof(uint16_t)); |
| udata_writeBlock(pData, STAGE_3_, STAGE_3_SIZE_ * sizeof(uint16_t)); |
| |
| udata_finish(pData, &error); |
| if (U_FAILURE(error)) { |
| fprintf(stderr, "genfchk: error %d writing the output file\n", |
| error); |
| exit(error); |
| } |
| } |
| |
| |
| extern int |
| main(int argc, char* argv[]) { |
| UVersionInfo version; |
| char filename[300]; |
| const char *srcDir = NULL, |
| *destDir = NULL; |
| char *basename = NULL; |
| |
| /* preset then read command line OPTIONS_ */ |
| OPTIONS_[5].value = u_getDataDirectory(); |
| OPTIONS_[6].value=""; |
| OPTIONS_[7].value="3.0.0"; |
| |
| argc = u_parseArgs(argc, argv, sizeof(OPTIONS_) / sizeof(OPTIONS_[0]), |
| OPTIONS_); |
| |
| /* error handling, printing usage message */ |
| if (argc < 0) { |
| fprintf(stderr, "error in command line argument \"%s\"\n", |
| argv[-argc]); |
| } |
| |
| if (argc < 0 || OPTIONS_[0].doesOccur || OPTIONS_[1].doesOccur) { |
| fprintf(stderr, |
| "usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n" |
| "\tread the FCDCheck.txt file and \n" |
| "\tcreate a binary file " DATA_NAME_ "." DATA_TYPE_ "\n" |
| "\t\tfilename absolute path/filename for the\n" |
| "\t\t\tQuickCheck text file (default: standard input)\n" |
| "\toptions:\n" |
| "\t\t-h or -? or --help this usage text\n" |
| "\t\t-v or --verbose verbose output\n" |
| "\t\t-q or --quiet no output\n" |
| "\t\t-c or --copyright include a copyright notice\n" |
| "\t\t-d or --destdir destination directory, followed by the path\n" |
| "\t\t-s or --sourcedir source directory, followed by the path\n" |
| "\t\t-u or --unicode Unicode version, followed by the version like 3.0.0\n", |
| argv[0]); |
| fprintf(stderr, argv[0]); |
| return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
| } |
| |
| /* get the OPTIONS_ values */ |
| BE_VERBOSE_ = OPTIONS_[2].doesOccur; |
| BE_QUIET_ = OPTIONS_[3].doesOccur; |
| HAVE_COPYRIGHT_ = OPTIONS_[4].doesOccur; |
| destDir = OPTIONS_[5].value; |
| srcDir = OPTIONS_[6].value; |
| |
| /* set the Unicode version */ |
| u_versionFromString(version, OPTIONS_[7].value); |
| uprv_memcpy(DATA_INFO_.dataVersion, version, 4); |
| |
| /* prepare the filename beginning with the source dir */ |
| uprv_strcpy(filename, srcDir); |
| basename = filename + uprv_strlen(filename); |
| if (basename > filename && *(basename - 1) != U_FILE_SEP_CHAR) { |
| *basename ++ = U_FILE_SEP_CHAR; |
| } |
| |
| uprv_strcpy(basename, INPUT_FILE_NAME_); |
| |
| parseDB(filename); |
| generateData(OPTIONS_[5].value); |
| |
| return 0; |
| } |
| |
| /* |
| * Hey, Emacs, please set the following: |
| * |
| * Local Variables: |
| * indent-tabs-mode: nil |
| * End: |
| * |
| */ |
| |