| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 1998-2000, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * |
| * File parse.c |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 05/26/99 stephen Creation. |
| * 02/25/00 weiv Overhaul to write udata |
| ******************************************************************************* |
| */ |
| |
| #include "parse.h" |
| #include "error.h" |
| #include "uhash.h" |
| #include "cmemory.h" |
| #include "read.h" |
| #include "unicode/ustdio.h" |
| #include "ustr.h" |
| #include "reslist.h" |
| #include "unicode/ustring.h" |
| #include "unicode/ucol.h" |
| |
| U_CAPI const UChar * U_EXPORT2 ucol_getDefaultRulesArray(uint32_t *size); |
| |
| U_STRING_DECL(k_start_string, "string", 6); |
| U_STRING_DECL(k_start_binary, "binary", 6); |
| U_STRING_DECL(k_start_table, "table", 5); |
| U_STRING_DECL(k_start_int, "int", 3); |
| U_STRING_DECL(k_start_array, "array", 5); |
| U_STRING_DECL(k_start_intvector, "intvector", 9); |
| U_STRING_DECL(k_start_reserved, "reserved", 8); |
| U_STRING_DECL(rootName, "root", 4); |
| |
| static UBool didInit=FALSE; |
| static UBool didInitRoot=FALSE; |
| |
| /* Node IDs for the state transition table. */ |
| enum ENode { |
| eError, |
| eInitial, /* Next: Locale name */ |
| eGotLoc, /* Next: { */ |
| eIdle, /* Next: Tag name | } */ |
| eGotTag, /* Next: { | : */ |
| eNode5, /* Next: Data | Subtag */ |
| eNode6, /* Next: } | { | , */ |
| eList, /* Next: List data */ |
| eNode8, /* Next: , */ |
| eTagList, /* Next: Subtag data */ |
| eNode10, /* Next: } */ |
| eNode11, /* Next: Subtag */ |
| eNode12, /* Next: { */ |
| e2dArray, /* Next: Data | } */ |
| eNode14, /* Next: , | } */ |
| eNode15, /* Next: , | } */ |
| eNode16, /* Next: { | } */ |
| eTypeStart, /* Next: Type name */ |
| eGotType /* Next: { */ |
| }; |
| |
| /* Action codes for the state transtiion table. */ |
| enum EAction { |
| /* Generic actions */ |
| eNOP = 0x0100, /* Do nothing */ |
| eOpen = 0x0200, /* Open a new locale data block with the data |
| string as the locale name */ |
| eClose = 0x0300, /* Close a locale data block */ |
| eSetTag = 0x0400, /* Record the last string as the tag name */ |
| |
| /* Comma-delimited lists */ |
| eBegList = 0x1100, /* Start a new string list with the last string |
| as the first element */ |
| eEndList = 0x1200, /* Close a string list being built */ |
| eListStr = 0x1300, /* Record the last string as a data string and |
| increment the index */ |
| eStr = 0x1400, /* Record the last string as a singleton string */ |
| |
| /* 2-d lists */ |
| eBeg2dList = 0x2100, /* Start a new 2d string list with no elements as yet */ |
| eEnd2dList = 0x2200, /* Close a 2d string list being built */ |
| e2dStr = 0x2300, /* Record the last string as a 2d string */ |
| eNewRow = 0x2400, /* Start a new row */ |
| |
| /* Tagged lists */ |
| eBegTagged = 0x3100, /* Start a new tagged list with the last |
| string as the first subtag */ |
| eEndTagged = 0x3200, /* Close a tagged list being build */ |
| eSubtag = 0x3300, /* Record the last string as the subtag */ |
| eTaggedStr = 0x3400, /* Record the last string as a tagged string */ |
| |
| /* Type support */ |
| eBegType = 0x4100, /* Start getting a type */ |
| eSetType = 0x4200 /* Record and init type */ |
| }; |
| |
| /* A struct which encapsulates a node ID and an action. */ |
| struct STransition { |
| enum ENode fNext; |
| enum EAction fAction; |
| }; |
| |
| /* This table describes an ATM (state machine) which parses resource |
| bundle text files rather strictly. Each row represents a node. The |
| columns of that row represent transitions into other nodes. Most |
| transitions are "eError" because most transitions are |
| disallowed. For example, if the parser has just seen a tag name, it |
| enters node 4 ("eGotTag"). The state table then marks only one |
| valid transition, which is into node 5, upon seeing an eOpenBrace |
| token. We allow an extra comma after the last element in a |
| comma-delimited list (transition from eList to eIdle on |
| kCloseBrace). */ |
| static struct STransition gTransitionTable [] = { |
| /* kString kOpenBrace kCloseBrace kComma |
| /*eError*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| |
| /*eInitial*/ {eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| /*eGotLoc*/ {eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| |
| /*eIdle*/ {eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP}, |
| /*eGotTag*/ {eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| /*eNode5*/ {eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP}, |
| /*eNode6*/ {eError,eNOP}, {eTagList,eBegTagged},{eIdle,eStr}, {eList,eBegList}, |
| |
| /*eList*/ {eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP}, |
| /*eNode8*/ {eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP}, |
| |
| /*eTagList*/ {eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| /*eNode10*/ {eError,eNOP}, {eError,eNOP}, {eNode11,eNOP}, {eError,eNOP}, |
| /*eNode11*/ {eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP}, |
| /*eNode12*/ {eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP}, |
| |
| /*e2dArray*/ {eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP}, |
| /*eNode14*/ {eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP}, |
| /*eNode15*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP}, |
| /*eNode16*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP}, |
| /*eTypeStart*/{eGotType,eSetType}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, |
| /*eGotType*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP} |
| }; |
| |
| /* Row length is 4 */ |
| #define GETTRANSITION(row,col) (gTransitionTable[col + (row<<2)]) |
| /* Not anymore, it is 5 now */ |
| /*#define GETTRANSITION(row,col) (gTransitionTable[col + (row*5)])*/ |
| |
| /********************************************************************* |
| * Hashtable glue |
| ********************************************************************/ |
| |
| static UBool get(UHashtable *hash, const struct UString* tag) { |
| return uhash_get(hash, tag) != NULL; |
| } |
| |
| static void put(UHashtable *hash, const struct UString *tag, |
| UErrorCode* status) { |
| struct UString* key = uprv_malloc(sizeof(struct UString)); |
| ustr_init(key); |
| ustr_cpy(key, tag, status); |
| uhash_put(hash, key, (void*)1, status); |
| } |
| |
| static void freeUString(void* ustr) { |
| ustr_deinit(ustr); |
| uprv_free(ustr); |
| } |
| |
| static int32_t hashUString(const void* ustr) { |
| return uhash_hashUChars(((struct UString*)ustr)->fChars); |
| } |
| |
| static UBool compareUString(const void* ustr1, const void* ustr2) { |
| return uhash_compareUChars(((struct UString*)ustr1)->fChars, |
| ((struct UString*)ustr2)->fChars); |
| } |
| |
| /********************************************************************* |
| * parse |
| ********************************************************************/ |
| |
| struct SRBRoot* |
| parse(FileStream *f, const char *cp, |
| UErrorCode *status) |
| { |
| struct UFILE *file; |
| enum ETokenType type; |
| enum ENode node; |
| struct STransition t; |
| |
| struct UString token; |
| struct UString tag; |
| |
| char cTag[1024]; |
| char cSubTag[1024]; |
| struct SRBRoot *bundle = NULL; |
| struct SResource *rootTable = NULL; |
| struct SResource *temp = NULL; |
| struct SResource *temp1 = NULL; |
| struct SResource *temp2 = NULL; |
| UBool colEl = FALSE; |
| |
| /* Hashtable for keeping track of seen tag names */ |
| struct UHashtable *data; |
| |
| |
| if(U_FAILURE(*status)) return NULL; |
| |
| /* setup */ |
| |
| ustr_init(&token); |
| ustr_init(&tag); |
| /* |
| cTag = uprv_malloc(1024); |
| if(cTag == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| cSubTag = uprv_malloc(1024); |
| if(cSubTag == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| */ |
| |
| node = eInitial; |
| data = 0; |
| |
| file = u_finit((FILE *)f, 0, cp); |
| /* file = u_finit(f, cp, status); */ |
| if(file == NULL) { |
| setErrorText("Could not initialize input file - most probably because of wrong converter\n"); |
| *status = U_INVALID_FORMAT_ERROR; |
| goto finish; |
| } |
| |
| bundle = bundle_open(status); |
| rootTable = bundle -> fRoot; |
| |
| if(U_FAILURE(*status) || file == NULL) { |
| goto finish; |
| } |
| |
| /* iterate through the stream */ |
| for(;;) { |
| |
| /* get next token from stream */ |
| type = getNextToken(file, &token, status); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| |
| switch(type) { |
| case tok_EOF: |
| *status = (node == eInitial) ? U_ZERO_ERROR : U_INVALID_FORMAT_ERROR; |
| if(U_FAILURE(*status)) { |
| setErrorText("Unexpected EOF encountered"); |
| } |
| goto finish; |
| /*break;*/ |
| |
| case tok_error: |
| *status = U_INVALID_FORMAT_ERROR; |
| goto finish; |
| /*break;*/ |
| |
| default: |
| break; |
| } |
| |
| t = GETTRANSITION(node, type); |
| node = t.fNext; |
| |
| if(node == eError) { |
| *status = U_INVALID_FORMAT_ERROR; |
| goto finish; |
| } |
| |
| switch(t.fAction) { |
| case eNOP: |
| break; |
| |
| /* Record the last string as the tag name */ |
| case eSetTag: |
| ustr_cpy(&tag, &token, status); |
| u_UCharsToChars(tag.fChars, cTag, u_strlen(tag.fChars)+1); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| if(uprv_strchr(cTag, ':')) { |
| /* type modificator - do the type modification*/ |
| } else if(uprv_strcmp(cTag, "CollationElements") == 0) { |
| colEl = TRUE; |
| } |
| /*if(uhash_get(data, uhash_hashUString(tag.fChars)) != 0) {*/ |
| if(get(data, &tag)) { |
| char *s; |
| *status = U_INVALID_FORMAT_ERROR; |
| s = uprv_malloc(1024); |
| strcpy(s, "Duplicate tag name detected: "); |
| u_austrcpy(s+strlen(s), tag.fChars); |
| setErrorText(s); |
| goto finish; |
| } |
| |
| break; |
| |
| /* Record a singleton string */ |
| case eStr: |
| if(temp != NULL) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| temp = string_open(bundle, cTag, token.fChars, token.fLength, status); |
| table_add(rootTable, temp, status); |
| if(colEl == TRUE) { |
| const UChar * defaultRulesArray; |
| UErrorCode intStatus = U_ZERO_ERROR; |
| uint32_t defaultRulesArrayLength = 0; |
| /* do the collation elements */ |
| int32_t len = 0; |
| uint8_t *binColData = NULL; |
| UCollator *coll = NULL; |
| UChar *rules = NULL; |
| defaultRulesArray = ucol_getDefaultRulesArray(&defaultRulesArrayLength); |
| rules = uprv_malloc(sizeof(defaultRulesArray[0])*(defaultRulesArrayLength + token.fLength)); |
| uprv_memcpy(rules, defaultRulesArray, defaultRulesArrayLength*sizeof(defaultRulesArray[0])); |
| uprv_memcpy(rules + defaultRulesArrayLength, token.fChars, token.fLength*sizeof(token.fChars[0])); |
| |
| coll = ucol_openRules(rules, defaultRulesArrayLength + token.fLength, UCOL_DECOMP_CAN, 0, &intStatus); |
| ucol_setNormalization(coll, UCOL_NO_NORMALIZATION); |
| |
| if(U_SUCCESS(intStatus) && coll !=NULL) { |
| binColData = ucol_cloneRuleData(coll, &len, &intStatus); |
| if(U_SUCCESS(*status) && data != NULL) { |
| temp1 = bin_open(bundle, "%%Collation", len, binColData, status); |
| table_add(rootTable, temp1, status); |
| uprv_free(binColData); |
| } |
| ucol_close(coll); |
| } else { |
| setErrorText("Warning: %%Collation could not be constructed from CollationElements - check context!"); |
| } |
| uprv_free(rules); |
| colEl = FALSE; |
| } |
| /*uhash_put(data, tag.fChars, status);*/ |
| put(data, &tag, status); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| temp = NULL; |
| break; |
| |
| /* Begin a string list */ |
| case eBegList: |
| if(temp != NULL) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| temp = array_open(bundle, cTag, status); |
| temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status); |
| array_add(temp, temp1, status); |
| temp1 = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| /* Record a comma-delimited list string */ |
| case eListStr: |
| temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status); |
| array_add(temp, temp1, status); |
| temp1 = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| /* End a string list */ |
| case eEndList: |
| /*uhash_put(data, tag.fChars, status);*/ |
| put(data, &tag, status); |
| table_add(rootTable, temp, status); |
| temp = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eBeg2dList: |
| if(temp != NULL) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| temp = array_open(bundle, cTag, status); |
| temp1 = array_open(bundle, NULL, status); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eEnd2dList: |
| /*uhash_put(data, tag.fChars, status);*/ |
| put(data, &tag, status); |
| array_add(temp, temp1, status); |
| table_add(rootTable, temp, status); |
| temp1 = NULL; |
| temp = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case e2dStr: |
| temp2 = string_open(bundle, NULL, token.fChars, token.fLength, status); |
| array_add(temp1, temp2, status); |
| temp2 = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eNewRow: |
| array_add(temp, temp1, status); |
| temp1 = array_open(bundle, NULL, status); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eBegTagged: |
| if(temp != NULL) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| temp = table_open(bundle, cTag, status); |
| u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eEndTagged: |
| /*uhash_put(data, tag.fChars, status);*/ |
| put(data, &tag, status); |
| table_add(rootTable, temp, status); |
| temp = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| case eTaggedStr: |
| temp1 = string_open(bundle, cSubTag, token.fChars, token.fLength, status); |
| table_add(temp, temp1, status); |
| temp1 = NULL; |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| break; |
| |
| /* Record the last string as the subtag */ |
| case eSubtag: |
| u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1); |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| if(table_get(temp, cSubTag, status) != 0) { |
| *status = U_INVALID_FORMAT_ERROR; |
| setErrorText("Duplicate subtag found in tagged list"); |
| goto finish; |
| } |
| break; |
| |
| case eOpen: |
| if(data != 0) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| bundle_setlocale(bundle, token.fChars, status); |
| if(didInitRoot == FALSE) { |
| U_STRING_INIT(rootName, "root", 4); |
| didInitRoot = TRUE; |
| } |
| |
| if(u_strcmp(token.fChars, rootName) == 0) { |
| const UChar * defaultRulesArray; |
| uint32_t defaultRulesArrayLength = 0; |
| /* do the collation elements */ |
| int32_t len = 0; |
| uint8_t *data = NULL; |
| uint8_t *data2 = NULL; |
| UCollator *coll = NULL; |
| |
| UChar *rules = NULL; |
| defaultRulesArray = ucol_getDefaultRulesArray(&defaultRulesArrayLength); |
| rules = uprv_malloc(sizeof(defaultRulesArray[0])*(defaultRulesArrayLength)); |
| uprv_memcpy(rules, defaultRulesArray, defaultRulesArrayLength*sizeof(defaultRulesArray[0])); |
| |
| coll = ucol_openRules(rules, defaultRulesArrayLength, UCOL_DECOMP_CAN, 0, status); |
| ucol_setNormalization(coll, UCOL_DEFAULT_NORMALIZATION); |
| |
| if(U_SUCCESS(*status) && coll !=NULL) { |
| data = ucol_cloneRuleData(coll, &len, status); |
| if(U_SUCCESS(*status) && data != NULL) { |
| temp1 = bin_open(bundle, "%%Collation", len, data, status); |
| table_add(rootTable, temp1, status); |
| uprv_free(data); |
| } |
| ucol_close(coll); |
| } |
| uprv_free(rules); |
| } |
| if(U_FAILURE(*status)) { |
| goto finish; |
| } |
| data = uhash_open(hashUString, compareUString, status); |
| uhash_setKeyDeleter(data, freeUString); |
| break; |
| |
| case eClose: |
| if(data == 0) { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| break; |
| case eSetType: |
| /* type recognition */ |
| if(!didInit) { |
| U_STRING_INIT(k_start_string, "string", 6); |
| U_STRING_INIT(k_start_binary, "binary", 6); |
| U_STRING_INIT(k_start_table, "table", 5); |
| U_STRING_INIT(k_start_int, "int", 3); |
| U_STRING_INIT(k_start_array, "array", 5); |
| U_STRING_INIT(k_start_intvector, "intvector", 9); |
| U_STRING_INIT(k_start_reserved, "reserved", 8); |
| didInit=TRUE; |
| } |
| if(u_strcmp(token.fChars, k_start_string) == 0) { |
| node = eGotTag; |
| } else if(u_strcmp(token.fChars, k_start_array) == 0) { |
| node = eGotTag; |
| } else if(u_strcmp(token.fChars, k_start_table) == 0) { |
| node = eGotTag; |
| } else if(u_strcmp(token.fChars, k_start_binary) == 0) { |
| /* start of binary */ |
| } else if(u_strcmp(token.fChars, k_start_int) == 0) { |
| /* start of integer */ |
| } else if(u_strcmp(token.fChars, k_start_intvector) == 0) { |
| /* start of intvector */ |
| } else if(u_strcmp(token.fChars, k_start_reserved) == 0) { |
| /* start of reserved */ |
| } else { |
| *status = U_INTERNAL_PROGRAM_ERROR; |
| goto finish; |
| } |
| break; |
| } |
| } |
| |
| finish: |
| |
| /* clean up */ |
| |
| if(data != 0) |
| uhash_close(data); |
| |
| ustr_deinit(&token); |
| ustr_deinit(&tag); |
| |
| /*uprv_free(cTag);*/ |
| /*uprv_free(cSubTag);*/ |
| |
| if(file != 0) |
| u_fclose(file); |
| |
| return bundle; |
| } |