/* source/tools/genrb/parse.c - external/github.com/unicode-org/icu - Git at Google */

 /*
 *******************************************************************************
 *
 *   Copyright (C) 1998-2000, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *
 * File parse.c
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   05/26/99    stephen     Creation.
 *   02/25/00    weiv        Overhaul to write udata
 *******************************************************************************
 */

 #include "ucol_imp.h"
 #include "parse.h"
 #include "error.h"
 #include "uhash.h"
 #include "cmemory.h"
 #include "read.h"
 #include "unicode/ustdio.h"
 #include "ustr.h"
 #include "reslist.h"
 #include "unicode/ustring.h"
 #include "unicode/putil.h"

 /* Sub-directory name that parse() appends to the input directory when it
    builds the path of the file named by a %%UCARULES entry. */
 #define U_ICU_UNIDATA "unidata"

 /* UChar string constants for the resource type keywords. The k_start_*
    strings are filled in lazily by the U_STRING_INIT calls in parse()
    (eSetType action). */
 U_STRING_DECL(k_start_string, "string", 6);
 U_STRING_DECL(k_start_binary, "binary", 6);
 U_STRING_DECL(k_start_table, "table", 5);
 U_STRING_DECL(k_start_int, "int", 3);
 U_STRING_DECL(k_start_array, "array", 5);
 U_STRING_DECL(k_start_intvector, "intvector", 9);
 U_STRING_DECL(k_start_reserved, "reserved", 8);
 /* NOTE(review): no U_STRING_INIT for rootName is visible in this file
    section - confirm it is initialized before use. */
 U_STRING_DECL(rootName, "root", 4);

 /* Set to TRUE once the k_start_* strings above have been initialized. */
 static UBool didInit=FALSE;
 /* NOTE(review): not referenced in the visible part of this file;
    presumably the matching flag for rootName - confirm. */
 static UBool didInitRoot=FALSE;

 /* States (nodes) of the parser's state machine. The numeric value of each
    state is the row index into the transition table, so the values are
    spelled out explicitly. The trailing comment lists the tokens that are
    expected next in that state. */
 enum ENode {
     eError     = 0,
     eInitial   = 1,  /* expects: locale name */
     eGotLoc    = 2,  /* expects: { */
     eIdle      = 3,  /* expects: tag name or } */
     eGotTag    = 4,  /* expects: { or : */
     eNode5     = 5,  /* expects: data or subtag */
     eNode6     = 6,  /* expects: } or { or , */
     eList      = 7,  /* expects: list data */
     eNode8     = 8,  /* expects: , */
     eTagList   = 9,  /* expects: subtag data */
     eNode10    = 10, /* expects: } */
     eNode11    = 11, /* expects: subtag */
     eNode12    = 12, /* expects: { */
     e2dArray   = 13, /* expects: data or } */
     eNode14    = 14, /* expects: , or } */
     eNode15    = 15, /* expects: , or } */
     eNode16    = 16, /* expects: { or } */
     eTypeStart = 17, /* expects: type name */
     eGotType   = 18  /* expects: { */
 };

 /* Action codes stored in the state transition table. The high byte groups
    the actions by the kind of construct they belong to. */
 enum EAction {
     /* ---- Generic actions ---- */

     /* Do nothing */
     eNOP       = 0x0100,
     /* Open a new locale data block, using the data string as locale name */
     eOpen      = 0x0200,
     /* Close a locale data block */
     eClose     = 0x0300,
     /* Record the last string as the tag name */
     eSetTag    = 0x0400,

     /* ---- Comma-delimited lists ---- */

     /* Start a new string list; the last string is its first element */
     eBegList   = 0x1100,
     /* Close the string list being built */
     eEndList   = 0x1200,
     /* Record the last string as a data string and increment the index */
     eListStr   = 0x1300,
     /* Record the last string as a singleton string */
     eStr       = 0x1400,

     /* ---- 2-d lists ---- */

     /* Start a new 2d string list that has no elements yet */
     eBeg2dList = 0x2100,
     /* Close the 2d string list being built */
     eEnd2dList = 0x2200,
     /* Record the last string as a 2d string */
     e2dStr     = 0x2300,
     /* Start a new row */
     eNewRow    = 0x2400,

     /* ---- Tagged lists ---- */

     /* Start a new tagged list; the last string is its first subtag */
     eBegTagged = 0x3100,
     /* Close the tagged list being built */
     eEndTagged = 0x3200,
     /* Record the last string as the subtag */
     eSubtag    = 0x3300,
     /* Record the last string as a tagged string */
     eTaggedStr = 0x3400,

     /* ---- Type support ---- */

     /* Start getting a type */
     eBegType   = 0x4100,
     /* Record and init type */
     eSetType   = 0x4200
 };

 /* One cell of the state transition table: the state to move to and the
    action to perform while making that move. */
 struct STransition {
     enum ENode fNext;       /* state entered after this transition */
     enum EAction fAction;   /* action code executed by parse() */
 };

 /* This table describes a finite state machine which parses resource
    bundle text files rather strictly. Each row represents a node (state),
    in the order of enum ENode. The four columns of a row are the
    transitions taken on a string, an open brace, a close brace and a
    comma token, in that order. Most transitions are "eError" because most
    transitions are disallowed. For example, if the parser has just seen a
    tag name, it enters node 4 ("eGotTag"). The state table then marks only
    one valid transition, which is into node 5, upon seeing an open brace
    token. We allow an extra comma after the last element in a
    comma-delimited list (transition from eList to eIdle on a close
    brace).

    NOTE(review): no cell of this table leads into eTypeStart, so the
    eTypeStart/eGotType rows look unreachable from here - confirm. */
 static struct STransition gTransitionTable [] = {
   /*                kString           kOpenBrace            kCloseBrace         kComma       */
   /*eError*/    {eError,eNOP},       {eError,eNOP},        {eError,eNOP},      {eError,eNOP},

   /*eInitial*/  {eGotLoc,eOpen},     {eError,eNOP},        {eError,eNOP},      {eError,eNOP},
   /*eGotLoc*/   {eError,eNOP},       {eIdle,eNOP},         {eError,eNOP},      {eError,eNOP},

   /*eIdle*/     {eGotTag,eSetTag},   {eError,eNOP},        {eInitial,eClose},  {eError,eNOP},
   /*eGotTag*/   {eError,eNOP},       {eNode5,eNOP},        {eError,eNOP},      {eError,eNOP},
   /*eNode5*/    {eNode6,eNOP},       {e2dArray,eBeg2dList},{eError,eNOP},      {eError,eNOP},
   /*eNode6*/    {eError,eNOP},       {eTagList,eBegTagged},{eIdle,eStr},       {eList,eBegList},

   /*eList*/     {eNode8,eListStr},   {eError,eNOP},         {eIdle,eEndList},  {eError,eNOP},
   /*eNode8*/    {eError,eNOP},       {eError,eNOP},         {eIdle,eEndList},  {eList,eNOP},

   /*eTagList*/  {eNode10,eTaggedStr},{eError,eNOP},         {eError,eNOP},     {eError,eNOP},
   /*eNode10*/   {eError,eNOP},       {eError,eNOP},         {eNode11,eNOP},    {eError,eNOP},
   /*eNode11*/   {eNode12,eNOP},      {eError,eNOP},         {eIdle,eEndTagged},{eError,eNOP},
   /*eNode12*/   {eError,eNOP},       {eTagList,eSubtag},    {eError,eNOP},     {eError,eNOP},

   /*e2dArray*/  {eNode14,e2dStr},    {eError,eNOP},         {eNode15,eNOP},    {eError,eNOP},
   /*eNode14*/   {eError,eNOP},       {eError,eNOP},         {eNode15,eNOP},    {e2dArray,eNOP},
   /*eNode15*/   {eError,eNOP},       {e2dArray,eNewRow},    {eIdle,eEnd2dList},{eNode16,eNOP},
   /*eNode16*/   {eError,eNOP},       {e2dArray,eNewRow},    {eIdle,eEnd2dList},{eError,eNOP},
   /*eTypeStart*/{eGotType,eSetType}, {eError,eNOP},         {eError,eNOP},     {eError,eNOP},
   /*eGotType*/  {eError,eNOP},       {eError,eNOP},         {eError,eNOP},     {eError,eNOP}
 };

 /* Fetch the transition for state `row` on token column `col`.
    The table is stored row-major with 4 cells per row, hence the shift
    by 2. Both arguments are parenthesized so that expression arguments
    (e.g. `a | b`) are evaluated before being combined into the index.
    NOTE(review): `col` must be in 0..3; a token type outside that range
    would index into a neighbouring row. */
 #define GETTRANSITION(row,col) (gTransitionTable[(col) + ((row)<<2)])

 /*********************************************************************
  * Hashtable glue
  ********************************************************************/

 /* Report whether `tag` has an entry in `hash`: the result is non-zero
    exactly when uhash_get() returns a non-NULL value for it. */
 static UBool get(UHashtable *hash, const struct UString* tag) {
     const void *entry = uhash_get(hash, tag);

     return (UBool)(entry != NULL);
 }

 /* Record `tag` in `hash`. The hashtable receives a freshly allocated copy
    of the string as its key and the dummy non-NULL value 1, so that get()
    can later detect the tag. Errors from the copy or the insertion are
    reported through `status`.
    NOTE(review): the result of uprv_malloc() is not checked here. */
 static void put(UHashtable *hash, const struct UString *tag,
                 UErrorCode* status) {
     struct UString *copy;

     copy = (struct UString*)uprv_malloc(sizeof(struct UString));
     ustr_init(copy);
     ustr_cpy(copy, tag, status);

     uhash_put(hash, copy, (void*)1, status);
 }

 /* Key deleter installed on the tag hashtable: releases the contents of a
    heap-allocated UString and then the UString structure itself. */
 static void freeUString(void* ustr) {
     struct UString *key = (struct UString*)ustr;

     ustr_deinit(key);
     uprv_free(key);
 }

 static int32_t hashUString(const void* ustr) {
     return uhash_hashUChars(((struct UString*)ustr)->fChars);
 }

 static UBool compareUString(const void* ustr1, const void* ustr2) {
     return uhash_compareUChars(((struct UString*)ustr1)->fChars,
                                ((struct UString*)ustr2)->fChars);
 }

 /*
  * Read the argument of a type modificator, i.e. the token sequence
  *     { string }
  * from `file`.
  *
  * Returns a newly allocated char copy of the string, which the caller
  * must release with uprv_free(). On any failure (unexpected token,
  * tokenizer error, or failed allocation) the error text is set, *status
  * becomes U_INVALID_FORMAT_ERROR and NULL is returned.
  *
  * The scratch token is released on every path.
  */
 char *getModificationData(struct UFILE *file, UErrorCode *status) {
     enum ETokenType modType;
     struct UString modToken;
     char *retValue = NULL;
     int32_t valueLength;

     ustr_init(&modToken);
     modType = getNextToken(file, &modToken, status);
     if(U_SUCCESS(*status) && modType == tok_open_brace) {
         modType = getNextToken(file, &modToken, status);
         if(U_SUCCESS(*status) && modType == tok_string) {
             valueLength = u_strlen(modToken.fChars);
             retValue = uprv_malloc(valueLength+1);
             /* a failed allocation falls through to the generic error below */
             if(retValue != NULL) {
                 u_UCharsToChars(modToken.fChars, retValue, valueLength+1);
                 modType = getNextToken(file, &modToken, status);
                 if(U_SUCCESS(*status) && modType == tok_close_brace) {
                     ustr_deinit(&modToken);
                     return retValue;
                 }
                 uprv_free(retValue);
             }
         }
     }
     ustr_deinit(&modToken);
     setErrorText("Invalid modificator directive");
     *status = U_INVALID_FORMAT_ERROR;

     return NULL;
 }

 /*********************************************************************
  * parse
  ********************************************************************/
 /* Current line of the input being parsed; parse() resets it to 1 when it
    starts reading a file. NOTE(review): presumably advanced by the
    tokenizer and read for error reporting elsewhere - confirm. */
 int32_t lineCount = 0;
 /* Name of the most recent tag seen by parse(); set to "<none>" on entry
    to parse(). */
 char lastTag[200] = "";

 struct SRBRoot*
 parse(FileStream *f, const char *cp, const char *inputDir,
       UErrorCode *status)
 {
     struct UFILE *file;
     enum ETokenType type;
     enum ENode node;
     struct STransition t;

     struct UString token;
     struct UString tag;

     char cTag[1024];
     char cSubTag[1024];
     struct SRBRoot *bundle = NULL;
     struct SResource *rootTable = NULL;
     struct SResource *temp = NULL;
     struct SResource *temp1 = NULL;
     struct SResource *temp2 = NULL;
     UBool colEl = FALSE, colOverride = FALSE, ucaEl = FALSE;
     UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; /* Just to store "TRUE" and "FALSE" */
     UChar falseValue[] = {0x0046, 0x0041, 0x004C, 0x0053, 0x0045, 0x0000};

     /* Hashtable for keeping track of seen tag names */
     struct UHashtable *data;

     strcpy(lastTag, "<none>");

     if(U_FAILURE(*status)) return NULL;

     /* setup */

     ustr_init(&token);
     ustr_init(&tag);
 /*
     cTag = uprv_malloc(1024);
     if(cTag == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
     }
     cSubTag = uprv_malloc(1024);
     if(cSubTag == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
     }
 */

     node = eInitial;
     data = 0;

     file = u_finit((FILE *)f, 0, cp);
     lineCount = 1;
 /*  file = u_finit(f, cp, status); */
     if(file == NULL) {
         setErrorText("Could not initialize input file - most probably because of wrong converter\n");
         *status = U_INVALID_FORMAT_ERROR;
         goto finish;
     }

     bundle = bundle_open(status);
     rootTable = bundle -> fRoot;

     if(U_FAILURE(*status) || file == NULL) {
         goto finish;
     }

     /* iterate through the stream */
     for(;;) {
         /* Collation tailoring rules version */
         UVersionInfo version;
         /* get next token from stream */
         type = getNextToken(file, &token, status);
         if(U_FAILURE(*status)) {
             goto finish;
         }

         switch(type) {
         case tok_EOF:
             *status = (node == eInitial) ? U_ZERO_ERROR : U_INVALID_FORMAT_ERROR;
             if(U_FAILURE(*status)) {
                 setErrorText("Unexpected EOF encountered");
             }
             goto finish;
             /*break;*/

         case tok_error:
             *status = U_INVALID_FORMAT_ERROR;
             goto finish;
             /*break;*/

         default:
             break;
         }

         t = GETTRANSITION(node, type);
         node = t.fNext;

         if(node == eError) {
             *status = U_INVALID_FORMAT_ERROR;
             goto finish;
         }

         switch(t.fAction) {
         case eNOP:
             break;

           /* Record the last string as the tag name */
         case eSetTag:
             ustr_cpy(&tag, &token, status);
             u_UCharsToChars(tag.fChars, cTag, u_strlen(tag.fChars)+1);
             if(U_FAILURE(*status)) {
                 goto finish;
             }
              strcpy(lastTag, cTag);
            /*  fprintf(stdout, "%d: %s\n", lineCount,  lastTag); //[prints all tags]
             */

             if(get(data, &tag)) {
                 char *s;
                 *status = U_INVALID_FORMAT_ERROR;
                 s = uprv_malloc(1024);
                 strcpy(s, "Duplicate tag name detected: ");
                 u_austrcpy(s+strlen(s), tag.fChars);
                 setErrorText(s);
                 goto finish;
             }
             {
                 char *modificator = uprv_strchr(cTag, ':');
                 if(modificator != NULL) {
                     /* type modificator - do the type modification*/
                     *modificator = '\0';
                     ustr_deinit(&tag);
                     ustr_setlen(&tag, uprv_strlen(cTag), status);
                     u_charsToUChars(cTag, tag.fChars, uprv_strlen(cTag));
                     /* we need to test whether we have the same name, different type here */
                     if(get(data, &tag)) {
                       char *s;
                       *status = U_INVALID_FORMAT_ERROR;
                       s = uprv_malloc(1024);
                       strcpy(s, "Duplicate tag name detected: ");
                       u_austrcpy(s+strlen(s), tag.fChars);
                       setErrorText(s);
                       goto finish;
                     }
                     modificator++;
                     /* including streams of binary data */
                     if(uprv_strcmp(modificator, "bin") == 0) {
                       char *binaryValue;
                       char toConv[3];
                       uint32_t i = 0, bytesConverted = 0;
                       uint8_t val = 0;
                       uint8_t *newValue;
                       fprintf(stdout, "bin\n");
                       binaryValue = getModificationData(file, status);
                       if(U_SUCCESS(*status) && binaryValue != NULL) {
                         /* do the parsing & outputing of the data */
                         fprintf(stdout, "Will parse binary value  %s and store it in tag: %s\n", binaryValue, cTag);
                         newValue = uprv_malloc(sizeof(uint8_t)*uprv_strlen(binaryValue));
                         for(i = 0; i<uprv_strlen(binaryValue); i+=2) {
                           toConv[0] = *(binaryValue+i);
                           toConv[1] = *(binaryValue+i+1);
                           toConv[2] = '\0';
                           val = (uint8_t)uprv_strtoul(toConv, NULL, 16);
                           newValue[bytesConverted] = val;
                           bytesConverted++;
                         }
                         temp1 = bin_open(bundle, cTag, bytesConverted, newValue, status);
                         table_add(rootTable, temp1, status);

                         uprv_free(newValue);
                         uprv_free(binaryValue);
                         node = eIdle;
                       } else {
                         if(binaryValue != NULL) {
                           uprv_free(binaryValue);
                         }
                         node = eError;
                       }
                     }
                     /* including integers */
                     else if(uprv_strcmp(modificator, "int") == 0) {
                       char *intValue;
                       int32_t val;
                       fprintf(stdout, "int\n");
                       intValue = getModificationData(file, status);
                       if(U_SUCCESS(*status) && intValue != NULL) {
                         /* do the parsing & outputing of the data */
                         fprintf(stdout, "Will parse integer value  %s and store it in tag: %s\n", intValue, cTag);
                         val = uprv_strtol(intValue, NULL, 10);
                         uprv_free(intValue);
                         temp1 = int_open(bundle, cTag, val, status);
                         fprintf(stdout, "Added integer %s, value %d -> %s\n", cTag, val,
                           u_errorName(*status) );
                         table_add(rootTable, temp1, status);

                         put(data, &tag, status);
                         node = eIdle;
                       } else {
                         if(intValue != NULL) {
                           uprv_free(intValue);
                         }
                         node = eError;
                       }
                     }
                     /* importing a file and storing it in a binary object */
                     else if(uprv_strcmp(modificator, "import") == 0) {
                       FileStream *importFile;
                       int32_t len;
                       uint8_t *binData;
                       char *fileName;
                       fprintf(stdout, "import\n");
                       fileName = getModificationData(file, status);
                       if(U_SUCCESS(*status) && fileName != NULL) {
                         /* do the reading & outputing of the file */
                         fprintf(stdout, "Will read %s and store it in tag:  %s\n", fileName, cTag);
                         /* Open the input file for reading */
                         if(inputDir == NULL) {
                           importFile = T_FileStream_open(fileName, "rb");
                         } else {
                           char *openFileName = NULL;
                           int32_t dirlen = uprv_strlen(inputDir);
                           int32_t filelen = uprv_strlen(fileName);
                           if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
                             openFileName = (char *) uprv_malloc(dirlen+filelen+2);
                             uprv_strcpy(openFileName, inputDir);
                             openFileName[dirlen] = U_FILE_SEP_CHAR;
                             openFileName[dirlen+1] = '\0';
                             uprv_strcat(openFileName, fileName);
                           } else {
                             openFileName = (char *) uprv_malloc(dirlen+filelen+1);
                             uprv_strcpy(openFileName, inputDir);
                             uprv_strcat(openFileName, fileName);
                           }
                           importFile = T_FileStream_open(openFileName, "rb");
                           uprv_free(openFileName);
                         }
                         if(importFile == NULL) {
                           fprintf(stderr, "Error! Couldn't open input file %s for tag %s\n", fileName, cTag);

                           node = eError;
                           continue;
                         }

                         len = T_FileStream_size(importFile);
                         binData = uprv_malloc(len);
                         T_FileStream_read(importFile,binData,len);
                         T_FileStream_close(importFile);

                         temp1 = bin_open(bundle, cTag, len, binData, status);
                         fprintf(stdout, "Added %s, len %d -> %s\n", cTag, len,
                                   u_errorName(*status) );
                         table_add(rootTable, temp1, status);
                         uprv_free(binData);
                         uprv_free(fileName);
                         put(data, &tag, status);
                         node = eIdle;
                       } else {
                         if(fileName != NULL) {
                           uprv_free(fileName);
                         }
                         node = eError;
                       }
                     }
                     /* array of integers, still unimplemented */
                     else if(uprv_strcmp(modificator, "intarray") == 0) {
                       fprintf(stdout, "intarray\n");
                     }
                     /* unknown tupe - an error */
                     else {
                       fprintf(stderr, "Unknown %s\n", modificator);
                     }

                 } else if(uprv_strcmp(cTag, "CollationElements") == 0) {
                     colEl = TRUE;
                 } else if(uprv_strcmp(cTag, "%%UCARULES")==0){
                     ucaEl =TRUE;
                 }
             }
             break;

           /* Record a singleton string */
         case eStr:
             /* check if we have reached here after finding %%UCARULES */
             if(ucaEl==TRUE){
                 UChar *c,*end,*ucaRulesStr;
                 FileStream *in =NULL;
                 UFILE* ufile=NULL;
                 int fileLength = 0;
                 char fileName[256]={'\0'};
                 char cs[128] = { '\0'};
                 char* cp=NULL;
                 char start[3] ={'0'};
                 ucaEl=FALSE; /* reset ucaEL */
                 /* make the fileName including the directory */
                 uprv_strcat(fileName,inputDir);
                 uprv_strcat(fileName,U_FILE_SEP_STRING);
                 uprv_strcat(fileName,U_ICU_UNIDATA);
                 uprv_strcat(fileName,U_FILE_SEP_STRING);
                 u_UCharsToChars(token.fChars,cs,token.fLength);
                 uprv_strcat(fileName, cs);
                 /* open the file */
                 in = T_FileStream_open(fileName, "rb");
                 T_FileStream_read(in, start, 3);
                 if(start[0] == '\xFE' && start[1] == '\xFF') {
                     cp = "UTF16_BigEndian";
                 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
                      cp = "UTF16_LittleEndian";
                 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
                     cp = "UTF8";
                 }
                 ufile = u_finit((FILE*) in,0, cp);
                 if(in && ufile){
                     fileLength =T_FileStream_size(in);
                     ucaRulesStr = (UChar*)uprv_malloc(sizeof(UChar) * fileLength);
                     c= ucaRulesStr;
                     end = ucaRulesStr + fileLength/2;
                     /* read in the rulses */
                     while(c < end) {
                       *c++ = u_fgetc(ufile);
                     }
                      /* couldn't read all chars */
                     if(c < end) {
                         fprintf(stderr, "Error! Couldn't read all chars from input file %s for tag %s\n", fileName, cTag);
                     }else{
                         /* Add it to bundle */
                         temp = string_open(bundle,cTag, ucaRulesStr, fileLength/2, status);
                         table_add(rootTable, temp, status);
                         put(data, &tag, status);
                         if(U_FAILURE(*status)) {
                             goto finish;
                         }
                         temp = NULL;
                     }
                     uprv_free(ucaRulesStr);
                 }else{
                     fprintf(stderr, "Error! Couldn't open input file %s for tag %s\n", fileName, cTag );
                     goto finish;
                 }

             }else{
                 if(temp != NULL) {
                     *status = U_INTERNAL_PROGRAM_ERROR;
                     goto finish;
                 }
                 temp = string_open(bundle, cTag, token.fChars, token.fLength, status);
                 table_add(rootTable, temp, status);

                 /*uhash_put(data, tag.fChars, status);*/
                 put(data, &tag, status);
                 if(U_FAILURE(*status)) {
                     goto finish;
                 }
                 temp = NULL;
             }
             break;
           /* Begin a string list */
         case eBegList:
             if(temp != NULL) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             temp = array_open(bundle, cTag, status);
             temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status);
             array_add(temp, temp1, status);
             temp1 = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

           /* Record a comma-delimited list string */
         case eListStr:
             temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status);
             array_add(temp, temp1, status);
             temp1 = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

           /* End a string list */
         case eEndList:
             /*uhash_put(data, tag.fChars, status);*/
             put(data, &tag, status);
             table_add(rootTable, temp, status);
             temp = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eBeg2dList:
             if(temp != NULL) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             temp = array_open(bundle, cTag, status);
             temp1 = array_open(bundle, NULL, status);
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eEnd2dList:
             /*uhash_put(data, tag.fChars, status);*/
             put(data, &tag, status);
             array_add(temp, temp1, status);
             table_add(rootTable, temp, status);
             temp1 = NULL;
             temp = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case e2dStr:
             temp2 = string_open(bundle, NULL, token.fChars, token.fLength, status);
             array_add(temp1, temp2, status);
             temp2 = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eNewRow:
             array_add(temp, temp1, status);
             temp1 = array_open(bundle, NULL, status);
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eBegTagged:
             if(temp != NULL) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             temp = table_open(bundle, cTag, status);
             u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1);
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eEndTagged:
             /*uhash_put(data, tag.fChars, status);*/
             put(data, &tag, status);
             table_add(rootTable, temp, status);
             temp = NULL;
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             break;

         case eTaggedStr:
             temp1 = string_open(bundle, cSubTag, token.fChars, token.fLength, status);
             table_add(temp, temp1, status);
             temp1 = NULL;

             if(U_FAILURE(*status)) {
                 goto finish;
             }
             /* We have seen the Override tag aleady, now checks if the value is "TRUE" or "FALSE". */
             if (uprv_strcmp(cSubTag, "Override") == 0)
             {
                 if (u_strncmp(token.fChars, trueValue, u_strlen(trueValue)) == 0)
                 {
                     colOverride = TRUE;
                 } else {
                     colOverride = FALSE;
                 }
             }
             if(colEl && (uprv_strcmp(cSubTag, "Version") == 0)){
                 char tVer[40];
                 int32_t length=u_strlen(token.fChars);
                 if(length>=(int32_t)sizeof(tVer)) {
                     length=(int32_t)sizeof(tVer)-1;
                 }
                 u_UCharsToChars(token.fChars, tVer, length);
                 u_versionFromString(version,tVer);
             }
             if (colEl && (uprv_strcmp(cSubTag, "Sequence") == 0))
             {
                 UErrorCode intStatus = U_ZERO_ERROR;
                 uint32_t defaultRulesArrayLength = 0;
                 /* do the collation elements */
                 int32_t len = 0;
                 uint8_t *binColData = NULL;
                 UCollator *coll = NULL;
                 UChar *rules = NULL;
                 struct UString newTag;

                 coll = ucol_openRules(token.fChars, token.fLength, UCOL_DECOMP_CAN, 0, &intStatus);

                 if(U_SUCCESS(intStatus) && coll !=NULL) {
                     ucol_setNormalization(coll, UCOL_NO_NORMALIZATION);
                     binColData = ucol_cloneRuleData(coll, &len, &intStatus);
                     coll->dataInfo.dataVersion[1] = version[0]; /*tailoring rules version*/
                     if(U_SUCCESS(*status) && data != NULL) {
                         temp1 = bin_open(bundle, "%%CollationNew", len, binColData, status);
                         table_add(rootTable, temp1, status);
                         uprv_free(binColData);
                     } else {
                       setErrorText("Warning: could not obtain rules from collator");
                     }
                     ucol_close(coll);
                 } else {
                     setErrorText("Warning: %%Collation could not be constructed from CollationElements - check context!");
                 }
                 uprv_free(rules);
                 colEl = FALSE;
                 colOverride = FALSE;
                 intStatus = U_ZERO_ERROR;
                 ustr_initChars(&newTag, "CollationElements", -1, &intStatus);
                 if(U_FAILURE(intStatus)) {
                     goto finish;
                 }
                 put(data, &newTag, &intStatus);
                 ustr_deinit(&newTag);
                 if(U_FAILURE(intStatus)) {
                     goto finish;
                 }
             }
             break;
           /* Record the last string as the subtag */
         case eSubtag:
             u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1);
             if(U_FAILURE(*status)) {
                 goto finish;
             }
             if(table_get(temp, cSubTag, status) != 0) {
                 *status = U_INVALID_FORMAT_ERROR;
                 setErrorText("Duplicate subtag found in tagged list");
                 goto finish;
             }
             break;

         case eOpen:
             if(data != 0) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             bundle_setlocale(bundle, token.fChars, status);

             if(U_FAILURE(*status)) {
                 goto finish;
             }
             data = uhash_open(hashUString, compareUString, status);
             uhash_setKeyDeleter(data, freeUString);
             break;

         case eClose:
             if(data == 0) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             break;
         case eSetType:
             /* type recognition */
             if(!didInit) {
                 U_STRING_INIT(k_start_string, "string", 6);
                 U_STRING_INIT(k_start_binary, "binary", 6);
                 U_STRING_INIT(k_start_table, "table", 5);
                 U_STRING_INIT(k_start_int, "int", 3);
                 U_STRING_INIT(k_start_array, "array", 5);
                 U_STRING_INIT(k_start_intvector, "intvector", 9);
                 U_STRING_INIT(k_start_reserved, "reserved", 8);
                 didInit=TRUE;
             }
             if(u_strcmp(token.fChars, k_start_string) == 0) {
                 node = eGotTag;
             } else if(u_strcmp(token.fChars, k_start_array) == 0) {
                 node = eGotTag;
             } else if(u_strcmp(token.fChars, k_start_table) == 0) {
                 node = eGotTag;
             } else if(u_strcmp(token.fChars, k_start_binary) == 0) {
                 /* start of binary */
             } else if(u_strcmp(token.fChars, k_start_int) == 0) {
                 /* start of integer */
             } else if(u_strcmp(token.fChars, k_start_intvector) == 0) {
                 /* start of intvector */
             } else if(u_strcmp(token.fChars, k_start_reserved) == 0) {
                 /* start of reserved */
             } else {
                 *status = U_INTERNAL_PROGRAM_ERROR;
                 goto finish;
             }
             break;
         }
     }

 finish:

     /* clean  up */

     if(data != 0)
         uhash_close(data);

     ustr_deinit(&token);
     ustr_deinit(&tag);

     /*uprv_free(cTag);*/
     /*uprv_free(cSubTag);*/

     if(file != 0)
         u_fclose(file);

     return bundle;
 }