unicode/c/genuca/genuca.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 *
 *   Copyright (C) 2000-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  genuca.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created at the end of XX century
 *   created by: Vladimir Weinstein
 *
 *   This program reads the Fractional UCA table and generates
 *   internal format for UCA table as well as inverse UCA table.
 *   It then writes binary files containing the data: ucadata.dat
 *   & invuca.dat
 *   Change history:
 *   02/23/2001  grhoten                 Made it into a tool
 *   02/23/2001  weiv                    Moved element & table handling code to i18n
 *   05/09/2001  weiv                    Case bits are now in the CEs, not in front
 *   10/26/2010  sgill                   Support for reordering codes
 */

 #define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1

 #include "unicode/utypes.h"
 #include "unicode/putil.h"
 #include "unicode/udata.h"
 #include "unicode/uclean.h"
 #include "unicode/uscript.h"
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
 #include "charstr.h"
 #include "ucol_bld.h"
 #include "ucol_imp.h"
 #include "genuca.h"
 #include "uoptions.h"
 #include "uparse.h"
 #include "toolutil.h"
 #include "unewdata.h"
 #include "cstring.h"
 #include "cmemory.h"

 #include <stdio.h>

 /**
  * Number of elements of a true array (not a pointer), as an int32_t.
  * The whole expansion is parenthesized so that the macro can be used
  * safely inside larger expressions.
  */
 #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

 /** The maximum UTF-16 length (number of UChars) in a UCA contraction. */
 static const int32_t MAX_UCA_CONTRACTION_LENGTH=4;

 // script reordering structures
 /**
  * One entry of the script-to-lead-bytes index: a reorder code together with
  * its index value. ReorderIndexComparer() orders entries by reorderCode.
  */
 typedef struct {
     uint16_t reorderCode;  // reorder code this entry describes
     uint16_t offset;       // 0 if there are no lead bytes, 0x8000|leadByte for exactly one;
                            // otherwise presumably an offset into the lead-byte data block (TODO confirm)
 } ReorderIndex;

 /**
  * Working tables for the script reordering data, filled in by readAnElement()
  * while it parses the "[top_byte" and "[reorderingTokens" directives.
  */
 typedef struct {
     uint16_t LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH;   // capacity of LEAD_BYTE_TO_SCRIPTS_INDEX; lead bytes >= this are rejected
     uint16_t* LEAD_BYTE_TO_SCRIPTS_INDEX;         // indexed by lead byte: 0, 0x8000|reorderCode, or an offset into LEAD_BYTE_TO_SCRIPTS_DATA
     uint16_t LEAD_BYTE_TO_SCRIPTS_DATA_LENGTH;    // capacity of LEAD_BYTE_TO_SCRIPTS_DATA
     uint16_t* LEAD_BYTE_TO_SCRIPTS_DATA;          // records of the form: count, then that many reorder codes
     uint16_t LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET;    // next write position in LEAD_BYTE_TO_SCRIPTS_DATA

     uint16_t SCRIPT_TO_LEAD_BYTES_INDEX_LENGTH;   // capacity of SCRIPT_TO_LEAD_BYTES_INDEX
     ReorderIndex* SCRIPT_TO_LEAD_BYTES_INDEX;     // one entry per reorder code read from "[reorderingTokens"
     uint16_t SCRIPT_TO_LEAD_BYTES_INDEX_COUNT;    // index of the SCRIPT_TO_LEAD_BYTES_INDEX entry being filled
     uint16_t SCRIPT_TO_LEAD_BYTES_DATA_LENGTH;    // presumably the capacity of SCRIPT_TO_LEAD_BYTES_DATA (TODO confirm)
     uint16_t* SCRIPT_TO_LEAD_BYTES_DATA;          // presumably lead-byte lists for codes with more than one lead byte (TODO confirm)
     uint16_t SCRIPT_TO_LEAD_BYTES_DATA_OFFSET;    // presumably the next write position in SCRIPT_TO_LEAD_BYTES_DATA (TODO confirm)
 } LeadByteConstants;

 int ReorderIndexComparer(const void *a, const void *b) {
     return reinterpret_cast<const ReorderIndex*>(a)->reorderCode - reinterpret_cast<const ReorderIndex*>(b)->reorderCode;
 }

 /*
  * Global - verbosity.
  * When TRUE, the tool prints progress and diagnostic output to stdout.
  */
 UBool beVerbose = FALSE;

 /* UCA version, parsed from the "[UCA version = " directive of the input file. */
 static UVersionInfo UCAVersion;

 #if UCONFIG_NO_COLLATION

 /*
  * Dummy UDataInfo (cf. udata.h), compiled only when UCONFIG_NO_COLLATION is set.
  * Its data format, format version and data version are all zero.
  */
 static UDataInfo dummyDataInfo = {
     sizeof(UDataInfo),
     0,

     U_IS_BIG_ENDIAN,
     U_CHARSET_FAMILY,
     U_SIZEOF_UCHAR,
     0,

     { 0, 0, 0, 0 },                 /* dummy dataFormat */
     { 0, 0, 0, 0 },                 /* dummy formatVersion */
     { 0, 0, 0, 0 }                  /* dummy dataVersion */
 };

 #else

 /*
  * UDataInfo template for the UCA data ("UCol" format).
  * The dataVersion is left as zeros in this template.
  */
 static const UDataInfo ucaDataInfo={
     sizeof(UDataInfo),
     0,

     U_IS_BIG_ENDIAN,
     U_CHARSET_FAMILY,
     sizeof(UChar),
     0,

     {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3},     /* dataFormat="UCol"            */
     /* 03/26/2002 bumped up version since format has changed */
     /* 09/16/2002 bumped up version since we went from UColAttributeValue */
     /*            to int32_t in UColOptionSet */
     /* 05/13/2003 This one also updated since we added UCA and UCD versions */
     /*            to header */
     /* 09/11/2003 Adding information required by data swapper */
     {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3},                 /* formatVersion                */
     {0, 0, 0, 0}                  /* dataVersion = Unicode Version*/
 };

 /*
  * UDataInfo template for the inverse UCA data ("InvC" format).
  * writeOutInverseData() copies it and fills in dataVersion with the
  * Unicode version before creating the output file.
  */
 static const UDataInfo invUcaDataInfo={
     sizeof(UDataInfo),
     0,

     U_IS_BIG_ENDIAN,
     U_CHARSET_FAMILY,
     sizeof(UChar),
     0,

     {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3},     /* dataFormat="InvC"            */
     /* 03/26/2002 bumped up version since format has changed */
     /* 04/29/2003 2.1 format - we have added UCA version to header */
     {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3},                 /* formatVersion                */
     {0, 0, 0, 0}                  /* dataVersion = Unicode Version*/
 };

 // Single shared element record: readAnElement() zeroes it and uses it as
 // the element for the line being parsed.
 UCAElements le;

 // returns number of characters read
 int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
     if(U_FAILURE(*status)) {
         return 0;
     }
     char buffer[1024];
     int32_t i = 0;
     for(;;) {
         char c = **from;
         if(c == separator || (separator == ' ' && c == '\t')) {
             break;
         }
         if (c == '\0') {
             return 0;
         }
         if(c != ' ') {
             *(buffer+i++) = c;
         }
         (*from)++;
     }
     (*from)++;
     *(buffer + i) = 0;
     //*to = (char *)malloc(strlen(buffer)+1);
     strcpy(to, buffer);
     return i;
 }

 /**
  * Advances *from to the next space, tab or terminating NUL.
  *
  * @param from   in/out cursor into a NUL-terminated string
  * @param status ICU error code; nothing is done if it already indicates a failure
  * @return number of characters skipped
  */
 int32_t skipUntilWhiteSpace(char **from, UErrorCode *status) {
     if (U_FAILURE(*status)) {
         return 0;
     }
     char *const start = *from;
     char *cursor = start;
     for (; *cursor != '\0'; ++cursor) {
         if (*cursor == ' ' || *cursor == '\t') {
             break;
         }
     }
     *from = cursor;
     return (int32_t)(cursor - start);
 }

 /**
  * Advances *from past a run of spaces and tabs.
  *
  * @param from   in/out cursor into a NUL-terminated string
  * @param status ICU error code; nothing is done if it already indicates a failure
  * @return number of characters skipped
  */
 int32_t skipWhiteSpace(char **from, UErrorCode *status) {
     if (U_FAILURE(*status)) {
         return 0;
     }
     char *const start = *from;
     char *cursor = start;
     for (;;) {
         const char c = *cursor;
         if (c != ' ' && c != '\t') {
             break;
         }
         ++cursor;
     }
     *from = cursor;
     return (int32_t)(cursor - start);
 }

 /**
  * Builds one 32-bit CE from hexadecimal weight strings.
  *
  * Only the leading 4 hex digits of `primary` and the leading 2 hex digits of
  * `secondary` and `tertiary` are used. Longer strings are truncated in place
  * while parsing and restored before returning. An empty string yields a
  * weight of 0. A primary value that fits into one byte is moved into the
  * upper byte of the 16-bit primary weight.
  *
  * @param primary   hex digits of the primary weight (temporarily modified)
  * @param secondary hex digits of the secondary weight (temporarily modified)
  * @param tertiary  hex digits of the tertiary weight (temporarily modified)
  * @param status    ICU error code; 0 is returned if it already indicates a failure
  * @return the combined CE value
  */
 uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) {
     if(U_FAILURE(*status)) {
         return 0;
     }
     // Truncation points. They are kept constant: strtoul() must not be given
     // these pointers as its end pointer, otherwise malformed input would make
     // the restore below write the saved character to the wrong position.
     char *const primend = primary+4;
     char *const secend = secondary+2;
     char *const terend = tertiary+2;
     char primsave = '\0';
     char secsave = '\0';
     char tersave = '\0';
     if(strlen(primary) > 4) {
         primsave = *primend;
         *primend = '\0';
     }
     if(strlen(secondary) > 2) {
         secsave = *secend;
         *secend = '\0';
     }
     if(strlen(tertiary) > 2) {
         tersave = *terend;
         *terend = '\0';
     }
     uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, NULL, 16):0);
     uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, NULL, 16):0);
     uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, NULL, 16):0);
     if(primvalue <= 0xFF) {
       primvalue <<= 8;  // single-byte primary: occupy the lead byte
     }

     const uint32_t value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
         ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
         (tervalue&UCOL_TERTIARYORDERMASK);

     // Undo the temporary truncation of the caller's strings.
     if(primsave!='\0') {
         *primend = primsave;
     }
     if(secsave!='\0') {
         *secend = secsave;
     }
     if(tersave!='\0') {
         *terend = tersave;
     }
     return value;
 }

 // Inverse UCA table under construction. Each row holds:
 //   [0] lead CE, [1] continuation CE or 0,
 //   [2] a single code point, or (size << UCOL_INV_SHIFTVALUE) | offset into stringContinue.
 static uint32_t inverseTable[0xFFFF][3];
 // Index of the last row in use; row 0 is zeroed by addToInverse() before the first insertion.
 static uint32_t inversePos = 0;
 // Storage for the code unit strings referenced from column [2] of inverseTable.
 static UChar stringContinue[0xFFFF];
 // Next free index in stringContinue.
 static uint32_t sContPos = 0;

 /**
  * Appends a new row for `element` after the current last row of inverseTable.
  *
  * Column [0] receives the first CE, column [1] the second CE if it is a
  * continuation (otherwise 0). Column [2] receives the code point itself for
  * elements shorter than 2 code units; longer elements are copied into
  * stringContinue and referenced by size and offset.
  *
  * @param element the element to add
  * @param status  ICU error code; nothing is done if it already indicates a
  *                failure. Set to U_BUFFER_OVERFLOW_ERROR if one of the
  *                fixed-size tables is full.
  */
 static void addNewInverse(UCAElements *element, UErrorCode *status) {
   if(U_FAILURE(*status)) {
     return;
   }
   // assembleInverseTable() appends a terminator row behind the last one,
   // so one extra row has to stay available.
   if(inversePos + 2 >= (uint32_t)LENGTHOF(inverseTable)) {
     *status = U_BUFFER_OVERFLOW_ERROR;
     return;
   }
   const UBool needsString = (UBool)(element->cSize >= 2);
   if(needsString && sContPos + element->cSize + 1 > (uint32_t)LENGTHOF(stringContinue)) {
     *status = U_BUFFER_OVERFLOW_ERROR;
     return;
   }

   inversePos++;
   inverseTable[inversePos][0] = element->CEs[0];
   if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
     inverseTable[inversePos][1] = element->CEs[1];
   } else {
     inverseTable[inversePos][1] = 0;
   }
   if(!needsString) {
     inverseTable[inversePos][2] = element->cPoints[0];
   } else { /* add a new store of cruft */
     inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
     memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
     // One extra unit is reserved behind the copied string.
     sContPos += element->cSize+1;
   }
 }

 /**
  * Inserts a new row for `element` at index `position` of inverseTable,
  * shifting the rows position..inversePos up by one.
  *
  * The row is filled as follows: [0] first CE, [1] second CE if it is a
  * continuation (otherwise 0), [2] the code point itself for elements
  * shorter than 2 code units, else a size/offset reference to a copy of
  * the string in stringContinue.
  *
  * @param element  the element to insert
  * @param position row index at which the element is stored
  * @param status   ICU error code; nothing is done if it already indicates a
  *                 failure. Set to U_BUFFER_OVERFLOW_ERROR if one of the
  *                 fixed-size tables is full.
  */
 static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
   if(U_FAILURE(*status)) {
     return;
   }
   // assembleInverseTable() appends a terminator row behind the last one,
   // so one extra row has to stay available.
   if(inversePos + 2 >= (uint32_t)LENGTHOF(inverseTable)) {
     *status = U_BUFFER_OVERFLOW_ERROR;
     return;
   }
   const UBool needsString = (UBool)(element->cSize >= 2);
   if(needsString && sContPos + element->cSize + 1 > (uint32_t)LENGTHOF(stringContinue)) {
     *status = U_BUFFER_OVERFLOW_ERROR;
     return;
   }

   if(position <= inversePos) {
     /*move stuff around */
     uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]);
     uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove);
   }
   inverseTable[position][0] = element->CEs[0];
   if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
     inverseTable[position][1] = element->CEs[1];
   } else {
     inverseTable[position][1] = 0;
   }
   if(!needsString) {
     inverseTable[position][2] = element->cPoints[0];
   } else { /* add a new store of cruft */
     inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
     memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
     // One extra unit is reserved behind the copied string.
     sContPos += element->cSize+1;
   }
   inversePos++;
 }

 /**
  * Adds the code points of `element` to the row at `position`, which already
  * holds an entry with the same CEs.
  *
  * In stringContinue the strings sharing one row are separated by 0xFFFF and
  * the group is ended by 0xFFFE.
  *
  * @param element  the element whose code points are added
  * @param position index of the existing row in inverseTable
  * @param status   ICU error code; nothing is done if it already indicates a failure
  */
 static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {

   if(U_FAILURE(*status)) {
     return;
   }

       if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */
         // New group layout: <old code point> 0xFFFF <new code points...> 0xFFFE,
         // i.e. cSize+3 units starting at the current end of stringContinue.
         stringContinue[sContPos] = (UChar)inverseTable[position][2];
         inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos;
         sContPos++;
         stringContinue[sContPos++] = 0xFFFF;
         memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
         sContPos += element->cSize;
         stringContinue[sContPos++] = 0xFFFE;
       } else { /* adding to the already existing continuing table */
         uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK;
         uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;

         if(contIndex+contSize < sContPos) {
           // The group is not the last one in stringContinue: make room behind it.
           // NOTE(review): only cSize+1 units are copied, not the whole tail up to
           // sContPos, and offsets stored in other rows are not adjusted - confirm
           // that this is sufficient for the data being processed.
           /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/
           memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar));
         }

         // The last unit of the old group becomes a separator, then the new
         // code points and a new 0xFFFE end marker follow.
         stringContinue[contIndex+contSize-1] = 0xFFFF;
         memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar));
         sContPos += element->cSize+1;
         stringContinue[contIndex+contSize+element->cSize] = 0xFFFE;

         // The group grew by cSize+1 units; its start offset is unchanged.
         inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex;
       }
 }

 /*
  * Compares two (lead CE, continuation CE) pairs level by level:
  * first the combined primary weights, then the combined secondary
  * weights and finally the combined tertiary weights.
  * The second CE of a pair only takes part if it is a continuation.
  *
  * Returns 0 if both pairs are identical, -1 if source sorts before
  * target and 1 otherwise.
  */
 static int32_t compareCEs(uint32_t *source, uint32_t *target) {
   const uint32_t srcLead = source[0];
   const uint32_t tgtLead = target[0];
   const uint32_t srcCont = isContinuation(source[1]) ? source[1] : 0;
   const uint32_t tgtCont = isContinuation(target[1]) ? target[1] : 0;

   if(srcLead == tgtLead && srcCont == tgtCont) {
     return 0;
   }

   /* primary: upper 16 bits of the lead CE, then of the continuation */
   uint32_t srcKey = (srcLead & 0xFFFF0000) | ((srcCont & 0xFFFF0000) >> 16);
   uint32_t tgtKey = (tgtLead & 0xFFFF0000) | ((tgtCont & 0xFFFF0000) >> 16);
   if(srcKey != tgtKey) {
     return (srcKey < tgtKey) ? -1 : 1;
   }

   /* secondary: bits 8..15 of the lead CE, then of the continuation */
   srcKey = (srcLead & 0x0000FF00) | ((srcCont & 0x0000FF00) >> 8);
   tgtKey = (tgtLead & 0x0000FF00) | ((tgtCont & 0x0000FF00) >> 8);
   if(srcKey != tgtKey) {
     return (srcKey < tgtKey) ? -1 : 1;
   }

   /* tertiary: low byte of the lead CE, then of the continuation */
   srcKey = ((srcLead & 0x000000FF) << 8) | (srcCont & 0x000000FF);
   tgtKey = ((tgtLead & 0x000000FF) << 8) | (tgtCont & 0x000000FF);
   return (srcKey < tgtKey) ? -1 : 1;
 }

 /**
  * Adds `element` to the inverse table, keeping the rows sorted by CEs.
  *
  * The bits 0xC0 of the first CE are masked out while the element is
  * compared and stored, and are restored afterwards. For an element with
  * a single CE, CEs[1] is set to 0.
  * Depending on how the element compares with the existing rows it is
  * appended, inserted in the middle, or merged into a row with equal CEs.
  *
  * @param element the element to add
  * @param status  ICU error code, passed on to the helper that stores the element
  * @return the index of the last row in use after the addition
  */
 static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
   const uint32_t savedLeadCE = element->CEs[0];
   element->CEs[0] &= 0xFFFFFF3F;
   if(element->noOfCEs == 1) {
     element->CEs[1] = 0;
   }

   if(inversePos == 0) {
     /* very first element: set up the all-zero row 0 */
     inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
     addNewInverse(element, status);
   } else {
     int32_t order = compareCEs(inverseTable[inversePos], element->CEs);
     if(order > 0) {
       /* sorts before the last row: walk back to the first row that is not greater */
       uint32_t slot = inversePos;
       do {
         order = compareCEs(inverseTable[--slot], element->CEs);
       } while(order > 0);
       if(beVerbose) { printf("p:%u ", (int)slot); }
       if(order == 0) {
         addToExistingInverse(element, slot, status);
       } else {
         insertInverse(element, slot+1, status);
       }
     } else if(order == 0) {
       addToExistingInverse(element, inversePos, status);
     } else {
       addNewInverse(element, status);
     }
   }

   element->CEs[0] = savedLeadCE;
   if(beVerbose) { printf("+"); }
   return inversePos;
 }

 /**
  * Packs the inverse table into one newly allocated memory block:
  * a padded InverseUCATableHeader, followed by the table rows and then by
  * the contents of stringContinue.
  *
  * A terminator row {0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF} is appended to
  * inverseTable first (inversePos is advanced accordingly). The rows are
  * then checked for ascending order; violations are reported on stderr but
  * do not stop the assembly.
  *
  * @param status set to U_MEMORY_ALLOCATION_ERROR if the block cannot be allocated
  * @return the assembled block (allocated with uprv_malloc), or NULL on
  *         allocation failure
  */
 static InverseUCATableHeader *assembleInverseTable(UErrorCode *status)
 {
   uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader));
   uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
   uint32_t contsByteSize = sContPos * sizeof(UChar);
   uint32_t totalByteSize = headerByteSize + inverseTableByteSize + contsByteSize;
   uint32_t i = 0;

   InverseUCATableHeader *result = (InverseUCATableHeader *)uprv_malloc(totalByteSize);
   if(result == NULL) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     return NULL;
   }
   // Clear the block only after the allocation is known to have succeeded.
   uprv_memset(result, 0, totalByteSize);
   result->byteSize = totalByteSize;

   // Append the terminator row.
   inversePos++;
   inverseTable[inversePos][0] = 0xFFFFFFFF;
   inverseTable[inversePos][1] = 0xFFFFFFFF;
   inverseTable[inversePos][2] = 0x0000FFFF;
   inversePos++;

   // Sanity check: rows must be in ascending order.
   for(i = 2; i<inversePos; i++) {
     if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
       fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
     } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
       fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
     }
   }

   result->tableSize = inversePos;
   result->contsSize = sContPos;

   // Offsets of the two data sections, relative to the start of the block.
   result->table = headerByteSize;
   result->conts = headerByteSize + inverseTableByteSize;

   memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
   memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);

   return result;
 }


 /**
  * Writes the assembled inverse UCA table to a binary data file in outputDir.
  *
  * The file header is a copy of invUcaDataInfo whose dataVersion is set to
  * the current Unicode version.
  *
  * @param data      the assembled table; data->byteSize bytes are written
  * @param outputDir directory in which the file is created
  * @param copyright copyright text passed on to udata_create()
  * @param status    ICU error code; a failure is reported on stderr
  */
 static void writeOutInverseData(InverseUCATableHeader *data,
                   const char *outputDir,
                   const char *copyright,
                   UErrorCode *status)
 {
     UNewDataMemory *pData;

     UDataInfo invUcaInfo;
     uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
     u_getUnicodeVersion(invUcaInfo.dataVersion);

     pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
                        copyright, status);

     if(U_FAILURE(*status)) {
         // The macro is separated from the string literals by spaces: in C++11
         // an identifier directly after a literal is parsed as a literal suffix.
         fprintf(stderr, "Error: unable to create %s" INVC_DATA_NAME ", error %s\n", outputDir, u_errorName(*status));
         return;
     }

     /* write the data to the file */
     if (beVerbose) {
         printf("Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
                                                                 INVC_DATA_NAME,
                                                                 INVC_DATA_TYPE);
     }
     udata_writeBlock(pData, data, data->byteSize);

     /* finish up */
     udata_finish(pData, status);
     if(U_FAILURE(*status)) {
         fprintf(stderr, "Error: error %d writing the output file\n", *status);
         return;
     }
 }

 /**
  * Converts one hexadecimal digit character to its numeric value.
  *
  * @param hex '0'..'9', 'a'..'f' or 'A'..'F'
  * @return the value 0..15; 0 for any other character
  */
 static int32_t hex2num(char hex) {
     if('0' <= hex && hex <= '9') {
         return hex - '0';
     }
     switch(hex) {
     case 'a': case 'A': return 10;
     case 'b': case 'B': return 11;
     case 'c': case 'C': return 12;
     case 'd': case 'D': return 13;
     case 'e': case 'E': return 14;
     case 'f': case 'F': return 15;
     default:            return 0;
     }
 }

 // static char* CHARACTER_CATEGORY_REORDER_CODES[] = {
 //     "Zs", "Nd", "Sc"
 // };
 // static const uint16_t CHARACTER_CATEGORY_REORDER_CODE_OFFSET = 0x1000;
 // static uint16_t CHARACTER_CATEGORY_REORDER_CODES_VALUE[] = {
 //     U_SPACE_SEPARATOR + CHARACTER_CATEGORY_REORDER_CODE_OFFSET,
 //     U_DECIMAL_DIGIT_NUMBER + CHARACTER_CATEGORY_REORDER_CODE_OFFSET,
 //     U_CURRENCY_SYMBOL + CHARACTER_CATEGORY_REORDER_CODE_OFFSET
 // };

 /**
  * Token names that getReorderCode() accepts in addition to reordering
  * entries and script property values, with the code returned for each.
  * A code of -2 marks a token that the caller skips.
  */
 static const struct {
     const char *name;
     int32_t code;
 } specialReorderTokens[] = {
     { "TERMINATOR", -2 },  // -2 means "ignore"
     { "LEVEL-SEPARATOR", -2 },
     { "FIELD-SEPARATOR", -2 },
     { "COMPRESS", -2 },  // TODO: We should parse/store which lead bytes are compressible; there is a ticket for that.
     { "PUNCTUATION", UCOL_REORDER_CODE_PUNCTUATION },
     { "IMPLICIT", USCRIPT_HAN },  // Implicit weights are usually for Han characters. Han & unassigned share a lead byte.
     { "TRAILING", -2 },  // We do not reorder trailing weights (those after implicits).
     { "SPECIAL", -2 }  // We must never reorder internal, special CE lead bytes.
 };

 /**
  * Maps a name from the input file to a reorder code.
  *
  * The name is looked up, in this order, as a reordering entry
  * (ucol_findReorderingEntry), as a value of the script property, and in
  * the specialReorderTokens table.
  *
  * @param name NUL-terminated token name
  * @return the code found (>= 0, or -2 for special tokens that are to be
  *         ignored), or -1 if the name is unknown
  */
 int32_t getReorderCode(const char* name) {
     const int32_t reorderingEntry = ucol_findReorderingEntry(name);
     if (reorderingEntry >= 0) {
         return reorderingEntry;
     }

     const int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, name);
     if (script >= 0) {
         return script;
     }

     const int32_t tokenCount = LENGTHOF(specialReorderTokens);
     int32_t idx = 0;
     while (idx < tokenCount && strcmp(name, specialReorderTokens[idx].name) != 0) {
         ++idx;
     }
     if (idx < tokenCount) {
         return specialReorderTokens[idx].code;
     }
     return -1;  // Same as UCHAR_INVALID_CODE or USCRIPT_INVALID_CODE.
 }

 UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, LeadByteConstants *leadByteConstants, UErrorCode *status) {
     static int itemsToDataBlock = 0;
     static int scriptDataWritten = 0;
     char buffer[2048], primary[100], secondary[100], tertiary[100];
     UChar uBuffer[2048];
     UChar uBuffer2[2048];
     UChar leadByte[100], scriptCode[100];
     int32_t i = 0;
     unsigned int theValue;
     char *pointer = NULL;
     char *commentStart = NULL;
     char *startCodePoint = NULL;
     char *endCodePoint = NULL;
     char *result = fgets(buffer, 2048, data);
     int32_t buflen = (int32_t)uprv_strlen(buffer);
     if(U_FAILURE(*status)) {
         return 0;
     }
     *primary = *secondary = *tertiary = '\0';
     *leadByte = *scriptCode = '\0';
     if(result == NULL) {
         if(feof(data)) {
             return NULL;
         } else {
             fprintf(stderr, "empty line but no EOF!\n");
             *status = U_INVALID_FORMAT_ERROR;
             return NULL;
         }
     }
     while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
       buffer[--buflen] = 0;
     }

     if(buffer[0] == 0 || buffer[0] == '#') {
         return NULL; // just a comment, skip whole line
     }

     UCAElements *element = &le;
     memset(element, 0, sizeof(*element));

     enum ActionType {
       READCE,
       READHEX1,
       READHEX2,
       READUCAVERSION,
       READLEADBYTETOSCRIPTS,
       READSCRIPTTOLEADBYTES,
       IGNORE,
     };

     // Directives.
     if(buffer[0] == '[') {
       uint32_t cnt = 0;
       static const struct {
         char name[128];
         uint32_t *what;
         ActionType what_to_do;
       } vt[]  = { {"[first tertiary ignorable",  consts->UCA_FIRST_TERTIARY_IGNORABLE,  READCE},
                   {"[last tertiary ignorable",   consts->UCA_LAST_TERTIARY_IGNORABLE,   READCE},
                   {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE},
                   {"[last secondary ignorable",  consts->UCA_LAST_SECONDARY_IGNORABLE,  READCE},
                   {"[first primary ignorable",   consts->UCA_FIRST_PRIMARY_IGNORABLE,   READCE},
                   {"[last primary ignorable",    consts->UCA_LAST_PRIMARY_IGNORABLE,    READCE},
                   {"[first variable",            consts->UCA_FIRST_VARIABLE,            READCE},
                   {"[last variable",             consts->UCA_LAST_VARIABLE,             READCE},
                   {"[first regular",             consts->UCA_FIRST_NON_VARIABLE,        READCE},
                   {"[last regular",              consts->UCA_LAST_NON_VARIABLE,         READCE},
                   {"[first implicit",            consts->UCA_FIRST_IMPLICIT,            READCE},
                   {"[last implicit",             consts->UCA_LAST_IMPLICIT,             READCE},
                   {"[first trailing",            consts->UCA_FIRST_TRAILING,            READCE},
                   {"[last trailing",             consts->UCA_LAST_TRAILING,             READCE},

                   {"[fixed top",                    &consts->UCA_PRIMARY_TOP_MIN,       READHEX1},
                   {"[fixed first implicit byte",    &consts->UCA_PRIMARY_IMPLICIT_MIN,  READHEX1},
                   {"[fixed last implicit byte",     &consts->UCA_PRIMARY_IMPLICIT_MAX,  READHEX1},
                   {"[fixed first trail byte",       &consts->UCA_PRIMARY_TRAILING_MIN,  READHEX1},
                   {"[fixed last trail byte",        &consts->UCA_PRIMARY_TRAILING_MAX,  READHEX1},
                   {"[fixed first special byte",     &consts->UCA_PRIMARY_SPECIAL_MIN,   READHEX1},
                   {"[fixed last special byte",      &consts->UCA_PRIMARY_SPECIAL_MAX,   READHEX1},
                   {"[variable top = ",              &t->options->variableTopValue,      READHEX2},
                   {"[UCA version = ",               NULL,                               READUCAVERSION},
                   {"[top_byte",                     NULL,                               READLEADBYTETOSCRIPTS},
                   {"[reorderingTokens",             NULL,                               READSCRIPTTOLEADBYTES},
                   {"[categories",                   NULL,                               IGNORE},
                   {"[first tertiary in secondary non-ignorable",                 NULL,                               IGNORE},
                   {"[last tertiary in secondary non-ignorable",                 NULL,                               IGNORE},
                   {"[first secondary in primary non-ignorable",                 NULL,                               IGNORE},
                   {"[last secondary in primary non-ignorable",                 NULL,                               IGNORE},
       };
       for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
         uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
         if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
             ActionType what_to_do = vt[cnt].what_to_do;
             if (what_to_do == IGNORE) { //vt[cnt].what_to_do == IGNORE
                 return NULL;
             } else if(what_to_do == READHEX1 || what_to_do == READHEX2) {
               pointer = buffer+vtLen;
               int32_t numBytes = readElement(&pointer, primary, ']', status) / 2;
               if(numBytes != (what_to_do == READHEX1 ? 1 : 2)) {
                   fprintf(stderr, "Value of \"%s\" has unexpected number of %d bytes\n",
                           buffer, (int)numBytes);
                   //*status = U_INVALID_FORMAT_ERROR;
                   return NULL;
               }
               *(vt[cnt].what) = (uint32_t)uprv_strtoul(primary, &pointer, 16);
               if(*pointer != 0) {
                   fprintf(stderr, "Value of \"%s\" is not a hexadecimal number\n", buffer);
                   //*status = U_INVALID_FORMAT_ERROR;
                   return NULL;
               }
             } else if (what_to_do == READCE) {
               // TODO: combine & clean up the two CE parsers
               pointer = strchr(buffer+vtLen, '[');
               if(pointer) {
                 pointer++;
                 element->sizePrim[0]=readElement(&pointer, primary, ',', status) / 2;
                 element->sizeSec[0]=readElement(&pointer, secondary, ',', status) / 2;
                 element->sizeTer[0]=readElement(&pointer, tertiary, ']', status) / 2;
                 vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status);
                 if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) {
                   uint32_t CEi = 1;
                   uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
                     if(2*CEi<element->sizePrim[i]) {
                         value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
                         value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
                     }

                     if(2*CEi+1<element->sizePrim[i]) {
                         value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
                         value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
                     }

                     if(CEi<element->sizeSec[i]) {
                         value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
                         value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
                     }

                     if(CEi<element->sizeTer[i]) {
                         value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
                         value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
                     }

                     CEi++;

                     vt[cnt].what[1] = value;
                     //element->CEs[CEindex++] = value;
                 } else {
                   vt[cnt].what[1] = 0;
                 }
               } else {
                 fprintf(stderr, "Failed to read a CE from line %s\n", buffer);
               }
             } else if (what_to_do == READUCAVERSION) { //vt[cnt].what_to_do == READUCAVERSION
               u_versionFromString(UCAVersion, buffer+vtLen);
               if(beVerbose) {
                 char uca[U_MAX_VERSION_STRING_LENGTH];
                 u_versionToString(UCAVersion, uca);
                 printf("UCA version %s\n", uca);
               }
               UVersionInfo UCDVersion;
               u_getUnicodeVersion(UCDVersion);
               if (UCAVersion[0] != UCDVersion[0] || UCAVersion[1] != UCDVersion[1]) {
                 char uca[U_MAX_VERSION_STRING_LENGTH];
                 char ucd[U_MAX_VERSION_STRING_LENGTH];
                 u_versionToString(UCAVersion, uca);
                 u_versionToString(UCDVersion, ucd);
                 fprintf(stderr, "error: UCA version %s != UCD version %s (temporarily change the FractionalUCA.txt UCA version during Unicode version upgrade)\n", uca, ucd);
                 *status = U_INVALID_FORMAT_ERROR;
                 return NULL;
               }
             } else if (what_to_do == READLEADBYTETOSCRIPTS) { //vt[cnt].what_to_do == READLEADBYTETOSCRIPTS
                 pointer = buffer + vtLen;
                 skipWhiteSpace(&pointer, status);

                 uint16_t leadByte = (hex2num(*pointer++) * 16) + hex2num(*pointer++);
                 //printf("~~~~ processing lead byte = %02x\n", leadByte);
                 if (leadByte >= leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH) {
                     fprintf(stderr, "Lead byte larger than allocated table!");
                     // set status and return
                     *status = U_INTERNAL_PROGRAM_ERROR;
                     return NULL;
                 }
                 skipWhiteSpace(&pointer, status);

                 int32_t reorderCodeArray[100];
                 uint32_t reorderCodeArrayCount = 0;
                 char scriptName[100];
                 int32_t elementLength = 0;
                 while ((elementLength = readElement(&pointer, scriptName, ' ', status)) > 0) {
                     if (scriptName[0] == ']') {
                         break;
                     }
                     int32_t reorderCode = getReorderCode(scriptName);
                     if (reorderCode == -2) {
                         continue;  // Ignore "TERMINATOR" etc.
                     }
                     if (reorderCode < 0) {
                         printf("Syntax error: unable to parse reorder code from '%s'\n", scriptName);
                         *status = U_INVALID_FORMAT_ERROR;
                         return NULL;
                     }
                     if (reorderCodeArrayCount >= LENGTHOF(reorderCodeArray)) {
                         printf("reorder code array count is greater than allocated size!\n");
                         *status = U_INTERNAL_PROGRAM_ERROR;
                         return NULL;
                     }
                     reorderCodeArray[reorderCodeArrayCount++] = reorderCode;
                 }
                 //printf("reorderCodeArrayCount = %d\n", reorderCodeArrayCount);
                 switch (reorderCodeArrayCount) {
                     case 0:
                         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX[leadByte] = 0;
                         break;
                     case 1:
                         // TODO = move 0x8000 into defined constant
                         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX[leadByte] = 0x8000 | reorderCodeArray[0];
                         break;
                     default:
                         if (reorderCodeArrayCount + leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET > leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_LENGTH) {
                             // Error condition
                         }
                         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX[leadByte] = leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET;
                         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA[leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET++] = reorderCodeArrayCount;
                         for (int reorderCodeIndex = 0; reorderCodeIndex < reorderCodeArrayCount; reorderCodeIndex++) {
                             leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA[leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET++] = reorderCodeArray[reorderCodeIndex];
                         }
                 }
             } else if (what_to_do == READSCRIPTTOLEADBYTES) { //vt[cnt].what_to_do == READSCRIPTTOLEADBYTES
                 uint16_t leadByteArray[100];
                 uint32_t leadByteArrayCount = 0;
                 char scriptName[100];

                 pointer = buffer + vtLen;
                 skipWhiteSpace(&pointer, status);
                 uint32_t scriptNameLength = readElement(&pointer, scriptName, '\t', status);
                 int32_t reorderCode = getReorderCode(scriptName);
                 if (reorderCode >= 0) {
                     //printf("^^^ processing reorder code = %04x (%s)\n", reorderCode, scriptName);
                     skipWhiteSpace(&pointer, status);

                     int32_t elementLength = 0;
                     char leadByteString[100];
                     while ((elementLength = readElement(&pointer, leadByteString, '=', status)) == 2) {
                         //printf("\tleadByteArrayCount = %d, elementLength = %d, leadByteString = %s\n", leadByteArrayCount, elementLength, leadByteString);
                         uint32_t leadByte = (hex2num(leadByteString[0]) * 16) + hex2num(leadByteString[1]);
                         leadByteArray[leadByteArrayCount++] = (uint16_t) leadByte;
                         skipUntilWhiteSpace(&pointer, status);
                     }

                     if (leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT >= leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_LENGTH) {
                         //printf("\tError condition\n");
                         //printf("\tindex count = %d, total index size = %d\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT, sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX) / sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[0]));
                         // Error condition
                         *status = U_INTERNAL_PROGRAM_ERROR;
                         return NULL;
                     }
                     leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].reorderCode = reorderCode;

                     //printf("\tlead byte count = %d\n", leadByteArrayCount);
                     //printf("\tlead byte array = ");
                     //for (int i = 0; i < leadByteArrayCount; i++) {
                     //    printf("%02x, ", leadByteArray[i]);
                     //}
                     //printf("\n");

                     switch (leadByteArrayCount) {
                         case 0:
                             leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].offset = 0;
                             break;
                         case 1:
                             // TODO = move 0x8000 into defined constant
                             //printf("\t+++++ lead byte = &x\n", leadByteArray[0]);
                             leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].offset = 0x8000 | leadByteArray[0];
                             break;
                         default:
                             //printf("\t+++++ lead bytes written to data block - %d\n", itemsToDataBlock++);
                             //printf("\tlead bytes = ");
                             //for (int i = 0; i < leadByteArrayCount; i++) {
                             //    printf("%02x, ", leadByteArray[i]);
                             //}
                             //printf("\n");
                             //printf("\tBEFORE data bytes = ");
                             //for (int i = 0; i < leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET; i++) {
                             //    printf("%02x, ", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[i]);
                             //}
                             //printf("\n");
                             //printf("\tdata offset = %d, data length = %d\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET, leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_LENGTH);
                             if ((leadByteArrayCount + leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET) > leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_LENGTH) {
                                 //printf("\tError condition\n");
                                 // Error condition
                                 *status = U_INTERNAL_PROGRAM_ERROR;
                                 return NULL;
                             }
                             leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].offset = leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET;
                             leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET++] = leadByteArrayCount;
                             scriptDataWritten++;
                             memcpy(&leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET],
                                 leadByteArray, leadByteArrayCount * sizeof(leadByteArray[0]));
                             scriptDataWritten += leadByteArrayCount;
                             //printf("\tlead byte data written = %d\n", scriptDataWritten);
                             //printf("\tcurrentIndex.reorderCode = %04x, currentIndex.offset = %04x\n",
                             //    leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT.reorderCode, leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT.offset);
                             leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET += leadByteArrayCount;
                             //printf("\tdata offset = %d\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET);
                             //printf("\tAFTER data bytes = ");
                             //for (int i = 0; i < leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET; i++) {
                             //    printf("%02x, ", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[i]);
                             //}
                             //printf("\n");
                     }
                     //if (reorderCode >= 0x1000) {
                      //   printf("@@@@ reorderCode = %x, offset = %x\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].reorderCode, leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT].offset);
                      //   for (int i = 0; i < leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET; i++) {
                     //        printf("%02x, ", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[i]);
                      //   }
                     //    printf("\n");
                    // }
                     leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT++;
                 }
             }
             return NULL;
         }
       }
       fprintf(stderr, "Warning: unrecognized option: %s\n", buffer);
       //*status = U_INVALID_FORMAT_ERROR;
       return NULL;
     }

     startCodePoint = buffer;
     endCodePoint = strchr(startCodePoint, ';');

     if(endCodePoint == 0) {
         fprintf(stderr, "error - line with no code point!\n");
         *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
         return NULL;
     } else {
         *(endCodePoint) = 0;
     }

     char *pipePointer = strchr(buffer, '|');
     if (pipePointer != NULL) {
         // Read the prefix string which precedes the actual string.
         *pipePointer = 0;
         element->prefixSize =
             u_parseString(startCodePoint,
                           element->prefixChars, LENGTHOF(element->prefixChars),
                           NULL, status);
         if(U_FAILURE(*status)) {
             fprintf(stderr, "error - parsing of prefix \"%s\" failed: %s\n",
                     startCodePoint, u_errorName(*status));
             *status = U_INVALID_FORMAT_ERROR;
             return NULL;
         }
         element->prefix = element->prefixChars;
         startCodePoint = pipePointer + 1;
     }

     // Read the string which gets the CE(s) assigned.
     element->cSize =
         u_parseString(startCodePoint,
                       element->uchars, LENGTHOF(element->uchars),
                       NULL, status);
     if(U_FAILURE(*status)) {
         fprintf(stderr, "error - parsing of code point(s) \"%s\" failed: %s\n",
                 startCodePoint, u_errorName(*status));
         *status = U_INVALID_FORMAT_ERROR;
         return NULL;
     }
     element->cPoints = element->uchars;

     startCodePoint = endCodePoint+1;

     commentStart = strchr(startCodePoint, '#');
     if(commentStart == NULL) {
         commentStart = strlen(startCodePoint) + startCodePoint;
     }

     i = 0;
     uint32_t CEindex = 0;
     element->noOfCEs = 0;
     for(;;) {
         endCodePoint = strchr(startCodePoint, ']');
         if(endCodePoint == NULL || endCodePoint >= commentStart) {
             break;
         }
         pointer = strchr(startCodePoint, '[');
         pointer++;

         element->sizePrim[i]=readElement(&pointer, primary, ',', status) / 2;
         element->sizeSec[i]=readElement(&pointer, secondary, ',', status) / 2;
         element->sizeTer[i]=readElement(&pointer, tertiary, ']', status) / 2;


         /* I want to get the CEs entered right here, including continuation */
         element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status);

         uint32_t CEi = 1;
         while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
           uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
             if(2*CEi<element->sizePrim[i]) {
                 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
                 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
             }

             if(2*CEi+1<element->sizePrim[i]) {
                 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
                 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
             }

             if(CEi<element->sizeSec[i]) {
                 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
                 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
             }

             if(CEi<element->sizeTer[i]) {
                 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
                 value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
             }

             CEi++;

             element->CEs[CEindex++] = value;
         }

       startCodePoint = endCodePoint+1;
       i++;
     }
     element->noOfCEs = CEindex;
 #if 0
     element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]);
 #endif
     // we don't want any strange stuff after useful data!
     if (pointer == NULL) {
         /* huh? Did we get ']' without the '['? Pair your brackets! */
         *status=U_INVALID_FORMAT_ERROR;
     }
     else {
         while(pointer < commentStart)  {
             if(*pointer != ' ' && *pointer != '\t')
             {
                 *status=U_INVALID_FORMAT_ERROR;
                 break;
             }
             pointer++;
         }
     }
     if(element->cSize == 1 && element->cPoints[0] == 0xfffe) {
         // UCA 6.0 gives U+FFFE a special minimum weight using the
         // byte 02 which is the merge-sort-key separator and illegal for any
         // other characters.
     } else {
         // Rudimentary check for valid bytes in CE weights.
         // For a more comprehensive check see cintltst /tscoll/citertst/TestCEValidity
         for (i = 0; i < (int32_t)CEindex; ++i) {
             uint32_t value = element->CEs[i];
             uint8_t bytes[4] = {
                 (uint8_t)(value >> 24),
                 (uint8_t)(value >> 16),
                 (uint8_t)(value >> 8),
                 (uint8_t)(value & UCOL_NEW_TERTIARYORDERMASK)
             };
             for (int j = 0; j < 4; ++j) {
                 if (0 != bytes[j] && bytes[j] < 3) {
                     fprintf(stderr, "Warning: invalid UCA weight byte %02X for %s\n", bytes[j], buffer);
                     return NULL;
                 }
             }
             // Primary second bytes 03 and FF are compression terminators.
             if (!isContinuation(value) && (bytes[1] == 3 || bytes[1] == 0xFF)) {
                 fprintf(stderr, "Warning: invalid UCA primary second weight byte %02X for %s\n",
                         bytes[1], buffer);
                 return NULL;
             }
         }
     }

     if(U_FAILURE(*status)) {
         fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
         *status = U_INTERNAL_PROGRAM_ERROR;
         return NULL;
     }

     return element;
 }


 /**
  * Writes the assembled UCA table and its trailing sections to the data file
  * named by UCA_DATA_NAME / UCA_DATA_TYPE in outputDir.
  *
  * Sections are emitted in this order:
  *   1. the first data->size bytes of the table (size as it was on entry),
  *   2. the UCAConstants struct (not followed by padding bytes here, although
  *      data->size is advanced by paddedsize(sizeof(UCAConstants))),
  *   3. the contraction list plus one all-zero terminator row, padded
  *      (only when noOfcontractions != 0),
  *   4. script-to-lead-bytes: count, data length, sorted index, data block,
  *   5. lead-byte-to-scripts: index length, data length, index, data block.
  *
  * Side effects on the arguments:
  *   - data->UCAConsts, data->contractionUCACombos*, data->scriptToLeadByte,
  *     data->leadByteToScript and data->size are updated before the header is
  *     written, so the file records the new offsets.
  *   - leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX is sorted in place.
  *   - contractions[noOfcontractions] is overwritten with zeros, so the caller
  *     must provide at least noOfcontractions+1 rows.
  *
  * @param data              assembled table header/body; modified as described
  * @param consts            UCA constants written right after the table
  * @param leadByteConstants script reordering tables
  * @param contractions      contraction/prefix rows, MAX_UCA_CONTRACTION_LENGTH UChars each
  * @param noOfcontractions  number of rows in use (terminator row not counted)
  * @param outputDir         destination directory passed to udata_create()
  * @param copyright         copyright string passed to udata_create()
  * @param status            in/out error code; nothing is done if it already
  *                          indicates a failure
  */
 void writeOutData(UCATableHeader *data,
                   UCAConstants *consts,
                   LeadByteConstants *leadByteConstants,
                   UChar contractions[][MAX_UCA_CONTRACTION_LENGTH],
                   uint32_t noOfcontractions,
                   const char *outputDir,
                   const char *copyright,
                   UErrorCode *status)
 {
     if(U_FAILURE(*status)) {
         return;
     }

     // Byte count of the table as assembled; this is what gets written as the
     // first block, while data->size grows to cover the appended sections.
     const uint32_t size = data->size;

     data->UCAConsts = data->size;
     data->size += paddedsize(sizeof(UCAConstants));

     // Byte length of the contraction rows including the terminator row.
     uint32_t contractionBytes = 0;
     if(noOfcontractions != 0) {
       // Append an all-zero row that terminates the list.
       uprv_memset(&contractions[noOfcontractions][0], 0, MAX_UCA_CONTRACTION_LENGTH*U_SIZEOF_UCHAR);
       noOfcontractions++;
       contractionBytes = noOfcontractions*MAX_UCA_CONTRACTION_LENGTH*U_SIZEOF_UCHAR;

       data->contractionUCACombos = data->size;
       data->contractionUCACombosWidth = (uint8_t)MAX_UCA_CONTRACTION_LENGTH;
       data->contractionUCACombosSize = noOfcontractions;
       data->size += paddedsize(contractionBytes);
     }

     // Script-to-lead-bytes section: two 16-bit header fields (index count and
     // data length), followed by the index table and the data block.
     data->scriptToLeadByte = data->size;
     data->size +=
         sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT) +
         leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT * sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[0]) +
         sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET) +
         leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET * sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[0]);

     // Lead-byte-to-scripts section, laid out the same way.
     data->leadByteToScript = data->size;
     data->size +=
         sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH) +
         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH * sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX[0]) +
         sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET) +
         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET * sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA[0]);

     UDataInfo ucaInfo;
     uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
     u_getUnicodeVersion(ucaInfo.dataVersion);

     UNewDataMemory *pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
                                        copyright, status);

     if(U_FAILURE(*status)) {
         // Whitespace around the macro is required: C++11 would otherwise lex
         // "..."UCA_DATA_NAME as a literal with a user-defined suffix.
         fprintf(stderr, "Error: unable to create %s" UCA_DATA_NAME ", error %s\n", outputDir, u_errorName(*status));
         return;
     }

     /* write the data to the file */
     if (beVerbose) {
         printf("Writing out UCA table: %s%c%s.%s\n", outputDir,
                                                         U_FILE_SEP_CHAR,
                                                         U_ICUDATA_NAME "_" UCA_DATA_NAME,
                                                         UCA_DATA_TYPE);
     }
     udata_writeBlock(pData, data, size);

     // output the constants here
     udata_writeBlock(pData, consts, sizeof(UCAConstants));

     if (beVerbose) {
         printf("first tertiary ignorable = %x %x\n", consts->UCA_FIRST_TERTIARY_IGNORABLE[0], consts->UCA_FIRST_TERTIARY_IGNORABLE[1]);
         printf("last tertiary ignorable = %x %x\n", consts->UCA_LAST_TERTIARY_IGNORABLE[0], consts->UCA_LAST_TERTIARY_IGNORABLE[1]);
         printf("first secondary ignorable = %x %x\n", consts->UCA_FIRST_SECONDARY_IGNORABLE[0], consts->UCA_FIRST_SECONDARY_IGNORABLE[1]);
         printf("contractionUCACombosSize = %d\n", data->contractionUCACombosSize);
         printf("contractionSize = %d\n", data->contractionSize);
         printf("number of UCA contractions = %d\n", noOfcontractions);
     }

     if(noOfcontractions != 0) {
       udata_writeBlock(pData, contractions, contractionBytes);
       udata_writePadding(pData, paddedsize(contractionBytes) - contractionBytes);
     }

     // output the script to lead bytes table here
     if (beVerbose) {
         printf("Writing Script to Lead Byte Data\n");
         printf("\tindex table size = %x\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT);
         printf("\tdata block size = %x\n", leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET);
     }
     udata_write16(pData, leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT);
     udata_write16(pData, leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET);

     // The index is sorted with ReorderIndexComparer before it is written.
     qsort(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX,
           leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT,
           sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[0]),
           ReorderIndexComparer);
     udata_writeBlock(pData, leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX,
         leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX_COUNT * sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_INDEX[0]));

     // write out the script to lead bytes data block
     udata_writeBlock(pData, leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA,
         leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA_OFFSET * sizeof(leadByteConstants->SCRIPT_TO_LEAD_BYTES_DATA[0]));

     if (beVerbose) {
         printf("Writing Lead Byte To Script Data\n");
         printf("\tindex table size = %x\n", leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH);
         printf("\tdata block size = %x\n", leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET);
     }
     // output the header info
     udata_write16(pData, leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH);
     udata_write16(pData, leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET);

     // output the index table
     udata_writeBlock(pData, leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX,
         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH * sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_INDEX[0]));

     // output the data
     udata_writeBlock(pData, leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA,
         leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET * sizeof(leadByteConstants->LEAD_BYTE_TO_SCRIPTS_DATA[0]));

     /* finish up; the returned length is not needed, errors arrive via status */
     udata_finish(pData, status);
     if(U_FAILURE(*status)) {
         fprintf(stderr, "Error: error %d writing the output file\n", *status);
         return;
     }
 }

 /*
  * Row capacity of the contractions[][] array that write_uca_table() fills
  * and hands to writeOutData(). The array also has to hold the all-zero
  * terminator row that writeOutData() appends after the last contraction.
  * Raise this value if a new Unicode version needs more rows.
  */
 enum { MAX_UCA_CONTRACTIONS = 2048 };

 static int32_t
 write_uca_table(const char *filename,
                 const char *outputDir,
                 const char *copyright,
                 UErrorCode *status)
 {
     FILE *data = fopen(filename, "r");
     if(data == NULL) {
         fprintf(stderr, "Couldn't open file: %s\n", filename);
         return -1;
     }
     uint32_t line = 0;
     UCAElements *element = NULL;
     UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
     /* test for NULL */
     if(myD == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         fclose(data);
         return 0;
     }
     uprv_memset(myD, 0, sizeof(UCATableHeader));
     UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
     /* test for NULL */
     if(opts == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         uprv_free(myD);
         fclose(data);
         return 0;
     }
     uprv_memset(opts, 0, sizeof(UColOptionSet));
     UChar contractions[MAX_UCA_CONTRACTIONS][MAX_UCA_CONTRACTION_LENGTH];
     uprv_memset(contractions, 0, sizeof(contractions));
     uint32_t noOfContractions = 0;
     UCAConstants consts;
     uprv_memset(&consts, 0, sizeof(consts));
 #if 0
     UCAConstants consts = {
       UCOL_RESET_TOP_VALUE,
       UCOL_FIRST_PRIMARY_IGNORABLE,
       UCOL_LAST_PRIMARY_IGNORABLE,
       UCOL_LAST_PRIMARY_IGNORABLE_CONT,
       UCOL_FIRST_SECONDARY_IGNORABLE,
       UCOL_LAST_SECONDARY_IGNORABLE,
       UCOL_FIRST_TERTIARY_IGNORABLE,
       UCOL_LAST_TERTIARY_IGNORABLE,
       UCOL_FIRST_VARIABLE,
       UCOL_LAST_VARIABLE,
       UCOL_FIRST_NON_VARIABLE,
       UCOL_LAST_NON_VARIABLE,

       UCOL_NEXT_TOP_VALUE,
 /*
       UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
       UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
       UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
       UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
       UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
       UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
       UCOL_NEXT_FIRST_VARIABLE,
       UCOL_NEXT_LAST_VARIABLE,
 */

       PRIMARY_IMPLICIT_MIN,
       PRIMARY_IMPLICIT_MAX
     };
 #endif

     //printf("Allocating LeadByteConstants\n");
     LeadByteConstants leadByteConstants;
     uprv_memset(&leadByteConstants, 0x00, sizeof(LeadByteConstants));

     leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX_LENGTH = 256;
     leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX = (ReorderIndex*) uprv_malloc(leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX_LENGTH * sizeof(ReorderIndex));
     uprv_memset(leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX, 0x00, leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX_LENGTH * sizeof(ReorderIndex));
     leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA_LENGTH = 1024;
     leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA = (uint16_t*) uprv_malloc(leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA_LENGTH * sizeof(uint16_t));
     uprv_memset(leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA, 0x00, leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA_LENGTH * sizeof(uint16_t));
     //printf("\tFinished Allocating LeadByteConstants\n");

     leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH = 256;
     leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX = (uint16_t*) uprv_malloc(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH * sizeof(uint16_t));
     uprv_memset(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX, 0x8000 | USCRIPT_INVALID_CODE, leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX_LENGTH * sizeof(uint16_t));
     leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA_LENGTH = 1024;
     leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA_OFFSET = 1;     // offset by 1 to leave zero location for those lead bytes with no reorder codes
     leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA = (uint16_t*) uprv_malloc(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA_LENGTH * sizeof(uint16_t));
     uprv_memset(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA, 0x00, leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA_LENGTH * sizeof(uint16_t));

     uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);

     opts->variableTopValue = 0;
     opts->strength = UCOL_TERTIARY;
     opts->frenchCollation = UCOL_OFF;
     opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
     opts->caseFirst = UCOL_OFF;         /* who goes first, lower case or uppercase */
     opts->caseLevel = UCOL_OFF;         /* do we have an extra case level */
     opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
     opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
     opts->numericCollation = UCOL_OFF;
     myD->jamoSpecial = FALSE;

     tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
     if(U_FAILURE(*status))
     {
         fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
         uprv_free(opts);
         uprv_free(myD);
         fclose(data);
         return -1;
     }

     // * set to zero
     struct {
         UChar32 start;
         UChar32 end;
         int32_t value;
     } ranges[] =
     {
         {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) },  //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
         //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24)  },  //1 LEAD_SURROGATE_TAG, already set in utrie_open() /* D800-DBFF*/
         {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) },  //2 TRAIL_SURROGATE DC00-DFFF
         // Now directly handled in the collation code by the swapCJK function.
         //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //3 CJK_IMPLICIT_TAG,   /* 0x3400-0x4DB5*/
         //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //4 CJK_IMPLICIT_TAG,   /* 0x4E00-0x9FA5*/
         //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //5 CJK_IMPLICIT_TAG,   /* 0xF900-0xFA2D*/
         //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //6 CJK_IMPLICIT_TAG,   /* 0x20000-0x2A6D6*/
         //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //7 CJK_IMPLICIT_TAG,   /* 0x2F800-0x2FA1D*/
     };
     uint32_t i = 0;

     for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) {
       /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */
       utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE);
     }


     int32_t surrogateCount = 0;
     while(!feof(data)) {
         if(U_FAILURE(*status)) {
             fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
                 *status, u_errorName(*status), (int)line, filename);
             exit(*status);
         }

         line++;
         if(beVerbose) {
           printf("%u ", (int)line);
         }
         element = readAnElement(data, t, &consts, &leadByteConstants, status);
         if(element != NULL) {
             // we have read the line, now do something sensible with the read data!

             // if element is a contraction, we want to add it to contractions[]
             int32_t length = (int32_t)element->cSize;
             if(length > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction
               if(U16_IS_LEAD(element->cPoints[0]) && U16_IS_TRAIL(element->cPoints[1]) && length == 2) {
                 surrogateCount++;
               } else {
                 if(noOfContractions>=MAX_UCA_CONTRACTIONS) {
                   fprintf(stderr,
                           "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTIONS in genuca.cpp. "
                           "Exiting...\n",
                           (int)MAX_UCA_CONTRACTIONS);
                   exit(U_BUFFER_OVERFLOW_ERROR);
                 }
                 if(length > MAX_UCA_CONTRACTION_LENGTH) {
                   fprintf(stderr,
                           "\nLine %d: Contraction of length %d is too long. Please increase MAX_UCA_CONTRACTION_LENGTH in genuca.cpp. "
                           "Exiting...\n",
                           (int)line, (int)length);
                   exit(U_BUFFER_OVERFLOW_ERROR);
                 }
                 UChar *t = &contractions[noOfContractions][0];
                 u_memcpy(t, element->cPoints, length);
                 t += length;
                 for(; length < MAX_UCA_CONTRACTION_LENGTH; ++length) {
                     *t++ = 0;
                 }
                 noOfContractions++;
               }
             }
             else {
                 // TODO (claireho): does this work? Need more tests
                 // The following code is to handle the UCA pre-context rules
                 // for L/l with middle dot. We share the structures for contractionCombos.
                 // The format for pre-context character is
                 // contractions[0]: codepoint in element->cPoints[0]
                 // contractions[1]: '\0' to differentiate from a contraction
                 // contractions[2]: prefix char
                 if (element->prefixSize>0) {
                     if(length > 1 || element->prefixSize > 1) {
                         fprintf(stderr,
                                 "\nLine %d: Character with prefix, "
                                 "either too many characters or prefix too long.\n",
                                 (int)line);
                         exit(U_INTERNAL_PROGRAM_ERROR);
                     }
                     if(noOfContractions>=MAX_UCA_CONTRACTIONS) {
                       fprintf(stderr,
                               "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTIONS in genuca.cpp. "
                               "Exiting...\n",
                               (int)MAX_UCA_CONTRACTIONS);
                       exit(U_BUFFER_OVERFLOW_ERROR);
                     }
                     UChar *t = &contractions[noOfContractions][0];
                     t[0]=element->cPoints[0];
                     t[1]=0;
                     t[2]=element->prefixChars[0];
                     t += 3;
                     for(length = 3; length < MAX_UCA_CONTRACTION_LENGTH; ++length) {
                         *t++ = 0;
                     }
                     noOfContractions++;
                 }
             }

             /* we're first adding to inverse, because addAnElement will reverse the order */
             /* of code points and stuff... we don't want that to happen */
             if((element->CEs[0] >> 24) != 2) {
                 // Add every element except for the special minimum-weight character U+FFFE
                 // which has 02 weights.
                 // If we had 02 weights in the invuca table, then tailoring primary
                 // after an ignorable would try to put a weight before 02 which is not valid.
                 // We could fix this in a complicated way in the from-rule-string builder,
                 // but omitting this special element from invuca is simple and effective.
                 addToInverse(element, status);
             }
             if(!(length > 1 && element->cPoints[0] == 0xFDD0)) {
               uprv_uca_addAnElement(t, element, status);
             }
         }
     }

     if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) {
         fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
         uprv_uca_closeTempTable(t);
         uprv_free(opts);
         uprv_free(myD);
         fclose(data);
         return -1;
     }
 /*    {
         uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
     }*/

     if (beVerbose) {
         printf("\nLines read: %u\n", (int)line);
         printf("Surrogate count: %i\n", (int)surrogateCount);
         printf("Raw data breakdown:\n");
         /*printf("Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
         printf("Number of contractions: %u\n", (int)noOfContractions);
         printf("Contraction image size: %u\n", (int)t->image->contractionSize);
         printf("Expansions size: %i\n", (int)t->expansions->position);
     }


     /* produce canonical closure for table */
     /* first set up constants for implicit calculation */
     uprv_uca_initImplicitConstants(status);
     /* do the closure */
     UnicodeSet closed;
     int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, &closed, status);
     if(noOfClosures != 0) {
         fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
         UnicodeString pattern;
         std::string utf8;
         closed.toPattern(pattern, TRUE).toUTF8String(utf8);
         fprintf(stderr, "UTF-8 pattern string: %s\n", utf8.c_str());
     }

     /* test */
     UCATableHeader *myData = uprv_uca_assembleTable(t, status);

     if (beVerbose) {
         printf("Compacted data breakdown:\n");
         /*printf("Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
         printf("Number of contractions: %u\n", (int)noOfContractions);
         printf("Contraction image size: %u\n", (int)t->image->contractionSize);
         printf("Expansions size: %i\n", (int)t->expansions->position);
     }

     if(U_FAILURE(*status)) {
         fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
         uprv_uca_closeTempTable(t);
         uprv_free(opts);
         uprv_free(myD);
         fclose(data);
         return -1;
     }

     /* populate the version info struct with version info*/
     myData->version[0] = UCOL_BUILDER_VERSION;
     myData->version[1] = UCAVersion[0];
     myData->version[2] = UCAVersion[1];
     myData->version[3] = UCAVersion[2];
     /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
     // Removed this macro. Instead, we use the fields below
     //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION;
     //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt
     uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo));
     u_getUnicodeVersion(myData->UCDVersion);

     writeOutData(myData, &consts, &leadByteConstants, contractions, noOfContractions, outputDir, copyright, status);

     InverseUCATableHeader *inverse = assembleInverseTable(status);
     uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo));
     writeOutInverseData(inverse, outputDir, copyright, status);

     uprv_uca_closeTempTable(t);
     uprv_free(myD);
     uprv_free(opts);

     uprv_free(myData);
     uprv_free(inverse);

     uprv_free(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_INDEX);
     uprv_free(leadByteConstants.LEAD_BYTE_TO_SCRIPTS_DATA);
     uprv_free(leadByteConstants.SCRIPT_TO_LEAD_BYTES_INDEX);
     uprv_free(leadByteConstants.SCRIPT_TO_LEAD_BYTES_DATA);

     fclose(data);

     return 0;
 }

 #endif /* #if !UCONFIG_NO_COLLATION */

 /*
  * Positions of the individual command-line options inside options[].
  * The numeric values are spelled out so that the correspondence with the
  * array entries is visible at a glance.
  */
 enum {
     HELP_H             = 0,
     HELP_QUESTION_MARK = 1,
     COPYRIGHT          = 2,
     VERSION            = 3,
     VERBOSE            = 4,
     ICUDATADIR         = 5
 };

 /*
  * Command-line options recognized by genuca; main() hands this table to
  * u_parseArgs() and then reads the results back by index.
  *
  * Keep the order of these entries in sync with the enum above: each enum
  * constant is used as the index of the corresponding entry in this array
  * (for example options[VERBOSE] must be the UOPTION_VERBOSE entry).
  */
 static UOption options[]={
     UOPTION_HELP_H,
     UOPTION_HELP_QUESTION_MARK,
     UOPTION_COPYRIGHT,
     UOPTION_VERSION,
     UOPTION_VERBOSE,
     UOPTION_ICUDATADIR
 };

 /**
  * Entry point of the genuca tool.
  *
  * Parses the command line, initializes ICU, and derives all file locations
  * from the ICU source root given as the first non-option argument:
  *   - input:  <root>/source/data/unidata/FractionalUCA.txt
  *   - output: <root>/source/data/in/coll
  * It then calls write_uca_table(), or, when UCONFIG_NO_COLLATION is set,
  * writes two dummy data files that only contain an explanatory message.
  *
  * @param argc number of command-line arguments
  * @param argv command-line arguments; see the usage text below for options
  * @return 0 on success (also for an explicit version request, or a help
  *         request that includes the source root argument);
  *         U_ILLEGAL_ARGUMENT_ERROR for a bad option or a missing source
  *         root; otherwise the ICU error code or the result of
  *         write_uca_table().
  */
 int main(int argc, char* argv[]) {
     /* Start from an all-zero UCA version; a version that is still all-zero
        after reading the input is reported as "UCA version not specified". */
     uprv_memset(&UCAVersion, 0, sizeof(UCAVersion));

     U_MAIN_INIT_ARGS(argc, argv);
     argc=u_parseArgs(argc, argv, LENGTHOF(options), options);

     /* error handling, printing usage message */
     if(argc<0) {
         /* a negative result is the negated index of the offending argument */
         fprintf(stderr,
             "error in command line argument \"%s\"\n",
             argv[-argc]);
     }

     /*
      * Answer a version request before insisting on the source root argument,
      * so that "genuca -V" works on its own. Parse errors and explicit help
      * requests still take precedence and fall through to the usage text.
      */
     if(argc>=0 && options[VERSION].doesOccur &&
             !options[HELP_H].doesOccur && !options[HELP_QUESTION_MARK].doesOccur) {
         printf("genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
 #if UCONFIG_NO_COLLATION
             0, 0
 #else
             UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
 #endif
             );
         printf(U_COPYRIGHT_STRING"\n");
         return 0;
     }

     if(argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
         fprintf(stderr,
             "usage: %s [-options] path/to/ICU/src/root\n"
             "\tRead in UCA collation text data and write out the binary collation data\n"
             "options:\n"
             "\t-h or -? or --help  this usage text\n"
             "\t-V or --version     show a version message\n"
             "\t-c or --copyright   include a copyright notice\n"
             "\t-v or --verbose     turn on verbose output\n"
             "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
             "\t                    followed by path, defaults to %s\n",
             argv[0], u_getDataDirectory());
         /* argc<2 covers both a parse error (negative) and a missing source root */
         return argc<2 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
     }

     /* get the options values */
     beVerbose = options[VERBOSE].doesOccur;

     const char *copyright = NULL;
     if (options[COPYRIGHT].doesOccur) {
         copyright = U_COPYRIGHT_STRING;
     }

     if (options[ICUDATADIR].doesOccur) {
         u_setDataDirectory(options[ICUDATADIR].value);
     }
     /* Initialize ICU */
     IcuToolErrorCode errorCode("genuca");
     u_init(errorCode);
     /* U_FILE_ACCESS_ERROR from u_init() is deliberately tolerated here;
        any other failure aborts the tool. */
     if (errorCode.isFailure() && errorCode.get() != U_FILE_ACCESS_ERROR) {
         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
             argv[0], errorCode.errorName());
         return errorCode.reset();
     }
     errorCode.reset();

     /* Build all paths relative to the ICU source root (argv[1]). */
     CharString icuSrcRoot(argv[1], errorCode);

     CharString icuSourceData(icuSrcRoot, errorCode);
     icuSourceData.appendPathPart("source", errorCode);
     icuSourceData.appendPathPart("data", errorCode);

     CharString srcDir(icuSourceData, errorCode);
     srcDir.appendPathPart("unidata", errorCode);

     CharString destDir(icuSourceData, errorCode);
     destDir.appendPathPart("in", errorCode);
     destDir.appendPathPart("coll", errorCode);

     CharString ucaFile(srcDir, errorCode);
     ucaFile.appendPathPart("FractionalUCA.txt", errorCode);

     if(errorCode.isFailure()) {
         fprintf(stderr, "genuca: unable to build file paths - %s\n",
                 errorCode.errorName());
         return errorCode.reset();
     }

 #if UCONFIG_NO_COLLATION

     /* Collation is configured out: write placeholder files whose only
        payload is the message explaining why they are dummies. */
     UNewDataMemory *pData;
     const char *msg;

     msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
     fprintf(stderr, "%s\n", msg);
     pData = udata_create(destDir.data(), UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
                          NULL, errorCode);
     udata_writeBlock(pData, msg, (int32_t)uprv_strlen(msg));
     udata_finish(pData, errorCode);

     msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
     fprintf(stderr, "%s\n", msg);
     pData = udata_create(destDir.data(), INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
                          NULL, errorCode);
     udata_writeBlock(pData, msg, (int32_t)uprv_strlen(msg));
     udata_finish(pData, errorCode);

     return errorCode.reset();

 #else

     return write_uca_table(ucaFile.data(), destDir.data(), copyright, errorCode);

 #endif
 }

 /*
  * Hey, Emacs, please set the following:
  *
  * Local Variables:
  * indent-tabs-mode: nil
  * End:
  *
  */