source/tools/gentz/gentz.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (C) 1999-2001, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/24/99    aliu        Creation.
 *   09/26/00    aliu        Support for equivalency groups added.
 *   01/31/01    aliu        Support for ISO 3166 country codes added.
 **********************************************************************
 */

 /* This program reads a text file full of parsed time zone data and
  * outputs a binary file, tz.dat, which then goes on to become part of
  * the memory-mapped (or dll) ICU data file.
  *
  * The data file read by this program is generated by a perl script,
  * tz.pl.  The input to tz.pl is standard unix time zone data from
  * ftp://elsie.nci.nih.gov.
  *
  * As a matter of policy, the perl script tz.pl wants to do as much of
  * the parsing, data processing, and error checking as possible, and
  * this program wants to just do the binary translation step.
  *
  * See tz.pl for the file format that is READ by this program.
  */

 #include <stdio.h>
 #include <stdlib.h>
 #include "unicode/utypes.h"
 #include "unicode/putil.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "filestrm.h"
 #include "unewdata.h"
 #include "uoptions.h"
 #include "tzdat.h"

 #define INPUT_FILE "tz.txt"
 #define OUTPUT_FILE "tz.dat"

 /*
  * UDataInfo descriptor (cf. udata.h) that is handed to udata_create()
  * in writeTzDatFile() when the output file is produced.
  *
  * Everything here is fixed at compile time except the last member,
  * dataVersion, which writeTzDatFile() overwrites with the version year
  * and suffix read from the input file's header.
  *
  * NOTE(review): the per-member names in the comments below follow the
  * UDataInfo declaration in udata.h -- confirm against that header.
  */
 static UDataInfo dataInfo = {
     sizeof(UDataInfo),                            /* size */
     0,                                            /* reserved */

     U_IS_BIG_ENDIAN,                              /* endianness of the build platform */
     U_CHARSET_FAMILY,                             /* charset family of the build platform */
     sizeof(UChar),                                /* size of a UChar */
     0,                                            /* reserved */

     {TZ_SIG_0, TZ_SIG_1, TZ_SIG_2, TZ_SIG_3},     /* 4-byte data signature from tzdat.h */
     {TZ_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
     {0, 0, 0, 0} /* dataVersion - will be filled in with year.suffix */
 };


 /**
  * Converter that reads the text file produced by tz.pl and writes the
  * binary time zone data file.
  *
  * The object is used once: main() creates it and calls MMain(), which
  * parses the command line, reads the input via parseTzTextFile() and
  * writes the result via writeTzDatFile().  All parse errors are fatal
  * and are reported through die().
  *
  * There is no constructor; the members are filled in while parsing.
  */
 class gentz {
     // Time mode of a DST rule, as stored in TZRule::mode by parseDSTRule().
     // These must match SimpleTimeZone!!!
     enum { WALL_TIME = 0,
            STANDARD_TIME,
            UTC_TIME
     };

     // The largest number of zones we accept as sensible.  Anything
     // larger is considered an error.  Adjust as needed.
     enum { MAX_ZONES = 1000 };

     // The maximum sensible GMT offset, in seconds
     static const int32_t MAX_GMT_OFFSET;

     // Characters that have a meaning in the input file syntax.  Their
     // values are defined after the class, next to MAX_GMT_OFFSET.
     static const char COMMENT;       // starts a trailing comment
     static const char CR;
     static const char LF;
     static const char MINUS;         // sign of a negative integer
     static const char SPACE;
     static const char TAB;
     static const char ZERO;          // base for digit conversion
     static const char STANDARD_MARK; // equiv table line for a standard zone
     static const char DST_MARK;      // equiv table line for a DST zone
     static const char SEP;           // field separator within a line
     static const char NUL;           // string terminator / end of line

     // Keyword that terminates each section of the input file.
     static const char* END_KEYWORD;

     // The line most recently read by readLine(), after comment and
     // trailing whitespace have been stripped.  die() prints it, together
     // with lineNumber, whenever it is not empty.
     enum { BUFLEN = 1024 };
     char buffer[BUFLEN];
     int32_t lineNumber;

     // Binary data that we construct from tz.txt and write out as tz.dat
     TZHeader              header;
     TZEquivalencyGroup*   equivTable;
     OffsetIndex*          offsetIndex;
     CountryIndex*         countryIndex;
     uint32_t*             nameToEquiv;
     char*                 nameTable;

     uint32_t equivTableSize;  // Total bytes in equivalency group table
     uint32_t offsetIndexSize; // Total bytes in offset index table
     uint32_t countryIndexSize; // Total bytes in country index table
     uint32_t nameToEquivSize; // Total bytes in nameToEquiv
     uint32_t nameTableSize;   // Total bytes in name table

     // Limits read from the input header; used to size and validate tables.
     uint32_t maxPerOffset; // Maximum number of zones per offset
     uint32_t maxPerEquiv; // Maximum number of zones per equivalency group
     uint32_t equivCount; // Number of equivalency groups

     // Command line switches, set by MMain().
     UBool useCopyright;
     UBool verbose;


 public:
     // Run the tool: parse options, read the input file, write the output.
     int      MMain(int argc, char *argv[]);
 private:
     // Write header and tables to the output file; returns the data length.
     int32_t  writeTzDatFile(const char *destdir);
     // Read all sections of the input file, in file order.
     void     parseTzTextFile(FileStream* in);

     // High level parsing
     void          parseHeader(FileStream* in);

     TZEquivalencyGroup* parseEquivTable(FileStream* in);

     // Replace the group indices held in nameToEquiv with file offsets.
     void          fixupNameToEquiv();

     void          parseDSTRule(char*& p, TZRule& rule);

     OffsetIndex*  parseOffsetIndexTable(FileStream* in);

     CountryIndex* parseCountryIndexTable(FileStream* in);

     // Builds nameToEquiv as a side effect.
     char*         parseNameTable(FileStream* in);

     // Low level parsing and reading
     void     readEndMarker(FileStream* in);
     int32_t  readIntegerLine(FileStream* in, int32_t min, int32_t max);
     int32_t  _parseInteger(char*& p);
     int32_t  parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t);
     int32_t  readLine(FileStream* in);

     // Error handling: print the message (and current line) and exit.
     void    die(const char* msg);
 };

 int main(int argc, char *argv[]) {
     gentz x;

     U_MAIN_INIT_ARGS(argc, argv);

     return x.MMain(argc, argv);
 }

 // Definitions of the static constants declared in class gentz.
 // MAX_GMT_OFFSET is the bound applied to every GMT offset field read
 // from the input; the characters are the ones the line parser looks for.
 const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60; // seconds
 const char    gentz::COMMENT        = '#';  // rest of the line is ignored
 const char    gentz::CR             = '\r';
 const char    gentz::LF             = '\n';
 const char    gentz::MINUS          = '-';
 const char    gentz::SPACE          = ' ';
 const char    gentz::TAB            = '\t';
 const char    gentz::ZERO           = '0';
 const char    gentz::SEP            = ',';  // separates fields on a line
 const char    gentz::STANDARD_MARK  = 's';  // equiv table: standard zone
 const char    gentz::DST_MARK       = 'd';  // equiv table: DST zone
 const char    gentz::NUL            = '\0';
 const char*   gentz::END_KEYWORD    = "end"; // closes every section

 // Command line options understood by the tool.
 // The order matters: gentz::MMain() addresses the entries by position
 // (0 and 1 = help, 2 = copyright, 3 = destination directory, 4 = verbose).
 static UOption options[]={
     UOPTION_HELP_H,
     UOPTION_HELP_QUESTION_MARK,
     UOPTION_COPYRIGHT,
     UOPTION_DESTDIR,
     UOPTION_VERBOSE
 };

 int gentz::MMain(int argc, char* argv[]) {
     /* preset then read command line options */
     options[3].value=u_getDataDirectory();
     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

     /* error handling, printing usage message */
     if(argc<0) {
         fprintf(stderr,
             "error in command line argument \"%s\"\n",
             argv[-argc]);
     } else if(argc<2) {
         argc=-1;
     }
     if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
         fprintf(stderr,
             "usage: %s [-options] timezone-file\n"
             "\tread the timezone file produced by tz.pl and create " U_ICUDATA_NAME "_" TZ_DATA_NAME "." TZ_DATA_TYPE "\n"
             "options:\n"
             "\t-h or -? or --help  this usage text\n"
             "\t-v or --verbose     turn on verbose output\n"
             "\t-c or --copyright   include a copyright notice\n"
             "\t-d or --destdir     destination directory, followed by the path\n",
             argv[0]);
         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
     }

     /* get the options values */
     useCopyright=options[2].doesOccur;
     verbose = options[4].doesOccur;


     ////////////////////////////////////////////////////////////
     // Read the input file
     ////////////////////////////////////////////////////////////
     *buffer = NUL;
     lineNumber = 0;
     if (verbose) {
         fprintf(stdout, "Input file: %s\n", argv[1]);
     }
     FileStream* in = T_FileStream_open(argv[1], "r");
     if (in == 0) {
         die("Cannot open input file");
     }
     parseTzTextFile(in);
     T_FileStream_close(in);
     *buffer = NUL;

     ////////////////////////////////////////////////////////////
     // Write the output file
     ////////////////////////////////////////////////////////////
     int32_t wlen = writeTzDatFile(options[3].value);
     if (verbose) {
         fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
             U_ICUDATA_NAME "_" TZ_DATA_NAME, TZ_DATA_TYPE, (long)wlen);
     }

     return 0; // success
 }

 int32_t gentz::writeTzDatFile(const char *destdir) {
     UNewDataMemory *pdata;
     UErrorCode status = U_ZERO_ERROR;

     // Careful: The order in which the tables are written must match the offsets.
     // Our order is:
     // - equiv table
     // - offset index
     // - country index
     // - name index (name to equiv map)
     // - name table (must be last!)
     header.equivTableDelta = sizeof(header);
     header.offsetIndexDelta = header.equivTableDelta + equivTableSize;
     header.countryIndexDelta = header.offsetIndexDelta + offsetIndexSize;
     header.nameIndexDelta = header.countryIndexDelta + countryIndexSize;
     // Must be last:
     header.nameTableDelta = header.nameIndexDelta + nameToEquivSize;

 /*  // Don't need to check for negative values on unsigned numbers.
     if (header.equivTableDelta < 0 ||
         header.offsetIndexDelta < 0 ||
         header.countryIndexDelta < 0 ||
         header.nameIndexDelta < 0 ||
         header.nameTableDelta < 0) {
         die("Table too big -- negative delta");
     }
 */

     // Convert equivalency table indices to offsets.  This can only
     // be done after the header offsets have been set up.
     fixupNameToEquiv();

     // Fill in dataInfo with year.suffix
     *(uint16_t*)&(dataInfo.dataVersion[0]) = header.versionYear;
     *(uint16_t*)&(dataInfo.dataVersion[2]) = header.versionSuffix;

     pdata = udata_create(destdir, TZ_DATA_TYPE, U_ICUDATA_NAME "_" TZ_DATA_NAME, &dataInfo,
                          useCopyright ? U_COPYRIGHT_STRING : 0, &status);
     if (U_FAILURE(status)) {
         die("Unable to create data memory");
     }

     udata_writeBlock(pdata, &header, sizeof(header));
     udata_writeBlock(pdata, equivTable, equivTableSize);
     udata_writeBlock(pdata, offsetIndex, offsetIndexSize);
     udata_writeBlock(pdata, countryIndex, countryIndexSize);
     udata_writeBlock(pdata, nameToEquiv, nameToEquivSize);
     udata_writeBlock(pdata, nameTable, nameTableSize);

     uint32_t dataLength = udata_finish(pdata, &status);
     if (U_FAILURE(status)) {
         die("Error writing output file");
     }

     if (dataLength != (sizeof(header) + equivTableSize +
                        offsetIndexSize + countryIndexSize +
                        nameTableSize + nameToEquivSize
                        )) {
         die("Written file doesn't match expected size");
     }
     return dataLength;
 }

 /**
  * Read the complete input file.
  *
  * The sections are consumed strictly in the order in which they are
  * parsed below, and later parsers validate against values stored by
  * earlier ones (e.g. header.count and equivCount set by parseHeader),
  * so the call order must not be changed.  The resulting tables are
  * kept in member variables for writeTzDatFile().
  *
  * @param in  open input stream positioned at the start of the file
  */
 void gentz::parseTzTextFile(FileStream* in) {
     // Header: format version, data version, counts and table limits
     parseHeader(in);

     // Read name table, create it, also create nameToEquiv index table
     // as a side effect.
     nameTable = parseNameTable(in);

     // Parse the equivalency groups
     equivTable = parseEquivTable(in);

     // Parse the GMT offset index table
     offsetIndex = parseOffsetIndexTable(in);

     // Parse the ISO 3166 country index table
     countryIndex = parseCountryIndexTable(in);
 }

 /**
  * Convert equivalency table indices to offsets.  The equivalency
  * table offset (in the header) must be set already.
  */
 void gentz::fixupNameToEquiv() {
     uint32_t i;

     // First make a list that maps indices to offsets
     uint32_t *offsets = (uint32_t*) uprv_malloc(sizeof(uint32_t) * equivCount);
 	/* test for NULL */
 	if(offsets == NULL) {
 		die("Out of memory");
 	}
     offsets[0] = header.equivTableDelta;
     if (offsets[0] % 4 != 0) {
         die("Header size is not 4-aligned");
     }
     TZEquivalencyGroup *eg = equivTable;
     for (i=1; i<equivCount; ++i) {
         offsets[i] = offsets[i-1] + eg->nextEntryDelta;
         if (offsets[i] % 4 != 0) {
             die("Equivalency group table is not 4-aligned");
         }
         eg = (TZEquivalencyGroup*) (eg->nextEntryDelta + (int8_t*)eg);
     }

     // Now remap index values to offsets
     for (i=0; i<header.count; ++i) {
         uint32_t x = nameToEquiv[i];
         if (x >= equivCount) {
             die("Equiv index out of range");
         }
         nameToEquiv[i] = offsets[x];
     }

     uprv_free(offsets);
 }

 /**
  * Parse the equivalency group section of the input file.
  *
  * The section starts with a line holding the number of groups, which
  * must equal equivCount from the header, followed by one line per
  * group and the 'end' keyword.  A group line has the fields, separated
  * by SEP:
  *   standard zone:  s, gmtOffset, zoneCount, zone, zone, ...
  *   DST zone:       d, gmtOffset, onset rule, cease rule, dstSavings,
  *                   zoneCount, zone, zone, ...
  * gmtOffset is given in seconds and stored in milliseconds; each zone
  * is a zone index in the range 0..header.count-1.
  *
  * The groups are packed one after another into a single allocation,
  * each padded to a multiple of 4 bytes.  Sets equivTableSize to the
  * number of bytes used.
  *
  * @param in  input stream positioned at the start of the section
  * @return the table, allocated with uprv_malloc().  Any error
  *         terminates the process through die().
  */
 TZEquivalencyGroup* gentz::parseEquivTable(FileStream* in) {
     uint32_t n = readIntegerLine(in, 1, MAX_ZONES);
     if (n != equivCount) {
         die("Equivalency table count mismatch");
     }

     // We don't know how big the whole thing will be yet, but we can use
     // the maxPerEquiv number to compute an upper limit.
     //
     // The gmtOffset field within each struct must be
     // 4-aligned for some architectures.  To ensure this, we do three
     // things: 1. The entire struct is 4-aligned.  2. The gmtOffset is
     // placed at a 4-aligned position within the struct.  3. The size
     // of the whole structure is padded out to 4n bytes.  We achieve
     // this last condition by adding two bytes of padding after the
     // last entry, if necessary; the nextEntryDelta is adjusted to
     // include that padding.
     uint32_t maxPossibleSize = sizeof(TZEquivalencyGroup) +
         (maxPerEquiv-1) * sizeof(uint16_t);
     // Pad this out
     if ((maxPossibleSize % 4) != 0) {
         maxPossibleSize += 2;
     }
     if ((maxPossibleSize % 4) != 0) {
         die("Bug in 4-align code for equiv table");
     }
     maxPossibleSize *= n; // Get size of entire set of structs.

     int8_t *result = (int8_t*) uprv_malloc(sizeof(int8_t) * maxPossibleSize);
     if (result == 0) {
         die("Out of memory");
     }

     // Read each line and construct the corresponding entry
     TZEquivalencyGroup* eg = (TZEquivalencyGroup*)result;
     for (uint32_t i=0; i<n; ++i) {
         char *p;

         readLine(in);

         // Each line starts with 's,' or 'd,' to specify the zone type
         char flavor = buffer[0];
         if (buffer[1] != SEP) {
             die("Syntax error in equiv table");
         }
         p = buffer + 2;

         // This pointer will be adjusted to point to the start of the
         // list of zones in this group.
         uint16_t* pList = 0;

         switch (flavor) {
         case STANDARD_MARK:
             eg->isDST = 0;
             eg->u.s.zone.gmtOffset = 1000 * // Convert s -> ms
                 parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
             pList = &(eg->u.s.count);
             break;
         case DST_MARK:
             eg->isDST = 1;
             eg->u.d.zone.gmtOffset = 1000 * // Convert s -> ms
                 parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
             parseDSTRule(p, eg->u.d.zone.onsetRule);
             parseDSTRule(p, eg->u.d.zone.ceaseRule);
             // NOTE(review): the 0..12*60 range suggests minutes -- confirm
             // against tz.pl / tzdat.h.
             eg->u.d.zone.dstSavings = (uint16_t) parseInteger(p, SEP, 0, 12*60);
             pList = &(eg->u.d.count);
             break;
         default:
             // die() does not return, so pList is never used while null.
             die("Invalid equiv table type marker (not s or d)");
         }

         // Now parse the list of zones in this group.  The count is
         // stored first, then the zone numbers directly after it.
         uint16_t egCount = (uint16_t) parseInteger(p, SEP, 1, maxPerEquiv);
         *pList++ = egCount;
         for (uint16_t j=0; j<egCount; ++j) {
             // The last zone number must be followed by the end of the line.
             *pList++ = (uint16_t) parseInteger(p, (j==(egCount-1)) ? NUL : SEP,
                                                0, header.count-1);
         }

         // At this point pList points to the byte after the last byte of this
         // equiv group struct.  Time to 4-align it.
         uint16_t structSize = (uint16_t) (((int8_t*)pList) - ((int8_t*)eg));
         if ((structSize % 4) != 0) {
             // assert(structSize % 4 == 2);
             *pList++ = 0xFFFF; // Pad with invalid zone index
             structSize += 2;
         }

         // Set up next entry delta; 0 marks the last group
         eg->nextEntryDelta = (i==(n-1)) ? (uint16_t) 0 : structSize;

         eg->reserved = 0; // ignored

         // Advance to where the next group will be built
         eg = (TZEquivalencyGroup*) (structSize + (int8_t*)eg);
     }
     // eg now points just past the last group
     equivTableSize = (int8_t*)eg - (int8_t*)result;
     readEndMarker(in);
     if (verbose) {
         fprintf(stdout, " Read %lu equivalency table entries, in-memory size %ld bytes\n",
             (unsigned long)equivCount, (long)equivTableSize);
     }
     return (TZEquivalencyGroup*)result;
 }

 /**
  * Parse the GMT offset index section of the input file.
  *
  * The section starts with a line holding the number of entries,
  * followed by one line per entry and the 'end' keyword.  An entry line
  * has the fields, separated by SEP:
  *   gmtOffset, defaultZone, zoneCount, zone, zone, ...
  * gmtOffset is given in seconds and stored in milliseconds; the
  * default zone must also appear in the zone list.
  *
  * The entries are packed one after another into a single allocation.
  * Sets offsetIndexSize to the number of bytes used.
  *
  * @param in  input stream positioned at the start of the section
  * @return the table, allocated with uprv_malloc().  Any error
  *         terminates the process through die().
  */
 OffsetIndex* gentz::parseOffsetIndexTable(FileStream* in) {
     uint32_t n = readIntegerLine(in, 1, MAX_ZONES);

     // We don't know how big the whole thing will be yet, but we can use
     // the maxPerOffset number to compute an upper limit.
     //
     // The gmtOffset field within each OffsetIndex struct must be
     // 4-aligned for some architectures.  To ensure this, we do three
     // things: 1. The entire struct is 4-aligned.  2. The gmtOffset is
     // placed at a 4-aligned position within the struct.  3. The size
     // of the whole structure is padded out to 4n bytes.  We achieve
     // this last condition by adding two bytes of padding after the
     // last zoneNumber, if count is _even_.  That is, the struct size
     // is 10+2count+padding, where padding is (count%2==0 ? 2:0).
     //
     // Note that we don't change the count itself, but rather adjust
     // the nextEntryDelta and add 2 bytes of padding if necessary.
     //
     // The upper limit per entry is therefore: the struct itself (which
     // already holds one zoneNumber), maxPerOffset-1 further zone numbers,
     // and one more uint16_t for the padding.  Leaving the padding out
     // would let an entry with an even count of maxPerOffset zones write
     // past the end of the allocation.
     uint32_t maxPossibleSize = n * (sizeof(OffsetIndex) +
         (maxPerOffset-1) * sizeof(uint16_t) +
         sizeof(uint16_t) /* padding */);

     int8_t *result = (int8_t*) uprv_malloc(sizeof(int8_t) * maxPossibleSize);
     if (result == 0) {
         die("Out of memory");
     }

     // Read each line and construct the corresponding entry
     OffsetIndex* index = (OffsetIndex*)result;
     for (uint32_t i=0; i<n; ++i) {
         uint16_t alignedCount;
         readLine(in);
         char* p = buffer;
         index->gmtOffset = 1000 * // Convert s -> ms
             parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
         index->defaultZone = (uint16_t)parseInteger(p, SEP, 0, header.count-1);
         index->count = (uint16_t)parseInteger(p, SEP, 1, maxPerOffset);
         uint16_t* zoneNumberArray = &(index->zoneNumber);
         UBool sawOffset = FALSE; // Sanity check - make sure offset is in zone list
         for (uint16_t j=0; j<index->count; ++j) {
             // The last zone number must be followed by the end of the line.
             zoneNumberArray[j] = (uint16_t)
                 parseInteger(p, (j==(index->count-1))?NUL:SEP,
                              0, header.count-1);
             if (zoneNumberArray[j] == index->defaultZone) {
                 sawOffset = TRUE;
             }
         }
         if (!sawOffset) {
             die("Error: bad offset index entry; default not in zone list");
         }
         alignedCount = index->count;
         if((alignedCount%2)==0) /* force count to be ODD - see above */
         {
             // Use invalid zoneNumber for 2 bytes of padding
             zoneNumberArray[alignedCount++] = (uint16_t)0xFFFF;
         }
         int8_t* nextIndex = (int8_t*)&(zoneNumberArray[alignedCount]);

         // A delta of 0 marks the last entry.
         index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index));
         index = (OffsetIndex*)nextIndex;
     }
     // index now points just past the last entry
     offsetIndexSize = (int8_t*)index - (int8_t*)result;
     if (offsetIndexSize > maxPossibleSize) {
         die("Yikes! Interal error while constructing offset index table");
     }
     readEndMarker(in);
     if (verbose) {
         fprintf(stdout, " Read %lu offset index table entries, in-memory size %ld bytes\n",
             (unsigned long)n, (long)offsetIndexSize);
     }
     return (OffsetIndex*)result;
 }

 /**
  * Parse the ISO 3166 country index section of the input file.
  *
  * The section starts with a line holding the number of countries,
  * followed by one line per country and the 'end' keyword.  A country
  * line has the fields, separated by SEP:
  *   intcode, zoneCount, zone, zone, ...
  *
  * The entries are packed one after another into a single allocation
  * whose size is known in advance; the table is padded at the end to a
  * multiple of 4 bytes.  Sets countryIndexSize to the padded size.
  *
  * @param in  input stream positioned at the start of the section
  * @return the table, allocated with uprv_malloc().  Any error
  *         terminates the process through die().
  */
 CountryIndex* gentz::parseCountryIndexTable(FileStream* in) {
     uint32_t n = readIntegerLine(in, 1, MAX_ZONES);

     // We know how big the whole thing will be: Each zone occupies an
     // int, and each country adds 3 ints (one for the intcode, one for
     // next entry offset, one for the zone count).  Each int is 16
     // bits.
     //
     // Everything is 16-bits, so we don't 4-align the entries.
     // However, we do pad at the end of the table to make the whole
     // thing of size 4n, if necessary.
     uint32_t expectedSize = n*(sizeof(CountryIndex)-sizeof(uint16_t)) +
         header.count * sizeof(uint16_t);
     uint32_t pad = (4 - (expectedSize % 4)) % 4; // This will be 0 or 2
     int8_t *result = (int8_t*) uprv_malloc(sizeof(int8_t) * (expectedSize + pad));
     if (result == 0) {
         die("Out of memory");
     }

     // Read each line and construct the corresponding entry.
     // Along the way, make sure we don't write past 'limit'.
     CountryIndex* index = (CountryIndex*)result;
     int8_t* limit = ((int8_t*)result) + expectedSize; // Don't include pad
     uint32_t i;
     for (i=0; i<n && (int8_t*)(&index->zoneNumber) < limit; ++i) {
         readLine(in);
         char* p = buffer;
         index->intcode = (uint16_t)parseInteger(p, SEP, 0, 25*32+25 /*ZZ*/);
         index->count = (uint16_t)parseInteger(p, SEP, 0, header.count-1);
         uint16_t* zoneNumberArray = &(index->zoneNumber);
         if ((int8_t*)(&index->zoneNumber + index->count - 1) >= limit) {
             // Oops -- out of space.  Leaving the loop early keeps i < n,
             // which is reported as an error below.
             break;
         }
         for (uint16_t j=0; j<index->count; ++j) {
             // The last zone number must be followed by the end of the line.
             zoneNumberArray[j] = (uint16_t)
                 parseInteger(p, (j==(index->count-1))?NUL:SEP,
                              0, header.count-1);
         }
         int8_t* nextIndex = (int8_t*)&(zoneNumberArray[index->count]);
         // A delta of 0 marks the last entry.
         index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index));
         index = (CountryIndex*)nextIndex;
     }
     readEndMarker(in);

     // Make sure the size matches the expected value.  The comparison
     // must be made on the unpadded size: expectedSize does not include
     // the pad, so adding the pad first would reject every table whose
     // size is not already a multiple of 4.
     countryIndexSize = (int8_t*)index - (int8_t*)result;
     if (i != n || countryIndexSize != expectedSize) {
         die("Yikes! Internal error while constructing country index table");
     }
     // Only now pad the total size; index points at the pad bytes.
     if (pad != 0) {
         countryIndexSize += pad;
         *(uint16_t*)index = 0; // Clear pad bits
     }
     if (verbose) {
         fprintf(stdout, " Read %lu country index table entries, in-memory size %ld bytes\n", (unsigned long)n, (long)countryIndexSize);
     }
     return (CountryIndex*)result;
 }

 /**
  * Parse the header section of the input file.
  *
  * The header consists of one integer per line, in this order: format
  * version, data version year, data version suffix, number of zones,
  * number of equivalency groups, maximum zones per offset, maximum
  * zones per equivalency group, and size of the name table in bytes;
  * it is closed by the 'end' keyword.
  *
  * Stores the values in 'header' and in the equivCount, maxPerOffset,
  * maxPerEquiv and nameTableSize members, which the other parsers use
  * for validation and for sizing their tables.
  *
  * @param in  input stream positioned at the start of the file.  Any
  *            error terminates the process through die().
  */
 void gentz::parseHeader(FileStream* in) {

     int32_t version = readIntegerLine(in, 0, 0xFFFF);
     if (version != TZ_FORMAT_VERSION) {
         die("Version mismatch between gentz and input file");
     }

     // Version string, e.g., "1999j" -> (1999<<16) | 10
     header.versionYear = (uint16_t) readIntegerLine(in, 1990, 0xFFFF);
     header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF);

     header.count = readIntegerLine(in, 1, MAX_ZONES);
     equivCount = readIntegerLine(in, 1, header.count);
     maxPerOffset = readIntegerLine(in, 1, header.count);
     // A group can hold more zones than there are groups (e.g. 5 zones in
     // 2 groups, split 4+1), so the bound is the number of zones, not the
     // number of groups.
     maxPerEquiv = readIntegerLine(in, 1, header.count);

     // Size of name table in bytes
     // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.)
     nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF);

     readEndMarker(in);

     if (verbose) {
         // %lu matches the unsigned long argument.
         fprintf(stdout, " Read header, data version %u(%u), in-memory size %lu bytes\n",
             header.versionYear, header.versionSuffix,
             (unsigned long)sizeof(header));
     }
 }

 /**
  * Parse one DST rule from the current line.
  *
  * The rule consists of the fields month, dowim, dow, time and a time
  * mode letter ('w', 's' or 'u'), each followed by SEP.  The mode
  * letter is stored as WALL_TIME, STANDARD_TIME or UTC_TIME.
  *
  * @param p     current parse position; advanced past the rule and the
  *              separator that follows it
  * @param rule  receives the parsed values.  Any error terminates the
  *              process through die().
  */
 void gentz::parseDSTRule(char*& p, TZRule& rule) {
     rule.month = (uint8_t)  parseInteger(p, SEP, 0, 11);
     rule.dowim = (int8_t)   parseInteger(p, SEP, -31, 31);
     rule.dow   = (int8_t)   parseInteger(p, SEP, -7, 7);
     rule.time  = (uint16_t) parseInteger(p, SEP, 0, 24*60);

     // The mode is a single letter rather than a number.
     const char modeLetter = *p;
     ++p;
     const char afterMode = *p;
     ++p;
     if (afterMode != SEP) {
         die("Separator missing");
     }

     if (modeLetter == 'w') {
         rule.mode = WALL_TIME;
     } else if (modeLetter == 's') {
         rule.mode = STANDARD_TIME;
     } else if (modeLetter == 'u') {
         rule.mode = UTC_TIME;
     } else {
         die("Invalid rule time mode");
     }
 }

 /**
  * Parse the name table.
  * Each entry of the name table looks like this:
  * |36,Africa/Djibouti
  * The integer is an equivalency table index.  We build up a name
  * table, that just contains the names, and we return it.  We also
  * build up the name index, which indexes names to equivalency table
  * entries.  This is stored in the member variable nameToEquiv.
  *
  * The section starts with a line holding the number of names, which
  * must equal header.count, and is closed by the 'end' keyword.  The
  * names, each followed by a NUL, must fill exactly nameTableSize bytes.
  * Sets nameToEquivSize.
  *
  * @param in  input stream positioned at the start of the section
  * @return the name table, allocated with uprv_malloc().  Any error
  *         terminates the process through die().
  */
 char* gentz::parseNameTable(FileStream* in) {
     int32_t n = readIntegerLine(in, 1, MAX_ZONES);
     if (n != (int32_t)header.count) {
         die("Zone count doesn't match name table count");
     }
     char* names = (char*) uprv_malloc(sizeof(char) * nameTableSize);
     nameToEquiv = (uint32_t*) uprv_malloc(sizeof(uint32_t) * n);
     if (names == 0 || nameToEquiv == 0) {
         die("Out of memory");
     }
     nameToEquivSize = n * sizeof(nameToEquiv[0]);
     char* p = names;
     char* limit = names + nameTableSize;
     for (int32_t i=0; i<n; ++i) {
         readLine(in);
         char* q = buffer;
         // We store an index here for now -- later, in fixupNameToEquiv,
         // we convert it to an offset.
         nameToEquiv[i] = (uint32_t) parseInteger(q, SEP, 0, equivCount-1);
         // q now points at the name, which runs to the end of the line.
         int32_t len = uprv_strlen(q);
         // There must be room for the name AND its terminating NUL;
         // checking the name alone would allow a write one byte past the
         // end of the table.
         if (len < (limit - p)) {
             uprv_memcpy(p, q, len);
             p += len;
             *p++ = NUL;
         } else {
             die("Name table longer than declared size");
         }
     }
     if (p != limit) {
         die("Name table shorter than declared size");
     }
     readEndMarker(in);
     if (verbose) {
         fprintf(stdout, " Read %ld names, in-memory size %ld bytes\n",
         (long)n, (long)nameTableSize);
     }
     return names;
 }

 /**
  * Consume the line that closes a section.  The next non-blank line
  * must consist of the 'end' keyword and nothing else; otherwise the
  * process is terminated through die().
  */
 void gentz::readEndMarker(FileStream* in) {
     readLine(in);
     if (uprv_strcmp(buffer, END_KEYWORD) == 0) {
         return;
     }
     die("Keyword 'end' missing");
 }

 /**
  * Fetch the next non-blank line and interpret all of it as a single
  * integer in the range min..max (inclusive).  Anything else on the
  * line, or a value outside the range, is a fatal error.
  */
 int32_t gentz::readIntegerLine(FileStream* in, int32_t min, int32_t max) {
     readLine(in);
     char* cursor = buffer;
     const int32_t value = parseInteger(cursor, NUL, min, max);
     return value;
 }

 /**
  * Parse an integer from the given character buffer.
  * Advance p past the last parsed character.  Return
  * the result.  The integer must be of the form
  * /-?\d+/.
  *
  * Dies if there is no digit at all, or if the magnitude does not fit
  * into an int32_t.  Without the latter check a long run of digits
  * would overflow the accumulator, and the wrapped value could slip
  * through the callers' range checks.
  */
 int32_t gentz::_parseInteger(char*& p) {
     const int32_t maxValue = 0x7FFFFFFF; // largest int32_t
     int32_t n = 0;
     int32_t digitCount = 0;
     int32_t digit;
     UBool negative = FALSE;
     if (*p == MINUS) {
         ++p;
         negative = TRUE;
     }
     for (;;) {
         digit = *p - ZERO;
         if (digit < 0 || digit > 9) {
             // First character that is not a digit ends the number.
             break;
         }
         // Equivalent to 10*n + digit > maxValue, without overflowing.
         if (n > (maxValue - digit) / 10) {
             die("Integer too large");
         }
         n = 10*n + digit;
         p++;
         digitCount++;
     }
     if (digitCount < 1) {
         die("Unable to parse integer");
     }
     if (negative) {
         n = -n;
     }
     return n;
 }

 int32_t gentz::parseInteger(char*& p, char nextExpectedChar,
                             int32_t min, int32_t max) {
     int32_t n = _parseInteger(p);
     if (*p++ != nextExpectedChar) {
         die("Character following integer unexpected");
     }
     if (n < min || n > max) {
         die("Integer field out of range");
     }
     return n;
 }

 void gentz::die(const char* msg) {
     fprintf(stderr, "ERROR, %s\n", msg);
     if (*buffer) {
         fprintf(stderr, "Input file line %ld: \"%s\"\n", (long)lineNumber, buffer);
     }
     exit(1);
 }

 /**
  * Fetch the next meaningful line into the line buffer.
  *
  * A trailing comment and trailing whitespace (including the line end)
  * are removed.  Lines that are empty afterwards are skipped.  Every
  * line read, skipped or not, advances lineNumber.
  *
  * @return the number of characters left on the line (always > 0).
  *         Reaching the end of the file terminates the process through
  *         die().
  */
 int32_t gentz::readLine(FileStream* in) {
     for (;;) {
         ++lineNumber;
         if (T_FileStream_readLine(in, buffer, BUFLEN) == 0) {
             // Empty the buffer so that die() prints no stale line.
             *buffer = 0;
             die("Unexpected end of file");
         }

         // Cut the line at the comment character, if there is one.
         char* commentStart = uprv_strchr(buffer, COMMENT);
         if (commentStart != 0) {
             *commentStart = NUL;
         }

         // Back up over trailing whitespace and terminate there.
         char* end = buffer + uprv_strlen(buffer);
         while (end > buffer) {
             const char last = end[-1];
             if (last != CR && last != LF && last != SPACE && last != TAB) {
                 break;
             }
             --end;
         }
         *end = NUL;

         // Nothing left?  Then go on to the next line.
         if (*buffer != NUL) {
             return (int32_t)(end - buffer);
         }
     }
 }