| /* |
| ********************************************************************** |
| * Copyright (C) 1999-2001, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 11/24/99 aliu Creation. |
| * 09/26/00 aliu Support for equivalency groups added. |
| * 01/31/01 aliu Support for ISO 3166 country codes added. |
| ********************************************************************** |
| */ |
| |
| /* This program reads a text file full of parsed time zone data and |
| * outputs a binary file, tz.dat, which then goes on to become part of |
| * the memory-mapped (or dll) ICU data file. |
| * |
| * The data file read by this program is generated by a perl script, |
| * tz.pl. The input to tz.pl is standard unix time zone data from |
| * ftp://elsie.nci.nih.gov. |
| * |
| * As a matter of policy, the perl script tz.pl wants to do as much of |
| * the parsing, data processing, and error checking as possible, and |
| * this program wants to just do the binary translation step. |
| * |
| * See tz.pl for the file format that is READ by this program. |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include "unicode/utypes.h" |
| #include "unicode/putil.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "filestrm.h" |
| #include "unewdata.h" |
| #include "uoptions.h" |
| #include "tzdat.h" |
| |
| #ifdef XP_MAC_CONSOLE |
| # include <console.h> |
| #endif |
| |
| #define INPUT_FILE "tz.txt" |
| #define OUTPUT_FILE "tz.dat" |
| |
| /* UDataInfo cf. udata.h */ |
| static UDataInfo dataInfo = { |
| sizeof(UDataInfo), |
| 0, |
| |
| U_IS_BIG_ENDIAN, |
| U_CHARSET_FAMILY, |
| sizeof(UChar), |
| 0, |
| |
| {TZ_SIG_0, TZ_SIG_1, TZ_SIG_2, TZ_SIG_3}, |
| {TZ_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */ |
| {0, 0, 0, 0} /* dataVersion - will be filled in with year.suffix */ |
| }; |
| |
| |
| class gentz { |
| // These must match SimpleTimeZone!!! |
| enum { WALL_TIME = 0, |
| STANDARD_TIME, |
| UTC_TIME |
| }; |
| |
| // The largest number of zones we accept as sensible. Anything |
| // larger is considered an error. Adjust as needed. |
| enum { MAX_ZONES = 1000 }; |
| |
| // The maximum sensible GMT offset, in seconds |
| static const int32_t MAX_GMT_OFFSET; |
| |
| static const char COMMENT; |
| static const char CR; |
| static const char LF; |
| static const char MINUS; |
| static const char SPACE; |
| static const char TAB; |
| static const char ZERO; |
| static const char STANDARD_MARK; |
| static const char DST_MARK; |
| static const char SEP; |
| static const char NUL; |
| |
| static const char* END_KEYWORD; |
| |
| enum { BUFLEN = 1024 }; |
| char buffer[BUFLEN]; |
| int32_t lineNumber; |
| |
| // Binary data that we construct from tz.txt and write out as tz.dat |
| TZHeader header; |
| TZEquivalencyGroup* equivTable; |
| OffsetIndex* offsetIndex; |
| CountryIndex* countryIndex; |
| uint32_t* nameToEquiv; |
| char* nameTable; |
| |
| uint32_t equivTableSize; // Total bytes in equivalency group table |
| uint32_t offsetIndexSize; // Total bytes in offset index table |
| uint32_t countryIndexSize; // Total bytes in country index table |
| uint32_t nameToEquivSize; // Total bytes in nameToEquiv |
| uint32_t nameTableSize; // Total bytes in name table |
| |
| uint32_t maxPerOffset; // Maximum number of zones per offset |
| uint32_t maxPerEquiv; // Maximum number of zones per equivalency group |
| uint32_t equivCount; // Number of equivalency groups |
| |
| UBool useCopyright; |
| |
| public: |
| int MMain(int argc, char *argv[]); |
| private: |
| int32_t writeTzDatFile(const char *destdir); |
| void parseTzTextFile(FileStream* in); |
| |
| // High level parsing |
| void parseHeader(FileStream* in); |
| |
| TZEquivalencyGroup* parseEquivTable(FileStream* in); |
| |
| void fixupNameToEquiv(); |
| |
| void parseDSTRule(char*& p, TZRule& rule); |
| |
| OffsetIndex* parseOffsetIndexTable(FileStream* in); |
| |
| CountryIndex* parseCountryIndexTable(FileStream* in); |
| |
| char* parseNameTable(FileStream* in); |
| |
| // Low level parsing and reading |
| void readEndMarker(FileStream* in); |
| int32_t readIntegerLine(FileStream* in, int32_t min, int32_t max); |
| int32_t _parseInteger(char*& p); |
| int32_t parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t); |
| int32_t readLine(FileStream* in); |
| |
| // Error handling |
| void die(const char* msg); |
| }; |
| |
| int main(int argc, char *argv[]) { |
| gentz x; |
| #ifdef XP_MAC_CONSOLE |
| argc=ccommand((char***)&argv); |
| #endif |
| return x.MMain(argc, argv); |
| } |
| |
| const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60; // seconds |
| const char gentz::COMMENT = '#'; |
| const char gentz::CR = '\r'; |
| const char gentz::LF = '\n'; |
| const char gentz::MINUS = '-'; |
| const char gentz::SPACE = ' '; |
| const char gentz::TAB = '\t'; |
| const char gentz::ZERO = '0'; |
| const char gentz::SEP = ','; |
| const char gentz::STANDARD_MARK = 's'; |
| const char gentz::DST_MARK = 'd'; |
| const char gentz::NUL = '\0'; |
| const char* gentz::END_KEYWORD = "end"; |
| |
| static UOption options[]={ |
| UOPTION_HELP_H, |
| UOPTION_HELP_QUESTION_MARK, |
| UOPTION_COPYRIGHT, |
| UOPTION_DESTDIR |
| }; |
| |
| int gentz::MMain(int argc, char* argv[]) { |
| /* preset then read command line options */ |
| options[3].value=u_getDataDirectory(); |
| argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); |
| |
| /* error handling, printing usage message */ |
| if(argc<0) { |
| fprintf(stderr, |
| "error in command line argument \"%s\"\n", |
| argv[-argc]); |
| } else if(argc<2) { |
| argc=-1; |
| } |
| if(argc<0 || options[0].doesOccur || options[1].doesOccur) { |
| fprintf(stderr, |
| "usage: %s [-options] timezone-file\n" |
| "\tread the timezone file produced by tz.pl and create " TZ_DATA_NAME "." TZ_DATA_TYPE "\n" |
| "\toptions:\n" |
| "\t\t-h or -? or --help this usage text\n" |
| "\t\t-c or --copyright include a copyright notice\n" |
| "\t\t-d or --destdir destination directory, followed by the path\n", |
| argv[0]); |
| return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
| } |
| |
| /* get the options values */ |
| useCopyright=options[2].doesOccur; |
| |
| //////////////////////////////////////////////////////////// |
| // Read the input file |
| //////////////////////////////////////////////////////////// |
| *buffer = NUL; |
| lineNumber = 0; |
| fprintf(stdout, "Input file: %s\n", argv[1]); |
| FileStream* in = T_FileStream_open(argv[1], "r"); |
| if (in == 0) { |
| die("Cannot open input file"); |
| } |
| parseTzTextFile(in); |
| T_FileStream_close(in); |
| *buffer = NUL; |
| |
| //////////////////////////////////////////////////////////// |
| // Write the output file |
| //////////////////////////////////////////////////////////// |
| int32_t wlen = writeTzDatFile(options[3].value); |
| fprintf(stdout, "Output file: %s.%s, %ld bytes\n", |
| TZ_DATA_NAME, TZ_DATA_TYPE, (long)wlen); |
| |
| return 0; // success |
| } |
| |
| int32_t gentz::writeTzDatFile(const char *destdir) { |
| UNewDataMemory *pdata; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| // Careful: The order in which the tables are written must match the offsets. |
| // Our order is: |
| // - equiv table |
| // - offset index |
| // - country index |
| // - name index (name to equiv map) |
| // - name table (must be last!) |
| header.equivTableDelta = sizeof(header); |
| header.offsetIndexDelta = header.equivTableDelta + equivTableSize; |
| header.countryIndexDelta = header.offsetIndexDelta + offsetIndexSize; |
| header.nameIndexDelta = header.countryIndexDelta + countryIndexSize; |
| // Must be last: |
| header.nameTableDelta = header.nameIndexDelta + nameToEquivSize; |
| |
| /* // Don't need to check for negative values on unsigned numbers. |
| if (header.equivTableDelta < 0 || |
| header.offsetIndexDelta < 0 || |
| header.countryIndexDelta < 0 || |
| header.nameIndexDelta < 0 || |
| header.nameTableDelta < 0) { |
| die("Table too big -- negative delta"); |
| } |
| */ |
| |
| // Convert equivalency table indices to offsets. This can only |
| // be done after the header offsets have been set up. |
| fixupNameToEquiv(); |
| |
| // Fill in dataInfo with year.suffix |
| *(uint16_t*)&(dataInfo.dataVersion[0]) = header.versionYear; |
| *(uint16_t*)&(dataInfo.dataVersion[2]) = header.versionSuffix; |
| |
| pdata = udata_create(destdir, TZ_DATA_TYPE, TZ_DATA_NAME, &dataInfo, |
| useCopyright ? U_COPYRIGHT_STRING : 0, &status); |
| if (U_FAILURE(status)) { |
| die("Unable to create data memory"); |
| } |
| |
| udata_writeBlock(pdata, &header, sizeof(header)); |
| udata_writeBlock(pdata, equivTable, equivTableSize); |
| udata_writeBlock(pdata, offsetIndex, offsetIndexSize); |
| udata_writeBlock(pdata, countryIndex, countryIndexSize); |
| udata_writeBlock(pdata, nameToEquiv, nameToEquivSize); |
| udata_writeBlock(pdata, nameTable, nameTableSize); |
| |
| uint32_t dataLength = udata_finish(pdata, &status); |
| if (U_FAILURE(status)) { |
| die("Error writing output file"); |
| } |
| |
| if (dataLength != (sizeof(header) + equivTableSize + |
| offsetIndexSize + countryIndexSize + |
| nameTableSize + nameToEquivSize |
| )) { |
| die("Written file doesn't match expected size"); |
| } |
| return dataLength; |
| } |
| |
| void gentz::parseTzTextFile(FileStream* in) { |
| parseHeader(in); |
| |
| // Read name table, create it, also create nameToEquiv index table |
| // as a side effect. |
| nameTable = parseNameTable(in); |
| |
| // Parse the equivalency groups |
| equivTable = parseEquivTable(in); |
| |
| // Parse the GMT offset index table |
| offsetIndex = parseOffsetIndexTable(in); |
| |
| // Parse the ISO 3166 country index table |
| countryIndex = parseCountryIndexTable(in); |
| } |
| |
| /** |
| * Convert equivalency table indices to offsets. The equivalency |
| * table offset (in the header) must be set already. |
| */ |
| void gentz::fixupNameToEquiv() { |
| uint32_t i; |
| |
| // First make a list that maps indices to offsets |
| uint32_t *offsets = new uint32_t[equivCount]; |
| offsets[0] = header.equivTableDelta; |
| if (offsets[0] % 4 != 0) { |
| die("Header size is not 4-aligned"); |
| } |
| TZEquivalencyGroup *eg = equivTable; |
| for (i=1; i<equivCount; ++i) { |
| offsets[i] = offsets[i-1] + eg->nextEntryDelta; |
| if (offsets[i] % 4 != 0) { |
| die("Equivalency group table is not 4-aligned"); |
| } |
| eg = (TZEquivalencyGroup*) (eg->nextEntryDelta + (int8_t*)eg); |
| } |
| |
| // Now remap index values to offsets |
| for (i=0; i<header.count; ++i) { |
| uint32_t x = nameToEquiv[i]; |
| if (x >= equivCount) { |
| die("Equiv index out of range"); |
| } |
| nameToEquiv[i] = offsets[x]; |
| } |
| |
| delete[] offsets; |
| } |
| |
| TZEquivalencyGroup* gentz::parseEquivTable(FileStream* in) { |
| uint32_t n = readIntegerLine(in, 1, MAX_ZONES); |
| if (n != equivCount) { |
| die("Equivalency table count mismatch"); |
| } |
| |
| // We don't know how big the whole thing will be yet, but we can use |
| // the maxPerEquiv number to compute an upper limit. |
| // |
| // The gmtOffset field within each struct must be |
| // 4-aligned for some architectures. To ensure this, we do two |
| // things: 1. The entire struct is 4-aligned. 2. The gmtOffset is |
| // placed at a 4-aligned position within the struct. 3. The size |
| // of the whole structure is padded out to 4n bytes. We achieve |
| // this last condition by adding two bytes of padding after the |
| // last entry, if necessary. We adjust |
| // the nextEntryDelta and add 2 bytes of padding if necessary. |
| uint32_t maxPossibleSize = sizeof(TZEquivalencyGroup) + |
| (maxPerEquiv-1) * sizeof(uint16_t); |
| // Pad this out |
| if ((maxPossibleSize % 4) != 0) { |
| maxPossibleSize += 2; |
| } |
| if ((maxPossibleSize % 4) != 0) { |
| die("Bug in 4-align code for equiv table"); |
| } |
| maxPossibleSize *= n; // Get size of entire set of structs. |
| |
| int8_t *result = new int8_t[maxPossibleSize]; |
| if (result == 0) { |
| die("Out of memory"); |
| } |
| |
| // Read each line and construct the corresponding entry |
| TZEquivalencyGroup* eg = (TZEquivalencyGroup*)result; |
| for (uint32_t i=0; i<n; ++i) { |
| char *p; |
| |
| readLine(in); |
| |
| // Each line starts with 's,' or 'd,' to specify the zone type |
| char flavor = buffer[0]; |
| if (buffer[1] != SEP) { |
| die("Syntax error in equiv table"); |
| } |
| p = buffer + 2; |
| |
| // This pointer will be adjusted to point to the start of the |
| // list of zones in this group. |
| uint16_t* pList = 0; |
| |
| switch (flavor) { |
| case STANDARD_MARK: |
| eg->isDST = 0; |
| eg->u.s.zone.gmtOffset = 1000 * // Convert s -> ms |
| parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); |
| pList = &(eg->u.s.count); |
| break; |
| case DST_MARK: |
| eg->isDST = 1; |
| eg->u.d.zone.gmtOffset = 1000 * // Convert s -> ms |
| parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); |
| parseDSTRule(p, eg->u.d.zone.onsetRule); |
| parseDSTRule(p, eg->u.d.zone.ceaseRule); |
| eg->u.d.zone.dstSavings = (uint16_t) parseInteger(p, SEP, 0, 12*60); |
| pList = &(eg->u.d.count); |
| break; |
| default: |
| die("Invalid equiv table type marker (not s or d)"); |
| } |
| |
| // Now parse the list of zones in this group |
| uint16_t egCount = (uint16_t) parseInteger(p, SEP, 1, maxPerEquiv); |
| *pList++ = egCount; |
| for (uint16_t j=0; j<egCount; ++j) { |
| *pList++ = (uint16_t) parseInteger(p, (j==(egCount-1)) ? NUL : SEP, |
| 0, header.count-1); |
| } |
| |
| // At this point pList points to the byte after the last byte of this |
| // equiv group struct. Time to 4-align it. |
| uint16_t structSize = (uint16_t) (((int8_t*)pList) - ((int8_t*)eg)); |
| if ((structSize % 4) != 0) { |
| // assert(structSize % 4 == 2); |
| *pList++ = 0xFFFF; // Pad with invalid zone index |
| structSize += 2; |
| } |
| |
| // Set up next entry delta |
| eg->nextEntryDelta = (i==(n-1)) ? (uint16_t) 0 : structSize; |
| |
| eg->reserved = 0; // ignored |
| |
| eg = (TZEquivalencyGroup*) (structSize + (int8_t*)eg); |
| } |
| equivTableSize = (int8_t*)eg - (int8_t*)result; |
| readEndMarker(in); |
| fprintf(stdout, " Read %lu equivalency table entries, in-memory size %ld bytes\n", |
| (unsigned long)equivCount, (long)equivTableSize); |
| return (TZEquivalencyGroup*)result; |
| } |
| |
| OffsetIndex* gentz::parseOffsetIndexTable(FileStream* in) { |
| uint32_t n = readIntegerLine(in, 1, MAX_ZONES); |
| |
| // We don't know how big the whole thing will be yet, but we can use |
| // the maxPerOffset number to compute an upper limit. |
| // |
| // The gmtOffset field within each OffsetIndex struct must be |
| // 4-aligned for some architectures. To ensure this, we do two |
| // things: 1. The entire struct is 4-aligned. 2. The gmtOffset is |
| // placed at a 4-aligned position within the struct. 3. The size |
| // of the whole structure is padded out to 4n bytes. We achieve |
| // this last condition by adding two bytes of padding after the |
| // last zoneNumber, if count is _even_. That is, the struct size |
| // is 10+2count+padding, where padding is (count%2==0 ? 2:0). |
| // |
| // Note that we don't change the count itself, but rather adjust |
| // the nextEntryDelta and add 2 bytes of padding if necessary. |
| // |
| // Don't try to compute the exact size in advance |
| // (unless we want to avoid the use of sizeof(), which may |
| // introduce padding that we won't actually employ). |
| uint32_t maxPossibleSize = n * (sizeof(OffsetIndex) + |
| (maxPerOffset-1) * sizeof(uint16_t)); |
| |
| int8_t *result = new int8_t[maxPossibleSize]; |
| if (result == 0) { |
| die("Out of memory"); |
| } |
| |
| // Read each line and construct the corresponding entry |
| OffsetIndex* index = (OffsetIndex*)result; |
| for (uint32_t i=0; i<n; ++i) { |
| uint16_t alignedCount; |
| readLine(in); |
| char* p = buffer; |
| index->gmtOffset = 1000 * // Convert s -> ms |
| parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); |
| index->defaultZone = (uint16_t)parseInteger(p, SEP, 0, header.count-1); |
| index->count = (uint16_t)parseInteger(p, SEP, 1, maxPerOffset); |
| uint16_t* zoneNumberArray = &(index->zoneNumber); |
| UBool sawOffset = FALSE; // Sanity check - make sure offset is in zone list |
| for (uint16_t j=0; j<index->count; ++j) { |
| zoneNumberArray[j] = (uint16_t) |
| parseInteger(p, (j==(index->count-1))?NUL:SEP, |
| 0, header.count-1); |
| if (zoneNumberArray[j] == index->defaultZone) { |
| sawOffset = TRUE; |
| } |
| } |
| if (!sawOffset) { |
| die("Error: bad offset index entry; default not in zone list"); |
| } |
| alignedCount = index->count; |
| if((alignedCount%2)==0) /* force count to be ODD - see above */ |
| { |
| // Use invalid zoneNumber for 2 bytes of padding |
| zoneNumberArray[alignedCount++] = (uint16_t)0xFFFF; |
| } |
| int8_t* nextIndex = (int8_t*)&(zoneNumberArray[alignedCount]); |
| |
| index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index)); |
| index = (OffsetIndex*)nextIndex; |
| } |
| offsetIndexSize = (int8_t*)index - (int8_t*)result; |
| if (offsetIndexSize > maxPossibleSize) { |
| die("Yikes! Interal error while constructing offset index table"); |
| } |
| readEndMarker(in); |
| fprintf(stdout, " Read %lu offset index table entries, in-memory size %ld bytes\n", |
| (unsigned long)n, (long)offsetIndexSize); |
| return (OffsetIndex*)result; |
| } |
| |
| CountryIndex* gentz::parseCountryIndexTable(FileStream* in) { |
| uint32_t n = readIntegerLine(in, 1, MAX_ZONES); |
| |
| // We know how big the whole thing will be: Each zone occupies an |
| // int, and each country adds 3 ints (one for the intcode, one for |
| // next entry offset, one for the zone count). Each int is 16 |
| // bits. |
| // |
| // Everything is 16-bits, so we don't 4-align the entries. |
| // However, we do pad at the end of the table to make the whole |
| // thing of size 4n, if necessary. |
| uint32_t expectedSize = n*(sizeof(CountryIndex)-sizeof(uint16_t)) + |
| header.count * sizeof(uint16_t); |
| uint32_t pad = (4 - (expectedSize % 4)) % 4; // This will be 0 or 2 |
| int8_t *result = new int8_t[expectedSize + pad]; |
| if (result == 0) { |
| die("Out of memory"); |
| } |
| |
| // Read each line and construct the corresponding entry. |
| // Along the way, make sure we don't write past 'limit'. |
| CountryIndex* index = (CountryIndex*)result; |
| int8_t* limit = ((int8_t*)result) + expectedSize; // Don't include pad |
| uint32_t i; |
| for (i=0; i<n && (int8_t*)(&index->zoneNumber) < limit; ++i) { |
| readLine(in); |
| char* p = buffer; |
| index->intcode = (uint16_t)parseInteger(p, SEP, 0, 25*32+25 /*ZZ*/); |
| index->count = (uint16_t)parseInteger(p, SEP, 0, header.count-1); |
| uint16_t* zoneNumberArray = &(index->zoneNumber); |
| if ((int8_t*)(&index->zoneNumber + index->count - 1) >= limit) { |
| // Oops -- out of space |
| break; |
| } |
| for (uint16_t j=0; j<index->count; ++j) { |
| zoneNumberArray[j] = (uint16_t) |
| parseInteger(p, (j==(index->count-1))?NUL:SEP, |
| 0, header.count-1); |
| } |
| int8_t* nextIndex = (int8_t*)&(zoneNumberArray[index->count]); |
| index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index)); |
| index = (CountryIndex*)nextIndex; |
| } |
| readEndMarker(in); |
| |
| // Make sure size matches expected value, and pad the total size |
| countryIndexSize = (int8_t*)index - (int8_t*)result + pad; |
| if (i != n || countryIndexSize != expectedSize) { |
| die("Yikes! Interal error while constructing offset index table"); |
| } |
| if (pad != 0) { |
| countryIndexSize += pad; |
| *(uint16_t*)index = 0; // Clear pad bits |
| } |
| fprintf(stdout, " Read %lu country index table entries, in-memory size %ld bytes\n", |
| (unsigned long)n, (long)countryIndexSize); |
| return (CountryIndex*)result; |
| } |
| |
| void gentz::parseHeader(FileStream* in) { |
| |
| int32_t version = readIntegerLine(in, 0, 0xFFFF); |
| if (version != TZ_FORMAT_VERSION) { |
| die("Version mismatch between gentz and input file"); |
| } |
| |
| // Version string, e.g., "1999j" -> (1999<<16) | 10 |
| header.versionYear = (uint16_t) readIntegerLine(in, 1990, 0xFFFF); |
| header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF); |
| |
| header.count = readIntegerLine(in, 1, MAX_ZONES); |
| equivCount = readIntegerLine(in, 1, header.count); |
| maxPerOffset = readIntegerLine(in, 1, header.count); |
| maxPerEquiv = readIntegerLine(in, 1, equivCount); |
| |
| // Size of name table in bytes |
| // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.) |
| nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF); |
| |
| readEndMarker(in); |
| |
| fprintf(stdout, " Read header, data version %u(%u), in-memory size %ld bytes\n", |
| header.versionYear, header.versionSuffix, |
| (unsigned long)sizeof(header)); |
| } |
| |
| void gentz::parseDSTRule(char*& p, TZRule& rule) { |
| rule.month = (uint8_t) parseInteger(p, SEP, 0, 11); |
| rule.dowim = (int8_t) parseInteger(p, SEP, -31, 31); |
| rule.dow = (int8_t) parseInteger(p, SEP, -7, 7); |
| rule.time = (uint16_t) parseInteger(p, SEP, 0, 24*60); |
| rule.mode = *p++; |
| if (*p++ != SEP) { |
| die("Separator missing"); |
| } |
| switch ((char)rule.mode) { |
| case 'w': |
| rule.mode = WALL_TIME; |
| break; |
| case 's': |
| rule.mode = STANDARD_TIME; |
| break; |
| case 'u': |
| rule.mode = UTC_TIME; |
| break; |
| default: |
| die("Invalid rule time mode"); |
| break; |
| } |
| } |
| |
| /** |
| * Parse the name table. |
| * Each entry of the name table looks like this: |
| * |36,Africa/Djibouti |
| * The integer is an equivalency table index. We build up a name |
| * table, that just contains the names, and we return it. We also |
| * build up the name index, which indexes names to equivalency table |
| * entries. This is stored in the member variable nameToEquiv. |
| */ |
| char* gentz::parseNameTable(FileStream* in) { |
| int32_t n = readIntegerLine(in, 1, MAX_ZONES); |
| if (n != (int32_t)header.count) { |
| die("Zone count doesn't match name table count"); |
| } |
| char* names = new char[nameTableSize]; |
| nameToEquiv = new uint32_t[n]; |
| if (names == 0 || nameToEquiv == 0) { |
| die("Out of memory"); |
| } |
| nameToEquivSize = n * sizeof(nameToEquiv[0]); |
| char* p = names; |
| char* limit = names + nameTableSize; |
| for (int32_t i=0; i<n; ++i) { |
| readLine(in); |
| char* q = buffer; |
| // We store an index here for now -- later, in fixNameToEquiv, |
| // we convert it to an offset. |
| nameToEquiv[i] = (uint32_t) parseInteger(q, SEP, 0, equivCount-1); |
| int32_t len = uprv_strlen(q); |
| if ((p + len) <= limit) { |
| uprv_memcpy(p, q, len); |
| p += len; |
| *p++ = NUL; |
| } else { |
| die("Name table longer than declared size"); |
| } |
| } |
| if (p != limit) { |
| die("Name table shorter than declared size"); |
| } |
| readEndMarker(in); |
| fprintf(stdout, " Read %ld names, in-memory size %ld bytes\n", |
| (long)n, (long)nameTableSize); |
| return names; |
| } |
| |
| /** |
| * Read the end marker (terminates each list). |
| */ |
| void gentz::readEndMarker(FileStream* in) { |
| readLine(in); |
| if (uprv_strcmp(buffer, END_KEYWORD) != 0) { |
| die("Keyword 'end' missing"); |
| } |
| } |
| |
| /** |
| * Read a line from the FileStream and parse it as an |
| * integer. There should be nothing else on the line. |
| */ |
| int32_t gentz::readIntegerLine(FileStream* in, int32_t min, int32_t max) { |
| readLine(in); |
| char* p = buffer; |
| return parseInteger(p, NUL, min, max); |
| } |
| |
| /** |
| * Parse an integer from the given character buffer. |
| * Advance p past the last parsed character. Return |
| * the result. The integer must be of the form |
| * /-?\d+/. |
| */ |
| int32_t gentz::_parseInteger(char*& p) { |
| int32_t n = 0; |
| int32_t digitCount = 0; |
| int32_t digit; |
| UBool negative = FALSE; |
| if (*p == MINUS) { |
| ++p; |
| negative = TRUE; |
| } |
| for (;;) { |
| digit = *p - ZERO; |
| if (digit < 0 || digit > 9) { |
| break; |
| } |
| n = 10*n + digit; |
| p++; |
| digitCount++; |
| } |
| if (digitCount < 1) { |
| die("Unable to parse integer"); |
| } |
| if (negative) { |
| n = -n; |
| } |
| return n; |
| } |
| |
| int32_t gentz::parseInteger(char*& p, char nextExpectedChar, |
| int32_t min, int32_t max) { |
| int32_t n = _parseInteger(p); |
| if (*p++ != nextExpectedChar) { |
| die("Character following integer unexpected"); |
| } |
| if (n < min || n > max) { |
| die("Integer field out of range"); |
| } |
| return n; |
| } |
| |
| void gentz::die(const char* msg) { |
| fprintf(stderr, "ERROR, %s\n", msg); |
| if (*buffer) { |
| fprintf(stderr, "Input file line %ld: \"%s\"\n", (long)lineNumber, buffer); |
| } |
| exit(1); |
| } |
| |
| /** |
| * Read a line. Trim trailing comment and whitespace. Ignore (skip) |
| * blank lines, or comment-only lines. Return the number of characters |
| * on the line remaining. On EOF, die. |
| */ |
| int32_t gentz::readLine(FileStream* in) { |
| ++lineNumber; |
| char* result = T_FileStream_readLine(in, buffer, BUFLEN); |
| if (result == 0) { |
| *buffer = 0; |
| die("Unexpected end of file"); |
| } |
| // Trim off trailing comment |
| char* p = uprv_strchr(buffer, COMMENT); |
| if (p != 0) { |
| *p = NUL; |
| } |
| // Delete trailing whitespace |
| p = buffer + uprv_strlen(buffer); |
| while (p > buffer && (p[-1] == CR || p[-1] == LF || |
| p[-1] == SPACE || p[-1] == TAB)) { |
| p--; |
| } |
| *p = NUL; |
| // If line is empty after trimming comments & whitespace, |
| // then read the next line. |
| return (*buffer == NUL) ? readLine(in) : uprv_strlen(buffer); |
| } |