| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2005-2012, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * file name: writesrc.c |
| * encoding: UTF-8 |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2005apr23 |
| * created by: Markus W. Scherer |
| * |
| * Helper functions for writing source code for data. |
| */ |
| |
| #include <stdio.h> |
| #include <inttypes.h> |
| #include <time.h> |
| #include "unicode/utypes.h" |
| #include "unicode/putil.h" |
| #include "unicode/ucptrie.h" |
| #include "unicode/errorcode.h" |
| #include "unicode/uniset.h" |
| #include "unicode/usetiter.h" |
| #include "unicode/utf16.h" |
| #include "utrie2.h" |
| #include "cstring.h" |
| #include "writesrc.h" |
| #include "util.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| ValueNameGetter::~ValueNameGetter() {} |
| |
| U_NAMESPACE_END |
| |
| U_NAMESPACE_USE |
| |
| static FILE * |
| usrc_createWithoutHeader(const char *path, const char *filename) { |
| char buffer[1024]; |
| const char *p; |
| char *q; |
| FILE *f; |
| char c; |
| |
| if(path==NULL) { |
| p=filename; |
| } else { |
| /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ |
| uprv_strcpy(buffer, path); |
| q=buffer+uprv_strlen(buffer); |
| if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { |
| *q++=U_FILE_SEP_CHAR; |
| } |
| uprv_strcpy(q, filename); |
| p=buffer; |
| } |
| |
| f=fopen(p, "w"); |
| if (f==NULL) { |
| fprintf( |
| stderr, |
| "usrc_create(%s, %s): unable to create file\n", |
| path!=NULL ? path : "", filename); |
| } |
| return f; |
| } |
| |
| U_CAPI FILE * U_EXPORT2 |
| usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { |
| FILE *f = usrc_createWithoutHeader(path, filename); |
| if (f == NULL) { |
| return f; |
| } |
| usrc_writeCopyrightHeader(f, "//", copyrightYear); |
| usrc_writeFileNameGeneratedBy(f, "//", filename, generator); |
| return f; |
| } |
| |
| U_CAPI FILE * U_EXPORT2 |
| usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { |
| FILE *f = usrc_createWithoutHeader(path, filename); |
| if (f == NULL) { |
| return f; |
| } |
| usrc_writeCopyrightHeader(f, "#", copyrightYear); |
| usrc_writeFileNameGeneratedBy(f, "#", filename, generator); |
| return f; |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) { |
| fprintf(f, |
| "%s Copyright (C) %d and later: Unicode, Inc. and others.\n" |
| "%s License & terms of use: http://www.unicode.org/copyright.html\n", |
| prefix, copyrightYear, prefix); |
| if (copyrightYear <= 2016) { |
| fprintf(f, |
| "%s Copyright (C) 1999-2016, International Business Machines\n" |
| "%s Corporation and others. All Rights Reserved.\n", |
| prefix, prefix); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeFileNameGeneratedBy( |
| FILE *f, |
| const char *prefix, |
| const char *filename, |
| const char *generator) { |
| char buffer[1024]; |
| const struct tm *lt; |
| time_t t; |
| |
| const char *pattern = |
| "%s\n" |
| "%s file name: %s\n" |
| "%s\n" |
| "%s machine-generated by: %s\n" |
| "\n"; |
| |
| time(&t); |
| lt=localtime(&t); |
| if(generator==NULL) { |
| strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); |
| fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer); |
| } else { |
| fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeArray(FILE *f, |
| const char *prefix, |
| const void *p, int32_t width, int32_t length, |
| const char *indent, |
| const char *postfix) { |
| const uint8_t *p8; |
| const uint16_t *p16; |
| const uint32_t *p32; |
| const int64_t *p64; // Signed due to TOML! |
| int64_t value; // Signed due to TOML! |
| int32_t i, col; |
| |
| p8=NULL; |
| p16=NULL; |
| p32=NULL; |
| p64=NULL; |
| switch(width) { |
| case 8: |
| p8=(const uint8_t *)p; |
| break; |
| case 16: |
| p16=(const uint16_t *)p; |
| break; |
| case 32: |
| p32=(const uint32_t *)p; |
| break; |
| case 64: |
| p64=(const int64_t *)p; |
| break; |
| default: |
| fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); |
| return; |
| } |
| if(prefix!=NULL) { |
| fprintf(f, prefix, (long)length); |
| } |
| for(i=col=0; i<length; ++i, ++col) { |
| if(i>0) { |
| if(col<16) { |
| fputc(',', f); |
| } else { |
| fputs(",\n", f); |
| fputs(indent, f); |
| col=0; |
| } |
| } |
| switch(width) { |
| case 8: |
| value=p8[i]; |
| break; |
| case 16: |
| value=p16[i]; |
| break; |
| case 32: |
| value=p32[i]; |
| break; |
| case 64: |
| value=p64[i]; |
| break; |
| default: |
| value=0; /* unreachable */ |
| break; |
| } |
| fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value); |
| } |
| if(postfix!=NULL) { |
| fputs(postfix, f); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUTrie2Arrays(FILE *f, |
| const char *indexPrefix, const char *data32Prefix, |
| const UTrie2 *pTrie, |
| const char *postfix) { |
| if(pTrie->data32==NULL) { |
| /* 16-bit trie */ |
| usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix); |
| } else { |
| /* 32-bit trie */ |
| usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix); |
| usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUTrie2Struct(FILE *f, |
| const char *prefix, |
| const UTrie2 *pTrie, |
| const char *indexName, const char *data32Name, |
| const char *postfix) { |
| if(prefix!=NULL) { |
| fputs(prefix, f); |
| } |
| if(pTrie->data32==NULL) { |
| /* 16-bit trie */ |
| fprintf( |
| f, |
| " %s,\n" /* index */ |
| " %s+%ld,\n" /* data16 */ |
| " NULL,\n", /* data32 */ |
| indexName, |
| indexName, |
| (long)pTrie->indexLength); |
| } else { |
| /* 32-bit trie */ |
| fprintf( |
| f, |
| " %s,\n" /* index */ |
| " NULL,\n" /* data16 */ |
| " %s,\n", /* data32 */ |
| indexName, |
| data32Name); |
| } |
| fprintf( |
| f, |
| " %ld,\n" /* indexLength */ |
| " %ld,\n" /* dataLength */ |
| " 0x%hx,\n" /* index2NullOffset */ |
| " 0x%hx,\n" /* dataNullOffset */ |
| " 0x%lx,\n" /* initialValue */ |
| " 0x%lx,\n" /* errorValue */ |
| " 0x%lx,\n" /* highStart */ |
| " 0x%lx,\n" /* highValueIndex */ |
| " NULL, 0, false, false, 0, NULL\n", |
| (long)pTrie->indexLength, (long)pTrie->dataLength, |
| (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, |
| (long)pTrie->initialValue, (long)pTrie->errorValue, |
| (long)pTrie->highStart, (long)pTrie->highValueIndex); |
| if(postfix!=NULL) { |
| fputs(postfix, f); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUCPTrieArrays(FILE *f, |
| const char *indexPrefix, const char *dataPrefix, |
| const UCPTrie *pTrie, |
| const char *postfix, |
| UTargetSyntax syntax) { |
| const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : ""; |
| usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix); |
| int32_t width= |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; |
| usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUCPTrieStruct(FILE *f, |
| const char *prefix, |
| const UCPTrie *pTrie, |
| const char *indexName, const char *dataName, |
| const char *postfix, |
| UTargetSyntax syntax) { |
| if(prefix!=NULL) { |
| fputs(prefix, f); |
| } |
| if (syntax == UPRV_TARGET_SYNTAX_CCODE) { |
| fprintf( |
| f, |
| " %s,\n" // index |
| " { %s },\n", // data (union) |
| indexName, |
| dataName); |
| } |
| const char* pattern = |
| (syntax == UPRV_TARGET_SYNTAX_CCODE) ? |
| " %ld, %ld,\n" // indexLength, dataLength |
| " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart |
| " %d, %d,\n" // type, valueWidth |
| " 0, 0,\n" // reserved32, reserved16 |
| " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset |
| " 0x%lx,\n" // nullValue |
| : |
| "indexLength = %ld\n" |
| "dataLength = %ld\n" |
| "highStart = 0x%lx\n" |
| "shifted12HighStart = 0x%x\n" |
| "type = %d\n" |
| "valueWidth = %d\n" |
| "index3NullOffset = 0x%x\n" |
| "dataNullOffset = 0x%lx\n" |
| "nullValue = 0x%lx\n" |
| ; |
| fprintf( |
| f, |
| pattern, |
| (long)pTrie->indexLength, (long)pTrie->dataLength, |
| (long)pTrie->highStart, pTrie->shifted12HighStart, |
| pTrie->type, pTrie->valueWidth, |
| pTrie->index3NullOffset, (long)pTrie->dataNullOffset, |
| (long)pTrie->nullValue); |
| if(postfix!=NULL) { |
| fputs(postfix, f); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) { |
| int32_t width= |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : |
| pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; |
| char line[100], line2[100], line3[100], line4[100]; |
| |
| switch (syntax) { |
| case UPRV_TARGET_SYNTAX_CCODE: |
| snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name); |
| snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name); |
| snprintf(line3, sizeof(line3), "\n};\n\n"); |
| break; |
| case UPRV_TARGET_SYNTAX_TOML: |
| snprintf(line, sizeof(line), "index = [\n "); |
| snprintf(line2, sizeof(line2), "data_%d = [\n ", (int)width); |
| snprintf(line3, sizeof(line3), "\n]\n"); |
| break; |
| default: |
| UPRV_UNREACHABLE_EXIT; |
| } |
| usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax); |
| |
| switch (syntax) { |
| case UPRV_TARGET_SYNTAX_CCODE: |
| snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name); |
| snprintf(line2, sizeof(line2), "%s_trieIndex", name); |
| snprintf(line3, sizeof(line3), "%s_trieData", name); |
| snprintf(line4, sizeof(line4), "};\n\n"); |
| break; |
| case UPRV_TARGET_SYNTAX_TOML: |
| line[0] = 0; |
| line2[0] = 0; |
| line3[0] = 0; |
| line4[0] = 0; |
| break; |
| default: |
| UPRV_UNREACHABLE_EXIT; |
| } |
| usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUnicodeSet( |
| FILE *f, |
| const USet *pSet, |
| UTargetSyntax syntax) { |
| // ccode is not yet supported |
| U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); |
| |
| // Write out a list of ranges |
| const UnicodeSet* set = UnicodeSet::fromUSet(pSet); |
| UnicodeSetIterator it(*set); |
| fprintf(f, "# Inclusive ranges of the code points in the set.\n"); |
| fprintf(f, "ranges = [\n"); |
| bool seenFirstString = false; |
| while (it.nextRange()) { |
| if (it.isString()) { |
| if (!seenFirstString) { |
| seenFirstString = true; |
| fprintf(f, "]\nstrings = [\n"); |
| } |
| const UnicodeString& str = it.getString(); |
| fprintf(f, " "); |
| usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax); |
| fprintf(f, ",\n"); |
| } else { |
| U_ASSERT(!seenFirstString); |
| UChar32 start = it.getCodepoint(); |
| UChar32 end = it.getCodepointEnd(); |
| fprintf(f, " [0x%x, 0x%x],\n", start, end); |
| } |
| } |
| fprintf(f, "]\n"); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeUCPMap( |
| FILE *f, |
| const UCPMap *pMap, |
| icu::ValueNameGetter *valueNameGetter, |
| UTargetSyntax syntax) { |
| // ccode is not yet supported |
| U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); |
| (void) syntax; // silence unused variable errors |
| |
| // Print out list of ranges |
| UChar32 start = 0, end; |
| uint32_t value; |
| fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n"); |
| fprintf(f, "ranges = [\n"); |
| while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) { |
| if (valueNameGetter != nullptr) { |
| const char *name = valueNameGetter->getName(value); |
| fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name); |
| } else { |
| fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value); |
| } |
| start = end + 1; |
| } |
| fprintf(f, "]\n"); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeArrayOfMostlyInvChars(FILE *f, |
| const char *prefix, |
| const char *p, int32_t length, |
| const char *postfix) { |
| int32_t i, col; |
| int prev2, prev, c; |
| |
| if(prefix!=NULL) { |
| fprintf(f, prefix, (long)length); |
| } |
| prev2=prev=-1; |
| for(i=col=0; i<length; ++i, ++col) { |
| c=(uint8_t)p[i]; |
| if(i>0) { |
| /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ |
| if( |
| /* Very long line. */ |
| col>=32 || |
| /* Long line, break after terminating NUL. */ |
| (col>=24 && prev2>=0x20 && prev==0) || |
| /* Medium-long line, break before non-NUL, non-character byte. */ |
| (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20) |
| ) { |
| fputs(",\n", f); |
| col=0; |
| } else { |
| fputc(',', f); |
| } |
| } |
| fprintf(f, c<0x20 ? "%u" : "'%c'", c); |
| prev2=prev; |
| prev=c; |
| } |
| if(postfix!=NULL) { |
| fputs(postfix, f); |
| } |
| } |
| |
| U_CAPI void U_EXPORT2 |
| usrc_writeStringAsASCII(FILE *f, |
| const UChar* ptr, int32_t length, |
| UTargetSyntax) { |
| // For now, assume all UTargetSyntax values are valid here. |
| fprintf(f, "\""); |
| int32_t i = 0; |
| UChar32 cp; |
| while (i < length) { |
| U16_NEXT(ptr, i, length, cp); |
| if (cp == u'"') { |
| fprintf(f, "\\\""); |
| } else if (ICU_Utility::isUnprintable(cp)) { |
| UnicodeString u16result; |
| ICU_Utility::escapeUnprintable(u16result, cp); |
| std::string u8result; |
| u16result.toUTF8String(u8result); |
| fprintf(f, "%s", u8result.data()); |
| } else { |
| U_ASSERT(cp < 0x80); |
| char s[2] = {static_cast<char>(cp), 0}; |
| fprintf(f, "%s", s); |
| } |
| } |
| fprintf(f, "\""); |
| } |