/* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either version 2.1
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, see <https://www.gnu.org/licenses/>. */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode <tab> utf-8 replacement <tab> # comment
 * The input table is read from standard input; the generated C source is
 * written to standard output.
 */

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Sizes of the tables used by the generator. */
enum {
  UNICODE_LIMIT = 0x110000,                 /* one past the largest accepted code point */
  DATA_CAPACITY = 0x100000,                 /* elements allocated for the data array */
  LINE_WIDTH    = 8,                        /* code points per index "line" */
  LINE_COUNT    = UNICODE_LIMIT / LINE_WIDTH, /* 0x22000 lines */
  MAX_TABLES    = 0x2000,                   /* capacity of the page-table list */
  SUFFIX_SIZE   = 4+1+2+1                   /* "xxxx" + "_" + "nn" + NUL */
};

/* One emitted page table: a run of index lines minline..maxline. */
struct table {
  int minline;              /* first line covered by the table */
  int maxline;              /* last line covered by the table */
  int usecount;             /* number of mapped code points in the run */
  char suffix[SUFFIX_SIZE]; /* name suffix; only set when usecount > 1 */
};

/* Prints MSG to stderr and terminates with exit status 1. */
static void fail (const char *msg)
{
  fprintf(stderr, "%s\n", msg);
  exit(1);
}

/* Consumes the rest of the current input line (up to '\n' or EOF). */
static void skip_line (void)
{
  int c;

  do {
    c = getc(stdin);
  } while (!(c == EOF || c == '\n'));
}

/* Stores VALUE at data[count] and returns the new element count.
   Fails instead of writing past the allocated DATA_CAPACITY elements. */
static int append (unsigned int *data, int count, unsigned int value)
{
  if (count >= DATA_CAPACITY)
    fail("transliteration data exceeds the table capacity");
  data[count] = value;
  return count + 1;
}

/* Finishes reading a multi-byte UTF-8 sequence whose first byte LEAD
   (>= 0x80) has already been read from stdin, and returns the decoded value.
   Sequences of 2 to 6 bytes are accepted.  The arithmetic is done in
   unsigned int; the lead bytes 0xFE and 0xFF are rejected because their
   decoded value would not fit in 31 bits. */
static unsigned int read_utf8_tail (int lead)
{
  unsigned int nbytes;
  unsigned int wc;

  if (lead < 0xc0 || lead >= 0xfe)
    fail("invalid UTF-8 lead byte in replacement string");
  nbytes = (lead < 0xe0 ? 2 : lead < 0xf0 ? 3 : lead < 0xf8 ? 4 : lead < 0xfc ? 5 : 6);
  wc = (unsigned int) lead & ((1u << (8 - nbytes)) - 1);
  while (--nbytes > 0) {
    int cc = getc(stdin);
    if (!(cc >= 0x80 && cc < 0xc0))
      fail("invalid UTF-8 continuation byte in replacement string");
    wc = (wc << 6) | ((unsigned int) cc & 0x3f);
  }
  return wc;
}

/* Writes the fixed comment header of the generated file to stdout. */
static void emit_header (void)
{
  /* NOTE(review): the emitted notice says "version 2" while the notice at the
     top of this generator says "version 2.1" -- confirm which is intended.
     The text is deliberately left unchanged here. */
  static const char *const header[] = {
    "/*\n",
    " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n",
    " * This file is part of the GNU LIBICONV Library.\n",
    " *\n",
    " * The GNU LIBICONV Library is free software; you can redistribute it\n",
    " * and/or modify it under the terms of the GNU Lesser General Public\n",
    " * License as published by the Free Software Foundation; either version 2\n",
    " * of the License, or (at your option) any later version.\n",
    " *\n",
    " * The GNU LIBICONV Library is distributed in the hope that it will be\n",
    " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n",
    " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n",
    " * Lesser General Public License for more details.\n",
    " *\n",
    " * You should have received a copy of the GNU Lesser General Public\n",
    " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n",
    " * If not, see <https://www.gnu.org/licenses/>.\n",
    " */\n",
    "\n",
    "/*\n",
    " * Transliteration table\n",
    " */\n",
    "\n"
  };
  size_t k;

  for (k = 0; k < sizeof(header) / sizeof(header[0]); k++)
    fputs(header[k], stdout);
}

/* Reads the input table from stdin.
   For every code point that has a non-empty replacement, uni2index[cp] is set
   to the position in DATA of a length element, which is followed by that many
   decoded replacement characters.  All other uni2index entries are -1.
   Lines starting with '#' are skipped; any malformed line terminates the
   program with exit status 1.
   Returns the number of elements stored in DATA. */
static int read_table (unsigned int *data, int *uni2index)
{
  int count = 0;
  int k;

  for (k = 0; k < UNICODE_LIMIT; k++)
    uni2index[k] = -1;

  for (;;) {
    unsigned int wc;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      skip_line();
      continue;
    }
    ungetc(c, stdin);
    /* %x requires a pointer to unsigned int. */
    if (scanf("%x", &wc) != 1)
      fail("expected a hexadecimal code point at the start of a line");
    /* The value indexes uni2index[], so it must be range-checked. */
    if (wc >= UNICODE_LIMIT)
      fail("code point out of range");
    if (getc(stdin) != '\t')
      fail("expected a tab after the code point");

    /* Replacement string, terminated by a tab. */
    for (;;) {
      unsigned int ch;

      c = getc(stdin);
      if (c == EOF || c == '\n')
        fail("unterminated replacement string");
      if (c == '\t')
        break;
      if (uni2index[wc] < 0) {
        /* First character: reserve the length element, filled in below. */
        uni2index[wc] = count;
        count = append(data, count, 0);
      }
      ch = (c >= 0x80 ? read_utf8_tail(c) : (unsigned int) c);
      count = append(data, count, ch);
    }
    if (uni2index[wc] >= 0)
      data[uni2index[wc]] = (unsigned int) (count - uni2index[wc] - 1);
    /* Ignore the trailing comment field. */
    skip_line();
  }
  return count;
}

/* Writes the translit_data[] array definition for the COUNT elements of DATA. */
static void emit_data (const unsigned int *data, int count)
{
  int k;

  printf("static const unsigned int translit_data[%d] = {", count);
  for (k = 0; k < count; k++) {
    unsigned int v = data[k];

    if (v < 32)
      printf("\n %3d,", (int) v);
    else if (v == '\'')
      printf("'\\'',");
    else if (v == '\\')
      printf("'\\\\',");
    else if (v < 127)
      printf(" '%c',", (int) v);
    else if (v < 256)
      printf("0x%02X,", v);
    else
      printf("0x%04X,", v);
  }
  printf("\n};\n");
}

/* Writes the translit_page* arrays and the translit_index(wc) macro.
   The code point space is cut into lines of LINE_WIDTH code points; runs of
   nearby non-empty lines are merged into tables.  A table with more than one
   mapped code point becomes a page array, a table with exactly one becomes a
   direct comparison inside the macro. */
static void emit_index (const int *uni2index)
{
  /* Static storage: together these arrays are far too large for the stack. */
  static int line[LINE_COUNT];
  static struct table tables[MAX_TABLES];
  int tableno;
  int i, j, p, j1, j2, t;

  /* Mark each line as empty (-1) or in use (0, replaced by a table number). */
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    bool all_invalid = true;

    for (j2 = 0; j2 < LINE_WIDTH; j2++)
      if (uni2index[LINE_WIDTH*j1+j2] >= 0)
        all_invalid = false;
    line[j1] = (all_invalid ? -1 : 0);
  }

  /* Group the used lines into tables.  A line joins the previous table when
     the line just before it belongs to that table, or when it lies in the
     same group of 32 lines as the table's last line and at most 8 lines
     after it. */
  tableno = 0;
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    if (line[j1] < 0)
      continue;
    if (tableno > 0
        && ((j1 > 0 && line[j1-1] == tableno-1)
            || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                && j1 - tables[tableno-1].maxline <= 8))) {
      line[j1] = tableno-1;
      tables[tableno-1].maxline = j1;
    } else {
      if (tableno >= MAX_TABLES)
        fail("too many page tables");
      tableno++;
      line[j1] = tableno-1;
      tables[tableno-1].minline = tables[tableno-1].maxline = j1;
    }
  }

  /* Count the mapped code points of each table. */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    for (j = LINE_WIDTH*tables[t].minline; j < LINE_WIDTH*(tables[t].maxline+1); j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* Name the page tables: "<group>" for the first table of a 32-line group,
     "<group>_<n>" for the following ones. */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    int len;

    tables[t].suffix[0] = '\0';
    if (tables[t].usecount <= 1)
      continue;
    if (p == tables[t].minline >> 5) {
      i++;
      /* i is the number of tables with the same (tables[t].minline >> 5)
         that we have seen so far.  Since the tables[t].minline values are
         strongly monotonically increasing, there are at most 32 of them. */
      if (!(i >= 0 && i <= 32)) abort();
      len = snprintf(tables[t].suffix, sizeof(tables[t].suffix),
                     "%02x_%d", (unsigned int) p, i);
    } else {
      p = tables[t].minline >> 5;
      i = 0;
      len = snprintf(tables[t].suffix, sizeof(tables[t].suffix),
                     "%02x", (unsigned int) p);
    }
    if (len < 0 || (size_t) len >= sizeof(tables[t].suffix)) abort();
  }

  /* Emit the page arrays. */
  p = -1;
  for (t = 0; t < tableno; t++) {
    if (tables[t].usecount <= 1)
      continue;
    p = 0;
    printf("static const short translit_page%s[%d] = {\n", tables[t].suffix,
           LINE_WIDTH*(tables[t].maxline-tables[t].minline+1));
    for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
      if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
        printf(" /* 0x%04x */\n", (unsigned int) (LINE_WIDTH*j1));
      printf(" ");
      for (j2 = 0; j2 < LINE_WIDTH; j2++) {
        int idx = uni2index[LINE_WIDTH*j1+j2];

        /* The page arrays are declared with element type 'short'. */
        if (idx > SHRT_MAX)
          fail("data index does not fit in a page table element");
        printf(" %4d,", idx);
      }
      printf(" /* 0x%02x-0x%02x */\n",
             (unsigned int) (LINE_WIDTH*(j1 % 0x20)),
             (unsigned int) (LINE_WIDTH*(j1 % 0x20)+7));
    }
    printf("};\n");
  }
  if (p >= 0)
    printf("\n");

  /* Emit the lookup macro: one conditional per table, ending in -1. */
  printf("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < LINE_COUNT; j1 = j2) {
    t = line[j1];
    for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
      ;
    if (t < 0)
      continue;  /* run of empty lines: nothing to emit */

    /* A table may contain empty lines, so jump to its recorded end. */
    if (j1 != tables[t].minline) abort();
    if (j2 > tables[t].maxline+1) abort();
    j2 = tables[t].maxline+1;

    if (tables[t].usecount == 0) abort();
    if (tables[t].usecount == 1) {
      if (j2 != j1+1) abort();
      for (j = LINE_WIDTH*j1; j < LINE_WIDTH*j2; j++)
        if (uni2index[j] >= 0) {
          printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
          break;
        }
    } else {
      if (j1 == 0)
        printf("wc < 0x%04x", (unsigned int) (LINE_WIDTH*j2));
      else
        printf("wc >= 0x%04x && wc < 0x%04x",
               (unsigned int) (LINE_WIDTH*j1), (unsigned int) (LINE_WIDTH*j2));
      printf(" ? translit_page%s[wc", tables[t].suffix);
      if (tables[t].minline > 0)
        printf("-0x%04x", (unsigned int) (LINE_WIDTH*j1));
      printf("]");
    }
    printf(" : \\\n ");
  }
  printf("-1)\n");
}

/* Entry point.  Takes no command-line arguments; reads the transliteration
   table from stdin and writes the generated C source to stdout.
   Exits with status 0 on success and 1 on bad usage, out of memory,
   malformed input, table overflow, or an output error. */
int main (int argc, char *argv[])
{
  unsigned int *data;
  int *uni2index;
  int count;

  (void) argv;
  if (argc != 1)
    fail("no arguments expected: input is read from stdin, output goes to stdout");

  data = malloc(DATA_CAPACITY * sizeof(*data));
  uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
  if (data == NULL || uni2index == NULL)
    fail("out of memory");

  /* The header is written before the input is read, as before. */
  emit_header();
  count = read_table(data, uni2index);
  emit_data(data, count);
  printf("\n");
  emit_index(uni2index);

  free(uni2index);
  free(data);

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  return 0;
}