blob: b9ee780bb4c61c0c399bc3f9dfc4ab80b58e7c70 [file] [log] [blame]
/* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
This file is part of the GNU LIBICONV Library.
The GNU LIBICONV Library is free software; you can redistribute it
and/or modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either version 2.1
of the License, or (at your option) any later version.
The GNU LIBICONV Library is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU LIBICONV Library; see the file COPYING.LIB.
If not, see <https://www.gnu.org/licenses/>. */
/*
* Generates a table of small strings, used for transliteration, from a table
* containing lines of the form
* Unicode <tab> utf-8 replacement <tab> # comment
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
int main (int argc, char *argv[])
{
unsigned int *data;
int *uni2index;
int index;
if (argc != 1)
exit(1);
data = malloc(0x100000 * sizeof(*data));
uni2index = malloc(0x110000 * sizeof(*uni2index));
if (data == NULL || uni2index == NULL) {
fprintf(stderr, "out of memory\n");
exit(1);
}
printf("/*\n");
printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
printf(" * This file is part of the GNU LIBICONV Library.\n");
printf(" *\n");
printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
printf(" * and/or modify it under the terms of the GNU Lesser General Public\n");
printf(" * License as published by the Free Software Foundation; either version 2\n");
printf(" * of the License, or (at your option) any later version.\n");
printf(" *\n");
printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
printf(" * Lesser General Public License for more details.\n");
printf(" *\n");
printf(" * You should have received a copy of the GNU Lesser General Public\n");
printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
printf(" */\n");
printf("\n");
printf("/*\n");
printf(" * Transliteration table\n");
printf(" */\n");
printf("\n");
{
int c;
int j;
for (j = 0; j < 0x110000; j++)
uni2index[j] = -1;
index = 0;
for (;;) {
c = getc(stdin);
if (c == EOF)
break;
if (c == '#') {
do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
continue;
}
ungetc(c,stdin);
if (scanf("%x",&j) != 1)
exit(1);
c = getc(stdin);
if (c != '\t')
exit(1);
for (;;) {
c = getc(stdin);
if (c == EOF || c == '\n')
exit(1);
if (c == '\t')
break;
if (uni2index[j] < 0) {
uni2index[j] = index;
data[index++] = 0;
}
if (c >= 0x80) {
/* Finish reading an UTF-8 character. */
if (c < 0xc0)
exit(1);
else {
unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
c &= (1 << (8-i)) - 1;
while (--i > 0) {
int cc = getc(stdin);
if (!(cc >= 0x80 && cc < 0xc0))
exit(1);
c <<= 6; c |= (cc & 0x3f);
}
}
}
data[index++] = (unsigned int) c;
}
if (uni2index[j] >= 0)
data[uni2index[j]] = index - uni2index[j] - 1;
do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
}
}
printf("static const unsigned int translit_data[%d] = {",index);
{
int i;
for (i = 0; i < index; i++) {
if (data[i] < 32)
printf("\n %3d,",data[i]);
else if (data[i] == '\'')
printf("'\\'',");
else if (data[i] == '\\')
printf("'\\\\',");
else if (data[i] < 127)
printf(" '%c',",data[i]);
else if (data[i] < 256)
printf("0x%02X,",data[i]);
else
printf("0x%04X,",data[i]);
}
printf("\n};\n");
}
printf("\n");
{
int line[0x22000];
int tableno;
struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
int i, j, p, j1, j2, t;
for (j1 = 0; j1 < 0x22000; j1++) {
bool all_invalid = true;
for (j2 = 0; j2 < 8; j2++) {
j = 8*j1+j2;
if (uni2index[j] >= 0)
all_invalid = false;
}
if (all_invalid)
line[j1] = -1;
else
line[j1] = 0;
}
tableno = 0;
for (j1 = 0; j1 < 0x22000; j1++) {
if (line[j1] >= 0) {
if (tableno > 0
&& ((j1 > 0 && line[j1-1] == tableno-1)
|| ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
&& j1 - tables[tableno-1].maxline <= 8))) {
line[j1] = tableno-1;
tables[tableno-1].maxline = j1;
} else {
tableno++;
line[j1] = tableno-1;
tables[tableno-1].minline = tables[tableno-1].maxline = j1;
}
}
}
for (t = 0; t < tableno; t++) {
tables[t].usecount = 0;
j1 = 8*tables[t].minline;
j2 = 8*(tables[t].maxline+1);
for (j = j1; j < j2; j++)
if (uni2index[j] >= 0)
tables[t].usecount++;
}
for (t = 0, p = -1, i = 0; t < tableno; t++) {
if (tables[t].usecount > 1) {
char* s;
if (p == tables[t].minline >> 5) {
i++;
/* i is the number of tables with the same (tables[t].minline >> 5)
that we have seen so far. Since the tables[t].minline values are
strongly monotonically increasing, there are at most 32 of them. */
if (!(i >= 0 && i <= 32)) abort();
s = (char*) malloc(4+1+2+1);
sprintf(s, "%02x_%d", p, i);
} else {
p = tables[t].minline >> 5;
i = 0;
s = (char*) malloc(4+1);
sprintf(s, "%02x", p);
}
tables[t].suffix = s;
} else
tables[t].suffix = NULL;
}
{
p = -1;
for (t = 0; t < tableno; t++)
if (tables[t].usecount > 1) {
p = 0;
printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
printf(" /* 0x%04x */\n", 8*j1);
printf(" ");
for (j2 = 0; j2 < 8; j2++) {
j = 8*j1+j2;
printf(" %4d,", uni2index[j]);
}
printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
}
printf("};\n");
}
if (p >= 0)
printf("\n");
}
printf("#define translit_index(wc) \\\n (");
for (j1 = 0; j1 < 0x22000;) {
t = line[j1];
for (j2 = j1; j2 < 0x22000 && line[j2] == t; j2++);
if (t >= 0) {
if (j1 != tables[t].minline) abort();
if (j2 > tables[t].maxline+1) abort();
j2 = tables[t].maxline+1;
}
if (t == -1) {
} else {
if (t >= 0 && tables[t].usecount == 0) abort();
if (t >= 0 && tables[t].usecount == 1) {
if (j2 != j1+1) abort();
for (j = 8*j1; j < 8*j2; j++)
if (uni2index[j] >= 0) {
printf("wc == 0x%04x ? %d", j, uni2index[j]);
break;
}
} else {
if (j1 == 0) {
printf("wc < 0x%04x", 8*j2);
} else {
printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
}
printf(" ? translit_page%s[wc", tables[t].suffix);
if (tables[t].minline > 0)
printf("-0x%04x", 8*j1);
printf("]");
}
printf(" : \\\n ");
}
j1 = j2;
}
printf("-1)\n");
}
if (ferror(stdout) || fclose(stdout))
exit(1);
exit(0);
}