# blob: d15beb7db49ef664affd8c154203d30297de6b40 [file] [log] [blame]
from __future__ import absolute_import, division, print_function
import sys
import unicodedata
if sys.version_info[0] == 2:
    # On Python 2 use unichr() so chr(u) yields a unicode string, which is
    # what unicodedata.normalize() is called with below.
    chr = unichr  # noqa: F821 - unichr only exists on Python 2

# Last code point (inclusive) examined when building the tables.
UNICODE_LAST_CHAR_PART1 = 0x2FAFF

# Code points in [HANGUL_S_BASE, HANGUL_S_BASE + HANGUL_S_COUNT) are left out
# of the generated tables.
# NOTE(review): presumably because Hangul syllables are decomposed
# arithmetically by the consumer of the tables - confirm against that code.
HANGUL_S_BASE = 0xAC00
HANGUL_S_COUNT = 19 * 21 * 28

# Preamble of the generated C source: declares the row type of decomp_table.
_HEADER = """// Generated by gen-unicode-tables.py
typedef struct {
Unicode character;
int length;
int offset;
} decomposition;
"""


def build_decomposition_tables(last_char=UNICODE_LAST_CHAR_PART1):
    """Collect the NFKD decomposition of every code point up to last_char.

    Code points whose NFKD form is the code point itself, and code points in
    the skipped Hangul range, produce no entry.

    Args:
        last_char: highest code point to examine (inclusive).

    Returns:
        A pair (decomp_table, decomp_expansion):

        * decomp_table is a list of (character, length, offset) tuples in
          ascending character order, where length is the number of code
          points in the decomposition and offset is the position of its
          first code point in the flattened expansion array.
        * decomp_expansion is a list of (norm, offset) tuples, one per
          distinct decomposition in order of first appearance, where norm is
          the tuple of decomposed code points.
    """
    decomp_table = []
    decomp_expansion = []
    # Maps a decomposition to its offset so identical decompositions share a
    # single slot in the expansion array.
    offset_by_norm = {}
    next_offset = 0
    hangul_end = HANGUL_S_BASE + HANGUL_S_COUNT

    # "+ 1" because last_char names the final code point to include.
    for u in range(0, last_char + 1):
        if HANGUL_S_BASE <= u < hangul_end:
            continue
        norm = tuple(map(ord, unicodedata.normalize("NFKD", chr(u))))
        if norm == (u, ):
            continue
        offset = offset_by_norm.get(norm)
        if offset is None:
            # First time this decomposition is seen: append it to the
            # expansion array and advance by the number of slots it uses.
            offset = next_offset
            offset_by_norm[norm] = offset
            decomp_expansion.append((norm, offset))
            next_offset += len(norm)
        decomp_table.append((u, len(norm), offset))
    return decomp_table, decomp_expansion


def print_tables(decomp_table, decomp_expansion):
    """Print the generated C source for the given tables to stdout.

    Args:
        decomp_table: (character, length, offset) tuples, as returned by
            build_decomposition_tables().
        decomp_expansion: (norm, offset) tuples, as returned by
            build_decomposition_tables().
    """
    print(_HEADER)
    print("#define DECOMP_TABLE_LENGTH %d" % (len(decomp_table), ))
    print()
    print("static const decomposition decomp_table[] = {")
    print(",\n".join(" { 0x%x, %d, %d }" % (character, length, offset)
                     for character, length, offset in decomp_table))
    print("};")
    print()
    print("static const Unicode decomp_expansion[] = {")
    print(" ,\n".join(
        " %s /* offset %d */" % (", ".join("0x%x" % u for u in norm), index)
        for norm, index in decomp_expansion))
    print("};")


def main():
    """Build the tables for the full code-point range and print them."""
    decomp_table, decomp_expansion = build_decomposition_tables()
    print_tables(decomp_table, decomp_expansion)


if __name__ == "__main__":
    main()