| from __future__ import absolute_import, division, print_function |
| |
| import sys |
| import unicodedata |
| |
| |
# Python 2's chr() only accepts 0-255; unichr() is its code-point equivalent.
if sys.version_info[0] == 2:
    chr = unichr  # noqa: F821 (name only exists on Python 2)

# Highest code point examined by the generator (inclusive).
UNICODE_LAST_CHAR_PART1 = 0x2FAFF
# The HANGUL_S_COUNT code points starting at HANGUL_S_BASE are left out of the
# generated table.
# NOTE(review): presumably the consumer of the table decomposes Hangul
# syllables arithmetically -- confirm against the C side.
HANGUL_S_BASE = 0xAC00
HANGUL_S_COUNT = 19 * 21 * 28

# C preamble emitted ahead of the tables.
C_HEADER = """// Generated by gen-unicode-tables.py

typedef struct {
Unicode character;
int length;
int offset;
} decomposition;
"""


def build_decomposition_tables(last_char=UNICODE_LAST_CHAR_PART1):
    """Collect the NFKD decompositions of code points 0..last_char.

    Code points in the Hangul syllable range and code points whose NFKD
    form is the code point itself are skipped.

    Args:
        last_char: Highest code point to examine, inclusive.

    Returns:
        A ``(decomp_table, decomp_expansion)`` pair:

        * ``decomp_table`` -- list of ``(code_point, length, offset)`` tuples
          in ascending code point order; ``offset``/``length`` select a slice
          of the flattened expansion array.
        * ``decomp_expansion`` -- list of ``(code_points, offset)`` tuples,
          one per *distinct* decomposition, in first-seen order.  ``offset``
          is the position of that decomposition in the flattened array.
    """
    decomp_table = []
    decomp_expansion = []
    offset_by_expansion = {}  # decomposition tuple -> offset already assigned
    next_offset = 0
    hangul_end = HANGUL_S_BASE + HANGUL_S_COUNT

    # "+ 1" makes the bound inclusive: last_char is the last character to
    # examine, not one past it.
    for u in range(0, last_char + 1):
        if HANGUL_S_BASE <= u < hangul_end:
            continue
        norm = tuple(map(ord, unicodedata.normalize("NFKD", chr(u))))
        if norm == (u, ):
            continue
        offset = offset_by_expansion.get(norm)
        if offset is None:
            # First occurrence of this decomposition: append it to the
            # expansion array; later identical decompositions reuse the slot.
            offset = next_offset
            offset_by_expansion[norm] = offset
            decomp_expansion.append((norm, offset))
            next_offset += len(norm)
        decomp_table.append((u, len(norm), offset))
    return decomp_table, decomp_expansion


def print_tables(decomp_table, decomp_expansion):
    """Print both tables to stdout as C array definitions.

    Args:
        decomp_table: ``(code_point, length, offset)`` tuples.
        decomp_expansion: ``(code_points, offset)`` tuples.
    """
    print("#define DECOMP_TABLE_LENGTH %d" % (len(decomp_table), ))
    print()
    print("static const decomposition decomp_table[] = {")
    print(*(" { 0x%x, %d, %d }" % (character, length, offset)
            for character, length, offset in decomp_table),
          sep=",\n")
    print("};")
    print()
    print("static const Unicode decomp_expansion[] = {")
    print(*(" %s /* offset %d */" % (", ".join("0x%x" % u for u in norm), index)
            for norm, index in decomp_expansion),
          sep=" ,\n")
    print("};")


def main():
    """Write the C preamble followed by the generated tables to stdout."""
    print(C_HEADER)
    decomp_table, decomp_expansion = build_decomposition_tables()
    print_tables(decomp_table, decomp_expansion)


if __name__ == "__main__":
    main()