| UNICODE_LAST_CHAR_PART1 = 0x2FAFF |
| HANGUL_S_BASE = 0xAC00 |
| HANGUL_S_COUNT = 19 * 21 * 28 |
| import unicodedata |
| |
| print """// Generated by gen-unicode-tables.py |
| |
| typedef struct { |
| Unicode character; |
| int length; |
| int offset; |
| } decomposition; |
| """ |
| |
| decomp_table = [] |
| max_index = 0 |
| decomp_expansion_index = {} |
| decomp_expansion = [] |
| for u in xrange(0, UNICODE_LAST_CHAR_PART1): |
| if (u >= HANGUL_S_BASE and u < HANGUL_S_BASE + HANGUL_S_COUNT): |
| continue |
| norm = tuple(map(ord, unicodedata.normalize("NFKD", unichr(u)))) |
| if norm != (u,): |
| try: |
| i = decomp_expansion_index[norm] |
| decomp_table.append((u, len(norm), i)) |
| except KeyError: |
| decomp_table.append((u, len(norm), max_index)) |
| decomp_expansion_index[norm] = max_index |
| decomp_expansion.append((norm, max_index)) |
| max_index += len(norm) |
| print "#define DECOMP_TABLE_LENGTH %d\n" % len(decomp_table) |
| print "static const decomposition decomp_table[] = {\n%s\n};\n" % ", \n".join( |
| " { 0x%x, %d, %d }" % (character, length, offset) |
| for character, length, offset in decomp_table) |
| print "static const Unicode decomp_expansion[] = {\n%s\n};\n" % ", \n".join( |
| " %s /* offset %d */ " % (", ".join("0x%x" % u for u in norm), |
| index) for norm, index in decomp_expansion) |