# Step 04 - generate Java literals.
#
# Java byte-code has ridiculous restrictions. There is no such thing as an
# "array literal" - those are implemented as a series of data[x] = y;
# as a consequence an N-byte array will use 7N bytes in the class, plus N bytes
# in the instantiated variable. Also, no literal can be longer than 64KiB.
#
# To keep dictionary data compact both in source code and in compiled format
# we use the following tricks:
#  * use String as a data container
#  * store only the lowest 7 bits; i.e. all characters fit the ASCII table;
#    this allows efficient conversion to a byte array; also ASCII characters
#    use only 1 byte of memory (UTF-8 encoding)
#  * RLE-compress the sequence of 8th bits
#
# This script generates the literals used in Java code.
| |
# Python 2/3 compatibility shim: Python 3 dropped `unichr` because its `chr`
# already accepts any Unicode code point, so alias one to the other there.
try:
    unichr  # Python 2: the builtin exists, nothing to do.
except NameError:
    unichr = chr  # Python 3
| |
# Raw dictionary blob, resolved relative to the current working directory.
bin_path = "dictionary.bin"

# Binary mode: the payload is consumed byte by byte, so no decoding and no
# newline translation may be applied while reading it.
with open(bin_path, "rb") as raw:
    data = raw.read()
| |
# Split every dictionary byte into two streams:
#   * `low` - the lowest 7 bits of each byte, one ASCII character per byte;
#   * `hi`  - run lengths of the 8th bit. Runs alternate between "skip"
#             (8th bit clear) and "flip" (8th bit set), starting with a skip
#             run, which may be empty. Each run is stored as a single
#             character whose code is `skip_flip_offset + run_length`.
low = []
hi = []
is_skip = True
# NOTE(review): the bias of 36 presumably keeps short run lengths inside
# printable ASCII - confirm against the Java decoder before changing it.
skip_flip_offset = 36
cntr = skip_flip_offset
# Iterate over a bytearray so that each item is an integer on both Python 2
# and Python 3. Iterating `bytes` directly yields 1-char strings on Python 2
# but ints on Python 3, where calling ord() on them raises TypeError.
for value in bytearray(data):
    low.append(chr(value & 0x7F))
    if (value < 0x80) == is_skip:
        # The 8th bit matches the current run: extend it.
        cntr += 1
    else:
        # The 8th bit changed: emit the finished run and start a new one
        # that already contains the current byte.
        is_skip = not is_skip
        hi.append(unichr(cntr))
        cntr = skip_flip_offset + 1
# Emit the final (still open) run.
hi.append(unichr(cntr))

# The low-bit stream is emitted as two literals of (almost) equal size; the
# file header notes that a single Java literal cannot be longer than 64KiB.
half = len(low) // 2
low0 = low[:half]
low1 = low[half:]
| |
def escape(chars):
    """Return Java string-literal escapes for the characters in *chars*.

    Args:
        chars: an iterable of single-character strings (a string or a list
            of characters).

    Returns:
        A list with exactly one entry per input character, holding the text
        that represents that character inside a Java string literal.
        Joining the list yields the body of the literal (without the
        surrounding quotes).
    """
    # These characters must use the short escapes. Java translates \uXXXX
    # sequences before tokenizing, so e.g. \u000A or \u0022 would terminate
    # the line or the literal instead of becoming part of the string.
    short_escapes = {
        "\r": "\\r",
        "\n": "\\n",
        "\t": "\\t",
        "\"": "\\\"",
        "\\": "\\\\",
    }
    result = []
    for c in chars:
        code = ord(c)
        if c in short_escapes:
            result.append(short_escapes[c])
        elif code > 0xFFFF:
            # A Java \u escape carries exactly four hex digits, so a code
            # point outside the BMP has to be written as a surrogate pair.
            code -= 0x10000
            result.append(
                "\\u%04X\\u%04X" % (0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF))
            )
        elif code < 32 or code >= 127:
            # Control characters, DEL and everything non-ASCII.
            result.append("\\u%04X" % code)
        else:
            # Printable ASCII is emitted verbatim.
            result.append(c)
    return result
| |
| |
# Each Java constant is emitted as three fragments: the declaration prefix
# with the opening quote, the escaped payload, and the closing quote with
# the statement terminator.
declarations = (
    ("DATA0", low0),
    ("DATA1", low1),
    ("SKIP_FLIP", hi),
)

source_code = []
for constant_name, payload in declarations:
    source_code.append(" private static final String " + constant_name + " = \"")
    source_code.append("".join(escape(payload)))
    source_code.append("\";\n")

# Include file that receives the generated declarations.
src_path = "DictionaryData.inc.java"

with open(src_path, "w") as source:
    # The fragments are written back to back, with no separators added.
    source.writelines(source_code)