| /* Copyright 2015 Google Inc. All Rights Reserved. |
| |
| Distributed under MIT license. |
| See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| */ |
| |
| package org.brotli.dec; |
| |
| import java.nio.ByteBuffer; |
| |
| /** |
| * Transformations on dictionary words. |
| * |
| * Transform descriptor is a triplet: {prefix, operator, suffix}. |
| * "prefix" and "suffix" are short strings inserted before and after transformed dictionary word. |
| * "operator" is applied to dictionary word itself. |
| * |
| * Some operators has "built-in" parameters, i.e. parameter is defined by operator ordinal. Other |
| * operators have "external" parameters, supplied via additional table encoded in shared dictionary. |
| * |
| * Operators: |
| * - IDENTITY (0): dictionary word is inserted "as is" |
| * - OMIT_LAST_N (1 - 9): last N octets of dictionary word are not inserted; N == ordinal |
| * - OMIT_FIRST_M (12-20): first M octets of dictionary word are not inserted; M == ordinal - 11 |
| * - UPPERCASE_FIRST (10): first "scalar" is XOR'ed with number 32 |
| * - UPPERCASE_ALL (11): all "scalars" are XOR'ed with number 32 |
| * - SHIFT_FIRST (21): first "scalar" is shifted by number form parameter table |
| * - SHIFT_ALL (22): all "scalar" is shifted by number form parameter table |
| * |
| * Here "scalar" is a variable length character coding similar to UTF-8 encoding. |
| * UPPERCASE_XXX / SHIFT_XXX operators were designed to change the case of UTF-8 encoded characters. |
| * While UPPERCASE_XXX works well only on ASCII charset, SHIFT is much more generic and could be |
| * used for most (all?) alphabets. |
| */ |
| final class Transform { |
| |
| static final class Transforms { |
| final int numTransforms; |
| final int[] triplets; |
| final byte[] prefixSuffixStorage; |
| final int[] prefixSuffixHeads; |
| final short[] params; |
| |
| Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount) { |
| this.numTransforms = numTransforms; |
| this.triplets = new int[numTransforms * 3]; |
| this.params = new short[numTransforms]; |
| this.prefixSuffixStorage = new byte[prefixSuffixLen]; |
| this.prefixSuffixHeads = new int[prefixSuffixCount + 1]; |
| } |
| } |
| |
| static final int NUM_RFC_TRANSFORMS = 121; |
| static final Transforms RFC_TRANSFORMS = new Transforms(NUM_RFC_TRANSFORMS, 167, 50); |
| |
| private static final int OMIT_FIRST_LAST_LIMIT = 9; |
| |
| private static final int IDENTITY = 0; |
| private static final int OMIT_LAST_BASE = IDENTITY + 1 - 1; // there is no OMIT_LAST_0. |
| private static final int UPPERCASE_FIRST = OMIT_LAST_BASE + OMIT_FIRST_LAST_LIMIT + 1; |
| private static final int UPPERCASE_ALL = UPPERCASE_FIRST + 1; |
| private static final int OMIT_FIRST_BASE = UPPERCASE_ALL + 1 - 1; // there is no OMIT_FIRST_0. |
| private static final int SHIFT_FIRST = OMIT_FIRST_BASE + OMIT_FIRST_LAST_LIMIT + 1; |
| private static final int SHIFT_ALL = SHIFT_FIRST + 1; |
| |
| // Bundle of 0-terminated strings. |
| private static final String PREFIX_SUFFIX_SRC = "# #s #, #e #.# the #.com/#\u00C2\u00A0# of # and" |
| + " # in # to #\"#\">#\n#]# for # a # that #. # with #'# from # by #. The # on # as # is #ing" |
| + " #\n\t#:#ed #(# at #ly #=\"# of the #. This #,# not #er #al #='#ful #ive #less #est #ize #" |
| + "ous #"; |
| private static final String TRANSFORMS_SRC = " !! ! , *! &! \" ! ) * * - ! # ! #!*! " |
| + "+ ,$ ! - % . / # 0 1 . \" 2 3!* 4% ! # / 5 6 7 8 0 1 & $ 9 + : " |
| + " ; < ' != > ?! 4 @ 4 2 & A *# ( B C& ) % ) !*# *-% A +! *. D! %' & E *6 F " |
| + " G% ! *A *% H! D I!+! J!+ K +- *4! A L!*4 M N +6 O!*% +.! K *G P +%( ! G *D +D " |
| + " Q +# *K!*G!+D!+# +G +A +4!+% +K!+4!*D!+K!*K"; |
| |
| private static void unpackTransforms(byte[] prefixSuffix, |
| int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc) { |
| int n = prefixSuffixSrc.length(); |
| int index = 1; |
| int j = 0; |
| for (int i = 0; i < n; ++i) { |
| char c = prefixSuffixSrc.charAt(i); |
| if (c == 35) { // == # |
| prefixSuffixHeads[index++] = j; |
| } else { |
| prefixSuffix[j++] = (byte) c; |
| } |
| } |
| |
| for (int i = 0; i < NUM_RFC_TRANSFORMS * 3; ++i) { |
| transforms[i] = transformsSrc.charAt(i) - 32; |
| } |
| } |
| |
| static { |
| unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads, |
| RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC); |
| } |
| |
| static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset, |
| int len, Transforms transforms, int transformIndex) { |
| int offset = dstOffset; |
| int[] triplets = transforms.triplets; |
| byte[] prefixSuffixStorage = transforms.prefixSuffixStorage; |
| int[] prefixSuffixHeads = transforms.prefixSuffixHeads; |
| int transformOffset = 3 * transformIndex; |
| int prefixIdx = triplets[transformOffset]; |
| int transformType = triplets[transformOffset + 1]; |
| int suffixIdx = triplets[transformOffset + 2]; |
| int prefix = prefixSuffixHeads[prefixIdx]; |
| int prefixEnd = prefixSuffixHeads[prefixIdx + 1]; |
| int suffix = prefixSuffixHeads[suffixIdx]; |
| int suffixEnd = prefixSuffixHeads[suffixIdx + 1]; |
| |
| int omitFirst = transformType - OMIT_FIRST_BASE; |
| int omitLast = transformType - OMIT_LAST_BASE; |
| if (omitFirst < 1 || omitFirst > OMIT_FIRST_LAST_LIMIT) { |
| omitFirst = 0; |
| } |
| if (omitLast < 1 || omitLast > OMIT_FIRST_LAST_LIMIT) { |
| omitLast = 0; |
| } |
| |
| // Copy prefix. |
| while (prefix != prefixEnd) { |
| dst[offset++] = prefixSuffixStorage[prefix++]; |
| } |
| |
| // Copy trimmed word. |
| if (omitFirst > len) { |
| omitFirst = len; |
| } |
| srcOffset += omitFirst; |
| len -= omitFirst; |
| len -= omitLast; |
| int i = len; |
| while (i > 0) { |
| dst[offset++] = src.get(srcOffset++); |
| i--; |
| } |
| |
| // Ferment. |
| if (transformType == UPPERCASE_FIRST || transformType == UPPERCASE_ALL) { |
| int uppercaseOffset = offset - len; |
| if (transformType == UPPERCASE_FIRST) { |
| len = 1; |
| } |
| while (len > 0) { |
| int c0 = dst[uppercaseOffset] & 0xFF; |
| if (c0 < 0xC0) { |
| if (c0 >= 97 && c0 <= 122) { // in [a..z] range |
| dst[uppercaseOffset] ^= (byte) 32; |
| } |
| uppercaseOffset += 1; |
| len -= 1; |
| } else if (c0 < 0xE0) { |
| dst[uppercaseOffset + 1] ^= (byte) 32; |
| uppercaseOffset += 2; |
| len -= 2; |
| } else { |
| dst[uppercaseOffset + 2] ^= (byte) 5; |
| uppercaseOffset += 3; |
| len -= 3; |
| } |
| } |
| } else if (transformType == SHIFT_FIRST || transformType == SHIFT_ALL) { |
| int shiftOffset = offset - len; |
| short param = transforms.params[transformIndex]; |
| /* Limited sign extension: scalar < (1 << 24). */ |
| int scalar = (param & 0x7FFF) + (0x1000000 - (param & 0x8000)); |
| while (len > 0) { |
| int step = 1; |
| int c0 = dst[shiftOffset] & 0xFF; |
| if (c0 < 0x80) { |
| /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */ |
| scalar += c0; |
| dst[shiftOffset] = (byte) (scalar & 0x7F); |
| } else if (c0 < 0xC0) { |
| /* Continuation / 10AAAAAA. */ |
| } else if (c0 < 0xE0) { |
| /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */ |
| if (len >= 2) { |
| byte c1 = dst[shiftOffset + 1]; |
| scalar += (c1 & 0x3F) | ((c0 & 0x1F) << 6); |
| dst[shiftOffset] = (byte) (0xC0 | ((scalar >> 6) & 0x1F)); |
| dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | (scalar & 0x3F)); |
| step = 2; |
| } else { |
| step = len; |
| } |
| } else if (c0 < 0xF0) { |
| /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */ |
| if (len >= 3) { |
| byte c1 = dst[shiftOffset + 1]; |
| byte c2 = dst[shiftOffset + 2]; |
| scalar += (c2 & 0x3F) | ((c1 & 0x3F) << 6) | ((c0 & 0x0F) << 12); |
| dst[shiftOffset] = (byte) (0xE0 | ((scalar >> 12) & 0x0F)); |
| dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 6) & 0x3F)); |
| dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | (scalar & 0x3F)); |
| step = 3; |
| } else { |
| step = len; |
| } |
| } else if (c0 < 0xF8) { |
| /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */ |
| if (len >= 4) { |
| byte c1 = dst[shiftOffset + 1]; |
| byte c2 = dst[shiftOffset + 2]; |
| byte c3 = dst[shiftOffset + 3]; |
| scalar += (c3 & 0x3F) | ((c2 & 0x3F) << 6) | ((c1 & 0x3F) << 12) | ((c0 & 0x07) << 18); |
| dst[shiftOffset] = (byte) (0xF0 | ((scalar >> 18) & 0x07)); |
| dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 12) & 0x3F)); |
| dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | ((scalar >> 6) & 0x3F)); |
| dst[shiftOffset + 3] = (byte) ((c3 & 0xC0) | (scalar & 0x3F)); |
| step = 4; |
| } else { |
| step = len; |
| } |
| } |
| shiftOffset += step; |
| len -= step; |
| if (transformType == SHIFT_FIRST) { |
| len = 0; |
| } |
| } |
| } |
| |
| // Copy suffix. |
| while (suffix != suffixEnd) { |
| dst[offset++] = prefixSuffixStorage[suffix++]; |
| } |
| |
| return offset - dstOffset; |
| } |
| } |