java/org/brotli/dec/Transform.java - external/github.com/google/brotli - Git at Google

 /* Copyright 2015 Google Inc. All Rights Reserved.

    Distributed under MIT license.
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */

 package org.brotli.dec;

 import java.nio.ByteBuffer;

 /**
  * Transformations on dictionary words.
  *
  * Transform descriptor is a triplet: {prefix, operator, suffix}.
  * "prefix" and "suffix" are short strings inserted before and after transformed dictionary word.
  * "operator" is applied to dictionary word itself.
  *
  * Some operators has "built-in" parameters, i.e. parameter is defined by operator ordinal. Other
  * operators have "external" parameters, supplied via additional table encoded in shared dictionary.
  *
  * Operators:
  *  - IDENTITY (0): dictionary word is inserted "as is"
  *  - OMIT_LAST_N (1 - 9): last N octets of dictionary word are not inserted; N == ordinal
  *  - OMIT_FIRST_M (12-20): first M octets of dictionary word are not inserted; M == ordinal - 11
  *  - UPPERCASE_FIRST (10): first "scalar" is XOR'ed with number 32
  *  - UPPERCASE_ALL (11): all "scalars" are XOR'ed with number 32
  *  - SHIFT_FIRST (21): first "scalar" is shifted by number form parameter table
  *  - SHIFT_ALL (22): all "scalar" is shifted by number form parameter table
  *
  * Here "scalar" is a variable length character coding similar to UTF-8 encoding.
  * UPPERCASE_XXX / SHIFT_XXX operators were designed to change the case of UTF-8 encoded characters.
  * While UPPERCASE_XXX works well only on ASCII charset, SHIFT is much more generic and could be
  * used for most (all?) alphabets.
  */
 final class Transform {

   static final class Transforms {
     final int numTransforms;
     final int[] triplets;
     final byte[] prefixSuffixStorage;
     final int[] prefixSuffixHeads;
     final short[] params;

     Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount) {
       this.numTransforms = numTransforms;
       this.triplets = new int[numTransforms * 3];
       this.params = new short[numTransforms];
       this.prefixSuffixStorage = new byte[prefixSuffixLen];
       this.prefixSuffixHeads = new int[prefixSuffixCount + 1];
     }
   }

   static final int NUM_RFC_TRANSFORMS = 121;
   static final Transforms RFC_TRANSFORMS = new Transforms(NUM_RFC_TRANSFORMS, 167, 50);

   private static final int OMIT_FIRST_LAST_LIMIT = 9;

   private static final int IDENTITY = 0;
   private static final int OMIT_LAST_BASE = IDENTITY + 1 - 1;  // there is no OMIT_LAST_0.
   private static final int UPPERCASE_FIRST = OMIT_LAST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
   private static final int UPPERCASE_ALL = UPPERCASE_FIRST + 1;
   private static final int OMIT_FIRST_BASE = UPPERCASE_ALL + 1 - 1;  // there is no OMIT_FIRST_0.
   private static final int SHIFT_FIRST = OMIT_FIRST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
   private static final int SHIFT_ALL = SHIFT_FIRST + 1;

   // Bundle of 0-terminated strings.
   private static final String PREFIX_SUFFIX_SRC = "# #s #, #e #.# the #.com/#\u00C2\u00A0# of # and"
       + " # in # to #\"#\">#\n#]# for # a # that #. # with #'# from # by #. The # on # as # is #ing"
       + " #\n\t#:#ed #(# at #ly #=\"# of the #. This #,# not #er #al #='#ful #ive #less #est #ize #"
       + "ous #";
   private static final String TRANSFORMS_SRC = "     !! ! ,  *!  &!  \" !  ) *   * -  ! # !  #!*!  "
       + "+  ,$ !  -  %  .  / #   0  1 .  \"   2  3!*   4%  ! # /   5  6  7  8 0  1 &   $   9 +   : "
       + " ;  < '  !=  >  ?! 4  @ 4  2  &   A *# (   B  C& ) %  ) !*# *-% A +! *.  D! %'  & E *6  F "
       + " G% ! *A *%  H! D  I!+!  J!+   K +- *4! A  L!*4  M  N +6  O!*% +.! K *G  P +%(  ! G *D +D "
       + " Q +# *K!*G!+D!+# +G +A +4!+% +K!+4!*D!+K!*K";

   private static void unpackTransforms(byte[] prefixSuffix,
       int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc) {
     int n = prefixSuffixSrc.length();
     int index = 1;
     int j = 0;
     for (int i = 0; i < n; ++i) {
       char c = prefixSuffixSrc.charAt(i);
       if (c == 35) { // == #
         prefixSuffixHeads[index++] = j;
       } else {
         prefixSuffix[j++] = (byte) c;
       }
     }

     for (int i = 0; i < NUM_RFC_TRANSFORMS * 3; ++i) {
       transforms[i] = transformsSrc.charAt(i) - 32;
     }
   }

   static {
     unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads,
         RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC);
   }

   static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset,
       int len, Transforms transforms, int transformIndex) {
     int offset = dstOffset;
     int[] triplets = transforms.triplets;
     byte[] prefixSuffixStorage = transforms.prefixSuffixStorage;
     int[] prefixSuffixHeads = transforms.prefixSuffixHeads;
     int transformOffset = 3 * transformIndex;
     int prefixIdx = triplets[transformOffset];
     int transformType = triplets[transformOffset + 1];
     int suffixIdx = triplets[transformOffset + 2];
     int prefix = prefixSuffixHeads[prefixIdx];
     int prefixEnd = prefixSuffixHeads[prefixIdx + 1];
     int suffix = prefixSuffixHeads[suffixIdx];
     int suffixEnd = prefixSuffixHeads[suffixIdx + 1];

     int omitFirst = transformType - OMIT_FIRST_BASE;
     int omitLast = transformType - OMIT_LAST_BASE;
     if (omitFirst < 1 || omitFirst > OMIT_FIRST_LAST_LIMIT) {
       omitFirst = 0;
     }
     if (omitLast < 1 || omitLast > OMIT_FIRST_LAST_LIMIT) {
       omitLast = 0;
     }

     // Copy prefix.
     while (prefix != prefixEnd) {
       dst[offset++] = prefixSuffixStorage[prefix++];
     }

     // Copy trimmed word.
     if (omitFirst > len) {
       omitFirst = len;
     }
     srcOffset += omitFirst;
     len -= omitFirst;
     len -= omitLast;
     int i = len;
     while (i > 0) {
       dst[offset++] = src.get(srcOffset++);
       i--;
     }

     // Ferment.
     if (transformType == UPPERCASE_FIRST || transformType == UPPERCASE_ALL) {
       int uppercaseOffset = offset - len;
       if (transformType == UPPERCASE_FIRST) {
         len = 1;
       }
       while (len > 0) {
         int c0 = dst[uppercaseOffset] & 0xFF;
         if (c0 < 0xC0) {
           if (c0 >= 97 && c0 <= 122) { // in [a..z] range
             dst[uppercaseOffset] ^= (byte) 32;
           }
           uppercaseOffset += 1;
           len -= 1;
         } else if (c0 < 0xE0) {
           dst[uppercaseOffset + 1] ^= (byte) 32;
           uppercaseOffset += 2;
           len -= 2;
         } else {
           dst[uppercaseOffset + 2] ^= (byte) 5;
           uppercaseOffset += 3;
           len -= 3;
         }
       }
     } else if (transformType == SHIFT_FIRST || transformType == SHIFT_ALL) {
       int shiftOffset = offset - len;
       short param = transforms.params[transformIndex];
       /* Limited sign extension: scalar < (1 << 24). */
       int scalar = (param & 0x7FFF) + (0x1000000 - (param & 0x8000));
       while (len > 0) {
         int step = 1;
         int c0 = dst[shiftOffset] & 0xFF;
         if (c0 < 0x80) {
           /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
           scalar += c0;
           dst[shiftOffset] = (byte) (scalar & 0x7F);
         } else if (c0 < 0xC0) {
           /* Continuation / 10AAAAAA. */
         } else if (c0 < 0xE0) {
           /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
           if (len >= 2) {
             byte c1 = dst[shiftOffset + 1];
             scalar += (c1 & 0x3F) | ((c0 & 0x1F) << 6);
             dst[shiftOffset] = (byte) (0xC0 | ((scalar >> 6) & 0x1F));
             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | (scalar & 0x3F));
             step = 2;
           } else {
             step = len;
           }
         } else if (c0 < 0xF0) {
           /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
           if (len >= 3) {
             byte c1 = dst[shiftOffset + 1];
             byte c2 = dst[shiftOffset + 2];
             scalar += (c2 & 0x3F) | ((c1 & 0x3F) << 6) | ((c0 & 0x0F) << 12);
             dst[shiftOffset] = (byte) (0xE0 | ((scalar >> 12) & 0x0F));
             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 6) & 0x3F));
             dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | (scalar & 0x3F));
             step = 3;
           } else {
             step = len;
           }
         } else if (c0 < 0xF8) {
           /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
           if (len >= 4) {
             byte c1 = dst[shiftOffset + 1];
             byte c2 = dst[shiftOffset + 2];
             byte c3 = dst[shiftOffset + 3];
             scalar += (c3 & 0x3F) | ((c2 & 0x3F) << 6) | ((c1 & 0x3F) << 12) | ((c0 & 0x07) << 18);
             dst[shiftOffset] = (byte) (0xF0 | ((scalar >> 18) & 0x07));
             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 12) & 0x3F));
             dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | ((scalar >> 6) & 0x3F));
             dst[shiftOffset + 3] = (byte) ((c3 & 0xC0) | (scalar & 0x3F));
             step = 4;
           } else {
             step = len;
           }
         }
         shiftOffset += step;
         len -= step;
         if (transformType == SHIFT_FIRST) {
           len = 0;
         }
       }
     }

     // Copy suffix.
     while (suffix != suffixEnd) {
       dst[offset++] = prefixSuffixStorage[suffix++];
     }

     return offset - dstOffset;
   }
 }
	/* Copyright 2015 Google Inc. All Rights Reserved.

	Distributed under MIT license.
	See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
	*/

	package org.brotli.dec;

	import java.nio.ByteBuffer;

	/**
	* Transformations on dictionary words.
	*
	* Transform descriptor is a triplet: {prefix, operator, suffix}.
	* "prefix" and "suffix" are short strings inserted before and after transformed dictionary word.
	* "operator" is applied to dictionary word itself.
	*
	* Some operators has "built-in" parameters, i.e. parameter is defined by operator ordinal. Other
	* operators have "external" parameters, supplied via additional table encoded in shared dictionary.
	*
	* Operators:
	* - IDENTITY (0): dictionary word is inserted "as is"
	* - OMIT_LAST_N (1 - 9): last N octets of dictionary word are not inserted; N == ordinal
	* - OMIT_FIRST_M (12-20): first M octets of dictionary word are not inserted; M == ordinal - 11
	* - UPPERCASE_FIRST (10): first "scalar" is XOR'ed with number 32
	* - UPPERCASE_ALL (11): all "scalars" are XOR'ed with number 32
	* - SHIFT_FIRST (21): first "scalar" is shifted by number form parameter table
	* - SHIFT_ALL (22): all "scalar" is shifted by number form parameter table
	*
	* Here "scalar" is a variable length character coding similar to UTF-8 encoding.
	* UPPERCASE_XXX / SHIFT_XXX operators were designed to change the case of UTF-8 encoded characters.
	* While UPPERCASE_XXX works well only on ASCII charset, SHIFT is much more generic and could be
	* used for most (all?) alphabets.
	*/
	final class Transform {

	static final class Transforms {
	final int numTransforms;
	final int[] triplets;
	final byte[] prefixSuffixStorage;
	final int[] prefixSuffixHeads;
	final short[] params;

	Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount) {
	this.numTransforms = numTransforms;
	this.triplets = new int[numTransforms * 3];
	this.params = new short[numTransforms];
	this.prefixSuffixStorage = new byte[prefixSuffixLen];
	this.prefixSuffixHeads = new int[prefixSuffixCount + 1];
	}
	}

	static final int NUM_RFC_TRANSFORMS = 121;
	static final Transforms RFC_TRANSFORMS = new Transforms(NUM_RFC_TRANSFORMS, 167, 50);

	private static final int OMIT_FIRST_LAST_LIMIT = 9;

	private static final int IDENTITY = 0;
	private static final int OMIT_LAST_BASE = IDENTITY + 1 - 1; // there is no OMIT_LAST_0.
	private static final int UPPERCASE_FIRST = OMIT_LAST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
	private static final int UPPERCASE_ALL = UPPERCASE_FIRST + 1;
	private static final int OMIT_FIRST_BASE = UPPERCASE_ALL + 1 - 1; // there is no OMIT_FIRST_0.
	private static final int SHIFT_FIRST = OMIT_FIRST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
	private static final int SHIFT_ALL = SHIFT_FIRST + 1;

	// Bundle of 0-terminated strings.
	private static final String PREFIX_SUFFIX_SRC = "# #s #, #e #.# the #.com/#\u00C2\u00A0# of # and"
	+ " # in # to #\"#\">#\n#]# for # a # that #. # with #'# from # by #. The # on # as # is #ing"
	+ " #\n\t#:#ed #(# at #ly #=\"# of the #. This #,# not #er #al #='#ful #ive #less #est #ize #"
	+ "ous #";
	private static final String TRANSFORMS_SRC = " !! ! , ! &! \" ! ) * - ! # ! #!*! "
	+ "+ ,$ ! - % . / # 0 1 . \" 2 3!* 4% ! # / 5 6 7 8 0 1 & $ 9 + : "
	+ " ; < ' != > ?! 4 @ 4 2 & A # ( B C& ) % ) !# -% A +! . D! %' & E *6 F "
	+ " G% ! A % H! D I!+! J!+ K +- 4! A L!4 M N +6 O!% +.! K G P +%( ! G *D +D "
	+ " Q +# K!G!+D!+# +G +A +4!+% +K!+4!D!+K!K";

	private static void unpackTransforms(byte[] prefixSuffix,
	int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc) {
	int n = prefixSuffixSrc.length();
	int index = 1;
	int j = 0;
	for (int i = 0; i < n; ++i) {
	char c = prefixSuffixSrc.charAt(i);
	if (c == 35) { // == #
	prefixSuffixHeads[index++] = j;
	} else {
	prefixSuffix[j++] = (byte) c;
	}
	}

	for (int i = 0; i < NUM_RFC_TRANSFORMS * 3; ++i) {
	transforms[i] = transformsSrc.charAt(i) - 32;
	}
	}

	static {
	unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads,
	RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC);
	}

	static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset,
	int len, Transforms transforms, int transformIndex) {
	int offset = dstOffset;
	int[] triplets = transforms.triplets;
	byte[] prefixSuffixStorage = transforms.prefixSuffixStorage;
	int[] prefixSuffixHeads = transforms.prefixSuffixHeads;
	int transformOffset = 3 * transformIndex;
	int prefixIdx = triplets[transformOffset];
	int transformType = triplets[transformOffset + 1];
	int suffixIdx = triplets[transformOffset + 2];
	int prefix = prefixSuffixHeads[prefixIdx];
	int prefixEnd = prefixSuffixHeads[prefixIdx + 1];
	int suffix = prefixSuffixHeads[suffixIdx];
	int suffixEnd = prefixSuffixHeads[suffixIdx + 1];

	int omitFirst = transformType - OMIT_FIRST_BASE;
	int omitLast = transformType - OMIT_LAST_BASE;
	if (omitFirst < 1 \|\| omitFirst > OMIT_FIRST_LAST_LIMIT) {
	omitFirst = 0;
	}
	if (omitLast < 1 \|\| omitLast > OMIT_FIRST_LAST_LIMIT) {
	omitLast = 0;
	}

	// Copy prefix.
	while (prefix != prefixEnd) {
	dst[offset++] = prefixSuffixStorage[prefix++];
	}

	// Copy trimmed word.
	if (omitFirst > len) {
	omitFirst = len;
	}
	srcOffset += omitFirst;
	len -= omitFirst;
	len -= omitLast;
	int i = len;
	while (i > 0) {
	dst[offset++] = src.get(srcOffset++);
	i--;
	}

	// Ferment.
	if (transformType == UPPERCASE_FIRST \|\| transformType == UPPERCASE_ALL) {
	int uppercaseOffset = offset - len;
	if (transformType == UPPERCASE_FIRST) {
	len = 1;
	}
	while (len > 0) {
	int c0 = dst[uppercaseOffset] & 0xFF;
	if (c0 < 0xC0) {
	if (c0 >= 97 && c0 <= 122) { // in [a..z] range
	dst[uppercaseOffset] ^= (byte) 32;
	}
	uppercaseOffset += 1;
	len -= 1;
	} else if (c0 < 0xE0) {
	dst[uppercaseOffset + 1] ^= (byte) 32;
	uppercaseOffset += 2;
	len -= 2;
	} else {
	dst[uppercaseOffset + 2] ^= (byte) 5;
	uppercaseOffset += 3;
	len -= 3;
	}
	}
	} else if (transformType == SHIFT_FIRST \|\| transformType == SHIFT_ALL) {
	int shiftOffset = offset - len;
	short param = transforms.params[transformIndex];
	/* Limited sign extension: scalar < (1 << 24). */
	int scalar = (param & 0x7FFF) + (0x1000000 - (param & 0x8000));
	while (len > 0) {
	int step = 1;
	int c0 = dst[shiftOffset] & 0xFF;
	if (c0 < 0x80) {
	/* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
	scalar += c0;
	dst[shiftOffset] = (byte) (scalar & 0x7F);
	} else if (c0 < 0xC0) {
	/* Continuation / 10AAAAAA. */
	} else if (c0 < 0xE0) {
	/* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
	if (len >= 2) {
	byte c1 = dst[shiftOffset + 1];
	scalar += (c1 & 0x3F) \| ((c0 & 0x1F) << 6);
	dst[shiftOffset] = (byte) (0xC0 \| ((scalar >> 6) & 0x1F));
	dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) \| (scalar & 0x3F));
	step = 2;
	} else {
	step = len;
	}
	} else if (c0 < 0xF0) {
	/* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
	if (len >= 3) {
	byte c1 = dst[shiftOffset + 1];
	byte c2 = dst[shiftOffset + 2];
	scalar += (c2 & 0x3F) \| ((c1 & 0x3F) << 6) \| ((c0 & 0x0F) << 12);
	dst[shiftOffset] = (byte) (0xE0 \| ((scalar >> 12) & 0x0F));
	dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) \| ((scalar >> 6) & 0x3F));
	dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) \| (scalar & 0x3F));
	step = 3;
	} else {
	step = len;
	}
	} else if (c0 < 0xF8) {
	/* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
	if (len >= 4) {
	byte c1 = dst[shiftOffset + 1];
	byte c2 = dst[shiftOffset + 2];
	byte c3 = dst[shiftOffset + 3];
	scalar += (c3 & 0x3F) \| ((c2 & 0x3F) << 6) \| ((c1 & 0x3F) << 12) \| ((c0 & 0x07) << 18);
	dst[shiftOffset] = (byte) (0xF0 \| ((scalar >> 18) & 0x07));
	dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) \| ((scalar >> 12) & 0x3F));
	dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) \| ((scalar >> 6) & 0x3F));
	dst[shiftOffset + 3] = (byte) ((c3 & 0xC0) \| (scalar & 0x3F));
	step = 4;
	} else {
	step = len;
	}
	}
	shiftOffset += step;
	len -= step;
	if (transformType == SHIFT_FIRST) {
	len = 0;
	}
	}
	}

	// Copy suffix.
	while (suffix != suffixEnd) {
	dst[offset++] = prefixSuffixStorage[suffix++];
	}

	return offset - dstOffset;
	}
	}