icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java - external/github.com/unicode-org/icu - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html#License
 /*
  *******************************************************************************
  *   Copyright (C) 2009-2015, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  */

 package com.ibm.icu.impl;

 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;

 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.CodePointMap;
 import com.ibm.icu.util.CodePointTrie;
 import com.ibm.icu.util.ICUUncheckedIOException;
 import com.ibm.icu.util.MutableCodePointTrie;
 import com.ibm.icu.util.VersionInfo;

 /**
  * Low-level implementation of the Unicode Normalization Algorithm.
  * For the data structure and details see the documentation at the end of
  * C++ normalizer2impl.h and in the design doc at
  * http://site.icu-project.org/design/normalization/custom
  */
 public final class Normalizer2Impl {
     /**
      * Constants and arithmetic helpers for Korean Hangul syllables and conjoining Jamo.
      * All range tests and decompositions are computed from the constants below.
      */
     public static final class Hangul {
         /* Korean Hangul and Jamo constants */
         public static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
         public static final int JAMO_L_END=0x1112;
         public static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
         public static final int JAMO_V_END=0x1175;
         public static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */
         public static final int JAMO_T_END=0x11c2;

         public static final int HANGUL_BASE=0xac00;
         public static final int HANGUL_END=0xd7a3;

         public static final int JAMO_L_COUNT=19;
         public static final int JAMO_V_COUNT=21;
         public static final int JAMO_T_COUNT=28;

         public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT;
         public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT;

         public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT;

         public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
         public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;

         /** Returns true if c lies in [HANGUL_BASE, HANGUL_LIMIT). */
         public static boolean isHangul(int c) {
             return c>=HANGUL_BASE && c<HANGUL_LIMIT;
         }

         /**
          * Returns true if c is a Hangul syllable whose offset from HANGUL_BASE
          * is a multiple of JAMO_T_COUNT, that is, a syllable without a trail part.
          */
         public static boolean isHangulLV(int c) {
             final int syllableIndex=c-HANGUL_BASE;
             if(syllableIndex<0 || syllableIndex>=HANGUL_COUNT) {
                 return false;
             }
             return syllableIndex%JAMO_T_COUNT==0;
         }

         /** Returns true if c lies in [JAMO_L_BASE, JAMO_L_LIMIT). */
         public static boolean isJamoL(int c) {
             return c>=JAMO_L_BASE && c<JAMO_L_LIMIT;
         }

         /** Returns true if c lies in [JAMO_V_BASE, JAMO_V_LIMIT). */
         public static boolean isJamoV(int c) {
             return c>=JAMO_V_BASE && c<JAMO_V_LIMIT;
         }

         /** Returns true if c lies strictly between JAMO_T_BASE and JAMO_T_BASE+JAMO_T_COUNT. */
         public static boolean isJamoT(int c) {
             final int trailIndex=c-JAMO_T_BASE;
             // JAMO_T_BASE itself (index 0) does not count as a trail jamo.
             return trailIndex>0 && trailIndex<JAMO_T_COUNT;
         }

         /**
          * Returns true if c falls into one of the three Jamo ranges:
          * JAMO_L_BASE..JAMO_L_END, JAMO_V_BASE..JAMO_V_END, or above JAMO_T_BASE up to JAMO_T_END.
          */
         public static boolean isJamo(int c) {
             if(c<JAMO_L_BASE || c>JAMO_T_END) {
                 return false;  // outside the overall Jamo span
             }
             if(c<=JAMO_L_END) {
                 return true;  // lead jamo
             }
             if(JAMO_V_BASE<=c && c<=JAMO_V_END) {
                 return true;  // vowel jamo
             }
             return c>JAMO_T_BASE;  // trail jamo, excluding JAMO_T_BASE itself
         }

         /**
          * Decomposes c, which must be a Hangul syllable, into buffer
          * and returns the length of the decomposition (2 or 3).
          * An IOException from the Appendable is rethrown as ICUUncheckedIOException.
          */
         public static int decompose(int c, Appendable buffer) {
             final int syllableIndex=c-HANGUL_BASE;
             final int trailIndex=syllableIndex%JAMO_T_COUNT;
             final int lvIndex=syllableIndex/JAMO_T_COUNT;
             try {
                 buffer.append((char)(JAMO_L_BASE+lvIndex/JAMO_V_COUNT));
                 buffer.append((char)(JAMO_V_BASE+lvIndex%JAMO_V_COUNT));
                 if(trailIndex!=0) {
                     buffer.append((char)(JAMO_T_BASE+trailIndex));
                     return 3;
                 }
                 return 2;
             } catch(IOException e) {
                 // Will not occur because we do not write to I/O.
                 throw new ICUUncheckedIOException(e);
             }
         }

         /**
          * Decomposes c, which must be a Hangul syllable, into buffer.
          * This is the raw, not recursive, decomposition. Its length is always 2:
          * either lead+vowel jamo, or the LV syllable followed by the trail jamo.
          */
         public static void getRawDecomposition(int c, Appendable buffer) {
             final int syllableIndex=c-HANGUL_BASE;
             final int trailIndex=syllableIndex%JAMO_T_COUNT;
             try {
                 if(trailIndex!=0) {
                     buffer.append((char)(c-trailIndex));  // LV syllable
                     buffer.append((char)(JAMO_T_BASE+trailIndex));
                 } else {
                     final int lvIndex=syllableIndex/JAMO_T_COUNT;
                     buffer.append((char)(JAMO_L_BASE+lvIndex/JAMO_V_COUNT));
                     buffer.append((char)(JAMO_V_BASE+lvIndex%JAMO_V_COUNT));
                 }
             } catch(IOException e) {
                 // Will not occur because we do not write to I/O.
                 throw new ICUUncheckedIOException(e);
             }
         }
     }

     /**
      * Writable buffer that takes care of canonical ordering.
      * Its Appendable methods behave like the C++ implementation's
      * appendZeroCC() methods.
      * <p>
      * If dest is a StringBuilder, then the buffer writes directly to it.
      * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
      * until no further changes are necessary and whole segments are appended.
      * append() methods that take combining-class values always write to the StringBuilder.
      * Other append() methods flush and append to the Appendable.
      */
     public static final class ReorderingBuffer implements Appendable {
         /**
          * Creates a buffer that writes to dest.
          * If dest is a StringBuilder it is used directly as the working buffer:
          * its capacity is ensured, and if it already has contents then lastCC and
          * reorderStart are initialized by scanning backward from its end.
          * Otherwise a new internal StringBuilder is created and destCapacity is not used.
          *
          * @param ni the Normalizer2Impl used for combining-class lookups
          * @param dest the destination Appendable
          * @param destCapacity minimum capacity requested when dest is a StringBuilder
          */
         public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) {
             impl=ni;
             app=dest;
             if(app instanceof StringBuilder) {
                 appIsStringBuilder=true;
                 str=(StringBuilder)dest;
                 // In Java, the constructor subsumes public void init(int destCapacity) {
                 str.ensureCapacity(destCapacity);
                 reorderStart=0;
                 if(str.length()==0) {
                     lastCC=0;
                 } else {
                     // Start the backward iterator at the end and read the cc of the last code point.
                     setIterator();
                     lastCC=previousCC();
                     // Set reorderStart after the last code point with cc<=1 if there is one.
                     if(lastCC>1) {
                         while(previousCC()>1) {}
                     }
                     // codePointLimit is the end of the code point on which the backward scan stopped.
                     reorderStart=codePointLimit;
                 }
             } else {
                 appIsStringBuilder=false;
                 str=new StringBuilder();
                 reorderStart=0;
                 lastCC=0;
             }
         }

         /** Returns true if the working StringBuilder currently holds no text. */
         public boolean isEmpty() { return str.length()==0; }
         /** Returns the length, in chars, of the working StringBuilder. */
         public int length() { return str.length(); }
         /** Returns the combining class recorded for the most recently appended text. */
         public int getLastCC() { return lastCC; }

         /** Returns the working StringBuilder itself (not a copy). */
         public StringBuilder getStringBuilder() { return str; }

         /**
          * Compares the entire working StringBuilder with s[start, limit) for binary equality.
          */
         public boolean equals(CharSequence s, int start, int limit) {
             return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
         }

         /**
          * Appends code point c with combining class cc, keeping canonical order.
          * If cc is 0 or not lower than lastCC, c goes at the end;
          * otherwise it is inserted further back via insert().
          */
         public void append(int c, int cc) {
             if(lastCC<=cc || cc==0) {
                 str.appendCodePoint(c);
                 lastCC=cc;
                 if(cc<=1) {
                     // Nothing can be reordered in front of a character with cc<=1.
                     reorderStart=str.length();
                 }
             } else {
                 insert(c, cc);
             }
         }
         /**
          * Appends s[start, limit), whose first code point has combining class leadCC
          * and whose last code point has combining class trailCC.
          * If the text can go at the end unchanged it is appended in one piece;
          * otherwise it is appended code point by code point so each one is placed in order.
          *
          * @param isNFD selects the per-code-point lookup for code points other than the
          *              last one: getCCFromYesOrMaybe() if true, impl.getCC() if false
          */
         public void append(CharSequence s, int start, int limit, boolean isNFD,
                            int leadCC, int trailCC) {
             if(start==limit) {
                 return;
             }
             if(lastCC<=leadCC || leadCC==0) {
                 if(trailCC<=1) {
                     reorderStart=str.length()+(limit-start);
                 } else if(leadCC<=1) {
                     reorderStart=str.length()+1;  // Ok if not a code point boundary.
                 }
                 str.append(s, start, limit);
                 lastCC=trailCC;
             } else {
                 int c=Character.codePointAt(s, start);
                 start+=Character.charCount(c);
                 insert(c, leadCC);  // insert first code point
                 while(start<limit) {
                     c=Character.codePointAt(s, start);
                     start+=Character.charCount(c);
                     if(start<limit) {
                         // Not the last code point: look up its combining class.
                         if (isNFD) {
                             leadCC = getCCFromYesOrMaybe(impl.getNorm16(c));
                         } else {
                             leadCC = impl.getCC(impl.getNorm16(c));
                         }
                     } else {
                         // Last code point: the caller supplied its combining class.
                         leadCC=trailCC;
                     }
                     append(c, leadCC);
                 }
             }
         }
         // The following append() methods work like C++ appendZeroCC().
         // They assume that the cc or trailCC of their input is 0.
         // Most of them implement Appendable interface methods.
         /** Appends one char, then resets lastCC to 0 and moves reorderStart to the end. */
         @Override
         public ReorderingBuffer append(char c) {
             str.append(c);
             lastCC=0;
             reorderStart=str.length();
             return this;
         }
         /** Appends one code point, then resets lastCC to 0 and moves reorderStart to the end. */
         public void appendZeroCC(int c) {
             str.appendCodePoint(c);
             lastCC=0;
             reorderStart=str.length();
         }
         /** Appends all of s; the state is left untouched when s is empty. */
         @Override
         public ReorderingBuffer append(CharSequence s) {
             if(s.length()!=0) {
                 str.append(s);
                 lastCC=0;
                 reorderStart=str.length();
             }
             return this;
         }
         /** Appends s[start, limit); the state is left untouched when start==limit. */
         @Override
         public ReorderingBuffer append(CharSequence s, int start, int limit) {
             if(start!=limit) {
                 str.append(s, start, limit);
                 lastCC=0;
                 reorderStart=str.length();
             }
             return this;
         }
         /**
          * Flushes from the intermediate StringBuilder to the Appendable,
          * if they are different objects.
          * Used after recomposition.
          * Must be called at the end when writing to a non-StringBuilder Appendable.
          * In both cases lastCC is reset to 0.
          */
         public void flush() {
             if(appIsStringBuilder) {
                 // Writing directly to the destination: only mark everything as final.
                 reorderStart=str.length();
             } else {
                 try {
                     app.append(str);
                     str.setLength(0);
                     reorderStart=0;
                 } catch(IOException e) {
                     throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
                 }
             }
             lastCC=0;
         }
         /**
          * Flushes from the intermediate StringBuilder to the Appendable,
          * if they are different objects.
          * Then appends the new text to the Appendable or StringBuilder.
          * Normally used after quick check loops find a non-empty sequence.
          *
          * @return this
          */
         public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
             if(appIsStringBuilder) {
                 str.append(s, start, limit);
                 reorderStart=str.length();
             } else {
                 try {
                     // Buffered text first, then the new text, both straight to the Appendable.
                     app.append(str).append(s, start, limit);
                     str.setLength(0);
                     reorderStart=0;
                 } catch(IOException e) {
                     throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
                 }
             }
             lastCC=0;
             return this;
         }
         /** Empties the working StringBuilder and resets lastCC and reorderStart to 0. */
         public void remove() {
             str.setLength(0);
             lastCC=0;
             reorderStart=0;
         }
         /**
          * Deletes the last suffixLength chars from the working StringBuilder,
          * then resets lastCC to 0 and moves reorderStart to the new end.
          */
         public void removeSuffix(int suffixLength) {
             int oldLength=str.length();
             str.delete(oldLength-suffixLength, oldLength);
             lastCC=0;
             reorderStart=str.length();
         }

         /*
          * TODO: Revisit whether it makes sense to track reorderStart.
          * It is set to after the last known character with cc<=1,
          * which stops previousCC() before it reads that character and looks up its cc.
          * previousCC() is normally only called from insert().
          * In other words, reorderStart speeds up the insertion of a combining mark
          * into a multi-combining mark sequence where it does not belong at the end.
          * This might not be worth the trouble.
          * On the other hand, it's not a huge amount of trouble.
          *
          * We probably need it for UNORM_SIMPLE_APPEND.
          */

         // Inserts c somewhere before the last character.
         // Requires 0<cc<lastCC which implies reorderStart<limit.
         private void insert(int c, int cc) {
             // Start at the end, step over the last code point without a lookup,
             // then walk backward while the preceding code point has a higher cc.
             for(setIterator(), skipPrevious(); previousCC()>cc;) {}
             // insert c at codePointLimit, after the character with prevCC<=cc
             if(c<=0xffff) {
                 str.insert(codePointLimit, (char)c);
                 if(cc<=1) {
                     reorderStart=codePointLimit+1;
                 }
             } else {
                 // Supplementary code point: inserted as two chars.
                 str.insert(codePointLimit, Character.toChars(c));
                 if(cc<=1) {
                     reorderStart=codePointLimit+2;
                 }
             }
         }

         // Source of combining-class data.
         private final Normalizer2Impl impl;
         // Destination passed to the constructor.
         private final Appendable app;
         // Working buffer: the destination itself if it is a StringBuilder, else a private one.
         private final StringBuilder str;
         // True if str and app are the same object.
         private final boolean appIsStringBuilder;
         // Index in str below which previousCC() does not look; see the TODO comment above.
         private int reorderStart;
         // Combining class recorded by the most recent append; reset to 0 by flush/remove.
         private int lastCC;

         // private backward iterator
         // Positions the iterator at the end of str.
         private void setIterator() { codePointStart=str.length(); }
         // Moves back over one code point without looking up its combining class.
         private void skipPrevious() {  // Requires 0<codePointStart.
             codePointLimit=codePointStart;
             codePointStart=str.offsetByCodePoints(codePointStart, -1);
         }
         // Moves back over one code point and returns its combining class.
         // Does not move past reorderStart.
         private int previousCC() {  // Returns 0 if there is no previous character.
             codePointLimit=codePointStart;
             if(reorderStart>=codePointStart) {
                 return 0;
             }
             int c=str.codePointBefore(codePointStart);
             codePointStart-=Character.charCount(c);
             return impl.getCCFromYesOrMaybeCP(c);
         }

         // [codePointStart, codePointLimit) delimits the code point most recently stepped over.
         private int codePointStart, codePointLimit;
     }

     // TODO: Propose as public API on the UTF16 class.
     // TODO: Propose widening UTF16 methods that take char to take int.
     // TODO: Propose widening UTF16 methods that take String to take CharSequence.
     public static final class UTF16Plus {
         /**
          * Is this code point a lead surrogate (U+d800..U+dbff)?
          * Tests that all bits above the low 10 match those of U+d800.
          * @param c code unit or code point
          * @return true or false
          */
         public static boolean isLeadSurrogate(int c) {
             return (c >>> 10) == (0xd800 >>> 10);
         }

         /**
          * Is this code point a trail surrogate (U+dc00..U+dfff)?
          * Tests that all bits above the low 10 match those of U+dc00.
          * @param c code unit or code point
          * @return true or false
          */
         public static boolean isTrailSurrogate(int c) {
             return (c >>> 10) == (0xdc00 >>> 10);
         }

         /**
          * Is this code point a surrogate (U+d800..U+dfff)?
          * Tests that all bits above the low 11 match those of U+d800.
          * @param c code unit or code point
          * @return true or false
          */
         public static boolean isSurrogate(int c) {
             return (c >>> 11) == (0xd800 >>> 11);
         }

         /**
          * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
          * is it a lead surrogate?
          * Only bit 10 is examined, so the result is meaningless for non-surrogates.
          * @param c code unit or code point
          * @return true or false
          */
         public static boolean isSurrogateLead(int c) {
             return ((c >> 10) & 1) == 0;
         }

         /**
          * Compares two CharSequence objects for binary equality.
          * @param s1 first sequence
          * @param s2 second sequence
          * @return true if s1 contains the same text as s2
          */
         public static boolean equal(CharSequence s1,  CharSequence s2) {
             if(s1==s2) {
                 return true;
             }
             final int length1=s1.length();
             final int length2=s2.length();
             if(length1!=length2) {
                 return false;
             }
             int i=0;
             while(i<length1) {
                 if(s1.charAt(i)!=s2.charAt(i)) {
                     return false;
                 }
                 ++i;
             }
             return true;
         }

         /**
          * Compares two CharSequence subsequences for binary equality.
          * @param s1 first sequence
          * @param start1 start offset in first sequence
          * @param limit1 limit offset in first sequence
          * @param s2 second sequence
          * @param start2 start offset in second sequence
          * @param limit2 limit offset in second sequence
          * @return true if s1.subSequence(start1, limit1) contains the same text
          *              as s2.subSequence(start2, limit2)
          */
         public static boolean equal(CharSequence s1, int start1, int limit1,
                                     CharSequence s2, int start2, int limit2) {
             final int length1=limit1-start1;
             final int length2=limit2-start2;
             if(length1!=length2) {
                 return false;
             }
             // Same object and same offsets: nothing to compare.
             if(start1==start2 && s1==s2) {
                 return true;
             }
             for(int i1=start1, i2=start2; i1<limit1; ++i1, ++i2) {
                 if(s1.charAt(i1)!=s2.charAt(i2)) {
                     return false;
                 }
             }
             return true;
         }
     }

     /**
      * Creates an instance without reading any data; the constructor body is empty.
      * The load() methods of this class fill in the fields from binary data.
      */
     public Normalizer2Impl() {}

     /**
      * Data-version check handed to ICUBinary when the data header is read:
      * only data whose first version byte equals 4 is accepted.
      */
     private static final class IsAcceptable implements ICUBinary.Authenticate {
         @Override
         public boolean isDataVersionAcceptable(byte[] version) {
             final int majorVersion=version[0];
             return majorVersion==4;
         }
     }
     /** Shared instance passed to ICUBinary.readHeaderAndDataVersion() in load(ByteBuffer). */
     private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
     /** Data format value passed to the header check; its four bytes are the ASCII codes of "Nrm2". */
     private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"

     /**
      * Reads normalization data from bytes into this instance.
      * The data is consumed sequentially: header and data version, the int indexes,
      * the normTrie, the composition/mapping string, and the 0x100-byte smallFCD table.
      *
      * @param bytes buffer positioned at the start of the data header
      * @return this
      * @throws ICUUncheckedIOException if there are too few indexes, if the trie is longer
      *         than the space the indexes reserve for it, or if an IOException occurs
      */
     public Normalizer2Impl load(ByteBuffer bytes) {
         try {
             dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
             // The first index is the byte offset of the trie, which equals the byte length
             // of the index array; dividing by 4 gives the number of int indexes.
             int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
             if(indexesLength<=IX_MIN_LCCC_CP) {
                 throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
             }
             int[] inIndexes=new int[indexesLength];
             // Index 0 was already consumed above; store its original byte value.
             inIndexes[0]=indexesLength*4;
             for(int i=1; i<indexesLength; ++i) {
                 inIndexes[i]=bytes.getInt();
             }

             // Code point thresholds.
             minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
             minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
             minLcccCP=inIndexes[IX_MIN_LCCC_CP];

             // norm16 value thresholds.
             minYesNo=inIndexes[IX_MIN_YES_NO];
             minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
             minNoNo=inIndexes[IX_MIN_NO_NO];
             minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
             minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
             minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
             limitNoNo=inIndexes[IX_LIMIT_NO_NO];
             minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
             assert((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
             centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;

             // Read the normTrie.
             int offset=inIndexes[IX_NORM_TRIE_OFFSET];
             int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
             int triePosition = bytes.position();
             normTrie = CodePointTrie.Fast16.fromBinary(bytes);
             // Number of bytes the trie reader actually consumed.
             int trieLength = bytes.position() - triePosition;
             if(trieLength>(nextOffset-offset)) {
                 throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
             }
             ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes

             // Read the composition and mapping data.
             offset=nextOffset;
             nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
             // The section length is in bytes; it is read as 16-bit chars.
             int numChars=(nextOffset-offset)/2;
             if(numChars!=0) {
                 maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
                 // extraData is the tail of the same string, starting at a char offset
                 // computed from minMaybeYes.
                 extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
             }
             // If numChars==0, maybeYesCompositions and extraData are not assigned here.

             // smallFCD: new in formatVersion 2
             // NOTE(review): offset is not read again in this method after this assignment.
             offset=nextOffset;
             smallFCD=new byte[0x100];
             bytes.get(smallFCD);

             return this;
         } catch(IOException e) {
             throw new ICUUncheckedIOException(e);
         }
     }
     /**
      * Fetches the required data item called name through ICUBinary and loads it.
      * @param name name of the data item
      * @return this, as returned by load(ByteBuffer)
      */
     public Normalizer2Impl load(String name) {
         final ByteBuffer data=ICUBinary.getRequiredData(name);
         return load(data);
     }

     /**
      * Walks all same-value ranges of the normTrie and adds a whole range to set when
      * either its norm16 is above MIN_NORMAL_MAYBE_YES (and not JAMO_VT), or its norm16
      * lies in [minNoNoCompNoMaybeCC, limitNoNo) and the FCD16 value of the range start
      * has non-zero upper bits (fcd16 > 0xff).
      * @param set receives the ranges
      */
     public void addLcccChars(UnicodeSet set) {
         final CodePointMap.Range range = new CodePointMap.Range();
         for (int start = 0;
                 normTrie.getRange(start, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, INERT,
                         null, range);
                 start = range.getEnd() + 1) {
             final int end = range.getEnd();
             final int norm16 = range.getValue();
             final boolean addRange;
             if (norm16 > MIN_NORMAL_MAYBE_YES && norm16 != JAMO_VT) {
                 addRange = true;
             } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
                 // Only the first code point of the range is consulted.
                 addRange = getFCD16(start) > 0xff;
             } else {
                 addRange = false;
             }
             if (addRange) {
                 set.add(start, end);
             }
         }
     }

     /**
      * Adds to set the start code point of every same-value range of the normTrie,
      * additional starts inside algorithmic-decomposition ranges where the FCD16 value
      * changes, and the Hangul boundaries (each LV syllable, the code point after it,
      * and HANGUL_LIMIT).
      * @param set receives the start code points
      */
     public void addPropertyStarts(UnicodeSet set) {
         final CodePointMap.Range range = new CodePointMap.Range();
         int rangeStart = 0;
         while (normTrie.getRange(rangeStart, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES,
                 INERT, null, range)) {
             final int rangeEnd = range.getEnd();
             final int norm16 = range.getValue();
             set.add(rangeStart);
             final boolean fcdMayVary = rangeStart != rangeEnd && isAlgorithmicNoNo(norm16) &&
                     (norm16 & DELTA_TCCC_MASK) > DELTA_TCCC_1;
             if (fcdMayVary) {
                 // Range of code points with same-norm16-value algorithmic decompositions.
                 // They might have different non-zero FCD16 values.
                 int previous = getFCD16(rangeStart);
                 for (int c = rangeStart + 1; c <= rangeEnd; ++c) {
                     final int current = getFCD16(c);
                     if (current != previous) {
                         set.add(c);
                         previous = current;
                     }
                 }
             }
             rangeStart = rangeEnd + 1;
         }

         /* add Hangul LV syllables and LV+1 because of skippables */
         int lv = Hangul.HANGUL_BASE;
         while (lv < Hangul.HANGUL_LIMIT) {
             set.add(lv);
             set.add(lv + 1);
             lv += Hangul.JAMO_T_COUNT;
         }
         /* add Hangul+1 to continue with other properties */
         set.add(Hangul.HANGUL_LIMIT);
     }

     /**
      * Adds to set the start code point of each range of the canonical iterator data trie
      * over which the CANON_NOT_SEGMENT_STARTER bit is constant.
      * Builds the canonical iterator data first if necessary.
      * Currently only used for the SEGMENT_STARTER property.
      * @param set receives the start code points
      */
     public void addCanonIterPropertyStarts(UnicodeSet set) {
         ensureCanonIterData();
         final CodePointMap.Range range = new CodePointMap.Range();
         for (int start = 0;
                 canonIterData.getRange(start, segmentStarterMapper, range);
                 start = range.getEnd() + 1) {
             set.add(start);
         }
     }

     /** Value filter that reduces a trie value to its CANON_NOT_SEGMENT_STARTER bit. */
     private static final CodePointMap.ValueFilter segmentStarterMapper =
             new CodePointMap.ValueFilter() {
         @Override
         public int apply(int value) {
             final int segmentStarterBit = value & CANON_NOT_SEGMENT_STARTER;
             return segmentStarterBit;
         }
     };

     // low-level properties ------------------------------------------------ ***

     // Note: Normalizer2Impl.java r30983 (2011-nov-27)
     // still had getFCDTrie() which built and cached an FCD trie.
     // That provided faster access to FCD data than getFCD16FromNormData()
     // but required synchronization and consumed some 10kB of heap memory
     // in any process that uses FCD (e.g., via collation).
     // minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
     // at least for ASCII & CJK.

     /**
      * Builds the canonical-iterator data for this instance.
      * This is required before any of {@link #isCanonSegmentStarter(int)} or
      * {@link #getCanonStartSet(int, UnicodeSet)} are called,
      * or else they crash.
      * <p>
      * The method is synchronized and does the work only while canonIterData is null,
      * so repeated calls after a successful build return immediately.
      * It walks every range of the normTrie, collects per-code-point flags
      * (CANON_NOT_SEGMENT_STARTER, CANON_HAS_COMPOSITIONS) and start sets in a mutable trie,
      * and finally freezes that trie into canonIterData.
      * @return this
      */
     public synchronized Normalizer2Impl ensureCanonIterData() {
         if(canonIterData==null) {
             MutableCodePointTrie mutableTrie = new MutableCodePointTrie(0, 0);
             canonStartSets=new ArrayList<UnicodeSet>();
             int start = 0;
             CodePointMap.Range range = new CodePointMap.Range();
             while (normTrie.getRange(start, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, INERT,
                     null, range)) {
                 final int end = range.getEnd();
                 final int norm16 = range.getValue();
                 if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
                     // Inert, or 2-way mapping (including Hangul syllable).
                     // We do not write a canonStartSet for any yesNo character.
                     // Composites from 2-way mappings are added at runtime from the
                     // starter's compositions list, and the other characters in
                     // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
                     // "maybe" characters.
                     start = end + 1;
                     continue;
                 }
                 // Every code point of the range shares norm16 but gets its own trie value.
                 for (int c = start; c <= end; ++c) {
                     final int oldValue = mutableTrie.get(c);
                     int newValue=oldValue;
                     if(isMaybeOrNonZeroCC(norm16)) {
                         // not a segment starter if it occurs in a decomposition or has cc!=0
                         newValue|=CANON_NOT_SEGMENT_STARTER;
                         if(norm16<MIN_NORMAL_MAYBE_YES) {
                             newValue|=CANON_HAS_COMPOSITIONS;
                         }
                     } else if(norm16<minYesNo) {
                         newValue|=CANON_HAS_COMPOSITIONS;
                     } else {
                         // c has a one-way decomposition
                         int c2=c;
                         // Do not modify the whole-range norm16 value.
                         int norm16_2=norm16;
                         if (isDecompNoAlgorithmic(norm16_2)) {
                             // Maps to an isCompYesAndZeroCC.
                             c2 = mapAlgorithmic(c2, norm16_2);
                             norm16_2 = getRawNorm16(c2);
                             // No compatibility mappings for the CanonicalIterator.
                             assert(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
                         }
                         if (norm16_2 > minYesNo) {
                             // c decomposes, get everything from the variable-length extra data
                             int mapping=norm16_2>>OFFSET_SHIFT;
                             int firstUnit=extraData.charAt(mapping);
                             int length=firstUnit&MAPPING_LENGTH_MASK;
                             if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
                                 // The unit before firstUnit carries the cc in its low byte;
                                 // it applies to c only if no algorithmic mapping intervened.
                                 if(c==c2 && (extraData.charAt(mapping-1)&0xff)!=0) {
                                     newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
                                 }
                             }
                             // Skip empty mappings (no characters in the decomposition).
                             if(length!=0) {
                                 ++mapping;  // skip over the firstUnit
                                 // add c to first code point's start set
                                 int limit=mapping+length;
                                 c2=extraData.codePointAt(mapping);
                                 addToStartSet(mutableTrie, c, c2);
                                 // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
                                 // one-way mapping. A 2-way mapping is possible here after
                                 // intermediate algorithmic mapping.
                                 if(norm16_2>=minNoNo) {
                                     while((mapping+=Character.charCount(c2))<limit) {
                                         c2=extraData.codePointAt(mapping);
                                         int c2Value = mutableTrie.get(c2);
                                         if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
                                             mutableTrie.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER);
                                         }
                                     }
                                 }
                             }
                         } else {
                             // c decomposed to c2 algorithmically; c has cc==0
                             addToStartSet(mutableTrie, c, c2);
                         }
                     }
                     // Write back only if a flag was added for c.
                     if(newValue!=oldValue) {
                         mutableTrie.set(c, newValue);
                     }
                 }
                 start = end + 1;
             }
             // Assigning canonIterData marks the data as built for later calls.
             canonIterData = mutableTrie.buildImmutable(
                     CodePointTrie.Type.SMALL, CodePointTrie.ValueWidth.BITS_32);
         }
         return this;
     }

     /**
      * Returns the norm16 value for code point c.
      * The trie stores values for lead surrogate code *units*.
      * Surrogate code *points* are inert, so a lead surrogate yields INERT
      * without a trie lookup.
      */
     public int getNorm16(int c) {
         if(UTF16Plus.isLeadSurrogate(c)) {
             return INERT;
         }
         return normTrie.get(c);
     }
     /** Returns the normTrie value for c as stored, without special handling of lead surrogates. */
     public int getRawNorm16(int c) {
         final int norm16=normTrie.get(c);
         return norm16;
     }

     /**
      * Maps a norm16 value to a composition quick check result.
      * @return 1 for yes (below minNoNo, or at least MIN_YES_YES_WITH_CC),
      *         2 for maybe (at least minMaybeYes), 0 for no
      */
     public int getCompQuickCheck(int norm16) {
         if(norm16<minNoNo || norm16>=MIN_YES_YES_WITH_CC) {
             return 1;  // yes
         }
         return norm16>=minMaybeYes
                 ? 2    // maybe
                 : 0;   // no
     }
     /** Returns true if norm16 lies in [limitNoNo, minMaybeYes). */
     public boolean isAlgorithmicNoNo(int norm16) {
         return norm16>=limitNoNo && norm16<minMaybeYes;
     }
     /** Returns true if norm16 lies in [minNoNo, minMaybeYes). */
     public boolean isCompNo(int norm16) {
         return norm16>=minNoNo && norm16<minMaybeYes;
     }
     /** Returns true unless norm16 lies in [minYesNo, minMaybeYes). */
     public boolean isDecompYes(int norm16) {
         final boolean inDecompNoRange = minYesNo<=norm16 && norm16<minMaybeYes;
         return !inDecompNoRange;
     }

     /**
      * Returns the combining class encoded by norm16.
      * Values at or above MIN_NORMAL_MAYBE_YES carry it directly;
      * values in [minNoNo, limitNoNo) are resolved through getCCFromNoNo();
      * everything else yields 0.
      */
     public int getCC(int norm16) {
         if(MIN_NORMAL_MAYBE_YES<=norm16) {
             return getCCFromNormalYesOrMaybe(norm16);
         }
         final boolean inNoNoRange = minNoNo<=norm16 && norm16<limitNoNo;
         return inNoNoRange ? getCCFromNoNo(norm16) : 0;
     }
     /** Extracts the 8-bit field located OFFSET_SHIFT bits up in norm16. */
     public static int getCCFromNormalYesOrMaybe(int norm16) {
         final int shifted = norm16 >> OFFSET_SHIFT;
         return shifted & 0xff;
     }
     /**
      * Returns the combining class for norm16 values at or above MIN_NORMAL_MAYBE_YES,
      * and 0 for all lower values.
      */
     public static int getCCFromYesOrMaybe(int norm16) {
         if(norm16<MIN_NORMAL_MAYBE_YES) {
             return 0;
         }
         return getCCFromNormalYesOrMaybe(norm16);
     }
     /**
      * Code point variant of getCCFromYesOrMaybe():
      * returns 0 without a trie lookup for code points below minCompNoMaybeCP.
      */
     public int getCCFromYesOrMaybeCP(int c) {
         if (c >= minCompNoMaybeCP) {
             final int norm16 = getNorm16(c);
             return getCCFromYesOrMaybe(norm16);
         }
         return 0;
     }

     /**
      * Returns the FCD data for code point c.
      * Two fast paths return 0 before the full lookup: code points below minDecompNoCP,
      * and BMP code points for which the smallFCD table rules out non-zero data.
      * @param c A Unicode code point.
      * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
      */
     public int getFCD16(int c) {
         final boolean knownZero =
                 c<minDecompNoCP || (c<=0xffff && !singleLeadMightHaveNonZeroFCD16(c));
         if(knownZero) {
             return 0;
         }
         return getFCD16FromNormData(c);
     }
     /**
      * Returns true if the single-or-lead code unit c might have non-zero FCD data.
      * smallFCD holds one byte per 256 code units (index lead>>8); within that byte,
      * bit (lead>>5)&7 covers a group of 32 code units.
      */
     public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
         // 0<=lead<=0xffff
         final int blockBits=smallFCD[lead>>8];
         if(blockBits==0) {
             return false;
         }
         final int groupMask=1<<((lead>>5)&7);
         return (blockBits&groupMask)!=0;
     }

     /**
      * Gets the FCD value from the regular normalization data.
      * @param c code point
      * @return lccc in the upper byte and tccc in the lower byte of a 16-bit value
      */
     public int getFCD16FromNormData(int c) {
         int norm16=getNorm16(c);
         if(limitNoNo<=norm16) {
             if(MIN_NORMAL_MAYBE_YES<=norm16) {
                 // combining mark: the same cc serves as both lccc and tccc
                 final int cc=getCCFromNormalYesOrMaybe(norm16);
                 return (cc<<8)|cc;
             }
             if(minMaybeYes<=norm16) {
                 return 0;
             }
             // isDecompNoAlgorithmic(norm16)
             final int deltaTrailCC = norm16 & DELTA_TCCC_MASK;
             if(deltaTrailCC <= DELTA_TCCC_1) {
                 return deltaTrailCC >> OFFSET_SHIFT;
             }
             // Maps to an isCompYesAndZeroCC; continue with the data of the mapped code point.
             norm16 = getRawNorm16(mapAlgorithmic(c, norm16));
         }
         if(norm16<=minYesNo || isHangulLVT(norm16)) {
             // no decomposition or Hangul syllable, all zeros
             return 0;
         }
         // c decomposes, get everything from the variable-length extra data
         final int mappingIndex=norm16>>OFFSET_SHIFT;
         final int firstUnit=extraData.charAt(mappingIndex);
         final int tccc=firstUnit>>8;
         if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0) {
             return tccc;
         }
         // The unit before firstUnit carries lccc in its upper byte.
         final int lcccBits=extraData.charAt(mappingIndex-1)&0xff00;
         return tccc|lcccBits;
     }

     /**
      * Looks up the decomposition mapping of a single code point.
      * @param c code point
      * @return the decomposition of c, or null if c does not have one
      */
     public String getDecomposition(int c) {
         if(c<minDecompNoCP) {
             return null;  // c does not decompose
         }
         int norm16=getNorm16(c);
         if(isMaybeOrNonZeroCC(norm16)) {
             return null;  // c does not decompose
         }
         int algorithmicTarget=-1;
         if(isDecompNoAlgorithmic(norm16)) {
             // Maps to an isCompYesAndZeroCC, which might itself decompose further.
             c=mapAlgorithmic(c, norm16);
             algorithmicTarget=c;
             norm16=getRawNorm16(c);
         }
         if(norm16<minYesNo) {
             // No further mapping: the result is the algorithmic target, if there was one.
             return algorithmicTarget<0 ? null : UTF16.valueOf(algorithmicTarget);
         }
         if(isHangulLV(norm16) || isHangulLVT(norm16)) {
             // Hangul syllable: decompose algorithmically.
             StringBuilder jamos=new StringBuilder();
             Hangul.decompose(c, jamos);
             return jamos.toString();
         }
         // The mapping is stored in the extra data: a first unit carrying the length,
         // followed by the mapping's code units.
         int start=(norm16>>OFFSET_SHIFT)+1;
         int length=extraData.charAt(start-1)&MAPPING_LENGTH_MASK;
         return extraData.substring(start, start+length);
     }

     /**
      * Looks up the raw decomposition mapping of a single code point.
      * @param c code point
      * @return the raw decomposition of c, or null if c does not have one
      */
     public String getRawDecomposition(int c) {
         if(c<minDecompNoCP) {
             return null;  // c does not decompose
         }
         int norm16=getNorm16(c);
         if(isDecompYes(norm16)) {
             return null;  // c does not decompose
         }
         if(isHangulLV(norm16) || isHangulLVT(norm16)) {
             // Hangul syllable: decompose algorithmically.
             StringBuilder jamos=new StringBuilder();
             Hangul.getRawDecomposition(c, jamos);
             return jamos.toString();
         }
         if(isDecompNoAlgorithmic(norm16)) {
             return UTF16.valueOf(mapAlgorithmic(c, norm16));
         }
         // c decomposes: read the mapping from the variable-length extra data.
         int mapping=norm16>>OFFSET_SHIFT;
         int firstUnit=extraData.charAt(mapping);
         int mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of the normal mapping
         if((firstUnit&MAPPING_HAS_RAW_MAPPING)==0) {
             // No separate raw mapping: return the normal mapping that follows the first unit.
             int start=mapping+1;
             return extraData.substring(start, start+mLength);
         }
         // The raw mapping sits before the first unit and before the optional ccc/lccc word.
         int cccLcccWordLength=(firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 ? 1 : 0;
         int rawMapping=mapping-cccLcccWordLength-1;
         char rm0=extraData.charAt(rawMapping);
         if(rm0<=MAPPING_LENGTH_MASK) {
             // rm0 is the length of a raw mapping string that ends right before rawMapping.
             return extraData.substring(rawMapping-rm0, rawMapping);
         }
         // Otherwise copy the normal mapping and replace its first two code units with rm0.
         int tailStart=mapping+1+2;  // skip the first unit and the first two mapping code units
         StringBuilder raw=new StringBuilder(mLength-1);
         raw.append(rm0);
         raw.append(extraData, tailStart, tailStart+mLength-2);
         return raw.toString();
     }

     /**
      * Tests whether code point c starts a canonical-iterator string segment,
      * which is the case when the canonIterData value for c is non-negative.
      * <b>{@link #ensureCanonIterData()} must have been called before this method,
      * or else this method will crash.</b>
      * @param c A Unicode code point.
      * @return true if c starts a canonical-iterator string segment.
      */
     public boolean isCanonSegmentStarter(int c) {
         int canonValue=canonIterData.get(c);
         return canonValue>=0;
     }
     /**
      * Collects the characters whose decomposition starts with c.
      * If there are any, the set is cleared first and then filled with them;
      * if there are none, the set is left untouched.
      * <b>{@link #ensureCanonIterData()} must have been called before this method,
      * or else this method will crash.</b>
      * @param c A Unicode code point.
      * @param set A UnicodeSet to receive the characters whose decompositions
      *        start with c, if there are any.
      * @return true if there are characters whose decomposition starts with c.
      */
     public boolean getCanonStartSet(int c, UnicodeSet set) {
         final int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER;
         if(canonValue==0) {
             return false;
         }
         set.clear();
         final int value=canonValue&CANON_VALUE_MASK;
         final boolean hasSet=(canonValue&CANON_HAS_SET)!=0;
         if(hasSet) {
             // value is an index into the list of start sets.
             set.addAll(canonStartSets.get(value));
         } else if(value!=0) {
             // value is itself the single code point to add.
             set.add(value);
         }
         if((canonValue&CANON_HAS_COMPOSITIONS)==0) {
             return true;
         }
         final int norm16=getRawNorm16(c);
         if(norm16!=JAMO_L) {
             addComposites(getCompositionsList(norm16), set);
             return true;
         }
         // Jamo L: add the whole range of JAMO_VT_COUNT syllables that start with it.
         final int firstSyllable=
             Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT;
         final int lastSyllable=firstSyllable+Hangul.JAMO_VT_COUNT-1;
         set.add(firstSyllable, lastSyllable);
         return true;
     }

     // Fixed norm16 values.
     // With getCCFromNormalYesOrMaybe() computing (norm16>>OFFSET_SHIFT)&0xff,
     // MIN_YES_YES_WITH_CC yields ccc=1, while JAMO_VT and MIN_NORMAL_MAYBE_YES yield ccc=0.
     public static final int MIN_YES_YES_WITH_CC=0xfe02;
     public static final int JAMO_VT=0xfe00;
     public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
     public static final int JAMO_L=2;  // offset=1 hasCompBoundaryAfter=FALSE
     public static final int INERT=1;  // offset=0 hasCompBoundaryAfter=TRUE

     // norm16 bit 0 is comp-boundary-after.
     public static final int HAS_COMP_BOUNDARY_AFTER=1;
     // Shifting norm16 right by OFFSET_SHIFT drops bit 0 and yields the offset
     // (used as an extraData index) or the ccc bits.
     public static final int OFFSET_SHIFT=1;

     // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
     // tccc (0, 1, >1) for quick FCC boundary-after tests.
     public static final int DELTA_TCCC_0=0;
     public static final int DELTA_TCCC_1=2;
     public static final int DELTA_TCCC_GT_1=4;
     public static final int DELTA_TCCC_MASK=6;
     public static final int DELTA_SHIFT=3;

     public static final int MAX_DELTA=0x40;

     // Byte offsets from the start of the data, after the generic header.
     // Values 4..6 have no named constant here.
     public static final int IX_NORM_TRIE_OFFSET=0;
     public static final int IX_EXTRA_DATA_OFFSET=1;
     public static final int IX_SMALL_FCD_OFFSET=2;
     public static final int IX_RESERVED3_OFFSET=3;
     public static final int IX_TOTAL_SIZE=7;

     // Code point thresholds for quick check codes.
     public static final int IX_MIN_DECOMP_NO_CP=8;
     public static final int IX_MIN_COMP_NO_MAYBE_CP=9;

     // Norm16 value thresholds for quick check combinations and types of extra data.

     /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
     public static final int IX_MIN_YES_NO=10;
     /** Mappings are comp-normalized. */
     public static final int IX_MIN_NO_NO=11;
     public static final int IX_LIMIT_NO_NO=12;
     public static final int IX_MIN_MAYBE_YES=13;

     /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
     public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
     /** Mappings are not comp-normalized but have a comp boundary before. */
     public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15;
     /** Mappings do not have a comp boundary before. */
     public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16;
     /** Mappings to the empty string. */
     public static final int IX_MIN_NO_NO_EMPTY=17;

     public static final int IX_MIN_LCCC_CP=18;
     // Value 19 has no named constant here.
     public static final int IX_COUNT=20;

     // Bit fields of the first unit of a mapping in extraData:
     // two flag bits and the mapping length in the low 5 bits.
     public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
     public static final int MAPPING_HAS_RAW_MAPPING=0x40;
     // unused bit 0x20;
     public static final int MAPPING_LENGTH_MASK=0x1f;

     public static final int COMP_1_LAST_TUPLE=0x8000;
     public static final int COMP_1_TRIPLE=1;
     public static final int COMP_1_TRAIL_LIMIT=0x3400;
     public static final int COMP_1_TRAIL_MASK=0x7ffe;
     public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
     public static final int COMP_2_TRAIL_SHIFT=6;
     public static final int COMP_2_TRAIL_MASK=0xffc0;

     // higher-level functionality ------------------------------------------ ***

     /**
      * NFD without an NFD Normalizer2 instance:
      * decomposes all of s into dest, replacing any previous contents of dest.
      * @param s the text to decompose
      * @param dest receives the result
      * @return dest
      */
     public Appendable decompose(CharSequence s, StringBuilder dest) {
         final int length=s.length();
         // The source length also serves as the capacity estimate for dest.
         decompose(s, 0, length, dest, length);
         return dest;
     }
     /**
      * Decomposes s[src, limit[ and writes the result to dest.
      * dest is emptied before anything is written to it.
      * @param s the source text
      * @param src start index in s
      * @param limit end index in s (exclusive)
      * @param dest receives the decomposed text
      * @param destLengthEstimate the initial dest buffer capacity;
      *        a negative value means "use limit-src"
      */
     public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
                    int destLengthEstimate) {
         int capacity=destLengthEstimate<0 ? limit-src : destLengthEstimate;
         dest.setLength(0);
         ReorderingBuffer reorderingBuffer=new ReorderingBuffer(this, dest, capacity);
         decompose(s, src, limit, reorderingBuffer);
     }

     /**
      * Decomposes s[src, limit[ or checks how far it is already decomposed.
      * Dual functionality:
      * <ul>
      * <li>buffer!=null: normalize, writing the result to buffer</li>
      * <li>buffer==null: isNormalized/quickCheck/spanQuickCheckYes, nothing is written</li>
      * </ul>
      * @param s the source text
      * @param src start index in s
      * @param limit end index in s (exclusive)
      * @param buffer the output buffer, or null for check-only mode
      * @return limit if the whole range was processed; in check-only mode,
      *         the last tracked boundary (prevBoundary) as soon as a code point
      *         fails isDecompYes() or has an out-of-order combining class
      */
     public int decompose(CharSequence s, int src, int limit,
                          ReorderingBuffer buffer) {
         int minNoCP=minDecompNoCP;

         int prevSrc;
         int c=0;
         int norm16=0;

         // only for quick check
         int prevBoundary=src;
         int prevCC=0;

         for(;;) {
             // count code units below the minimum or with irrelevant data for the quick check
             for(prevSrc=src; src!=limit;) {
                 if( (c=s.charAt(src))<minNoCP ||
                     isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                 ) {
                     ++src;
                 } else if (!UTF16Plus.isLeadSurrogate(c)) {
                     // Relevant BMP code unit: c and norm16 are set for the code below the loop.
                     break;
                 } else {
                     char c2;
                     if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
                         // Surrogate pair: look up the supplementary code point.
                         c = Character.toCodePoint((char)c, c2);
                         norm16 = normTrie.suppGet(c);
                         if (isMostDecompYesAndZeroCC(norm16)) {
                             src += 2;
                         } else {
                             // src still points at the lead surrogate; it is advanced below.
                             break;
                         }
                     } else {
                         ++src;  // unpaired lead surrogate: inert
                     }
                 }
             }
             // copy these code units all at once
             if(src!=prevSrc) {
                 if(buffer!=null) {
                     buffer.flushAndAppendZeroCC(s, prevSrc, src);
                 } else {
                     // Check-only mode: the skipped run resets the ordering state
                     // and moves the boundary to its end.
                     prevCC=0;
                     prevBoundary=src;
                 }
             }
             if(src==limit) {
                 break;
             }

             // Check one above-minimum, relevant code point.
             src+=Character.charCount(c);
             if(buffer!=null) {
                 decompose(c, norm16, buffer);
             } else {
                 if(isDecompYes(norm16)) {
                     int cc=getCCFromYesOrMaybe(norm16);
                     if(prevCC<=cc || cc==0) {
                         prevCC=cc;
                         // Only after cc 0 or 1 is the position recorded as a boundary.
                         if(cc<=1) {
                             prevBoundary=src;
                         }
                         continue;
                     }
                 }
                 return prevBoundary;  // "no" or cc out of order
             }
         }
         return src;
     }
     /**
      * Appends s to buffer, either decomposing it or just merging it at the boundary.
      * An empty s is a no-op.
      * @param s the text to append
      * @param doDecompose true to decompose s into buffer;
      *        false to append s as is, passing its leading run of code points with
      *        non-zero combining class to the buffer together with the first and
      *        last combining class of that run
      * @param buffer the destination buffer
      */
     public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
         final int limit=s.length();
         if(limit==0) {
             return;
         }
         if(doDecompose) {
             decompose(s, 0, limit, buffer);
             return;
         }
         // Just merge the strings at the boundary:
         // measure the leading run of code points whose combining class is not 0.
         int c=Character.codePointAt(s, 0);
         final int firstCC=getCC(getNorm16(c));
         int prevCC=firstCC;
         int src=0;
         for(int cc=firstCC; cc!=0; cc=getCC(getNorm16(c))) {
             prevCC=cc;
             src+=Character.charCount(c);
             if(src>=limit) {
                 break;
             }
             c=Character.codePointAt(s, src);
         }
         buffer.append(s, 0, src, false, firstCC, prevCC);
         buffer.append(s, src, limit);
     }

     /**
      * Composes s[src, limit[ into buffer, or checks whether that range is already composed.
      * Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
      *
      * @param s the source text
      * @param src start index in s
      * @param limit end index in s (exclusive)
      * @param onlyContiguous passed on to the boundary tests and to recompose();
      *        the inline comments label the true case as FCC
      * @param doCompose true: normalize, appending the result to buffer;
      *        false: isNormalized (buffer must be empty and initialized)
      * @param buffer receives the output when doCompose; when !doCompose it is only
      *        used by the slow path for a comparison with the source text,
      *        followed by buffer.remove()
      * @return always true when doCompose; when !doCompose, true only if no
      *         difference from the source text was detected
      */
     public boolean compose(CharSequence s, int src, int limit,
                            boolean onlyContiguous,
                            boolean doCompose,
                            ReorderingBuffer buffer) {
         // Start of the source text that has not been written to the buffer yet.
         int prevBoundary=src;
         int minNoMaybeCP=minCompNoMaybeCP;

         for (;;) {
             // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
             // or with (compYes && ccc==0) properties.
             int prevSrc;
             int c = 0;
             int norm16 = 0;
             for (;;) {
                 if (src == limit) {
                     if (prevBoundary != limit && doCompose) {
                         buffer.append(s, prevBoundary, limit);
                     }
                     return true;
                 }
                 if( (c=s.charAt(src))<minNoMaybeCP ||
                     isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                 ) {
                     ++src;
                 } else {
                     // prevSrc is the start of the current character; src moves past its first code unit.
                     prevSrc = src++;
                     if (!UTF16Plus.isLeadSurrogate(c)) {
                         break;
                     } else {
                         char c2;
                         if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
                             ++src;
                             c = Character.toCodePoint((char)c, c2);
                             norm16 = normTrie.suppGet(c);
                             if (!isCompYesAndZeroCC(norm16)) {
                                 break;
                             }
                         }
                         // Otherwise (unpaired lead surrogate, or a supplementary code point
                         // that passes the fast-path test): keep scanning.
                     }
                 }
             }
             // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
             // The current character is either a "noNo" (has a mapping)
             // or a "maybeYes" (combines backward)
             // or a "yesYes" with ccc!=0.
             // It is not a Hangul syllable or Jamo L because those have "yes" properties.

             // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
             if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
                 if (!doCompose) {
                     return false;
                 }
                 // Fast path for mapping a character that is immediately surrounded by boundaries.
                 // In this case, we need not decompose around the current character.
                 if (isDecompNoAlgorithmic(norm16)) {
                     // Maps to a single isCompYesAndZeroCC character
                     // which also implies hasCompBoundaryBefore.
                     if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                             hasCompBoundaryBefore(s, src, limit)) {
                         if (prevBoundary != prevSrc) {
                             buffer.append(s, prevBoundary, prevSrc);
                         }
                         buffer.append(mapAlgorithmic(c, norm16), 0);
                         prevBoundary = src;
                         continue;
                     }
                 } else if (norm16 < minNoNoCompBoundaryBefore) {
                     // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
                     if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                             hasCompBoundaryBefore(s, src, limit)) {
                         if (prevBoundary != prevSrc) {
                             buffer.append(s, prevBoundary, prevSrc);
                         }
                         // Copy the stored mapping: a first unit with the length, then the mapping itself.
                         int mapping = norm16 >> OFFSET_SHIFT;
                         int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK;
                         buffer.append(extraData, mapping, mapping + length);
                         prevBoundary = src;
                         continue;
                     }
                 } else if (norm16 >= minNoNoEmpty) {
                     // The current character maps to nothing.
                     // Simply omit it from the output if there is a boundary before _or_ after it.
                     // The character itself implies no boundaries.
                     if (hasCompBoundaryBefore(s, src, limit) ||
                             hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) {
                         if (prevBoundary != prevSrc) {
                             buffer.append(s, prevBoundary, prevSrc);
                         }
                         prevBoundary = src;
                         continue;
                     }
                 }
                 // Other "noNo" type, or need to examine more text around this character:
                 // Fall through to the slow path.
             } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
                 // prevBoundary != prevSrc: there is a not-yet-written code unit before the Jamo.
                 char prev=s.charAt(prevSrc-1);
                 if(c<Hangul.JAMO_T_BASE) {
                     // The current character is a Jamo Vowel,
                     // compose with previous Jamo L and following Jamo T.
                     // The subtraction result is cast to char, so a prev below JAMO_L_BASE
                     // wraps to a large value and fails the range test below.
                     char l = (char)(prev-Hangul.JAMO_L_BASE);
                     if(l<Hangul.JAMO_L_COUNT) {
                         if (!doCompose) {
                             return false;
                         }
                         // t: index of the trailing consonant; 0 for none, -1 for "use the slow path".
                         int t;
                         if (src != limit &&
                                 0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) &&
                                 t < Hangul.JAMO_T_COUNT) {
                             // The next character is a Jamo T.
                             ++src;
                         } else if (hasCompBoundaryBefore(s, src, limit)) {
                             // No Jamo T follows, not even via decomposition.
                             t = 0;
                         } else {
                             t = -1;
                         }
                         if (t >= 0) {
                             int syllable = Hangul.HANGUL_BASE +
                                 (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) *
                                 Hangul.JAMO_T_COUNT + t;
                             --prevSrc;  // Replace the Jamo L as well.
                             if (prevBoundary != prevSrc) {
                                 buffer.append(s, prevBoundary, prevSrc);
                             }
                             buffer.append((char)syllable);
                             prevBoundary = src;
                             continue;
                         }
                         // If we see L+V+x where x!=T then we drop to the slow path,
                         // decompose and recompose.
                         // This is to deal with NFKC finding normal L and V but a
                         // compatibility variant of a T.
                         // We need to either fully compose that combination here
                         // (which would complicate the code and may not work with strange custom data)
                         // or use the slow path.
                     }
                 } else if (Hangul.isHangulLV(prev)) {
                     // The current character is a Jamo Trailing consonant,
                     // compose with previous Hangul LV that does not contain a Jamo T.
                     if (!doCompose) {
                         return false;
                     }
                     int syllable = prev + c - Hangul.JAMO_T_BASE;
                     --prevSrc;  // Replace the Hangul LV as well.
                     if (prevBoundary != prevSrc) {
                         buffer.append(s, prevBoundary, prevSrc);
                     }
                     buffer.append((char)syllable);
                     prevBoundary = src;
                     continue;
                 }
                 // No matching context, or may need to decompose surrounding text first:
                 // Fall through to the slow path.
             } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
                 // One or more combining marks that do not combine-back:
                 // Check for canonical order, copy unchanged if ok and
                 // if followed by a character with a boundary-before.
                 int cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
                 if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) {
                     // Fails FCD test, need to decompose and contiguously recompose.
                     if (!doCompose) {
                         return false;
                     }
                 } else {
                     // If !onlyContiguous (not FCC), then we ignore the tccc of
                     // the previous character which passed the quick check "yes && ccc==0" test.
                     int n16;
                     for (;;) {
                         if (src == limit) {
                             if (doCompose) {
                                 buffer.append(s, prevBoundary, limit);
                             }
                             return true;
                         }
                         int prevCC = cc;
                         c = Character.codePointAt(s, src);
                         n16 = normTrie.get(c);
                         if (n16 >= MIN_YES_YES_WITH_CC) {
                             cc = getCCFromNormalYesOrMaybe(n16);
                             if (prevCC > cc) {
                                 // Combining marks out of canonical order.
                                 if (!doCompose) {
                                     return false;
                                 }
                                 break;
                             }
                         } else {
                             break;
                         }
                         src += Character.charCount(c);
                     }
                     // src is after the last in-order combining mark.
                     // If there is a boundary here, then we continue with no change.
                     if (norm16HasCompBoundaryBefore(n16)) {
                         if (isCompYesAndZeroCC(n16)) {
                             // The following character would pass the fast path anyway: skip it now.
                             src += Character.charCount(c);
                         }
                         continue;
                     }
                     // Use the slow path. There is no boundary in [prevSrc, src[.
                 }
             }

             // Slow path: Find the nearest boundaries around the current character,
             // decompose and recompose.
             if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
                 // Include the preceding code point unless it has a boundary after it.
                 c = Character.codePointBefore(s, prevSrc);
                 norm16 = normTrie.get(c);
                 if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                     prevSrc -= Character.charCount(c);
                 }
             }
             if (doCompose && prevBoundary != prevSrc) {
                 buffer.append(s, prevBoundary, prevSrc);
             }
             int recomposeStartIndex=buffer.length();
             // We know there is not a boundary here.
             decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
                            buffer);
             // Decompose until the next boundary.
             src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous,
                                  buffer);
             recompose(buffer, recomposeStartIndex, onlyContiguous);
             if(!doCompose) {
                 // Check-only mode: the recomposed text must equal the source segment.
                 if(!buffer.equals(s, prevSrc, src)) {
                     return false;
                 }
                 buffer.remove();
             }
             prevBoundary=src;
         }
     }

     /**
      * Very similar to compose(): Make the same changes in both places if relevant.
      * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
      * !doSpan: quickCheck
      * @param s the source text
      * @param src start index in s
      * @param limit end index in s (exclusive)
      * @param onlyContiguous passed on to the boundary tests;
      *        the inline comments label the true case as FCC
      * @param doSpan true for spanQuickCheckYes, false for quickCheck
      * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
      *         bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
      *         then the quick check result is "no"
      */
     public int composeQuickCheck(CharSequence s, int src, int limit,
                                  boolean onlyContiguous, boolean doSpan) {
         // Becomes 1 ("maybe") only in the !doSpan case.
         int qcResult=0;
         int prevBoundary=src;
         int minNoMaybeCP=minCompNoMaybeCP;

         for(;;) {
             // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
             // or with (compYes && ccc==0) properties.
             int prevSrc;
             int c = 0;
             int norm16 = 0;
             for (;;) {
                 if(src==limit) {
                     return (src<<1)|qcResult;  // "yes" or "maybe"
                 }
                 if( (c=s.charAt(src))<minNoMaybeCP ||
                     isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                 ) {
                     ++src;
                 } else {
                     // prevSrc is the start of the current character; src moves past its first code unit.
                     prevSrc = src++;
                     if (!UTF16Plus.isLeadSurrogate(c)) {
                         break;
                     } else {
                         char c2;
                         if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
                             ++src;
                             c = Character.toCodePoint((char)c, c2);
                             norm16 = normTrie.suppGet(c);
                             if (!isCompYesAndZeroCC(norm16)) {
                                 break;
                             }
                         }
                         // Otherwise (unpaired lead surrogate, or a supplementary code point
                         // that passes the fast-path test): keep scanning.
                     }
                 }
             }
             // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
             // The current character is either a "noNo" (has a mapping)
             // or a "maybeYes" (combines backward)
             // or a "yesYes" with ccc!=0.
             // It is not a Hangul syllable or Jamo L because those have "yes" properties.

             // Move prevBoundary up to the current character, or to the code point before it
             // if neither side provides a boundary; prevNorm16 then holds that code point's data.
             int prevNorm16 = INERT;
             if (prevBoundary != prevSrc) {
                 prevBoundary = prevSrc;
                 if (!norm16HasCompBoundaryBefore(norm16)) {
                     c = Character.codePointBefore(s, prevSrc);
                     int n16 = getNorm16(c);
                     if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
                         prevBoundary -= Character.charCount(c);
                         prevNorm16 = n16;
                     }
                 }
             }

             if(isMaybeOrNonZeroCC(norm16)) {
                 int cc=getCCFromYesOrMaybe(norm16);
                 if (onlyContiguous /* FCC */ && cc != 0 &&
                         getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
                     // The [prevBoundary..prevSrc[ character
                     // passed the quick check "yes && ccc==0" test
                     // but is out of canonical order with the current combining mark.
                     // Intentionally empty: control falls through to the "no" return below.
                 } else {
                     // If !onlyContiguous (not FCC), then we ignore the tccc of
                     // the previous character which passed the quick check "yes && ccc==0" test.
                     for (;;) {
                         if (norm16 < MIN_YES_YES_WITH_CC) {
                             // A "maybe" character: record it, or end the span before it.
                             if (!doSpan) {
                                 qcResult = 1;
                             } else {
                                 return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
                             }
                         }
                         if (src == limit) {
                             return (src<<1) | qcResult;  // "yes" or "maybe"
                         }
                         int prevCC = cc;
                         c = Character.codePointAt(s, src);
                         norm16 = getNorm16(c);
                         if (isMaybeOrNonZeroCC(norm16)) {
                             cc = getCCFromYesOrMaybe(norm16);
                             if (!(prevCC <= cc || cc == 0)) {
                                 // Combining classes out of canonical order.
                                 break;
                             }
                         } else {
                             break;
                         }
                         src += Character.charCount(c);
                     }
                     // src is after the last in-order combining mark.
                     if (isCompYesAndZeroCC(norm16)) {
                         // The next character starts a new "yes" run: resume the fast path after it.
                         prevBoundary = src;
                         src += Character.charCount(c);
                         continue;
                     }
                 }
             }
             return prevBoundary<<1;  // "no"
         }
     }
     /**
      * Appends s to buffer, composing across the seam between the existing
      * buffer contents and s.
      * If the buffer is not empty and s does not start at a composition boundary,
      * then the text from the last boundary in the buffer through the first boundary
      * in s is removed from the buffer, joined, composed, and appended again.
      * The remainder of s is either composed or appended unchanged.
      * @param s the text to append
      * @param doCompose true to compose the remainder of s, false to append it as is
      * @param onlyContiguous passed on to the boundary searches and to compose()
      * @param buffer the destination buffer
      */
     public void composeAndAppend(CharSequence s,
                                  boolean doCompose,
                                  boolean onlyContiguous,
                                  ReorderingBuffer buffer) {
         final int limit=s.length();
         int start=0;
         if(!buffer.isEmpty()) {
             final int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous);
             if(firstStarterInSrc!=0) {
                 final int destLength=buffer.length();
                 final int lastStarterInDest=findPreviousCompBoundary(
                         buffer.getStringBuilder(), destLength, onlyContiguous);
                 final int suffixLength=destLength-lastStarterInDest;
                 // Join the tail of the buffer with the head of s, then compose the joined text.
                 StringBuilder middle=new StringBuilder(suffixLength+firstStarterInSrc+16);
                 middle.append(buffer.getStringBuilder(), lastStarterInDest, destLength);
                 buffer.removeSuffix(suffixLength);
                 middle.append(s, 0, firstStarterInSrc);
                 compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
                 start=firstStarterInSrc;
             }
         }
         if(!doCompose) {
             buffer.append(s, start, limit);
             return;
         }
         compose(s, start, limit, onlyContiguous, true, buffer);
     }
     /**
      * Processes s[src..limit[ for FCD form.
      *
      * <p>Dual functionality:
      * <ul>
      * <li>buffer!=null: normalize; the output is written to the buffer.
      * <li>buffer==null: isNormalized/quickCheck/spanQuickCheckYes; nothing is written.
      * </ul>
      *
      * @param s the source text
      * @param src start index in s
      * @param limit end index (exclusive) in s
      * @param buffer the destination, or null for check-only mode
      * @return with buffer==null: the last FCD-safe boundary if an out-of-order character
      *         is found, otherwise limit; with a buffer: limit
      */
     public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
         // Note: In this function we use buffer.appendZeroCC() because we track
         // the lead and trail combining classes here, rather than leaving it to
         // the ReorderingBuffer.
         // The exception is the call to decomposeShort() which uses the buffer
         // in the normal way.

         // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
         // Similar to the prevBoundary in the compose() implementation.
         int prevBoundary=src;
         int prevSrc;
         int c=0;
         // fcd16 of the previous character (lccc in the upper byte, tccc in the lower byte),
         // or negative (~c) when the lookup was deferred for a code unit below minLcccCP.
         int prevFCD16=0;
         int fcd16=0;

         for(;;) {
             // count code units with lccc==0
             for(prevSrc=src; src!=limit;) {
                 if((c=s.charAt(src))<minLcccCP) {
                     // Defer the data lookup: remember the code unit itself, bit-inverted.
                     prevFCD16=~c;
                     ++src;
                 } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
                     prevFCD16=0;
                     ++src;
                 } else {
                     // Assemble a supplementary code point if c starts a surrogate pair.
                     if (UTF16Plus.isLeadSurrogate(c)) {
                         char c2;
                         if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
                             c = Character.toCodePoint((char)c, c2);
                         }
                     }
                     if((fcd16=getFCD16FromNormData(c))<=0xff) {
                         // lccc==0: keep scanning.
                         prevFCD16=fcd16;
                         src+=Character.charCount(c);
                     } else {
                         // lccc!=0: leave the fast loop with c and fcd16 set.
                         break;
                     }
                 }
             }
             // copy these code units all at once
             if(src!=prevSrc) {
                 if(src==limit) {
                     if(buffer!=null) {
                         buffer.flushAndAppendZeroCC(s, prevSrc, src);
                     }
                     break;
                 }
                 prevBoundary=src;
                 // We know that the previous character's lccc==0.
                 if(prevFCD16<0) {
                     // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
                     int prev=~prevFCD16;
                     if(prev<minDecompNoCP) {
                         prevFCD16=0;
                     } else {
                         prevFCD16=getFCD16FromNormData(prev);
                         if(prevFCD16>1) {
                             // tccc>1: the boundary is before the previous (single-unit) character.
                             --prevBoundary;
                         }
                     }
                 } else {
                     int p=src-1;
                     if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
                         Character.isHighSurrogate(s.charAt(p-1))
                     ) {
                         --p;
                         // Need to fetch the previous character's FCD value because
                         // prevFCD16 was just for the trail surrogate code point.
                         prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
                         // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
                     }
                     if(prevFCD16>1) {
                         // tccc>1: the boundary is before the previous character.
                         prevBoundary=p;
                     }
                 }
                 if(buffer!=null) {
                     // The last lccc==0 character is excluded from the
                     // flush-and-append call in case it needs to be modified.
                     buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
                     buffer.append(s, prevBoundary, src);
                 }
                 // The start of the current character (c).
                 prevSrc=src;
             } else if(src==limit) {
                 break;
             }

             src+=Character.charCount(c);
             // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
             // Check for proper order, and decompose locally if necessary.
             if((prevFCD16&0xff)<=(fcd16>>8)) {
                 // proper order: prev tccc <= current lccc
                 if((fcd16&0xff)<=1) {
                     prevBoundary=src;
                 }
                 if(buffer!=null) {
                     buffer.appendZeroCC(c);
                 }
                 prevFCD16=fcd16;
                 continue;
             } else if(buffer==null) {
                 return prevBoundary;  // quick check "no"
             } else {
                 /*
                  * Back out the part of the source that we copied or appended
                  * already but is now going to be decomposed.
                  * prevSrc is set to after what was copied/appended.
                  */
                 buffer.removeSuffix(prevSrc-prevBoundary);
                 /*
                  * Find the part of the source that needs to be decomposed,
                  * up to the next safe boundary.
                  */
                 src=findNextFCDBoundary(s, src, limit);
                 /*
                  * The source text does not fulfill the conditions for FCD.
                  * Decompose and reorder a limited piece of the text.
                  */
                 decomposeShort(s, prevBoundary, src, false, false, buffer);
                 prevBoundary=src;
                 prevFCD16=0;
             }
         }
         // Both loop exits above happen only when src==limit.
         return src;
     }
     /**
      * Appends s to a buffer that may already hold text, handling the seam with makeFCD().
      *
      * <p>If the buffer is not empty and s does not start at an FCD boundary,
      * then the tail of the buffer (from its last FCD boundary) and the head of s
      * (up to its first FCD boundary) are joined and passed through makeFCD().
      * The remainder of s goes through makeFCD() if doMakeFCD is true,
      * otherwise it is appended as is.
      *
      * @param s the text to append
      * @param doMakeFCD true to run makeFCD() on the remainder of s, false to append it unchanged
      * @param buffer the destination
      */
     public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
         final int limit = s.length();
         int src = 0;
         if (!buffer.isEmpty()) {
             final int firstBoundaryInSrc = findNextFCDBoundary(s, 0, limit);
             if (firstBoundaryInSrc != 0) {
                 final int lastBoundaryInDest =
                         findPreviousFCDBoundary(buffer.getStringBuilder(), buffer.length());
                 final int destTailLength = buffer.length() - lastBoundaryInDest;
                 final StringBuilder middle =
                         new StringBuilder(destTailLength + firstBoundaryInSrc + 16);
                 // Move the buffer's tail into the scratch text, then add the head of s.
                 middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
                 buffer.removeSuffix(destTailLength);
                 middle.append(s, 0, firstBoundaryInSrc);
                 makeFCD(middle, 0, middle.length(), buffer);
                 src = firstBoundaryInSrc;
             }
         }
         if (!doMakeFCD) {
             buffer.append(s, src, limit);
             return;
         }
         makeFCD(s, src, limit, buffer);
     }

     /**
      * Tests whether there is a decomposition boundary before c.
      *
      * @param c a code point
      * @return true if c is below minLcccCP, or is a BMP code point that passes the
      *         singleLeadMightHaveNonZeroFCD16() filter, or its norm16 has a boundary before
      */
     public boolean hasDecompBoundaryBefore(int c) {
         if (c < minLcccCP) {
             return true;
         }
         if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
             return true;
         }
         return norm16HasDecompBoundaryBefore(getNorm16(c));
     }
     /**
      * Tests whether a character with this norm16 value has a decomposition boundary before it.
      *
      * @param norm16 the character's norm16 data value
      * @return true if there is a boundary before; for a character with a mapping in
      *         extraData this means that its lead combining class is 0
      */
     public boolean norm16HasDecompBoundaryBefore(int norm16) {
         if (norm16 < minNoNoCompNoMaybeCC) {
             return true;
         }
         if (norm16 >= limitNoNo) {
             return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
         }
         // c decomposes: inspect the variable-length extra data.
         final int mappingIndex = norm16 >> OFFSET_SHIFT;
         final int firstUnit = extraData.charAt(mappingIndex);
         if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) == 0) {
             return true;  // no ccc/lccc word, so leadCC==0
         }
         // The word before the first unit carries lccc in its upper byte.
         final int cccLcccWord = extraData.charAt(mappingIndex - 1);
         return (cccLcccWord & 0xff00) == 0;
     }
     /**
      * Tests whether there is a decomposition boundary after c.
      *
      * @param c a code point
      * @return true if c is below minDecompNoCP, or is a BMP code point that passes the
      *         singleLeadMightHaveNonZeroFCD16() filter, or its norm16 has a boundary after
      */
     public boolean hasDecompBoundaryAfter(int c) {
         return c < minDecompNoCP
                 || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c))
                 || norm16HasDecompBoundaryAfter(getNorm16(c));
     }
     /**
      * Tests whether a character with this norm16 value has a decomposition boundary after it.
      *
      * @param norm16 the character's norm16 data value
      * @return true if there is a boundary after; for a character with a mapping in
      *         extraData this means trailCC==0, or trailCC==1 together with leadCC==0
      */
     public boolean norm16HasDecompBoundaryAfter(int norm16) {
         if (norm16 <= minYesNo || isHangulLVT(norm16)) {
             return true;
         }
         if (norm16 >= limitNoNo) {
             return isMaybeOrNonZeroCC(norm16)
                     ? (norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT)
                     // Maps to an isCompYesAndZeroCC.
                     : (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
         }
         // c decomposes: inspect the variable-length extra data.
         final int mappingIndex = norm16 >> OFFSET_SHIFT;
         final int firstUnit = extraData.charAt(mappingIndex);
         // The upper byte of the first unit is the trail combining class.
         final int trailCC = firstUnit >> 8;
         if (trailCC > 1) {
             return false;
         }
         if (trailCC == 0) {
             return true;
         }
         // trailCC==1: there is a boundary after only if leadCC==0,
         // which is the same test as for a boundary before.
         return (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) == 0
                 || (extraData.charAt(mappingIndex - 1) & 0xff00) == 0;
     }
     /**
      * @param c a code point
      * @return the result of isDecompYesAndZeroCC() for the norm16 value of c
      */
     public boolean isDecompInert(int c) {
         final int norm16 = getNorm16(c);
         return isDecompYesAndZeroCC(norm16);
     }

     /**
      * Tests whether there is a composition boundary before c.
      *
      * @param c a code point
      * @return true if c is below minCompNoMaybeCP or its norm16 has a boundary before
      */
     public boolean hasCompBoundaryBefore(int c) {
         if (c < minCompNoMaybeCP) {
             return true;
         }
         return norm16HasCompBoundaryBefore(getNorm16(c));
     }
     /**
      * Tests whether there is a composition boundary after c.
      *
      * @param c a code point
      * @param onlyContiguous passed through to norm16HasCompBoundaryAfter()
      * @return true if the norm16 value of c has a composition boundary after it
      */
     public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) {
         final int norm16 = getNorm16(c);
         return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
     }
     /**
      * Tests whether c is inert for composition.
      *
      * @param c a code point
      * @param onlyContiguous if true, additionally require an inert norm16 or
      *        a first mapping unit of at most 0x1ff
      * @return true if c is isCompYesAndZeroCC() and has the HAS_COMP_BOUNDARY_AFTER flag,
      *         and passes the extra onlyContiguous test when requested
      */
     public boolean isCompInert(int c, boolean onlyContiguous) {
         final int norm16 = getNorm16(c);
         if (!isCompYesAndZeroCC(norm16)) {
             return false;
         }
         if ((norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
             return false;
         }
         if (!onlyContiguous || isInert(norm16)) {
             return true;
         }
         return extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff;
     }

     /** Same as {@link #hasDecompBoundaryBefore(int)}. */
     public boolean hasFCDBoundaryBefore(int c) {
         return hasDecompBoundaryBefore(c);
     }
     /** Same as {@link #hasDecompBoundaryAfter(int)}. */
     public boolean hasFCDBoundaryAfter(int c) {
         return hasDecompBoundaryAfter(c);
     }
     /**
      * @param c a code point
      * @return true if the fcd16 value of c is at most 1
      */
     public boolean isFCDInert(int c) {
         final int fcd16 = getFCD16(c);
         return fcd16 <= 1;
     }

     /** @return true if norm16 lies in the range minMaybeYes..JAMO_VT (inclusive) */
     private boolean isMaybe(int norm16) {
         return norm16 >= minMaybeYes && norm16 <= JAMO_VT;
     }
     /** @return true if norm16 is at least minMaybeYes */
     private boolean isMaybeOrNonZeroCC(int norm16) {
         return minMaybeYes <= norm16;
     }
     /** @return true if norm16 is the INERT value */
     private static boolean isInert(int norm16) {
         return INERT == norm16;
     }
     /** @return true if norm16 is the JAMO_L value */
     private static boolean isJamoL(int norm16) {
         return JAMO_L == norm16;
     }
     /** @return true if norm16 is the JAMO_VT value */
     private static boolean isJamoVT(int norm16) {
         return JAMO_VT == norm16;
     }
     /** @return the norm16 value for Hangul LVT: minYesNoMappingsOnly with HAS_COMP_BOUNDARY_AFTER set */
     private int hangulLVT() {
         return HAS_COMP_BOUNDARY_AFTER | minYesNoMappingsOnly;
     }
     /** @return true if norm16 equals minYesNo, the value used for Hangul LV */
     private boolean isHangulLV(int norm16) {
         return minYesNo == norm16;
     }
     /** @return true if norm16 equals the hangulLVT() value */
     private boolean isHangulLVT(int norm16) {
         final int lvt = hangulLVT();
         return lvt == norm16;
     }
     /** @return true if norm16 is below minNoNo */
     private boolean isCompYesAndZeroCC(int norm16) {
         return minNoNo > norm16;
     }
     // UBool isCompYes(uint16_t norm16) const {
     //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
     // }
     // UBool isCompYesOrMaybe(uint16_t norm16) const {
     //     return norm16<minNoNo || minMaybeYes<=norm16;
     // }
     // private boolean hasZeroCCFromDecompYes(int norm16) {
     //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
     // }
     /**
      * @return true if norm16 is below minYesNo, or equals JAMO_VT,
      *         or lies in the range minMaybeYes..MIN_NORMAL_MAYBE_YES (inclusive)
      */
     private boolean isDecompYesAndZeroCC(int norm16) {
         if (norm16 < minYesNo) {
             return true;
         }
         if (norm16 == JAMO_VT) {
             return true;
         }
         return norm16 >= minMaybeYes && norm16 <= MIN_NORMAL_MAYBE_YES;
     }
     /**
      * A cheaper variant of isDecompYesAndZeroCC(): slightly faster and simpler,
      * but it leaves out the MaybeYes characters which combine-forward and have ccc=0.
      * (Standard Unicode 10 normalization does not have such characters.)
      *
      * @return true if norm16 is below minYesNo, or equals MIN_NORMAL_MAYBE_YES or JAMO_VT
      */
     private boolean isMostDecompYesAndZeroCC(int norm16) {
         if (norm16 < minYesNo) {
             return true;
         }
         return norm16 == MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
     }
     /** @return true if norm16 is at least limitNoNo */
     private boolean isDecompNoAlgorithmic(int norm16) {
         return limitNoNo <= norm16;
     }

     // For use with isCompYes().
     // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
     // static uint8_t getCCFromYes(uint16_t norm16) {
     //     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
     // }
     /**
      * Reads the combining class for a norm16 value that has a mapping in extraData.
      *
      * @return the lower byte of the ccc/lccc word that precedes the mapping's first unit,
      *         or 0 if the first unit does not have the MAPPING_HAS_CCC_LCCC_WORD flag
      */
     private int getCCFromNoNo(int norm16) {
         final int mappingIndex = norm16 >> OFFSET_SHIFT;
         final boolean hasCccLcccWord =
                 (extraData.charAt(mappingIndex) & MAPPING_HAS_CCC_LCCC_WORD) != 0;
         if (!hasCccLcccWord) {
             return 0;
         }
         return extraData.charAt(mappingIndex - 1) & 0xff;
     }
     /**
      * @return 0 for norm16 up to minYesNo (yesYes and Hangul LV have ccc=tccc=0),
      *         otherwise the tccc stored in the upper byte of the mapping's first unit
      */
     int getTrailCCFromCompYesAndZeroCC(int norm16) {
         // For Hangul LVT the lookup harmlessly fetches a firstUnit with tccc=0.
         return norm16 <= minYesNo
                 ? 0
                 : extraData.charAt(norm16 >> OFFSET_SHIFT) >> 8;  // tccc from yesNo
     }

     /**
      * Applies the delta encoded in norm16 to c. Requires algorithmic-NoNo.
      *
      * @return c plus the delta (norm16>>DELTA_SHIFT) minus centerNoNoDelta
      */
     private int mapAlgorithmic(int c, int norm16) {
         final int delta = (norm16 >> DELTA_SHIFT) - centerNoNoDelta;
         return c + delta;
     }

     // Requires minYesNo<norm16<limitNoNo.
     // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); }

     /**
      * @return index into maybeYesCompositions, or -1 if norm16 is below JAMO_L
      *         or at least MIN_NORMAL_MAYBE_YES
      */
     private int getCompositionsListForDecompYes(int norm16) {
         if (norm16 < JAMO_L || MIN_NORMAL_MAYBE_YES <= norm16) {
             return -1;
         }
         int offset = norm16 - minMaybeYes;
         if (offset < 0) {
             // norm16<minMaybeYes: index into extraData which is a substring at
             //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
             // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
             // (for yesYes; if Jamo L: harmless empty list)
             offset += MIN_NORMAL_MAYBE_YES;
         }
         return offset >> OFFSET_SHIFT;
     }
     /**
      * Locates the compositions list of a composite, which has both a mapping and
      * a compositions list; the list follows the mapping.
      *
      * @return index into maybeYesCompositions
      */
     private int getCompositionsListForComposite(int norm16) {
         // Start of the mapping in maybeYesCompositions.
         final int mappingStart = ((MIN_NORMAL_MAYBE_YES - minMaybeYes) + norm16) >> OFFSET_SHIFT;
         final int mappingLength =
                 maybeYesCompositions.charAt(mappingStart) & MAPPING_LENGTH_MASK;
         // Skip the first unit (which holds the mapping length) and the mapping itself.
         return mappingStart + 1 + mappingLength;
     }
     /**
      * Requires minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES.
      *
      * @return (norm16-minMaybeYes)>>OFFSET_SHIFT
      */
     private int getCompositionsListForMaybe(int norm16) {
         final int offset = norm16 - minMaybeYes;
         return offset >> OFFSET_SHIFT;
     }
     /**
      * Chooses between the DecompYes and the composite compositions-list lookups.
      *
      * @param norm16 norm16 value of a code point which must have compositions
      * @return index into maybeYesCompositions
      */
     private int getCompositionsList(int norm16) {
         if (isDecompYes(norm16)) {
             return getCompositionsListForDecompYes(norm16);
         }
         return getCompositionsListForComposite(norm16);
     }

     /**
      * Decomposes a short piece of text which is likely to contain characters that
      * fail the quick check loop and/or where the quick check loop's overhead
      * is unlikely to be amortized.
      * Called by the compose() and makeFCD() implementations.
      *
      * @param s the source text
      * @param src start index in s
      * @param limit end index (exclusive) in s
      * @param stopAtCompBoundary if true, stop before a character that has a composition
      *        boundary before it, or after one that has a composition boundary after it
      * @param onlyContiguous passed through to norm16HasCompBoundaryAfter()
      * @param buffer receives the decomposed characters
      * @return the index in s where decomposition stopped
      */
     private int decomposeShort(
             CharSequence s, int src, int limit,
             boolean stopAtCompBoundary, boolean onlyContiguous,
             ReorderingBuffer buffer) {
         int pos = src;
         while (pos < limit) {
             final int c = Character.codePointAt(s, pos);
             final int norm16;
             if (stopAtCompBoundary) {
                 if (c < minCompNoMaybeCP) {
                     return pos;
                 }
                 norm16 = getNorm16(c);
                 if (norm16HasCompBoundaryBefore(norm16)) {
                     return pos;
                 }
             } else {
                 norm16 = getNorm16(c);
             }
             pos += Character.charCount(c);
             decompose(c, norm16, buffer);
             if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                 return pos;
             }
         }
         return pos;
     }
     /**
      * Appends the decomposition of c to the buffer, together with its combining classes.
      *
      * @param c the code point to decompose
      * @param norm16 the norm16 value of c
      * @param buffer receives the result
      */
     private void decompose(int c, int norm16, ReorderingBuffer buffer) {
         if (norm16 >= limitNoNo) {
             if (isMaybeOrNonZeroCC(norm16)) {
                 // No mapping: append c itself with its combining class.
                 buffer.append(c, getCCFromYesOrMaybe(norm16));
                 return;
             }
             // Maps to an isCompYesAndZeroCC.
             c = mapAlgorithmic(c, norm16);
             norm16 = getRawNorm16(c);
         }
         if (norm16 < minYesNo) {
             // c does not decompose
             buffer.append(c, 0);
             return;
         }
         if (isHangulLV(norm16) || isHangulLVT(norm16)) {
             // Hangul syllable: decompose algorithmically
             Hangul.decompose(c, buffer);
             return;
         }
         // c decomposes: read the mapping and the lead/trail cc's
         // from the variable-length extra data.
         final int firstUnitIndex = norm16 >> OFFSET_SHIFT;
         final int firstUnit = extraData.charAt(firstUnitIndex);
         final int trailCC = firstUnit >> 8;
         final int leadCC = (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0
                 ? extraData.charAt(firstUnitIndex - 1) >> 8
                 : 0;
         final int mappingStart = firstUnitIndex + 1;  // skip over the firstUnit
         final int mappingLimit = mappingStart + (firstUnit & MAPPING_LENGTH_MASK);
         buffer.append(extraData, mappingStart, mappingLimit, true, leadCC, trailCC);
     }

     /**
      * Looks up the recomposition result for a forward-combining "lead" character,
      * given as the index of its compositions list, and a backward-combining
      * "trail" character.
      *
      * <p>If the two combine, the result is the "compositeAndFwd" value:
      * <pre>
      * Bits 21..1  composite character
      * Bit      0  set if the composite is a forward-combining starter
      * </pre>
      * Otherwise the result is -1.
      *
      * <p>A compositions list consists of (trail, compositeAndFwd) pair entries,
      * each encoded in two or three 16-bit units.
      * The high bit of the first unit is set on the last entry.
      *
      * <p>Entries are sorted by ascending trail character without duplicates,
      * and are searched linearly.
      *
      * <p>See normalizer2impl.h for a more detailed description
      * of the compositions list format.
      *
      * @param compositions the string that contains the compositions list
      * @param list start index of the list in compositions
      * @param trail the trail character
      * @return the compositeAndFwd value, or -1
      */
     private static int combine(String compositions, int list, int trail) {
         if (trail < COMP_1_TRAIL_LIMIT) {
             // trail character is 0..33FF
             // result entry may have 2 or 3 units
             final int key1 = trail << 1;
             int firstUnit = compositions.charAt(list);
             while (firstUnit < key1) {
                 list += 2 + (firstUnit & COMP_1_TRIPLE);
                 firstUnit = compositions.charAt(list);
             }
             if ((firstUnit & COMP_1_TRAIL_MASK) != key1) {
                 return -1;
             }
             if ((firstUnit & COMP_1_TRIPLE) == 0) {
                 return compositions.charAt(list + 1);
             }
             return (compositions.charAt(list + 1) << 16) | compositions.charAt(list + 2);
         }
         // trail character is 3400..10FFFF
         // result entry has 3 units
         final int key1 = COMP_1_TRAIL_LIMIT + ((trail >> COMP_1_TRAIL_SHIFT) & ~COMP_1_TRIPLE);
         final int key2 = (trail << COMP_2_TRAIL_SHIFT) & 0xffff;
         while (true) {
             final int firstUnit = compositions.charAt(list);
             if (key1 > firstUnit) {
                 // Entry sorts before the one we want: skip it.
                 list += 2 + (firstUnit & COMP_1_TRIPLE);
                 continue;
             }
             if (key1 != (firstUnit & COMP_1_TRAIL_MASK)) {
                 return -1;
             }
             final int secondUnit = compositions.charAt(list + 1);
             if (key2 > secondUnit) {
                 if ((firstUnit & COMP_1_LAST_TUPLE) != 0) {
                     return -1;
                 }
                 list += 3;
                 continue;
             }
             if (key2 == (secondUnit & COMP_2_TRAIL_MASK)) {
                 return ((secondUnit & ~COMP_2_TRAIL_MASK) << 16) | compositions.charAt(list + 2);
             }
             return -1;
         }
     }
     /**
      * Adds every composite found in a compositions list to the set, recursing into
      * the compositions lists of composites that themselves combine forward.
      *
      * @param list index of some character's compositions list in maybeYesCompositions
      * @param set recursively receives the composites from these compositions
      */
     private void addComposites(int list, UnicodeSet set) {
         int pos = list;
         boolean isLastEntry;
         do {
             final int firstUnit = maybeYesCompositions.charAt(pos);
             final int compositeAndFwd;
             if ((firstUnit & COMP_1_TRIPLE) != 0) {
                 // Three-unit entry: the result spans the second and third units.
                 compositeAndFwd =
                         ((maybeYesCompositions.charAt(pos + 1) & ~COMP_2_TRAIL_MASK) << 16)
                         | maybeYesCompositions.charAt(pos + 2);
                 pos += 3;
             } else {
                 compositeAndFwd = maybeYesCompositions.charAt(pos + 1);
                 pos += 2;
             }
             final int composite = compositeAndFwd >> 1;
             final boolean combinesForward = (compositeAndFwd & 1) != 0;
             if (combinesForward) {
                 addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
             }
             set.add(composite);
             isLastEntry = (firstUnit & COMP_1_LAST_TUPLE) != 0;
         } while (!isLastEntry);
     }
     /**
      * Recomposes the buffer text starting at recomposeStartIndex
      * (which is in NFD - decomposed and canonically ordered),
      * and truncates the buffer contents.
      *
      * <p>Note that recomposition never lengthens the text:
      * Any character consists of either one or two code units;
      * a composition may contain at most one more code unit than the original starter,
      * while the combining mark that is removed has at least one code unit.
      *
      * @param buffer the buffer whose StringBuilder is modified in place; flushed at the end
      * @param recomposeStartIndex index in the buffer text where recomposition starts
      * @param onlyContiguous if true (FCC), any non-combining character with cc!=0
      *        blocks further composition with the current starter
      */
     private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
                            boolean onlyContiguous) {
         StringBuilder sb=buffer.getStringBuilder();
         int p=recomposeStartIndex;
         if(p==sb.length()) {
             return;
         }

         int starter, pRemove;
         int compositionsList;
         int c, compositeAndFwd;
         int norm16;
         int cc, prevCC;
         boolean starterIsSupplementary;

         // Some of the following variables are not used until we have a forward-combining starter
         // and are only initialized now to avoid compiler warnings.
         compositionsList=-1;  // used as indicator for whether we have a forward-combining starter
         starter=-1;
         starterIsSupplementary=false;
         prevCC=0;

         for(;;) {
             c=sb.codePointAt(p);
             p+=Character.charCount(c);
             norm16=getNorm16(c);
             cc=getCCFromYesOrMaybe(norm16);
             if( // this character combines backward and
                 isMaybe(norm16) &&
                 // we have seen a starter that combines forward and
                 compositionsList>=0 &&
                 // the backward-combining character is not blocked
                 (prevCC<cc || prevCC==0)
             ) {
                 if(isJamoVT(norm16)) {
                     // c is a Jamo V/T, see if we can compose it with the previous character.
                     if(c<Hangul.JAMO_T_BASE) {
                         // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                         char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
                         if(prev<Hangul.JAMO_L_COUNT) {
                             pRemove=p-1;
                             char syllable=(char)
                                 (Hangul.HANGUL_BASE+
                                  (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
                                  Hangul.JAMO_T_COUNT);
                             char t;
                             if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
                                 ++p;
                                 syllable+=t;  // The next character was a Jamo T.
                             }
                             sb.setCharAt(starter, syllable);
                             // remove the Jamo V/T
                             sb.delete(pRemove, p);
                             p=pRemove;
                         }
                     }
                     /*
                      * No "else" for Jamo T:
                      * Since the input is in NFD, there are no Hangul LV syllables that
                      * a Jamo T could combine with.
                      * All Jamo Ts are combined above when handling Jamo Vs.
                      */
                     if(p==sb.length()) {
                         break;
                     }
                     compositionsList=-1;
                     continue;
                 } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
                     // The starter and the combining mark (c) do combine.
                     int composite=compositeAndFwd>>1;

                     // Remove the combining mark.
                     pRemove=p-Character.charCount(c);  // pRemove & p: start & limit of the combining mark
                     sb.delete(pRemove, p);
                     p=pRemove;
                     // Replace the starter with the composite.
                     if(starterIsSupplementary) {
                         if(composite>0xffff) {
                             // both are supplementary
                             sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
                             sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
                         } else {
                             // Store the BMP composite in the starter's first unit.
                             // (This must be the composite, not the combining mark c
                             // which was just removed.)
                             sb.setCharAt(starter, (char)composite);
                             sb.deleteCharAt(starter+1);
                             // The composite is shorter than the starter,
                             // move the intermediate characters forward one.
                             starterIsSupplementary=false;
                             --p;
                         }
                     } else if(composite>0xffff) {
                         // The composite is longer than the starter,
                         // move the intermediate characters back one.
                         starterIsSupplementary=true;
                         sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
                         sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
                         ++p;
                     } else {
                         // both are on the BMP
                         sb.setCharAt(starter, (char)composite);
                     }

                     // Keep prevCC because we removed the combining mark.

                     if(p==sb.length()) {
                         break;
                     }
                     // Is the composite a starter that combines forward?
                     if((compositeAndFwd&1)!=0) {
                         compositionsList=
                             getCompositionsListForComposite(getRawNorm16(composite));
                     } else {
                         compositionsList=-1;
                     }

                     // We combined; continue with looking for compositions.
                     continue;
                 }
             }

             // no combination this time
             prevCC=cc;
             if(p==sb.length()) {
                 break;
             }

             // If c did not combine, then check if it is a starter.
             if(cc==0) {
                 // Found a new starter.
                 if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
                     // It may combine with something, prepare for it.
                     if(c<=0xffff) {
                         starterIsSupplementary=false;
                         starter=p-1;
                     } else {
                         starterIsSupplementary=true;
                         starter=p-2;
                     }
                 }
             } else if(onlyContiguous) {
                 // FCC: no discontiguous compositions; any intervening character blocks.
                 compositionsList=-1;
             }
         }
         buffer.flush();
     }

     /**
      * Composes a pair of code points.
      *
      * @param a the first ("lead") code point; an out-of-range value yields -1
      * @param b the second ("trail") code point; an out-of-range value yields -1
      * @return the composite of a and b, or -1 if they do not combine
      */
     public int composePair(int a, int b) {
         final int norm16 = getNorm16(a);  // maps an out-of-range 'a' to inert norm16
         if (isInert(norm16)) {
             return -1;
         }
         final int list;
         if (norm16 < minYesNoMappingsOnly) {
             // a combines forward.
             if (isJamoL(norm16)) {
                 final int vIndex = b - Hangul.JAMO_V_BASE;
                 if (vIndex < 0 || vIndex >= Hangul.JAMO_V_COUNT) {
                     return -1;
                 }
                 return Hangul.HANGUL_BASE
                         + ((a - Hangul.JAMO_L_BASE) * Hangul.JAMO_V_COUNT + vIndex)
                             * Hangul.JAMO_T_COUNT;
             }
             if (isHangulLV(norm16)) {
                 final int tIndex = b - Hangul.JAMO_T_BASE;
                 // Index 0 is excluded on purpose.
                 if (tIndex <= 0 || tIndex >= Hangul.JAMO_T_COUNT) {
                     return -1;
                 }
                 return a + tIndex;
             }
             // 'a' has a compositions list in extraData
             int index = ((MIN_NORMAL_MAYBE_YES - minMaybeYes) + norm16) >> OFFSET_SHIFT;
             if (norm16 > minYesNo) {
                 // composite 'a' has both mapping & compositions list:
                 // skip the first unit (with the mapping length) and the mapping.
                 index += 1 + (maybeYesCompositions.charAt(index) & MAPPING_LENGTH_MASK);
             }
             list = index;
         } else if (norm16 < minMaybeYes || MIN_NORMAL_MAYBE_YES <= norm16) {
             return -1;
         } else {
             list = getCompositionsListForMaybe(norm16);  // offset into maybeYesCompositions
         }
         // combine(list, b) requires a valid code point b
         if (b < 0 || 0x10ffff < b) {
             return -1;
         }
         return combine(maybeYesCompositions, list, b) >> 1;
     }

     /**
      * Does c have a composition boundary before it?
      * True if its decomposition begins with a character that has
      * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
      * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
      * (isCompYesAndZeroCC()) so we need not decompose.
      *
      * @param c a code point
      * @param norm16 the norm16 value of c
      */
     private boolean hasCompBoundaryBefore(int c, int norm16) {
         if (c < minCompNoMaybeCP) {
             return true;
         }
         return norm16HasCompBoundaryBefore(norm16);
     }
     /** @return true if norm16 is below minNoNoCompNoMaybeCC or isAlgorithmicNoNo() */
     private boolean norm16HasCompBoundaryBefore(int norm16) {
         if (norm16 < minNoNoCompNoMaybeCC) {
             return true;
         }
         return isAlgorithmicNoNo(norm16);
     }
     /**
      * @return true if src is at the limit, or the code point at src
      *         has a composition boundary before it
      */
     private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) {
         if (src == limit) {
             return true;
         }
         final int c = Character.codePointAt(s, src);
         return hasCompBoundaryBefore(c);
     }
     /**
      * @return true if norm16 has the HAS_COMP_BOUNDARY_AFTER flag and, when
      *         onlyContiguous, also passes isTrailCC01ForCompBoundaryAfter()
      */
     private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) {
         if ((norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
             return false;
         }
         return !onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16);
     }
     /**
      * @return true if p is at the start, or the code point before p
      *         has a composition boundary after it
      */
     private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) {
         if (start == p) {
             return true;
         }
         final int c = Character.codePointBefore(s, p);
         return hasCompBoundaryAfter(c, onlyContiguous);
     }
     /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
     private boolean isTrailCC01ForCompBoundaryAfter(int norm16) {
         if (isInert(norm16)) {
             return true;
         }
         if (isDecompNoAlgorithmic(norm16)) {
             return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
         }
         return extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff;
     }

     /**
      * Scans backward from p for a composition boundary.
      *
      * @param s the text
      * @param p the index to start from
      * @param onlyContiguous passed through to norm16HasCompBoundaryAfter()
      * @return the index of the nearest boundary at or before p, or 0 if none is found
      */
     private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) {
         while (p > 0) {
             final int c = Character.codePointBefore(s, p);
             final int norm16 = getNorm16(c);
             if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                 return p;  // boundary after c
             }
             p -= Character.charCount(c);
             if (hasCompBoundaryBefore(c, norm16)) {
                 return p;  // boundary before c
             }
         }
         return p;
     }
     /**
      * Scans forward from p for a composition boundary.
      *
      * @param s the text
      * @param p the index to start from
      * @param limit end index (exclusive) of the scan
      * @param onlyContiguous passed through to norm16HasCompBoundaryAfter()
      * @return the index of the nearest boundary at or after p; this is at least limit
      *         if none is found before it
      */
     private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) {
         while(p<limit) {
             int c=Character.codePointAt(s, p);
             // Look up the data via getNorm16(), like findPreviousCompBoundary() and the
             // other code-point-based callers in this class, rather than reading the trie
             // directly. NOTE(review): this is expected to differ from a direct trie lookup
             // only for unpaired lead surrogates -- confirm against getNorm16().
             int norm16=getNorm16(c);
             if(hasCompBoundaryBefore(c, norm16)) {
                 break;
             }
             p+=Character.charCount(c);
             if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                 break;
             }
         }
         return p;
     }

     /**
      * Scans backward from index p for the nearest FCD boundary.
      * For each preceding code point: stops after it if it is below minDecompNoCP
      * or its norm16 value has a decomposition boundary after; otherwise steps in
      * front of it and stops there if it has a decomposition boundary before.
      *
      * @param s the text
      * @param p the index to start scanning backward from
      * @return the index of the boundary found, or the index where the scan
      *         reached the start of the text
      */
     private int findPreviousFCDBoundary(CharSequence s, int p) {
         int pos = p;
         while (pos > 0) {
             final int cp = Character.codePointBefore(s, pos);
             if (cp < minDecompNoCP) {
                 // Below the threshold: no data lookup needed.
                 return pos;
             }
             final int cpNorm16 = getNorm16(cp);
             if (norm16HasDecompBoundaryAfter(cpNorm16)) {
                 return pos;
             }
             pos -= Character.charCount(cp);
             if (norm16HasDecompBoundaryBefore(cpNorm16)) {
                 return pos;
             }
         }
         return pos;
     }
     /**
      * Scans forward from index p for the nearest FCD boundary.
      * For each code point: stops in front of it if it is below minLcccCP or its
      * norm16 value has a decomposition boundary before; otherwise steps over it
      * and stops there if it has a decomposition boundary after.
      *
      * @param s the text
      * @param p the index to start scanning forward from
      * @param limit the index at which scanning stops
      * @return the index of the boundary found, or the index at which the scan
      *         reached or passed limit
      */
     private int findNextFCDBoundary(CharSequence s, int p, int limit) {
         int pos = p;
         while (pos < limit) {
             final int cp = Character.codePointAt(s, pos);
             if (cp < minLcccCP) {
                 // Below the threshold: no data lookup needed.
                 return pos;
             }
             final int cpNorm16 = getNorm16(cp);
             if (norm16HasDecompBoundaryBefore(cpNorm16)) {
                 return pos;
             }
             pos += Character.charCount(cp);
             if (norm16HasDecompBoundaryAfter(cpNorm16)) {
                 return pos;
             }
         }
         return pos;
     }

     /**
      * Looks up the FCD data for the code point that ends at index p.
      *
      * @param s the text
      * @param start the start of the text range
      * @param p the index whose preceding code point is examined
      * @return 0 if p is at start (there is no preceding code point),
      *         otherwise getFCD16() of the code point immediately before p
      */
     private int getPreviousTrailCC(CharSequence s, int start, int p) {
         return start == p ? 0 : getFCD16(Character.codePointBefore(s, p));
     }

     /**
      * Records in mutableTrie that the decomposition of origin starts with decompLead.
      * A single non-zero origin is stored inline in the trie value; from the second
      * origin on (or when origin is U+0000) the origins are collected in a UnicodeSet
      * in canonStartSets, and the trie value holds CANON_HAS_SET plus that set's index.
      *
      * @param mutableTrie the trie being built, read and updated at decompLead
      * @param origin the code point whose decomposition starts with decompLead
      * @param decompLead the first code point of origin's decomposition
      */
     private void addToStartSet(MutableCodePointTrie mutableTrie, int origin, int decompLead) {
         final int current = mutableTrie.get(decompLead);
         final boolean nothingStoredYet = (current & (CANON_HAS_SET | CANON_VALUE_MASK)) == 0;
         if (nothingStoredYet && origin != 0) {
             // First origin for this lead: keep it inline in the value bits.
             mutableTrie.set(decompLead, current | origin);
             return;
         }

         // Not the first origin, or origin is U+0000 (which cannot be stored inline
         // because 0 in the value bits means "nothing stored").
         final UnicodeSet origins;
         if ((current & CANON_HAS_SET) != 0) {
             origins = canonStartSets.get(current & CANON_VALUE_MASK);
         } else {
             // Switch from an inline value to a set; the new set's index is the
             // current list size, read before the set is appended.
             final int inlineOrigin = current & CANON_VALUE_MASK;
             final int setIndex = canonStartSets.size();
             mutableTrie.set(decompLead, (current & ~CANON_VALUE_MASK) | CANON_HAS_SET | setIndex);
             origins = new UnicodeSet();
             canonStartSets.add(origins);
             if (inlineOrigin != 0) {
                 origins.add(inlineOrigin);
             }
         }
         origins.add(origin);
     }

     // Version of the loaded data. The "unused" suppression indicates that
     // the field is not read in this class. NOTE(review): presumably assigned
     // while the data is loaded -- confirm.
     @SuppressWarnings("unused")
     private VersionInfo dataVersion;

     // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
     // Code points below a threshold are handled without a norm16 lookup:
     // findPreviousFCDBoundary() stops at c<minDecompNoCP,
     // hasCompBoundaryBefore(c, norm16) returns true for c<minCompNoMaybeCP,
     // and findNextFCDBoundary() stops at c<minLcccCP.
     private int minDecompNoCP;
     private int minCompNoMaybeCP;
     private int minLcccCP;

     // Norm16 value thresholds for quick check combinations and types of extra data.
     // For example, norm16HasCompBoundaryBefore() returns true for any
     // norm16<minNoNoCompNoMaybeCC.
     private int minYesNo;
     private int minYesNoMappingsOnly;
     private int minNoNo;
     private int minNoNoCompBoundaryBefore;
     private int minNoNoCompNoMaybeCC;
     private int minNoNoEmpty;
     private int limitNoNo;
     private int centerNoNoDelta;
     private int minMaybeYes;

     // Maps code points to their norm16 values.
     private CodePointTrie.Fast16 normTrie;
     private String maybeYesCompositions;
     // Indexed with (norm16 >> OFFSET_SHIFT), see isTrailCC01ForCompBoundaryAfter().
     private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
     private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0

     // Canonical-iterator data; see the bit constants below for the value layout.
     private CodePointTrie canonIterData;
     // Sets of origin code points collected by addToStartSet(); a trie value with
     // CANON_HAS_SET carries the index of its set in the CANON_VALUE_MASK bits.
     private ArrayList<UnicodeSet> canonStartSets;

     // bits in canonIterData
     // NOTE(review): the two top bits are not used in the code nearby;
     // their meaning is taken from their names only -- confirm at the use sites.
     private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000;
     private static final int CANON_HAS_COMPOSITIONS = 0x40000000;
     // If set, the CANON_VALUE_MASK bits are an index into canonStartSets;
     // if clear, they hold a single origin code point inline (0 = none).
     private static final int CANON_HAS_SET = 0x200000;
     // Low 21 bits: wide enough for any code point up to U+10FFFF.
     private static final int CANON_VALUE_MASK = 0x1fffff;
 }