/*
*******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.BufferedInputStream;
import java.io.InputStream;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.lang.UCharacter;
/**
* @version 1.0
* @author Ram Viswanadha
*/
public final class NormalizerImpl {
// Static block to initialize the singleton instance of this class
static final NormalizerImpl IMPL;
static
{
try
{
IMPL = new NormalizerImpl();
}
catch (Exception e)
{
throw new RuntimeException(e.getMessage());
}
}
static final int UNSIGNED_BYTE_MASK =0xFF;
static final long UNSIGNED_INT_MASK = 0xffffffffL;
/*
* This new implementation of the normalization code loads its data from
* unorm.icu, which is generated with the gennorm tool.
* The format of that file is described at the end of this file.
*/
private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/unorm.icu";
// norm32 value constants
// quick check flags 0..3 set mean "no" for their forms
public static final int QC_NFC=0x11; /* no|maybe */
public static final int QC_NFKC=0x22; /* no|maybe */
public static final int QC_NFD=4; /* no */
public static final int QC_NFKD=8; /* no */
public static final int QC_ANY_NO=0xf;
/* quick check flags 4..5 mean "maybe" for their forms;
* test flags>=QC_MAYBE
*/
public static final int QC_MAYBE=0x10;
public static final int QC_ANY_MAYBE=0x30;
public static final int QC_MASK=0x3f;
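/*
 * Illustrative sketch (not part of the implementation): how the quick check
 * bits of a norm32 value are typically interpreted, mirroring the logic in
 * quickCheck() further below; the example character is arbitrary.
 *
 *   long norm32 = getNorm32('\u00c5');     // data lookup for one code unit
 *   long qcBits = norm32 & QC_NFC;         // "no|maybe" bits for the NFC form
 *   if((qcBits & QC_ANY_NO) != 0) {
 *       // definitely not normalized for this form
 *   } else if(qcBits != 0) {
 *       // "maybe" (possible only for NFC/NFKC): needs an actual normalization pass
 *   } else {
 *       // quick check "yes" for this form
 *   }
 */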
private static final int COMBINES_FWD=0x40;
private static final int COMBINES_BACK=0x80;
public static final int COMBINES_ANY=0xc0;
// UnicodeData.txt combining class in bits 15..8
private static final int CC_SHIFT=8;
public static final int CC_MASK=0xff00;
// 16 bits for the index to UChars and other extra data
private static final int EXTRA_SHIFT=16;
// start of surrogate specials after shift
private static final int EXTRA_INDEX_TOP=0xfc00;
private static final int EXTRA_SURROGATE_MASK=0x3ff;
private static final int EXTRA_SURROGATE_TOP=0x3f0; /* hangul etc. */
private static final int EXTRA_HANGUL=EXTRA_SURROGATE_TOP;
private static final int EXTRA_JAMO_L=EXTRA_SURROGATE_TOP+1;/* ### not used */
private static final int EXTRA_JAMO_V=EXTRA_SURROGATE_TOP+2;
private static final int EXTRA_JAMO_T=EXTRA_SURROGATE_TOP+3;
/* norm32 value constants using >16 bits */
private static final long MIN_SPECIAL = (long)(0xfc000000 & UNSIGNED_INT_MASK);
private static final long SURROGATES_TOP = (long)(0xfff00000 & UNSIGNED_INT_MASK);
private static final long MIN_HANGUL = (long)(0xfff00000 & UNSIGNED_INT_MASK);
private static final long MIN_JAMO_V = (long)(0xfff20000 & UNSIGNED_INT_MASK);
private static final long JAMO_V_TOP = (long)(0xfff30000 & UNSIGNED_INT_MASK);
/* indexes[] value names */
/* number of bytes in normalization trie */
static final int INDEX_TRIE_SIZE = 0;
/* number of chars in extra data */
static final int INDEX_CHAR_COUNT = 1;
/* number of uint16_t words for combining data */
static final int INDEX_COMBINE_DATA_COUNT = 2;
/* number of code points that combine forward */
static final int INDEX_COMBINE_FWD_COUNT = 3;
/* number of code points that combine forward and backward */
static final int INDEX_COMBINE_BOTH_COUNT = 4;
/* number of code points that combine backward */
static final int INDEX_COMBINE_BACK_COUNT = 5;
/* first code point with quick check NFC NO/MAYBE */
public static final int INDEX_MIN_NFC_NO_MAYBE = 6;
/* first code point with quick check NFKC NO/MAYBE */
public static final int INDEX_MIN_NFKC_NO_MAYBE = 7;
/* first code point with quick check NFD NO/MAYBE */
public static final int INDEX_MIN_NFD_NO_MAYBE = 8;
/* first code point with quick check NFKD NO/MAYBE */
public static final int INDEX_MIN_NFKD_NO_MAYBE = 9;
/* number of bytes in FCD trie */
static final int INDEX_FCD_TRIE_SIZE = 10;
/* number of bytes in the auxiliary trie */
static final int INDEX_AUX_TRIE_SIZE = 11;
/* number of uint16_t in the array of serialized USet */
static final int INDEX_CANON_SET_COUNT = 12;
/* changing this requires a new formatVersion */
static final int INDEX_TOP = 32;
/* AUX constants */
/* value constants for auxTrie */
private static final int AUX_UNSAFE_SHIFT = 11;
private static final int AUX_COMP_EX_SHIFT = 10;
private static final int AUX_NFC_SKIPPABLE_F_SHIFT = 12;
private static final int AUX_MAX_FNC = ((int)1<<AUX_COMP_EX_SHIFT);
private static final int AUX_UNSAFE_MASK = (int)((1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK);
private static final int AUX_FNC_MASK = (int)((AUX_MAX_FNC-1) & UNSIGNED_INT_MASK);
private static final int AUX_COMP_EX_MASK = (int)((1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK);
private static final long AUX_NFC_SKIP_F_MASK = ((UNSIGNED_INT_MASK&1)<<AUX_NFC_SKIPPABLE_F_SHIFT);
/* canonStartSets[0..31] contains indexes for what is in the array */
/* number of uint16_t in canonical starter sets */
static final int SET_INDEX_CANON_SETS_LENGTH = 0;
/* number of uint16_t in the BMP search table (contains pairs) */
static final int SET_INDEX_CANON_BMP_TABLE_LENGTH = 1;
/* number of uint16_t in the supplementary search table(contains triplets)*/
static final int SET_INDEX_CANON_SUPP_TABLE_LENGTH = 2;
/* changing this requires a new formatVersion */
static final int SET_INDEX_TOP = 32;
static final int CANON_SET_INDICIES_INDEX = 0;
static final int CANON_SET_START_SETS_INDEX = 1;
static final int CANON_SET_BMP_TABLE_INDEX = 2;
static final int CANON_SET_SUPP_TABLE_INDEX = 3;
/* 14 bit indexes to canonical USerializedSets */
static final int CANON_SET_MAX_CANON_SETS = 0x4000;
/* single-code point BMP sets are encoded directly in the search table
* except if result=0x4000..0x7fff
*/
static final int CANON_SET_BMP_MASK = 0xc000;
static final int CANON_SET_BMP_IS_INDEX = 0x4000;
private static final int MAX_BUFFER_SIZE = 20;
/**
* Internal option for cmpEquivFold() for decomposing.
* If not set, just do strcasecmp().
* @internal
*/
public static final int COMPARE_EQUIV = 0x80000;
/*******************************/
/* Wrappers for Trie implementations */
static final class NormTrieImpl implements Trie.DataManipulate{
static IntTrie normTrie= null;
/**
* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
* data the index array offset of the indexes for that lead surrogate.
* @param value data value for a surrogate from the trie, including
* the folding offset
* @return data offset or 0 if there is no data for the lead surrogate
*/
/* normTrie: 32-bit trie result may contain a special extraData index with the folding offset */
public int getFoldingOffset(int value){
return BMP_INDEX_LENGTH+
((value>>(EXTRA_SHIFT-SURROGATE_BLOCK_BITS))&
(0x3ff<<SURROGATE_BLOCK_BITS));
}
}
static final class FCDTrieImpl implements Trie.DataManipulate{
static CharTrie fcdTrie=null;
/**
* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
* data the index array offset of the indexes for that lead surrogate.
* @param value data value for a surrogate from the trie, including
* the folding offset
* @return data offset or 0 if there is no data for the lead surrogate
*/
/* fcdTrie: the folding offset is the lead FCD value itself */
public int getFoldingOffset(int value){
return value;
}
}
static final class AuxTrieImpl implements Trie.DataManipulate{
static CharTrie auxTrie = null;
/**
* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
* data the index array offset of the indexes for that lead surrogate.
* @param value data value for a surrogate from the trie, including
* the folding offset
* @return data offset or 0 if there is no data for the lead surrogate
*/
/* auxTrie: the folding offset is in bits 9..0 of the 16-bit trie result */
public int getFoldingOffset(int value){
return (int)(value &AUX_FNC_MASK)<<SURROGATE_BLOCK_BITS;
}
}
/****************************************************/
private static FCDTrieImpl fcdTrieImpl;
private static NormTrieImpl normTrieImpl;
private static AuxTrieImpl auxTrieImpl;
private static int[] indexes;
private static char[] combiningTable;
private static char[] extraData;
private static Object[] canonStartSets;
private static boolean isDataLoaded;
private static boolean isFormatVersion_2_1;
private static boolean isFormatVersion_2_2;
private static byte[] unicodeVersion;
/**
* Default buffer size for reading the data file
*/
private static final int DATA_BUFFER_SIZE = 25000;
/**
* FCD check: everything below this code point is known to have a 0
* lead combining class
*/
public static final int MIN_WITH_LEAD_CC=0x300;
/**
* Bit 7 of the length byte for a decomposition string in extra data is
* a flag indicating whether the decomposition string is
* preceded by a 16-bit word with the leading and trailing cc
* of the decomposition (like for A-umlaut);
* if not, then both cc's are zero (like for compatibility ideographs).
*/
private static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80;
/**
* Bits 6..0 of the length byte contain the actual length.
*/
private static final int DECOMP_LENGTH_MASK=0x7f;
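/*
 * Illustrative sketch (not part of the implementation): how the length byte
 * that precedes a decomposition in extraData is decoded, as in decompose()
 * further below; "p" stands for some valid index into extraData.
 *
 *   char lengthByte = extraData[p];
 *   boolean hasCC = (lengthByte & DECOMP_FLAG_LENGTH_HAS_CC) != 0;  // bit 7
 *   int length = lengthByte & DECOMP_LENGTH_MASK;                   // bits 6..0
 *   // if hasCC, the next char packs the lead cc in its high byte and the
 *   // trail cc in its low byte; when a character also has a different
 *   // compatibility decomposition, the high byte of this same char is the
 *   // compatibility length byte (see decompose(norm32, qcMask, args))
 */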
/** Length of the BMP portion of the index (stage 1) array. */
private static final int BMP_INDEX_LENGTH=0x10000>>Trie.INDEX_STAGE_1_SHIFT_;
/** Number of bits of a trail surrogate that are used in index table
* lookups.
*/
private static final int SURROGATE_BLOCK_BITS=10-Trie.INDEX_STAGE_1_SHIFT_;
// public utility
public static int getFromIndexesArr(int index){
return indexes[index];
}
// protected constructor ---------------------------------------------
/**
* Constructor
* @exception IOException thrown when reading the data fails or the data is corrupted
*/
private NormalizerImpl() throws IOException {
//data should be loaded only once
if(!isDataLoaded){
// jar access
InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
BufferedInputStream b = new BufferedInputStream(i,DATA_BUFFER_SIZE);
NormalizerDataReader reader = new NormalizerDataReader(b);
// read the indexes
indexes = reader.readIndexes(NormalizerImpl.INDEX_TOP);
byte[] normBytes = new byte[indexes[NormalizerImpl.INDEX_TRIE_SIZE]];
int combiningTableTop = indexes[NormalizerImpl.INDEX_COMBINE_DATA_COUNT];
combiningTable = new char[combiningTableTop];
int extraDataTop = indexes[NormalizerImpl.INDEX_CHAR_COUNT];
extraData = new char[extraDataTop];
byte[] fcdBytes = new byte[indexes[NormalizerImpl.INDEX_FCD_TRIE_SIZE]];
byte[] auxBytes = new byte[indexes[NormalizerImpl.INDEX_AUX_TRIE_SIZE]];
canonStartSets=new Object[NormalizerImpl.CANON_SET_MAX_CANON_SETS];
fcdTrieImpl = new FCDTrieImpl();
normTrieImpl = new NormTrieImpl();
auxTrieImpl = new AuxTrieImpl();
// load the rest of the data and initialize the data members
reader.read(normBytes, fcdBytes,auxBytes, extraData, combiningTable,
canonStartSets);
NormTrieImpl.normTrie = new IntTrie( new ByteArrayInputStream(normBytes),normTrieImpl );
FCDTrieImpl.fcdTrie = new CharTrie( new ByteArrayInputStream(fcdBytes),fcdTrieImpl );
AuxTrieImpl.auxTrie = new CharTrie( new ByteArrayInputStream(auxBytes),auxTrieImpl );
// we reached here without any exceptions, so the data is fully
// loaded; set the flag to true
isDataLoaded = true;
// get the data format version
byte[] formatVersion = reader.getDataFormatVersion();
isFormatVersion_2_1 =( formatVersion[0]>2
||
(formatVersion[0]==2 && formatVersion[1]>=1)
);
isFormatVersion_2_2 =( formatVersion[0]>2
||
(formatVersion[0]==2 && formatVersion[1]>=2)
);
unicodeVersion = reader.getUnicodeVersion();
b.close();
}
}
/* ---------------------------------------------------------------------- */
/* Korean Hangul and Jamo constants */
public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */
public static final int HANGUL_BASE=0xac00;
public static final int JAMO_L_COUNT=19;
public static final int JAMO_V_COUNT=21;
public static final int JAMO_T_COUNT=28;
public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
private static boolean isHangulWithoutJamoT(char c) {
c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
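/*
 * Illustrative example (not part of the implementation): the algorithmic Hangul
 * decomposition used by getDecomposition()/decompose() further below, worked
 * through for U+AC01 (HANGUL SYLLABLE GAG):
 *
 *   int s = 0xAC01 - HANGUL_BASE;        // 1
 *   int tIndex = s % JAMO_T_COUNT;       // 1 -> trail jamo U+11A8 (JAMO_T_BASE+1)
 *   s /= JAMO_T_COUNT;                   // 0
 *   int vIndex = s % JAMO_V_COUNT;       // 0 -> vowel jamo U+1161 (JAMO_V_BASE)
 *   int lIndex = s / JAMO_V_COUNT;       // 0 -> lead jamo U+1100 (JAMO_L_BASE)
 *
 * so U+AC01 decomposes to <U+1100, U+1161, U+11A8>; composition reverses this:
 * HANGUL_BASE + (lIndex*JAMO_V_COUNT + vIndex)*JAMO_T_COUNT + tIndex == 0xAC01.
 */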
/* norm32 helpers */
/* is this a norm32 with a regular index? */
private static boolean isNorm32Regular(long norm32) {
return norm32<MIN_SPECIAL;
}
/* is this a norm32 with a special index for a lead surrogate? */
private static boolean isNorm32LeadSurrogate(long norm32) {
return MIN_SPECIAL<=norm32 && norm32<SURROGATES_TOP;
}
/* is this a norm32 with a special index for a Hangul syllable or a Jamo? */
private static boolean isNorm32HangulOrJamo(long norm32) {
return norm32>=MIN_HANGUL;
}
/*
* Given isNorm32HangulOrJamo(),
* is this a Hangul syllable or a Jamo?
*/
///CLOVER:OFF
private static boolean isHangulJamoNorm32HangulOrJamoL(long norm32) {
return norm32<MIN_JAMO_V;
}
///CLOVER:ON
/*
* Given norm32 for Jamo V or T,
* is this a Jamo V?
*/
private static boolean isJamoVTNorm32JamoV(long norm32) {
return norm32<JAMO_V_TOP;
}
/* data access primitives ----------------------------------------------- */
public static long/*unsigned*/ getNorm32(char c) {
return ((UNSIGNED_INT_MASK) & (NormTrieImpl.normTrie.getLeadValue(c)));
}
public static long/*unsigned*/ getNorm32FromSurrogatePair(long norm32,
char c2) {
/*
* the surrogate index in norm32 stores only the number of the surrogate
* index block; see gennorm/store.c/getFoldedNormValue()
*/
return ((UNSIGNED_INT_MASK) &
NormTrieImpl.normTrie.getTrailValue((int)norm32, c2));
}
///CLOVER:OFF
private static long getNorm32(int c){
return (UNSIGNED_INT_MASK&(NormTrieImpl.normTrie.getCodePointValue(c)));
}
private static long getNorm32(int c,int mask){
long/*unsigned*/ norm32= getNorm32(UTF16.getLeadSurrogate(c));
if(((norm32&mask)>0) && isNorm32LeadSurrogate(norm32)) {
/* c is a lead surrogate, get the real norm32 */
norm32=getNorm32FromSurrogatePair(norm32,UTF16.getTrailSurrogate(c));
}
return norm32;
}
///CLOVER:ON
/*
* get a norm32 from text with complete code points
* (like from decompositions)
*/
private static long/*unsigned*/ getNorm32(char[] p,int start,
int/*unsigned*/ mask) {
long/*unsigned*/ norm32= getNorm32(p[start]);
if(((norm32&mask)>0) && isNorm32LeadSurrogate(norm32)) {
/* *p is a lead surrogate, get the real norm32 */
norm32=getNorm32FromSurrogatePair(norm32, p[start+1]);
}
return norm32;
}
public static VersionInfo getUnicodeVersion(){
return VersionInfo.getInstance(unicodeVersion[0], unicodeVersion[1],
unicodeVersion[2], unicodeVersion[3]);
}
public static char getFCD16(char c) {
return FCDTrieImpl.fcdTrie.getLeadValue(c);
}
public static char getFCD16FromSurrogatePair(char fcd16, char c2) {
/* the surrogate index in fcd16 is an absolute offset over the
* start of stage 1
* */
return FCDTrieImpl.fcdTrie.getTrailValue(fcd16, c2);
}
public static int getFCD16(int c) {
return FCDTrieImpl.fcdTrie.getCodePointValue(c);
}
private static int getExtraDataIndex(long norm32) {
return (int)(norm32>>EXTRA_SHIFT);
}
private static final class DecomposeArgs{
int /*unsigned byte*/ cc;
int /*unsigned byte*/ trailCC;
int length;
}
/**
*
* get the canonical or compatibility decomposition for one character
*
* @return index into the extraData array
*/
private static int/*index*/ decompose(long/*unsigned*/ norm32,
int/*unsigned*/ qcMask,
DecomposeArgs args) {
int p= getExtraDataIndex(norm32);
args.length=extraData[p++];
if((norm32&qcMask&QC_NFKD)!=0 && args.length>=0x100) {
/* use compatibility decomposition, skip canonical data */
p+=((args.length>>7)&1)+(args.length&DECOMP_LENGTH_MASK);
args.length>>=8;
}
if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
/* get the lead and trail cc's */
char bothCCs=extraData[p++];
args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
} else {
/* lead and trail cc's are both 0 */
args.cc=args.trailCC=0;
}
args.length&=DECOMP_LENGTH_MASK;
return p;
}
/**
* get the canonical decomposition for one character
* @return index into the extraData array
*/
private static int decompose(long/*unsigned*/ norm32,
DecomposeArgs args) {
int p= getExtraDataIndex(norm32);
args.length=extraData[p++];
if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
/* get the lead and trail cc's */
char bothCCs=extraData[p++];
args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
} else {
/* lead and trail cc's are both 0 */
args.cc=args.trailCC=0;
}
args.length&=DECOMP_LENGTH_MASK;
return p;
}
private static final class NextCCArgs{
char[] source;
int next;
int limit;
char c;
char c2;
}
/*
* get the combining class of (c, c2)= args.source[args.next++]
* before: args.next<args.limit; after: args.next<=args.limit
* if only one code unit is used, then c2==0
*/
private static int /*unsigned byte*/ getNextCC(NextCCArgs args) {
long /*unsigned*/ norm32;
args.c=args.source[args.next++];
norm32= getNorm32(args.c);
if((norm32 & CC_MASK)==0) {
args.c2=0;
return 0;
} else {
if(!isNorm32LeadSurrogate(norm32)) {
args.c2=0;
} else {
/* c is a lead surrogate, get the real norm32 */
if(args.next!=args.limit &&
UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
++args.next;
norm32=getNorm32FromSurrogatePair(norm32, args.c2);
} else {
args.c2=0;
return 0;
}
}
return (int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
}
}
private static final class PrevArgs{
char[] src;
int start;
int current;
char c;
char c2;
}
/*
* read backwards and get norm32
* return 0 if the character is <minC
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
* surrogate but read second!)
*/
private static long /*unsigned*/ getPrevNorm32(PrevArgs args,
int/*unsigned*/ minC,
int/*unsigned*/ mask) {
long/*unsigned*/ norm32;
args.c=args.src[--args.current];
args.c2=0;
/* check for a surrogate before getting norm32 to see if we need to
* predecrement further
*/
if(args.c<minC) {
return 0;
} else if(!UTF16.isSurrogate(args.c)) {
return getNorm32(args.c);
} else if(UTF16.isLeadSurrogate(args.c)) {
/* unpaired first surrogate */
return 0;
} else if(args.current!=args.start &&
UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) {
--args.current;
norm32=getNorm32(args.c2);
if((norm32&mask)==0) {
/* all surrogate pairs with this lead surrogate have
* only irrelevant data
*/
return 0;
} else {
/* norm32 must be a surrogate special */
return getNorm32FromSurrogatePair(norm32, args.c);
}
} else {
/* unpaired second surrogate */
args.c2=0;
return 0;
}
}
/*
* get the combining class of (c, c2)=*--p
* before: start<p; after: start<=p
*/
private static int /*unsigned byte*/ getPrevCC(PrevArgs args) {
return (int)((UNSIGNED_BYTE_MASK)&(getPrevNorm32(args, MIN_WITH_LEAD_CC,
CC_MASK)>>CC_SHIFT));
}
/*
* is this a safe boundary character for NF*D?
* (lead cc==0)
*/
public static boolean isNFDSafe(long/*unsigned*/ norm32,
int/*unsigned*/ccOrQCMask,
int/*unsigned*/ decompQCMask) {
if((norm32&ccOrQCMask)==0) {
return true; /* cc==0 and no decomposition: this is NF*D safe */
}
/* inspect its decomposition - maybe a Hangul but not a surrogate here*/
if(isNorm32Regular(norm32) && (norm32&decompQCMask)!=0) {
DecomposeArgs args=new DecomposeArgs();
/* decomposes, get everything from the variable-length extra data */
decompose(norm32, decompQCMask, args);
return args.cc==0;
} else {
/* no decomposition (or Hangul), test the cc directly */
return (norm32&CC_MASK)==0;
}
}
/*
* is this (or does its decomposition begin with) a "true starter"?
* (cc==0 and NF*C_YES)
*/
public static boolean isTrueStarter(long/*unsigned*/ norm32,
int/*unsigned*/ ccOrQCMask,
int/*unsigned*/ decompQCMask) {
if((norm32&ccOrQCMask)==0) {
return true; /* this is a true starter (could be Hangul or Jamo L)*/
}
/* inspect its decomposition - not a Hangul or a surrogate here */
if((norm32&decompQCMask)!=0) {
int p; /* index into extra data array */
DecomposeArgs args=new DecomposeArgs();
/* decomposes, get everything from the variable-length extra data */
p=decompose(norm32, decompQCMask, args);
if(args.cc==0) {
int/*unsigned*/ qcMask=ccOrQCMask&QC_MASK;
/* does it begin with NFC_YES? */
if((getNorm32(extraData,p, qcMask)&qcMask)==0) {
/* yes, the decomposition begins with a true starter */
return true;
}
}
}
return false;
}
/* reorder UTF-16 in-place ---------------------------------------------- */
/**
* simpler, single-character version of mergeOrdered() -
* bubble-insert one single code point into the preceding string
* which is already canonically ordered
* (c, c2) may or may not yet have been inserted at src[current]..src[p]
*
* it must be p=current+lengthof(c, c2) i.e. p=current+(c2==0 ? 1 : 2)
*
* before: src[start]..src[current] is already ordered, and
* src[current]..src[p] may or may not hold (c, c2) but
* must be exactly the same length as (c, c2)
* after: src[start]..src[p] is ordered
*
* @return the trailing combining class
*/
private static int/*unsigned byte*/ insertOrdered(char[] source,
int start,
int current, int p,
char c, char c2,
int/*unsigned byte*/ cc) {
int back, preBack;
int r;
int prevCC, trailCC=cc;
if(start<current && cc!=0) {
// search for the insertion point where cc>=prevCC
preBack=back=current;
PrevArgs prevArgs = new PrevArgs();
prevArgs.current = current;
prevArgs.start = start;
prevArgs.src = source;
// get the prevCC
prevCC=getPrevCC(prevArgs);
preBack = prevArgs.current;
if(cc<prevCC) {
// this will be the last code point, so keep its cc
trailCC=prevCC;
back=preBack;
while(start<preBack) {
prevCC=getPrevCC(prevArgs);
preBack=prevArgs.current;
if(cc>=prevCC) {
break;
}
back=preBack;
}
// this is where we are right now with all these indices:
// [start]..[pPreBack] 0..? code points that we can ignore
// [pPreBack]..[pBack] 0..1 code points with prevCC<=cc
// [pBack]..[current] 0..n code points with >cc, move up to insert (c, c2)
// [current]..[p] 1 code point (c, c2) with cc
// move the code units in between up
r=p;
do {
source[--r]=source[--current];
} while(back!=current);
}
}
// insert (c, c2)
source[current]=c;
if(c2!=0) {
source[(current+1)]=c2;
}
// we know the cc of the last code point
return trailCC;
}
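/*
 * Illustrative example (not part of the implementation) of what insertOrdered()
 * does: with the already ordered prefix {'a', U+0301} (combining acute, cc=230)
 * and U+0328 (combining ogonek, cc=202) staged at the end, the ogonek is bubbled
 * back past the acute because 202<230:
 *
 *   char[] buf = {'a', '\u0301', '\u0328'};
 *   int trailCC = insertOrdered(buf, 0, 2, 3, '\u0328', (char)0, 202);
 *   // buf is now {'a', '\u0328', '\u0301'} and trailCC == 230
 */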
/**
* merge two UTF-16 string parts together
* to canonically order (order by combining classes) their concatenation
*
* the two strings may already be adjacent, so that the merging is done
* in-place if the two strings are not adjacent, then the buffer holding the
* first one must be large enough
* the second string may or may not be ordered in itself
*
* before: [start]..[current] is already ordered, and
* [next]..[limit] may be ordered in itself, but
* is not in relation to [start..current[
* after: [start..current+(limit-next)[ is ordered
*
* the algorithm is a simple bubble-sort that takes the characters from
* src[next++] and inserts them in correct combining class order into the
* preceding part of the string
*
* since this function is called much less often than the single-code point
* insertOrdered(), it just uses that for easier maintenance
*
* @return the trailing combining class
*/
private static int /*unsigned byte*/ mergeOrdered(char[] source,
int start,
int current,
char[] data,
int next,
int limit,
boolean isOrdered) {
int r;
int /*unsigned byte*/ cc, trailCC=0;
boolean adjacent;
adjacent= current==next;
NextCCArgs ncArgs = new NextCCArgs();
ncArgs.source = data;
ncArgs.next = next;
ncArgs.limit = limit;
if(start!=current || !isOrdered) {
while(ncArgs.next<ncArgs.limit) {
cc=getNextCC(ncArgs);
if(cc==0) {
// does not bubble back
trailCC=0;
if(adjacent) {
current=ncArgs.next;
} else {
data[current++]=ncArgs.c;
if(ncArgs.c2!=0) {
data[current++]=ncArgs.c2;
}
}
if(isOrdered) {
break;
} else {
start=current;
}
} else {
r=current+(ncArgs.c2==0 ? 1 : 2);
trailCC=insertOrdered(source,start, current, r,
ncArgs.c, ncArgs.c2, cc);
current=r;
}
}
}
if(ncArgs.next==ncArgs.limit) {
// we know the cc of the last code point
return trailCC;
} else {
if(!adjacent) {
// copy the second string part
do {
source[current++]=data[ncArgs.next++];
} while(ncArgs.next!=ncArgs.limit);
ncArgs.limit=current;
}
PrevArgs prevArgs = new PrevArgs();
prevArgs.src = data;
prevArgs.start = start;
prevArgs.current = ncArgs.limit;
return getPrevCC(prevArgs);
}
}
private static int /*unsigned byte*/ mergeOrdered(char[] source,
int start,
int current,
char[] data,
final int next,
final int limit) {
return mergeOrdered(source,start,current,data,next,limit,true);
}
public static boolean checkFCD(char[] src,int srcStart, int srcLimit,
UnicodeSet nx) {
char fcd16,c,c2;
int prevCC=0, cc;
int i =srcStart, length = srcLimit;
for(;;) {
for(;;) {
if(i==length) {
return true;
} else if((c=src[i++])<MIN_WITH_LEAD_CC) {
prevCC=(int)-c;
} else if((fcd16=getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
}
// check one above-minimum, relevant code unit
if(UTF16.isLeadSurrogate(c)) {
// c is a lead surrogate, get the real fcd16
if(i!=length && UTF16.isTrailSurrogate(c2=src[i])) {
++i;
fcd16=getFCD16FromSurrogatePair(fcd16, c2);
} else {
c2=0;
fcd16=0;
}
}else{
c2=0;
}
if(nx_contains(nx, c, c2)) {
prevCC=0; /* excluded: fcd16==0 */
continue;
}
// prevCC has values from the following ranges:
// 0..0xff -the previous trail combining class
// <0 -the negative value of the previous code unit;
// that code unit was <MIN_WITH_LEAD_CC and its getFCD16()
// was deferred so that average text is checked faster
//
// check the combining order
cc=(int)(fcd16>>8);
if(cc!=0) {
if(prevCC<0) {
// the previous character was <MIN_WITH_LEAD_CC,
// we need to get its trail cc
//
if(!nx_contains(nx, (int)-prevCC)) {
prevCC=(int)(FCDTrieImpl.fcdTrie.getBMPValue(
(char)-prevCC)&0xff
);
} else {
prevCC=0; /* excluded: fcd16==0 */
}
}
if(cc<prevCC) {
return false;
}
}
prevCC=(int)(fcd16&0xff);
}
}
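/*
 * Illustrative example (not part of the implementation): checkFCD() verifies
 * that adjacent combining marks are canonically ordered without decomposing
 * the text. ccc(U+0301)=230 and ccc(U+0328)=202, so acute followed by ogonek
 * violates canonical order while the reverse does not:
 *
 *   boolean bad = checkFCD(new char[] {'a', '\u0301', '\u0328'}, 0, 3, null); // false
 *   boolean ok  = checkFCD(new char[] {'a', '\u0328', '\u0301'}, 0, 3, null); // true
 */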
public static Normalizer.QuickCheckResult quickCheck(char[] src,
int srcStart,
int srcLimit,
int minNoMaybe,
int qcMask,
int options,
boolean allowMaybe,
UnicodeSet nx){
int ccOrQCMask;
long norm32;
char c, c2;
char cc, prevCC;
long qcNorm32;
Normalizer.QuickCheckResult result;
ComposePartArgs args = new ComposePartArgs();
char[] buffer ;
int start = srcStart;
if(!isDataLoaded) {
return Normalizer.MAYBE;
}
// initialize
ccOrQCMask=CC_MASK|qcMask;
result=Normalizer.YES;
prevCC=0;
for(;;) {
for(;;) {
if(srcStart==srcLimit) {
return result;
} else if((c=src[srcStart++])>=minNoMaybe &&
(( norm32=getNorm32(c)) & ccOrQCMask)!=0) {
break;
}
prevCC=0;
}
// check one above-minimum, relevant code unit
if(isNorm32LeadSurrogate(norm32)) {
// c is a lead surrogate, get the real norm32
if(srcStart!=srcLimit&& UTF16.isTrailSurrogate(c2=src[srcStart])) {
++srcStart;
norm32=getNorm32FromSurrogatePair(norm32,c2);
} else {
norm32=0;
c2=0;
}
}else{
c2=0;
}
if(nx_contains(nx, c, c2)) {
/* excluded: norm32==0 */
norm32=0;
}
// check the combining order
cc=(char)((norm32>>CC_SHIFT)&0xFF);
if(cc!=0 && cc<prevCC) {
return Normalizer.NO;
}
prevCC=cc;
// check for "no" or "maybe" quick check flags
qcNorm32 = norm32 & qcMask;
if((qcNorm32& QC_ANY_NO)>=1) {
result= Normalizer.NO;
break;
} else if(qcNorm32!=0) {
// "maybe" can only occur for NFC and NFKC
if(allowMaybe){
result=Normalizer.MAYBE;
}else{
// normalize a section around here to see if it is really
// normalized or not
int prevStarter;
int/*unsigned*/ decompQCMask;
decompQCMask=(qcMask<<2)&0xf; // decomposition quick check mask
// find the previous starter
// set prevStarter to the beginning of the current character
prevStarter=srcStart-1;
if(UTF16.isTrailSurrogate(src[prevStarter])) {
// safe because unpaired surrogates do not result
// in "maybe"
--prevStarter;
}
prevStarter=findPreviousStarter(src, start, prevStarter,
ccOrQCMask, decompQCMask,
(char)minNoMaybe);
// find the next true starter in [src..limit[ - modifies
// src to point to the next starter
srcStart=findNextStarter(src,srcStart, srcLimit, qcMask,
decompQCMask,(char) minNoMaybe);
//set the args for compose part
args.prevCC = prevCC;
// decompose and recompose [prevStarter..src[
buffer = composePart(args,prevStarter,src,srcStart,srcLimit,options,nx);
// compare the normalized version with the original
if(0!=strCompare(buffer,0,args.length,src,prevStarter,(srcStart-prevStarter), false)) {
result=Normalizer.NO; // normalization differs
break;
}
// continue after the next starter
}
}
}
return result;
}
//------------------------------------------------------
// make NFD & NFKD
//------------------------------------------------------
public static int getDecomposition(int c /*UTF-32*/ ,
boolean compat,
char[] dest,
int destStart,
int destCapacity) {
if( (UNSIGNED_INT_MASK & c)<=0x10ffff) {
long /*unsigned*/ norm32;
int qcMask;
int minNoMaybe;
int length;
// initialize
if(!compat) {
minNoMaybe=(int)indexes[INDEX_MIN_NFD_NO_MAYBE];
qcMask=QC_NFD;
} else {
minNoMaybe=(int)indexes[INDEX_MIN_NFKD_NO_MAYBE];
qcMask=QC_NFKD;
}
if(c<minNoMaybe) {
// trivial case
if(destCapacity>0) {
dest[0]=(char)c;
}
return -1;
}
/* data lookup */
norm32=getNorm32(c);
if((norm32&qcMask)==0) {
/* simple case: no decomposition */
if(c<=0xffff) {
if(destCapacity>0) {
dest[0]=(char)c;
}
return -1;
} else {
if(destCapacity>=2) {
dest[0]=UTF16.getLeadSurrogate(c);
dest[1]=UTF16.getTrailSurrogate(c);
}
return -2;
}
} else if(isNorm32HangulOrJamo(norm32)) {
/* Hangul syllable: decompose algorithmically */
char c2;
c-=HANGUL_BASE;
c2=(char)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
if(destCapacity>=3) {
dest[2]=(char)(JAMO_T_BASE+c2);
}
length=3;
} else {
length=2;
}
if(destCapacity>=2) {
dest[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
dest[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
}
return length;
} else {
/* c decomposes, get everything from the variable-length extra
* data
*/
int p, limit;
DecomposeArgs args = new DecomposeArgs();
/* the index into extra data array*/
p=decompose(norm32, qcMask, args);
if(args.length<=destCapacity) {
limit=p+args.length;
do {
dest[destStart++]=extraData[p++];
} while(p<limit);
}
return args.length;
}
} else {
return 0;
}
}
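/*
 * Illustrative usage sketch (not part of the implementation): interpreting
 * getDecomposition()'s return value. A negative result (-1 or -2) means the
 * character has no decomposition for the requested form and its own code
 * unit(s) are written if there is room; a positive result is the length of the
 * decomposition in chars (which may exceed destCapacity, i.e. preflighting).
 *
 *   char[] buf = new char[8];
 *   int len = getDecomposition(0x00C5, false, buf, 0, buf.length);
 *   if(len < 0) {
 *       // no decomposition; -len code units of the original character are in buf
 *   } else {
 *       // buf[0..len) holds the canonical (compat=false) decomposition
 *   }
 */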
public static int decompose(char[] src,int srcStart,int srcLimit,
char[] dest,int destStart,int destLimit,
boolean compat,int[] outTrailCC,
UnicodeSet nx) {
char[] buffer = new char[3];
int prevSrc;
long norm32;
int ccOrQCMask, qcMask;
int reorderStartIndex, length;
char c, c2, minNoMaybe;
int/*unsigned byte*/ cc, prevCC, trailCC;
char[] p;
int pStart;
int destIndex = destStart;
int srcIndex = srcStart;
if(!compat) {
minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE];
qcMask=QC_NFD;
} else {
minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE];
qcMask=QC_NFKD;
}
/* initialize */
ccOrQCMask=CC_MASK|qcMask;
reorderStartIndex=0;
prevCC=0;
norm32=0;
c=0;
pStart=0;
cc=trailCC=-1;//initialize to bogus value
for(;;) {
/* count code units below the minimum or with irrelevant data for
* the quick check
*/
prevSrc=srcIndex;
while(srcIndex!=srcLimit &&((c=src[srcIndex])<minNoMaybe ||
((norm32=getNorm32(c))&ccOrQCMask)==0)){
prevCC=0;
++srcIndex;
}
/* copy these code units all at once */
if(srcIndex!=prevSrc) {
length=(int)(srcIndex-prevSrc);
if((destIndex+length)<=destLimit) {
System.arraycopy(src,prevSrc,dest,destIndex,length);
}
destIndex+=length;
reorderStartIndex=destIndex;
}
/* end of source reached? */
if(srcIndex==srcLimit) {
break;
}
/* c already contains *src and norm32 is set for it, increment src*/
++srcIndex;
/* check one above-minimum, relevant code unit */
/*
* generally, set p and length to the decomposition string;
* in simple cases, p==NULL and (c, c2) will hold the length code
* units to append;
* in all cases, set cc to the lead and trailCC to the trail combining class
*
* the following merge-sort of the current character into the
* preceding, canonically ordered result text will use the
* optimized insertOrdered()
* if there is only one single code point to process;
* this is indicated with p==NULL, and (c, c2) is the character to
* insert
* ((c, 0) for a BMP character and (lead surrogate, trail surrogate)
* for a supplementary character)
* otherwise, p[length] is merged in with _mergeOrdered()
*/
if(isNorm32HangulOrJamo(norm32)) {
if(nx_contains(nx, c)) {
c2=0;
p=null;
length=1;
} else {
// Hangul syllable: decompose algorithmically
p=buffer;
pStart=0;
cc=trailCC=0;
c-=HANGUL_BASE;
c2=(char)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
buffer[2]=(char)(JAMO_T_BASE+c2);
length=3;
} else {
length=2;
}
buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
}
} else {
if(isNorm32Regular(norm32)) {
c2=0;
length=1;
} else {
// c is a lead surrogate, get the real norm32
if(srcIndex!=srcLimit &&
UTF16.isTrailSurrogate(c2=src[srcIndex])) {
++srcIndex;
length=2;
norm32=getNorm32FromSurrogatePair(norm32, c2);
} else {
c2=0;
length=1;
norm32=0;
}
}
/* get the decomposition and the lead and trail cc's */
if(nx_contains(nx, c, c2)) {
/* excluded: norm32==0 */
cc=trailCC=0;
p=null;
} else if((norm32&qcMask)==0) {
/* c does not decompose */
cc=trailCC=(int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
p=null;
pStart=-1;
} else {
DecomposeArgs arg = new DecomposeArgs();
/* c decomposes, get everything from the variable-length
* extra data
*/
pStart=decompose(norm32, qcMask, arg);
p=extraData;
length=arg.length;
cc=arg.cc;
trailCC=arg.trailCC;
if(length==1) {
/* fastpath a single code unit from decomposition */
c=p[pStart];
c2=0;
p=null;
pStart=-1;
}
}
}
/* append the decomposition to the destination buffer, assume
* length>0
*/
if((destIndex+length)<=destLimit) {
int reorderSplit=destIndex;
if(p==null) {
/* fastpath: single code point */
if(cc!=0 && cc<prevCC) {
/* (c, c2) is out of order with respect to the preceding
* text
*/
destIndex+=length;
trailCC=insertOrdered(dest,reorderStartIndex,
reorderSplit, destIndex, c, c2, cc);
} else {
/* just append (c, c2) */
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
}
} else {
/* general: multiple code points (ordered by themselves)
* from decomposition
*/
if(cc!=0 && cc<prevCC) {
/* the decomposition is out of order with respect to the
* preceding text
*/
destIndex+=length;
trailCC=mergeOrdered(dest,reorderStartIndex,
reorderSplit,p, pStart,pStart+length);
} else {
/* just append the decomposition */
do {
dest[destIndex++]=p[pStart++];
} while(--length>0);
}
}
} else {
/* buffer overflow */
/* keep incrementing the destIndex for preflighting */
destIndex+=length;
}
prevCC=trailCC;
if(prevCC==0) {
reorderStartIndex=destIndex;
}
}
outTrailCC[0]=prevCC;
return destIndex - destStart;
}
/* make NFC & NFKC ------------------------------------------------------ */
private static final class NextCombiningArgs{
char[] source;
int start;
//int limit;
char c;
char c2;
int/*unsigned*/ combiningIndex;
char /*unsigned byte*/ cc;
}
/* get the composition properties of the next character */
private static int /*unsigned*/ getNextCombining(NextCombiningArgs args,
int limit,
UnicodeSet nx) {
long/*unsigned*/ norm32;
int combineFlags;
/* get properties */
args.c=args.source[args.start++];
norm32=getNorm32(args.c);
/* preset output values for most characters */
args.c2=0;
args.combiningIndex=0;
args.cc=0;
if((norm32&(CC_MASK|COMBINES_ANY))==0) {
return 0;
} else {
if(isNorm32Regular(norm32)) {
/* set cc etc. below */
} else if(isNorm32HangulOrJamo(norm32)) {
/* a compatibility decomposition contained Jamos */
args.combiningIndex=(int)((UNSIGNED_INT_MASK)&(0xfff0|
(norm32>>EXTRA_SHIFT)));
return (int)(norm32&COMBINES_ANY);
} else {
/* c is a lead surrogate, get the real norm32 */
if(args.start!=limit && UTF16.isTrailSurrogate(args.c2=
args.source[args.start])) {
++args.start;
norm32=getNorm32FromSurrogatePair(norm32, args.c2);
} else {
args.c2=0;
return 0;
}
}
if(nx_contains(nx, args.c, args.c2)) {
return 0; /* excluded: norm32==0 */
}
args.cc= (char)((norm32>>CC_SHIFT)&0xff);
combineFlags=(int)(norm32&COMBINES_ANY);
if(combineFlags!=0) {
int index = getExtraDataIndex(norm32);
args.combiningIndex=index>0 ? extraData[(index-1)] :0;
}
return combineFlags;
}
}
/*
* given a composition-result starter (c, c2) - which means its cc==0,
* it combines forward, it has extra data, its norm32!=0,
* it is not a Hangul or Jamo,
* get just its combineFwdIndex
*
* norm32(c) is special if and only if c2!=0
*/
private static int/*unsigned*/ getCombiningIndexFromStarter(char c,char c2){
long/*unsigned*/ norm32;
norm32=getNorm32(c);
if(c2!=0) {
norm32=getNorm32FromSurrogatePair(norm32, c2);
}
return extraData[(getExtraDataIndex(norm32)-1)];
}
/*
* Find the recomposition result for
* a forward-combining character
* (specified with a pointer to its part of the combiningTable[])
* and a backward-combining character
* (specified with its combineBackIndex).
*
* If these two characters combine, then set (value, value2)
* with the code unit(s) of the composition character.
*
* Return value:
* 0 do not combine
* 1 combine
* >1 combine, and the composition is a forward-combining starter
*
* See unormimp.h for a description of the composition table format.
*/
private static int/*unsigned*/ combine(char[]table,int tableStart,
int/*unsigned*/ combineBackIndex,
int[] outValues) {
int/*unsigned*/ key;
int value,value2;
if(outValues.length<2){
throw new IllegalArgumentException();
}
/* search in the starter's composition table */
for(;;) {
key=table[tableStart++];
if(key>=combineBackIndex) {
break;
}
tableStart+= ((table[tableStart]&0x8000) != 0)? 2 : 1;
}
/* mask off bit 15, the last-entry-in-the-list flag */
if((key&0x7fff)==combineBackIndex) {
/* found! combine! */
value=table[tableStart];
/* is the composition a starter that combines forward? */
key=(int)((UNSIGNED_INT_MASK)&((value&0x2000)+1));
/* get the composition result code point from the variable-length
* result value
*/
if((value&0x8000) != 0) {
if((value&0x4000) != 0) {
/* surrogate pair composition result */
value=(int)((UNSIGNED_INT_MASK)&((value&0x3ff)|0xd800));
value2=table[tableStart+1];
} else {
/* BMP composition result U+2000..U+ffff */
value=table[tableStart+1];
value2=0;
}
} else {
/* BMP composition result U+0000..U+1fff */
value&=0x1fff;
value2=0;
}
outValues[0]=value;
outValues[1]=value2;
return key;
} else {
/* not found */
return 0;
}
}
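/*
 * Illustrative sketch (not part of the implementation): how recompose() below
 * consumes a combine() result; combineFwdIndex and combineBackIndex stand for
 * the values obtained from the starter and from the back-combining character.
 *
 *   int[] out = new int[2];
 *   int result = combine(combiningTable, combineFwdIndex, combineBackIndex, out);
 *   if(result == 0) {
 *       // the pair does not compose
 *   } else {
 *       char value  = (char)out[0];           // lead (or only) code unit of the composition
 *       char value2 = (char)out[1];           // trail surrogate, or 0 for a BMP composition
 *       boolean combinesForward = result > 1; // the composition is itself a forward-combining starter
 *   }
 */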
private static final class RecomposeArgs{
char[] source;
int start;
int limit;
}
/*
* recompose the characters in [p..limit[
* (which is in NFD - decomposed and canonically ordered),
* adjust limit, and return the trailing cc
*
* since for NFKC we may get Jamos in decompositions, we need to
* recompose those too
*
* note that recomposition never lengthens the text:
* any character consists of either one or two code units;
* a composition may contain at most one more code unit than the original
* starter, while the combining mark that is removed has at least one code
* unit
*/
private static char/*unsigned byte*/ recompose(RecomposeArgs args, int options, UnicodeSet nx) {
int remove, q, r;
int /*unsigned*/ combineFlags;
int /*unsigned*/ combineFwdIndex, combineBackIndex;
int /*unsigned*/ result, value=0, value2=0;
int /*unsigned byte*/ prevCC;
boolean starterIsSupplementary;
int starter;
int[] outValues = new int[2];
starter=-1; /* no starter */
combineFwdIndex=0; /* will not be used until starter!=NULL */
starterIsSupplementary=false; /* will not be used until starter!=NULL */
prevCC=0;
NextCombiningArgs ncArg = new NextCombiningArgs();
ncArg.source = args.source;
ncArg.cc =0;
ncArg.c2 =0;
for(;;) {
ncArg.start = args.start;
combineFlags=getNextCombining(ncArg,args.limit,nx);
combineBackIndex=ncArg.combiningIndex;
args.start = ncArg.start;
if(((combineFlags&COMBINES_BACK)!=0) && starter!=-1) {
if((combineBackIndex&0x8000)!=0) {
/* c is a Jamo V/T, see if we can compose it with the
* previous character
*/
/* for the PRI #29 fix, check that there is no intervening combining mark */
if((options&BEFORE_PRI_29)!=0 || prevCC==0) {
remove=-1; /* NULL while no Hangul composition */
combineFlags=0;
ncArg.c2=args.source[starter];
if(combineBackIndex==0xfff2) {
/* Jamo V, compose with previous Jamo L and following
* Jamo T
*/
ncArg.c2=(char)(ncArg.c2-JAMO_L_BASE);
if(ncArg.c2<JAMO_L_COUNT) {
remove=args.start-1;
ncArg.c=(char)(HANGUL_BASE+(ncArg.c2*JAMO_V_COUNT+
(ncArg.c-JAMO_V_BASE))*JAMO_T_COUNT);
if(args.start!=args.limit &&
(ncArg.c2=(char)(args.source[args.start]
-JAMO_T_BASE))<JAMO_T_COUNT) {
++args.start;
ncArg.c+=ncArg.c2;
} else {
/* the result is an LV syllable, which is a starter (unlike LVT) */
combineFlags=COMBINES_FWD;
}
if(!nx_contains(nx, ncArg.c)) {
args.source[starter]=ncArg.c;
} else {
/* excluded */
if(!isHangulWithoutJamoT(ncArg.c)) {
--args.start; /* undo the ++args.start from reading the Jamo T */
}
/* c is modified but not used any more -- c=*(p-1); -- re-read the Jamo V/T */
remove=args.start;
}
}
/*
* Normally, the following can not occur:
* Since the input is in NFD, there are no Hangul LV syllables that
* a Jamo T could combine with.
* All Jamo Ts are combined above when handling Jamo Vs.
*
* However, before the PRI #29 fix, this can occur due to
* an intervening combining mark between the Hangul LV and the Jamo T.
*/
} else {
/* Jamo T, compose with previous Hangul that does not have a Jamo T */
if(isHangulWithoutJamoT(ncArg.c2)) {
ncArg.c2+=ncArg.c-JAMO_T_BASE;
if(!nx_contains(nx, ncArg.c2)) {
remove=args.start-1;
args.source[starter]=ncArg.c2;
}
}
}
if(remove!=-1) {
/* remove the Jamo(s) */
q=remove;
r=args.start;
while(r<args.limit) {
args.source[q++]=args.source[r++];
}
args.start=remove;
args.limit=q;
}
ncArg.c2=0; /* c2 held *starter temporarily */
if(combineFlags!=0) {
/*
* not starter=NULL because the composition is a Hangul LV syllable
* and might combine once more (but only before the PRI #29 fix)
*/
/* done? */
if(args.start==args.limit) {
return (char)prevCC;
}
/* the composition is a Hangul LV syllable which is a starter that combines forward */
combineFwdIndex=0xfff0;
/* we combined; continue with looking for compositions */
continue;
}
}
/*
* now: cc==0 and the combining index does not include
* "forward" -> the rest of the loop body will reset starter
* to NULL; technically, a composed Hangul syllable is a
* starter, but it does not combine forward now that we have
* consumed all eligible Jamos; for Jamo V/T, combineFlags
* does not contain COMBINES_FWD
*/
} else if(
/* the starter is not a Hangul LV or Jamo V/T and */
!((combineFwdIndex&0x8000)!=0) &&
/* the combining mark is not blocked and */
((options&BEFORE_PRI_29)!=0 ?
(prevCC!=ncArg.cc || prevCC==0) :
(prevCC<ncArg.cc || prevCC==0)) &&
/* the starter and the combining mark (c, c2) do combine */
0!=(result=combine(combiningTable,combineFwdIndex,
combineBackIndex, outValues)) &&
/* the composition result is not excluded */
!nx_contains(nx, (char)(value=outValues[0]),
(char)(value2=outValues[1]))
) {
/* replace the starter with the composition, remove the
* combining mark
*/
remove= ncArg.c2==0 ? args.start-1 : args.start-2; /* index to the combining mark */
/* replace the starter with the composition */
args.source[starter]=(char)value;
if(starterIsSupplementary) {
if(value2!=0) {
/* both are supplementary */
args.source[starter+1]=(char)value2;
} else {
/* the composition is shorter than the starter,
* move the intermediate characters forward one */
starterIsSupplementary=false;
q=starter+1;
r=q+1;
while(r<remove) {
args.source[q++]=args.source[r++];
}
--remove;
}
} else if(value2!=0) {
/* the composition is longer than the starter,
* move the intermediate characters back one */
starterIsSupplementary=true;
/* temporarily increment for the loop boundary */
++starter;
q=remove;
r=++remove;
while(starter<q) {
args.source[--r]=args.source[--q];
}
args.source[starter]=(char)value2;
--starter; /* undo the temporary increment */
/* } else { both are on the BMP, nothing more to do */
}
/* remove the combining mark by moving the following text
* over it */
if(remove<args.start) {
q=remove;
r=args.start;
while(r<args.limit) {
args.source[q++]=args.source[r++];
}
args.start=remove;
args.limit=q;
}
/* keep prevCC because we removed the combining mark */
/* done? */
if(args.start==args.limit) {
return (char)prevCC;
}
/* is the composition a starter that combines forward? */
if(result>1) {
combineFwdIndex=getCombiningIndexFromStarter((char)value,
(char)value2);
} else {
starter=-1;
}
/* we combined; continue with looking for compositions */
continue;
}
}
/* no combination this time */
prevCC=ncArg.cc;
if(args.start==args.limit) {
return (char)prevCC;
}
/* if (c, c2) did not combine, then check if it is a starter */
if(ncArg.cc==0) {
/* found a new starter; combineFlags==0 if (c, c2) is excluded */
if((combineFlags&COMBINES_FWD)!=0) {
/* it may combine with something, prepare for it */
if(ncArg.c2==0) {
starterIsSupplementary=false;
starter=args.start-1;
} else {
starterIsSupplementary=true; /* (c, c2) is a supplementary code point */
starter=args.start-2;
}
combineFwdIndex=combineBackIndex;
} else {
/* it will not combine with anything */
starter=-1;
}
} else if((options&OPTIONS_COMPOSE_CONTIGUOUS)!=0) {
/* FCC: no discontiguous compositions; any intervening character blocks */
starter=-1;
}
}
}
// find the last true starter between src[start]....src[current] going
// backwards and return its index
private static int findPreviousStarter(char[]src, int srcStart, int current,
int/*unsigned*/ ccOrQCMask,
int/*unsigned*/ decompQCMask,
char minNoMaybe) {
long norm32;
PrevArgs args = new PrevArgs();
args.src = src;
args.start = srcStart;
args.current = current;
while(args.start<args.current) {
norm32= getPrevNorm32(args, minNoMaybe, ccOrQCMask|decompQCMask);
if(isTrueStarter(norm32, ccOrQCMask, decompQCMask)) {
break;
}
}
return args.current;
}
/* find the first true starter in [src..limit[ and return the
* pointer to it
*/
private static int/*index*/ findNextStarter(char[] src,int start,int limit,
int/*unsigned*/ qcMask,
int/*unsigned*/ decompQCMask,
char minNoMaybe) {
int p;
long/*unsigned*/ norm32;
int ccOrQCMask;
char c, c2;
ccOrQCMask=CC_MASK|qcMask;
DecomposeArgs decompArgs = new DecomposeArgs();
for(;;) {
if(start==limit) {
break; /* end of string */
}
c=src[start];
if(c<minNoMaybe) {
break; /* catches NUL terminator, too */
}
norm32=getNorm32(c);
if((norm32&ccOrQCMask)==0) {
break; /* true starter */
}
if(isNorm32LeadSurrogate(norm32)) {
/* c is a lead surrogate, get the real norm32 */
if((start+1)==limit ||
!UTF16.isTrailSurrogate(c2=(src[start+1]))){
/* unmatched first surrogate: counts as a true starter */
break;
}
norm32=getNorm32FromSurrogatePair(norm32, c2);
if((norm32&ccOrQCMask)==0) {
break; /* true starter */
}
} else {
c2=0;
}
/* (c, c2) is not a true starter but its decomposition may be */
if((norm32&decompQCMask)!=0) {
/* (c, c2) decomposes, get everything from the variable-length
* extra data */
p=decompose(norm32, decompQCMask, decompArgs);
/* get the first character's norm32 to check if it is a true
* starter */
if(decompArgs.cc==0 && (getNorm32(extraData,p, qcMask)&qcMask)==0) {
break; /* true starter */
}
}
start+= c2==0 ? 1 : 2; /* not a true starter, continue */
}
return start;
}
private static final class ComposePartArgs{
int prevCC;
int length; /* length of decomposed part */
}
/* decompose and recompose [prevStarter..src[ */
private static char[] composePart(ComposePartArgs args,
int prevStarter,
char[] src, int start, int limit,
int options,
UnicodeSet nx) {
int recomposeLimit;
boolean compat =((options&OPTIONS_COMPAT)!=0);
/* decompose [prevStarter..src[ */
int[] outTrailCC = new int[1];
char[] buffer = new char[(limit-prevStarter)*MAX_BUFFER_SIZE];
for(;;){
args.length=decompose(src,prevStarter,(start),
buffer,0,buffer.length,
compat,outTrailCC,nx);
if(args.length<=buffer.length){
break;
}else{
buffer = new char[args.length];
}
}
/* recompose the decomposition */
recomposeLimit=args.length;
if(args.length>=2) {
RecomposeArgs rcArgs = new RecomposeArgs();
rcArgs.source = buffer;
rcArgs.start = 0;
rcArgs.limit = recomposeLimit;
args.prevCC=recompose(rcArgs, options, nx);
recomposeLimit = rcArgs.limit;
}
/* return with a pointer to the recomposition and its length */
args.length=recomposeLimit;
return buffer;
}
private static boolean composeHangul(char prev, char c,
long/*unsigned*/ norm32,
char[] src,int[] srcIndex, int limit,
boolean compat,
char[] dest,int destIndex,
UnicodeSet nx) {
int start=srcIndex[0];
if(isJamoVTNorm32JamoV(norm32)) {
/* c is a Jamo V, compose with previous Jamo L and
* following Jamo T */
prev=(char)(prev-JAMO_L_BASE);
if(prev<JAMO_L_COUNT) {
c=(char)(HANGUL_BASE+(prev*JAMO_V_COUNT+
(c-JAMO_V_BASE))*JAMO_T_COUNT);
/* check if the next character is a Jamo T (normal or
* compatibility) */
if(start!=limit) {
char next, t;
next=src[start];
if((t=(char)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
/* normal Jamo T */
++start;
c+=t;
} else if(compat) {
/* if NFKC, then check for compatibility Jamo T
* (BMP only) */
norm32=getNorm32(next);
if(isNorm32Regular(norm32) && ((norm32&QC_NFKD)!=0)) {
int p /*index into extra data array*/;
DecomposeArgs dcArgs = new DecomposeArgs();
p=decompose(norm32, QC_NFKD, dcArgs);
if(dcArgs.length==1 &&
(t=(char)(extraData[p]-JAMO_T_BASE))
<JAMO_T_COUNT) {
/* compatibility Jamo T */
++start;
c+=t;
}
}
}
}
if(nx_contains(nx, c)) {
if(!isHangulWithoutJamoT(c)) {
--start; /* undo ++start from reading the Jamo T */
}
return false;
}
dest[destIndex]=c;
srcIndex[0]=start;
return true;
}
} else if(isHangulWithoutJamoT(prev)) {
/* c is a Jamo T, compose with previous Hangul LV that does not
* contain a Jamo T */
c=(char)(prev+(c-JAMO_T_BASE));
if(nx_contains(nx, c)) {
return false;
}
dest[destIndex]=c;
srcIndex[0]=start;
return true;
}
return false;
}
/*
public static int compose(char[] src, char[] dest,boolean compat, UnicodeSet nx){
return compose(src,0,src.length,dest,0,dest.length,compat, nx);
}
*/
public static int compose(char[] src, int srcStart, int srcLimit,
char[] dest,int destStart,int destLimit,
int options,UnicodeSet nx) {
int prevSrc, prevStarter;
long/*unsigned*/ norm32;
int ccOrQCMask, qcMask;
int reorderStartIndex, length;
char c, c2, minNoMaybe;
int/*unsigned byte*/ cc, prevCC;
int[] ioIndex = new int[1];
int destIndex = destStart;
int srcIndex = srcStart;
if((options&OPTIONS_COMPAT)!=0) {
minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE];
qcMask=QC_NFKC;
} else {
minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE];
qcMask=QC_NFC;
}
/*
* prevStarter points to the last character before the current one
* that is a "true" starter with cc==0 and quick check "yes".
*
* prevStarter will be used instead of looking for a true starter
* while incrementally decomposing [prevStarter..prevSrc[
* in _composePart(). Having a good prevStarter makes it possible to just decompose
* the entire [prevStarter..prevSrc[.
*
* When _composePart() backs out from prevSrc back to prevStarter,
* then it also backs out destIndex by the same amount.
* Therefore, at all times, the (prevSrc-prevStarter) source units
* must correspond 1:1 to destination units counted with destIndex,
* except for reordering.
* This is true for the qc "yes" characters copied in the fast loop,
* and for pure reordering.
* prevStarter must be set forward to src when this is not true:
* In _composePart() and after composing a Hangul syllable.
*
* This mechanism relies on the assumption that the decomposition of a
* true starter also begins with a true starter. gennorm/store.c checks
* for this.
*/
prevStarter=srcIndex;
ccOrQCMask=CC_MASK|qcMask;
/*destIndex=*/reorderStartIndex=0;/* ####TODO#### check this **/
prevCC=0;
/* avoid compiler warnings */
norm32=0;
c=0;
for(;;) {
/* count code units below the minimum or with irrelevant data for
* the quick check */
prevSrc=srcIndex;
while(srcIndex!=srcLimit && ((c=src[srcIndex])<minNoMaybe ||
((norm32=getNorm32(c))&ccOrQCMask)==0)) {
prevCC=0;
++srcIndex;
}
/* copy these code units all at once */
if(srcIndex!=prevSrc) {
length=(int)(srcIndex-prevSrc);
if((destIndex+length)<=destLimit) {
System.arraycopy(src,prevSrc,dest,destIndex,length);
}
destIndex+=length;
reorderStartIndex=destIndex;
/* set prevStarter to the last character in the quick check
* loop */
prevStarter=srcIndex-1;
if(UTF16.isTrailSurrogate(src[prevStarter]) &&
prevSrc<prevStarter &&
UTF16.isLeadSurrogate(src[(prevStarter-1)])) {
--prevStarter;
}
prevSrc=srcIndex;
}
/* end of source reached? */
if(srcIndex==srcLimit) {
break;
}
/* c already contains *src and norm32 is set for it, increment src*/
++srcIndex;
/*
* source buffer pointers:
*
* all done quick check current char not yet
* "yes" but (c, c2) processed
* may combine
* forward
* [-------------[-------------[-------------[-------------[
* | | | | |
* start prevStarter prevSrc src limit
*
*
* destination buffer pointers and indexes:
*
* all done might take not filled yet
* characters for
* reordering
* [-------------[-------------[-------------[
* | | | |
* dest reorderStartIndex destIndex destCapacity
*/
/* check one above-minimum, relevant code unit */
/*
* norm32 is for c=*(src-1), and the quick check flag is "no" or
* "maybe", and/or cc!=0
* check for Jamo V/T, then for surrogates and regular characters
* c is not a Hangul syllable or Jamo L because
* they are not marked with no/maybe for NFC & NFKC(and their cc==0)
*/
if(isNorm32HangulOrJamo(norm32)) {
/*
* c is a Jamo V/T:
* try to compose with the previous character, Jamo V also with
* a following Jamo T, and set values here right now in case we
* just continue with the main loop
*/
prevCC=cc=0;
reorderStartIndex=destIndex;
ioIndex[0]=srcIndex;
if(
destIndex>0 &&
composeHangul(src[(prevSrc-1)], c, norm32,src, ioIndex,
srcLimit, (options&OPTIONS_COMPAT)!=0, dest,
destIndex<=destLimit ? destIndex-1: 0,
nx)
) {
srcIndex=ioIndex[0];
prevStarter=srcIndex;
continue;
}
srcIndex = ioIndex[0];
/* the Jamo V/T did not compose into a Hangul syllable, just
* append to dest */
c2=0;
length=1;
prevStarter=prevSrc;
} else {
if(isNorm32Regular(norm32)) {
c2=0;
length=1;
} else {
/* c is a lead surrogate, get the real norm32 */
if(srcIndex!=srcLimit &&
UTF16.isTrailSurrogate(c2=src[srcIndex])) {
++srcIndex;
length=2;
norm32=getNorm32FromSurrogatePair(norm32, c2);
} else {
/* c is an unpaired lead surrogate, nothing to do */
c2=0;
length=1;
norm32=0;
}
}
ComposePartArgs args =new ComposePartArgs();
/* we are looking at the character (c, c2) at [prevSrc..src[ */
if(nx_contains(nx, c, c2)) {
/* excluded: norm32==0 */
cc=0;
} else if((norm32&qcMask)==0) {
cc=(int)((UNSIGNED_BYTE_MASK)&(norm32>>CC_SHIFT));
} else {
char[] p;
/*
* find appropriate boundaries around this character,
* decompose the source text from between the boundaries,
* and recompose it
*
* this puts the intermediate text into the side buffer because
* it might be longer than the recomposition end result,
* or the destination buffer may be too short or missing
*
* note that destIndex may be adjusted backwards to account
* for source text that passed the quick check but needed to
* take part in the recomposition
*/
int decompQCMask=(qcMask<<2)&0xf; /* decomposition quick check mask */
/*
* find the last true starter in [prevStarter..src[
* it is either the decomposition of the current character (at prevSrc),
* or prevStarter
*/
if(isTrueStarter(norm32, CC_MASK|qcMask, decompQCMask)) {
prevStarter=prevSrc;
} else {
/* adjust destIndex: back out what had been copied with qc "yes" */
destIndex-=prevSrc-prevStarter;
}
/* find the next true starter in [src..limit[ */
srcIndex=findNextStarter(src, srcIndex,srcLimit, qcMask,
decompQCMask, minNoMaybe);
//args.prevStarter = prevStarter;
args.prevCC = prevCC;
//args.destIndex = destIndex;
args.length = length;
p=composePart(args,prevStarter,src,srcIndex,srcLimit,options,nx);
if(p==null) {
/* an error occurred (out of memory) */
break;
}
prevCC = args.prevCC;
length = args.length;
/* append the recomposed buffer contents to the destination
* buffer */
if((destIndex+args.length)<=destLimit) {
int i=0;
while(i<args.length) {
dest[destIndex++]=p[i++];
--length;
}
} else {
/* buffer overflow */
/* keep incrementing the destIndex for preflighting */
destIndex+=length;
}
prevStarter=srcIndex;
continue;
}
}
/* append the single code point (c, c2) to the destination buffer */
if((destIndex+length)<=destLimit) {
if(cc!=0 && cc<prevCC) {
/* (c, c2) is out of order with respect to the preceding
* text */
int reorderSplit= destIndex;
destIndex+=length;
prevCC=insertOrdered(dest,reorderStartIndex, reorderSplit,
destIndex, c, c2, cc);
} else {
/* just append (c, c2) */
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
prevCC=cc;
}
} else {
/* buffer overflow */
/* keep incrementing the destIndex for preflighting */
destIndex+=length;
prevCC=cc;
}
}
return destIndex - destStart;
}
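/*
 * Illustrative usage sketch (not part of the implementation): like decompose(),
 * compose() keeps counting output units past destLimit, so a caller can
 * preflight with a possibly-too-small buffer and retry; "input" stands for the
 * caller's text, options==0 selects NFC, and nx==null means no exclusions.
 *
 *   char[] out = new char[input.length];
 *   int needed = compose(input, 0, input.length, out, 0, out.length, 0, null);
 *   if(needed > out.length) {
 *       out = new char[needed];
 *       needed = compose(input, 0, input.length, out, 0, out.length, 0, null);
 *   }
 *   // out[0..needed) now holds the NFC form of the input
 */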
/* make FCD --------------------------------------------------------------*/
private static int/*index*/ findSafeFCD(char[] src, int start, int limit,
char fcd16) {
char c, c2;
/*
* find the first position in [src..limit[ after some cc==0 according
* to FCD data
*
* at the beginning of the loop, we have fcd16 from before src
*
* stop at positions:
* - after trail cc==0
* - at the end of the source
* - before lead cc==0
*/
for(;;) {
/* stop if trail cc==0 for the previous character */
if((fcd16&0xff)==0) {
break;
}
/* get c=*src - stop at end of string */
if(start==limit) {
break;
}
c=src[start];
/* stop if lead cc==0 for this character */
if(c<MIN_WITH_LEAD_CC || (fcd16=getFCD16(c))==0) {
break; /* catches terminating NUL, too */
}
if(!UTF16.isLeadSurrogate(c)) {
if(fcd16<=0xff) {
break;
}
++start;
} else if(start+1!=limit &&
(UTF16.isTrailSurrogate(c2=src[start+1]))) {
/* c is a lead surrogate, get the real fcd16 */
fcd16=getFCD16FromSurrogatePair(fcd16, c2);
if(fcd16<=0xff) {
break;
}
start+=2;
} else {
/* c is an unpaired first surrogate, lead cc==0 */
break;
}
}
return start;
}
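/**
* Canonically decomposes and reorders [start..decompLimit[ into dest,
* starting at destIndexArr[0] (updated in place; it keeps growing past
* dest.length for preflighting when the output does not fit).
* Returns the trail combining class of the last character written.
*/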
private static int/*unsigned byte*/ decomposeFCD(char[] src,
int start,int decompLimit,
char[] dest,
int[] destIndexArr,
UnicodeSet nx) {
char[] p=null;
int pStart=-1;
long /*unsigned int*/ norm32;
int reorderStartIndex;
char c, c2;
int/*unsigned byte*/ prevCC;
DecomposeArgs args = new DecomposeArgs();
int destIndex = destIndexArr[0];
/*
* canonically decompose [src..decompLimit[
*
* all characters in this range have some non-zero cc,
* directly or in decomposition,
* so that we do not need to check in the following for quick-check
* limits etc.
*
* there _are_ _no_ Hangul syllables or Jamos in here because they are
* FCD-safe (cc==0)!
*
* we also do not need to check for c==0 because we have an established
* decompLimit
*/
reorderStartIndex=destIndex;
prevCC=0;
while(start<decompLimit) {
c=src[start++];
norm32=getNorm32(c);
if(isNorm32Regular(norm32)) {
c2=0;
args.length=1;
} else {
/*
* reminder: this function is called with [src..decompLimit[
* not containing any Hangul/Jamo characters,
* therefore the only specials are lead surrogates
*/
/* c is a lead surrogate, get the real norm32 */
if(start!=decompLimit && UTF16.isTrailSurrogate(c2=src[start])){
++start;
args.length=2;
norm32=getNorm32FromSurrogatePair(norm32, c2);
} else {
c2=0;
args.length=1;
norm32=0;
}
}
/* get the decomposition and the lead and trail cc's */
if(nx_contains(nx, c, c2)) {
/* excluded: norm32==0 */
args.cc=args.trailCC=0;
p=null;
} else if((norm32&QC_NFD)==0) {
/* c does not decompose */
args.cc=args.trailCC=(int)((UNSIGNED_BYTE_MASK)&
(norm32>>CC_SHIFT));
p=null;
} else {
/* c decomposes, get everything from the variable-length extra
* data */
pStart=decompose(norm32, args);
p=extraData;
if(args.length==1) {
/* fastpath a single code unit from decomposition */
c=p[pStart];
c2=0;
p=null;
}
}
/* append the decomposition to the destination buffer, assume
* length>0 */
if((destIndex+args.length)<=dest.length) {
int reorderSplit=destIndex;
if(p==null) {
/* fastpath: single code point */
if(args.cc!=0 && args.cc<prevCC) {
/* (c, c2) is out of order with respect to the preceding
* text */
destIndex+=args.length;
args.trailCC=insertOrdered(dest,reorderStartIndex,
reorderSplit, destIndex,
c, c2, args.cc);
} else {
/* just append (c, c2) */
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
}
} else {
/* general: multiple code points (ordered by themselves)
* from decomposition */
if(args.cc!=0 && args.cc<prevCC) {
/* the decomposition is out of order with respect to
* the preceding text */
destIndex+=args.length;
args.trailCC=mergeOrdered(dest,reorderStartIndex,
reorderSplit, p, pStart,
pStart+args.length);
} else {
/* just append the decomposition */
do {
dest[destIndex++]=p[pStart++];
} while(--args.length>0);
}
}
} else {
/* buffer overflow */
/* keep incrementing the destIndex for preflighting */
destIndex+=args.length;
}
prevCC=args.trailCC;
if(prevCC==0) {
reorderStartIndex=destIndex;
}
}
destIndexArr[0]=destIndex;
return prevCC;
}
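/**
* Writes the FCD form of [srcStart..srcLimit[ to dest[destStart..destLimit[,
* applying the optional exclusion set nx (excluded code points are inert).
* Returns the length of the output; if it exceeds destLimit-destStart, the
* destination is only partially written (preflighting).
*/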
public static int makeFCD(char[] src, int srcStart, int srcLimit,
char[] dest, int destStart, int destLimit,
UnicodeSet nx) {
int prevSrc, decompStart;
int destIndex, length;
char c, c2;
int /* unsigned int*/ fcd16;
int prevCC, cc;
/* initialize */
decompStart=srcStart;
destIndex=destStart;
prevCC=0;
c=0;
fcd16=0;
int[] destIndexArr = new int[1];
destIndexArr[0]=destIndex;
for(;;) {
/* skip a run of code units below the minimum or with irrelevant
* data for the FCD check */
prevSrc=srcStart;
for(;;) {
if(srcStart==srcLimit) {
break;
} else if((c=src[srcStart])<MIN_WITH_LEAD_CC) {
prevCC=(int)-c;
} else if((fcd16=getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
++srcStart;
}
/*
* prevCC has values from the following ranges:
* 0..0xff - the previous trail combining class
* <0 - the negative value of the previous code unit;
* that code unit was <MIN_WITH_LEAD_CC and its getFCD16()
* was deferred so that average text is checked faster
*/

/* copy these code units all at once */
if(srcStart!=prevSrc) {
length=(int)(srcStart-prevSrc);
if((destIndex+length)<=destLimit) {
System.arraycopy(src,prevSrc,dest,destIndex,length);
}
destIndex+=length;
prevSrc=srcStart;
/* prevCC<0 is only possible from the above loop, i.e., only if
* prevSrc<src */
if(prevCC<0) {
/* the previous character was <MIN_WITH_LEAD_CC, we
* need to get its trail cc */
if(!nx_contains(nx, (int)-prevCC)) {
prevCC=(int)(getFCD16((int)-prevCC)&0xff);
} else {
prevCC=0; /* excluded: fcd16==0 */
}
/*
* set a pointer to this below-U+0300 character;
* if prevCC==0 then it will be moved to after this character
* below
*/
decompStart=prevSrc-1;
}
}
/*
* now:
* prevSrc==src - used later to adjust destIndex before
* decomposition
* prevCC>=0
*/
/* end of source reached? */
if(srcStart==srcLimit) {
break;
}
/* set a pointer to after the last source position where prevCC==0*/
if(prevCC==0) {
decompStart=prevSrc;
}
/* c already contains *src and fcd16 is set for it, increment src */
++srcStart;
/* check one above-minimum, relevant code unit */
if(UTF16.isLeadSurrogate(c)) {
/* c is a lead surrogate, get the real fcd16 */
if(srcStart!=srcLimit &&
UTF16.isTrailSurrogate(c2=src[srcStart])) {
++srcStart;
fcd16=getFCD16FromSurrogatePair((char)fcd16, c2);
} else {
c2=0;
fcd16=0;
}
} else {
c2=0;
}
/* we are looking at the character (c, c2) at [prevSrc..src[ */
if(nx_contains(nx, c, c2)) {
fcd16=0; /* excluded: fcd16==0 */
}
/* check the combining order, get the lead cc */
cc=(int)(fcd16>>8);
if(cc==0 || cc>=prevCC) {
/* the order is ok */
if(cc==0) {
decompStart=prevSrc;
}
prevCC=(int)(fcd16&0xff);
/* just append (c, c2) */
length= c2==0 ? 1 : 2;
if((destIndex+length)<=destLimit) {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
} else {
destIndex+=length;
}
} else {
/*
* back out the part of the source that we copied already but
* is now going to be decomposed;
* prevSrc is set to after what was copied
*/
destIndex-=(int)(prevSrc-decompStart);
/*
* find the part of the source that needs to be decomposed;
* to be safe and simple, decompose to before the next character
* with lead cc==0
*/
srcStart=findSafeFCD(src,srcStart, srcLimit, (char)fcd16);
/*
* the source text does not fulfill the conditions for FCD;
* decompose and reorder a limited piece of the text
*/
destIndexArr[0] = destIndex;
prevCC=decomposeFCD(src,decompStart, srcStart,dest,
destIndexArr,nx);
decompStart=srcStart;
destIndex=destIndexArr[0];
}
}
return destIndex - destStart;
}
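/**
* Returns the canonical combining class (ccc) of code point c,
* taken from bits 8..15 of its norm32 trie value.
*/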
public static int getCombiningClass(int c) {
long norm32;
norm32=getNorm32(c);
return (int)((norm32>>CC_SHIFT)&0xFF);
}
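/**
* Returns true if c has the Full_Composition_Exclusion property.
* Requires the auxiliary trie (data format version 2.1 or later);
* returns false if that data is not available.
*/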
public static boolean isFullCompositionExclusion(int c) {
if(isFormatVersion_2_1) {
int aux =AuxTrieImpl.auxTrie.getCodePointValue(c);
return (aux & AUX_COMP_EX_MASK)!=0;
} else {
return false;
}
}
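/**
* Returns true if c is a safe starting point for canonical processing,
* i.e. its "unsafe" flag in the auxiliary trie is not set.
* Returns false if the format 2.1 auxiliary data is not available.
*/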
public static boolean isCanonSafeStart(int c) {
if(isFormatVersion_2_1) {
int aux = AuxTrieImpl.auxTrie.getCodePointValue(c);
return (aux & AUX_UNSAFE_MASK)==0;
} else {
return false;
}
}
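/**
* Looks up the canonical-start set for code point c in the canonStartSets
* data and, if found, fills fillSet (either from a serialized set or as a
* single code point). Returns true if a set was found.
*/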
public static boolean getCanonStartSet(int c, USerializedSet fillSet) {
if(fillSet!=null && canonStartSets!=null) {
/*
* binary search for c
*
* There are two search tables,
* one for BMP code points and one for supplementary ones.
* See unormimp.h for details.
*/
char[] table;
int i=0, start, limit;
int[] indexes = (int[]) canonStartSets[CANON_SET_INDICIES_INDEX];
char[] startSets = (char[]) canonStartSets[CANON_SET_START_SETS_INDEX];
if(c<=0xffff) {
table=(char[]) canonStartSets[CANON_SET_BMP_TABLE_INDEX];
start=0;
limit=table.length;
/* each entry is a pair { c, result } */
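/* binary search: midpoint (start+limit)/2, rounded down to a pair boundary */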
while(start<limit-2) {
i=(char)(((start+limit)/4)*2);
if(c<table[i]) {
limit=i;
} else {
start=i;
}
}
//System.out.println(i);
/* found? */
if(c==table[start]) {
i=table[start+1];
if((i & CANON_SET_BMP_MASK)==CANON_SET_BMP_IS_INDEX) {
/* result 01xxxxxx xxxxxx contains index x to a
* USerializedSet */
i&=(CANON_SET_MAX_CANON_SETS-1);
return fillSet.getSet(startSets,(i-indexes.length));
} else {
/* other result values are BMP code points for
* single-code point sets */
fillSet.setToOne(i);
return true;
}
}
} else {
char high, low, h,j=0;
table=(char[]) canonStartSets[CANON_SET_SUPP_TABLE_INDEX];
start=0;
limit=table.length;
high=(char)(c>>16);
low=(char)c;
/* each entry is a triplet { high(c), low(c), result } */
while(start<limit-3) {
/* midpoint (start+limit)/2, rounded down to a triplet boundary */
i=(char)(((start+limit)/6)*3);
j=(char)(table[i]&0x1f); /* high word */
int tableVal = table[i+1];
int lowInt = low;
if(high<j || ((tableVal>lowInt) && (high==j))) {
limit=i;
} else {
start=i;
}
//System.err.println("\t((high==j) && (table[i+1]>low)) == " + ((high==j) && (tableVal>lowInt)) );
// KLUDGE: IBM JIT in 1.4.0 is sooo broken
// The below lines make TestExhaustive pass
if(ICUDebug.enabled()){
System.err.println("\t\t j = " + Utility.hex(j,4) +
"\t i = " + Utility.hex(i,4) +
"\t high = "+ Utility.hex(high) +
"\t low = " + Utility.hex(lowInt,4) +
"\t table[i+1]: "+ Utility.hex(tableVal,4)
);
}
}
/* found? */
h=table[start];
//System.err.println("c: \\U"+ Integer.toHexString(c)+" i : "+Integer.toHexString(i) +" h : " + Integer.toHexString(h));
int tableVal1 = table[start+1];
int lowInt = low;
if(high==(h&0x1f) && lowInt==tableVal1) {
int tableVal2 = table[start+2];
i=tableVal2;
if((h&0x8000)==0) {
/* the result is an index to a USerializedSet */
return fillSet.getSet(startSets,(i-indexes.length));
} else {
/*
* single-code point set {x} in
* triplet { 100xxxxx 000hhhhh llllllll llllllll xxxxxxxx xxxxxxxx }
*/
//i|=((int)h & 0x1f00)<<8; /* add high bits from high(c) */
int temp = ((int)h & 0x1f00)<<8;
i|=temp; /* add high bits from high(c) */
fillSet.setToOne((int)i);
return true;
}
}
}
}
return false; /* not found */
}
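/**
* Gets the FC_NFKC_Closure string for c from the auxiliary data.
* Copies it into dest if it fits and always returns its length (0 if none).
*/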
public static int getFC_NFKC_Closure(int c, char[] dest) {
int destCapacity;
if(dest==null ) {
destCapacity=0;
}else{
destCapacity = dest.length;
}
int aux =AuxTrieImpl.auxTrie.getCodePointValue(c);
aux&= AUX_FNC_MASK;
if(aux!=0) {
int s;
int index=aux;
int length;
s =extraData[index];
if(s<0xff00) {
/* s points to the single-unit string */
length=1;
} else {
length=s&0xff;
++index;
}
if(0<length && length<=destCapacity) {
System.arraycopy(extraData,index,dest,0,length);
}
return length;
} else {
return 0;
}
}
/* Is c an NF<mode>-skippable code point? See unormimp.h. */
public static boolean isNFSkippable(int c, Normalizer.Mode mode, long mask) {
long /*unsigned int*/ norm32;
mask = mask & UNSIGNED_INT_MASK;
char aux;
/* check conditions (a)..(e), see unormimp.h */
norm32 = getNorm32(c);
if((norm32&mask)!=0) {
return false; /* fails (a)..(e), not skippable */
}
if(mode == Normalizer.NFD || mode == Normalizer.NFKD || mode == Normalizer.NONE){
return true; /* NF*D, passed (a)..(c), is skippable */
}
/* check conditions (a)..(e), see unormimp.h */
/* NF*C/FCC, passed (a)..(e) */
if((norm32& QC_NFD)==0) {
return true; /* no canonical decomposition, is skippable */
}
/* check Hangul syllables algorithmically */
if(isNorm32HangulOrJamo(norm32)) {
/* Jamo passed (a)..(e) above, must be Hangul */
return !isHangulWithoutJamoT((char)c); /* LVT are skippable, LV are not */
}
/* if(mode<=UNORM_NFKC) { -- enable when implementing FCC */
/* NF*C, test (f) flag */
if(!isFormatVersion_2_2) {
return false; /* no (f) data, say not skippable to be safe */
}
aux = AuxTrieImpl.auxTrie.getCodePointValue(c);
return (aux&AUX_NFC_SKIP_F_MASK)==0; /* TRUE=skippable if the (f) flag is not set */
/* } else { FCC, test fcd<=1 instead of the above } */
}
/*
private static final boolean
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
// add the start code point to the USet
uset_add((USet *)context, start);
return TRUE;
}
*/
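/**
* Adds the start code point of each same-value range of the normalization,
* FCD and (if present) auxiliary tries, plus Hangul LV syllables, LV+1 and
* Hangul+1, to set. Returns set for chaining.
*/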
public static UnicodeSet addPropertyStarts(UnicodeSet set) {
int c;
/* add the start code point of each same-value range of each trie */
//utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set);
TrieIterator normIter = new TrieIterator(NormTrieImpl.normTrie);
RangeValueIterator.Element normResult = new RangeValueIterator.Element();
while(normIter.next(normResult)){
set.add(normResult.start);
}
//utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
TrieIterator fcdIter = new TrieIterator(FCDTrieImpl.fcdTrie);
RangeValueIterator.Element fcdResult = new RangeValueIterator.Element();
while(fcdIter.next(fcdResult)){
set.add(fcdResult.start);
}
if(isFormatVersion_2_1){
//utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set);
TrieIterator auxIter = new TrieIterator(AuxTrieImpl.auxTrie);
RangeValueIterator.Element auxResult = new RangeValueIterator.Element();
while(auxIter.next(auxResult)){
set.add(auxResult.start);
}
}
/* add Hangul LV syllables and LV+1 because of skippables */
for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) {
set.add(c);
set.add(c+1);
}
set.add(HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
return set; // for chaining
}
/**
* Internal API, used in UCharacter.getIntPropertyValue().
* @internal
* @param c code point
* @param modeValue numeric value compatible with Mode
* @return numeric value compatible with QuickCheck
*/
public static final int quickCheck(int c, int modeValue) {
final int[] qcMask = { /* UNORM_MODE_COUNT entries, indexed by modeValue */
0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC
};
int norm32=(int)getNorm32(c)&qcMask[modeValue];
if(norm32==0) {
return 1; // YES
} else if((norm32&QC_ANY_NO)!=0) {
return 0; // NO
} else /* _NORM_QC_ANY_MAYBE */ {
return 2; // MAYBE;
}
}
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform
* incremental, per-code unit, FCD checks in collation.
* A single trie reference is sufficient because the trie index values are
* offset by the index size, so that the same object is used to access the
* trie data.
* @internal
*/
///CLOVER:OFF
public CharTrie getFCDTrie(){
return FCDTrieImpl.fcdTrie;
}
///CLOVER:ON
/* compare canonically equivalent ---------------------------------------- */
/*
* Compare two strings for canonical equivalence.
* Further options include case-insensitive comparison and
* code point order (as opposed to code unit order).
*
* In this function, canonical equivalence is optional as well.
* If canonical equivalence is tested, then both strings must fulfill
* the FCD check.
*
* Semantically, this is equivalent to
* strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
* where code point order, NFD and foldCase are all optional.
*
* String comparisons almost always yield results before processing both
* strings completely.
* They are generally more efficient working incrementally instead of
* performing the sub-processing (strlen, normalization, case-folding)
* on the entire strings first.
*
* It is also unnecessary to normalize characters that are identical in
* both strings.
*
* This function works in principle as follows:
*
* loop {
* get one code unit c1 from s1 (-1 if end of source)
* get one code unit c2 from s2 (-1 if end of source)
*
* if(either string finished) {
* return result;
* }
* if(c1==c2) {
* continue;
* }
*
* // c1!=c2
* try to decompose/case-fold c1/c2, and continue if one does;
*
* // still c1!=c2 and neither decomposes/case-folds, return result
* return c1-c2;
* }
*
* When a character decomposes, then the pointer for that source changes to
* the decomposition, pushing the previous pointer onto a stack.
* When the end of the decomposition is reached, then the code unit reader
* pops the previous source from the stack.
* (Same for case-folding.)
*
* This is complicated further by operating on variable-width UTF-16.
* The top part of the loop works on code units, while lookups for decomposition
* and case-folding need code points.
* Code points are assembled after the equality/end-of-source part.
* The source pointer is only advanced beyond all code units when the code point
* actually decomposes/case-folds.
*
* If we were on a trail surrogate unit when assembling a code point,
* and the code point decomposes/case-folds, then the decomposition/folding
* result must be compared with the part of the other string that corresponds to
* this string's lead surrogate.
* Since we only assemble a code point when hitting a trail unit when the
* preceding lead units were identical, we back up the other string by one unit
* in such a case.
*
* The optional code point order comparison at the end works with
* the same fix-up as the other code point order comparison functions.
* See ustring.c and the comment near the end of this function.
*
* Assumption: A decomposition or case-folding result string never contains
* a single surrogate. This is a safe assumption in the Unicode Standard.
* Therefore, we do not need to check for surrogate pairs across
* decomposition/case-folding boundaries.
* Further assumptions (see verifications in tstnorm.cpp):
* The API function checks for FCD first, while the core function
* first case-folds and then decomposes. This requires that case-folding does not
* un-FCD any strings.
*
* The API function may also NFD the input and turn off decomposition.
* This requires that case-folding does not un-NFD strings either.
*
* TODO If either of the above two assumptions is violated,
* then this entire code must be re-thought.
* If this happens, then a simple solution is to case-fold both strings up front
* and to turn off UNORM_INPUT_IS_FCD.
* We already do this when the strings are not both in FCD because makeFCD
* would be a partial NFD before the case folding, which does not work.
* Note that all of this is only a problem when case-folding _and_
* canonical equivalence come together.
*
* This function could be moved to a different source file, at increased cost
* for calling the decomposition access function.
*/
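/*
* Illustrative usage sketch: with the COMPARE_EQUIV option set, canonically
* equivalent strings compare equal, e.g.
*
*   // U+00C5 "A with ring above" vs. U+0041 U+030A "A" + combining ring above
*   int r = cmpEquivFold("\u00C5", "A\u030A", COMPARE_EQUIV);
*   // r == 0, since the two strings are canonically equivalent
*/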
// stack element for previous-level source/decomposition pointers
private static class CmpEquivLevel {
char[] source;
int start;
int s;
int limit;
}
/**
* Get the canonical decomposition for one code point.
* @param c code point
* @param buffer out-only buffer that receives the decomposition
*        (Hangul syllables are decomposed algorithmically)
* @return length of the decomposition in buffer, or 0 if c does not decompose
* @internal
*/
private static int decompose(int c, char[] buffer) {
long norm32;
int length=0;
norm32 = (long) ((UNSIGNED_INT_MASK) & NormTrieImpl.normTrie.getCodePointValue(c));
if((norm32 & QC_NFD)!=0) {
if(isNorm32HangulOrJamo(norm32)) {
/* Hangul syllable: decompose algorithmically */
char c2;
c-=HANGUL_BASE;
c2=(char)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
buffer[2]=(char)(JAMO_T_BASE+c2);
length=3;
} else {
length=2;
}
buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
return length;
} else {
/* normal decomposition */
DecomposeArgs args = new DecomposeArgs();
int index = decompose(norm32, args);
System.arraycopy(extraData,index,buffer,0,args.length);
return args.length ;
}
} else {
return 0;
}
}
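/**
* Case-folds the single code point c into dest[destStart..destLimit[.
* destStart is always advanced past the full folding so that the required
* length can be computed even if dest is too small. The return value is
* the final destStart, negated when the folding starts with c itself
* (the comparison loop then treats c as having no relevant case folding).
*/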
private static int foldCase(int c, char[] dest, int destStart, int destLimit,
int options){
String src = UTF16.valueOf(c);
String foldedStr = UCharacter.foldCase(src,options);
char[] foldedC = foldedStr.toCharArray();
for(int i=0;i<foldedC.length;i++){
if(destStart<destLimit){
dest[destStart]=foldedC[i];
}
// always increment destStart so that we can return
// the required length
destStart++;
}
return (c==UTF16.charAt(foldedStr,0)) ? -destStart : destStart;
}
/*
private static int foldCase(char[] src,int srcStart,int srcLimit,
char[] dest, int destStart, int destLimit,
int options){
String source =new String(src,srcStart,(srcLimit-srcStart));
String foldedStr = UCharacter.foldCase(source,options);
char[] foldedC = foldedStr.toCharArray();
for(int i=0;i<foldedC.length;i++){
if(destStart<destLimit){
dest[destStart]=foldedC[i];
}
// always increment destStart so that we can return
// the required length
destStart++;
}
return destStart;
}
*/
public static int cmpEquivFold(String s1, String s2,int options){
return cmpEquivFold(s1.toCharArray(),0,s1.length(),
s2.toCharArray(),0,s2.length(),
options);
}
// internal function
public static int cmpEquivFold(char[] s1, int s1Start,int s1Limit,
char[] s2, int s2Start,int s2Limit,
int options) {
// current-level start/limit - s1/s2 as current
int start1, start2, limit1, limit2;
char[] cSource1, cSource2;
cSource1 = s1;
cSource2 = s2;
// decomposition variables
int length;
// stacks of previous-level start/current/limit
CmpEquivLevel[] stack1 = new CmpEquivLevel[]{
new CmpEquivLevel(),
new CmpEquivLevel()
};
CmpEquivLevel[] stack2 = new CmpEquivLevel[]{
new CmpEquivLevel(),
new CmpEquivLevel()
};
// decomposition buffers for Hangul
char[] decomp1 = new char[8];
char[] decomp2 = new char[8];
// case folding buffers, only use current-level start/limit
char[] fold1 = new char[32];
char[] fold2 = new char[32];
// track which is the current level per string
int level1, level2;
// current code units, and code points for lookups
int c1, c2;
int cp1, cp2;
// no argument error checking because this itself is not an API
// assume that at least one of the options COMPARE_EQUIV and
// COMPARE_IGNORE_CASE is set
// otherwise this function must behave exactly as uprv_strCompare()
// not checking for that here makes testing this function easier
// initialize
start1=s1Start;
limit1=s1Limit;
start2=s2Start;
limit2=s2Limit;
level1=level2=0;
c1=c2=-1;
cp1=cp2=-1;
// comparison loop
for(;;) {
// here a code unit value of -1 means "get another code unit"
// below it will mean "this source is finished"
if(c1<0) {
// get next code unit from string 1, post-increment
for(;;) {
if(s1Start>=limit1) {
if(level1==0) {
c1=-1;
break;
}
} else {
c1=cSource1[s1Start];
++s1Start;
break;
}
// reached end of level buffer, pop one level
do {
--level1;
start1=stack1[level1].start;
} while(start1==-1); //###### check this
s1Start=stack1[level1].s;
limit1=stack1[level1].limit;
cSource1=stack1[level1].source;
}
}
if(c2<0) {
// get next code unit from string 2, post-increment
for(;;) {
if(s2Start>=limit2) {
if(level2==0) {
c2=-1;
break;
}
} else {
c2=cSource2[s2Start];
++s2Start;
break;
}
// reached end of level buffer, pop one level
do {
--level2;
start2=stack2[level2].start;
} while(start2==-1);
s2Start=stack2[level2].s;
limit2=stack2[level2].limit;
cSource2=stack2[level2].source;
}
}
// compare c1 and c2
// either variable c1, c2 is -1 only if the corresponding string
// is finished
if(c1==c2) {
if(c1<0) {
return 0; // c1==c2==-1 indicating end of strings
}
c1=c2=-1; // make us fetch new code units
continue;
} else if(c1<0) {
return -1; // string 1 ends before string 2
} else if(c2<0) {
return 1; // string 2 ends before string 1
}
// c1!=c2 && c1>=0 && c2>=0
// get complete code points for c1, c2 for lookups if either is a
// surrogate
cp1=c1;
if(UTF16.isSurrogate((char)c1)) {
char c;
if(UTF16.isLeadSurrogate((char)c1)) {
if( s1Start!=limit1 &&
UTF16.isTrailSurrogate(c=cSource1[s1Start])
) {
// advance ++s1; only below if cp1 decomposes/case-folds
cp1=UCharacterProperty.getRawSupplementary((char)c1, c);
}
} else /* isTrail(c1) */ {
if( start1<=(s1Start-2) &&
UTF16.isLeadSurrogate(c=cSource1[(s1Start-2)])
) {
cp1=UCharacterProperty.getRawSupplementary(c, (char)c1);
}
}
}
cp2=c2;
if(UTF16.isSurrogate((char)c2)) {
char c;
if(UTF16.isLeadSurrogate((char)c2)) {
if( s2Start!=limit2 &&
UTF16.isTrailSurrogate(c=cSource2[s2Start])
) {
// advance ++s2; only below if cp2 decomposes/case-folds
cp2=UCharacterProperty.getRawSupplementary((char)c2, c);
}
} else /* isTrail(c2) */ {
if( start2<=(s2Start-2) &&
UTF16.isLeadSurrogate(c=cSource2[s2Start-2])
) {
cp2=UCharacterProperty.getRawSupplementary(c, (char)c2);
}
}
}
// go down one level for each string
// continue with the main loop as soon as there is a real change
if( level1<2 && ((options & Normalizer.COMPARE_IGNORE_CASE)!=0)&&
(length=foldCase(cp1, fold1, 0,32,options))>=0
) {
// cp1 case-folds to fold1[length]
if(UTF16.isSurrogate((char)c1)) {
if(UTF16.isLeadSurrogate((char)c1)) {
// advance beyond source surrogate pair if it
// case-folds
++s1Start;
} else /* isTrail(c1) */ {
// we got a supplementary code point when hitting its
// trail surrogate, therefore the lead surrogate must
// have been the same as in the other string;
// compare this decomposition with the lead surrogate
// in the other string
--s2Start;
c2=cSource2[(s2Start-1)];
}
}
// push current level pointers
stack1[0].start=start1;
stack1[0].s=s1Start;
stack1[0].limit=limit1;
stack1[0].source=cSource1;
++level1;
cSource1 = fold1;
start1=s1Start=0;
limit1=length;
// get ready to read from decomposition, continue with loop
c1=-1;
continue;
}
if( level2<2 && ((options& Normalizer.COMPARE_IGNORE_CASE)!=0) &&
(length=foldCase(cp2, fold2,0,32, options))>=0
) {
// cp2 case-folds to fold2[length]
if(UTF16.isSurrogate((char)c2)) {
if(UTF16.isLeadSurrogate((char)c2)) {
// advance beyond source surrogate pair if it
// case-folds
++s2Start;
} else /* isTrail(c2) */ {
// we got a supplementary code point when hitting its
// trail surrogate, therefore the lead surrogate must
// have been the same as in the other string;
// compare this decomposition with the lead surrogate
// in the other string
--s1Start;
c1=cSource1[(s1Start-1)];
}
}
// push current level pointers
stack2[0].start=start2;
stack2[0].s=s2Start;
stack2[0].limit=limit2;
stack2[0].source=cSource2;
++level2;
cSource2 = fold2;
start2=s2Start=0;
limit2=length;
// get ready to read from decomposition, continue with loop
c2=-1;
continue;
}
if( level1<2 && ((options&COMPARE_EQUIV)!=0) &&
0!=(length=decompose(cp1,decomp1))
) {
// cp1 decomposes into p[length]
if(UTF16.isSurrogate((char)c1)) {
if(UTF16.isLeadSurrogate((char)c1)) {
// advance beyond source surrogate pair if it
//decomposes
++s1Start;
} else /* isTrail(c1) */ {
// we got a supplementary code point when hitting
// its trail surrogate, therefore the lead surrogate
// must have been the same as in the other string;
// compare this decomposition with the lead surrogate
// in the other string
--s2Start;
c2=cSource2[(s2Start-1)];
}
}
// push current level pointers
stack1[level1].start=start1;
stack1[level1].s=s1Start;
stack1[level1].limit=limit1;
stack1[level1].source=cSource1;
++level1;
// set next level pointers to decomposition
cSource1 = decomp1;
start1=s1Start=0;
limit1=length;
// set empty intermediate level if skipped
if(level1<2) {
stack1[level1++].start=-1;
}
// get ready to read from decomposition, continue with loop
c1=-1;
continue;
}
if( level2<2 && ((options&COMPARE_EQUIV)!=0) &&
0!=(length=decompose(cp2, decomp2))
) {
// cp2 decomposes into p[length]
if(UTF16.isSurrogate((char)c2)) {
if(UTF16.isLeadSurrogate((char)c2)) {
// advance beyond source surrogate pair if it
// decomposes
++s2Start;
} else /* isTrail(c2) */ {
// we got a supplementary code point when hitting its
// trail surrogate, therefore the lead surrogate must
// have been the same as in the other string;
// compare this decomposition with the lead surrogate
// in the other string
--s1Start;
c1=cSource1[(s1Start-1)];
}
}
// push current level pointers
stack2[level2].start=start2;
stack2[level2].s=s2Start;
stack2[level2].limit=limit2;
stack2[level2].source=cSource2;
++level2;
// set next level pointers to decomposition
cSource2=decomp2;
start2=s2Start=0;
limit2=length;
// set empty intermediate level if skipped
if(level2<2) {
stack2[level2++].start=-1;
}
// get ready to read from decomposition, continue with loop
c2=-1;
continue;
}
// no decomposition/case folding, max level for both sides:
// return difference result
// code point order comparison must not just return cp1-cp2
// because when single surrogates are present then the surrogate
// pairs that formed cp1 and cp2 may be from different string
// indexes
// example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at
// second code units
// c1=d800 cp1=10001 c2=dc00 cp2=10000
// cp1-cp2>0 but c1-c2<0 and in fact in UTF-32
// it is { d800 10001 } < { 10000 }
// therefore fix-up
if( c1>=0xd800 && c2>=0xd800 &&
((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0)
) {
/* subtract 0x2800 from BMP code points to make them smaller
* than supplementary ones */
if(
( c1<=0xdbff && s1Start!=limit1
&&
UTF16.isTrailSurrogate(cSource1[s1Start])
)
||
( UTF16.isTrailSurrogate((char)c1) && start1!=(s1Start-1)
&&
UTF16.isLeadSurrogate(cSource1[(s1Start-2)])
)
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point -
* make <d800 */
c1-=0x2800;
}
if(
( c2<=0xdbff && s2Start!=limit2
&&
UTF16.isTrailSurrogate(cSource2[s2Start])
)
||
( UTF16.isTrailSurrogate((char)c2) && start2!=(s2Start-1)
&&
UTF16.isLeadSurrogate(cSource2[(s2Start-2)])
)
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point -
* make <d800 */
c2-=0x2800;
}
}
return c1-c2;
}
}
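/**
* Compares [s1Start..s1Limit[ with [s2Start..s2Limit[ in code unit order,
* or in code point order if codePointOrder is true.
* Only the common prefix is compared; if it is identical, the longer
* string compares greater.
*/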
private static int strCompare(char[] s1, int s1Start, int s1Limit,
char[] s2, int s2Start, int s2Limit,
boolean codePointOrder) {
int start1, start2, limit1, limit2;
char c1, c2;
/* setup for fix-up */
start1=s1Start;
start2=s2Start;
int length1, length2;
length1 = s1Limit - s1Start;
length2 = s2Limit - s2Start;
int lengthResult;
if(length1<length2) {
lengthResult=-1;
limit1=start1+length1;
} else if(length1==length2) {
lengthResult=0;
limit1=start1+length1;
} else /* length1>length2 */ {
lengthResult=1;
limit1=start1+length2;
}
if(s1==s2) {
return lengthResult;
}
for(;;) {
/* check pseudo-limit */
if(s1Start==limit1) {
return lengthResult;
}
c1=s1[s1Start];
c2=s2[s2Start];
if(c1!=c2) {
break;
}
++s1Start;
++s2Start;
}
/* setup for fix-up */
limit1=start1+length1;
limit2=start2+length2;
/* if both values are in or above the surrogate range, fix them up */
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
/* subtract 0x2800 from BMP code points to make them smaller than
* supplementary ones */
if(
( c1<=0xdbff && (s1Start+1)!=limit1 &&
UTF16.isTrailSurrogate(s1[(s1Start+1)])
) ||
( UTF16.isTrailSurrogate(c1) && start1!=s1Start &&
UTF16.isLeadSurrogate(s1[(s1Start-1)])
)
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c1-=0x2800;
}
if(
( c2<=0xdbff && (s2Start+1)!=limit2 &&
UTF16.isTrailSurrogate(s2[(s2Start+1)])
) ||
( UTF16.isTrailSurrogate(c2) && start2!=s2Start &&
UTF16.isLeadSurrogate(s2[(s2Start-1)])
)
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c2-=0x2800;
}
}
/* now c1 and c2 are in UTF-32-compatible order */
return (int)c1-(int)c2;
}
/*
* Status of tailored normalization
*
* This was done initially for investigation on Unicode public review issue 7
* (http://www.unicode.org/review/). See Jitterbug 2481.
* While the UTC at meeting #94 (2003mar) did not take up the issue, this is
* a permanent feature in ICU 2.6 in support of IDNA which requires true
* Unicode 3.2 normalization.
* (NormalizationCorrections are rolled into IDNA mapping tables.)
*
* Tailored normalization as implemented here makes it possible to "normalize less"
* than full Unicode normalization would.
* Based internally on a UnicodeSet of code points that are
* "excluded from normalization", the normalization functions leave those
* code points alone ("inert"). This means that tailored normalization
* still transforms text into a canonically equivalent form.
* It does not add decompositions to code points that do not have any,
* nor does it change decomposition results.
*
* Any function that searches for a safe boundary has not been touched,
* which means that these functions will be over-pessimistic when
* exclusions are applied.
* This should not matter because subsequent checks and normalizations
* do apply the exclusions; only a little more of the text may be processed
* than necessary under exclusions.
*
* Normalization exclusions have the following effect on excluded code points c:
* - c is not decomposed
* - c is not a composition target
* - c does not combine forward or backward for composition
* except that this is not implemented for Jamo
* - c is treated as having a combining class of 0
*/
/*
* Constants for the bit fields in the options bit set parameter.
* These need not be public.
* A user only needs to know the currently assigned values.
* The number and positions of reserved bits per field can remain private.
*/
private static final int OPTIONS_NX_MASK=0x1f;
private static final int OPTIONS_UNICODE_MASK=0xe0;
public static final int OPTIONS_SETS_MASK=0xff;
private static final int OPTIONS_UNICODE_SHIFT=5;
private static final UnicodeSet[] nxCache = new UnicodeSet[OPTIONS_SETS_MASK+1];
/* Constants for options flags for normalization.*/
/**
* Options bit 0, do not decompose Hangul syllables.
* @draft ICU 2.6
*/
private static final int NX_HANGUL = 1;
/**
* Options bit 1, do not decompose CJK compatibility characters.
* @draft ICU 2.6
*/
private static final int NX_CJK_COMPAT=2;
/**
* Options bit 8, use buggy recomposition described in
* Unicode Public Review Issue #29
* at http://www.unicode.org/review/resolved-pri.html#pri29
*
* Used in IDNA implementation according to strict interpretation
* of IDNA definition based on Unicode 3.2 which predates PRI #29.
*
* See ICU4C unormimp.h
*
* @draft ICU 3.2
*/
public static final int BEFORE_PRI_29=0x100;
/*
* The following options are used only in some composition functions.
* They use bits 12 and up to preserve lower bits for the available options
* space in unorm_compare() -
* see documentation for UNORM_COMPARE_NORM_OPTIONS_SHIFT.
*/
/** Options bit 12, for compatibility vs. canonical decomposition. */
public static final int OPTIONS_COMPAT=0x1000;
/** Options bit 13, no discontiguous composition (FCC vs. NFC). */
public static final int OPTIONS_COMPOSE_CONTIGUOUS=0x2000;
/* normalization exclusion sets --------------------------------------------- */
/*
* Normalization exclusion UnicodeSets are used for tailored normalization;
* see the comment near the beginning of this file.
*
* By specifying one or several sets of code points,
* those code points become inert for normalization.
*/
private static final synchronized UnicodeSet internalGetNXHangul() {
/* internal function, does not check for incoming U_FAILURE */
if(nxCache[NX_HANGUL]==null) {
nxCache[NX_HANGUL]=new UnicodeSet(0xac00, 0xd7a3);
}
return nxCache[NX_HANGUL];
}
private static final synchronized UnicodeSet internalGetNXCJKCompat() {
/* internal function, does not check for incoming U_FAILURE */
if(nxCache[NX_CJK_COMPAT]==null) {
/* build a set from [CJK Ideographs]&[has canonical decomposition] */
UnicodeSet set, hasDecomp;
set=new UnicodeSet("[:Ideographic:]");
/* start with an empty set for [has canonical decomposition] */
hasDecomp=new UnicodeSet();
/* iterate over all ideographs and remember which canonically decompose */
UnicodeSetIterator it = new UnicodeSetIterator(set);
int start, end;
long norm32;
while(it.nextRange() && (it.codepoint != UnicodeSetIterator.IS_STRING)) {
start=it.codepoint;
end=it.codepointEnd;
while(start<=end) {
norm32 = getNorm32(start);
if((norm32 & QC_NFD)>0) {
hasDecomp.add(start);
}
++start;
}
}
/* hasDecomp now contains all ideographs that decompose canonically */
nxCache[NX_CJK_COMPAT]=hasDecomp;
}
return nxCache[NX_CJK_COMPAT];
}
private static final synchronized UnicodeSet internalGetNXUnicode(int options) {
options &= OPTIONS_UNICODE_MASK;
if(options==0) {
return null;
}
if(nxCache[options]==null) {
/* build a set with all code points that were not designated by the specified Unicode version */
UnicodeSet set = new UnicodeSet();
switch(options) {
case Normalizer.UNICODE_3_2:
set.applyPattern("[:^Age=3.2:]");
break;
default:
return null;
}
nxCache[options]=set;
}
return nxCache[options];
}
/* Get a decomposition exclusion set. The data must be loaded. */
private static final synchronized UnicodeSet internalGetNX(int options) {
options&=OPTIONS_SETS_MASK;
if(nxCache[options]==null) {
/* return basic sets */
if(options==NX_HANGUL) {
return internalGetNXHangul();
}
if(options==NX_CJK_COMPAT) {
return internalGetNXCJKCompat();
}
if((options & OPTIONS_UNICODE_MASK)!=0 && (options & OPTIONS_NX_MASK)==0) {
return internalGetNXUnicode(options);
}
/* build a set from multiple subsets */
UnicodeSet set;
UnicodeSet other;
set=new UnicodeSet();
if((options & NX_HANGUL)!=0 && null!=(other=internalGetNXHangul())) {
set.addAll(other);
}
if((options&NX_CJK_COMPAT)!=0 && null!=(other=internalGetNXCJKCompat())) {
set.addAll(other);
}
if((options&OPTIONS_UNICODE_MASK)!=0 && null!=(other=internalGetNXUnicode(options))) {
set.addAll(other);
}
nxCache[options]=set;
}
return nxCache[options];
}
public static final UnicodeSet getNX(int options) {
if((options&=OPTIONS_SETS_MASK)==0) {
/* incoming failure, or no decomposition exclusions requested */
return null;
} else {
return internalGetNX(options);
}
}
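/*
* Illustrative usage sketch: obtaining an exclusion set and passing it as
* the nx parameter of the normalization functions in this file, e.g.
*
*   UnicodeSet nx = NormalizerImpl.getNX(Normalizer.UNICODE_3_2);
*   // code points not designated in Unicode 3.2 ([:^Age=3.2:]) are then
*   // treated as inert by the compose and makeFCD passes
*/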
private static final boolean nx_contains(UnicodeSet nx, int c) {
return nx!=null && nx.contains(c);
}
private static final boolean nx_contains(UnicodeSet nx, char c, char c2) {
return nx!=null && nx.contains(c2==0 ? c : UCharacterProperty.getRawSupplementary(c, c2));
}
}