src/com/ibm/icu/impl/NormalizerImpl.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 1996-2000, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java,v $
  * $Date: 2002/03/28 01:50:59 $
  * $Revision: 1.4 $
  *******************************************************************************
  */

 package com.ibm.icu.impl;
 import java.io.*;
 //import com.ibm.icu.text.NewNormalizer;
 import com.ibm.icu.text.UTF16;
 /**
  * @version 	1.0
  * @author  Ram Viswanadha
  */
 public final class NormalizerImpl {
 	/*
 	 * Singleton instance. It is created by the static initializer below;
 	 * the constructor reads the normalization data file.
 	 */
 	static NormalizerImpl IMPL=null;

 	static
 	{
 	    try
 	    {
 	        IMPL = new NormalizerImpl();
 	    }
 	    catch (Exception e)
 	    {
 	        // Keep the original exception as the cause so the stack trace of
 	        // the real failure is not lost; the message is unchanged.
 	        throw new RuntimeException(e.getMessage(), e);
 	    }
 	}

 	static final int UNSIGNED_BYTE_MASK =0xFF;
 	/*
 	 * This new implementation of the normalization code loads its data from
 	 * unorm.dat, which is generated with the gennorm tool.
 	 * The format of that file is described at the end of this file.
 	 */
 	private static final String DATA_FILE_NAME_ = "data/unorm.dat";

 	/* norm32 value constants */

     /* quick check flags 0..3 set mean "no" for their forms */
     static final int QC_NFC=0x11;          /* no|maybe */
     static final int QC_NFKC=0x22;         /* no|maybe */
     static final int QC_NFD=4;             /* no */
     static final int QC_NFKD=8;            /* no */

     static final int QC_ANY_NO=0xf;

     /* quick check flags 4..5 mean "maybe" for their forms; test flags>=QC_MAYBE */
     static final int QC_MAYBE=0x10;
     static final int QC_ANY_MAYBE=0x30;

     static final int QC_MASK=0x3f;

     static final int COMBINES_FWD=0x40;
     static final int COMBINES_BACK=0x80;
     static final int COMBINES_ANY=0xc0;

     static final int CC_SHIFT=8;           		   /* UnicodeData.txt combining class in bits 15..8 */
     static final int CC_MASK=0xff00;

     static final int EXTRA_SHIFT=16;               /* 16 bits for the index to UChars and other extra data */
     static final int EXTRA_INDEX_TOP=0xfc00;       /* start of surrogate specials after shift */

     static final int EXTRA_SURROGATE_MASK=0x3ff;
     static final int EXTRA_SURROGATE_TOP=0x3f0;    /* hangul etc. */

     static final int EXTRA_HANGUL=EXTRA_SURROGATE_TOP;
     static final int EXTRA_JAMO_L=EXTRA_SURROGATE_TOP+1;                 /* ### not used */
     static final int EXTRA_JAMO_V=EXTRA_SURROGATE_TOP+2;
     static final int EXTRA_JAMO_T=EXTRA_SURROGATE_TOP+3;

 	/*
 	 * norm32 value constants using >16 bits.
 	 * Every threshold literal has its sign bit cleared with UNSIGNED_INT_MASK,
 	 * so for example MIN_SPECIAL is 0x7c000000 rather than 0xfc000000.
 	 * NOTE(review): presumably this is meant to emulate unsigned 32-bit
 	 * comparisons with Java's signed int; that only works if the norm32
 	 * values being compared are masked the same way - confirm in the callers.
 	 */
 	static final int  UNSIGNED_INT_MASK = 0x7fffffff;
 	static final int  MIN_SPECIAL     =  0xfc000000 & UNSIGNED_INT_MASK;
 	static final int  SURROGATES_TOP  =  0xfff00000 & UNSIGNED_INT_MASK;
 	static final int  MIN_HANGUL      =  0xfff00000 & UNSIGNED_INT_MASK;
 	static final int  MIN_JAMO_V      =  0xfff20000 & UNSIGNED_INT_MASK;
 	static final int  JAMO_V_TOP      =  0xfff30000 & UNSIGNED_INT_MASK;

 	/* indexes[] value names */

 	static final int INDEX_TRIE_SIZE 		  = 0;     /* number of bytes in normalization trie */
 	static final int INDEX_CHAR_COUNT 		  = 1;     /* number of chars in extra data */

 	static final int INDEX_COMBINE_DATA_COUNT = 2;     /* number of uint16_t words for combining data */
 	static final int INDEX_COMBINE_FWD_COUNT  = 3;     /* number of code points that combine forward */
 	static final int INDEX_COMBINE_BOTH_COUNT = 4;     /* number of code points that combine forward and backward */
 	static final int INDEX_COMBINE_BACK_COUNT = 5;     /* number of code points that combine backward */

 	static final int INDEX_MIN_NFC_NO_MAYBE   = 6;     /* first code point with quick check NFC NO/MAYBE */
 	static final int INDEX_MIN_NFKC_NO_MAYBE  = 7;     /* first code point with quick check NFKC NO/MAYBE */
 	static final int INDEX_MIN_NFD_NO_MAYBE   = 8;     /* first code point with quick check NFD NO/MAYBE */
 	static final int INDEX_MIN_NFKD_NO_MAYBE  = 9;     /* first code point with quick check NFKD NO/MAYBE */

 	static final int INDEX_FCD_TRIE_SIZE      = 10;    /* number of bytes in FCD trie */
     static final int INDEX_AUX_TRIE_SIZE      = 11;    /* number of bytes in the auxiliary trie */
     static final int INDEX_CANON_SET_COUNT    = 12;    /* number of uint16_t in the array of serialized USet */

 	static final int INDEX_TOP                = 32;    /* changing this requires a new formatVersion */


 	/* AUX constants */
 	/* value constants for auxTrie */
 	static final int AUX_UNSAFE_SHIFT	= 11;
 	static final int AUX_COMP_EX_SHIFT	= 10;

 	static final int AUX_MAX_FNC        =   ((int)1<<AUX_COMP_EX_SHIFT);
 	static final int AUX_UNSAFE_MASK    =   (1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK;
 	static final int AUX_FNC_MASK       =   (AUX_MAX_FNC-1) & UNSIGNED_INT_MASK;
 	static final int AUX_COMP_EX_MASK   =   (1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK;

 	/* canonStartSets[0..31] contains indexes for what is in the array */
     static final int SET_INDEX_CANON_SETS_LENGTH		= 0; /* number of uint16_t in canonical starter sets */
     static final int SET_INDEX_CANON_BMP_TABLE_LENGTH	= 1; /* number of uint16_t in the BMP search table (contains pairs) */
     static final int SET_INDEX_CANON_SUPP_TABLE_LENGTH  = 2; /* number of uint16_t in the supplementary search table (contains triplets) */
     static final int SET_INDEX_TOP						= 32;/* changing this requires a new formatVersion */

 	/* positions of the individual arrays inside the canonStartSets Object[] */
 	static final int CANON_SET_INDICIES_INDEX  			= 0;
 	static final int CANON_SET_START_SETS_INDEX			= 1;
 	static final int CANON_SET_BMP_TABLE_INDEX			= 2;
 	static final int CANON_SET_SUPP_TABLE_INDEX			= 3;

 	/*
 	 * 14 bit indexes to canonical USerializedSets.
 	 * The value must be 1<<14 (0x4000): it is used as the mask
 	 * (CANON_SET_MAX_CANON_SETS-1) that extracts the low 14 index bits from
 	 * a BMP search-table result. The previous value 0x0004 kept only 2 bits.
 	 */
 	static final int CANON_SET_MAX_CANON_SETS     		= 0x4000;
 	/* single-code point BMP sets are encoded directly in the search table except if result=0x4000..0x7fff */
 	static final int CANON_SET_BMP_MASK        			= 0xc000;
 	static final int CANON_SET_BMP_IS_INDEX    			= 0x4000;

 	/*******************************/

 	/* Wrappers for Trie implementations */
 	static final class NormTrieImpl implements Trie.DataManipulate{
 	    /** Trie from which the norm32 values are read. */
 	    static IntTrie normTrie= null;

 	    /**
 	     * Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
 	     * data the index array offset of the indexes for that lead surrogate.
 	     * @param value data value for a surrogate from the trie, including the
 	     *        folding offset
 	     * @return the base offset (0x10000>>5) plus the 10-bit surrogate index
 	     *         taken from the bits above EXTRA_SHIFT, scaled by 1<<5
 	     */
 	    public int getFoldingOffset(int value){
 	        // '+' binds tighter than '>>' in Java, so the base term must be
 	        // parenthesized; without the parentheses the whole sum was used
 	        // as the shift distance for 0x10000.
 	        return (0x10000>>5)+((value>>(EXTRA_SHIFT-5))&(0x3ff<<5));
 	    }

 	}
 	static final class FCDTrieImpl implements Trie.DataManipulate{
 		/* trie that checkFCD() reads the 16-bit fcd16 values from */
 		static CharTrie fcdTrie=null;
 	   /**
 	    * Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
 	    * data the index array offset of the indexes for that lead surrogate.
 	    * This implementation ignores its argument and always returns 0.
 	    * NOTE(review): a constant 0 means "no data for the lead surrogate"
 	    * according to the return contract below - confirm that supplementary
 	    * code points are really meant to have no FCD data.
 	    * @param value data value for a surrogate from the trie, including the
 	    *        folding offset
 	    * @return data offset or 0 if there is no data for the lead surrogate
 	    */

 	    public int getFoldingOffset(int value){
 			return 0;
 	    }
 	}

 	static final class AuxTrieImpl implements Trie.DataManipulate{
 	    /* trie that the auxiliary (composition exclusion / unsafe start) bits are read from */
 	    static CharTrie auxTrie = null;

 	    /**
 	     * Called by com.ibm.icu.util.Trie to turn a lead surrogate's data
 	     * value into the index array offset for that lead surrogate.
 	     * @param value data value for a surrogate from the trie
 	     * @return the bits of value selected by AUX_FNC_MASK, shifted left by 5
 	     */
 	    public int getFoldingOffset(int value){
 	        final int fncBits = value & AUX_FNC_MASK;
 	        return fncBits << 5;
 	    }
 	}

 	/****************************************************/


 	static FCDTrieImpl fcdTrieImpl;
 	static NormTrieImpl normTrieImpl;
 	static AuxTrieImpl auxTrieImpl;
 	static int[] indexes;
 	static char[] combiningTable;
 	static char[] extraData;
 	static Object[] canonStartSets;

 	static boolean isDataLoaded;
 	static boolean isFormatVersion_2_1;
 	/**
     * Default buffer size of datafile
     */
     private static final int DATA_BUFFER_SIZE_ = 25000;

 	/* FCD check: everything below this code point is known to have a 0 lead combining class */
 	public static final int MIN_WITH_LEAD_CC=0x300;


     /**
      * Bit 7 of the length byte for a decomposition string in extra data is
      * a flag indicating whether the decomposition string is
      * preceded by a 16-bit word with the leading and trailing cc
      * of the decomposition (like for A-umlaut);
      * if not, then both cc's are zero (like for compatibility ideographs).
      */
 	static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80;
     /**
 	 * Bits 6..0 of the length byte contain the actual length.
 	 */
 	static final int DECOMP_LENGTH_MASK=0x7f;

 	/* -------------------------------------------------------------------------- */

 	/* Korean Hangul and Jamo constants */

 	private static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
 	private static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
 	private static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */

 	private static final int HANGUL_BASE=0xac00;

 	private static final int JAMO_L_COUNT=19;
 	private static final int JAMO_V_COUNT=21;
 	private static final int JAMO_T_COUNT=28;
 	private static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;

 	/*
 	 * Is c a Hangul syllable whose offset from HANGUL_BASE is a multiple of
 	 * JAMO_T_COUNT?
 	 */
 	private static boolean isHangulWithoutJamoT(char c) {
 	    // offset from the first syllable, wrapped to 16 bits like char arithmetic
 	    final int syllableIndex = (c - HANGUL_BASE) & 0xffff;
 	    if (syllableIndex >= HANGUL_COUNT) {
 	        return false;
 	    }
 	    return (syllableIndex % JAMO_T_COUNT) == 0;
 	}

 	/* norm32 helpers */

 	/*
 	 * Does norm32 carry a regular (non-special) index?
 	 * True when norm32 is below MIN_SPECIAL in a signed int comparison.
 	 * NOTE(review): a norm32 with bit 31 set is negative and therefore also
 	 * compares as "regular" here - confirm callers clear the sign bit first.
 	 */
 	private static boolean isNorm32Regular(int norm32) {
 	    return MIN_SPECIAL > norm32;
 	}

 	/*
 	 * Does norm32 carry the special index of a lead surrogate?
 	 * True for MIN_SPECIAL <= norm32 < SURROGATES_TOP (signed comparison).
 	 */
 	private static boolean isNorm32LeadSurrogate(int norm32) {
 	    if (norm32 < MIN_SPECIAL) {
 	        return false;
 	    }
 	    return norm32 < SURROGATES_TOP;
 	}

 	/*
 	 * Does norm32 carry the special index of a Hangul syllable or a Jamo?
 	 * True when norm32 is not below MIN_HANGUL (signed comparison).
 	 */
 	private static boolean isNorm32HangulOrJamo(int norm32) {
 	    return !(norm32 < MIN_HANGUL);
 	}

 	/*
 	 * For a norm32 that already satisfies isNorm32HangulOrJamo():
 	 * is it a Hangul syllable or a Jamo L, i.e. below MIN_JAMO_V?
 	 */
 	private static  boolean isHangulJamoNorm32HangulOrJamoL(int norm32) {
 	    return MIN_JAMO_V > norm32;
 	}

 	/*
 	 * For a norm32 that belongs to a Jamo V or a Jamo T:
 	 * is it a Jamo V, i.e. below JAMO_V_TOP?
 	 */
 	private static boolean isJamoVTNorm32JamoV(int norm32) {
 	    return JAMO_V_TOP > norm32;
 	}

 	/*
 	 * Extracts the extra-data index stored in the bits of norm32 above
 	 * EXTRA_SHIFT.
 	 * The unsigned shift keeps the result in 0..0xffff; a signed shift would
 	 * sign-extend and return a negative index whenever bit 31 of norm32 is set.
 	 */
 	static int getExtraDataIndex(int norm32) {
 	    return (norm32>>>EXTRA_SHIFT);
 	}


     // protected constructor ---------------------------------------------

     /**
     * Constructor. Unless isDataLoaded is already set, resets the static data
     * tables, creates the trie wrappers and lets a NormalizerDataReader read
     * the resource DATA_FILE_NAME_ into this object.
     * @exception IOException thrown when the data file cannot be found, when
     *            data reading fails or data corrupted
     */
     private NormalizerImpl() throws IOException{
         //data should be loaded only once
         if(!isDataLoaded){
             indexes = null;
             combiningTable=null;
             extraData=null;
             fcdTrieImpl = new FCDTrieImpl();
             normTrieImpl = new NormTrieImpl();
             auxTrieImpl = new AuxTrieImpl();
             // jar access
             InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME_);
             if(i==null){
                 // getResourceAsStream() returns null for a missing resource;
                 // report that directly instead of failing later on a null stream
                 throw new IOException("Normalization data file not found: "
                                       + DATA_FILE_NAME_);
             }
             BufferedInputStream b = new BufferedInputStream(i,
                                                             DATA_BUFFER_SIZE_);
             try{
                 NormalizerDataReader reader = new NormalizerDataReader(b);
                 reader.read(this);
             }finally{
                 // also runs when reading fails; closing the buffered stream
                 // closes the wrapped resource stream as well
                 b.close();
             }
         }
     }

     /**
      * Checks the combining-class order of the text using the 16-bit values
      * of the FCD trie (lead combining class in bits 15..8, trail combining
      * class in bits 7..0).
      * @param src text to check; must not be null
      * @return false as soon as a code point has a non-zero lead combining
      *         class that is lower than the trail combining class of the
      *         preceding code point, true if the end of src is reached
      */
     public static boolean checkFCD(char[] src) {

         char fcd16,c;
         int prevCC=0, cc;
         int i =0, length = src.length;

         for(;;) {
             /* skip a run of code units below the minimum or with irrelevant data for the FCD check */
             for(;;) {
                 if(i==length) {
                     return true;
                 } else if((c=src[i++])<MIN_WITH_LEAD_CC) {
                     /* remember the code unit itself; its trail cc is looked up lazily below */
                     prevCC=(int)-c;
                 } else if((fcd16=fcdTrieImpl.fcdTrie.getBMPValue(c))==0) {
                     prevCC=0;
                 } else {
                     break;
                 }
             }

             /* check one above-minimum, relevant code unit */
             if(UTF16.isLeadSurrogate(c)) {
                 /* c is a lead surrogate, get the real fcd16 */
                 if(i!=length && UTF16.isTrailSurrogate(src[i])) {
                     /*
                      * Look up with the trail surrogate itself and only then
                      * step past it. Incrementing first passed the unit after
                      * the trail surrogate and read beyond the array when the
                      * pair ended the text.
                      */
                     fcd16=fcdTrieImpl.fcdTrie.getSurrogateValue(fcd16, src[i]);
                     ++i;
                 } else {
                     fcd16=0;
                 }
             }

             /*
              * prevCC has values from the following ranges:
              * 0..0xff - the previous trail combining class
              * <0      - the negative value of the previous code unit;
              *           that code unit was <MIN_WITH_LEAD_CC and its fcd16 lookup
              *           was deferred so that average text is checked faster
              */

             /* check the combining order */
             cc=(int)(fcd16>>8);
             if(cc!=0) {
                 if(prevCC<0) {
                     /* the previous character was <MIN_WITH_LEAD_CC, we need to get its trail cc */
                     prevCC=(int)(fcdTrieImpl.fcdTrie.getBMPValue((char)-prevCC)&0xff);
                 }

                 if(cc<prevCC) {
                     return false;
                 }
             }
             prevCC=(int)(fcd16&0xff);
         }
     }
 	/*
 	public static NewNormalizer.QuickCheckResult quickCheck(char[] src,NewNormalizer.Mode mode) {

 	    int norm32, ccOrQCMask, qcMask;
 	    char c, c2, minNoMaybe;
 	    char cc, prevCC;
 	    NewNormalizer.QuickCheckResult result;


 	    if(!isDataLoaded) {
 	        return NewNormalizer.MAYBE;
 	    }

 	    // check for a valid mode and set the quick check minimum and mask

 	   	if(mode.equals(NewNormalizer.NFC)){
 	        minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE];
 	        qcMask=QC_NFC;
 		}else if(mode.equals(NewNormalizer.NFKC)){
 	        minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE];
 	        qcMask=QC_NFKC;
 	   	}else if(mode.equals(NewNormalizer.NFD)){
 	        minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE];
 	        qcMask=QC_NFD;
 	   	}else if(mode.equals(NewNormalizer.NFKD)){
 	        minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE];
 	        qcMask=QC_NFKD;
 	   	}else if(mode.equals(NewNormalizer.FCD)){
 	        return (checkFCD(src)) ? NewNormalizer.YES : NewNormalizer.NO;
 	   	}else{
 	        return NewNormalizer.MAYBE;
 	    }

 	    // initialize
 	    ccOrQCMask=CC_MASK|qcMask;
 	    result=NewNormalizer.YES;
 	    prevCC=0;
 		int i=0;

 	    for(;;) {
             for(;;) {
                 if(i==src.length) {
                     return result;
                 } else if((c=src[i++])>=minNoMaybe && ((norm32=normTrieImpl.normTrie.getBMPValue(c))&ccOrQCMask)!=0) {
                   	break;
                 }
                 prevCC=0;
             }


 	        //* check one above-minimum, relevant code unit
 	        if(isNorm32LeadSurrogate(norm32)) {
 	            //* c is a lead surrogate, get the real norm32
 	            if(i!=src.length && UTF16.isTrailSurrogate(c2=src[i])) {
 	                ++i;
 	                norm32=normTrieImpl.normTrie.getRawOffset(norm32, c2);
 	            } else {
 	                norm32=0;
 	            }
 	        }

 	        //* check the combining order
 	        cc=(char)((norm32>>CC_SHIFT)&0xFF);
 	        if(cc!=0 && cc<prevCC) {
 	            return NewNormalizer.NO;
 	        }
 	        prevCC=cc;

 	        //* check for "no" or "maybe" quick check flags
 	        norm32&=qcMask;
 	        if((norm32& QC_ANY_NO)>=1) {
 	            return NewNormalizer.NO;
 	        } else if(norm32!=0) {
 	            result=NewNormalizer.MAYBE;
 	        }
 	    }
 	} */

 	/*
 	 * Returns bits 15..8 of the norm32 value looked up for c in the
 	 * normalization trie, as a value in 0..0xff.
 	 * For c above 0xffff the lead surrogate is looked up first, and the trail
 	 * surrogate is consulted only when the lead's value has CC_MASK bits set.
 	 */
 	public static int getCombiningClass(int c) {
 	    int norm32;
 	    if(c>0xffff) {
 	        norm32=normTrieImpl.normTrie.getBMPValue(UTF16.getLeadSurrogate(c));
 	        boolean leadHasCC = (norm32&CC_MASK)!=0;
 	        if(leadHasCC) {
 	            norm32=normTrieImpl.normTrie.getRawOffset(norm32, UTF16.getTrailSurrogate(c));
 	        }
 	    } else {
 	        norm32=normTrieImpl.normTrie.getBMPValue((char)c);
 	    }
 	    int shifted = norm32>>CC_SHIFT;
 	    return shifted&0xFF;
 	}

 	/*
 	 * Tests the AUX_COMP_EX_MASK bit of the auxiliary trie value of c.
 	 * Always false unless isFormatVersion_2_1 is set.
 	 */
 	public static boolean isFullCompositionExclusion(int c) {
 	    if(!isFormatVersion_2_1) {
 	        return false;
 	    }
 	    final int auxBits = auxTrieImpl.auxTrie.getCodePointValue(c);
 	    return (auxBits & AUX_COMP_EX_MASK) != 0;
 	}

 	/*
 	 * True when the AUX_UNSAFE_MASK bit of the auxiliary trie value of c is
 	 * clear. Always false unless isFormatVersion_2_1 is set.
 	 */
 	public static boolean isCanonSafeStart(int c) {
 	    if(!isFormatVersion_2_1) {
 	        return false;
 	    }
 	    final int auxBits = auxTrieImpl.auxTrie.getCodePointValue(c);
 	    return (auxBits & AUX_UNSAFE_MASK) == 0;
 	}

 	/**
 	 * Looks up c in the canonical start set search tables and, when an entry
 	 * exists, fills fillSet from it.
 	 * @param c code point to look up
 	 * @param fillSet set object that receives the result; may be null
 	 * @return true if an entry for c was found and fillSet was set,
 	 *         false if fillSet or the tables are missing or c has no entry
 	 */
 	public static boolean getCanonStartSet(int c, USerializedSet fillSet) {

 	    if(fillSet!=null && canonStartSets!=null) {
 	        /*
 	         * binary search for c
 	         *
 	         * There are two search tables,
 	         * one for BMP code points and one for supplementary ones.
 	         * See unormimp.h for details.
 	         */
 	        char[] table;
 	        int i, start, limit;

 	        if(c<=0xffff) {
 	            table=(char[]) canonStartSets[CANON_SET_BMP_TABLE_INDEX];
 	            start=0;
 	            /* whole pairs only; a trailing partial entry is ignored */
 	            limit=table.length-(table.length%2);

 	            /*
 	             * each entry is a pair { c, result }
 	             * The loop stops when one pair is left: with "start=i" as the
 	             * lower-bound update, looping until start==limit would never
 	             * terminate, and the midpoint must land on a pair boundary.
 	             */
 	            while(start<limit-2) {
 	                i=((start+limit)/4)*2; /* (start+limit)/2 and address pairs */
 	                if(c<table[i]) {
 	                    limit=i;
 	                } else {
 	                    start=i;
 	                }
 	            }

 	            /* found? (limit>=2 guards against an empty table) */
 	            if(limit>=2 && c==table[start]) {
 	                i=table[start+1];
 	                if((i&CANON_SET_BMP_MASK)==CANON_SET_BMP_IS_INDEX) {
 	                    /* result 01xxxxxx xxxxxx contains index x to a USerializedSet */
 	                    i&=(CANON_SET_MAX_CANON_SETS-1);
 	                    /*
 	                     * NOTE(review): the index is applied to the search table;
 	                     * presumably it should address the array stored at
 	                     * CANON_SET_START_SETS_INDEX - confirm against the data reader.
 	                     */
 	                    return fillSet.getSet(table,i);
 	                } else {
 	                    /* other result values are BMP code points for single-code point sets */
 	                    fillSet.setSerializedToOne(i);
 	                    return true;
 	                }
 	            }
 	        } else {
 	            char high, low, h;

 	            table=(char[]) canonStartSets[CANON_SET_SUPP_TABLE_INDEX];
 	            start=0;
 	            /* whole triplets only; a trailing partial entry is ignored */
 	            limit=table.length-(table.length%3);

 	            high=(char)(c>>16);
 	            low=(char)c;

 	            /* each entry is a triplet { high(c), low(c), result } */
 	            while(start<limit-3) {
 	                i=((start+limit)/6)*3; /* (start+limit)/2 and address triplets */
 	                h=(char)(table[i]&0x1f); /* high word */
 	                if(high<h || (high==h && low<table[i+1])) {
 	                    limit=i;
 	                } else {
 	                    start=i;
 	                }
 	            }

 	            /* found? (limit>=3 guards against an empty table) */
 	            if(limit>=3) {
 	                h=table[start];
 	                if(high==(h&0x1f) && low==table[start+1]) {
 	                    i=table[start+2];
 	                    if((h&0x8000)==0) {
 	                        /*
 	                         * the result is an index to a USerializedSet
 	                         * NOTE(review): same question as in the BMP branch
 	                         * about which array the index addresses.
 	                         */
 	                        return fillSet.getSet(table,i);
 	                    } else {
 	                        /*
 	                         * single-code point set {x} in
 	                         * triplet { 100xxxxx 000hhhhh  llllllll llllllll  xxxxxxxx xxxxxxxx }
 	                         */
 	                        i|=((int)h&0x1f00)<<8; /* add high bits from high(c) */
 	                        fillSet.setSerializedToOne((int)i);
 	                        return true;
 	                    }
 	                }
 	            }
 	        }
 	    }

 	    return false; /* not found */
 	}

 	/**
 	 * Internal API, used by collation code.
 	 * Hands out the FCD trie so that collation can perform incremental,
 	 * per-code unit FCD checks itself.
 	 * @return the static FCD trie held by FCDTrieImpl
 	 * @internal
 	 */
 	public CharTrie getFCDTrie(){
 	    // fcdTrie is a static member, so it is read through the class name
 	    return FCDTrieImpl.fcdTrie;
 	}

 }
	/*
	*******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java,v $
	* $Date: 2002/03/28 01:50:59 $
	* $Revision: 1.4 $
	*******************************************************************************
	*/

	package com.ibm.icu.impl;
	import java.io.*;
	//import com.ibm.icu.text.NewNormalizer;
	import com.ibm.icu.text.UTF16;
	/**
	* @version 1.0
	* @author Ram Viswanadha
	*/
	public final class NormalizerImpl {
	/*
	 * Singleton instance. It is created by the static initializer below;
	 * the constructor reads the normalization data file.
	 */
	static NormalizerImpl IMPL=null;

	static
	{
	    try
	    {
	        IMPL = new NormalizerImpl();
	    }
	    catch (Exception e)
	    {
	        // Keep the original exception as the cause so the stack trace of
	        // the real failure is not lost; the message is unchanged.
	        throw new RuntimeException(e.getMessage(), e);
	    }
	}

	static final int UNSIGNED_BYTE_MASK =0xFF;
	/*
	* This new implementation of the normalization code loads its data from
	* unorm.dat, which is generated with the gennorm tool.
	* The format of that file is described at the end of this file.
	*/
	private static final String DATA_FILE_NAME_ = "data/unorm.dat";

	/* norm32 value constants */

	/* quick check flags 0..3 set mean "no" for their forms */
	static final int QC_NFC=0x11; /* no|maybe */
	static final int QC_NFKC=0x22; /* no|maybe */
	static final int QC_NFD=4; /* no */
	static final int QC_NFKD=8; /* no */

	static final int QC_ANY_NO=0xf;

	/* quick check flags 4..5 mean "maybe" for their forms; test flags>=QC_MAYBE */
	static final int QC_MAYBE=0x10;
	static final int QC_ANY_MAYBE=0x30;

	static final int QC_MASK=0x3f;

	static final int COMBINES_FWD=0x40;
	static final int COMBINES_BACK=0x80;
	static final int COMBINES_ANY=0xc0;

	static final int CC_SHIFT=8; /* UnicodeData.txt combining class in bits 15..8 */
	static final int CC_MASK=0xff00;

	static final int EXTRA_SHIFT=16; /* 16 bits for the index to UChars and other extra data */
	static final int EXTRA_INDEX_TOP=0xfc00; /* start of surrogate specials after shift */

	static final int EXTRA_SURROGATE_MASK=0x3ff;
	static final int EXTRA_SURROGATE_TOP=0x3f0; /* hangul etc. */

	static final int EXTRA_HANGUL=EXTRA_SURROGATE_TOP;
	static final int EXTRA_JAMO_L=EXTRA_SURROGATE_TOP+1; /* ### not used */
	static final int EXTRA_JAMO_V=EXTRA_SURROGATE_TOP+2;
	static final int EXTRA_JAMO_T=EXTRA_SURROGATE_TOP+3;

	/* norm32 value constants using >16 bits */
	static final int UNSIGNED_INT_MASK = 0x7fffffff;
	static final int MIN_SPECIAL = 0xfc000000 & UNSIGNED_INT_MASK;
	static final int SURROGATES_TOP = 0xfff00000 & UNSIGNED_INT_MASK;
	static final int MIN_HANGUL = 0xfff00000 & UNSIGNED_INT_MASK;
	static final int MIN_JAMO_V = 0xfff20000 & UNSIGNED_INT_MASK;
	static final int JAMO_V_TOP = 0xfff30000 & UNSIGNED_INT_MASK;


	/* indexes[] value names */

	static final int INDEX_TRIE_SIZE = 0; /* number of bytes in normalization trie */
	static final int INDEX_CHAR_COUNT = 1; /* number of chars in extra data */

	static final int INDEX_COMBINE_DATA_COUNT = 2; /* number of uint16_t words for combining data */
	static final int INDEX_COMBINE_FWD_COUNT = 3; /* number of code points that combine forward */
	static final int INDEX_COMBINE_BOTH_COUNT = 4; /* number of code points that combine forward and backward */
	static final int INDEX_COMBINE_BACK_COUNT = 5; /* number of code points that combine backward */

	static final int INDEX_MIN_NFC_NO_MAYBE = 6; /* first code point with quick check NFC NO/MAYBE */
	static final int INDEX_MIN_NFKC_NO_MAYBE = 7; /* first code point with quick check NFKC NO/MAYBE */
	static final int INDEX_MIN_NFD_NO_MAYBE = 8; /* first code point with quick check NFD NO/MAYBE */
	static final int INDEX_MIN_NFKD_NO_MAYBE = 9; /* first code point with quick check NFKD NO/MAYBE */

	static final int INDEX_FCD_TRIE_SIZE = 10; /* number of bytes in FCD trie */
	static final int INDEX_AUX_TRIE_SIZE = 11; /* number of bytes in the auxiliary trie */
	static final int INDEX_CANON_SET_COUNT = 12; /* number of uint16_t in the array of serialized USet */

	static final int INDEX_TOP = 32; /* changing this requires a new formatVersion */


	/* AUX constants */
	/* value constants for auxTrie */
	static final int AUX_UNSAFE_SHIFT = 11;
	static final int AUX_COMP_EX_SHIFT = 10;

	static final int AUX_MAX_FNC = ((int)1<<AUX_COMP_EX_SHIFT);
	static final int AUX_UNSAFE_MASK = (1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK;
	static final int AUX_FNC_MASK = (AUX_MAX_FNC-1) & UNSIGNED_INT_MASK;
	static final int AUX_COMP_EX_MASK = (1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK;

	/* canonStartSets[0..31] contains indexes for what is in the array */
	static final int SET_INDEX_CANON_SETS_LENGTH = 0; /* number of uint16_t in canonical starter sets */
	static final int SET_INDEX_CANON_BMP_TABLE_LENGTH = 1; /* number of uint16_t in the BMP search table (contains pairs) */
	static final int SET_INDEX_CANON_SUPP_TABLE_LENGTH = 2; /* number of uint16_t in the supplementary search table (contains triplets) */
	static final int SET_INDEX_TOP = 32;/* changing this requires a new formatVersion */

	/* positions of the individual arrays inside the canonStartSets Object[] */
	static final int CANON_SET_INDICIES_INDEX = 0;
	static final int CANON_SET_START_SETS_INDEX = 1;
	static final int CANON_SET_BMP_TABLE_INDEX = 2;
	static final int CANON_SET_SUPP_TABLE_INDEX = 3;

	/*
	 * 14 bit indexes to canonical USerializedSets.
	 * The value must be 1<<14 (0x4000): it is used as the mask
	 * (CANON_SET_MAX_CANON_SETS-1) that extracts the low 14 index bits from
	 * a BMP search-table result. The previous value 0x0004 kept only 2 bits.
	 */
	static final int CANON_SET_MAX_CANON_SETS = 0x4000;
	/* single-code point BMP sets are encoded directly in the search table except if result=0x4000..0x7fff */
	static final int CANON_SET_BMP_MASK = 0xc000;
	static final int CANON_SET_BMP_IS_INDEX = 0x4000;

	/*******************************/

	/* Wrappers for Trie implementations */
	static final class NormTrieImpl implements Trie.DataManipulate{
	    /** Trie from which the norm32 values are read. */
	    static IntTrie normTrie= null;

	    /**
	     * Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
	     * data the index array offset of the indexes for that lead surrogate.
	     * @param value data value for a surrogate from the trie, including the
	     *        folding offset
	     * @return the base offset (0x10000>>5) plus the 10-bit surrogate index
	     *         taken from the bits above EXTRA_SHIFT, scaled by 1<<5
	     */
	    public int getFoldingOffset(int value){
	        // '+' binds tighter than '>>' in Java, so the base term must be
	        // parenthesized; without the parentheses the whole sum was used
	        // as the shift distance for 0x10000.
	        return (0x10000>>5)+((value>>(EXTRA_SHIFT-5))&(0x3ff<<5));
	    }

	}
	static final class FCDTrieImpl implements Trie.DataManipulate{
	/* trie that checkFCD() reads the 16-bit fcd16 values from */
	static CharTrie fcdTrie=null;
	/**
	* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
	* data the index array offset of the indexes for that lead surrogate.
	* This implementation ignores its argument and always returns 0.
	* NOTE(review): a constant 0 means "no data for the lead surrogate"
	* according to the return contract below - confirm that supplementary
	* code points are really meant to have no FCD data.
	* @param value data value for a surrogate from the trie, including the
	* folding offset
	* @return data offset or 0 if there is no data for the lead surrogate
	*/

	public int getFoldingOffset(int value){
	return 0;
	}
	}

	static final class AuxTrieImpl implements Trie.DataManipulate{
	    /* trie that the auxiliary (composition exclusion / unsafe start) bits are read from */
	    static CharTrie auxTrie = null;

	    /**
	     * Called by com.ibm.icu.util.Trie to turn a lead surrogate's data
	     * value into the index array offset for that lead surrogate.
	     * @param value data value for a surrogate from the trie
	     * @return the bits of value selected by AUX_FNC_MASK, shifted left by 5
	     */
	    public int getFoldingOffset(int value){
	        final int fncBits = value & AUX_FNC_MASK;
	        return fncBits << 5;
	    }
	}

	/****************************************************/


	static FCDTrieImpl fcdTrieImpl;
	static NormTrieImpl normTrieImpl;
	static AuxTrieImpl auxTrieImpl;
	static int[] indexes;
	static char[] combiningTable;
	static char[] extraData;
	static Object[] canonStartSets;

	static boolean isDataLoaded;
	static boolean isFormatVersion_2_1;
	/**
	* Default buffer size of datafile
	*/
	private static final int DATA_BUFFER_SIZE_ = 25000;

	/* FCD check: everything below this code point is known to have a 0 lead combining class */
	public static final int MIN_WITH_LEAD_CC=0x300;


	/**
	* Bit 7 of the length byte for a decomposition string in extra data is
	* a flag indicating whether the decomposition string is
	* preceded by a 16-bit word with the leading and trailing cc
	* of the decomposition (like for A-umlaut);
	* if not, then both cc's are zero (like for compatibility ideographs).
	*/
	static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80;
	/**
	* Bits 6..0 of the length byte contain the actual length.
	*/
	static final int DECOMP_LENGTH_MASK=0x7f;

	/* -------------------------------------------------------------------------- */

	/* Korean Hangul and Jamo constants */

	private static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
	private static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
	private static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */

	private static final int HANGUL_BASE=0xac00;

	private static final int JAMO_L_COUNT=19;
	private static final int JAMO_V_COUNT=21;
	private static final int JAMO_T_COUNT=28;
	/* product of the three jamo counts; the '*' operators had been lost, leaving an undefined identifier */
	private static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;

	/*
	 * Is c a Hangul syllable whose offset from HANGUL_BASE is a multiple of
	 * JAMO_T_COUNT?
	 */
	private static boolean isHangulWithoutJamoT(char c) {
	    // offset from the first syllable, wrapped to 16 bits like char arithmetic
	    final int syllableIndex = (c - HANGUL_BASE) & 0xffff;
	    if (syllableIndex >= HANGUL_COUNT) {
	        return false;
	    }
	    return (syllableIndex % JAMO_T_COUNT) == 0;
	}

	/* norm32 helpers */

	/*
	 * Does norm32 carry a regular (non-special) index?
	 * True when norm32 is below MIN_SPECIAL in a signed int comparison.
	 * NOTE(review): a norm32 with bit 31 set is negative and therefore also
	 * compares as "regular" here - confirm callers clear the sign bit first.
	 */
	private static boolean isNorm32Regular(int norm32) {
	    return MIN_SPECIAL > norm32;
	}

	/*
	 * Does norm32 carry the special index of a lead surrogate?
	 * True for MIN_SPECIAL <= norm32 < SURROGATES_TOP (signed comparison).
	 */
	private static boolean isNorm32LeadSurrogate(int norm32) {
	    if (norm32 < MIN_SPECIAL) {
	        return false;
	    }
	    return norm32 < SURROGATES_TOP;
	}

	/*
	 * Does norm32 carry the special index of a Hangul syllable or a Jamo?
	 * True when norm32 is not below MIN_HANGUL (signed comparison).
	 */
	private static boolean isNorm32HangulOrJamo(int norm32) {
	    return !(norm32 < MIN_HANGUL);
	}

	/*
	 * For a norm32 that already satisfies isNorm32HangulOrJamo():
	 * is it a Hangul syllable or a Jamo L, i.e. below MIN_JAMO_V?
	 */
	private static boolean isHangulJamoNorm32HangulOrJamoL(int norm32) {
	    return MIN_JAMO_V > norm32;
	}

	/*
	 * For a norm32 that belongs to a Jamo V or a Jamo T:
	 * is it a Jamo V, i.e. below JAMO_V_TOP?
	 */
	private static boolean isJamoVTNorm32JamoV(int norm32) {
	    return JAMO_V_TOP > norm32;
	}

	/*
	 * Extracts the extra-data index stored in the bits of norm32 above
	 * EXTRA_SHIFT.
	 * The unsigned shift keeps the result in 0..0xffff; a signed shift would
	 * sign-extend and return a negative index whenever bit 31 of norm32 is set.
	 */
	static int getExtraDataIndex(int norm32) {
	    return (norm32>>>EXTRA_SHIFT);
	}


	// protected constructor ---------------------------------------------

	/**
	 * Constructor. Unless isDataLoaded is already set, resets the static data
	 * tables, creates the trie wrappers and lets a NormalizerDataReader read
	 * the resource DATA_FILE_NAME_ into this object.
	 * @exception IOException thrown when the data file cannot be found, when
	 *            data reading fails or data corrupted
	 */
	private NormalizerImpl() throws IOException{
	    //data should be loaded only once
	    if(!isDataLoaded){
	        indexes = null;
	        combiningTable=null;
	        extraData=null;
	        fcdTrieImpl = new FCDTrieImpl();
	        normTrieImpl = new NormTrieImpl();
	        auxTrieImpl = new AuxTrieImpl();
	        // jar access
	        InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME_);
	        if(i==null){
	            // getResourceAsStream() returns null for a missing resource;
	            // report that directly instead of failing later on a null stream
	            throw new IOException("Normalization data file not found: "
	                                  + DATA_FILE_NAME_);
	        }
	        BufferedInputStream b = new BufferedInputStream(i,
	                                                        DATA_BUFFER_SIZE_);
	        try{
	            NormalizerDataReader reader = new NormalizerDataReader(b);
	            reader.read(this);
	        }finally{
	            // also runs when reading fails; closing the buffered stream
	            // closes the wrapped resource stream as well
	            b.close();
	        }
	    }
	}

	/**
	 * Checks the combining-class order of the text using the 16-bit values
	 * of the FCD trie (lead combining class in bits 15..8, trail combining
	 * class in bits 7..0).
	 * @param src text to check; must not be null
	 * @return false as soon as a code point has a non-zero lead combining
	 *         class that is lower than the trail combining class of the
	 *         preceding code point, true if the end of src is reached
	 */
	public static boolean checkFCD(char[] src) {

	    char fcd16,c;
	    int prevCC=0, cc;
	    int i =0, length = src.length;

	    for(;;) {
	        /* skip a run of code units below the minimum or with irrelevant data for the FCD check */
	        for(;;) {
	            if(i==length) {
	                return true;
	            } else if((c=src[i++])<MIN_WITH_LEAD_CC) {
	                /* remember the code unit itself; its trail cc is looked up lazily below */
	                prevCC=(int)-c;
	            } else if((fcd16=fcdTrieImpl.fcdTrie.getBMPValue(c))==0) {
	                prevCC=0;
	            } else {
	                break;
	            }
	        }

	        /* check one above-minimum, relevant code unit */
	        if(UTF16.isLeadSurrogate(c)) {
	            /* c is a lead surrogate, get the real fcd16 */
	            if(i!=length && UTF16.isTrailSurrogate(src[i])) {
	                /*
	                 * Look up with the trail surrogate itself and only then
	                 * step past it. Incrementing first passed the unit after
	                 * the trail surrogate and read beyond the array when the
	                 * pair ended the text.
	                 */
	                fcd16=fcdTrieImpl.fcdTrie.getSurrogateValue(fcd16, src[i]);
	                ++i;
	            } else {
	                fcd16=0;
	            }
	        }

	        /*
	         * prevCC has values from the following ranges:
	         * 0..0xff - the previous trail combining class
	         * <0      - the negative value of the previous code unit;
	         *           that code unit was <MIN_WITH_LEAD_CC and its fcd16 lookup
	         *           was deferred so that average text is checked faster
	         */

	        /* check the combining order */
	        cc=(int)(fcd16>>8);
	        if(cc!=0) {
	            if(prevCC<0) {
	                /* the previous character was <MIN_WITH_LEAD_CC, we need to get its trail cc */
	                prevCC=(int)(fcdTrieImpl.fcdTrie.getBMPValue((char)-prevCC)&0xff);
	            }

	            if(cc<prevCC) {
	                return false;
	            }
	        }
	        prevCC=(int)(fcd16&0xff);
	    }
	}
	/*
	public static NewNormalizer.QuickCheckResult quickCheck(char[] src,NewNormalizer.Mode mode) {

	int norm32, ccOrQCMask, qcMask;
	char c, c2, minNoMaybe;
	char cc, prevCC;
	NewNormalizer.QuickCheckResult result;



	if(!isDataLoaded) {
	return NewNormalizer.MAYBE;
	}

	// check for a valid mode and set the quick check minimum and mask

	if(mode.equals(NewNormalizer.NFC)){
	minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE];
	qcMask=QC_NFC;
	}else if(mode.equals(NewNormalizer.NFKC)){
	minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE];
	qcMask=QC_NFKC;
	}else if(mode.equals(NewNormalizer.NFD)){
	minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE];
	qcMask=QC_NFD;
	}else if(mode.equals(NewNormalizer.NFKD)){
	minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE];
	qcMask=QC_NFKD;
	}else if(mode.equals(NewNormalizer.FCD)){
	return (checkFCD(src)) ? NewNormalizer.YES : NewNormalizer.NO;
	}else{
	return NewNormalizer.MAYBE;
	}

	// initialize
	ccOrQCMask=CC_MASK|qcMask;
	result=NewNormalizer.YES;
	prevCC=0;
	int i=0;

	for(;;) {
	for(;;) {
	if(i==src.length) {
	return result;
	} else if((c=src[i++])>=minNoMaybe && ((norm32=normTrieImpl.normTrie.getBMPValue(c))&ccOrQCMask)!=0) {
	break;
	}
	prevCC=0;
	}


	//* check one above-minimum, relevant code unit
	if(isNorm32LeadSurrogate(norm32)) {
	//* c is a lead surrogate, get the real norm32
	if(i!=src.length && UTF16.isTrailSurrogate(c2=src[i])) {
	++i;
	norm32=normTrieImpl.normTrie.getRawOffset(norm32, c2);
	} else {
	norm32=0;
	}
	}

	//* check the combining order
	cc=(char)((norm32>>CC_SHIFT)&0xFF);
	if(cc!=0 && cc<prevCC) {
	return NewNormalizer.NO;
	}
	prevCC=cc;

	//* check for "no" or "maybe" quick check flags
	norm32&=qcMask;
	if((norm32& QC_ANY_NO)>=1) {
	return NewNormalizer.NO;
	} else if(norm32!=0) {
	result=NewNormalizer.MAYBE;
	}
	}
	} */

	/**
	 * Returns the combining class stored in the norm32 value of a code point.
	 * @param c the code point
	 * @return bits CC_SHIFT..CC_SHIFT+7 of the norm32 value, in the range 0..0xFF
	 */
	public static int getCombiningClass(int c) {
	    int norm32;
	    if(c>0xffff) {
	        /* supplementary code point: start from the lead surrogate's value */
	        norm32=normTrieImpl.normTrie.getBMPValue(UTF16.getLeadSurrogate(c));
	        boolean needsTrailLookup=(norm32&CC_MASK)!=0;
	        if(needsTrailLookup) {
	            norm32=normTrieImpl.normTrie.getRawOffset(norm32, UTF16.getTrailSurrogate(c));
	        }
	    } else {
	        norm32=normTrieImpl.normTrie.getBMPValue((char)c);
	    }
	    /* the mask already limits the value to 0..0xFF */
	    return (norm32>>CC_SHIFT)&0xFF;
	}

	/**
	 * Tests the AUX_COMP_EX_MASK bits of the auxiliary trie value of a code point.
	 * @param c the code point
	 * @return true when any AUX_COMP_EX_MASK bit is set for c;
	 *         always false when isFormatVersion_2_1 is not set
	 */
	public static boolean isFullCompositionExclusion(int c) {
	    if(!isFormatVersion_2_1) {
	        return false;
	    }
	    return (auxTrieImpl.auxTrie.getCodePointValue(c) & AUX_COMP_EX_MASK)!=0;
	}

	/**
	 * Tests the AUX_UNSAFE_MASK bits of the auxiliary trie value of a code point.
	 * @param c the code point
	 * @return true when no AUX_UNSAFE_MASK bit is set for c;
	 *         always false when isFormatVersion_2_1 is not set
	 */
	public static boolean isCanonSafeStart(int c) {
	    if(!isFormatVersion_2_1) {
	        return false;
	    }
	    return (auxTrieImpl.auxTrie.getCodePointValue(c) & AUX_UNSAFE_MASK)==0;
	}

	public static boolean getCanonStartSet(int c, USerializedSet fillSet) {

	if(fillSet!=null && canonStartSets!=null) {
	/*
	* binary search for c
	*
	* There are two search tables,
	* one for BMP code points and one for supplementary ones.
	* See unormimp.h for details.
	*/
	char[] table;
	int i, start, limit;

	if(c<=0xffff) {
	table=(char[]) canonStartSets[CANON_SET_BMP_TABLE_INDEX];
	start=0;
	limit=table.length;

	/* each entry is a pair { c, result } */
	while(start<limit) {
	i=(char)((start+limit)/2);
	if(c<table[i]) {
	limit=i;
	} else {
	start=i;
	}
	}

	/* found? */
	if(c==table[start]) {
	i=table[start+1];
	if((i&CANON_SET_BMP_MASK)==CANON_SET_BMP_IS_INDEX) {
	/* result 01xxxxxx xxxxxx contains index x to a USerializedSet */
	i&=(CANON_SET_MAX_CANON_SETS-1);
	return fillSet.getSet(table,i);
	} else {
	/* other result values are BMP code points for single-code point sets */
	fillSet.setSerializedToOne(i);
	return true;
	}
	}
	} else {
	char high, low, h;

	table=(char[]) canonStartSets[CANON_SET_SUPP_TABLE_INDEX];
	start=0;
	limit=table.length;

	high=(char)(c>>16);
	low=(char)c;

	/* each entry is a triplet { high(c), low(c), result } */
	while(start<limit-3) {
	i=(char)(((start+limit)/6)3); / (start+limit)/2 and address triplets */
	h=(char)(table[i]&0x1f); /* high word */
	if(high<h \|\| (high==h && low<table[i+1])) {
	limit=i;
	} else {
	start=i;
	}
	}

	/* found? */
	h=table[start];
	if(high==(h&0x1f) && low==table[start+1]) {
	i=table[start+2];
	if((h&0x8000)==0) {
	/* the result is an index to a USerializedSet */
	return fillSet.getSet(table,i);
	} else {
	/*
	* single-code point set {x} in
	* triplet { 100xxxxx 000hhhhh llllllll llllllll xxxxxxxx xxxxxxxx }
	*/
	i\|=((int)h&0x1f00)<<8; /* add high bits from high(c) */
	fillSet.setSerializedToOne((int)i);
	return true;
	}
	}
	}
	}

	return false; /* not found */
	}

	/**
	 * Internal API, used by collation code.
	 * Hands out the FCD trie held by fcdTrieImpl so that the caller can
	 * perform incremental, per-code unit, FCD checks in collation.
	 * @return the FCD trie
	 * @internal
	 */
	public CharTrie getFCDTrie(){
	    final CharTrie trie = fcdTrieImpl.fcdTrie;
	    return trie;
	}

	}