src/com/ibm/icu/text/Normalizer.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2000-2003, International Business Machines Corporation and         *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Normalizer.java,v $
  * $Date: 2003/12/13 00:30:50 $
  * $Revision: 1.39 $
  *
  *******************************************************************************
  */
 package com.ibm.icu.text;
 import com.ibm.icu.impl.NormalizerImpl;
 import com.ibm.icu.impl.UCharacterProperty;
 import com.ibm.icu.lang.UCharacter;

 import java.text.CharacterIterator;
 import com.ibm.icu.impl.Utility;

 /**
  * Unicode Normalization
  *
  * <h2>Unicode normalization API</h2>
  *
  * <code>normalize</code> transforms Unicode text into an equivalent composed or
  * decomposed form, allowing for easier sorting and searching of text.
  * <code>normalize</code> supports the standard normalization forms described in
  * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
  * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
  *
  * Characters with accents or other adornments can be encoded in
  * several different ways in Unicode.  For example, take the character A-acute.
  * In Unicode, this can be encoded as a single character (the
  * "composed" form):
  *
  * <p>
  *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
  * </p>
  *
  * or as two separate characters (the "decomposed" form):
  *
  * <p>
  *      0041    LATIN CAPITAL LETTER A
  *      0301    COMBINING ACUTE ACCENT
  * </p>
  *
  * To a user of your program, however, both of these sequences should be
  * treated as the same "user-level" character "A with acute accent".  When you
  * are searching or comparing text, you must ensure that these two sequences are
  * treated equivalently.  In addition, you must handle characters with more than
  * one accent.  Sometimes the order of a character's combining accents is
  * significant, while in other cases accent sequences in different orders are
  * really equivalent.
  *
  * Similarly, the string "ffi" can be encoded as three separate letters:
  *
  * <p>
  *      0066    LATIN SMALL LETTER F
  *      0066    LATIN SMALL LETTER F
  *      0069    LATIN SMALL LETTER I
  * </p>
  *
  * or as the single character
  *
  * <p>
  *      FB03    LATIN SMALL LIGATURE FFI
  * </p>
  *
  * The ffi ligature is not a distinct semantic character, and strictly speaking
  * it shouldn't be in Unicode at all, but it was included for compatibility
  * with existing character sets that already provided it.  The Unicode standard
  * identifies such characters by giving them "compatibility" decompositions
  * into the corresponding semantic characters.  When sorting and searching, you
  * will often want to use these mappings.
  *
  * <code>normalize</code> helps solve these problems by transforming text into
  * the canonical composed and decomposed forms as shown in the first example
  * above. In addition, you can have it perform compatibility decompositions so
  * that you can treat compatibility characters the same as their equivalents.
  * Finally, <code>normalize</code> rearranges accents into the proper canonical
  * order, so that you do not have to worry about accent rearrangement on your
  * own.
  *
  * Form FCD, "Fast C or D", is also designed for collation.
  * It makes it possible to work on strings that are not necessarily normalized
  * with an algorithm (like in collation) that works under "canonical closure",
  * i.e., it treats precomposed characters and their decomposed equivalents the
  * same.
  *
  * It is not a normalization form because it does not provide for uniqueness of
  * representation. Multiple strings may be canonically equivalent (their NFDs
  * are identical) and may all conform to FCD without being identical themselves.
  *
  * The form is defined such that the "raw decomposition", the recursive
  * canonical decomposition of each character, results in a string that is
  * canonically ordered. This means that precomposed characters are allowed for
  * as long as their decompositions do not need canonical reordering.
  *
  * Its advantage for a process like collation is that all NFD and most NFC texts
  * - and many unnormalized texts - already conform to FCD and do not need to be
  * normalized (NFD) for such a process. The FCD quick check will return YES for
  * most strings in practice.
  *
  * normalize(FCD) may be implemented with NFD.
  *
  * For more details on FCD see the collation design document:
  * http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm
  *
  * ICU collation performs either NFD or FCD normalization automatically if
  * normalization is turned on for the collator object. Beyond collation and
  * string search, normalized strings may be useful for string equivalence
  * comparisons, transliteration/transcription, unique representations, etc.
  *
  * The W3C generally recommends exchanging texts in NFC.
  * Note also that most legacy character encodings use only precomposed forms and
  * often do not encode any combining marks by themselves. For conversion to such
  * character encodings the Unicode text needs to be normalized to NFC.
  * For more usage examples, see the Unicode Standard Annex.
  * @stable ICU 2.8
  */

 public final class Normalizer implements Cloneable{

     //-------------------------------------------------------------------------
     // Private data
     //-------------------------------------------------------------------------
     // Internal character buffer, created with room for 100 chars.
     // clone() gives every copy its own array.
     private char[] buffer = new char[100];
     // Presumably the start, current position and limit of the valid data
     // inside "buffer"; their use lies outside this part of the file --
     // TODO confirm.
     private int bufferStart = 0;
     private int bufferPos   = 0;
     private int bufferLimit = 0;

     // This tells us what the bits in the "mode" object mean.
     // NOTE(review): no use of these three bits is visible in this part of
     // the file; verify they are still needed.
     private static final int COMPAT_BIT = 1;
     private static final int DECOMP_BIT = 2;
     private static final int COMPOSE_BIT = 4;

     // The input text and our position in it.
     // "text" is always a private copy/wrapper created by the constructors,
     // "mode" defaults to NFC and "options" to 0 (no options).
     private UCharacterIterator  text;
     private Mode                mode = NFC;
     private int                 options = 0;
     // Presumably indexes into "text" bracketing the current chunk;
     // assigned outside this part of the file -- TODO confirm.
     private int                 currentIndex;
     private int                 nextIndex;

     /**
      * Options bit set value to select Unicode 3.2 normalization
      * (except NormalizationCorrections).
      * At most one Unicode version can be selected at a time.
      * The bit value is 0x20.
      * @draft ICU 2.6
      */
     public static final int UNICODE_3_2=0x20;
     /**
      * Constant indicating that the end of the iteration has been reached.
      * This is guaranteed to have the same value as
      * {@link UCharacterIterator#DONE}, because it is defined in terms of it.
      * @stable ICU 2.8
      */
     public static final int DONE = UCharacterIterator.DONE;

     /**
      * Constants for normalization modes.
      * <p>
      * This base class carries the pass-through behavior (it is instantiated
      * directly for {@link Normalizer#NONE}); the private subclasses override
      * the dispatch methods below.
      * @stable ICU 2.8
      */
     public static class Mode {
         private int modeValue;

         private Mode(int value) {
             this.modeValue = value;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation copies src[srcStart..srcLimit) to dest at
          * destStart when it fits into dest[destStart..destLimit); otherwise
          * nothing is copied.
          * @return the source length (srcLimit - srcStart) in both cases
          * @draft ICU 2.6
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             final int needed = srcLimit - srcStart;
             final int available = destLimit - destStart;
             if (needed <= available) {
                 System.arraycopy(src, srcStart, dest, destStart, needed);
             }
             return needed;
         }

         /**
          * This method is used for method dispatch.
          * Converts the options to a set via NormalizerImpl.getNX and
          * forwards to the UnicodeSet overload.
          * @draft ICU 2.6
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 int options) {
             final UnicodeSet nx = NormalizerImpl.getNX(options);
             return normalize(src, srcStart, srcLimit,
                              dest, destStart, destLimit,
                              nx);
         }

         /**
          * This method is used for method dispatch.
          * The base implementation returns src unchanged.
          * @draft ICU 2.6
          */
         protected String normalize(String src, int options) {
             return src;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation returns -1.
          * @stable ICU 2.8
          */
         protected int getMinC() {
             return -1;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation returns -1.
          * @stable ICU 2.8
          */
         protected int getMask() {
             return -1;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation returns null.
          * @stable ICU 2.8
          */
         protected IsPrevBoundary getPrevBoundary() {
             return null;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation returns null.
          * @stable ICU 2.8
          */
         protected IsNextBoundary getNextBoundary() {
             return null;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation never inspects the text: it answers
          * MAYBE when allowMaybe is set and NO otherwise.
          * @draft ICU 2.6
          */
         protected QuickCheckResult quickCheck(char[] src, int start, int limit,
                                               boolean allowMaybe, UnicodeSet nx) {
             return allowMaybe ? MAYBE : NO;
         }

         /**
          * This method is used for method dispatch.
          * The base implementation reports every code point as skippable.
          * @stable ICU 2.8
          */
         protected boolean isNFSkippable(int c) {
             return true;
         }
     }

     /**
      * No decomposition/composition.
      * This is a plain instance of the {@link Mode} base class
      * (mode value 1).
      * @stable ICU 2.8
      */
     public static final Mode NONE = new Mode(1);

     /**
      * Canonical decomposition.
      * Implemented by the private NFDMode subclass (mode value 2).
      * @stable ICU 2.8
      */
     public static final Mode NFD = new NFDMode(2);

     /**
      * Mode implementation behind NFD (canonical decomposition).
      * Every operation is delegated to NormalizerImpl or to the static
      * helpers of the enclosing class.
      */
     private static final class NFDMode extends Mode {
         private NFDMode(int value) {
             super(value);
         }

         /** Decomposes the source range into dest with the "compat" flag off. */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // One-element out-parameter (named trailCC in NormalizerImpl's
             // call convention); its value is not used here.
             final int[] trailCCOut = new int[1];
             return NormalizerImpl.decompose(src, srcStart, srcLimit,
                                             dest, destStart, destLimit,
                                             false, trailCCOut, nx);
         }

         /**
          * Decomposes the string canonically.
          * NOTE(review): "options" is not forwarded to decompose(); confirm
          * whether that is intended.
          */
         protected String normalize(String src, int options) {
             return decompose(src, false);
         }

         protected int getMinC() {
             return NormalizerImpl.MIN_WITH_LEAD_CC;
         }

         /** Returns a fresh IsPrevNFDSafe on every call. */
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevNFDSafe();
         }

         /** Returns a fresh IsNextNFDSafe on every call. */
         protected IsNextBoundary getNextBoundary() {
             return new IsNextNFDSafe();
         }

         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFD;
         }

         /** Runs NormalizerImpl.quickCheck with the NFD threshold and flag. */
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                                        NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFD,
                                              allowMaybe,
                                              nx);
         }

         protected boolean isNFSkippable(int c) {
             final int mask = NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFD;
             return NormalizerImpl.isNFSkippable(c, this, mask);
         }
     }

     /**
      * Compatibility decomposition.
      * Implemented by the private NFKDMode subclass (mode value 3).
      * @stable ICU 2.8
      */
     public static final Mode NFKD = new NFKDMode(3);

     /**
      * Mode implementation behind NFKD (compatibility decomposition).
      * Every operation is delegated to NormalizerImpl or to the static
      * helpers of the enclosing class.
      */
     private static final class NFKDMode extends Mode {
         private NFKDMode(int value) {
             super(value);
         }

         /** Decomposes the source range into dest with the "compat" flag on. */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // One-element out-parameter (named trailCC in NormalizerImpl's
             // call convention); its value is not used here.
             final int[] trailCCOut = new int[1];
             return NormalizerImpl.decompose(src, srcStart, srcLimit,
                                             dest, destStart, destLimit,
                                             true, trailCCOut, nx);
         }

         /**
          * Decomposes the string with compatibility mappings.
          * NOTE(review): "options" is not forwarded to decompose(); confirm
          * whether that is intended.
          */
         protected String normalize(String src, int options) {
             return decompose(src, true);
         }

         protected int getMinC() {
             return NormalizerImpl.MIN_WITH_LEAD_CC;
         }

         /** Returns a fresh IsPrevNFDSafe on every call. */
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevNFDSafe();
         }

         /** Returns a fresh IsNextNFDSafe on every call. */
         protected IsNextBoundary getNextBoundary() {
             return new IsNextNFDSafe();
         }

         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFKD;
         }

         /** Runs NormalizerImpl.quickCheck with the NFKD threshold and flag. */
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                                        NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFKD,
                                              allowMaybe,
                                              nx);
         }

         protected boolean isNFSkippable(int c) {
             final int mask = NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFKD;
             return NormalizerImpl.isNFSkippable(c, this, mask);
         }
     }

     /**
      * Canonical decomposition followed by canonical composition.
      * Implemented by the private NFCMode subclass (mode value 4); this is
      * also the mode a Normalizer starts with and the value of
      * {@link #DEFAULT}.
      * @stable ICU 2.8
      */
     public static final Mode NFC = new NFCMode(4);

     /**
      * Mode implementation behind NFC (canonical decomposition followed by
      * canonical composition).  Every operation is delegated to
      * NormalizerImpl or to the static helpers of the enclosing class.
      */
     private static final class NFCMode extends Mode{
         private NFCMode(int value){
             super(value);
         }

         /**
          * Composes src[srcStart..srcLimit) into dest[destStart..destLimit)
          * with the "compat" flag off.
          * @return the value returned by NormalizerImpl.compose
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest,int destStart,int destLimit,
                                 UnicodeSet nx){
             return NormalizerImpl.compose( src, srcStart, srcLimit,
                                            dest,destStart,destLimit,
                                            false, nx);
         }

         /**
          * Composes the string according to NFC rules.
          * @param src     the string to compose
          * @param options the normalization options (e.g. UNICODE_3_2),
          *                0 for none
          * @return the composed string
          */
         protected String normalize( String src, int options){
             // Forward the options: the two-argument compose() always uses
             // 0, which silently dropped caller-supplied options.
             return compose(src,false,options);
         }

         protected int getMinC(){
             return NormalizerImpl.getFromIndexesArr(
                                     NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
                                 );
         }
         /** Returns a fresh IsPrevTrueStarter on every call. */
         protected IsPrevBoundary getPrevBoundary(){
             return new IsPrevTrueStarter();
         }
         /** Returns a fresh IsNextTrueStarter on every call. */
         protected IsNextBoundary getNextBoundary(){
             return new IsNextTrueStarter();
         }
         protected int getMask(){
             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
         }
         /** Runs NormalizerImpl.quickCheck with the NFC threshold and flag. */
         protected QuickCheckResult quickCheck(char[] src,int start,
                                               int limit,boolean allowMaybe,
                                               UnicodeSet nx){
             return NormalizerImpl.quickCheck(
                                    src,start,limit,
                                    NormalizerImpl.getFromIndexesArr(
                                        NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
                                    ),
                                    NormalizerImpl.QC_NFC,
                                    allowMaybe,
                                    nx
                                );
         }
         protected boolean isNFSkippable(int c){
             return NormalizerImpl.isNFSkippable(c,this,
                            ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
                              (NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
                            )
                    );
         }
     };

     /**
      * Default normalization.
      * This is the same object as {@link #NFC}.
      * @stable ICU 2.8
      */
     public static final Mode DEFAULT = NFC;

     /**
      * Compatibility decomposition followed by canonical composition.
      * Implemented by the private NFKCMode subclass (mode value 5).
      * @stable ICU 2.8
      */
     public static final Mode NFKC =new NFKCMode(5);

     /**
      * Mode implementation behind NFKC (compatibility decomposition followed
      * by canonical composition).  Every operation is delegated to
      * NormalizerImpl or to the static helpers of the enclosing class.
      */
     private static final class NFKCMode extends Mode{
         private NFKCMode(int value){
             super(value);
         }

         /**
          * Composes src[srcStart..srcLimit) into dest[destStart..destLimit)
          * with the "compat" flag on.
          * @return the value returned by NormalizerImpl.compose
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest,int destStart,int destLimit,
                                 UnicodeSet nx){
             return NormalizerImpl.compose(src,  srcStart,srcLimit,
                                           dest, destStart,destLimit,
                                           true, nx);
         }

         /**
          * Composes the string according to NFKC rules.
          * @param src     the string to compose
          * @param options the normalization options (e.g. UNICODE_3_2),
          *                0 for none
          * @return the composed string
          */
         protected String normalize( String src, int options){
             // Forward the options: the two-argument compose() always uses
             // 0, which silently dropped caller-supplied options.
             return compose(src,true,options);
         }
         protected int getMinC(){
             return NormalizerImpl.getFromIndexesArr(
                                     NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
                                 );
         }
         /** Returns a fresh IsPrevTrueStarter on every call. */
         protected IsPrevBoundary getPrevBoundary(){
             return new IsPrevTrueStarter();
         }
         /** Returns a fresh IsNextTrueStarter on every call. */
         protected IsNextBoundary getNextBoundary(){
             return new IsNextTrueStarter();
         }
         protected int getMask(){
             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
         }
         /** Runs NormalizerImpl.quickCheck with the NFKC threshold and flag. */
         protected QuickCheckResult quickCheck(char[] src,int start,
                                               int limit,boolean allowMaybe,
                                               UnicodeSet nx){
             return NormalizerImpl.quickCheck(
                                    src,start,limit,
                                    NormalizerImpl.getFromIndexesArr(
                                       NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
                                    ),
                                    NormalizerImpl.QC_NFKC,
                                    allowMaybe,
                                    nx
                                  );
         }
         protected boolean isNFSkippable(int c){
             return NormalizerImpl.isNFSkippable(c, this,
                           ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
                             (NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
                           )
                    );
         }
     };

     /**
      * "Fast C or D" form.
      * Implemented by the private FCDMode subclass (mode value 6).
      * @stable ICU 2.8
      */
     public static final Mode FCD = new FCDMode(6);

     /**
      * Mode implementation behind FCD ("Fast C or D").  Every operation is
      * delegated to NormalizerImpl or to the static helpers of the enclosing
      * class.
      */
     private static final class FCDMode extends Mode{
         private FCDMode(int value){
             super(value);
         }
         /**
          * Brings src[srcStart..srcLimit) into FCD form, writing into
          * dest[destStart..destLimit).
          * @return the value returned by NormalizerImpl.makeFCD
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest,int destStart,int destLimit,
                                 UnicodeSet nx){
             return NormalizerImpl.makeFCD(src, srcStart,srcLimit,
                                           dest, destStart,destLimit, nx);
         }
         /** Brings the string into FCD form, honoring the given options. */
         protected String normalize( String src, int options){
             return makeFCD(src, options);
         }
         protected int getMinC(){
             return NormalizerImpl.MIN_WITH_LEAD_CC;
         }
         /** Returns a fresh IsPrevNFDSafe on every call. */
         protected IsPrevBoundary getPrevBoundary(){
             return new IsPrevNFDSafe();
         }
         /** Returns a fresh IsNextNFDSafe on every call. */
         protected IsNextBoundary getNextBoundary(){
             return new IsNextNFDSafe();
         }
         /** Same mask as the NFD mode: CC_MASK combined with QC_NFD. */
         protected int getMask(){
             return NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD;
         }
         /**
          * Answers YES or NO from NormalizerImpl.checkFCD; allowMaybe is
          * ignored, so MAYBE is never returned.
          */
         protected QuickCheckResult quickCheck(char[] src,int start,
                                               int limit,boolean allowMaybe,
                                               UnicodeSet nx){
             return NormalizerImpl.checkFCD(src,start,limit,nx) ? YES : NO;
         }
         /**
          * Tells whether the code point is skippable for FCD.
          * @param c the code point to test
          * @return true if lead cc==0 and trail cc<=1
          */
         protected boolean isNFSkippable(int c){
             /*
              * FCD: skippable if lead cc==0 and trail cc<=1.
              * Both conditions hold exactly when the combined FCD value is
              * at most 1; the former ">1" comparison was the inverse of
              * this rule.
              * NOTE(review): assumes getFCD16 packs the lead cc above the
              * trail cc (lead<<8 | trail) -- confirm against NormalizerImpl.
              */
             return (NormalizerImpl.getFCD16(c)<=1);
         }
     };


     /**
      * Null operation for use with the {@link #Normalizer constructors}
      * and the static {@link #normalize normalize} method.  This value tells
      * the <tt>Normalizer</tt> to do nothing but return unprocessed characters
      * from the underlying String or CharacterIterator.  If you have code which
      * requires raw text at some times and normalized text at others, you can
      * use <tt>NO_OP</tt> for the cases where you want raw text, rather
      * than having a separate code path that bypasses <tt>Normalizer</tt>
      * altogether.
      * <p>
      * This constant is the same object as {@link #NONE}.
      * @see #setMode
      * @deprecated ICU 2.8. Use Normalizer.NONE
      * @see #NONE
      */
     public static final Mode NO_OP = NONE;

     /**
      * Canonical decomposition followed by canonical composition.  Used with the
      * {@link #Normalizer constructors} and the static
      * {@link #normalize normalize} method to determine the operation to be
      * performed.
      * <p>
      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
      * off, this operation produces output that is in
      * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
      * Form</a>
      * <b>C</b>.
      * <p>
      * This constant is the same object as {@link #NFC}.
      * @see #setMode
      * @deprecated ICU 2.8. Use Normalizer.NFC
      * @see #NFC
      */
     public static final Mode COMPOSE = NFC;

     /**
      * Compatibility decomposition followed by canonical composition.
      * Used with the {@link #Normalizer constructors} and the static
      * {@link #normalize normalize} method to determine the operation to be
      * performed.
      * <p>
      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
      * off, this operation produces output that is in
      * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
      * Form</a>
      * <b>KC</b>.
      * <p>
      * This constant is the same object as {@link #NFKC}.
      * @see #setMode
      * @deprecated ICU 2.8. Use Normalizer.NFKC
      * @see #NFKC
      */
     public static final Mode COMPOSE_COMPAT = NFKC;

     /**
      * Canonical decomposition.  This value is passed to the
      * {@link #Normalizer constructors} and the static
      * {@link #normalize normalize}
      * method to determine the operation to be performed.
      * <p>
      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
      * off, this operation produces output that is in
      * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
      * Form</a>
      * <b>D</b>.
      * <p>
      * This constant is the same object as {@link #NFD}.
      * @see #setMode
      * @deprecated ICU 2.8. Use Normalizer.NFD
      * @see #NFD
      */
     public static final Mode DECOMP = NFD;

     /**
      * Compatibility decomposition.  This value is passed to the
      * {@link #Normalizer constructors} and the static
      * {@link #normalize normalize}
      * method to determine the operation to be performed.
      * <p>
      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
      * off, this operation produces output that is in
      * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
      * Form</a>
      * <b>KD</b>.
      * <p>
      * This constant is the same object as {@link #NFKD}.
      * @see #setMode
      * @deprecated ICU 2.8. Use Normalizer.NFKD
      * @see #NFKD
      */
     public static final Mode DECOMP_COMPAT = NFKD;

     /**
      * Option to disable Hangul/Jamo composition and decomposition.
      * This option applies to Korean text,
      * which can be represented either in the Jamo alphabet or in Hangul
      * characters, which are really just two or three Jamo combined
      * into one visual glyph.  Since Jamo takes up more storage space than
      * Hangul, applications that process only Hangul text may wish to turn
      * this option on when decomposing text.
      * <p>
      * The Unicode standard treats Hangul to Jamo conversion as a
      * canonical decomposition, so this option must be turned <b>off</b> if you
      * wish to transform strings into one of the standard
      * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
      * Unicode Normalization Forms</a>.
      * @see #setOption
      * @deprecated ICU 2.8. This option is no longer supported.
      *             (TODO: confirm that the implementation ignores it.)
      */
     public static final int IGNORE_HANGUL = 0x0001;

     /**
      * Result values for quickCheck().
      * Instances cannot be created by clients; only the constants defined
      * by the enclosing class exist.
      * For details see Unicode Technical Report 15.
      * @stable ICU 2.8
      */
     public static final class QuickCheckResult {
         // Numeric code of this result, as passed to the constructor.
         private int resultValue;

         private QuickCheckResult(int value) {
             this.resultValue = value;
         }
     }
     /**
      * Indicates that the string is not in the normalized format
      * (result code 0).
      * @stable ICU 2.8
      */
     public static final QuickCheckResult NO = new QuickCheckResult(0);

     /**
      * Indicates that the string is in the normalized format
      * (result code 1).
      * @stable ICU 2.8
      */
     public static final QuickCheckResult YES = new QuickCheckResult(1);

     /**
      * Indicates that it cannot be determined whether the string is in the
      * normalized format without further thorough checks (result code 2).
      * @stable ICU 2.8
      */
     public static final QuickCheckResult MAYBE = new QuickCheckResult(2);

     /**
      * Option bit for compare:
      * Case sensitively compare the strings.
      * Defined as the same value as UCharacter.FOLD_CASE_DEFAULT.
      * @stable ICU 2.8
      */
     public static final int FOLD_CASE_DEFAULT =  UCharacter.FOLD_CASE_DEFAULT;

     /**
      * Option bit for compare:
      * Both input strings are assumed to fulfill FCD conditions.
      * The bit value is 0x20000.
      * @stable ICU 2.8
      */
     public static final int INPUT_IS_FCD    =      0x20000;

     /**
      * Option bit for compare:
      * Perform case-insensitive comparison.
      * The bit value is 0x10000.
      * @stable ICU 2.8
      */
     public static final int COMPARE_IGNORE_CASE  =     0x10000;

     /**
      * Option bit for compare:
      * Compare strings in code point order instead of code unit order.
      * The bit value is 0x8000.
      * @stable ICU 2.8
      */
     public static final int COMPARE_CODE_POINT_ORDER = 0x8000;

     /**
      * Option value for case folding: exclude the mappings for dotted I
      * and dotless i marked with 'I' in CaseFolding.txt.
      * Defined as the same value as UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I.
      * @stable ICU 2.8
      */
     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I;

     /**
      * Lowest-order bit number of compare() options bits corresponding to
      * normalization options bits.
      *
      * The options parameter for compare() uses most bits for
      * itself and for various comparison and folding flags.
      * The most significant bits, however, are shifted down and passed on
      * to the normalization implementation.
      * (That is, from compare(..., options, ...),
      * options&gt;&gt;COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
      * internal normalization functions.)
      *
      * @see #compare
      * @draft ICU 2.6
      */
 	 public static final int COMPARE_NORM_OPTIONS_SHIFT  = 20;

     //-------------------------------------------------------------------------
     // Constructors
     //-------------------------------------------------------------------------

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of a given string.
      * <p>
      * The <tt>options</tt> parameter specifies which optional
      * <tt>Normalizer</tt> features are to be enabled for this object.
      * <p>
      * @param str  The string to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode The normalization mode.
      *
      * @param opt Any optional features to be enabled.
      *            Currently the only available option is {@link #UNICODE_3_2}.
      *            If you want the default behavior corresponding to one of the
      *            standard Unicode Normalization Forms, use 0 for this argument.
      * @draft ICU 2.6
      */
     public Normalizer(String str, Mode mode, int opt) {
         this.text = UCharacterIterator.getInstance(str);
         this.mode = mode;
         this.options=opt;
     }

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of the given text.
      * <p>
      * @param iter  The input text to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode  The normalization mode.
      *
      * @param opt Any optional features to be enabled.
      *            Currently the only available option is {@link #UNICODE_3_2}.
      *            If you want the default behavior corresponding to one of the
      *            standard Unicode Normalization Forms, use 0 for this argument.
 	 * @draft ICU 2.6
      */
     public Normalizer(CharacterIterator iter, Mode mode, int opt){
         this.text = UCharacterIterator.getInstance(
                                         (CharacterIterator)iter.clone()
                                     );
         this.mode = mode;
         this.options = opt;
     }

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of the given text.
      * <p>
      * The iterator is cloned, so this object works on its own copy.  A
      * CloneNotSupportedException from the iterator is reported as an
      * InternalError.
      * <p>
      * @param iter  The input text to be normalized.
      *
      * @param mode  The normalization mode.
      * @param options The normalization options, ORed together (0 for no options).
      * @draft ICU 2.6
      */
     public Normalizer(UCharacterIterator iter, Mode mode, int options){
         UCharacterIterator privateCopy;
         try {
             privateCopy = (UCharacterIterator) iter.clone();
         } catch (CloneNotSupportedException e) {
             throw new InternalError(e.toString());
         }
         this.text    = privateCopy;
         this.mode    = mode;
         this.options = options;
     }

     /**
      * Clones this <tt>Normalizer</tt> object.  All properties of this
      * object are duplicated in the new object; the text iterator and the
      * internal buffer are cloned so that the copy does not share them with
      * this object.
      * However, the text storage underlying the iterator is not duplicated
      * unless the iterator's <tt>clone</tt> method does so.
      * A CloneNotSupportedException is reported as an InternalError.
      * @return the copy, as an Object
      * @stable ICU 2.8
      */
     public Object clone() {
         Normalizer duplicate;
         try {
             duplicate = (Normalizer) super.clone();
             duplicate.text = (UCharacterIterator) text.clone();
         } catch (CloneNotSupportedException e) {
             throw new InternalError(e.toString());
         }
         // super.clone() only copied the array reference; give the
         // duplicate its own buffer with the same contents
         if (buffer != null) {
             duplicate.buffer = (char[]) buffer.clone();
         }
         return duplicate;
     }

     //--------------------------------------------------------------------------
     // Static Utility methods
     //--------------------------------------------------------------------------

     /**
      * Composes a string with no normalization options set; equivalent to
      * calling the three-argument overload with options 0.
      * @param str        The string to compose.
      * @param compat     If true the string will be composed according to
      *                    NFKC rules and if false will be composed according to
      *                    NFC rules.
      * @return String    The composed string
      * @stable ICU 2.8
      */
     public static String compose(String str, boolean compat){
         final int noOptions = 0;
         return compose(str, compat, noOptions);
     }

     /**
      * Compose a string.
      * The string will be composed according to the specified mode.
      * @param str        The string to compose.
      * @param compat     If true the string will be composed according to
      *                    NFKC rules and if false will be composed according to
      *                    NFC rules.
      * @param options    The only recognized option is UNICODE_3_2
      * @return String    The composed string
 	 * @draft ICU 2.6
      */
     public static String compose(String str, boolean compat, int options) {
         // Initial capacity guess: MAX_BUF_SIZE_COMPOSE chars per input char.
         char[] output = new char[str.length() * MAX_BUF_SIZE_COMPOSE];
         final char[] input = str.toCharArray();
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);

         int needed = NormalizerImpl.compose(input, 0, input.length,
                                             output, 0, output.length,
                                             compat, nxSet);
         // A reported size larger than the buffer means the result did not
         // fit; retry with a buffer of exactly the reported size.
         while (needed > output.length) {
             output = new char[needed];
             needed = NormalizerImpl.compose(input, 0, input.length,
                                             output, 0, output.length,
                                             compat, nxSet);
         }
         return new String(output, 0, needed);
     }

     /**
      * Compose a string.
      * The string will be composed according to the specified mode.
      * @param source The char array to compose.
      * @param target A char buffer to receive the normalized text.
      * @param compat If true the char array will be composed according to
      *                NFKC rules and if false will be composed according to
      *                NFC rules.
      * @param options The normalization options, ORed together (0 for no options).
      * @return int   The total buffer size needed; a value greater than
      *                target.length is never returned, because that case
      *                raises IndexOutOfBoundsException instead.
      * @exception IndexOutOfBoundsException if target.length is less than the
      *             required length
      * @draft ICU 2.6
      */
     public static int compose(char[] source,char[] target, boolean compat, int options){
         // UnicodeSet that NormalizerImpl.getNX derives from the option bits.
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final int needed = NormalizerImpl.compose(source, 0, source.length,
                                                   target, 0, target.length,
                                                   compat, nxSet);
         // The result did not fit: report the required size in the message.
         if (needed > target.length) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     /**
      * Compose a string.
      * The string will be composed according to the specified mode.
      * @param src       The char array to compose.
      * @param srcStart  Start index of the source
      * @param srcLimit  Limit index of the source
      * @param dest      The char buffer to fill in
      * @param destStart Start index of the destination buffer
      * @param destLimit End index of the destination buffer
      * @param compat If true the char array will be composed according to
      *                NFKC rules and if false will be composed according to
      *                NFC rules.
      * @param options The normalization options, ORed together (0 for no options).
      * @return int   The total buffer size needed; a value greater than
      *                (destLimit - destStart) is never returned, because that
      *                case raises IndexOutOfBoundsException instead.
      * @exception IndexOutOfBoundsException if (destLimit - destStart) is less
      *             than the required length
      * @draft ICU 2.6
      */
     public static int compose(char[] src,int srcStart, int srcLimit,
                               char[] dest,int destStart, int destLimit,
                               boolean compat, int options){
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final int needed = NormalizerImpl.compose(src, srcStart, srcLimit,
                                                   dest, destStart, destLimit,
                                                   compat, nxSet);
         // Room available in the destination range [destStart, destLimit).
         final int capacity = destLimit - destStart;
         if (needed > capacity) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     // Initial output-buffer sizing factor used by compose(String, boolean, int):
     // the first buffer holds str.length() * MAX_BUF_SIZE_COMPOSE chars.
     private static final int MAX_BUF_SIZE_COMPOSE = 2;
     // Initial output-buffer sizing factor used by decompose(String, boolean, int)
     // and makeFCD(String, int): the first buffer holds
     // (input length) * MAX_BUF_SIZE_DECOMPOSE chars.
     private static final int MAX_BUF_SIZE_DECOMPOSE = 3;

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param str       The string to decompose.
      * @param compat    If true the string will be decomposed according to NFKD
      *                   rules and if false will be decomposed according to NFD
      *                   rules.
      * @return String   The decomposed string
      * @stable ICU 2.8
      */
     public static String decompose(String str, boolean compat) {
         // Same as the three-argument overload with no option bits set.
         final int noOptions = 0;
         return decompose(str, compat, noOptions);
     }

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param str     The string to decompose.
      * @param compat  If true the string will be decomposed according to NFKD
      *                 rules and if false will be decomposed according to NFD
      *                 rules.
      * @param options The normalization options, ORed together (0 for no options).
      * @return String The decomposed string
      * @draft ICU 2.6
      */
     public static String decompose(String str, boolean compat, int options){
         // Initial capacity guess: MAX_BUF_SIZE_DECOMPOSE chars per input char.
         char[] dest = new char[str.length()*MAX_BUF_SIZE_DECOMPOSE];
         // Copy the input once, outside the retry loop: String.toCharArray()
         // allocates a fresh array on every call, and the input never changes
         // between passes. This mirrors compose(String, boolean, int).
         char[] src = str.toCharArray();
         // Scratch array passed to NormalizerImpl.decompose; its contents are
         // not read by this method.
         int[] trailCC = new int[1];
         UnicodeSet nx = NormalizerImpl.getNX(options);
         for(;;){
             int destSize = NormalizerImpl.decompose(src,0,src.length,
                                                     dest,0,dest.length,
                                                     compat,trailCC, nx);
             if(destSize<=dest.length){
                 return new String(dest,0,destSize);
             }
             // The result did not fit: retry with a buffer of the reported size.
             dest = new char[destSize];
         }
     }

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param source The char array to decompose.
      * @param target A char buffer to receive the normalized text.
      * @param compat If true the char array will be decomposed according to NFKD
      *                rules and if false will be decomposed according to
      *                NFD rules.
      * @return int   The total buffer size needed; a value greater than
      *                target.length is never returned, because that case
      *                raises IndexOutOfBoundsException instead.
      * @param options The normalization options, ORed together (0 for no options).
      * @exception IndexOutOfBoundsException if the target capacity is less than
      *             the required length
      * @draft ICU 2.6
      */
     public static int decompose(char[] source,char[] target, boolean compat, int options){
         // Scratch array passed to NormalizerImpl.decompose; not read here.
         final int[] trailingCC = new int[1];
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final int needed = NormalizerImpl.decompose(source, 0, source.length,
                                                     target, 0, target.length,
                                                     compat, trailingCC, nxSet);
         // The result did not fit: report the required size in the message.
         if (needed > target.length) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param src       The char array to decompose.
      * @param srcStart  Start index of the source
      * @param srcLimit  Limit index of the source
      * @param dest      The char buffer to fill in
      * @param destStart Start index of the destination buffer
      * @param destLimit End index of the destination buffer
      * @param compat If true the char array will be decomposed according to NFKD
      *                rules and if false will be decomposed according to
      *                NFD rules.
      * @param options The normalization options, ORed together (0 for no options).
      * @return int   The total buffer size needed; a value greater than
      *                (destLimit - destStart) is never returned, because that
      *                case raises IndexOutOfBoundsException instead.
      * @exception IndexOutOfBoundsException if the target capacity is less than
      *             the required length
      * @draft ICU 2.6
      */
     public static int decompose(char[] src,int srcStart, int srcLimit,
                                 char[] dest,int destStart, int destLimit,
                                 boolean compat, int options){
         // Scratch array passed to NormalizerImpl.decompose; not read here.
         final int[] trailingCC = new int[1];
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final int needed = NormalizerImpl.decompose(src, srcStart, srcLimit,
                                                     dest, destStart, destLimit,
                                                     compat, trailingCC, nxSet);
         // Room available in the destination range [destStart, destLimit).
         final int capacity = destLimit - destStart;
         if (needed > capacity) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     // Runs NormalizerImpl.makeFCD over the whole string and returns the result
     // as a new String, growing the output buffer when the first guess is too
     // small.
     private static String makeFCD(String src,int options){
         int srcLen = src.length();
         // Initial capacity guess: MAX_BUF_SIZE_DECOMPOSE chars per input char.
         char[] dest = new char[MAX_BUF_SIZE_DECOMPOSE*srcLen];
         // Copy the input once, outside the retry loop: String.toCharArray()
         // allocates a fresh array on every call, and the input never changes
         // between passes.
         char[] srcChars = src.toCharArray();
         UnicodeSet nx = NormalizerImpl.getNX(options);
         for(;;){
             int length = NormalizerImpl.makeFCD(srcChars,0,srcLen,
                                                 dest,0,dest.length,nx);
             if(length <= dest.length){
                 return new String(dest,0,length);
             }
             // The result did not fit: retry with a buffer of the reported size.
             dest = new char[length];
         }
     }

     /**
      * Normalizes a <tt>String</tt> using the given normalization operation.
      * <p>
      * The <tt>options</tt> parameter specifies which optional
      * <tt>Normalizer</tt> features are to be enabled for this operation.
      * Currently the only available option is {@link #UNICODE_3_2}.
      * If you want the default behavior corresponding to one of the standard
      * Unicode Normalization Forms, use 0 for this argument.
      * <p>
      * @param str       the input string to be normalized.
      * @param mode      the normalization mode
      * @param options   the optional features to be enabled.
      * @return String   the normalized string
      * @draft ICU 2.6
      */
     public static String normalize(String str, Mode mode, int options) {
         // The Mode object carries the algorithm; this is only the static entry point.
         final String normalized = mode.normalize(str, options);
         return normalized;
     }

     /**
      * Normalize a string.
      * The string will be normalized according to the specified normalization
      * mode and options.
      * @param src        The string to normalize.
      * @param mode       The normalization mode; one of Normalizer.NONE,
      *                    Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
      *                    Normalizer.NFKD, Normalizer.DEFAULT
      * @return the normalized string
      * @stable ICU 2.8
      *
      */
     public static String normalize(String src,Mode mode){
         // Same as the three-argument overload with no option bits set.
         final int noOptions = 0;
         return normalize(src, mode, noOptions);
     }
     /**
      * Normalize a string.
      * The string will be normalized according to the specified normalization
      * mode and options.
      * @param source The char array to normalize.
      * @param target A char buffer to receive the normalized text.
      * @param mode   The normalization mode; one of Normalizer.NONE,
      *                Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
      *                Normalizer.NFKD, Normalizer.DEFAULT
      * @param options The normalization options, ORed together (0 for no options).
      * @return int   The total buffer size needed; a value greater than
      *                target.length is never returned, because that case
      *                raises IndexOutOfBoundsException instead.
      * @exception    IndexOutOfBoundsException if the target capacity is less
      *                than the required length
      * @draft ICU 2.6
      */
     public static int normalize(char[] source,char[] target, Mode  mode, int options){
         // The ranged overload already throws IndexOutOfBoundsException (with the
         // required length as its message) when the result does not fit in
         // [0, target.length). Any value it returns is therefore <= target.length,
         // so the former second bounds check here could never fail and is dropped.
         return normalize(source, 0, source.length, target, 0, target.length, mode, options);
     }

     /**
      * Normalize a string.
      * The string will be normalized according to the specified normalization
      * mode and options.
      * @param src       The char array to normalize.
      * @param srcStart  Start index of the source
      * @param srcLimit  Limit index of the source
      * @param dest      The char buffer to fill in
      * @param destStart Start index of the destination buffer
      * @param destLimit End index of the destination buffer
      * @param mode      The normalization mode; one of Normalizer.NONE,
      *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
      *                   Normalizer.NFKD, Normalizer.DEFAULT
      * @param options The normalization options, ORed together (0 for no options).
      * @return int      The total buffer size needed; a value greater than
      *                   (destLimit - destStart) is never returned, because
      *                   that case raises IndexOutOfBoundsException instead.
      * @exception       IndexOutOfBoundsException if the target capacity is
      *                   less than the required length
      * @draft ICU 2.6
      */
     public static int normalize(char[] src,int srcStart, int srcLimit,
                                 char[] dest,int destStart, int destLimit,
                                 Mode  mode, int options){
         final int needed = mode.normalize(src, srcStart, srcLimit,
                                           dest, destStart, destLimit, options);
         // Room available in the destination range [destStart, destLimit).
         final int capacity = destLimit - destStart;
         if (needed > capacity) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     /**
      * Normalize a code point according to the given mode
      * @param char32    The input code point to be normalized.
      * @param mode      The normalization mode
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @return String   The normalized string
      * @draft ICU 2.6
      * @see #UNICODE_3_2
      */
     // TODO: actually do the optimization when the guts of Normalizer are
     // upgraded --has just dumb implementation for now
     public static String normalize(int char32, Mode mode, int options) {
         // Turn the code point into a String and reuse the String overload.
         final String asString = UTF16.valueOf(char32);
         return normalize(asString, mode, options);
     }
     /**
      * Convenience method to normalize a code point according to the given mode
      * @param char32    The input code point to be normalized.
      * @param mode      The normalization mode
      * @return String   The normalized string
      * @see #UNICODE_3_2
 	 * @draft ICU 2.6
      */
     // TODO: actually do the optimization when the guts of Normalizer are
     // upgraded --has just dumb implementation for now
     public static String normalize(int char32, Mode mode) {
         // Turn the code point into a String and normalize it with no options.
         final String asString = UTF16.valueOf(char32);
         final int noOptions = 0;
         return normalize(asString, mode, noOptions);
     }

     /**
      * Convenience method.
      *
      * @param source   string for determining if it is in a normalized format
      * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
      *                  Normalizer.NFKC,Normalizer.NFKD)
 	 * @return         Return code to specify if the text is normalized or not
      *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
      * @stable ICU 2.8
      */
     public static QuickCheckResult quickCheck( String source, Mode mode){
         // Check the whole string; no UnicodeSet is supplied (null) in this overload.
         final char[] chars = source.toCharArray();
         final int limit = source.length();
         return mode.quickCheck(chars, 0, limit, true, null);
     }

     /**
      * Convenience method.
      *
      * @param source   string for determining if it is in a normalized format
      * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
      *                  Normalizer.NFKC,Normalizer.NFKD)
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @return         Return code to specify if the text is normalized or not
      *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
      * @draft ICU 2.6
      */
     public static QuickCheckResult quickCheck( String source, Mode mode, int options){
         // Check the whole string, using the UnicodeSet that
         // NormalizerImpl.getNX derives from the option bits.
         final char[] chars = source.toCharArray();
         final int limit = source.length();
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         return mode.quickCheck(chars, 0, limit, true, nxSet);
     }

     /**
      * Convenience method.
      *
      * @param source Array of characters for determining if it is in a
      *                normalized format
      * @param mode   normalization format (Normalizer.NFC,Normalizer.NFD,
      *                Normalizer.NFKC,Normalizer.NFKD)
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @return       Return code to specify if the text is normalized or not
      *                (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
      * @draft ICU 2.6
      */
     public static QuickCheckResult quickCheck(char[] source, Mode mode, int options){
         // Whole-array case of the ranged overload, which issues the same
         // mode.quickCheck(source, start, limit, true, getNX(options)) call.
         return quickCheck(source, 0, source.length, mode, options);
     }

     /**
      * Performing quick check on a string, to quickly determine if the string is
      * in a particular normalization format.
      * Three types of result can be returned Normalizer.YES, Normalizer.NO or
      * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
      * string is in the desired normalized format, Normalizer.NO determines that
      * argument string is not in the desired normalized format. A
      * Normalizer.MAYBE result indicates that a more thorough check is required,
      * the user may have to put the string in its normalized form and compare
      * the results.
      *
      * @param source    string for determining if it is in a normalized format
      * @param start     the start index of the source
      * @param limit     the limit index of the source it is equal to the length
      * @param mode      normalization format (Normalizer.NFC,Normalizer.NFD,
      *                   Normalizer.NFKC,Normalizer.NFKD)
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @return          Return code to specify if the text is normalized or not
      *                   (Normalizer.YES, Normalizer.NO or
      *                   Normalizer.MAYBE)
      * @draft ICU 2.6
      */

     public static QuickCheckResult quickCheck(char[] source,int start,
                                               int limit, Mode mode,int options){
         // The literal true is the flag value every quickCheck overload passes;
         // isNormalized passes false in the same position.
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final QuickCheckResult verdict = mode.quickCheck(source, start, limit, true, nxSet);
         return verdict;
     }

     //-------------------------------------------------------------------------
     // Internal methods (for now)
     //-------------------------------------------------------------------------


     /**
 	 * Test if a string is in a given normalization form.
 	 * This is semantically equivalent to source.equals(normalize(source, mode)).
 	 *
 	 * Unlike quickCheck(), this function returns a definitive result,
 	 * never a "maybe".
 	 * For NFD, NFKD, and FCD, both functions work exactly the same.
 	 * For NFC and NFKC where quickCheck may return "maybe", this function will
 	 * perform further tests to arrive at a true/false result.
      * @param src       The input array of characters to be checked to see if
      *                   it is normalized
      * @param start     The start index in the source
      * @param limit     The limit index in the source
      * @param mode      the normalization mode
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @return Boolean value indicating whether the source string is in the
      *         "mode" normalization form
      * @draft ICU 2.6
      */
     public static boolean isNormalized(char[] src,int start,
                                        int limit, Mode mode,
                                        int options) {
         // Same call as quickCheck but with the flag set to false; only a
         // YES verdict counts as normalized.
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final QuickCheckResult verdict = mode.quickCheck(src, start, limit, false, nxSet);
         return verdict == YES;
     }

     /**
      * Convenience Method
      * @param str       the input string to be checked to see if it is
      *                   normalized
      * @param mode      the normalization mode
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2
      * @see #isNormalized
      * @draft ICU 2.6
      */
     public static boolean isNormalized(String str, Mode mode, int options) {
         // Check the whole string; only a YES verdict counts as normalized.
         final char[] chars = str.toCharArray();
         final int limit = str.length();
         final UnicodeSet nxSet = NormalizerImpl.getNX(options);
         final QuickCheckResult verdict = mode.quickCheck(chars, 0, limit, false, nxSet);
         return verdict == YES;
     }

     /**
      * Convenience Method
      * @param char32    the input code point to be checked to see if it is
      *                   normalized
      * @param mode      the normalization mode
      * @param options   Options for use with exclusion set and tailored Normalization
      * 					 The only option that is currently recognized is UNICODE_3_2

      * @see #isNormalized
      * @draft ICU 2.6
      */
     // TODO: actually do the optimization when the guts of Normalizer are
     // upgraded --has just dumb implementation for now
     public static boolean isNormalized(int char32, Mode mode,int options) {
         // Turn the code point into a String and reuse the String overload.
         final String asString = UTF16.valueOf(char32);
         return isNormalized(asString, mode, options);
     }

     /**
      * Compare two strings for canonical equivalence.
      * Further options include case-insensitive comparison and
      * code point order (as opposed to code unit order).
      *
      * Canonical equivalence between two strings is defined as their normalized
      * forms (NFD or NFC) being identical.
      * This function compares strings incrementally instead of normalizing
      * (and optionally case-folding) both strings entirely,
      * improving performance significantly.
      *
      * Bulk normalization is only necessary if the strings do not fulfill the
      * FCD conditions. Only in this case, and only if the strings are relatively
      * long, is memory allocated temporarily.
      * For FCD strings and short non-FCD strings there is no memory allocation.
      *
      * Semantically, this is equivalent to
      *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
      * where code point order and foldCase are all optional.
      *
      * @param s1        First source character array.
      * @param s1Start   start index of source
      * @param s1Limit   limit of the source
      *
      * @param s2        Second source character array.
      * @param s2Start   start index of the source
      * @param s2Limit   limit of the source
      *
      * @param options A bit set of options:
      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
      *     Case-sensitive comparison in code unit order, and the input strings
      *     are quick-checked for FCD.
      *
      *   - INPUT_IS_FCD
      *     Set if the caller knows that both s1 and s2 fulfill the FCD
      *     conditions.If not set, the function will quickCheck for FCD
      *     and normalize if necessary.
      *
      *   - COMPARE_CODE_POINT_ORDER
      *     Set to choose code point order instead of code unit order
      *
      *   - COMPARE_IGNORE_CASE
      *     Set to compare strings case-insensitively using case folding,
      *     instead of case-sensitively.
      *     If set, then the following case folding options are used.
      *
      *
      * @return <0 or 0 or >0 as usual for string comparisons
      *
      * @see #normalize
      * @see #FCD
      * @stable ICU 2.8
      */
      public static int compare(char[] s1, int s1Start, int s1Limit,
                                char[] s2, int s2Start, int s2Limit,
                                int options){
          // Public entry point: the comparison itself is done by internalCompare.
          final int order = internalCompare(s1, s1Start, s1Limit,
                                            s2, s2Start, s2Limit,
                                            options);
          return order;
      }

     /**
      * Compare two strings for canonical equivalence.
      * Further options include case-insensitive comparison and
      * code point order (as opposed to code unit order).
      * Convenience method.
      *
      * @param s1 First source string.
      * @param s2 Second source string.
      *
      * @param options A bit set of options:
      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
      *     Case-sensitive comparison in code unit order, and the input strings
      *     are quick-checked for FCD.
      *
      *   - INPUT_IS_FCD
      *     Set if the caller knows that both s1 and s2 fulfill the FCD
      *     conditions. If not set, the function will quickCheck for FCD
      *     and normalize if necessary.
      *
      *   - COMPARE_CODE_POINT_ORDER
      *     Set to choose code point order instead of code unit order
      *
      *   - COMPARE_IGNORE_CASE
      *     Set to compare strings case-insensitively using case folding,
      *     instead of case-sensitively.
      *     If set, then the following case folding options are used.
      *
      *
      * @return <0 or 0 or >0 as usual for string comparisons
      *
      * @see #normalize
      * @see #FCD
      * @stable ICU 2.8
      */
      public static int compare(String s1, String s2, int options){
          // Compare the full contents of both strings via the ranged overload.
          final char[] first = s1.toCharArray();
          final int firstLimit = s1.length();
          final char[] second = s2.toCharArray();
          final int secondLimit = s2.length();
          return compare(first, 0, firstLimit, second, 0, secondLimit, options);
      }

     /**
      * Compare two strings for canonical equivalence.
      * Further options include case-insensitive comparison and
      * code point order (as opposed to code unit order).
      * Convenience method.
      *
      * @param s1 First source string.
      * @param s2 Second source string.
      *
      * @param options A bit set of options:
      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
      *     Case-sensitive comparison in code unit order, and the input strings
      *     are quick-checked for FCD.
      *
      *   - INPUT_IS_FCD
      *     Set if the caller knows that both s1 and s2 fulfill the FCD
      *     conditions. If not set, the function will quickCheck for FCD
      *     and normalize if necessary.
      *
      *   - COMPARE_CODE_POINT_ORDER
      *     Set to choose code point order instead of code unit order
      *
      *   - COMPARE_IGNORE_CASE
      *     Set to compare strings case-insensitively using case folding,
      *     instead of case-sensitively.
      *     If set, then the following case folding options are used.
      *
      *
      * @return <0 or 0 or >0 as usual for string comparisons
      *
      * @see #normalize
      * @see #FCD
      * @stable ICU 2.8
      */
      public static int compare(char[] s1, char[] s2, int options){
          // Compare both arrays in full via the ranged overload.
          final int firstLimit = s1.length;
          final int secondLimit = s2.length;
          return compare(s1, 0, firstLimit, s2, 0, secondLimit, options);
      }

     /**
      * Convenience method that can have faster implementation
      * by not allocating buffers.
      * @param char32a    the first code point to be compared
      * @param char32b    the second code point to be compared
      * @param options    A bit set of options
      * @stable ICU 2.8
      */
     // TODO: actually do the optimization when the guts of Normalizer are
     // upgraded --has just dumb implementation for now
     public static int compare(int char32a, int char32b,int options) {
         // Turn each code point into a String and reuse the String overload.
         final String first = UTF16.valueOf(char32a);
         final String second = UTF16.valueOf(char32b);
         return compare(first, second, options);
     }


     /**
      * Convenience method that can have faster implementation
      * by not allocating buffers.
      * @internal
      * @param charA     the code point to be compared against str2
      * @param str2      the second string
      * @param options   A bit set of options
      * @stable ICU 2.8
      *
      */
     // TODO: actually do the optimization when the guts of Normalizer are
     // upgraded --has just dumb implementation for now
     public static int compare(int charA, String str2, int options) {
         // Turn the code point into a String and reuse the String overload.
         final String first = UTF16.valueOf(charA);
         return compare(first, str2, options);
     }

     /**
      * Concatenate normalized strings, making sure that the result is normalized
      * as well.
      *
      * If both the left and the right strings are in
      * the normalization form according to "mode",
      * then the result will be
      *
      * <code>
      *     dest=normalize(left+right, mode)
      * </code>
      *
      * With the input strings already being normalized,
      * this function will use next() and previous()
      * to find the adjacent end pieces of the input strings.
      * Only the concatenation of these end pieces will be normalized and
      * then concatenated with the remaining parts of the input strings.
      *
      * It is allowed to have dest==left to avoid copying the entire left string.
      *
      * @param left Left source array, may be same as dest.
      * @param leftStart start in the left array.
      * @param leftLimit limit in the left array (==length)
      * @param right Right source array.
      * @param rightStart start in the right array.
      * @param rightLimit limit in the right array (==length)
      * @param dest The output buffer; can be null if destStart==destLimit==0
      *              for pure preflighting.
      * @param destStart start in the destination array
      * @param destLimit limit in the destination array (==length)
      * @param mode The normalization mode.
      * @param options The normalization options, ORed together (0 for no options).
      * @return Length of output (number of chars) when successful or
      *          IndexOutOfBoundsException
      * @exception IndexOutOfBoundsException whose message has the string
      *             representation of destination capacity required.
      * @see #normalize
      * @see #next
      * @see #previous
      * @exception IndexOutOfBoundsException if target capacity is less than the
      *             required length
      * @stable ICU 2.8
      */
      /* Concatenation of normalized strings --------------------------------- */

     public static int concatenate(char[] left,  int leftStart,  int leftLimit,
                                   char[] right, int rightStart, int rightLimit,
                                   char[] dest,  int destStart,  int destLimit,
                                   Normalizer.Mode mode, int options) {

         if(dest == null){
             throw new IllegalArgumentException();
         }

         /* check for overlapping right and destination */
         if (right == dest && rightStart < destLimit && destStart < rightLimit) {
             throw new IllegalArgumentException("overlapping right and dst ranges");
         }

         /* allow left==dest */

         /* number of chars that fit into dest[destStart..destLimit[ */
         final int destCapacity=destLimit-destStart;

         /*
          * Input: left[leftStart..leftLimit[ + right[rightStart..rightLimit[
          *
          * Find normalization-safe boundaries leftBoundary and rightBoundary
          * (both counted from the start of their own range) and copy the
          * end parts together:
          * buffer=left[leftBoundary..leftLength[ + right[0..rightBoundary[
          *
          * dest=left[0..leftBoundary[ +
          *      normalize(buffer) +
          *      right[rightBoundary..rightLength[
          */

         /*
          * previous() and next() are used only to move the iterators to the
          * boundaries. Their output and return values are deliberately
          * ignored: the boundary segments are plain copies of the input, so
          * they are taken straight from left/right using the iterator
          * positions. That keeps this method correct for segments longer
          * than the scratch array and independent of how many chars the
          * helpers report as copied.
          */
         final char[] scratch=new char[100];

         /* find a normalization boundary at the end of the left string */
         UCharacterIterator iter = UCharacterIterator.getInstance(left, leftStart, leftLimit);
         iter.setIndex(iter.getLength()); /* end of left string */
         previous(iter, scratch,0,scratch.length,mode,false,null,options);
         final int leftBoundary=iter.getIndex();
         final int leftTailLength=iter.getLength()-leftBoundary;

         /* find a normalization boundary at the beginning of the right string */
         iter = UCharacterIterator.getInstance(right, rightStart, rightLimit);
         next(iter,scratch,0,scratch.length,mode,false,null,options);
         final int rightBoundary=iter.getIndex();

         /* buffer=left[leftBoundary..leftLength[ + right[0..rightBoundary[ */
         final int bufferLength=leftTailLength+rightBoundary;
         final char[] buffer=new char[bufferLength];
         System.arraycopy(left,leftStart+leftBoundary,buffer,0,leftTailLength);
         System.arraycopy(right,rightStart,buffer,leftTailLength,rightBoundary);

         /*
          * copy left[0..leftBoundary[ to dest; skipped only when that text is
          * already in place (same array and same start offset)
          */
         if((left!=dest || leftStart!=destStart) && leftBoundary>0 && destCapacity>0) {
             System.arraycopy(left,leftStart,dest,destStart,
                              Math.min(leftBoundary,destCapacity));
         }
         int destLength=leftBoundary;

         /* concatenate the normalization of the buffer to dest */
         if(destCapacity>destLength) {
             destLength+=Normalizer.normalize(buffer,0,bufferLength,dest,
                                              destStart+destLength,destLimit,
                                              mode,options);
         } else {
             /* no room left: same zero-capacity call as before */
             destLength+=Normalizer.normalize(buffer, 0, bufferLength,null,0,0,mode,options);
         }

         /* concatenate right[rightBoundary..rightLength[ to dest */
         final int rightRest=rightStart+rightBoundary;
         final int rightLength=rightLimit-rightRest;
         if(rightLength>0 && destCapacity>destLength) {
             /* never copy more than the space that is still free in dest */
             System.arraycopy(right,rightRest,dest,destStart+destLength,
                              Math.min(rightLength,destCapacity-destLength));
         }
         destLength+=rightLength;

         if(destLength<=destCapacity){
             return destLength;
         }else{
             throw new IndexOutOfBoundsException(Integer.toString(destLength));
         }
     }

     /**
      * Concatenates two char arrays and returns the result as a String,
      * making sure that the result is normalized across the seam.
      *
      * If both the left and the right strings are in
      * the normalization form according to "mode",
      * then the result will be
      *
      * <code>
      *     dest=normalize(left+right, mode)
      * </code>
      *
      * This is a convenience wrapper around the array-range
      * <code>concatenate</code> overload: it supplies the whole of both
      * arrays and an internally allocated destination.
      *
      * @param left Left source string.
      * @param right Right source string.
      * @param mode The normalization mode.
      * @param options The normalization options, ORed together (0 for no options).
      * @return the concatenated text as a new String
      *
      * @see #concatenate
      * @see #normalize
      * @see #next
      * @see #previous
      * @stable ICU 2.8
      */
     public static String concatenate(char[] left, char[] right,Mode mode, int options){
         char[] out = new char[(left.length+right.length)* MAX_BUF_SIZE_DECOMPOSE];
         while(true){
             final int needed = concatenate(left,  0, left.length,
                                            right, 0, right.length,
                                            out,   0, out.length,
                                            mode, options);
             if(needed>out.length){
                 /* reported length does not fit: retry with that much room */
                 out = new char[needed];
                 continue;
             }
             return new String(out,0,needed);
         }
     }

     /**
      * Concatenates two Strings, making sure that the result is normalized
      * across the seam.
      *
      * If both the left and the right strings are in
      * the normalization form according to "mode",
      * then the result will be
      *
      * <code>
      *     dest=normalize(left+right, mode)
      * </code>
      *
      * Each String is converted to a char array once and the work is handed
      * to the <code>char[]</code> overload, which uses the same destination
      * sizing and retry logic this method previously duplicated.
      *
      * @param left Left source string.
      * @param right Right source string.
      * @param mode The normalization mode.
      * @param options The normalization options, ORed together (0 for no options).
      * @return the concatenated text as a new String
      *
      * @see #concatenate
      * @see #normalize
      * @see #next
      * @see #previous
      * @stable ICU 2.8
      */
     public static String concatenate(String left, String right,Mode mode, int options){
         return concatenate(left.toCharArray(), right.toCharArray(), mode, options);
     }

     /**
      * Gets the FC_NFKC closure set from the normalization data.
      * This simply forwards to NormalizerImpl.getFC_NFKC_Closure.
      * @param c The code point whose closure set is to be retrieved
      * @param dest The char array to receive the closure set
      * @return the value reported by NormalizerImpl. The String overload of
      *         this method uses it as the closure length, and as the
      *         capacity to retry with when it exceeds dest.length.
      * @internal
      * @draft ICU 2.4
      */
     public static int getFC_NFKC_Closure(int c,char[] dest){
         return NormalizerImpl.getFC_NFKC_Closure(c,dest);
     }
     /**
      * Gets the FC_NFKC closure set from the normalization data as a String.
      * Starts with a 10-char array and, whenever the reported length does
      * not fit, retries with an array of exactly that length.
      * @param c The code point whose closure set is to be retrieved
      * @return String representation of the closure set
      * @internal
      * @draft ICU 2.4
      */
     public static String getFC_NFKC_Closure(int c){
         char[] closure = new char[10];
         int length = getFC_NFKC_Closure(c,closure);
         while(length>closure.length){
             closure = new char[length];
             length = getFC_NFKC_Closure(c,closure);
         }
         return new String(closure,0,length);
     }
     //-------------------------------------------------------------------------
     // Iteration API
     //-------------------------------------------------------------------------

     /**
      * Return the current character in the normalized text.
      * If no buffered output is left, the next piece of input is normalized
      * first; {@link #DONE} is returned when that yields nothing.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int current() {
         if(bufferPos>=bufferLimit && !nextNormalize()) {
             return DONE;
         }
         return getCodePointAt(bufferPos);
     }

     /**
      * Return the next character in the normalized text and advance
      * the iteration position past it (by two buffer positions for a
      * supplementary code point, otherwise by one).  If the end
      * of the text has already been reached, {@link #DONE} is returned.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int next() {
         if(bufferPos>=bufferLimit && !nextNormalize()) {
             return DONE;
         }
         final int c=getCodePointAt(bufferPos);
         if(c>0xFFFF) {
             bufferPos+=2;
         } else {
             bufferPos+=1;
         }
         return c;
     }


     /**
      * Return the previous character in the normalized text and move
      * the iteration position back over it (by two buffer positions for a
      * supplementary code point, otherwise by one).  If the beginning
      * of the text has already been reached, {@link #DONE} is returned.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int previous() {
         if(bufferPos<=0 && !previousNormalize()) {
             return DONE;
         }
         final int c=getCodePointAt(bufferPos-1);
         if(c>0xFFFF) {
             bufferPos-=2;
         } else {
             bufferPos-=1;
         }
         return c;
     }

    /**
     * Reset the index to the beginning of the text.
     * This is equivalent to setIndexOnly(startIndex()).
     * @stable ICU 2.8
     */
     public void reset() {
         text.setIndex(0);
         nextIndex=0;
         currentIndex=0;
         clearBuffer();
     }

    /**
     * Set the iteration position in the input text that is being normalized,
     * without any immediate normalization.
     * After setIndexOnly(), getIndex() will return the same index that is
     * specified here.
     *
     * @param index the desired index in the input text.
     * @stable ICU 2.8
     */
     public void setIndexOnly(int index) {
         // presumably the iterator rejects an out-of-range index here,
         // before any state is changed -- confirm in UCharacterIterator
         text.setIndex(index);
         nextIndex=index;
         currentIndex=index;
         clearBuffer();
     }

     /**
      * Set the iteration position in the input text that is being normalized
      * and return the first normalized character at that position.
      * <p>
      * <b>Note:</b> This method sets the position in the <em>input</em> text,
      * while {@link #next} and {@link #previous} iterate through characters
      * in the normalized <em>output</em>.  This means that there is not
      * necessarily a one-to-one correspondence between characters returned
      * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
      * returned from <tt>setIndex</tt> and {@link #getIndex}.
      * <p>
      * @param index the desired index in the input text.
      *
      * @return   the first normalized character that is the result of iterating
      *            forward starting at the given index.
      *
      * @throws IllegalArgumentException if the given index is less than
      *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
      * @return The codepoint as an int
      */
 //    public int setIndex(int index) {
 //		setIndexOnly(index);
 //		return current();
 //    }

     /**
      * Retrieve the index of the start of the input text. This is the begin
      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
      * @deprecated ICU 2.2. Use startIndex() instead.
      * @return the begin index; this implementation always returns 0
      * @see #startIndex
      */
     public int getBeginIndex() {
         return 0;
     }

     /**
      * Retrieve the index of the end of the input text.  This is the end index
      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
      * over which this <tt>Normalizer</tt> is iterating
      * @deprecated ICU 2.2. Use endIndex() instead.
      * @return the end index, i.e. the value of {@link #endIndex}
      * @see #endIndex
      */
     public int getEndIndex() {
         return endIndex();
     }
     /**
      * Return the first character in the normalized text.  This resets
      * the <tt>Normalizer's</tt> position to the beginning of the text
      * and then returns the result of {@link #next}, so the iteration
      * position ends up after the returned character.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int first() {
 		reset();
 		return next();
     }

     /**
      * Return the last character in the normalized text.  This moves
      * the <tt>Normalizer's</tt> position to the end of the input text and
      * then steps back with {@link #previous}, leaving it just before
      * the input text corresponding to that normalized character.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int last() {
         text.setToLimit();
         final int limit=text.getIndex();
         nextIndex=limit;
         currentIndex=limit;
         clearBuffer();
         return previous();
     }

     /**
      * Retrieve the current iteration position in the input text that is
      * being normalized.  This method is useful in applications such as
      * searching, where you need to be able to determine the position in
      * the input text that corresponds to a given normalized output character.
      * <p>
      * While buffered output remains to be read this is the input index at
      * which the buffered piece starts; once the buffer is used up it is the
      * input index at which the next piece starts.
      * <p>
      * <b>Note:</b> This method reports a position in the <em>input</em>, while
      * {@link #next} and {@link #previous} iterate through characters in the
      * <em>output</em>.  This means that there is not necessarily a one-to-one
      * correspondence between characters returned by <tt>next</tt> and
      * <tt>previous</tt> and the indices passed to <tt>setIndexOnly</tt>
      * and returned from {@link #getIndex}.
      * @return The current iteration position
      * @stable ICU 2.8
      */
     public int getIndex(){
         return (bufferPos<bufferLimit) ? currentIndex : nextIndex;
     }

     /**
      * Retrieve the index of the start of the input text. This is the begin
      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
      * @return the start index; this implementation always returns 0
      * @stable ICU 2.8
      */
     public int startIndex(){
 		return 0;
     }

     /**
      * Retrieve the index of the end of the input text.  This is the end index
      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
      * over which this <tt>Normalizer</tt> is iterating
      * @return the end index, taken from the length of the underlying text
      * @stable ICU 2.8
      */
     public int endIndex(){
 		return text.getLength();
     }

     //-------------------------------------------------------------------------
     // Property access methods
     //-------------------------------------------------------------------------
 	/**
      * Set the normalization mode for this object.
      * <p>
      * <b>Note:</b>If the normalization mode is changed while iterating
      * over a string, calls to {@link #next} and {@link #previous} may
      * return previously buffered characters in the old normalization mode
      * until the iteration is able to re-sync at the next base character.
      * This method only stores the new mode; it does not clear the buffer.
      * It is safest to call {@link #setText setText()}, {@link #first},
      * {@link #last}, etc. after calling <tt>setMode</tt>.
      * <p>
      * @param newMode the new mode for this <tt>Normalizer</tt>.
      * The supported modes are:
      * <ul>
      *  <li>{@link #COMPOSE}        - Unicode canonical decomposition
      *                                  followed by canonical composition.
      *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
      *                                  followed by canonical composition.
      *  <li>{@link #DECOMP}         - Unicode canonical decomposition
      *  <li>{@link #DECOMP_COMPAT}  - Unicode compatibility decomposition.
      *  <li>{@link #NO_OP}          - Do nothing but return characters
      *                                  from the underlying input text.
      * </ul>
      *
      * @see #getMode
      * @stable ICU 2.8
      */
     public void setMode(Mode newMode){
 		mode = newMode;
     }
 	/**
      * Return the basic operation performed by this <tt>Normalizer</tt>
      *
      * @return the mode most recently stored for this object
      * @see #setMode
      * @stable ICU 2.8
      */
     public Mode getMode() {
 		return mode;
     }
 	/**
      * Set options that affect this <tt>Normalizer</tt>'s operation.
      * Options do not change the basic composition or decomposition operation
      * that is being performed, but they control whether
      * certain optional portions of the operation are done.
      * Currently the only available option is:
      * <p>
      * <ul>
      *   <li>{@link #UNICODE_3_2} - Use Normalization conforming to Unicode version 3.2.
      * </ul>
      * <p>
      * The option is treated as a bit mask: its bits are ORed into the
      * current options when turning it on and cleared when turning it off.
      *
      * @param   option  the option whose value is to be set.
      * @param   value   the new setting for the option.  Use <tt>true</tt> to
      *                  turn the option on and <tt>false</tt> to turn it off.
      *
      * @see #getOption
      * @draft ICU 2.6
      */
     public void setOption(int option,boolean value) {
         options = value ? (options | option) : (options & (~option));
     }

     /**
      * Determine whether an option is turned on or off.
      * <p>
      * @param option the option bit(s) to test
      * @return 1 if any of the given bits is set in the current options,
      *         0 otherwise
      * @see #setOption
      * @draft ICU 2.6
      */
     public int getOption(int option){
         return ((options & option)!=0) ? 1 : 0;
     }

     /**
      * Gets the underlying text storage
      * @param fillIn the char buffer to fill the UTF-16 units.
      *         The length of the buffer should be equal to the length of the
      *         underlying text storage
      * @return the value returned by the underlying text iterator's
      *         getText(fillIn) -- presumably the number of UTF-16 units
      *         written; confirm against UCharacterIterator
      * @throws IndexOutOfBoundsException
      * @see   #getLength
      * @stable ICU 2.8
      */
     public int getText(char[] fillIn){
         return text.getText(fillIn);
     }

     /**
      * Gets the length of underlying text storage
      * @return the length, as reported by the underlying text iterator
      *         (the same value that {@link #endIndex} returns)
      * @stable ICU 2.8
      */
     public int getLength(){
         return text.getLength();
     }

     /**
      * Returns the text under iteration as a string
      * @return the text of the underlying iterator as a String (the input
      *         text, not its normalized form)
      * @stable ICU 2.8
      */
     public String getText(){
         return text.getText();
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new string to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(StringBuffer newText){
         final UCharacterIterator source = UCharacterIterator.getInstance(newText);
         if (source != null) {
             text = source;
             reset();
             return;
         }
         throw new InternalError("Could not create a new UCharacterIterator");
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new string to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(char[] newText){
         final UCharacterIterator source = UCharacterIterator.getInstance(newText);
         if (source != null) {
             text = source;
             reset();
             return;
         }
         throw new InternalError("Could not create a new UCharacterIterator");
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new string to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(String newText){
         final UCharacterIterator source = UCharacterIterator.getInstance(newText);
         if (source != null) {
             text = source;
             reset();
             return;
         }
         throw new InternalError("Could not create a new UCharacterIterator");
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new string to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(CharacterIterator newText){
         final UCharacterIterator source = UCharacterIterator.getInstance(newText);
         if (source != null) {
             text = source;
             reset();
             return;
         }
         throw new InternalError("Could not create a new UCharacterIterator");
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the string.
      * The iterator passed in is cloned; this object works on the clone.
      * @param newText   The new string to be normalized.
      * @throws InternalError if the iterator cannot be cloned
      * @stable ICU 2.8
      */
     public void setText(UCharacterIterator newText){
         final UCharacterIterator copy;
         try{
             copy = (UCharacterIterator)newText.clone();
         }catch(CloneNotSupportedException e){
             throw new InternalError("Could not clone the UCharacterIterator");
         }
         if (copy == null) {
             throw new InternalError("Could not create a new UCharacterIterator");
         }
         text = copy;
         reset();
     }

     //-------------------------------------------------------------------------
     // Private utility methods
     //-------------------------------------------------------------------------


     /* backward iteration --------------------------------------------------- */

     /*
      * Read one character backwards from src and get its norm32.
      *
      * The code units read are reported through chars:
      *   chars[0] - the unit read first (the trail surrogate of a pair)
      *   chars[1] - the lead surrogate when (chars[1], chars[0]) is a
      *              surrogate pair (reversed - it is the first surrogate
      *              but read second!), otherwise 0
      * chars is left untouched when src has nothing before its position.
      *
      * Returns 0 if there is nothing to read, if the character is <minC,
      * for an unpaired surrogate, or if the lead surrogate's data has none
      * of the mask bits set.
      *
      * chars[1] is non-zero ONLY for a real surrogate pair. Callers write
      * chars[1] to their buffer whenever it is non-zero, so reporting a
      * unit that is not part of the character would duplicate text.
      */

     private static  long getPrevNorm32(UCharacterIterator src,
                                                   int/*unsigned*/ minC,
                                                   int/*unsigned*/ mask,
                                                   char[] chars) {
         /* need src.hasPrevious() */
         final int ch=src.previous();
         if(ch == UCharacterIterator.DONE){
             return 0;
         }
         chars[0]=(char)ch;
         chars[1]=0;

         /* check for a surrogate before getting norm32 to see if we need to
          * predecrement further */
         if(chars[0]<minC) {
             return 0;
         }
         if(!UTF16.isSurrogate(chars[0])) {
             return NormalizerImpl.getNorm32(chars[0]);
         }
         if(UTF16.isLeadSurrogate(chars[0]) || (src.getIndex()==0)) {
             /* unpaired surrogate: a single unit, chars[1] stays 0 */
             return 0;
         }

         /* chars[0] is a trail surrogate; look at the unit before it */
         final char before=(char)src.previous();
         if(!UTF16.isLeadSurrogate(before)) {
             /* unpaired second surrogate, undo the src.previous() movement;
              * the unit before it was not consumed, so chars[1] stays 0 */
             src.moveIndex( 1);
             return 0;
         }

         chars[1]=before;
         final long norm32=NormalizerImpl.getNorm32(before);
         if((norm32&mask)==0) {
             /* all surrogate pairs with this lead surrogate have irrelevant
              * data */
             return 0;
         }
         /* norm32 must be a surrogate special */
         return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
     }

     /*
      * Strategy used by the backward scan: reads one character backwards
      * from src and reports whether it is a boundary at which the scan
      * stops. The code units that were read come back through chars
      * (chars[1]!=0 signals a second unit). The implementations in this
      * file pass minC and mask on to getPrevNorm32.
      */
     private interface IsPrevBoundary{
         public boolean isPrevBoundary(UCharacterIterator src,
                        int/*unsigned*/ minC,
                        int/*unsigned*/ mask,
                        char[] chars);
     }
     private static final class IsPrevNFDSafe implements IsPrevBoundary{
         /*
          * for NF*D:
          * read one character backwards and let NormalizerImpl.isNFDSafe
          * decide from its norm32 whether it is a boundary
          * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
          * surrogate but read second!)
          */
         public boolean isPrevBoundary(UCharacterIterator src,
                                       int/*unsigned*/ minC,
                                       int/*unsigned*/ ccOrQCMask,
                                       char[] chars) {
             final long norm32=getPrevNorm32(src, minC, ccOrQCMask, chars);
             return NormalizerImpl.isNFDSafe(norm32,
                                             ccOrQCMask,
                                             ccOrQCMask& NormalizerImpl.QC_MASK);
         }
     }

     private static final class IsPrevTrueStarter implements IsPrevBoundary{
         /*
          * read backwards and check if the character is (or its decomposition
          * begins with) a "true starter" (cc==0 and NF*C_YES)
          * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
          * surrogate but read second!)
          */
         public boolean isPrevBoundary(UCharacterIterator src,
                                          int/*unsigned*/ minC,
                                          int/*unsigned*/ ccOrQCMask,
                                          char[] chars) {
             /* decomposition quick check mask, derived from ccOrQCMask */
             final int/*unsigned*/ decompQCMask=(ccOrQCMask<<2)&0xf;
             return NormalizerImpl.isTrueStarter(
                         getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, chars),
                         ccOrQCMask,
                         decompQCMask);
         }
     }

     /*
      * Scans backwards from the current position of src until obj reports a
      * boundary or the start of the text is reached, collecting the code
      * units that were read at the END of buffer.
      *
      * On return src is positioned at the start of the collected text,
      * startIndex[0] is the index of its first unit and the return value is
      * its length.
      *
      * NOTE(review): when the text does not fit, a larger array is
      * allocated and assigned to the local parameter only. The array the
      * caller passed in is not updated, while the returned length and
      * startIndex[0] refer to the larger array. A return value greater than
      * the length of the array passed in is the only sign that this
      * happened.
      */
     private static int findPreviousIterationBoundary(UCharacterIterator src,
                                                      IsPrevBoundary obj,
                                                      int/*unsigned*/ minC,
                                                      int/*mask*/ mask,
                                                      char[] buffer,
                                                      int[] startIndex) {
         /* chars[0]/chars[1]: the unit(s) read by the last isPrevBoundary call */
         char[] chars=new char[2];
         boolean isBoundary;

          /* fill the buffer from the end backwards */
         startIndex[0] = buffer.length;
         chars[0]=0;
         /* NOTE(review): chars[0] is a char; if UCharacterIterator.DONE is a
          * negative int the second test can never fail -- confirm */
         while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
             isBoundary=obj.isPrevBoundary(src, minC, mask, chars);

             /* always write this character to the front of the buffer */
             /* make sure there is enough space in the buffer */
             if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {

                 // grow the buffer
                 char[] newBuf = new char[buffer.length*2];
                 /* move the current buffer contents up */
                 System.arraycopy(buffer,startIndex[0],newBuf,
                                  newBuf.length-(buffer.length-startIndex[0]),
                                  buffer.length-startIndex[0]);
                 //adjust the startIndex
                 startIndex[0]+=newBuf.length-buffer.length;

                 /* rebinds the local parameter only (see NOTE above) */
                 buffer=newBuf;
                 newBuf=null;

             }

             /* the units were read back to front, so storing chars[0] first
              * and chars[1] in front of it restores text order */
             buffer[--startIndex[0]]=chars[0];
             if(chars[1]!=0) {
                 buffer[--startIndex[0]]=chars[1];
             }

             /* stop if this just-copied character is a boundary */
             if(isBoundary) {
                 break;
             }
         }

         /* return the length of the buffer contents */
         return buffer.length-startIndex[0];
     }

     /*
      * Moves src backwards over one piece of text that can be normalized on
      * its own and writes that piece to dest[destStart..destLimit[.
      *
      * doNormalize==true:  the piece is normalized into dest and the length
      *                     reported by Normalizer.normalize is returned.
      * doNormalize==false: the piece is copied unchanged; as much as fits is
      *                     written and the full length of the piece is
      *                     returned.
      *
      * pNeededToNormalize (may be null): [0] is set to true only when
      * normalizing produced text that differs from the source piece.
      *
      * For modes without a previous-boundary test, exactly one code point is
      * read and copied.
      */
     private static int previous(UCharacterIterator src,
                    char[] dest, int destStart, int destLimit,
                    Mode mode,
                    boolean doNormalize,
                    boolean[] pNeededToNormalize,
                    int options) {

         final int destCapacity = destLimit-destStart;

         if(pNeededToNormalize!=null) {
             pNeededToNormalize[0]=false;
         }
         final char minC = (char)mode.getMinC();
         final int/*unsigned*/ mask = mode.getMask();
         final IsPrevBoundary isPreviousBoundary = mode.getPrevBoundary();

         if(isPreviousBoundary==null){
             /* no boundary test: hand back a single code point */
             int destLength=0;
             int c=src.previous();
             if(c>=0) {
                 destLength=1;
                 if(UTF16.isTrailSurrogate((char)c)){
                     final int c2= src.previous();
                     if(c2!= UCharacterIterator.DONE){
                         if(UTF16.isLeadSurrogate((char)c2)) {
                             if(destCapacity>=2) {
                                 dest[destStart+1]=(char)c; // trail surrogate
                                 destLength=2;
                             }
                             // lead surrogate to be written below
                             c=c2;
                         } else {
                             /* not a pair: un-read the unit before the trail */
                             src.moveIndex(1);
                         }
                     }
                 }

                 if(destCapacity>0) {
                     dest[destStart]=(char)c;
                 }
             }
             return destLength;
          }

         char[] buffer = new char[100];
         final int[] startIndex= new int[1];
         final int scanStart=src.getIndex();
         int bufferLength=findPreviousIterationBoundary(src,
                                                        isPreviousBoundary,
                                                        minC, mask,buffer,
                                                        startIndex);
         if(bufferLength>buffer.length) {
             /*
              * The piece did not fit: the helper continued in a larger array
              * of its own, which is not visible here, so this array is stale.
              * The required size is known now; scan again from the same
              * position into an array of exactly that size.
              */
             buffer=new char[bufferLength];
             src.setIndex(scanStart);
             bufferLength=findPreviousIterationBoundary(src,
                                                        isPreviousBoundary,
                                                        minC, mask,buffer,
                                                        startIndex);
         }

         int destLength=0;
         if(bufferLength>0) {
             if(doNormalize) {
                 destLength=Normalizer.normalize(buffer,startIndex[0],
                                      startIndex[0]+bufferLength,
                                      dest, destStart,destLimit,
                                      mode, options);

                 if(pNeededToNormalize!=null) {
                     /* changed if the length differs or any unit differs */
                     pNeededToNormalize[0]=destLength!=bufferLength ||
                                           !Utility.arrayRegionMatches(
                                                   buffer,startIndex[0],
                                                   dest,destStart,
                                                   destLength);
                 }
             } else {
                 /* just copy the source characters */
                 if(destCapacity>0) {
                     System.arraycopy(buffer,startIndex[0],dest,destStart,
                                      Math.min(bufferLength,destCapacity));
                 }
                 /* report the full length even if only part of it fit */
                 destLength=bufferLength;
             }
         }

         return destLength;
     }


     /* forward iteration ---------------------------------------------------- */
     /*
      * Strategy used by the forward scan:
      * read forward and check if the character is a next-iteration boundary
      * if c2!=0 then (c, c2) is a surrogate pair
      * (c is reported in chars[0] and c2 in chars[1]; the implementations
      * in this file pass minC and mask on to getNextNorm32)
      */
     private interface IsNextBoundary{
         boolean isNextBoundary(UCharacterIterator src,
                                int/*unsigned*/ minC,
                                int/*unsigned*/ mask,
                                int[] chars);
     }
     /*
      * Read one character forward from src and get its norm32.
      *
      * The code units read are reported through chars:
      *   chars[0] - the unit read first (whatever src.next() returned)
      *   chars[1] - the trail surrogate when (chars[0], chars[1]) is a
      *              surrogate pair, otherwise 0
      *
      * Returns 0 if the character is <minC, for an unmatched lead
      * surrogate, or if the lead surrogate's data has none of the mask
      * bits set. Always reads complete characters.
      *
      * chars[1] is non-zero ONLY when a trail surrogate was consumed.
      * Callers use chars[1]!=0 to decide how far to move back and how many
      * units to copy, so a unit that was merely peeked at must not be
      * reported there.
      */
     private static long /*unsigned*/ getNextNorm32(UCharacterIterator src,
                                                   int/*unsigned*/ minC,
                                                   int/*unsigned*/ mask,
                                                   int[] chars) {
         /* need src.hasNext() to be true */
         chars[0]=src.next();
         chars[1]=0;

         if(chars[0]<minC) {
             return 0;
         }

         final long norm32=NormalizerImpl.getNorm32((char)chars[0]);
         if(!UTF16.isLeadSurrogate((char)chars[0])) {
             return norm32;
         }

         /* lead surrogate: peek at the following unit without consuming it */
         final int following=src.current();
         if(following==UCharacterIterator.DONE ||
                     !UTF16.isTrailSurrogate((char)following)) {
             /* unmatched surrogate: nothing more was read, chars[1] stays 0 */
             return 0;
         }

         chars[1]=following;
         src.moveIndex(1); /* skip the c2 surrogate */
         if((norm32&mask)==0) {
             /* irrelevant data */
             return 0;
         }
         /* norm32 must be a surrogate special */
         return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
     }


     /*
      * for NF*D:
      * read one character forward and let NormalizerImpl.isNFDSafe decide
      * from its norm32 whether it is a boundary
      * if c2!=0 then (c, c2) is a surrogate pair
      */
     private static final class IsNextNFDSafe implements IsNextBoundary{
         public boolean isNextBoundary(UCharacterIterator src,
                                int/*unsigned*/ minC,
                                int/*unsigned*/ ccOrQCMask,
                                int[] chars) {
             final long norm32=getNextNorm32(src,minC,ccOrQCMask,chars);
             return NormalizerImpl.isNFDSafe(norm32,
                                             ccOrQCMask,
                                             ccOrQCMask&NormalizerImpl.QC_MASK);
         }
     }

     /*
      * for NF*C:
      * read forward and check if the character is (or its decomposition begins
      * with) a "true starter" (cc==0 and NF*C_YES)
      * if c2!=0 then (c, c2) is a surrogate pair
      */
     private static final class IsNextTrueStarter implements IsNextBoundary{
         public boolean isNextBoundary(UCharacterIterator src,
                                int/*unsigned*/ minC,
                                int/*unsigned*/ ccOrQCMask,
                                int[] chars) {
             /* decomposition quick check mask, derived from ccOrQCMask */
             final int/*unsigned*/ decompQCMask=(ccOrQCMask<<2)&0xf;
             return NormalizerImpl.isTrueStarter(
                         getNextNorm32(src, minC, ccOrQCMask|decompQCMask, chars),
                         ccOrQCMask,
                         decompQCMask);
         }
     }

     /*
      * Copies one iteration run from src into buffer: the first code point
      * unconditionally, then every following code point until obj reports a
      * boundary or the iterator is exhausted. When a boundary is found the
      * iterator is moved back so that it rests on the boundary code point.
      *
      * Caveat: when the run does not fit into buffer, collection continues in
      * a larger array that is private to this method. The caller's array then
      * holds only the leading part of the run, and the returned length is
      * larger than buffer.length.
      *
      * Returns the number of code units collected (0 if src was already at
      * its end).
      */
     private static int findNextIterationBoundary(UCharacterIterator src,
                                                  IsNextBoundary obj,
                                                  int/*unsigned*/ minC,
                                                  int/*unsigned*/ mask,
                                                  char[] buffer) {
         if(src.current()==UCharacterIterator.DONE){
             return 0;
         }

         /* pair[0] is the code unit just read, pair[1] its trail surrogate or 0 */
         final int[] pair = new int[2];
         char[] out = buffer;
         int length = 0;

         /* the first code point always belongs to the run; its properties
          * are not inspected */
         pair[0] = src.next();
         out[length++] = (char)pair[0];

         if(UTF16.isLeadSurrogate((char)pair[0])
                 && src.current()!=UCharacterIterator.DONE){
             pair[1] = src.next();
             if(UTF16.isTrailSurrogate((char)pair[1])){
                 out[length++] = (char)pair[1];
             } else {
                 /* not a trail surrogate: un-read it */
                 src.moveIndex(-1);
             }
         }

         /* keep appending code points until the boundary test succeeds;
          * the end of text is detected with current()==DONE */
         while(src.current()!=UCharacterIterator.DONE) {
             if(obj.isNextBoundary(src, minC, mask, pair)) {
                 /* step back over the code point that starts the next run */
                 src.moveIndex(pair[1]==0 ? -1 : -2);
                 break;
             }

             final int units = (pair[1]==0) ? 1 : 2;
             if(length+units > out.length) {
                 /* out of room: continue in an array twice as large */
                 final char[] grown = new char[out.length*2];
                 System.arraycopy(out, 0, grown, 0, length);
                 out = grown;
             }
             out[length++] = (char)pair[0];
             if(units==2) {
                 out[length++] = (char)pair[1];
             }
         }

         /* number of code units in the run */
         return length;
     }

     /*
      * Reads the next iteration run from src and writes it, normalized or
      * verbatim, into dest[destStart..destLimit).
      *
      * src                 iterator positioned at the start of the run; on
      *                     return it rests just behind the run
      * dest                output array
      * destStart/destLimit output window inside dest
      * mode                supplies minC, the mask, the boundary test and the
      *                     normalization itself
      * doNormalize         true: normalize the run; false: copy it unchanged
      * pNeededToNormalize  optional one-element out flag; set to true when
      *                     the normalized output differs from the source run
      * options             passed through to mode.normalize()
      *
      * Returns the length of the run as written (or as it would have been
      * written had dest been large enough); 0 at the end of the text.
      */
     private static int next(UCharacterIterator src,
                            char[] dest, int destStart, int destLimit,
                            Normalizer.Mode mode,
                            boolean doNormalize,
                            boolean[] pNeededToNormalize,
                            int options){

         char[] buffer=new char[100];
         int destCapacity = destLimit - destStart;
         int destLength = 0;

         if(pNeededToNormalize!=null) {
             pNeededToNormalize[0]=false;
         }

         char minC = (char)mode.getMinC();
         int /*unsigned*/ mask = mode.getMask();
         IsNextBoundary isNextBoundary = mode.getNextBoundary();

         if(isNextBoundary==null){
             /* this mode has no boundary test: return a single code point */
             int c=src.next();
             if(c!=UCharacterIterator.DONE) {
                 destLength=1;
                 if(UTF16.isLeadSurrogate((char)c)){
                     int c2= src.next();
                     if(c2!= UCharacterIterator.DONE) {
                         if(UTF16.isTrailSurrogate((char)c2)) {
                             if(destCapacity>=2) {
                                 /* trail surrogate; honor the output offset */
                                 dest[destStart+1]=(char)c2;
                                 destLength=2;
                             }
                             /* lead surrogate to be written below */
                         } else {
                             /* not a trail surrogate: un-read it */
                             src.moveIndex(-1);
                         }
                     }
                 }

                 if(destCapacity>0) {
                     dest[destStart]=(char)c;
                 }
             }
             return destLength;
         }

         int runStart=src.getIndex();
         int /*unsigned*/ bufferLength=
                 findNextIterationBoundary(src,isNextBoundary, minC, mask, buffer);
         if(bufferLength>buffer.length) {
             /*
              * The run did not fit: the helper continued in a larger array
              * of its own, so this buffer holds only the head of the run.
              * Rewind and collect the run again into an array that is
              * known to be big enough.
              */
             src.setIndex(runStart);
             buffer=new char[bufferLength];
             bufferLength=
                 findNextIterationBoundary(src,isNextBoundary, minC, mask, buffer);
         }

         if(bufferLength>0) {
             if(doNormalize) {
                 destLength=mode.normalize(buffer,0,bufferLength,
                                           dest,destStart,destLimit, options);

                 if(pNeededToNormalize!=null) {
                     /* normalization was needed if the text changed, i.e. the
                      * length differs or the contents do NOT match */
                     pNeededToNormalize[0]=destLength!=bufferLength ||
                                 !Utility.arrayRegionMatches(buffer,0,
                                                             dest,destStart,
                                                             destLength);
                 }
             } else {
                 /* just copy the source characters */
                 if(destCapacity>0) {
                     System.arraycopy(buffer,0,dest,destStart,
                                      Math.min(bufferLength,destCapacity));
                 }
                 /* report the run length, as the normalizing branch does */
                 destLength=bufferLength;
             }
         }
         return destLength;
     }

     /* Empties the normalization buffer by resetting all three indices to 0. */
     private void clearBuffer() {
         bufferStart=0;
         bufferPos=0;
         bufferLimit=0;
     }

     /*
      * Refills the buffer with the normalized run that starts at nextIndex
      * and advances currentIndex/nextIndex around it.
      * Returns true if the buffer received any output.
      */
     private boolean nextNormalize() {
         clearBuffer();

         /* the run about to be read begins where the previous one ended */
         currentIndex=nextIndex;
         text.setIndex(nextIndex);

         final int produced=
             next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
         bufferLimit=produced;

         /* the iterator now rests just behind the run */
         nextIndex=text.getIndex();
         return produced>0;
     }

     /*
      * Refills the buffer with the normalized run that ends at currentIndex
      * and moves currentIndex/nextIndex around it. The buffer position is
      * left at the end of the new contents.
      * Returns true if the buffer received any output.
      */
     private boolean previousNormalize() {
         clearBuffer();

         /* the run about to be read ends where the current one begins */
         nextIndex=currentIndex;
         text.setIndex(currentIndex);

         final int produced=
             previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
         bufferLimit=produced;

         /* the iterator now rests on the start of the run */
         currentIndex=text.getIndex();
         bufferPos=produced;
         return produced>0;
     }

     /*
      * Returns the code point that the code unit at buffer[index] belongs to.
      * A lead surrogate is combined with a following trail surrogate (if one
      * lies below bufferLimit); a trail surrogate is combined with a
      * preceding lead surrogate. Anything else, including an unpaired
      * surrogate, is returned as the single code unit.
      */
     private int getCodePointAt(int index){
         final char unit = buffer[index];

         if(!UTF16.isSurrogate(unit)){
             return unit;
         }

         if(UTF16.isLeadSurrogate(unit)){
             /* pair it with the code unit behind it */
             if((index+1)<bufferLimit
                     && UTF16.isTrailSurrogate(buffer[index+1])){
                 return UCharacterProperty.getRawSupplementary(
                                       unit,
                                       buffer[index+1]
                                   );
             }
         }else if(UTF16.isTrailSurrogate(unit)){
             /* pair it with the code unit in front of it */
             if(index>0 && UTF16.isLeadSurrogate(buffer[index-1])){
                 return UCharacterProperty.getRawSupplementary(
                                       buffer[index-1],
                                       unit
                                   );
             }
         }

         /* unpaired surrogate */
         return unit;
     }

     /**
      * Internal API: asks the given normalization mode whether a code point
      * is skippable, by delegating to {@code mode.isNFSkippable(c)}.
      *
      * @param c    the code point to test
      * @param mode the normalization mode that answers the question
      * @return the answer given by {@code mode}
      * @internal
      */
     public static boolean isNFSkippable(int c, Mode mode){
         final boolean skippable = mode.isNFSkippable(c);
         return skippable;
     }


     /*
      * Compares s1[s1Start..s1Limit) with s2[s2Start..s2Limit) for canonical
      * (and, depending on options, caseless) equivalence.
      *
      * Unless the caller declares the input to be FCD, each range is quick
      * checked and, if necessary, normalized into a scratch array first.
      * The comparison itself is done by NormalizerImpl.cmpEquivFold().
      *
      * s1, s1Start, s1Limit  first text and its range
      * s2, s2Start, s2Limit  second text and its range
      * options               comparison options; COMPARE_EQUIV is always added
      *
      * Returns the result of NormalizerImpl.cmpEquivFold().
      * Throws IllegalArgumentException if an array is null, an index is
      * negative, or a limit lies before its start.
      */
     private static int internalCompare(char[] s1, int s1Start,int s1Limit,
                                        char[] s2, int s2Start,int s2Limit,
                                        int options) {

         if(    s1==null || s1Start<0 || s1Limit<0 ||
                s2==null || s2Start<0 || s2Limit<0 ||
                s1Limit<s1Start || s2Limit<s2Start
           ) {

             throw new IllegalArgumentException();
         }

         UnicodeSet nx=NormalizerImpl.getNX(options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT);
         options|= NormalizerImpl.COMPARE_EQUIV;

         /*
          * UAX #21 Case Mappings, as fixed for Unicode version 4
          * (see Jitterbug 2021), defines a canonical caseless match as
          *
          * A string X is a canonical caseless match
          * for a string Y if and only if
          * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
          *
          * For better performance, we check for FCD (or let the caller tell us
          * that both strings are in FCD) for the inner normalization.
          * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
          * case-folding preserves the FCD-ness of a string.
          * The outer normalization is then only performed by
          * NormalizerImpl.cmpEquivFold() when there is a difference.
          *
          * Exception: When using the Turkic case-folding option, we do perform
          * full NFD first. This is because in the Turkic case precomposed
          * characters with 0049 capital I or 0069 small i fold differently
          * whether they are first decomposed or not, so an FCD check - a check
          * only for canonical order - is not sufficient.
          */
         Normalizer.Mode mode;
         if((options& Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
             mode=Normalizer.NFD;
             options&=~ Normalizer.INPUT_IS_FCD;
         } else {
             mode=Normalizer.FCD;
         }

         if((options& Normalizer.INPUT_IS_FCD)==0) {
             // check if s1 and/or s2 fulfill the FCD conditions
             boolean isFCD1= Normalizer.YES==mode.quickCheck(s1, s1Start, s1Limit, true, nx);
             boolean isFCD2= Normalizer.YES==mode.quickCheck(s2, s2Start, s2Limit, true, nx);
             /*
              * ICU 2.4 had a further optimization:
              * If both strings were not in FCD, then they were both NFD'ed,
              * and the COMPARE_EQUIV option was turned off.
              * It is not entirely clear that this is valid with the current
              * definition of the canonical caseless match.
              * Therefore, ICU 2.6 removes that optimization.
              */

             if(!isFCD1) {
                 /* normalize exactly the range that was quick checked */
                 char[] fcd1=new char[300];
                 int fcdLen1=mode.normalize(s1, s1Start, s1Limit,
                                            fcd1, 0, fcd1.length,
                                            nx);

                 if(fcdLen1>fcd1.length){
                     /* a length beyond the scratch array means the result did
                      * not fit: redo it with an array of the reported size */
                     fcd1=new char[fcdLen1];
                     fcdLen1=mode.normalize(s1, s1Start, s1Limit,
                                            fcd1, 0, fcd1.length,
                                            nx);
                 }
                 s1=fcd1;
                 s1Limit=fcdLen1;
                 s1Start=0;
             }

             if(!isFCD2) {
                 char[] fcd2=new char[300];
                 int fcdLen2=mode.normalize(s2, s2Start, s2Limit,
                                            fcd2, 0, fcd2.length,
                                            nx);

                 if(fcdLen2>fcd2.length){
                     /* same overflow handling as for s1 */
                     fcd2=new char[fcdLen2];
                     fcdLen2=mode.normalize(s2, s2Start, s2Limit,
                                            fcd2, 0, fcd2.length,
                                            nx);
                 }
                 s2=fcd2;
                 s2Limit=fcdLen2;
                 s2Start=0;
             }
         }

         return NormalizerImpl.cmpEquivFold(s1, s1Start, s1Limit,
                                            s2, s2Start, s2Limit, options);
     }
 }