/**
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*
*******************************************************************************
*/
package com.ibm.icu.text;
import java.text.StringCharacterIterator;
import java.text.CharacterIterator;
import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.impl.ICUDebug;
/**
* <p><code>CollationElementIterator</code> is an iterator created by
* a RuleBasedCollator to walk through a string. The return result of
* each iteration is a 32-bit collation element that defines the
* ordering priority of the next character or sequence of characters
* in the source string.</p>
*
* <p>For illustration, consider the following in Spanish:
* <blockquote>
* <pre>
* "ca" -> the first collation element is collation_element('c') and second
* collation element is collation_element('a').
*
* Since "ch" in Spanish sorts as one entity, the below example returns one
* collation element for the two characters 'c' and 'h'
*
* "cha" -> the first collation element is collation_element('ch') and second
* collation element is collation_element('a').
* </pre>
* </blockquote>
* And in German,
* <blockquote>
* <pre>
* Since the character '&#230;' is a composed character of 'a' and 'e', the
* iterator returns two collation elements for the single character '&#230;'
*
* "&#230;b" -> the first collation element is collation_element('a'), the
* second collation element is collation_element('e'), and the
* third collation element is collation_element('b').
* </pre>
* </blockquote>
* </p>
*
* <p>For collation ordering comparison, the collation element results
* cannot be compared simply with basic arithmetic operators,
* e.g. &lt;, == or &gt;; further processing has to be done. Details
* can be found in the ICU
* <a href=http://oss.software.ibm.com/icu/userguide/Collate_ServiceArchitecture.html>
* user guide</a>. An example of using the CollationElementIterator
* for collation ordering comparison is the class
* <a href=StringSearch.html> com.ibm.icu.text.StringSearch</a>.</p>
*
* <p>To construct a CollationElementIterator object, users
* call the method getCollationElementIterator() on a
* RuleBasedCollator that defines the desired sorting order.</p>
*
* <p> Example:
* <blockquote>
* <pre>
* String testString = "This is a test";
* RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
* CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
* int order = iterator.next();
* while (order != CollationElementIterator.NULLORDER) {
*     if (order != CollationElementIterator.IGNORABLE) {
*         // order is valid and not ignorable, so we do something with it
*         int primaryOrder = CollationElementIterator.primaryOrder(order);
*         System.out.println("Next primary order 0x" +
*                            Integer.toHexString(primaryOrder));
*     }
*     order = iterator.next();
* }
* </pre>
* </blockquote>
* </p>
* <p>
* This class is not subclassable.
* </p>
* @see Collator
* @see RuleBasedCollator
* @see StringSearch
* @author Syn Wee Quek
* @draft ICU 2.2
*/
public final class CollationElementIterator
{
// public data members --------------------------------------------------
/**
* <p>This constant is returned by the iterator in the methods
* next() and previous() when the end or the beginning of the
* source string has been reached, and there are no more valid
* collation elements to return.</p>
*
* <p>See class documentation for an example of use.</p>
* @draft ICU 2.2
* @see #next
* @see #previous */
public final static int NULLORDER = 0xffffffff;
/**
* <p>This constant is returned by the iterator in the methods
* next() and previous() when a collation element result is to be
* ignored.</p>
*
* <p>See class documentation for an example of use.</p>
* @draft ICU 2.2
* @see #next
* @see #previous */
public static final int IGNORABLE = 0;
// public methods -------------------------------------------------------
// public getters -------------------------------------------------------
/**
* <p>Returns the character offset in the source string
* corresponding to the next collation element. I.e., getOffset()
* returns the position in the source string corresponding to the
* collation element that will be returned by the next call to
* next(). This value could be any of:
* <ul>
* <li> The index of the <b>first</b> character corresponding to
* the next collation element. (This means that if
* <code>setOffset(offset)</code> sets the index in the middle of
* a contraction, <code>getOffset()</code> returns the index of
* the first character in the contraction, which may not be equal
* to the original offset that was set. Hence calling getOffset()
* immediately after setOffset(offset) does not guarantee that the
* original offset set will be returned.)
* <li> If normalization is on, the index of the <b>immediate</b>
* subsequent character, or composite character with the first
* character, having a combining class of 0.
* <li> The length of the source string, if iteration has reached
* the end.
*</ul>
* </p>
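*
* <p>For illustration, a minimal sketch of how the offset tracks the
* next element (the string and collator below are placeholder
* assumptions, not part of this method's documentation):
* <blockquote>
* <pre>
* RuleBasedCollator rbc = (RuleBasedCollator)Collator.getInstance();
* CollationElementIterator iter = rbc.getCollationElementIterator("ab");
* int offset = iter.getOffset(); // 0, the index of 'a'
* int order = iter.next();       // collation element for 'a'
* offset = iter.getOffset();     // now 1, the index of 'b'
* </pre>
* </blockquote>
* </p>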
* @return The character offset in the source string corresponding to the
* collation element that will be returned by the next call to
* next().
* @draft ICU 2.2
*/
public int getOffset()
{
if (m_bufferOffset_ != -1) {
if (m_isForwards_) {
return m_FCDLimit_;
}
return m_FCDStart_;
}
return m_source_.getIndex();
}
/**
* <p> Returns the maximum length of any expansion sequence that ends with
* the specified collation element. If there is no expansion with this
* collation element as the last element, returns 1.
* </p>
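*
* <p>A hedged sketch of the typical backwards-matching use, in the
* spirit of what StringSearch does (the iterator is an assumed,
* already-constructed instance):
* <blockquote>
* <pre>
* int ce = iterator.previous();
* if (ce != CollationElementIterator.NULLORDER) {
*     // the expansion ending with ce may span up to maxLength characters
*     int maxLength = iterator.getMaxExpansion(ce);
* }
* </pre>
* </blockquote>
* </p>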
* @param ce a collation element returned by previous() or next().
* @return the maximum length of any expansion sequence ending
* with the specified collation element.
* @draft ICU 2.2
*/
public int getMaxExpansion(int ce)
{
int start = 0;
int limit = m_collator_.m_expansionEndCE_.length;
long unsignedce = ce & 0xFFFFFFFFl;
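// binary search for ce in the ordered expansion-end CE table, comparing
// values as unsigned 32-bit integers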
while (start < limit - 1) {
int mid = start + ((limit - start) >> 1);
long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl;
if (unsignedce <= midce) {
limit = mid;
}
else {
start = mid;
}
}
int result = 1;
if (m_collator_.m_expansionEndCE_[start] == ce) {
result = m_collator_.m_expansionEndCEMaxSize_[start];
}
else if (limit < m_collator_.m_expansionEndCE_.length &&
m_collator_.m_expansionEndCE_[limit] == ce) {
result = m_collator_.m_expansionEndCEMaxSize_[limit];
}
else if ((ce & 0xFFFF) == 0x00C0) {
result = 2;
}
return result;
}
// public other methods -------------------------------------------------
/**
* <p> Resets the cursor to the beginning of the string. The next
* call to next() or previous() will return the first and last
* collation element in the string, respectively.</p>
*
* <p>If the RuleBasedCollator used by this iterator has had its
* attributes changed, calling reset() will reinitialize the
* iterator to use the new attributes.</p>
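*
* <p>For example (an illustrative sketch, not part of the original
* documentation), after changing an attribute on the collator the
* iterator can be reinitialized like this:
* <blockquote>
* <pre>
* rbc.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
* iterator.reset(); // picks up the new attribute setting
* </pre>
* </blockquote>
* </p>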
*
* @draft ICU 2.2
*/
public void reset()
{
m_source_.setIndex(m_source_.getBeginIndex());
updateInternalState();
}
/**
* <p>Get the next collation element in the source string.</p>
*
* <p>This iterator iterates over a sequence of collation elements
* that were built from the string. Because there isn't
* necessarily a one-to-one mapping from characters to collation
* elements, this doesn't mean the same thing as "return the
* collation element [or ordering priority] of the next character
* in the string".</p>
*
* <p>This function returns the collation element that the
* iterator is currently pointing to, and then updates the
* internal pointer to point to the next element. Previous()
* updates the pointer first, and then returns the element. This
* means that when you change direction while iterating (i.e.,
* call next() and then call previous(), or call previous() and
* then call next()), you'll get back the same element twice.</p>
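*
* <p>A minimal sketch of the direction change described above (the
* iterator here is an assumed, already-positioned instance):
* <blockquote>
* <pre>
* int ce = iterator.next();        // returns some element
* int again = iterator.previous(); // returns that same element again
* </pre>
* </blockquote>
* </p>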
*
* @return the next collation element or NULLORDER if the end of the
* iteration has been reached.
* @draft ICU 2.2
*/
public int next()
{
m_isForwards_ = true;
if (m_CEBufferSize_ > 0) {
if (m_CEBufferOffset_ < m_CEBufferSize_) {
// if there are expansions left in the buffer, we return it
return m_CEBuffer_[m_CEBufferOffset_ ++];
}
m_CEBufferSize_ = 0;
m_CEBufferOffset_ = 0;
}
char ch = nextChar();
/* System.out.println("ch " + Integer.toHexString(ch) + " " +
Integer.toHexString(m_source_.current()));*/
if (ch == CharacterIterator.DONE) {
return NULLORDER;
}
if (m_collator_.m_isHiragana4_) {
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
&& !(ch > 0x3094 && ch < 0x309d);
}
int result = NULLORDER;
if (ch <= 0xFF) {
// For latin-1 characters we never need to fall back to the UCA
// table because all of the UCA data is replicated in the
// latinOneMapping array
result = m_collator_.m_trie_.getLatin1LinearValue(ch);
if (RuleBasedCollator.isSpecial(result)) {
result = nextSpecial(m_collator_, result, ch);
}
}
else {
result = m_collator_.m_trie_.getLeadValue(ch);
//System.out.println(Integer.toHexString(result));
if (RuleBasedCollator.isSpecial(result)) {
// surrogate leads are handled as special ces
result = nextSpecial(m_collator_, result, ch);
}
if (result == CE_NOT_FOUND_) {
// couldn't find a good CE in the tailoring
// if we got here, the codepoint MUST be over 0xFF - so we look
// directly in the UCA
result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
if (RuleBasedCollator.isSpecial(result)) {
// UCA also gives us a special CE
result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
}
}
}
return result;
}
/**
* <p>Get the previous collation element in the source string.</p>
*
* <p>This iterator iterates over a sequence of collation elements
* that were built from the string. Because there isn't
* necessarily a one-to-one mapping from characters to collation
* elements, this doesn't mean the same thing as "return the
* collation element [or ordering priority] of the previous
* character in the string".</p>
*
* <p>This function updates the iterator's internal pointer to
* point to the collation element preceding the one it's currently
* pointing to and then returns that element, while next() returns
* the current element and then updates the pointer. This means
* that when you change direction while iterating (i.e., call
* next() and then call previous(), or call previous() and then
* call next()), you'll get back the same element twice.</p>
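*
* <p>A hedged sketch of a full backwards pass (the iterator and its
* source string "text" are assumed to exist):
* <blockquote>
* <pre>
* iterator.setOffset(text.length()); // position the iterator at the end
* int order = iterator.previous();
* while (order != CollationElementIterator.NULLORDER) {
*     // process order here
*     order = iterator.previous();
* }
* </pre>
* </blockquote>
* </p>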
*
* @return the previous collation element, or NULLORDER when the start of
* the iteration has been reached.
* @draft ICU 2.2
*/
public int previous()
{
if (m_source_.getIndex() <= 0 && m_isForwards_) {
// if the iterator is new or has been reset, we can immediately perform
// backwards iteration even when the offset is not right.
m_source_.setIndex(m_source_.getEndIndex());
updateInternalState();
}
m_isForwards_ = false;
int result = NULLORDER;
if (m_CEBufferSize_ > 0) {
if (m_CEBufferOffset_ > 0) {
return m_CEBuffer_[-- m_CEBufferOffset_];
}
m_CEBufferSize_ = 0;
m_CEBufferOffset_ = 0;
}
char ch = previousChar();
if (ch == CharacterIterator.DONE) {
return NULLORDER;
}
if (m_collator_.m_isHiragana4_) {
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
}
if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) {
result = previousSpecial(m_collator_, CE_CONTRACTION_, ch);
}
else {
if (m_bufferOffset_ < 0 && m_source_.getIndex() != 0
&& isThaiPreVowel(peekCharacter(-1))) {
// we now rearrange unconditionally
backupInternalState(m_utilSpecialBackUp_);
// we have to check if the previous character is also Thai
// if not, we can just set the result
// we have already determined that the normalization
// buffer is empty
m_source_.previous();
if (m_source_.getIndex() == 0
|| !isThaiPreVowel(peekCharacter(-1))) {
result = CE_THAI_;
}
else {
// previous is also reordered
// we need to go back as long as they are being
// reordered
// count over the range of reorderable characters
// and see
// if there is an even or odd number of them
// if even, we should not reorder.
// If odd we should reorder.
int noReordered = 1; // the one we already detected
while (m_source_.getIndex() != 0
&& isThaiPreVowel(m_source_.previous())) {
noReordered ++;
}
if ((noReordered & 1) != 0) {
// odd number of reorderables
result = CE_THAI_;
} else {
result = m_collator_.m_trie_.getLeadValue(ch);
}
}
updateInternalState(m_utilSpecialBackUp_);
}
else if (ch <= 0xFF) {
result = m_collator_.m_trie_.getLatin1LinearValue(ch);
if (RuleBasedCollator.isSpecial(result)) {
result = previousSpecial(m_collator_, result, ch);
}
}
else {
result = m_collator_.m_trie_.getLeadValue(ch);
}
if (RuleBasedCollator.isSpecial(result)) {
result = previousSpecial(m_collator_, result, ch);
}
if (result == CE_NOT_FOUND_) {
if (!isBackwardsStart()
&& m_collator_.isContractionEnd(ch)) {
result = CE_CONTRACTION_;
}
else {
result
= RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
}
if (RuleBasedCollator.isSpecial(result)) {
result = previousSpecial(RuleBasedCollator.UCA_,
result, ch);
}
}
}
return result;
}
/**
* Return the primary order of the specified collation element,
* i.e. the first 16 bits. This value is unsigned.
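* <p>For illustration (a hedged sketch; the iterator and its state are
* assumed), a collation element can be split into its three orders:
* <blockquote>
* <pre>
* int ce = iterator.next();
* if (ce != CollationElementIterator.NULLORDER) {
*     int primary   = CollationElementIterator.primaryOrder(ce);   // top 16 bits
*     int secondary = CollationElementIterator.secondaryOrder(ce); // next 8 bits
*     int tertiary  = CollationElementIterator.tertiaryOrder(ce);  // low 8 bits
* }
* </pre>
* </blockquote>
* </p>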
* @param ce the collation element
* @return the element's 16 bits primary order.
* @draft ICU 2.2
*/
public final static int primaryOrder(int ce)
{
return (ce & RuleBasedCollator.CE_PRIMARY_MASK_)
>>> RuleBasedCollator.CE_PRIMARY_SHIFT_;
}
/**
* Return the secondary order of the specified collation element,
* i.e. the 16th to 23rd bits, inclusive. This value is unsigned.
* @param ce the collation element
* @return the element's 8 bits secondary order
* @draft ICU 2.2
*/
public final static int secondaryOrder(int ce)
{
return (ce & RuleBasedCollator.CE_SECONDARY_MASK_)
>> RuleBasedCollator.CE_SECONDARY_SHIFT_;
}
/**
* Return the tertiary order of the specified collation element, i.e. the last
* 8 bits. This value is unsigned.
* @param ce the collation element
* @return the element's 8 bits tertiary order
* @draft ICU 2.2
*/
public final static int tertiaryOrder(int ce)
{
return ce & RuleBasedCollator.CE_TERTIARY_MASK_;
}
/**
* <p> Sets the iterator to point to the collation element
* corresponding to the character at the specified offset. The
* value returned by the next call to next() will be the collation
* element corresponding to the characters at offset.</p>
*
* <p>If offset is in the middle of a contracting character
* sequence, the iterator is adjusted to the start of the
* contracting sequence. This means that getOffset() is not
* guaranteed to return the same value set by this method.</p>
*
* <p>If the decomposition mode is on, and offset is in the middle
* of a decomposable range of source text, the iterator may not
* return a correct result for the next forwards or backwards
* iteration. The user must ensure that the offset is not in the
* middle of a decomposable range.</p>
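*
* <p>A hedged sketch of the contraction adjustment described above,
* using the Spanish "ch" contraction as an assumed example:
* <blockquote>
* <pre>
* // illustrative rules that treat "ch" as a single collating entity
* RuleBasedCollator rbc = new RuleBasedCollator("&amp;c &lt; ch");
* CollationElementIterator iter = rbc.getCollationElementIterator("chx");
* iter.setOffset(1);              // points into the middle of "ch"
* int offset = iter.getOffset();  // adjusted back to 0, the start of "ch"
* </pre>
* </blockquote>
* </p>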
*
* @param offset the character offset into the original source string to
* set. Note that this is not an offset into the corresponding
* sequence of collation elements.
* @draft ICU 2.2
*/
public void setOffset(int offset)
{
m_source_.setIndex(offset);
char ch = m_source_.current();
if (ch != CharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
// if it is unsafe we need to check if it is part of a contraction
// or a surrogate character
if (UTF16.isTrailSurrogate(ch)) {
// if it is a trail surrogate, check whether the preceding character
// is its lead surrogate
char prevch = m_source_.previous();
if (!UTF16.isLeadSurrogate(prevch)) {
m_source_.setIndex(offset); // go back to the same index
}
}
else {
// could be part of a contraction
// backup to a safe point and iterate till we pass offset
while (m_source_.getIndex() > 0) {
if (!m_collator_.isUnsafe(ch)) {
break;
}
ch = m_source_.previous();
}
updateInternalState();
int prevoffset = 0;
while (m_source_.getIndex() <= offset) {
prevoffset = m_source_.getIndex();
next();
}
m_source_.setIndex(prevoffset);
}
}
updateInternalState();
// direction code to prevent next and previous from returning a
// character if we are already at the ends
offset = m_source_.getIndex();
if (offset == m_source_.getBeginIndex()) {
// preventing previous() from returning characters from the end of
// the string again if we are at the beginning
m_isForwards_ = false;
}
else if (offset == m_source_.getEndIndex()) {
// preventing next() from returning characters from the start of
// the string again if we are at the end
m_isForwards_ = true;
}
}
/**
* <p>Set a new source string for iteration, and reset the offset
* to the beginning of the text.</p>
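*
* <p>For example (an illustrative sketch), the same iterator can be
* reused for a different string:
* <blockquote>
* <pre>
* iterator.setText("apple");
* int order = iterator.next(); // first collation element of "apple"
* </pre>
* </blockquote>
* </p>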
*
* @param source the new source string for iteration.
* @draft ICU 2.2
*/
public void setText(String source)
{
m_srcUtilIter_.setText(source);
m_source_ = m_srcUtilIter_;
updateInternalState();
}
/**
* <p>Set a new source string iterator for iteration, and reset the
* offset to the beginning of the text.
* </p>
* @param source the new source string iterator for iteration.
* @draft ICU 2.2
*/
public void setText(CharacterIterator source)
{
m_source_ = source;
m_source_.setIndex(m_source_.getBeginIndex());
updateInternalState();
}
// public miscellaneous methods -----------------------------------------
/**
* Tests whether the argument object is equal to this CollationElementIterator.
* Iterators are equal if they use the same RuleBasedCollator and the same
* source text, and have the same current position in the iteration.
* @param that object to test for equality with this
* CollationElementIterator
* @draft ICU 2.2
*/
public boolean equals(Object that)
{
if (that == this) {
return true;
}
if (that instanceof CollationElementIterator) {
CollationElementIterator thatceiter
= (CollationElementIterator)that;
if (m_collator_.equals(thatceiter.m_collator_)
&& m_source_.equals(thatceiter.m_source_)) {
return true;
}
}
return false;
}
// package private constructors ------------------------------------------
/**
* <p>CollationElementIterator constructor. This takes a source
* string and a RuleBasedCollator. The iterator will walk through
* the source string based on the rules defined by the
* collator. If the source string is empty, NULLORDER will be
* returned on the first call to next().</p>
*
* @param source the source string.
* @param collator the RuleBasedCollator
* @draft ICU 2.2
*/
CollationElementIterator(String source, RuleBasedCollator collator)
{
m_srcUtilIter_ = new StringCharacterIterator(source);
m_utilStringBuffer_ = new StringBuffer();
m_source_ = m_srcUtilIter_;
m_collator_ = collator;
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
m_buffer_ = new StringBuffer();
m_utilSpecialBackUp_ = new Backup();
updateInternalState();
}
/**
* <p>CollationElementIterator constructor. This takes a source
* character iterator and a RuleBasedCollator. The iterator will
* walk through the source string based on the rules defined by
* the collator. If the source string is empty, NULLORDER will be
* returned on the first call to next().</p>
*
* @param source the source string iterator.
* @param collator the RuleBasedCollator
* @draft ICU 2.2
*/
CollationElementIterator(CharacterIterator source,
RuleBasedCollator collator)
{
m_srcUtilIter_ = new StringCharacterIterator("");
m_utilStringBuffer_ = new StringBuffer();
m_source_ = source;
m_collator_ = collator;
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
m_buffer_ = new StringBuffer();
m_utilSpecialBackUp_ = new Backup();
updateInternalState();
}
// package private data members -----------------------------------------
/**
* true if current codepoint was Hiragana
*/
boolean m_isCodePointHiragana_;
/**
* Position in the original string that starts with a non-FCD sequence
*/
int m_FCDStart_;
/**
* This is the CE from CEs buffer that should be returned.
* Initial value is 0.
* Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
* backwards will end with m_CEBufferOffset_ == 0.
* The next/previous after we reach the end/beginning of the m_CEBuffer_
* will cause this value to be reset to 0.
*/
int m_CEBufferOffset_;
/**
* This is the position up to which processed CEs have been stored.
* Initial value is 0.
* The next/previous after we reach the end/beginning of the m_CEBuffer_
* will cause this value to be reset to 0.
*/
int m_CEBufferSize_;
// package private methods ----------------------------------------------
/**
* Sets the collator used.
* Internal use, all data members will be reset to the default values
* @param collator to set
*/
void setCollator(RuleBasedCollator collator)
{
m_collator_ = collator;
updateInternalState();
}
/**
* <p>Sets the iterator to point to the collation element corresponding to
* the specified character (the parameter is a CHARACTER offset in the
* original string, not an offset into its corresponding sequence of
* collation elements). The value returned by the next call to next()
* will be the collation element corresponding to the specified position
* in the text. Unlike the public method setOffset(int), this method does
* not try to readjust the offset to the start of a contracting sequence.
* getOffset() is guaranteed to return the same value as was passed to a
* preceding call to setOffset().</p>
* @param offset new character offset into the original text to set.
*/
void setExactOffset(int offset)
{
m_source_.setIndex(offset);
updateInternalState();
}
/**
* Checks if iterator is in the buffer zone
* @return true if iterator is in buffer zone, false otherwise
*/
boolean isInBuffer()
{
return m_bufferOffset_ > 0;
}
/**
* Determine if a character is a Thai base consonant which sorts before
* its pre-vowel.
* @param ch character to test
* @return true if ch is a Thai base consonant, false otherwise
*/
static final boolean isThaiBaseConsonant(char ch)
{
return ch >= 0xe01 && ch <= 0xe2e;
}
/**
* Determine if a character is a Thai pre-vowel, which is written before
* but sorts after its base consonant.
* @param ch character to test
* @return true if ch is a Thai pre-vowel, false otherwise
*/
static final boolean isThaiPreVowel(char ch)
{
return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
}
/**
* <p>Sets a new source string iterator for iteration and points the
* iterator at the specified character offset in that source (a CHARACTER
* offset in the original string, not an offset into its corresponding
* sequence of collation elements). The value returned by the next call
* to next() will be the collation element corresponding to the specified
* position in the text. Unlike the public method setOffset(int), this
* method does not try to readjust the offset to the start of a
* contracting sequence.</p>
* @param source the new source string iterator for iteration.
* @param offset character offset into the source at which to start.
*/
void setText(CharacterIterator source, int offset)
{
m_source_ = source;
m_source_.setIndex(offset);
updateInternalState();
}
// private inner class --------------------------------------------------
/**
* Backup data class
*/
private static final class Backup
{
// protected data members -------------------------------------------
/**
* Backup non FCD sequence limit
*/
protected int m_FCDLimit_;
/**
* Backup non FCD sequence start
*/
protected int m_FCDStart_;
/**
* Backup of whether the previous codepoint is Hiragana quaternary
*/
protected boolean m_isCodePointHiragana_;
/**
* Backup buffer position
*/
protected int m_bufferOffset_;
/**
* Backup source iterator offset
*/
protected int m_offset_;
/**
* Backup buffer contents
*/
protected StringBuffer m_buffer_;
// protected constructor --------------------------------------------
/**
* Empty constructor
*/
protected Backup()
{
m_buffer_ = new StringBuffer();
}
}
// end inner class ------------------------------------------------------
/**
* Direction of travel
*/
private boolean m_isForwards_;
/**
* Source string iterator
*/
private CharacterIterator m_source_;
/**
* This is the position in m_buffer_; -1 if the iterator is not in m_buffer_
*/
private int m_bufferOffset_;
/**
* Buffer for temporary storage of normalized characters, discontiguous
* characters and Thai characters
*/
private StringBuffer m_buffer_;
/**
* Position in the original string to continue forward FCD check from.
*/
private int m_FCDLimit_;
/**
* The collator this iterator is based on
*/
private RuleBasedCollator m_collator_;
/**
* true if Hiragana quaternary is on
*/
private boolean m_isHiragana4_;
/**
* CE buffer
*/
private int m_CEBuffer_[];
/**
* In reality we should not have to deal with expansion sequences longer
* than 16. However, this value can be changed if a bigger buffer is
* needed. Note: if the size is changed to too small a number, BIG trouble.
* A reasonably small value is around 10, if there are no Arabic or other
* funky collations that have long expansion sequences. This is the longest
* expansion sequence this class can handle without bombing out.
*/
private static final int CE_BUFFER_INIT_SIZE_ = 512;
/**
* Backup storage for special processing inner cases
*/
private Backup m_utilSpecialBackUp_;
/**
* Backup storage in special processing entry state
*/
private Backup m_utilSpecialEntryBackUp_;
/**
* Backup storage in special processing discontiguous state
*/
private Backup m_utilSpecialDiscontiguousBackUp_;
/**
* Utility
*/
private StringCharacterIterator m_srcUtilIter_;
private StringBuffer m_utilStringBuffer_;
private StringBuffer m_utilSkippedBuffer_;
private CollationElementIterator m_utilColEIter_;
/**
* One character before the first non-zero combining class character
*/
private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0;
/**
* One character before the first character with leading non-zero combining
* class
*/
private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300;
/**
* Mask for the last byte
*/
private static final int LAST_BYTE_MASK_ = 0xFF;
/**
* Shift value for the second last byte
*/
private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
// special ce values and tags -------------------------------------------
/*private*/ static final int CE_NOT_FOUND_ = 0xF0000000;
private static final int CE_EXPANSION_ = 0xF1000000;
private static final int CE_CONTRACTION_ = 0xF2000000;
private static final int CE_THAI_ = 0xF3000000;
/**
* Indicates the last ce has been consumed. Compare with NULLORDER.
* NULLORDER is returned if an error occurs.
*/
private static final int CE_NO_MORE_CES_ = 0x00010101;
private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000;
private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100;
private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001;
private static final int CE_NOT_FOUND_TAG_ = 0;
/*private*/ static final int CE_EXPANSION_TAG_ = 1;
/*private*/ static final int CE_CONTRACTION_TAG_ = 2;
private static final int CE_THAI_TAG_ = 3;
/**
* Charset processing, not yet implemented
*/
private static final int CE_CHARSET_TAG_ = 4;
/**
* AC00-D7AF
*/
private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
/**
* D800-DBFF
*/
private static final int CE_LEAD_SURROGATE_TAG_ = 7;
/**
* DC00-DFFF
*/
private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
/**
* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
*/
private static final int CE_CJK_IMPLICIT_TAG_ = 9;
private static final int CE_IMPLICIT_TAG_ = 10;
private static final int CE_SPEC_PROC_TAG_ = 11;
/**
* This is a 3 byte primary with starting secondaries and tertiaries.
* It fits in a single 32 bit CE and is used instead of expansion to save
* space without affecting the performance (hopefully).
*/
private static final int CE_LONG_PRIMARY_TAG_ = 12;
private static final int CE_CE_TAGS_COUNT = 13;
private static final int CE_BYTE_COMMON_ = 0x05;
// end special ce values and tags ---------------------------------------
private static final int HANGUL_SBASE_ = 0xAC00;
private static final int HANGUL_LBASE_ = 0x1100;
private static final int HANGUL_VBASE_ = 0x1161;
private static final int HANGUL_TBASE_ = 0x11A7;
private static final int HANGUL_VCOUNT_ = 21;
private static final int HANGUL_TCOUNT_ = 28;
// CJK stuff ------------------------------------------------------------
private static final int CJK_BASE_ = 0x4E00;
private static final int CJK_LIMIT_ = 0x9FFF+1;
private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E;
private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1;
private static final int CJK_A_BASE_ = 0x3400;
private static final int CJK_A_LIMIT_ = 0x4DBF + 1;
private static final int CJK_B_BASE_ = 0x20000;
private static final int CJK_B_LIMIT_ = 0x2A6DF + 1;
private static final int NON_CJK_OFFSET_ = 0x110000;
// private methods ------------------------------------------------------
/**
* Reset the iterator internally
*/
private void updateInternalState()
{
m_isCodePointHiragana_ = false;
m_buffer_.setLength(0);
m_bufferOffset_ = -1;
m_CEBufferOffset_ = 0;
m_CEBufferSize_ = 0;
m_FCDLimit_ = -1;
m_FCDStart_ = m_source_.getEndIndex();
m_isHiragana4_ = m_collator_.m_isHiragana4_;
m_isForwards_ = true;
}
/**
* Backup the current internal state
* @param backup object to store the data
*/
private void backupInternalState(Backup backup)
{
backup.m_offset_ = m_source_.getIndex();
backup.m_FCDLimit_ = m_FCDLimit_;
backup.m_FCDStart_ = m_FCDStart_;
backup.m_isCodePointHiragana_ = m_isCodePointHiragana_;
backup.m_bufferOffset_ = m_bufferOffset_;
backup.m_buffer_.setLength(0);
if (m_bufferOffset_ >= 0) {
// jdk 1.3.1 does not have append(StringBuffer) yet
if(ICUDebug.isJDK14OrHigher){
backup.m_buffer_.append(m_buffer_);
}else{
backup.m_buffer_.append(m_buffer_.toString());
}
}
}
/**
* Update the iterator internally with backed-up state
* @param backup object that stored the data
*/
private void updateInternalState(Backup backup)
{
m_source_.setIndex(backup.m_offset_);
m_isCodePointHiragana_ = backup.m_isCodePointHiragana_;
m_bufferOffset_ = backup.m_bufferOffset_;
m_FCDLimit_ = backup.m_FCDLimit_;
m_FCDStart_ = backup.m_FCDStart_;
m_buffer_.setLength(0);
if (m_bufferOffset_ >= 0) {
// jdk 1.3.1 does not have append(StringBuffer) yet
m_buffer_.append(backup.m_buffer_.toString());
}
}
/**
* A fast combining class retrieval system.
* @param ch UTF16 character
* @return combining class of ch
*/
private int getCombiningClass(char ch)
{
if (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ &&
m_collator_.isUnsafe(ch)) {
return NormalizerImpl.getCombiningClass(ch);
}
return 0;
}
/**
* <p>Incremental normalization; this is an essential optimization.
* Assuming FCD checks have been done, normalizes the non-FCD characters
* into the buffer.
* The source offset points to the current processing character.
* </p>
*/
private void normalize()
{
int size = m_FCDLimit_ - m_FCDStart_;
m_buffer_.setLength(0);
m_source_.setIndex(m_FCDStart_);
for (int i = 0; i < size; i ++) {
m_buffer_.append(m_source_.current());
m_source_.next();
}
String decomp = Normalizer.decompose(m_buffer_.toString(), false);
m_buffer_.setLength(0);
m_buffer_.append(decomp);
m_bufferOffset_ = 0;
}
/**
* <p>Incremental FCD check and normalization. Gets the next base character
* position and determines if the in-between characters need normalization.
* </p>
* <p>When entering, the state is known to be this:
* <ul>
* <li>We are working on the source string, not the buffer.
* <li>The leading combining class of the current character is 0, or the
* trailing combining class of the previous char was zero.
* </ul>
* The incoming source offset points to the current processing character.
* The source offset on return points to the current processing character.
* </p>
* @param ch current character
* @param offset current character offset
* @return true if FCDCheck passes, false otherwise
*/
private boolean FCDCheck(char ch, int offset)
{
boolean result = true;
// Get the trailing combining class of the current character.
// If it's zero, we are OK.
m_FCDStart_ = offset;
m_source_.setIndex(offset);
// trie access
char fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd
if (UTF16.isTrailSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
} else {
fcd = 0;
}
}
int prevTrailCC = fcd & LAST_BYTE_MASK_;
if (prevTrailCC != 0) {
// The current char has a non-zero trailing CC. Scan forward until
// we find a char with a leading cc of zero.
while (true) {
ch = m_source_.next();
if (ch == CharacterIterator.DONE) {
break;
}
// trie access
fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
ch = m_source_.next();
if (UTF16.isTrailSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
} else {
fcd = 0;
}
}
int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
if (leadCC == 0) {
// this is a base character, we stop the FCD checks
break;
}
if (leadCC < prevTrailCC) {
result = false;
}
prevTrailCC = fcd & LAST_BYTE_MASK_;
}
}
m_FCDLimit_ = m_source_.getIndex();
m_source_.setIndex(m_FCDStart_);
m_source_.next();
return result;
}
/**
* <p>Tries to fetch the next character that is in FCD form.</p>
* <p>Normalization is done if required.</p>
* <p>On return the offset points at the next character.</p>
* @return next FCD character
*/
private char nextChar()
{
char result;
// loop handles the next character whether it is in the buffer or not.
if (m_bufferOffset_ < 0) {
// we're working on the source and not normalizing. fast path.
// note Thai pre-vowel reordering uses buffer too
result = m_source_.current();
}
else {
// we are in the buffer, buffer offset will never be 0 here
if (m_bufferOffset_ >= m_buffer_.length()) {
// Null marked end of buffer, revert to the source string and
// loop back to top to try again to get a character.
m_source_.setIndex(m_FCDLimit_);
m_bufferOffset_ = -1;
m_buffer_.setLength(0);
return nextChar();
}
return m_buffer_.charAt(m_bufferOffset_ ++);
}
int startoffset = m_source_.getIndex();
if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
// Fast fcd safe path. trail combining class == 0.
|| m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
|| m_bufferOffset_ >= 0 || m_FCDLimit_ > startoffset) {
// skip the fcd checks
m_source_.next();
return result;
}
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
// We need to peek at the next character in order to tell if we are
// FCD
char next = m_source_.next();
if (next == CharacterIterator.DONE
|| next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
return result; // end of source string, or the next character is a
// base character, so the text so far is FCD.
}
}
// Need a more complete FCD check and possible normalization.
if (!FCDCheck(result, startoffset)) {
normalize();
result = m_buffer_.charAt(0);
m_bufferOffset_ = 1;
}
return result;
}
/**
* <p>Incremental normalization; this is an essential optimization.
* Assuming FCD checks have been done, normalizes the non-FCD characters
* into the buffer.
* The source offset points to the current processing character.</p>
*/
private void normalizeBackwards()
{
normalize();
m_bufferOffset_ = m_buffer_.length();
}
/**
* <p>Incremental backwards FCD check and normalization. Gets the previous
* base character position and determines if the in-between characters
* need normalization.
* </p>
* <p>When entering, the state is known to be this:
* <ul>
* <li>We are working on the source string, not the buffer.
* <li>The trailing combining class of the current character is 0, or the
* leading combining class of the next char was zero.
* </ul>
* The incoming source offset points to the previous character.
* The source offset on return points to the current processing character.
* </p>
* @param ch current character
* @param offset current character offset
* @return true if FCDCheck passes, false otherwise
*/
private boolean FCDCheckBackwards(char ch, int offset)
{
boolean result = true;
char fcd = 0;
m_FCDLimit_ = offset + 1;
m_source_.setIndex(offset);
if (!UTF16.isSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch);
}
else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) {
// note trail surrogate characters get 0 fcd
char trailch = ch;
ch = m_source_.previous();
if (UTF16.isLeadSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd,
trailch);
}
}
else {
fcd = 0; // unpaired surrogate
}
}
int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
// The current char has a non-zero leading combining class.
// Scan backward until we find a char with a trailing cc of zero.
while (leadCC != 0) {
offset = m_source_.getIndex();
if (offset == 0) {
break;
}
ch = m_source_.previous();
if (!UTF16.isSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch);
}
else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
char trail = ch;
ch = m_source_.previous();
if (UTF16.isLeadSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch);
}
if (fcd != 0) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, trail);
}
}
else {
fcd = 0; // unpaired surrogate
}
int prevTrailCC = fcd & LAST_BYTE_MASK_;
if (leadCC < prevTrailCC) {
result = false;
}
leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
}
// storing character with 0 lead fcd or the 1st accent with a base
// character before it
if (fcd == 0) {
m_FCDStart_ = offset;
}
else {
m_FCDStart_ = m_source_.getIndex();
}
m_source_.setIndex(m_FCDLimit_);
return result;
}
/**
* <p>Tries to fetch the previous character that is in FCD form.</p>
* <p>Normalization is done if required.</p>
* <p>On return the offset points at the current character.</p>
* @return previous FCD character
*/
private char previousChar()
{
if (m_bufferOffset_ >= 0) {
m_bufferOffset_ --;
if (m_bufferOffset_ >= 0) {
return m_buffer_.charAt(m_bufferOffset_);
}
else {
// At the start of buffer, route back to string.
m_buffer_.setLength(0);
if (m_FCDStart_ == m_source_.getBeginIndex()) {
m_FCDStart_ = -1;
m_source_.setIndex(m_source_.getBeginIndex());
return CharacterIterator.DONE;
}
else {
m_FCDLimit_ = m_FCDStart_;
m_source_.setIndex(m_FCDStart_);
return previousChar();
}
}
}
char result = m_source_.previous();
int startoffset = m_source_.getIndex();
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|| m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
return result;
}
char ch = m_source_.previous();
if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
// if previous character is FCD
m_source_.next();
return result;
}
// Need a more complete FCD check and possible normalization.
if (!FCDCheckBackwards(result, startoffset)) {
normalizeBackwards();
m_bufferOffset_ --;
result = m_buffer_.charAt(m_bufferOffset_);
}
else {
// fcd checks always reset m_source_ to the limit of the FCD
m_source_.setIndex(startoffset);
}
return result;
}
/**
* Determines if it is at the start of source iteration
* @return true if iterator at the start, false otherwise
*/
private final boolean isBackwardsStart()
{
return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0)
|| (m_bufferOffset_ == 0 && m_FCDStart_ <= 0);
}
/**
* Checks if iterator is at the end of its source string.
* @return true if it is at the end, false otherwise
*/
private final boolean isEnd()
{
if (m_bufferOffset_ >= 0) {
if (m_bufferOffset_ != m_buffer_.length()) {
return false;
}
else {
// at end of buffer. check if fcd is at the end
return m_FCDLimit_ == m_source_.getEndIndex();
}
}
return m_source_.getEndIndex() == m_source_.getIndex();
}
/**
* <p>Special CE management for surrogates</p>
* <p>A lead surrogate has been encountered, and the CE is to be retrieved
* using the following code unit. If the next character is a trail
* surrogate, both characters are combined to retrieve the CE; otherwise a
* completely ignorable CE (per the UCA specification) is returned.</p>
* @param collator collator to use
* @param ce current CE
* @param trail the trail surrogate character
* @return next CE for the surrogate characters
*/
private final int nextSurrogate(RuleBasedCollator collator, int ce,
char trail)
{
if (!UTF16.isTrailSurrogate(trail)) {
updateInternalState(m_utilSpecialBackUp_);
return IGNORABLE;
}
// TODO: CE contains the data from the previous CE + the mask.
// It should at least be unmasked
int result = collator.m_trie_.getTrailValue(ce, trail);
if (result == CE_NOT_FOUND_) {
updateInternalState(m_utilSpecialBackUp_);
}
return result;
}
/**
* Gets the CE expansion offset
* @param collator current collator
* @param ce ce to test
* @return expansion offset
*/
private int getExpansionOffset(RuleBasedCollator collator, int ce)
{
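// bits 4..23 of the CE hold the offset into the expansion table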
return ((ce & 0xFFFFF0) >> 4) - collator.m_expansionOffset_;
}
/**
* Swaps the Thai and Lao characters and returns the CEs.
* @param collator collator to use
* @param ce current ce
* @param ch current character
* @return next CE for Thai characters
*/
private int nextThai(RuleBasedCollator collator, int ce, char ch)
{
if (m_bufferOffset_ != -1 // already swapped
|| isEnd()) {
// Treat Thai as a length one expansion
// find the offset to expansion table
// we now rearrange unconditionally so do not check base consonant
return collator.m_expansion_[getExpansionOffset(collator, ce)];
}
else {
if (!isEnd()) {
// swap the prevowel and the following char into the
// buffer with their order swapped
// buffer is always clean when we are in the source string
// Note: this operation might activate the normalization buffer. We have to check for
// that and act accordingly.
m_FCDStart_ = m_source_.getIndex() - 1;
char thCh = nextChar();
int cp = thCh;
if (UTF16.isLeadSurrogate(thCh)) {
if (!isEnd()) {
backupInternalState(m_utilSpecialBackUp_);
char trailCh = nextChar();
if (UTF16.isTrailSurrogate(trailCh)) {
cp = UCharacterProperty.getRawSupplementary(
thCh, trailCh);
}
else {
updateInternalState(m_utilSpecialBackUp_);
}
}
}
// Now we have the character that needs to be decomposed
// if the normalizing buffer was not used, we can just use our
// structure and be happy.
if (m_bufferOffset_ < 0) {
// decompose into writable buffer
m_buffer_.replace(0, m_buffer_.length(),
Normalizer.decompose(UTF16.toString(cp),
false));
// reorder Thai and the character after it
if (m_buffer_.length() >= 2
&& UTF16.isLeadSurrogate(m_buffer_.charAt(0))
&& UTF16.isTrailSurrogate(m_buffer_.charAt(1))) {
m_buffer_.insert(2, ch);
}
else {
m_buffer_.insert(1, ch);
}
m_FCDLimit_ = m_source_.getIndex();
}
else {
// stuff is already normalized... what to do here???
// if we are in the normalization buffer, thCh must be in
// it.
// prove by contradiction
// if thCh is not in the normalization buffer,
// that means that trailCh is in the normalization buffer.
// that means that trailCh is a trail
// surrogate by the above bounding if block. this is a
// contradiction because there are no characters at the
// moment that decomposes to an unmatched surrogate. qed.
if (UCharacter.isSupplementary(cp)) {
m_buffer_.insert(2, ch);
}
else {
m_buffer_.insert(1, ch);
}
/*
m_utilStringBuffer_.replace(0, m_utilStringBuffer_.length(),
Normalizer.decompose(UTF16.toString(cp), false));
if (m_utilStringBuffer_.length() >= 2
&& UTF16.isLeadSurrogate(m_utilStringBuffer_.charAt(0))
&& UTF16.isLeadSurrogate(m_utilStringBuffer_.charAt(1)))
{
m_utilStringBuffer_.insert(2, ch);
}
else {
m_utilStringBuffer_.insert(1, ch);
}
// we will construct a new iterator and suck out CEs.
// Here is the string initialization. We have decomposed
// character (decompLen) + 1 Thai + trailing zero
String temp = m_utilStringBuffer_.toString();
if (m_utilColEIter_ == null) {
m_utilColEIter_ = new CollationElementIterator(
temp, collator);
}
else {
m_utilColEIter_.m_collator_ = collator;
m_utilColEIter_.setText(temp);
}
// We need the trailing zero so that we can tell the
// iterate function that it is in the normalized and
// reordered buffer. This buffer is always zero terminated.
m_utilColEIter_.m_buffer_.replace(0,
m_utilColEIter_.m_buffer_.length(), temp);
m_utilColEIter_.m_bufferOffset_ = 0;
// This is where to return after iteration is done.
// We point at the end of the string
m_utilColEIter_.m_FCDStart_ = 0;
m_utilColEIter_.m_FCDLimit_ = temp.length();
ce = m_utilColEIter_.next();
m_CEBufferSize_ = 0;
while (ce != NULLORDER) {
if (m_CEBufferSize_ == m_CEBuffer_.length) {
try {
// increasing cebuffer size
int tempbuffer[] = new int[m_CEBuffer_.length + 50];
System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0,
m_CEBuffer_.length);
m_CEBuffer_ = tempbuffer;
}
catch (Exception e) {
e.printStackTrace();
return NULLORDER;
}
}
m_CEBuffer_[m_CEBufferSize_ ++] = ce;
ce = m_utilColEIter_.next();
}
m_CEBufferOffset_ = 1;
// return the first of CEs so that we save a call
return m_CEBuffer_[0];
*/
}
m_bufferOffset_ = 0;
return IGNORABLE;
} else {
return collator.m_expansion_[getExpansionOffset(collator, ce)];
}
}
}
/**
* Gets the contraction ce offset
* @param collator current collator
* @param ce current ce
* @return contraction offset
*/
private int getContractionOffset(RuleBasedCollator collator, int ce)
{
return (ce & 0xFFFFFF) - collator.m_contractionOffset_;
}
/**
* Checks if CE is a special tag CE
* @param ce to check
* @return true if CE is a special tag CE, false otherwise
*/
private boolean isSpecialPrefixTag(int ce)
{
return RuleBasedCollator.isSpecial(ce) &&
RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_;
}
/**
* <p>Special processing getting a CE that is preceded by a certain
* prefix.</p>
* <p>Used for optimizing Japanese length and iteration marks. When a
* special processing tag is encountered, iterate backwards to see if
* there's a match.</p>
* <p>Contraction tables are used, prefix data is stored backwards in the
* table.</p>
* @param collator collator to use
* @param ce current ce
* @param entrybackup entry backup iterator status
* @return next collation element
*/
private int nextSpecialPrefix(RuleBasedCollator collator, int ce,
Backup entrybackup)
{
backupInternalState(m_utilSpecialBackUp_);
updateInternalState(entrybackup);
previousChar();
// We want to look at the character where we entered
while (true) {
// This loop will run once per source string character, for as
// long as we are matching a potential contraction sequence
// First we position ourselves at the beginning of the contraction
// sequence
int entryoffset = getContractionOffset(collator, ce);
int offset = entryoffset;
if (isBackwardsStart()) {
ce = collator.m_contractionCE_[offset];
break;
}
char previous = previousChar();
while (previous > collator.m_contractionIndex_[offset]) {
// contraction characters are ordered, skip smaller characters
offset ++;
}
if (previous == collator.m_contractionIndex_[offset]) {
// Found the source string char in the table.
// Pick up the corresponding CE from the table.
ce = collator.m_contractionCE_[offset];
}
else {
// if there is a completely ignorable code point in the middle
// of a prefix, we need to act as if it's not there
// assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef
// are set to zero)
// lone surrogates cannot be set to zero as it would break
// other processing
int isZeroCE = collator.m_trie_.getLeadValue(previous);
// it's easy for BMP code points
if (isZeroCE == 0) {
continue;
}
else if (UTF16.isSurrogate(previous)) {
// for supplementary code points, we have to check the
// next one. Situations where we are going to ignore:
// 1. beginning of the string: schar is a lone surrogate
// 2. schar is a lone surrogate
// 3. schar is a trail surrogate in a valid surrogate
// sequence that is explicitly set to zero.
if (!isBackwardsStart()) {
char lead = previousChar();
if (UTF16.isLeadSurrogate(lead)) {
isZeroCE = collator.m_trie_.getLeadValue(lead);
if (RuleBasedCollator.getTag(isZeroCE)
== RuleBasedCollator.CE_SURROGATE_TAG_) {
int finalCE = collator.m_trie_.getTrailValue(
isZeroCE, previous);
if (finalCE == 0) {
// this is a real, assigned completely
// ignorable code point
continue;
}
}
}
else {
// lone surrogate, completely ignorable
nextChar();
continue;
}
nextChar(); // shift back to original position
}
else {
// lone surrogate at the beginning, completely ignorable
continue;
}
}
// Source string char was not in the table, prefix not found
ce = collator.m_contractionCE_[entryoffset];
}
if (!isSpecialPrefixTag(ce)) {
// The source string char was in the contraction table, and
// the corresponding CE is not a prefix CE. We found the
// prefix, break out of loop, this CE will end up being
// returned. This is the normal way out of prefix handling
// when the source actually contained the prefix.
break;
}
}
if (ce != CE_NOT_FOUND_) {
// we found something and we can merrily continue
updateInternalState(m_utilSpecialBackUp_);
}
else { // prefix search was a failure, we have to backup all the way to
// the start
updateInternalState(entrybackup);
}
return ce;
}
/**
* Checks if the ce is a contraction tag
* @param ce ce to check
* @return true if ce is a contraction tag, false otherwise
*/
private boolean isContractionTag(int ce)
{
return RuleBasedCollator.isSpecial(ce) &&
RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_;
}
/**
* Copies the skipped characters into the buffer and sets the fcd
* position. To ensure that the skipped characters are considered later,
* we need to place them in the appropriate position in the buffer and
* reassign the source index. Simple case: if the index resides in the
* string, simply copy to the buffer and set fcdposition = pos, pos =
* start of buffer. If pos is in the normalization buffer, we insert the
* copy in front of pos and point pos to the start of the buffer. Why do
* these copies? So that the whole chunk of code in getNextCE and
* ucol_prv_getSpecialCE does not require any changes, which would be
* really painful.
* @param skipped character buffer
*/
private void setDiscontiguous(StringBuffer skipped)
{
if (m_bufferOffset_ >= 0) {
m_buffer_.replace(0, m_bufferOffset_, skipped.toString());
}
else {
m_FCDLimit_ = m_source_.getIndex();
m_buffer_.setLength(0);
m_buffer_.append(skipped.toString());
}
m_bufferOffset_ = 0;
}
/**
* Returns the current character for forward iteration
* @return current character
*/
private char currentChar()
{
if (m_bufferOffset_ < 0) {
char result = m_source_.previous();
m_source_.next();
return result;
}
// m_bufferOffset_ is never 0 in normal circumstances except after a
// discontiguous contraction since it is always returned and moved
// by 1 when we do nextChar()
return m_buffer_.charAt(m_bufferOffset_ - 1);
}
/**
* Gets the discontiguous collation element within the source.
* Note this function will set the position to the appropriate place.
* The passed-in character offset points to the second combining character
* after the start character.
* @param collator current collator used
* @param entryoffset index to the start character in the contraction table
* @return discontiguous collation element offset
*/
private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset)
{
int offset = entryoffset;
boolean multicontraction = false;
// since it will be stuffed into this iterator and run over again
if (m_utilSkippedBuffer_ == null) {
m_utilSkippedBuffer_ = new StringBuffer();
}
else {
m_utilSkippedBuffer_.setLength(0);
}
char ch = currentChar();
m_utilSkippedBuffer_.append(currentChar());
// accent after the first character
if (m_utilSpecialDiscontiguousBackUp_ == null) {
m_utilSpecialDiscontiguousBackUp_ = new Backup();
}
backupInternalState(m_utilSpecialDiscontiguousBackUp_);
char nextch = ch;
while (true) {
ch = nextch;
nextch = nextChar();
if (nextch == CharacterIterator.DONE
|| getCombiningClass(nextch) == 0) {
// if there are no more accents to move around
// we don't have to shift previousChar, since we are resetting
// the offset later
if (multicontraction) {
if (nextch != CharacterIterator.DONE) {
previousChar(); // backtrack
}
setDiscontiguous(m_utilSkippedBuffer_);
return collator.m_contractionCE_[offset];
}
break;
}
offset ++; // skip the combining class offset
while (nextch > collator.m_contractionIndex_[offset]) {
offset ++;
}
int ce = CE_NOT_FOUND_;
if (nextch != collator.m_contractionIndex_[offset]
|| getCombiningClass(nextch) == getCombiningClass(ch)) {
// unmatched or blocked character
m_utilSkippedBuffer_.append(nextch);
continue;
}
else {
ce = collator.m_contractionCE_[offset];
}
if (ce == CE_NOT_FOUND_) {
break;
}
else if (isContractionTag(ce)) {
// this is a multi-contraction
offset = getContractionOffset(collator, ce);
if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) {
multicontraction = true;
backupInternalState(m_utilSpecialDiscontiguousBackUp_);
}
}
else {
setDiscontiguous(m_utilSkippedBuffer_);
return ce;
}
}
updateInternalState(m_utilSpecialDiscontiguousBackUp_);
// backup is one forward of the base character, we need to move back
// one more
previousChar();
return collator.m_contractionCE_[entryoffset];
}
/**
* Gets the next contraction ce
* @param collator collator to use
* @param ce current ce
* @return ce of the next contraction
*/
private int nextContraction(RuleBasedCollator collator, int ce)
{
backupInternalState(m_utilSpecialBackUp_);
int entryce = CE_NOT_FOUND_;
while (true) {
int entryoffset = getContractionOffset(collator, ce);
int offset = entryoffset;
if (isEnd()) {
ce = collator.m_contractionCE_[offset];
if (ce == CE_NOT_FOUND_) {
// back up the source over all the chars we scanned going
// into this contraction.
ce = entryce;
updateInternalState(m_utilSpecialBackUp_);
}
break;
}
// get the discontiguous maximum combining class
byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF);
// checks if all characters have the same combining class
byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
char ch = nextChar();
offset ++;
while (ch > collator.m_contractionIndex_[offset]) {
// contraction characters are ordered, skip all smaller
offset ++;
}
if (ch == collator.m_contractionIndex_[offset]) {
// Found the source string char in the contraction table.
// Pick up the corresponding CE from the table.
ce = collator.m_contractionCE_[offset];
}
else {
// if there is a completely ignorable code point in the middle
// of contraction, we need to act as if it's not there
int isZeroCE = collator.m_trie_.getLeadValue(ch);
// it's easy for BMP code points
if (isZeroCE == 0) {
continue;
}
else if (UTF16.isLeadSurrogate(ch)) {
if (!isEnd()) {
backupInternalState(m_utilSpecialBackUp_);
char trail = nextChar();
if (UTF16.isTrailSurrogate(trail)) {
// do stuff with trail
if (RuleBasedCollator.getTag(isZeroCE)
== RuleBasedCollator.CE_SURROGATE_TAG_) {
int finalCE = collator.m_trie_.getTrailValue(
isZeroCE, trail);
if (finalCE == 0) {
continue;
}
}
}
else {
// broken surrogate sequence, thus completely
// ignorable
updateInternalState(
m_utilSpecialBackUp_);
continue;
}
updateInternalState(m_utilSpecialBackUp_);
}
else {
// no more characters, so broken surrogate pair...
// this contraction will ultimately fail, but not
// because of us
continue;
}
}
// Source string char was not in contraction table.
// Unless it is a discontiguous contraction, we are done
byte sCC;
if (maxCC == 0 || (sCC = (byte)getCombiningClass(ch)) == 0
|| sCC > maxCC || (allSame != 0 && sCC == maxCC) ||
isEnd()) {
// Contraction can not be discontiguous, back up by one
previousChar();
ce = collator.m_contractionCE_[entryoffset];
}
else {
// Contraction is possibly discontiguous.
// find the next character if ch is not a base character
char nextch = nextChar();
if (nextch != CharacterIterator.DONE) {
previousChar();
}
if (getCombiningClass(nextch) == 0) {
previousChar();
// base character not part of discontiguous contraction
ce = collator.m_contractionCE_[entryoffset];
}
else {
ce = nextDiscontiguous(collator, entryoffset);
}
}
}
if (ce == CE_NOT_FOUND_) {
// source did not match the contraction, revert back to the original
updateInternalState(m_utilSpecialBackUp_);
ce = entryce;
break;
}
// source was a contraction
if (!isContractionTag(ce)) {
break;
}
// continue looping to check for the remaining contractions.
if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) {
// there are further contractions to be performed, so we store
// the so-far completed ce, so that if we fail in the next
// round we just return this one.
entryce = collator.m_contractionCE_[entryoffset];
backupInternalState(m_utilSpecialBackUp_);
if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) {
m_utilSpecialBackUp_.m_bufferOffset_ --;
}
else {
m_utilSpecialBackUp_.m_offset_ --;
}
}
}
return ce;
}
/**
* Gets the next ce for long primaries, stuffs the rest of the collation
* elements into the ce buffer
* @param ce current ce
* @return next ce
*/
private int nextLongPrimary(int ce)
{
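// A long-primary CE packs a three-byte primary weight plus common
// secondary/tertiary weights into one 32-bit CE. Split it here into a
// normal CE (top two primary bytes + common secondary/tertiary) and a
// continuation CE carrying the third primary byte.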
m_CEBuffer_[1] = ((ce & 0xFF) << 24)
| RuleBasedCollator.CE_CONTINUATION_MARKER_;
m_CEBufferOffset_ = 1;
m_CEBufferSize_ = 2;
m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) |
CE_BYTE_COMMON_;
return m_CEBuffer_[0];
}
/**
* Gets the number of CEs in the expansion
* @param ce current ce
* @return number of CEs in the expansion
*/
private int getExpansionCount(int ce)
{
return ce & 0xF;
}
/**
* Gets the next expansion ce and stuffs the rest of the collation elements
* into the ce buffer
* @param collator current collator
* @param ce current ce
* @return next expansion ce
*/
private int nextExpansion(RuleBasedCollator collator, int ce)
{
// NOTE: we can encounter both continuations and expansions in an
// expansion!
// I have to decide where continuations are going to be dealt with
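        // An expansion ce either carries its ce count in the low nibble
        // (1..15, see getExpansionCount) or, when that nibble is 0, points
        // at a zero-terminated list of ces in m_expansion_. For example, a
        // count of 3 means the ces at m_expansion_[offset], [offset + 1]
        // and [offset + 2] are returned in sequence.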
int offset = getExpansionOffset(collator, ce);
m_CEBufferSize_ = getExpansionCount(ce);
m_CEBufferOffset_ = 1;
m_CEBuffer_[0] = collator.m_expansion_[offset];
if (m_CEBufferSize_ != 0) {
            // fewer than 16 elements in the expansion; the count is in the ce
for (int i = 1; i < m_CEBufferSize_; i ++) {
m_CEBuffer_[i] = collator.m_expansion_[offset + i];
}
}
        else {
            // zero-terminated list of ces; copy until the terminating 0,
            // without copying the terminator itself
            m_CEBufferSize_ = 1;
            while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
                m_CEBuffer_[m_CEBufferSize_] =
                    collator.m_expansion_[offset + m_CEBufferSize_];
                m_CEBufferSize_ ++;
            }
        }
        // in the case of a one-element expansion, return it immediately
        // without going through the ce buffer
if (m_CEBufferSize_ == 1) {
m_CEBufferSize_ = 0;
m_CEBufferOffset_ = 0;
}
return m_CEBuffer_[0];
}
/**
* Gets the next implicit ce for codepoints
* @param codepoint current codepoint
* @return implicit ce
*/
private int nextImplicit(int codepoint)
{
if (!UCharacter.isLegal(codepoint)) {
            // TODO (synwee): check with Vladimir on the range of isNonChar()
            // illegal code value, use completely ignorable!
return IGNORABLE;
}
int result = getImplicitPrimary(codepoint);
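        // The implicit primary is split across two ces: the first keeps the
        // bits selected by CE_PRIMARY_MASK_ plus common secondary and
        // tertiary weights (0x0505), the second carries the low 16 bits of
        // the result shifted into the primary position plus a continuation
        // marker (0xC0).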
m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
| 0x00000505;
m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
m_CEBufferOffset_ = 1;
m_CEBufferSize_ = 2;
return m_CEBuffer_[0];
}
/**
     * Returns the next ce for a lead surrogate; the following trail
     * surrogate, if present, is consumed to form a supplementary codepoint
     * @param ch current lead surrogate character
* @return ce
*/
private int nextSurrogate(char ch)
{
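        // A lead surrogate is only meaningful together with the trail
        // surrogate that follows it; the pair is combined into a
        // supplementary codepoint, presumably as
        // ((lead - 0xD800) << 10) + (trail - 0xDC00) + 0x10000, which is
        // what UCharacterProperty.getRawSupplementary is assumed to compute.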
char nextch = nextChar();
if (nextch != CharacterIterator.DONE &&
UTF16.isTrailSurrogate(nextch)) {
int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
return nextImplicit(codepoint);
}
if (nextch != CharacterIterator.DONE) {
previousChar(); // reverts back to the original position
}
return IGNORABLE; // completely ignorable
}
/**
     * Returns the next ce for a hangul syllable, computed by decomposing
     * the syllable into its constituent jamo
* @param collator current collator
* @param ch current character
* @return hangul ce
*/
private int nextHangul(RuleBasedCollator collator, char ch)
{
char L = (char)(ch - HANGUL_SBASE_);
// divide into pieces
// do it in this order since some compilers can do % and / in one
// operation
char T = (char)(L % HANGUL_TCOUNT_);
L /= HANGUL_TCOUNT_;
char V = (char)(L % HANGUL_VCOUNT_);
L /= HANGUL_VCOUNT_;
// offset them
L += HANGUL_LBASE_;
V += HANGUL_VBASE_;
T += HANGUL_TBASE_;
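        // Worked example, assuming the HANGUL_*_ constants hold the
        // standard Unicode values (SBase 0xAC00, LBase 0x1100, VBase
        // 0x1161, TBase 0x11A7, VCount 21, TCount 28): ch = U+AC01 gives
        // syllable index 1, hence the jamo L = U+1100, V = U+1161 and
        // T = U+11A8. T == HANGUL_TBASE_ means there is no trailing
        // consonant.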
// return the first CE, but first put the rest into the expansion
// buffer
m_CEBufferSize_ = 0;
if (!collator.m_isJamoSpecial_) { // FAST PATH
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(L);
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(V);
if (T != HANGUL_TBASE_) {
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(T);
}
m_CEBufferOffset_ = 1;
return m_CEBuffer_[0];
}
else {
// Jamo is Special
// Since Hanguls pass the FCD check, it is guaranteed that we
// won't be in the normalization buffer if something like this
// happens
// Move Jamos into normalization buffer
m_buffer_.append((char)L);
m_buffer_.append((char)V);
if (T != HANGUL_TBASE_) {
m_buffer_.append((char)T);
}
m_FCDLimit_ = m_source_.getIndex();
m_FCDStart_ = m_FCDLimit_ - 1;
// Indicate where to continue in main input string after
// exhausting the buffer
return IGNORABLE;
}
}
/**
* <p>Special CE management. Expansions, contractions etc...</p>
* @param collator can be plain UCA
* @param ce current ce
* @param ch current character
* @return next special ce
*/
private int nextSpecial(RuleBasedCollator collator, int ce, char ch)
{
int codepoint = ch;
Backup entrybackup = m_utilSpecialEntryBackUp_;
        // recursion guard: detach the shared backup so a nested call
        // allocates its own
if (entrybackup != null) {
m_utilSpecialEntryBackUp_ = null;
}
else {
entrybackup = new Backup();
}
backupInternalState(entrybackup);
        try { // the finally clause restores m_utilSpecialEntryBackUp_ even on early returns
while (true) {
                // This loop repeats only for contractions, surrogates,
                // Thai reordering and special prefixes
switch(RuleBasedCollator.getTag(ce)) {
case CE_NOT_FOUND_TAG_:
// impossible case for icu4j
return ce;
case RuleBasedCollator.CE_SURROGATE_TAG_:
if (isEnd()) {
return IGNORABLE;
}
backupInternalState(m_utilSpecialBackUp_);
char trail = nextChar();
ce = nextSurrogate(collator, ce, trail);
// calculate the supplementary code point value,
// if surrogate was not tailored we go one more round
codepoint =
UCharacterProperty.getRawSupplementary(ch, trail);
break;
case CE_THAI_TAG_:
ce = nextThai(collator, ce, ch);
break;
case CE_SPEC_PROC_TAG_:
ce = nextSpecialPrefix(collator, ce, entrybackup);
break;
case CE_CONTRACTION_TAG_:
ce = nextContraction(collator, ce);
break;
case CE_LONG_PRIMARY_TAG_:
return nextLongPrimary(ce);
case CE_EXPANSION_TAG_:
return nextExpansion(collator, ce);
// various implicits optimization
case CE_CJK_IMPLICIT_TAG_:
// 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
return nextImplicit(codepoint);
case CE_IMPLICIT_TAG_: // everything that is not defined
return nextImplicit(codepoint);
case CE_TRAIL_SURROGATE_TAG_:
return IGNORABLE; // DC00-DFFF broken surrogate
case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
return nextSurrogate(ch);
case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
return nextHangul(collator, ch);
case CE_CHARSET_TAG_:
// not yet implemented probably after 1.8
return CE_NOT_FOUND_;
default:
ce = IGNORABLE;
                    // TODO (synwee): throw an exception or handle the error here.
}
if (!RuleBasedCollator.isSpecial(ce)) {
break;
}
}
} finally {
m_utilSpecialEntryBackUp_ = entrybackup;
}
return ce;
}
/**
* Getting the previous Thai ce
     * @param collator current collator
     * @param ce current ce
* @param ch current character
* @return previous Thai ce
*/
private int previousThai(RuleBasedCollator collator, int ce, char ch)
{
if (m_bufferOffset_ >= 0 || m_source_.getIndex() == 0) {
// if we have already swapped or at the start of the source
// Treat Thai as a length one expansion
return collator.m_expansion_[getExpansionOffset(collator, ce)];
}
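        // Thai (and Lao) pre-vowels are written before the consonant they
        // modify but collate after it, so the pre-vowel and the character
        // that follows it are copied into the normalization buffer with
        // their order swapped (unless a contraction would be broken) and
        // iteration continues from that buffer.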
        // since the icu4j iterator iterates correctly when the direction
        // changes within a run, we have to handle this a little differently
        // from the C implementation here.
// check that ch is from the normalization buffer or not
boolean innorm = m_bufferOffset_ >= 0;
char prevch = previousChar();
if (!isThaiPreVowel(prevch)) {
            // we now rearrange unconditionally and do not check for a base
            // consonant
if (prevch != CharacterIterator.DONE) {
nextChar();
}
// Treat Thai as a length one expansion
return collator.m_expansion_[getExpansionOffset(collator, ce)];
}
        // Move the pre-vowel and the following base consonant into the
// normalization buffer with their order swapped
// buffer is always clean when we are in the source string
boolean reorder = true;
m_FCDStart_ = m_source_.getIndex();
if (innorm) {
// ch is part of the normalization buffer, we simply check and
// insert prevch
if (m_collator_.isContractionEnd(ch)) {
reorder = false;
}
m_bufferOffset_ = 2;
// we don't have to set the FCD limit here since we are already
// in the normalization buffer
}
else {
String decomp = Normalizer.decompose(UTF16.toString(ch), false);
// we need to check if we will hit a contraction trigger because of
// decomposition
for (int i = decomp.length() - 1; i >= 0; i --) {
if (m_collator_.isContractionEnd(decomp.charAt(i))) {
reorder = false;
break;
}
}
m_buffer_.replace(0, m_buffer_.length(), decomp);
m_bufferOffset_ = m_buffer_.length() + 1;
m_FCDLimit_ = m_FCDStart_ + 2;
}
if (reorder) {
m_buffer_.insert(1, prevch);
}
else {
m_buffer_.insert(0, prevch);
}
return IGNORABLE;
}
/**
     * Special processing for getting a CE that is preceded by a certain
     * prefix. Currently this is only needed for optimizing Japanese length
     * and iteration marks. When we encounter a special processing tag, we
     * go backwards and try to see if we have a match. Contraction tables
     * are used, so the whole process is not unlike contraction matching;
     * the prefix data is stored backwards in the table.
* @param collator current collator
* @param ce current ce
* @return previous ce
*/
private int previousSpecialPrefix(RuleBasedCollator collator, int ce)
{
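        // For example, per the description above, a Japanese iteration mark
        // gets a ce that depends on the character that precedes it, so we
        // walk backwards through the prefix (contraction) table looking for
        // the longest stored prefix that matches the preceding text.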
backupInternalState(m_utilSpecialBackUp_);
while (true) {
            // position ourselves at the beginning of the contraction sequence
int offset = getContractionOffset(collator, ce);
int entryoffset = offset;
if (isBackwardsStart()) {
ce = collator.m_contractionCE_[offset];
break;
}
char prevch = previousChar();
while (prevch > collator.m_contractionIndex_[offset]) {
// since contraction codepoints are ordered, we skip all that
// are smaller
offset ++;
}
if (prevch == collator.m_contractionIndex_[offset]) {
ce = collator.m_contractionCE_[offset];
}
else {
// if there is a completely ignorable code point in the middle
// of a prefix, we need to act as if it's not there assumption:
// 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to
// zero)
// lone surrogates cannot be set to zero as it would break
// other processing
int isZeroCE = collator.m_trie_.getLeadValue(prevch);
// it's easy for BMP code points
if (isZeroCE == 0) {
continue;
}
else if (UTF16.isTrailSurrogate(prevch)
|| UTF16.isLeadSurrogate(prevch)) {
                    // for surrogates, we have to check the preceding code
                    // unit. situations where we are going to ignore prevch:
                    // 1. beginning of the string: prevch is a lone surrogate
                    // 2. prevch is a lone surrogate (no lead surrogate
                    //    precedes it)
                    // 3. prevch is a trail surrogate in a valid surrogate
                    //    sequence that is explicitly set to zero.
if (!isBackwardsStart()) {
char lead = previousChar();
if (UTF16.isLeadSurrogate(lead)) {
isZeroCE = collator.m_trie_.getLeadValue(lead);
if (RuleBasedCollator.getTag(isZeroCE)
== RuleBasedCollator.CE_SURROGATE_TAG_) {
int finalCE = collator.m_trie_.getTrailValue(
isZeroCE,
prevch);
if (finalCE == 0) {
// this is a real, assigned completely
// ignorable code point
continue;
}
}
}
else {
nextChar(); // revert to original offset
// lone surrogate, completely ignorable
continue;
}
nextChar(); // revert to original offset
}
else {
                    // lone surrogate at the beginning, completely ignorable
continue;
}
}
// char was not in the table. prefix not found
ce = collator.m_contractionCE_[entryoffset];
}
if (!isSpecialPrefixTag(ce)) {
// char was in the contraction table, and the corresponding ce
// is not a prefix ce. We found the prefix, break out of loop,
// this ce will end up being returned.
break;
}
}
updateInternalState(m_utilSpecialBackUp_);
return ce;
}
/**
     * Retrieves the previous contraction ce. To ensure that backwards and
     * forwards iteration match, we take the longest region that could
     * possibly match and pass it through the forward iteration. This
     * ensures that the obstinate problem of overlapping contractions will
     * not occur.
* @param collator current collator
* @param ce current ce
* @param ch current character
* @return previous contraction ce
*/
private int previousContraction(RuleBasedCollator collator, int ce, char ch)
{
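        // Strategy: collect the maximal span of "unsafe" characters ending
        // at the current position, run it through a scratch forward
        // CollationElementIterator, buffer all of its ces and then hand
        // them back from the end of the buffer. This keeps backwards
        // iteration consistent with forwards iteration even for
        // overlapping contractions.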
m_utilStringBuffer_.setLength(0);
// since we might encounter normalized characters (from the thai
// processing) we can't use peekCharacter() here.
char prevch = previousChar();
boolean atStart = false;
while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
m_utilStringBuffer_.insert(0, ch);
ch = prevch;
if (isBackwardsStart()) {
atStart = true;
break;
}
prevch = previousChar();
}
if (!atStart) {
// undo the previousChar() if we didn't reach the beginning
nextChar();
}
// adds the initial base character to the string
m_utilStringBuffer_.insert(0, ch);
        // a new collation element iterator is used to simplify things, since
// using the current collation element iterator will mean that the
// forward and backwards iteration will share and change the same
// buffers. it is going to be painful.
int originaldecomp = collator.getDecomposition();
// for faster access, since string would have been normalized above
collator.setDecomposition(Collator.NO_DECOMPOSITION);
if (m_utilColEIter_ == null) {
m_utilColEIter_ = new CollationElementIterator(
m_utilStringBuffer_.toString(),
collator);
}
else {
m_utilColEIter_.m_collator_ = collator;
m_utilColEIter_.setText(m_utilStringBuffer_.toString());
}
ce = m_utilColEIter_.next();
m_CEBufferSize_ = 0;
while (ce != NULLORDER) {
if (m_CEBufferSize_ == m_CEBuffer_.length) {
try {
// increasing cebuffer size
int tempbuffer[] = new int[m_CEBuffer_.length + 50];
System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0,
m_CEBuffer_.length);
m_CEBuffer_ = tempbuffer;
}
catch (Exception e) {
e.printStackTrace();
return NULLORDER;
}
}
m_CEBuffer_[m_CEBufferSize_ ++] = ce;
ce = m_utilColEIter_.next();
}
collator.setDecomposition(originaldecomp);
m_CEBufferOffset_ = m_CEBufferSize_ - 1;
return m_CEBuffer_[m_CEBufferOffset_];
}
/**
     * Returns the previous ce for a long primary; the remaining ce is
     * placed in the ce buffer
     * @param ce long primary ce
     * @return previous long primary ce
*/
private int previousLongPrimary(int ce)
{
m_CEBufferSize_ = 0;
m_CEBuffer_[m_CEBufferSize_ ++] =
((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_;
m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24)
| RuleBasedCollator.CE_CONTINUATION_MARKER_;
m_CEBufferOffset_ = m_CEBufferSize_ - 1;
return m_CEBuffer_[m_CEBufferOffset_];
}
/**
* Returns the previous expansion ces
* @param collator current collator
* @param ce current ce
* @return previous expansion ce
*/
private int previousExpansion(RuleBasedCollator collator, int ce)
{
// find the offset to expansion table
int offset = getExpansionOffset(collator, ce);
m_CEBufferSize_ = getExpansionCount(ce);
if (m_CEBufferSize_ != 0) {
// less than 16 elements in expansion
for (int i = 0; i < m_CEBufferSize_; i ++) {
m_CEBuffer_[i] = collator.m_expansion_[offset + i];
}
}
else {
// null terminated ces
while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
m_CEBuffer_[m_CEBufferSize_] =
collator.m_expansion_[offset + m_CEBufferSize_];
m_CEBufferSize_ ++;
}
}
m_CEBufferOffset_ = m_CEBufferSize_ - 1;
return m_CEBuffer_[m_CEBufferOffset_];
}
/**
* Returns previous hangul ces
* @param collator current collator
* @param ch current character
* @return previous hangul ce
*/
private int previousHangul(RuleBasedCollator collator, char ch)
{
char L = (char)(ch - HANGUL_SBASE_);
// we do it in this order since some compilers can do % and / in one
// operation
char T = (char)(L % HANGUL_TCOUNT_);
L /= HANGUL_TCOUNT_;
char V = (char)(L % HANGUL_VCOUNT_);
L /= HANGUL_VCOUNT_;
// offset them
L += HANGUL_LBASE_;
V += HANGUL_VBASE_;
T += HANGUL_TBASE_;
m_CEBufferSize_ = 0;
if (!collator.m_isJamoSpecial_) {
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(L);
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(V);
if (T != HANGUL_TBASE_) {
m_CEBuffer_[m_CEBufferSize_ ++] =
collator.m_trie_.getLeadValue(T);
}
m_CEBufferOffset_ = m_CEBufferSize_ - 1;
return m_CEBuffer_[m_CEBufferOffset_];
}
else {
// Since Hanguls pass the FCD check, it is guaranteed that we won't
// be in the normalization buffer if something like this happens
// Move Jamos into normalization buffer
m_buffer_.append(L);
m_buffer_.append(V);
if (T != HANGUL_TBASE_) {
m_buffer_.append(T);
}
m_FCDStart_ = m_source_.getIndex();
m_FCDLimit_ = m_FCDStart_ + 1;
return IGNORABLE;
}
}
/**
     * Gets the previous implicit ce for a codepoint
     * @param codepoint current codepoint
     * @return previous implicit ce
*/
private int previousImplicit(int codepoint)
{
if (!UCharacter.isLegal(codepoint)) {
return IGNORABLE; // illegal code value, completely ignoreable!
}
int result = getImplicitPrimary(codepoint);
m_CEBufferSize_ = 2;
m_CEBufferOffset_ = 1;
m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
| 0x00000505;
m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
return m_CEBuffer_[1];
}
/**
* Gets the previous surrogate ce
* @param ch current character
* @return previous surrogate ce
*/
private int previousSurrogate(char ch)
{
if (isBackwardsStart()) {
// we are at the start of the string, wrong place to be at
return IGNORABLE;
}
char prevch = previousChar();
// Handles Han and Supplementary characters here.
if (UTF16.isLeadSurrogate(prevch)) {
return previousImplicit(
UCharacterProperty.getRawSupplementary(prevch, ch));
}
if (prevch != CharacterIterator.DONE) {
nextChar();
}
return IGNORABLE; // completely ignorable
}
/**
* <p>Special CE management. Expansions, contractions etc...</p>
* @param collator can be plain UCA
* @param ce current ce
* @param ch current character
* @return previous special ce
*/
private int previousSpecial(RuleBasedCollator collator, int ce, char ch)
{
while(true) {
            // the only ces that loop are thai, special prefix and
            // contraction ces
switch (RuleBasedCollator.getTag(ce)) {
case CE_NOT_FOUND_TAG_: // this tag always returns
return ce;
case RuleBasedCollator.CE_SURROGATE_TAG_:
                // essentially a disengaged lead surrogate: a broken
                // sequence was encountered and this is an error
return IGNORABLE;
case CE_THAI_TAG_:
ce = previousThai(collator, ce, ch);
break;
case CE_SPEC_PROC_TAG_:
ce = previousSpecialPrefix(collator, ce);
break;
case CE_CONTRACTION_TAG_:
                // may loop for the first character e.g. "0x0f71" for English
if (isBackwardsStart()) {
// start of string or this is not the end of any contraction
ce = collator.m_contractionCE_[
getContractionOffset(collator, ce)];
break;
}
return previousContraction(collator, ce, ch); // else
case CE_LONG_PRIMARY_TAG_:
return previousLongPrimary(ce);
case CE_EXPANSION_TAG_: // always returns
return previousExpansion(collator, ce);
case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
return previousHangul(collator, ch);
case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
return IGNORABLE; // broken surrogate sequence
case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF
return previousSurrogate(ch);
case CE_CJK_IMPLICIT_TAG_:
// 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
return previousImplicit(ch);
case CE_IMPLICIT_TAG_: // everything that is not defined
// UCA is filled with these. Tailorings are NOT_FOUND
return previousImplicit(ch);
case CE_CHARSET_TAG_: // this tag always returns
return CE_NOT_FOUND_;
default: // this tag always returns
ce = IGNORABLE;
}
if (!RuleBasedCollator.isSpecial(ce)) {
break;
}
}
return ce;
}
/**
     * Gets the implicit primary weight for a codepoint
     * @param cp codepoint
     * @return left justified implicit primary weight
*/
private static final int getImplicitPrimary(int cp)
{
cp = swapCJK(cp);
//if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
        // we now have a range of numbers from 0 to 0x21FFFF.
// we must skip all 00, 01, 02 bytes, so most bytes have 253 values
// we must leave a gap of 01 between all values of the last byte, so
// the last byte has 126 values (3 byte case)
// we shift so that HAN all has the same first primary, for
// compression.
// for the 4 byte case, we make the gap as large as we can fit.
// Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
// Four byte forms (most supplementaries) are EF xx xx xx (with a gap
// of LAST2_MULTIPLIER == 14)
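        // Worked example for the three byte case, using the byte ranges
        // described above (126 values for the last byte, 253 for the
        // others): a swapped value of 130 gives last0 = 130 % 126 = 4,
        // last1 = (130 / 126) % 253 = 1 and last2 = 0, so the primary is
        // IMPLICIT_BASE_3BYTE_ + (1 << 16) + ((4 * LAST_MULTIPLIER_) << 8).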
int last0 = cp - RuleBasedCollator.IMPLICIT_4BYTE_BOUNDARY_;
if (last0 < 0) {
int last1 = cp / RuleBasedCollator.LAST_COUNT_;
last0 = cp % RuleBasedCollator.LAST_COUNT_;
int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
last1 %= RuleBasedCollator.OTHER_COUNT_;
return RuleBasedCollator.IMPLICIT_BASE_3BYTE_ + (last2 << 24)
+ (last1 << 16)
+ ((last0 * RuleBasedCollator.LAST_MULTIPLIER_) << 8);
}
else {
int last1 = last0 / RuleBasedCollator.LAST_COUNT2_;
last0 %= RuleBasedCollator.LAST_COUNT2_;
int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
last1 %= RuleBasedCollator.OTHER_COUNT_;
int last3 = last2 / RuleBasedCollator.OTHER_COUNT_;
last2 %= RuleBasedCollator.OTHER_COUNT_;
return RuleBasedCollator.IMPLICIT_BASE_4BYTE_ + (last3 << 24)
+ (last2 << 16) + (last1 << 8)
+ (last0 * RuleBasedCollator.LAST2_MULTIPLIER_);
}
}
/**
* Swapping CJK characters for implicit ces
     * @param cp codepoint to remap
* @return swapped result
*/
private static final int swapCJK(int cp)
{
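        // Remaps CJK so that implicit primaries sort URO first, then the
        // CJK compatibility ideographs covered by CJK_COMPAT_USED_*, then
        // Extension A; Extension B keeps its supplementary value and
        // everything else is pushed past the CJK ranges by NON_CJK_OFFSET_.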
if (cp >= CJK_BASE_) {
if (cp < CJK_LIMIT_) {
return cp - CJK_BASE_;
}
if (cp < CJK_COMPAT_USED_BASE_) {
return cp + NON_CJK_OFFSET_;
}
if (cp < CJK_COMPAT_USED_LIMIT_) {
return cp - CJK_COMPAT_USED_BASE_ + (CJK_LIMIT_ - CJK_BASE_);
}
if (cp < CJK_B_BASE_) {
return cp + NON_CJK_OFFSET_;
}
if (cp < CJK_B_LIMIT_) {
return cp; // non-BMP-CJK
}
return cp + NON_CJK_OFFSET_; // non-CJK
}
if (cp < CJK_A_BASE_) {
return cp + NON_CJK_OFFSET_;
}
if (cp < CJK_A_LIMIT_) {
return cp - CJK_A_BASE_ + (CJK_LIMIT_ - CJK_BASE_)
+ (CJK_COMPAT_USED_LIMIT_ - CJK_COMPAT_USED_BASE_);
}
return cp + NON_CJK_OFFSET_; // non-CJK
}
/**
* Gets a character from the source string at a given offset.
* Handles both normal and iterative cases.
* No error checking and does not access the normalization buffer
* - caller beware!
* @param offset offset from current position which character is to be
* retrieved
* @return character at current position + offset
*/
private char peekCharacter(int offset)
{
if (offset != 0) {
int currentoffset = m_source_.getIndex();
m_source_.setIndex(currentoffset + offset);
char result = m_source_.current();
m_source_.setIndex(currentoffset);
return result;
}
else {
return m_source_.current();
}
}
}