// src/com/ibm/icu/text/CollationElementIterator.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2005, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *
 *******************************************************************************
 */
 package com.ibm.icu.text;

 /***
  * import java.text.StringCharacterIterator;
  * import java.text.CharacterIterator;
  */
 import com.ibm.icu.impl.NormalizerImpl;
 import com.ibm.icu.impl.UCharacterProperty;
 import com.ibm.icu.impl.StringUCharacterIterator;
 import com.ibm.icu.impl.CharacterIteratorWrapper;
 import com.ibm.icu.impl.ICUDebug;
 import com.ibm.icu.lang.UCharacter;
 import java.text.CharacterIterator;
 import java.util.MissingResourceException;

 /**
  * <p><code>CollationElementIterator</code> is an iterator created by
  * a RuleBasedCollator to walk through a string. The return result of
  * each iteration is a 32-bit collation element that defines the
  * ordering priority of the next character or sequence of characters
  * in the source string.</p>
  *
  * <p>For illustration, consider the following in Spanish:
  * <blockquote>
  * <pre>
  * "ca" -> the first collation element is collation_element('c') and second
  *         collation element is collation_element('a').
  *
  * Since "ch" in Spanish sorts as one entity, the below example returns one
  * collation element for the two characters 'c' and 'h'
  *
  * "cha" -> the first collation element is collation_element('ch') and second
  *          collation element is collation_element('a').
  * </pre>
  * </blockquote>
  * And in German,
  * <blockquote>
  * <pre>
  * Since the character '&#230;' is a composed character of 'a' and 'e', the
  * iterator returns two collation elements for the single character '&#230;'
  *
  * "&#230;b" -> the first collation element is collation_element('a'), the
  *              second collation element is collation_element('e'), and the
  *              third collation element is collation_element('b').
  * </pre>
  * </blockquote>
  * </p>
  *
  * <p>For collation ordering comparison, the collation element results
  * cannot be compared simply by using basic arithmetic operators,
  * e.g. &lt;, == or &gt;; further processing has to be done. Details
  * can be found in the ICU
  * <a href="http://icu.sourceforge.net/userguide/Collate_ServiceArchitecture.html">
  * user guide</a>. An example of using the CollationElementIterator
  * for collation ordering comparison is the class
  * <a href="StringSearch.html">com.ibm.icu.text.StringSearch</a>.</p>
  *
  * <p>To construct a CollationElementIterator object, users
  * call the method getCollationElementIterator() on a
  * RuleBasedCollator that defines the desired sorting order.</p>
  *
  * <p> Example:
  * <blockquote>
  * <pre>
  *  String testString = "This is a test";
  *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
  *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
  *  int primaryOrder = iterator.IGNORABLE;
  *  while (primaryOrder != iterator.NULLORDER) {
  *      int order = iterator.next();
  *      if (order != iterator.IGNORABLE &&
  *          order != iterator.NULLORDER) {
  *          // order is valid, not ignorable and we have not passed the end
  *          // of the iteration, we do something
  *          primaryOrder = CollationElementIterator.primaryOrder(order);
  *          System.out.println("Next primary order 0x" +
  *                             Integer.toHexString(primaryOrder));
  *      }
  *  }
  * </pre>
  * </blockquote>
  * </p>
  * <p>
  * This class is not subclassable
  * </p>
  * @see Collator
  * @see RuleBasedCollator
  * @see StringSearch
  * @author Syn Wee Quek
  * @stable ICU 2.8
  */
 public final class CollationElementIterator
 {


     // public data members --------------------------------------------------

     /**
      * <p>This constant is returned by the iterator in the methods
      * next() and previous() when the end or the beginning of the
      * source string has been reached, and there are no more valid
      * collation elements to return.</p>
      *
      * <p>See class documentation for an example of use.</p>
      * @stable ICU 2.8
      * @see #next
      * @see #previous */
     public final static int NULLORDER = 0xffffffff;

     /**
      * <p>This constant is returned by the iterator in the methods
      * next() and previous() when a collation element result is to be
      * ignored.</p>
      *
      * <p>See class documentation for an example of use.</p>
      * @stable ICU 2.8
      * @see #next
      * @see #previous */
     public static final int IGNORABLE = 0;

     // public methods -------------------------------------------------------

     // public getters -------------------------------------------------------

     /**
      * <p>Returns the character offset in the source string
      * corresponding to the next collation element. I.e., getOffset()
      * returns the position in the source string corresponding to the
      * collation element that will be returned by the next call to
      * next(). This value could be any of:
      * <ul>
      * <li> The index of the <b>first</b> character corresponding to
      * the next collation element. (This means that if
      * <code>setOffset(offset)</code> sets the index in the middle of
      * a contraction, <code>getOffset()</code> returns the index of
      * the first character in the contraction, which may not be equal
      * to the original offset that was set. Hence calling getOffset()
      * immediately after setOffset(offset) does not guarantee that the
      * original offset set will be returned.)
      * <li> If normalization is on, the index of the <b>immediate</b>
      * subsequent character, or composite character with the first
      * character, having a combining class of 0.
      * <li> The length of the source string, if iteration has reached
      * the end.
      *</ul>
      * </p>
      * @return The character offset in the source string corresponding to the
      *         collation element that will be returned by the next call to
      *         next().
      * @stable ICU 2.8
      */
     public int getOffset()
     {
         // -1 marks "not reading from m_buffer_": in that case the source
         // iterator's own index is the offset
         if (m_bufferOffset_ == -1) {
             return m_source_.getIndex();
         }
         // reading from the buffer: report m_FCDLimit_ when travelling
         // forwards and m_FCDStart_ when travelling backwards
         return m_isForwards_ ? m_FCDLimit_ : m_FCDStart_;
     }


     /**
      * <p> Returns the maximum length of any expansion sequence that ends with
      * the specified collation element. If there is no expansion with this
      * collation element as the last element, returns 1.
      * </p>
      * @param ce a collation element returned by previous() or next().
      * @return the maximum length of any expansion sequence ending
      *         with the specified collation element.
      * @stable ICU 2.8
      */
     public int getMaxExpansion(int ce)
     {
         // the table is searched in unsigned 32-bit order, so widen the
         // probe value to long and mask off the sign extension
         long target = ce & 0xFFFFFFFFL;
         int lo = 0;
         int hi = m_collator_.m_expansionEndCE_.length;
         // binary search: shrink the window until lo and hi are adjacent
         while (hi - lo > 1) {
             int mid = lo + ((hi - lo) >>> 1);
             long probe = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFL;
             if (target > probe) {
                 lo = mid;
             }
             else {
                 hi = mid;
             }
         }
         // the match, if any, sits at one of the two surviving positions
         if (m_collator_.m_expansionEndCE_[lo] == ce) {
             return m_collator_.m_expansionEndCEMaxSize_[lo];
         }
         if (hi < m_collator_.m_expansionEndCE_.length
             && m_collator_.m_expansionEndCE_[hi] == ce) {
             return m_collator_.m_expansionEndCEMaxSize_[hi];
         }
         // not in the table: a CE whose low 16 bits are 0x00C0 reports 2
         // NOTE(review): presumably this pattern marks a continuation CE -
         // confirm against the collator's CE layout
         if ((ce & 0xFFFF) == 0x00C0) {
             return 2;
         }
         // default when no expansion ends with this CE
         return 1;
     }

     // public other methods -------------------------------------------------

     /**
      * <p> Resets the cursor to the beginning of the string. The next
      * call to next() or previous() will return the first and last
      * collation element in the string, respectively.</p>
      *
      * <p>If the RuleBasedCollator used by this iterator has had its
      * attributes changed, calling reset() will reinitialize the
      * iterator to use the new attributes.</p>
      *
      * @stable ICU 2.8
      */
     public void reset()
     {
         // rewind the underlying source iterator to its first character
         m_source_.setToStart();
         // refresh the iterator's derived state for the new position
         // (updateInternalState() is defined outside this view)
         updateInternalState();
     }

     /**
      * <p>Get the next collation element in the source string.</p>
      *
      * <p>This iterator iterates over a sequence of collation elements
      * that were built from the string. Because there isn't
      * necessarily a one-to-one mapping from characters to collation
      * elements, this doesn't mean the same thing as "return the
      * collation element [or ordering priority] of the next character
      * in the string".</p>
      *
      * <p>This function returns the collation element that the
      * iterator is currently pointing to, and then updates the
      * internal pointer to point to the next element.  Previous()
      * updates the pointer first, and then returns the element. This
      * means that when you change direction while iterating (i.e.,
      * call next() and then call previous(), or call previous() and
      * then call next()), you'll get back the same element twice.</p>
      *
      * @return the next collation element or NULLORDER if the end of the
      *         iteration has been reached.
      * @stable ICU 2.8
      */
     public int next()
     {
         // record the direction of travel; getOffset() and previous() read it
         m_isForwards_ = true;
         if (m_CEBufferSize_ > 0) {
             if (m_CEBufferOffset_ < m_CEBufferSize_) {
                 // if there are expansions left in the buffer, we return it
                 return m_CEBuffer_[m_CEBufferOffset_ ++];
             }
             // every buffered CE has been handed out: empty the buffer before
             // reading more source text
             m_CEBufferSize_ = 0;
             m_CEBufferOffset_ = 0;
         }

         // fetch the next source code unit (nextChar() is defined outside
         // this view)
         int ch_int = nextChar();

         if (ch_int == UCharacterIterator.DONE) {
             // end of the source text
             return NULLORDER;
         }
         char ch = (char)ch_int;
         if (m_collator_.m_isHiragana4_) {
             // flag is set for U+3040..U+309E, excluding U+3095..U+309C
             m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
                                      && !(ch > 0x3094 && ch < 0x309d);
         }

         int result = NULLORDER;
         if (ch <= 0xFF) {
             // For latin-1 characters we never need to fall back to the UCA
             // table because all of the UCA data is replicated in the
             // latinOneMapping array
             result = m_collator_.m_trie_.getLatin1LinearValue(ch);
             if (RuleBasedCollator.isSpecial(result)) {
                 // special CEs need further resolution before they can be
                 // returned
                 result = nextSpecial(m_collator_, result, ch);
             }
         }
         else {
             // look the code unit up in this collator's own trie first
             result = m_collator_.m_trie_.getLeadValue(ch);
             //System.out.println(Integer.toHexString(result));
             if (RuleBasedCollator.isSpecial(result)) {
                 // surrogate leads are handled as special ces
                 result = nextSpecial(m_collator_, result, ch);
             }
             if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) {
                 // couldn't find a good CE in the tailoring
                 // if we got here, the codepoint MUST be over 0xFF - so we look
                 // directly in the UCA
                 result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
                 if (RuleBasedCollator.isSpecial(result)) {
                     // UCA also gives us a special CE
                     result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
                 }
             }
         }
         if(result == CE_NOT_FOUND_) {
             // maybe there is no UCA, unlikely in Java, but ported for consistency
             // last resort: derive an implicit CE for the code unit
             result = nextImplicit(ch);
         }
         return result;
     }

     /**
      * <p>Get the previous collation element in the source string.</p>
      *
      * <p>This iterator iterates over a sequence of collation elements
      * that were built from the string. Because there isn't
      * necessarily a one-to-one mapping from characters to collation
      * elements, this doesn't mean the same thing as "return the
      * collation element [or ordering priority] of the previous
      * character in the string".</p>
      *
      * <p>This function updates the iterator's internal pointer to
      * point to the collation element preceding the one it's currently
      * pointing to and then returns that element, while next() returns
      * the current element and then updates the pointer. This means
      * that when you change direction while iterating (i.e., call
      * next() and then call previous(), or call previous() and then
      * call next()), you'll get back the same element twice.</p>
      *
      * @return the previous collation element, or NULLORDER when the start of
      *             the iteration has been reached.
      * @stable ICU 2.8
      */
     public int previous()
     {
         if (m_source_.getIndex() <= 0 && m_isForwards_) {
             // if iterator is new or reset, we can immediate perform  backwards
             // iteration even when the offset is not right.
             m_source_.setToLimit();
             updateInternalState();
         }
         // record the direction of travel; getOffset() reads it
         m_isForwards_ = false;
         int result = NULLORDER;
         if (m_CEBufferSize_ > 0) {
             if (m_CEBufferOffset_ > 0) {
                 // buffered CEs are handed out in reverse when going backwards
                 return m_CEBuffer_[-- m_CEBufferOffset_];
             }
             // buffer exhausted in the backwards direction: empty it
             m_CEBufferSize_ = 0;
             m_CEBufferOffset_ = 0;
         }
         // fetch the preceding source code unit (previousChar() is defined
         // outside this view)
         int ch_int = previousChar();
         if (ch_int == UCharacterIterator.DONE) {
             // start of the source text
             return NULLORDER;
         }
         char ch = (char)ch_int;
         if (m_collator_.m_isHiragana4_) {
             // flag is set for U+3040..U+309F
             // NOTE(review): next() uses a narrower range (it excludes
             // U+3095..U+309C and U+309F) - confirm the difference is intended
             m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
         }
         if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) {
             // ch may end a contraction and there is text before it: resolve
             // it through the contraction path
             result = previousSpecial(m_collator_, CE_CONTRACTION_, ch);
         }
         else {
             // plain lookup in this collator's trie: linear table for
             // Latin-1, lead-value lookup otherwise
             if (ch <= 0xFF) {
                 result = m_collator_.m_trie_.getLatin1LinearValue(ch);
             }
             else {
                 result = m_collator_.m_trie_.getLeadValue(ch);
             }
             if (RuleBasedCollator.isSpecial(result)) {
                 result = previousSpecial(m_collator_, result, ch);
             }
             if (result == CE_NOT_FOUND_) {
                 // nothing usable in the tailoring: treat as a contraction if
                 // applicable, otherwise consult the UCA trie when available
                 if (!isBackwardsStart()
                     && m_collator_.isContractionEnd(ch)) {
                     result = CE_CONTRACTION_;
                 }
                 else {
                     if(RuleBasedCollator.UCA_ != null) {
                         result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
                     }
                 }

                 // a special CE from the fallback is resolved against the UCA
                 if (RuleBasedCollator.isSpecial(result)) {
                     if(RuleBasedCollator.UCA_ != null) {
                         result = previousSpecial(RuleBasedCollator.UCA_, result, ch);
                     }
                 }
             }
         }
         if(result == CE_NOT_FOUND_) {
             // last resort: derive an implicit CE for the code unit
             result = previousImplicit(ch);
         }
         return result;
     }

     /**
      * Return the primary order of the specified collation element,
      * i.e. the first 16 bits.  This value is unsigned.
      * @param ce the collation element
      * @return the element's 16 bits primary order.
      * @stable ICU 2.8
      */
     public final static int primaryOrder(int ce)
     {
         // isolate the primary-weight bits first, then move them down with a
         // zero-filling shift
         int primaryBits = ce & RuleBasedCollator.CE_PRIMARY_MASK_;
         return primaryBits >>> RuleBasedCollator.CE_PRIMARY_SHIFT_;
     }
     /**
      * Return the secondary order of the specified collation element,
      * i.e. the 16th to 23rd bits, inclusive.  This value is unsigned.
      * @param ce the collation element
      * @return the element's 8 bits secondary order
      * @stable ICU 2.8
      */
     public final static int secondaryOrder(int ce)
     {
         // isolate the secondary-weight bits, then shift them down
         int secondaryBits = ce & RuleBasedCollator.CE_SECONDARY_MASK_;
         return secondaryBits >> RuleBasedCollator.CE_SECONDARY_SHIFT_;
     }

     /**
      * Return the tertiary order of the specified collation element, i.e. the last
      * 8 bits.  This value is unsigned.
      * @param ce the collation element
      * @return the element's 8 bits tertiary order
      * @stable ICU 2.8
      */
     public final static int tertiaryOrder(int ce)
     {
         // the tertiary weight needs no shift: masking alone extracts it
         int tertiaryBits = RuleBasedCollator.CE_TERTIARY_MASK_ & ce;
         return tertiaryBits;
     }

     /**
      * <p> Sets the iterator to point to the collation element
      * corresponding to the character at the specified offset. The
      * value returned by the next call to next() will be the collation
      * element corresponding to the characters at offset.</p>
      *
      * <p>If offset is in the middle of a contracting character
      * sequence, the iterator is adjusted to the start of the
      * contracting sequence. This means that getOffset() is not
      * guaranteed to return the same value set by this method.</p>
      *
      * <p>If the decomposition mode is on, and offset is in the middle
      * of a decomposable range of source text, the iterator may not
      * return a correct result for the next forwards or backwards
      * iteration.  The user must ensure that the offset is not in the
      * middle of a decomposable range.</p>
      *
      * @param offset the character offset into the original source string to
      *        set. Note that this is not an offset into the corresponding
      *        sequence of collation elements.
      * @stable ICU 2.8
      */
     public void setOffset(int offset)
     {
         // position the source at the requested offset and inspect the code
         // unit found there
         m_source_.setIndex(offset);
         int ch_int = m_source_.current();
         char ch = (char)ch_int;
         if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
             // if it is unsafe we need to check if it is part of a contraction
             // or a surrogate character
             if (UTF16.isTrailSurrogate(ch)) {
                 // if it is a surrogate pair we move up one character
                 char prevch = (char)m_source_.previous();
                 if (!UTF16.isLeadSurrogate(prevch)) {
                     m_source_.setIndex(offset); // go back to the same index
                 }
             }
             else {
                 // could be part of a contraction
                 // backup to a safe point and iterate till we pass offset
                 while (m_source_.getIndex() > 0) {
                     if (!m_collator_.isUnsafe(ch)) {
                         break;
                     }
                     ch = (char)m_source_.previous();
                 }
                 // refresh derived state before iterating from the safe point
                 updateInternalState();
                 // walk forwards with next(), remembering the source index seen
                 // before each step; the last one recorded at or before offset
                 // becomes the adjusted position
                 int prevoffset = 0;
                 while (m_source_.getIndex() <= offset) {
                     prevoffset = m_source_.getIndex();
                     next();
                 }
                 m_source_.setIndex(prevoffset);
             }
         }
         // refresh derived state for the final position
         updateInternalState();
         // direction code to prevent next and previous from returning a
         // character if we are already at the ends
         offset = m_source_.getIndex();
         if (offset == 0/* m_source_.getBeginIndex() */) {
             // preventing previous() from returning characters from the end of
             // the string again if we are at the beginning
             m_isForwards_ = false;
         }
         else if (offset == m_source_.getLength()) {
             // preventing next() from returning characters from the start of
             // the string again if we are at the end
             m_isForwards_ = true;
         }
     }

     /**
      * <p>Set a new source string for iteration, and reset the offset
      * to the beginning of the text.</p>
      *
      * @param source the new source string for iteration.
      * @stable ICU 2.8
      */
     public void setText(String source)
     {
         // load the string into the reusable string iterator and iterate over
         // that iterator from now on
         m_srcUtilIter_.setText(source);
         m_source_ = m_srcUtilIter_;
         // refresh derived state for the new text
         // (updateInternalState() is defined outside this view)
         updateInternalState();
     }

     /**
      * <p>Set a new source string iterator for iteration, and reset the
      * offset to the beginning of the text.
      * </p>
      * <p>The source iterator's integrity will be preserved since a new copy
      * will be created for use.</p>
      * @param source the new source string iterator for iteration.
      * @stable ICU 2.8
      */
     public void setText(UCharacterIterator source)
     {
         // iterate over the text obtained from source.getText() through the
         // reusable string iterator, rather than over the caller's iterator
         m_srcUtilIter_.setText(source.getText());
         m_source_ = m_srcUtilIter_;
         // refresh derived state for the new text
         // (updateInternalState() is defined outside this view)
         updateInternalState();
     }

     /**
      * <p>Set a new source string iterator for iteration, and reset the
      * offset to the beginning of the text.
      * </p>
      * @param source the new source string iterator for iteration.
      * @stable ICU 2.8
      */
     public void setText(CharacterIterator source)
     {
         // adapt the java.text iterator with a wrapper; the reusable string
         // iterator is not used for this source
         m_source_ = new CharacterIteratorWrapper(source);
         // start iteration from the beginning of the wrapped text
         m_source_.setToStart();
         // refresh derived state for the new text
         // (updateInternalState() is defined outside this view)
         updateInternalState();
     }

     // public miscellaneous methods -----------------------------------------

     /**
      * Tests whether the argument object is equal to this
      * CollationElementIterator.
      * Iterators are equal if the objects use the same RuleBasedCollator,
      * the same source text and have the same current position in iteration.
      * @param that object to test for equality with this
      *             CollationElementIterator
      * @return true if the argument is equal to this iterator, false otherwise
      * @stable ICU 2.8
      */
     public boolean equals(Object that)
     {
         // identical reference: trivially equal
         if (this == that) {
             return true;
         }
         // anything that is not a CollationElementIterator (null included)
         // can never be equal
         if (!(that instanceof CollationElementIterator)) {
             return false;
         }
         CollationElementIterator other = (CollationElementIterator)that;
         // compared in order, stopping at the first mismatch: collator,
         // current source index, then source text
         return m_collator_.equals(other.m_collator_)
                && m_source_.getIndex() == other.m_source_.getIndex()
                && m_source_.getText().equals(other.m_source_.getText());
     }

     // package private constructors ------------------------------------------

     /**
      * <p>CollationElementIterator constructor. This takes a source
      * string and a RuleBasedCollator. The iterator will walk through
      * the source string based on the rules defined by the
      * collator. If the source string is empty, NULLORDER will be
      * returned on the first call to next().</p>
      *
      * @param source the source string.
      * @param collator the RuleBasedCollator
      * @stable ICU 2.8
      */
     CollationElementIterator(String source, RuleBasedCollator collator)
     {
         // wrap the string in the reusable string iterator and use it as the
         // source
         m_srcUtilIter_ = new StringUCharacterIterator(source);
         m_utilStringBuffer_ = new StringBuffer();
         m_source_ = m_srcUtilIter_;
         m_collator_ = collator;
         // allocate the CE buffer at its initial size
         m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
         m_buffer_ = new StringBuffer();
         m_utilSpecialBackUp_ = new Backup();
         // initialise derived state (updateInternalState() is defined outside
         // this view)
         updateInternalState();
     }

     /**
      * <p>CollationElementIterator constructor. This takes a source
      * character iterator and a RuleBasedCollator. The iterator will
      * walk through the source string based on the rules defined by
      * the collator. If the source string is empty, NULLORDER will be
      * returned on the first call to next().</p>
      *
      * @param source the source string iterator.
      * @param collator the RuleBasedCollator
      * @stable ICU 2.8
      */
     CollationElementIterator(CharacterIterator source,
                              RuleBasedCollator collator)
     {
         // the reusable string iterator is created empty; it is only filled
         // by the setText overloads that use it
         m_srcUtilIter_ = new StringUCharacterIterator();
         m_utilStringBuffer_ = new StringBuffer();
         // adapt the java.text iterator with a wrapper and use it as the source
         m_source_ = new CharacterIteratorWrapper(source);
         m_collator_ = collator;
         // allocate the CE buffer at its initial size
         m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
         m_buffer_ = new StringBuffer();
         m_utilSpecialBackUp_ = new Backup();
         // initialise derived state (updateInternalState() is defined outside
         // this view)
         updateInternalState();
     }

     /**
      * <p>CollationElementIterator constructor. This takes a source
      * character iterator and a RuleBasedCollator. The iterator will
      * walk through the source string based on the rules defined by
      * the collator. If the source string is empty, NULLORDER will be
      * returned on the first call to next().</p>
      *
      * @param source the source string iterator.
      * @param collator the RuleBasedCollator
      * @stable ICU 2.8
      */
     CollationElementIterator(UCharacterIterator source,
                              RuleBasedCollator collator)
     {
         m_srcUtilIter_ = new StringUCharacterIterator();
         m_utilStringBuffer_ = new StringBuffer();
         // iterate over the text obtained from source.getText() through the
         // reusable string iterator, rather than over the caller's iterator
         m_srcUtilIter_.setText(source.getText());
         m_source_ = m_srcUtilIter_;
         m_collator_ = collator;
         // allocate the CE buffer at its initial size
         m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
         m_buffer_ = new StringBuffer();
         m_utilSpecialBackUp_ = new Backup();
         // initialise derived state (updateInternalState() is defined outside
         // this view)
         updateInternalState();
     }

     // package private data members -----------------------------------------

     /**
      * true if current codepoint was Hiragana
      */
     boolean m_isCodePointHiragana_;
     /**
      * Position in the original string that starts with a non-FCD sequence
      */
     int m_FCDStart_;
     /**
      * This is the CE from CEs buffer that should be returned.
      * Initial value is 0.
      * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
      * backwards will end with m_CEBufferOffset_ == 0.
      * The next/previous after we reach the end/beginning of the m_CEBuffer_
      * will cause this value to be reset to 0.
      */
     int m_CEBufferOffset_;

     /**
      * This is the position to which we have stored processed CEs.
      * Initial value is 0.
      * The next/previous after we reach the end/beginning of the m_CEBuffer_
      * will cause this value to be reset to 0.
      */
     int m_CEBufferSize_;
     /**
      * Sentinel collation element: next() and previous() compare a lookup
      * result against this value to decide whether to fall back to another
      * table or to an implicit collation element.
      */
     static final int CE_NOT_FOUND_ = 0xF0000000;
     /**
      * Expansion tag.
      * NOTE(review): presumably the tag held in bits 24-27 of a special CE
      * (compare CE_EXPANSION_ = 0xF1000000 in this class) - confirm.
      */
     static final int CE_EXPANSION_TAG_ = 1;
     /**
      * Contraction tag.
      * NOTE(review): presumably the tag held in bits 24-27 of a special CE
      * (compare CE_CONTRACTION_ = 0xF2000000 in this class) - confirm.
      */
     static final int CE_CONTRACTION_TAG_ = 2;
     /**
      * Collate Digits As Numbers (CODAN) implementation
      */
     static final int CE_DIGIT_TAG_ = 13;

     // package private methods ----------------------------------------------

     /**
      * Sets the collator used.
      * Internal use, all data members will be reset to the default values
      * @param collator to set
      */
     void setCollator(RuleBasedCollator collator)
     {
         // swap in the new collator, then refresh the state derived from it
         // (updateInternalState() is defined outside this view)
         m_collator_ = collator;
         updateInternalState();
     }

     /**
      * <p>Sets the iterator to point to the collation element corresponding to
      * the specified character (the parameter is a CHARACTER offset in the
      * original string, not an offset into its corresponding sequence of
      * collation elements). The value returned by the next call to next()
      * will be the collation element corresponding to the specified position
      * in the text. Unlike the public method setOffset(int), this method does
      * not try to readjust the offset to the start of a contracting sequence.
      * getOffset() is guaranteed to return the same value as was passed to a
      * preceding call to setOffset().</p>
      * @param offset new character offset into the original text to set.
      */
     void setExactOffset(int offset)
     {
         // move the source straight to the requested index; unlike
         // setOffset(int) there is no scan for unsafe characters or
         // contractions
         m_source_.setIndex(offset);
         // refresh derived state (updateInternalState() is defined outside
         // this view)
         updateInternalState();
     }

     /**
      * Checks if iterator is in the buffer zone
      * @return true if iterator is in buffer zone, false otherwise
      */
     boolean isInBuffer()
     {
         // true only for a strictly positive buffer position
         // NOTE(review): getOffset() treats any value other than -1 as "in the
         // buffer", so the two disagree when m_bufferOffset_ is 0 - confirm
         // which test is intended
         return m_bufferOffset_ > 0;
     }


     /**
     /**
      * <p>Sets a new source text for iteration and sets the iterator to point
      * to the collation element corresponding to
      * the specified character (the parameter is a CHARACTER offset in the
      * original string, not an offset into its corresponding sequence of
      * collation elements). The value returned by the next call to next()
      * will be the collation element corresponding to the specified position
      * in the text. Unlike the public method setOffset(int), this method does
      * not try to readjust the offset to the start of a contracting sequence.
      * getOffset() is guaranteed to return the same value as was passed to a
      * preceding call to setOffset().</p>
      * @param source the new source string iterator for iteration.
      * @param offset to the source
      */
     void setText(UCharacterIterator source, int offset)
     {
         // iterate over the text obtained from source.getText() through the
         // reusable string iterator, rather than over the caller's iterator
         m_srcUtilIter_.setText(source.getText());
         m_source_ = m_srcUtilIter_;
         // move straight to the requested index, with no contraction
         // adjustment
         m_source_.setIndex(offset);
         // refresh derived state (updateInternalState() is defined outside
         // this view)
         updateInternalState();
     }

     // private inner class --------------------------------------------------

     /**
      * Backup data class
      */
     // Plain holder for a snapshot of iterator state; it has no behaviour
     // beyond allocating its own buffer on construction.
     private static final class Backup
     {
         // protected data members -------------------------------------------

         /**
          * Backup non FCD sequence limit
          */
         protected int m_FCDLimit_;
         /**
          * Backup non FCD sequence start
          */
         protected int m_FCDStart_;
         /**
          * Backup if previous Codepoint is Hiragana quaternary
          */
         protected boolean m_isCodePointHiragana_;
         /**
          * Backup buffer position
          */
         protected int m_bufferOffset_;
         /**
          * Backup source iterator offset
          */
         protected int m_offset_;
         /**
          * Backup buffer contents
          */
         protected StringBuffer m_buffer_;

         // protected constructor --------------------------------------------

         /**
          * Creates a backup with an empty buffer; the remaining fields keep
          * their Java default values.
          */
         protected Backup()
         {
             m_buffer_ = new StringBuffer();
         }
     }
     // end inner class ------------------------------------------------------

     /**
      * Direction of travel
      */
     private boolean m_isForwards_;
     /**
      * Source string iterator
      */
     private UCharacterIterator m_source_;
     /**
      * This is position to the m_buffer_, -1 if iterator is not in m_buffer_
      */
     private int m_bufferOffset_;
     /**
      * Buffer for temporary storage of normalized characters, discontiguous
      * characters and Thai characters
      */
     private StringBuffer m_buffer_;
     /**
      * Position in the original string to continue forward FCD check from.
      */
     private int m_FCDLimit_;
     /**
      * The collator this iterator is based on
      */
     private RuleBasedCollator m_collator_;
     /**
      * true if Hiragana quaternary is on
      */
     private boolean m_isHiragana4_;
     /**
      * CE buffer
      */
     private int m_CEBuffer_[];
     /**
      * In reality we should not have to deal with expansion sequences longer
      * than 16. However this value can be changed if a bigger buffer is needed.
      * Note, if the size is changed to too small a number, BIG trouble.
      * Reasonable small value is around 10, if there's no Arabic or other
      * funky collations that have long expansion sequence. This is the longest
      * expansion sequence this can handle without bombing out.
      */
     private static final int CE_BUFFER_INIT_SIZE_ = 512;
     /**
      * Backup storage for special processing inner cases
      */
     private Backup m_utilSpecialBackUp_;
     /**
      * Backup storage in special processing entry state
      */
     private Backup m_utilSpecialEntryBackUp_;
     /**
      * Backup storage in special processing discontiguous state
      */
     private Backup m_utilSpecialDiscontiguousBackUp_;
     /**
      * Utility
      */
     private StringUCharacterIterator m_srcUtilIter_;
     private StringBuffer m_utilStringBuffer_;
     private StringBuffer m_utilSkippedBuffer_;
     private CollationElementIterator m_utilColEIter_;
     /**
      * One character before the first non-zero combining class character
      */
     private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0;
     /**
      * One character before the first character with leading non-zero combining
      * class
      */
     private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300;
     /**
      * Mask for the last byte
      */
     private static final int LAST_BYTE_MASK_ = 0xFF;
     /**
      * Shift value for the second last byte
      */
     private static final int SECOND_LAST_BYTE_SHIFT_ = 8;

     // special ce values and tags -------------------------------------------

     private static final int CE_EXPANSION_ = 0xF1000000;
     private static final int CE_CONTRACTION_ = 0xF2000000;
     /**
      * Indicates the last ce has been consumed. Compare with NULLORDER.
      * NULLORDER is returned if error occurs.
      * The value is the bitwise OR of the three per-level constants that
      * follow.
      */
     private static final int CE_NO_MORE_CES_ = 0x00010101;
     private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000;
     private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100;
     private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001;

     private static final int CE_NOT_FOUND_TAG_ = 0;
     /**
      * Charset processing, not yet implemented
      */
     private static final int CE_CHARSET_TAG_ = 4;
     /**
      * Hangul syllables, AC00-D7AF
      */
     private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
     /**
      * Lead surrogates, D800-DBFF
      */
     private static final int CE_LEAD_SURROGATE_TAG_ = 7;
     /**
      * Trail surrogates, DC00-DFFF
      */
     private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
     /**
      * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
      */
     private static final int CE_CJK_IMPLICIT_TAG_ = 9;
     private static final int CE_IMPLICIT_TAG_ = 10;
     /**
      * Special processing tag; isSpecialPrefixTag() tests for it. Unlike the
      * neighbouring tag constants this one is not declared private.
      */
     static final int CE_SPEC_PROC_TAG_ = 11;
     /**
      * This is a 3 byte primary with starting secondaries and tertiaries.
      * It fits in a single 32 bit CE and is used instead of expansion to save
      * space without affecting the performance (hopefully).
      */
     private static final int CE_LONG_PRIMARY_TAG_ = 12;

     private static final int CE_CE_TAGS_COUNT = 14;
     private static final int CE_BYTE_COMMON_ = 0x05;

     // end special ce values and tags ---------------------------------------

     // Hangul stuff ---------------------------------------------------------
     // Base code points and counts for Hangul syllables (S) and for leading
     // (L), vowel (V) and trailing (T) jamo.
     // NOTE(review): presumably consumed by the Hangul syllable CE handling,
     // which is outside this section - confirm.
     private static final int HANGUL_SBASE_ = 0xAC00;
     private static final int HANGUL_LBASE_ = 0x1100;
     private static final int HANGUL_VBASE_ = 0x1161;
     private static final int HANGUL_TBASE_ = 0x11A7;
     private static final int HANGUL_VCOUNT_ = 21;
     private static final int HANGUL_TCOUNT_ = 28;

     // CJK stuff ------------------------------------------------------------
     // Each *_BASE_ is the first code point of a range; each *_LIMIT_ is
     // written as "last code point + 1", i.e. it is an exclusive upper bound.

     private static final int CJK_BASE_ = 0x4E00;
     private static final int CJK_LIMIT_ = 0x9FFF+1;
     private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E;
     private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1;
     private static final int CJK_A_BASE_ = 0x3400;
     private static final int CJK_A_LIMIT_ = 0x4DBF + 1;
     private static final int CJK_B_BASE_ = 0x20000;
     private static final int CJK_B_LIMIT_ = 0x2A6DF + 1;
     private static final int NON_CJK_OFFSET_ = 0x110000;

     // Debug switch, read once from ICUDebug under the key "collator".
     private static final boolean DEBUG  =  ICUDebug.enabled("collator");

     // private methods ------------------------------------------------------

     /**
      * Puts the iterator's bookkeeping back into its initial, forward
      * iterating state. The normalization buffer is emptied, the CE buffer
      * counters are zeroed, the FCD window is invalidated and the Hiragana
      * quaternary flag is re-read from the collator. The position of the
      * source iterator itself is not changed here.
      */
     private void updateInternalState()
     {
         // direction and per-character flags
         m_isForwards_ = true;
         m_isCodePointHiragana_ = false;
         m_isHiragana4_ = m_collator_.m_isHiragana4_;

         // normalization buffer: empty, and not being read from
         m_bufferOffset_ = -1;
         m_buffer_.setLength(0);

         // CE buffer counters
         m_CEBufferSize_ = 0;
         m_CEBufferOffset_ = 0;

         // FCD window: nothing has been checked yet
         m_FCDStart_ = m_source_.getLength();
         m_FCDLimit_ = -1;
     }

     /**
      * Takes a snapshot of the iterator's position state: source index, FCD
      * window, Hiragana flag and buffer offset. The contents of the
      * normalization buffer are copied only when the iterator is currently
      * reading from that buffer; otherwise the backup's buffer is left empty.
      * @param backup object that receives the snapshot
      */
     private void backupInternalState(Backup backup)
     {
         backup.m_bufferOffset_ = m_bufferOffset_;
         backup.m_isCodePointHiragana_ = m_isCodePointHiragana_;
         backup.m_FCDStart_ = m_FCDStart_;
         backup.m_FCDLimit_ = m_FCDLimit_;
         backup.m_offset_ = m_source_.getIndex();

         backup.m_buffer_.setLength(0);
         if (m_bufferOffset_ < 0) {
             // reading from the source string, there is no buffer to save
             return;
         }
         // StringBuffer.append(StringBuffer) is not available in jdk 1.3.1
         if (ICUDebug.isJDK14OrHigher) {
             backup.m_buffer_.append(m_buffer_);
         }
         else {
             backup.m_buffer_.append(m_buffer_.toString());
         }
     }

     /**
      * Restores the iterator's position state from a snapshot taken by
      * backupInternalState(Backup). The normalization buffer is refilled
      * from the snapshot only when the snapshot was taken while reading from
      * the buffer; otherwise it is left empty.
      * @param backup object holding the state to restore
      */
     private void updateInternalState(Backup backup)
     {
         m_source_.setIndex(backup.m_offset_);

         m_FCDStart_ = backup.m_FCDStart_;
         m_FCDLimit_ = backup.m_FCDLimit_;
         m_isCodePointHiragana_ = backup.m_isCodePointHiragana_;
         m_bufferOffset_ = backup.m_bufferOffset_;

         m_buffer_.setLength(0);
         if (m_bufferOffset_ < 0) {
             // snapshot was taken in the source string, buffer stays empty
             return;
         }
         // go through String, jdk 1.3.1 has no append(StringBuffer)
         String saved = backup.m_buffer_.toString();
         m_buffer_.append(saved);
     }

     /**
      * Combining class lookup with a fast path. The normalization data is
      * consulted only for code points above 0xFFFF, and for values at or above
      * LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ that the collator flags as
      * unsafe; everything else is reported as combining class 0.
      * @param ch UTF16 character or supplementary code point
      * @return combining class of ch
      */
     private int getCombiningClass(int ch)
     {
         // same grouping as "a && b || c": the collator is asked first
         boolean flaggedUnsafe = ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
                                 && m_collator_.isUnsafe((char)ch);
         if (flaggedUnsafe || ch > 0xFFFF) {
             return NormalizerImpl.getCombiningClass(ch);
         }
         return 0;
     }

     /**
      * <p>Incremental normalization, this is an essential optimization.
      * Copies the source code units in the range m_FCDStart_ (inclusive) to
      * m_FCDLimit_ (exclusive) into the buffer, replaces the buffer contents
      * with the result of Normalizer.decompose(..., false) and points the
      * buffer offset at the start of the buffer.
      * The FCD check that establishes the range is assumed to have been done
      * by the caller.
      * </p>
      */
     private void normalize()
     {
         // collect the raw segment from the source
         m_buffer_.setLength(0);
         m_source_.setIndex(m_FCDStart_);
         int remaining = m_FCDLimit_ - m_FCDStart_;
         while (remaining > 0) {
             m_buffer_.append((char)m_source_.next());
             remaining --;
         }

         // swap the raw segment for its decomposition
         String raw = m_buffer_.toString();
         String decomposed = Normalizer.decompose(raw, false);
         m_buffer_.setLength(0);
         m_buffer_.append(decomposed);
         m_bufferOffset_ = 0;
     }

     /**
      * <p>Incremental forward FCD check. Starting from the given character,
      * scans ahead in the source until a code unit with a leading combining
      * class of zero (or the end of the source) is found, and reports whether
      * every leading combining class met on the way was at least as large as
      * the trailing combining class before it.
      * </p>
      * <p>When entering, the state is known to be this:
      * <ul>
      * <li>We are working on source string, not the buffer.
      * <li>The leading combining class from the current character is 0 or the
      *     trailing combining class of the previous char was zero.
      * </ul>
      * Side effects: m_FCDStart_ is set to <code>offset</code>, m_FCDLimit_ is
      * set to the source index at which the scan stopped, and the source
      * iterator is left one step past <code>offset</code>.
      * </p>
      * @param ch current character
      * @param offset current character offset
      * @return true if FCDCheck passes, false otherwise
      */
     private boolean FCDCheck(char ch, int offset)
     {
         boolean result = true;

         // Get the trailing combining class of the current character.
         // If it's zero, we are OK.
         m_FCDStart_ = offset;
         m_source_.setIndex(offset);
         // trie access
         char fcd = NormalizerImpl.getFCD16(ch);
         if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
             // the lead surrogate alone is not enough, combine it with the
             // following code unit if that is a trail surrogate
             m_source_.next();
             ch = (char)m_source_.current();
             // UCharacterIterator.DONE has 0 fcd
             if (UTF16.isTrailSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
             } else {
                 // unpaired lead surrogate
                 fcd = 0;
             }
         }

         // low byte of the fcd value is the trailing combining class
         int prevTrailCC = fcd & LAST_BYTE_MASK_;

         if (prevTrailCC != 0) {
             // The current char has a non-zero trailing CC. Scan forward until
             // we find a char with a leading cc of zero.
             while (true) {
                 m_source_.next();
                 int ch_int = m_source_.current();
                 if (ch_int == UCharacterIterator.DONE) {
                     // ran off the end of the source
                     break;
                 }
                 ch = (char)ch_int;
                 // trie access
                 fcd = NormalizerImpl.getFCD16(ch);
                 if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
                     m_source_.next();
                     ch = (char)m_source_.current();
                     if (UTF16.isTrailSurrogate(ch)) {
                         fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
                     } else {
                         // unpaired lead surrogate
                         fcd = 0;
                     }
                 }
                 // high byte of the fcd value is the leading combining class
                 int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
                 if (leadCC == 0) {
                     // this is a base character, we stop the FCD checks
                     break;
                 }

                 if (leadCC < prevTrailCC) {
                     // combining classes out of order, the segment is not FCD.
                     // Keep scanning so that m_FCDLimit_ covers the whole
                     // segment.
                     result = false;
                 }

                 prevTrailCC = fcd & LAST_BYTE_MASK_;
             }
         }
         // remember where the scan stopped, then step the source to just
         // after the character we were called for
         m_FCDLimit_ = m_source_.getIndex();
         m_source_.setIndex(m_FCDStart_);
         m_source_.next();
         return result;
     }

     /**
      * <p>Method tries to fetch the next character that is in fcd form.</p>
      * <p>The character comes from the normalization buffer while the buffer
      * is active and has characters left, otherwise from the source string.
      * When reading from the source, an FCD check is run unless one of the
      * fast paths applies, and if the check fails the affected segment is
      * normalized into the buffer and its first character is returned.</p>
      * <p>Offsets are returned at the next character.</p>
      * @return next fcd character, or UCharacterIterator.DONE if the source
      *         iterator reports DONE at the current position
      */
     private int nextChar()
     {
         int result;

         // handles the next character whether it is in the buffer or not.
         if (m_bufferOffset_ < 0) {
             // we're working on the source and not normalizing. fast path.
             // note Thai pre-vowel reordering uses buffer too
             result = m_source_.current();
         }
         else {
             // we are in the buffer
             if (m_bufferOffset_ >= m_buffer_.length()) {
                 // Reached the end of the buffer, revert to the source string
                 // at m_FCDLimit_ and call ourselves again to get a character
                 // from there.
                 m_source_.setIndex(m_FCDLimit_);
                 m_bufferOffset_ = -1;
                 m_buffer_.setLength(0);
                 return nextChar();
             }
             return m_buffer_.charAt(m_bufferOffset_ ++);
         }
         // From here on we are reading the source string: the buffer branch
         // above always returns.
         int startoffset = m_source_.getIndex();
         if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
             // Fast fcd safe path. trail combining class == 0.
             || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
             // m_bufferOffset_ >= 0 cannot be true at this point (see above);
             // m_FCDLimit_ > startoffset means this position lies before the
             // limit recorded by an earlier FCD check.
             || m_bufferOffset_ >= 0 || m_FCDLimit_ > startoffset) {
             // skip the fcd checks
             m_source_.next();
             return result;
         }

         if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
             // We need to peek at the next character in order to tell if we are
             // FCD
             m_source_.next();
             int next = m_source_.current();
             if (next == UCharacterIterator.DONE
                 || next < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
                 return result; // end of source string and if next character
                 // starts with a base character is always fcd.
             }
         }

         // Need a more complete FCD check and possible normalization.
         // FCDCheck repositions the source itself, so the peek above does not
         // need to be undone.
         if (!FCDCheck((char)result, startoffset)) {
             normalize();
             // hand out the first normalized character and continue in the
             // buffer from the second one
             result = m_buffer_.charAt(0);
             m_bufferOffset_ = 1;
         }
         return result;
     }

     /**
      * <p>Backward flavour of the incremental normalization. Fills the buffer
      * exactly as normalize() does, then places the buffer offset at the end
      * of the buffer instead of at its start, so that the buffer can be read
      * from back to front.</p>
      */
     private void normalizeBackwards()
     {
         // forward normalization leaves the offset at 0 ...
         normalize();
         // ... backward iteration starts from the other end
         int end = m_buffer_.length();
         m_bufferOffset_ = end;
     }

     /**
      * <p>Incremental backwards FCD check. Starting from the given character,
      * scans backwards in the source until a character with a leading
      * combining class of zero (or the start of the source) is found, and
      * reports whether every leading combining class met on the way was at
      * least as large as the trailing combining class of the character
      * before it.
      * </p>
      * <p>When entering, the state is known to be this:
      * <ul>
      * <li>We are working on source string, not the buffer.
      * <li>The trailing combining class from the current character is 0 or the
      *     leading combining class of the next char was zero.
      * </ul>
      * Side effects: m_FCDLimit_ is set to <code>offset + 1</code>,
      * m_FCDStart_ is set to the start of the scanned segment, and the source
      * iterator is left at m_FCDLimit_.
      * </p>
      * <p>A trail surrogate that is not preceded by a lead surrogate is
      * treated as having an FCD value of 0, both for the starting character
      * and for the characters met during the backward scan.</p>
      * @param ch current character
      * @param offset current character offset
      * @return true if FCDCheck passes, false otherwise
      */
     private boolean FCDCheckBackwards(char ch, int offset)
     {
         boolean result = true;
         char fcd = 0;
         m_FCDLimit_ = offset + 1;
         m_source_.setIndex(offset);
         if (!UTF16.isSurrogate(ch)) {
             fcd = NormalizerImpl.getFCD16(ch);
         }
         // only look for a lead surrogate if there is a code unit before this
         // one (m_FCDLimit_ is offset + 1, so it cannot serve as that guard)
         else if (UTF16.isTrailSurrogate(ch) && offset > 0) {
             // note trail surrogate characters gets 0 fcd
             char trailch = ch;
             ch = (char)m_source_.previous();
             if (UTF16.isLeadSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16(ch);
                 if (fcd != 0) {
                     fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd,
                                                                    trailch);
                 }
             }
             else {
                 fcd = 0; // unpaired surrogate
             }
         }

         // high byte of the fcd value is the leading combining class
         int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
         // The current char has a non-zero leading combining class.
         // Scan backward until we find a char with a trailing cc of zero.

         while (leadCC != 0) {
             offset = m_source_.getIndex();
             if (offset == 0) {
                 // start of the source, nothing left to look at
                 break;
             }
             ch = (char)m_source_.previous();
             if (!UTF16.isSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16(ch);
             }
             else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
                 char trail = ch;
                 ch = (char)m_source_.previous();
                 if (UTF16.isLeadSurrogate(ch)) {
                     fcd = NormalizerImpl.getFCD16(ch);
                     if (fcd != 0) {
                         fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd,
                                                                        trail);
                     }
                 }
                 else {
                     // unpaired trail surrogate: do not carry over the fcd
                     // value of the character examined before this one
                     fcd = 0;
                 }
             }
             else {
                 fcd = 0; // unpaired surrogate
             }
             // low byte of the fcd value is the trailing combining class
             int prevTrailCC = fcd & LAST_BYTE_MASK_;
             if (leadCC < prevTrailCC) {
                 // combining classes out of order, the segment is not FCD
                 result = false;
             }
             leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
         }

         // storing character with 0 lead fcd or the 1st accent with a base
         // character before it
         if (fcd == 0) {
             m_FCDStart_ = offset;
         }
         else {
             m_FCDStart_ = m_source_.getIndex();
         }
         m_source_.setIndex(m_FCDLimit_);
         return result;
     }

     /**
      * <p>Method tries to fetch the previous character that is in fcd form.</p>
      * <p>The character comes from the normalization buffer while the buffer
      * is active and has characters left before the buffer offset, otherwise
      * from the source string. When reading from the source, a backwards FCD
      * check is run unless one of the fast paths applies, and if the check
      * fails the affected segment is normalized into the buffer and its last
      * character is returned.</p>
      * <p>Offsets are returned at the current character.</p>
      * @return previous fcd character, or UCharacterIterator.DONE when the
      *         buffer is exhausted and the buffered segment started at source
      *         offset 0
      */
     private int previousChar()
     {
         if (m_bufferOffset_ >= 0) {
             m_bufferOffset_ --;
             if (m_bufferOffset_ >= 0) {
                 return m_buffer_.charAt(m_bufferOffset_);
             }
             else {
                 // At the start of buffer, route back to string.
                 // m_bufferOffset_ is now -1, i.e. we are no longer in the
                 // buffer.
                 m_buffer_.setLength(0);
                 if (m_FCDStart_ == 0) {
                     // the buffered segment began at the start of the source,
                     // there is nothing before it
                     m_FCDStart_ = -1;
                     m_source_.setIndex(0);
                     return UCharacterIterator.DONE;
                 }
                 else {
                     // continue in the source just before the buffered segment
                     m_FCDLimit_ = m_FCDStart_;
                     m_source_.setIndex(m_FCDStart_);
                     return previousChar();
                 }
             }
         }
         int result = m_source_.previous();
         int startoffset = m_source_.getIndex();
         // fast paths: no FCD check for characters below the lead limit, when
         // decomposition is off, when the position is not before m_FCDStart_,
         // or when we have arrived at the start of the source
         if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
             || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
             || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
             return result;
         }
         // peek at the character before the one we are about to return
         int ch = m_source_.previous();
         if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
             // if previous character is FCD
             // undo the peek
             m_source_.next();
             return result;
         }
         // Need a more complete FCD check and possible normalization.
         if (!FCDCheckBackwards((char)result, startoffset)) {
             normalizeBackwards();
             // the buffer offset is at the end of the buffer, hand out the
             // last normalized character
             m_bufferOffset_ --;
             result = m_buffer_.charAt(m_bufferOffset_);
         }
         else {
             // fcd checks always reset m_source_ to the limit of the FCD
             m_source_.setIndex(startoffset);
         }
         return result;
     }

     /**
      * Tells whether backward iteration has nothing left to read.
      * While reading from the source string this is the case at source index
      * 0; while reading from the buffer it is the case when the buffer offset
      * is 0 and the buffered segment does not start after source offset 0.
      * @return true if iterator at the start, false otherwise
      */
     private final boolean isBackwardsStart()
     {
         if (m_bufferOffset_ < 0) {
             // reading from the source string
             return m_source_.getIndex() == 0;
         }
         // reading from the buffer
         return m_bufferOffset_ == 0 && m_FCDStart_ <= 0;
     }

     /**
      * Tells whether forward iteration has nothing left to read.
      * While reading from the source string this is the case when the source
      * index equals the source length; while reading from the buffer it is
      * the case when the buffer has been read to its end and the FCD limit
      * coincides with the source length.
      * @return true if it is at the end, false otherwise
      */
     private final boolean isEnd()
     {
         if (m_bufferOffset_ < 0) {
             // reading from the source string
             return m_source_.getLength() == m_source_.getIndex();
         }
         // reading from the buffer: characters left in it mean we are not
         // done, otherwise look at where the source would resume
         boolean bufferExhausted = m_bufferOffset_ == m_buffer_.length();
         return bufferExhausted && m_FCDLimit_ == m_source_.getLength();
     }

     /**
      * <p>Special CE management for surrogates</p>
      * <p>Called after a lead surrogate has been met. If the following code
      * unit is a trail surrogate, the CE is looked up in the collator's trie
      * from the lead surrogate's CE and the trail; otherwise the iterator is
      * restored from m_utilSpecialBackUp_ and IGNORABLE is returned. The
      * iterator is also restored when the trie lookup yields
      * CE_NOT_FOUND_.</p>
      * @param collator collator to use
      * @param ce current CE
      * @param trail character
      * @return next CE for the surrogate characters
      */
     private final int nextSurrogate(RuleBasedCollator collator, int ce,
                                     char trail)
     {
         if (UTF16.isTrailSurrogate(trail)) {
             // TODO: CE contain the data from the previous CE + the mask.
             // It should at least be unmasked
             int pairCE = collator.m_trie_.getTrailValue(ce, trail);
             if (pairCE == CE_NOT_FOUND_) {
                 // nothing stored for this pair, undo the consumed trail
                 updateInternalState(m_utilSpecialBackUp_);
             }
             return pairCE;
         }
         // unpaired lead surrogate
         updateInternalState(m_utilSpecialBackUp_);
         return IGNORABLE;
     }

     /**
      * Extracts the expansion table index from an expansion CE: bits 4 to 23
      * of the CE, less the collator's expansion offset.
      * @param collator current collator
      * @param ce ce to test
      * @return expansion offset
      */
     private int getExpansionOffset(RuleBasedCollator collator, int ce)
     {
         int rawIndex = (ce & 0xFFFFF0) >> 4;
         return rawIndex - collator.m_expansionOffset_;
     }


     /**
      * Extracts the contraction table index from a contraction CE: the low
      * 24 bits of the CE, less the collator's contraction offset.
      * @param collator current collator
      * @param ce current ce
      * @return contraction offset
      */
     private int getContractionOffset(RuleBasedCollator collator, int ce)
     {
         int rawIndex = ce & 0xFFFFFF;
         return rawIndex - collator.m_contractionOffset_;
     }

     /**
      * Checks if a CE is a special CE carrying the special processing
      * (prefix) tag, CE_SPEC_PROC_TAG_.
      * @param ce to check
      * @return true if CE is a special CE with that tag, false otherwise
      */
     private boolean isSpecialPrefixTag(int ce)
     {
         if (!RuleBasedCollator.isSpecial(ce)) {
             return false;
         }
         return RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_;
     }

     /**
      * <p>Special processing getting a CE that is preceded by a certain
      * prefix.</p>
      * <p>Used for optimizing Japanese length and iteration marks. When a
      * special processing tag is encountered, iterate backwards to see if
      * there's a match.</p>
      * <p>Contraction tables are used, prefix data is stored backwards in the
      * table.</p>
      * <p>The current iterator state is saved in m_utilSpecialBackUp_ before
      * the backward search. It is restored from there when the search yields
      * something other than CE_NOT_FOUND_; otherwise the iterator is restored
      * from <code>entrybackup</code>.</p>
      * @param collator collator to use
      * @param ce current ce
      * @param entrybackup entry backup iterator status
      * @return next collation element
      */
     private int nextSpecialPrefix(RuleBasedCollator collator, int ce,
                                   Backup entrybackup)
     {
         // remember where we are, then go back to where special processing
         // was entered
         backupInternalState(m_utilSpecialBackUp_);
         updateInternalState(entrybackup);
         previousChar();
         // We want to look at the character where we entered

         while (true) {
             // This loop will run once per source string character, for as
             // long as we are matching a potential contraction sequence
             // First we position ourselves at the beginning of contraction
             // sequence
             int entryoffset = getContractionOffset(collator, ce);
             int offset = entryoffset;
             if (isBackwardsStart()) {
                 // nothing in front of us to match against, take the CE
                 // stored at the head of this table
                 ce = collator.m_contractionCE_[offset];
                 break;
             }
             char previous = (char)previousChar();
             while (previous > collator.m_contractionIndex_[offset]) {
                 // contraction characters are ordered, skip smaller characters
                 offset ++;
             }

             if (previous == collator.m_contractionIndex_[offset]) {
                 // Found the source string char in the table.
                 // Pick up the corresponding CE from the table.
                 ce = collator.m_contractionCE_[offset];
             }
             else {
                 // Source string char was not in the table, prefix not found
                 ce = collator.m_contractionCE_[entryoffset];
             }

             if (!isSpecialPrefixTag(ce)) {
                 // The source string char was in the contraction table, and
                 // the corresponding CE is not a prefix CE. We found the
                 // prefix, break out of loop, this CE will end up being
                 // returned. This is the normal way out of prefix handling
                 // when the source actually contained the prefix.
                 break;
             }
             // ce is another prefix CE: go round again with its table and the
             // character before this one
         }
         if (ce != CE_NOT_FOUND_) {
             // we found something and we can merrily continue
             updateInternalState(m_utilSpecialBackUp_);
         }
         else { // prefix search was a failure, we have to backup all the way to
             // the start
             updateInternalState(entrybackup);
         }
         return ce;
     }

     /**
      * Checks if a CE is a special CE carrying the contraction tag,
      * CE_CONTRACTION_TAG_.
      * @param ce ce to check
      * @return true if ce is a contraction tag, false otherwise
      */
     private boolean isContractionTag(int ce)
     {
         if (!RuleBasedCollator.isSpecial(ce)) {
             return false;
         }
         return RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_;
     }

     /**
      * Queues the characters skipped by a discontiguous contraction so that
      * they are the next characters read by the iterator.
      * <ul>
      * <li>When reading from the source string, the skipped characters become
      *     the whole content of the buffer and the FCD limit is set to the
      *     current source index, which is where reading resumes once the
      *     buffer has been consumed.
      * <li>When already reading from the buffer, the part of the buffer in
      *     front of the buffer offset is replaced by the skipped characters.
      * </ul>
      * In both cases the buffer offset ends up at the start of the buffer.
      * Doing it this way means the CE fetching code needs no knowledge of
      * skipped characters.
      * @param skipped character buffer
      */
     private void setDiscontiguous(StringBuffer skipped)
     {
         String pending = skipped.toString();
         if (m_bufferOffset_ < 0) {
             // in the source string: start a fresh buffer
             m_FCDLimit_ = m_source_.getIndex();
             m_buffer_.setLength(0);
             m_buffer_.append(pending);
         }
         else {
             // in the buffer: swap the consumed part for the skipped
             // characters
             m_buffer_.replace(0, m_bufferOffset_, pending);
         }

         m_bufferOffset_ = 0;
     }

     /**
      * Returns the character most recently passed over by forward iteration,
      * without changing the iterator's position.
      * @return current character
      */
     private int currentChar()
     {
         if (m_bufferOffset_ >= 0) {
             // In the buffer the character just handed out sits one slot
             // before the offset. m_bufferOffset_ is never 0 in normal
             // circumstances except after a discontiguous contraction, since
             // nextChar() always returns a character and moves the offset
             // by 1.
             int lastRead = m_bufferOffset_ - 1;
             return m_buffer_.charAt(lastRead);
         }

         // In the source string: step back over the character and read it
         // again, which leaves the index where it was.
         m_source_.previous();
         return m_source_.next();
     }

     /**
      * Method to get the discontiguous collation element within the source.
      * Note this function will set the position to the appropriate places.
      * Passed in character offset points to the second combining character
      * after the start character.
      * <p>Characters that are read but do not take part in the match are
      * collected in m_utilSkippedBuffer_. When a match is found they are
      * queued with setDiscontiguous() so that they are read next. When no
      * match is found the iterator is restored from
      * m_utilSpecialDiscontiguousBackUp_, moved back one character, and the
      * CE stored at <code>entryoffset</code> is returned.</p>
      * @param collator current collator used
      * @param entryoffset index to the start character in the contraction table
      * @return discontiguous collation element offset
      */
     private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset)
     {
         int offset = entryoffset;
         // set once a nested contraction table with a usable CE at its head
         // has been entered
         boolean multicontraction = false;
         // since it will be stuffed into this iterator and ran over again
         if (m_utilSkippedBuffer_ == null) {
             m_utilSkippedBuffer_ = new StringBuffer();
         }
         else {
             m_utilSkippedBuffer_.setLength(0);
         }
         char ch = (char)currentChar();
         m_utilSkippedBuffer_.append((char)currentChar());
         // accent after the first character
         if (m_utilSpecialDiscontiguousBackUp_ == null) {
             m_utilSpecialDiscontiguousBackUp_ = new Backup();
         }
         backupInternalState(m_utilSpecialDiscontiguousBackUp_);
         char nextch = ch;
         while (true) {
             // ch is the character looked at in the previous round, nextch the
             // one being looked at now
             ch = nextch;
             int ch_int = nextChar();
             nextch = (char)ch_int;
             if (ch_int == UCharacterIterator.DONE
                 || getCombiningClass(nextch) == 0) {
                 // if there are no more accents to move around
                 // we don't have to shift previousChar, since we are resetting
                 // the offset later
                 if (multicontraction) {
                     if (ch_int != UCharacterIterator.DONE) {
                         previousChar(); // backtrack
                     }
                     // settle for the CE at the head of the nested table
                     setDiscontiguous(m_utilSkippedBuffer_);
                     return collator.m_contractionCE_[offset];
                 }
                 break;
             }

             offset ++; // skip the combining class offset
             while (nextch > collator.m_contractionIndex_[offset]) {
                 // table characters are ordered, skip the smaller ones
                 offset ++;
             }

             int ce = CE_NOT_FOUND_;
             if (nextch != collator.m_contractionIndex_[offset]
                     || getCombiningClass(nextch) == getCombiningClass(ch)) {
                     // unmatched or blocked character
                 m_utilSkippedBuffer_.append(nextch);
                 continue;
             }
             else {
                 ce = collator.m_contractionCE_[offset];
             }

             if (ce == CE_NOT_FOUND_) {
                 break;
             }
             else if (isContractionTag(ce)) {
                 // this is a multi-contraction
                 offset = getContractionOffset(collator, ce);
                 if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) {
                     // the nested table has a CE of its own at its head,
                     // remember this position as a fall back
                     multicontraction = true;
                     backupInternalState(m_utilSpecialDiscontiguousBackUp_);
                 }
             }
             else {
                 // complete match, queue the skipped characters and finish
                 setDiscontiguous(m_utilSkippedBuffer_);
                 return ce;
             }
         }

         updateInternalState(m_utilSpecialDiscontiguousBackUp_);
         // backup is one forward of the base character, we need to move back
         // one more
         previousChar();
         return collator.m_contractionCE_[entryoffset];
     }

     /**
      * Gets the next contraction ce
      * @param collator collator to use
      * @param ce current ce
      * @return ce of the next contraction
      */
     private int nextContraction(RuleBasedCollator collator, int ce)
     {
         backupInternalState(m_utilSpecialBackUp_);
         int entryce = collator.m_contractionCE_[getContractionOffset(collator, ce)]; //CE_NOT_FOUND_;
         while (true) {
             int entryoffset = getContractionOffset(collator, ce);
             int offset = entryoffset;

             if (isEnd()) {
                 ce = collator.m_contractionCE_[offset];
                 if (ce == CE_NOT_FOUND_) {
                     // back up the source over all the chars we scanned going
                     // into this contraction.
                     ce = entryce;
                     updateInternalState(m_utilSpecialBackUp_);
                 }
                 break;
             }

             // get the discontiguos maximum combining class
             int maxCC = (collator.m_contractionIndex_[offset] & 0xFF);
             // checks if all characters have the same combining class
             byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
             char ch = (char)nextChar();
             offset ++;
             while (ch > collator.m_contractionIndex_[offset]) {
                 // contraction characters are ordered, skip all smaller
                 offset ++;
             }

             if (ch == collator.m_contractionIndex_[offset]) {
                 // Found the source string char in the contraction table.
                 //  Pick up the corresponding CE from the table.
                 ce = collator.m_contractionCE_[offset];
             }
             else {
                 // Source string char was not in contraction table.
                 // Unless it is a discontiguous contraction, we are done
                 int miss = ch;
                 if(UTF16.isLeadSurrogate(ch)) { // in order to do the proper detection, we
                     // need to see if we're dealing with a supplementary
                     miss = UCharacterProperty.getRawSupplementary(ch, (char) nextChar());
                   }
                 int sCC;
                 if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0
                     || sCC > maxCC || (allSame != 0 && sCC == maxCC) ||
                     isEnd()) {
                     // Contraction can not be discontiguous, back up by one
                     previousChar();
                     if(miss > 0xFFFF) {
                         previousChar();
                     }
                     ce = collator.m_contractionCE_[entryoffset];
                 }
                 else {
                     // Contraction is possibly discontiguous.
                     // find the next character if ch is not a base character
                     int ch_int = nextChar();
                     if (ch_int != UCharacterIterator.DONE) {
                         previousChar();
                     }
                     char nextch = (char)ch_int;
                     if (getCombiningClass(nextch) == 0) {
                         previousChar();
                         if(miss > 0xFFFF) {
                             previousChar();
                         }
                         // base character not part of discontiguous contraction
                         ce = collator.m_contractionCE_[entryoffset];
                     }
                     else {
                         ce = nextDiscontiguous(collator, entryoffset);
                     }
                 }
             }

             if (ce == CE_NOT_FOUND_) {
                 // source did not match the contraction, revert back original
                 updateInternalState(m_utilSpecialBackUp_);
                 ce = entryce;
                 break;
             }

             // source was a contraction
             if (!isContractionTag(ce)) {
                 break;
             }

             // ccontinue looping to check for the remaining contraction.
             if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) {
                 // there are further contractions to be performed, so we store
                 // the so-far completed ce, so that if we fail in the next
                 // round we just return this one.
                 entryce = collator.m_contractionCE_[entryoffset];
                 backupInternalState(m_utilSpecialBackUp_);
                 if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) {
                     m_utilSpecialBackUp_.m_bufferOffset_ --;
                 }
                 else {
                     m_utilSpecialBackUp_.m_offset_ --;
                 }
             }
         }
         return ce;
     }

     /**
      * Gets the next ce for long primaries, stuffs the rest of the collation
      * elements into the ce buffer
      * @param ce current ce
      * @return next ce
      */
     private int nextLongPrimary(int ce)
     {
         // A long primary packs three primary bytes into one special ce.
         // The lowest payload byte goes into a continuation element ...
         final int continuation = ((ce & 0xFF) << 24)
                                  | RuleBasedCollator.CE_CONTINUATION_MARKER_;
         // ... and the two bytes above it form the leading element, which is
         // given common secondary and tertiary weights.
         final int leading = ((ce & 0xFFFF00) << 8)
                             | (CE_BYTE_COMMON_ << 8)
                             | CE_BYTE_COMMON_;

         m_CEBuffer_[1] = continuation;
         m_CEBuffer_[0] = leading;
         // two elements buffered, the first of which is handed out right now
         m_CEBufferSize_ = 2;
         m_CEBufferOffset_ = 1;
         return leading;
     }

     /**
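      * Gets the number of collation elements in an expansion, as stored in the
      * low four bits of the expansion ce
      * @param ce current ce
      * @return number of collation elements in the expansion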
      */
     private int getExpansionCount(int ce)
     {
         // the count occupies the lowest nibble of the ce
         final int countMask = 0xF;
         return ce & countMask;
     }

     /**
      * Gets the next expansion ce and stuffs the rest of the collation elements
      * into the ce buffer
      * @param collator current collator
      * @param ce current ce
      * @return next expansion ce
      */
     private int nextExpansion(RuleBasedCollator collator, int ce)
     {
         // NOTE: we can encounter both continuations and expansions in an
         // expansion!
         // I have to decide where continuations are going to be dealt with
         int offset = getExpansionOffset(collator, ce);
         int count = getExpansionCount(ce);
         // The first table entry is always the value returned, even when a
         // zero-terminated expansion turns out to be empty.
         m_CEBuffer_[0] = collator.m_expansion_[offset];
         if (count != 0) {
             // fewer than 16 elements: the count is stored in the ce itself
             for (int i = 1; i < count; i ++) {
                 m_CEBuffer_[i] = collator.m_expansion_[offset + i];
             }
         }
         else {
             // A count of zero means the elements are zero-terminated in the
             // table. Copy up to, but not including, the terminator so that
             // forward iteration produces exactly the same elements as
             // previousExpansion() does; the terminator itself is not a
             // collation element of the expansion.
             while (collator.m_expansion_[offset + count] != 0) {
                 m_CEBuffer_[count] = collator.m_expansion_[offset + count];
                 count ++;
             }
         }
         if (count <= 1) {
             // Nothing is left to hand out after the element returned below,
             // so leave the ce buffer empty.
             m_CEBufferSize_ = 0;
             m_CEBufferOffset_ = 0;
         }
         else {
             // element 0 is returned now, the next call continues at 1
             m_CEBufferSize_ = count;
             m_CEBufferOffset_ = 1;
         }
         return m_CEBuffer_[0];
     }

     /**
      * Gets the next digit ce
      * @param collator current collator
      * @param ce current collation element
      * @param cp current codepoint
      * @return next digit ce
      */
     private int nextDigit(RuleBasedCollator collator, int ce, int cp)
     {
         // We do a check to see if we want to collate digits as numbers;
         // if so we generate a custom collation key. Otherwise we pull out
         // the value stored in the expansion table.
         // Note that the numeric switch is read from m_collator_ (the
         // iterator's own collator), while the expansion table lookup at the
         // bottom uses the collator argument.

         if (m_collator_.m_isNumericCollation_){
             // value of the base-100 "digit pair" currently being assembled
             int collateVal = 0;
             // buffer index of a zero pair that may turn out to be trailing;
             // 0 means "none recorded"
             int trailingZeroIndex = 0;
             // becomes true once the first non-zero digit has been seen
             boolean nonZeroValReached = false;

             // I just need a temporary place to store my generated CEs.
             // icu4c uses an unsigned byte array, I'll use a string buffer
             // here to avoid dealing with the sign problems and array
             // allocation.
             // Clear and set the initial string buffer length: slots 0 and 1
             // are filled in after the loop, slot 2 is the first digit pair.
             m_utilStringBuffer_.setLength(3);

             // We parse the source string until we hit a char that's NOT a
             // digit.
             // Use this u_charDigitValue. This might be slow because we have
             // to handle surrogates...
             int digVal = UCharacter.digit(cp);
             // If we have arrived here, we have already processed possible
             // supplementaries that triggered the digit tag -
             // all supplementaries are marked in the UCA.
             // We pad a zero in front of the first element anyway (digIndx
             // starts at 1, i.e. at a 'ones' position).
             // This takes care of the (probably) most common case where
             // people are sorting things followed by a single digit.
             int digIndx = 1;
             for (;;) {
                 // Make sure we have enough space: double the buffer once
                 // the digit index reaches twice the number of pair slots.
                 if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
                     m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
                                                   << 1);
                 }
                 // Skipping over leading zeroes.
                 if (digVal != 0 || nonZeroValReached) {
                     if (digVal != 0 && !nonZeroValReached) {
                         nonZeroValReached = true;
                     }
                     // We parse the digit string into base 100 numbers
                     // (this fits into a byte).
                     // We only add to the buffer in twos, thus if digIndx is
                     // even the digit serves as the 'tens' digit, while if
                     // digIndx is odd it is the 'ones' digit that completes
                     // the pair. The parsed base 100 value (collateVal) is
                     // stored in the buffer as (collateVal * 2) + 6: the
                     // doubling gives us room and the offset avoids
                     // overlapping magic CE byte values. After the loop one
                     // is subtracted from the last byte to ensure it is less
                     // than all the other bytes.
                     if (digIndx % 2 == 1) {
                         collateVal += digVal;
                         // This removes trailing zeroes: remember the slot of
                         // a zero pair so the key can be cut off there.
                         // NOTE(review): since the second test is an else-if
                         // of the combined condition, a zero pair that
                         // directly follows an already recorded zero pair
                         // also clears the index - confirm this is intended.
                         if (collateVal == 0 && trailingZeroIndex == 0) {
                             trailingZeroIndex = ((digIndx - 1) >>> 1) + 2;
                         }
                         else if (trailingZeroIndex != 0) {
                             trailingZeroIndex = 0;
                         }
                         m_utilStringBuffer_.setCharAt(
                                             ((digIndx - 1) >>> 1) + 2,
                                             (char)((collateVal << 1) + 6));
                         collateVal = 0;
                     }
                     else {
                         // We drop the collation value into the buffer so if
                         // we need to do a "front patch" we don't have to
                         // check to see if we're hitting the last element.
                         collateVal = digVal * 10;
                         m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
                                                 (char)((collateVal << 1) + 6));
                     }
                     digIndx ++;
                 }

                 // Get next character.
                 if (!isEnd()){
                     // remember where we are so that a non-digit can be
                     // pushed back
                     backupInternalState(m_utilSpecialBackUp_);
                     int char32 = nextChar();
                     char ch = (char)char32;
                     if (UTF16.isLeadSurrogate(ch)){
                         if (!isEnd()) {
                             char trail = (char)nextChar();
                             if (UTF16.isTrailSurrogate(trail)) {
                                // well-formed pair: test the supplementary
                                // code point for a digit value
                                char32 = UCharacterProperty.getRawSupplementary(
                                                                    ch, trail);
                             }
                             else {
                                 // not a pair, un-read the second code unit
                                 goBackOne();
                             }
                         }
                     }

                     digVal = UCharacter.digit(char32);
                     if (digVal == -1) {
                         // Resetting position to point to the next unprocessed
                         // char. We overshot it when doing our test/set for
                         // numbers.
                         updateInternalState(m_utilSpecialBackUp_);
                         break;
                     }
                 }
                 else {
                     break;
                 }
             }

             // Only zeroes were seen: encode a single zero pair.
             if (nonZeroValReached == false){
                 digIndx = 2;
                 m_utilStringBuffer_.setCharAt(2, (char)6);
             }

             // One past the last buffer slot that goes into the key; a
             // recorded trailing zero pair cuts the key short.
             int endIndex = trailingZeroIndex != 0 ? trailingZeroIndex
                                              : (digIndx >>> 1) + 2;
             if (digIndx % 2 != 0){
                 // We missed a value. Since digIndx isn't even, we stuck too
                 // many values into the buffer (this is what we get for
                 // padding the first byte with a zero). "Front-patch" now by
                 // pushing all nybbles forward: each slot takes the ones
                 // digit of its own pair and the tens digit of the next one.
                 // Doing it this way ensures that at least 50% of the time
                 // (statistically speaking) we'll only be doing a single pass
                 // and optimizes for strings with single digits. I'm just
                 // assuming that's the more common case.
                 for (int i = 2; i < endIndex; i ++){
                     m_utilStringBuffer_.setCharAt(i,
                         (char)((((((m_utilStringBuffer_.charAt(i) - 6) >>> 1)
                                   % 10) * 10)
                                  + (((m_utilStringBuffer_.charAt(i + 1) - 6)
                                       >>> 1) / 10) << 1) + 6));
                 }
                 -- digIndx;
             }

             // Subtract one off of the last byte.
             m_utilStringBuffer_.setCharAt(endIndex - 1,
                          (char)(m_utilStringBuffer_.charAt(endIndex - 1) - 1));

             // We want to skip over the first two slots in the buffer.
             // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
             // The second slot is for the sign/exponent byte:
             // 0x80 + ((digIndx / 2) & 0x7F).
             m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
             m_utilStringBuffer_.setCharAt(1,
                                      (char)(0x80 + ((digIndx >>> 1) & 0x7F)));

             // Now transfer the collation key to the ce buffer.
             // The first ce carries slots 0 and 1 as its primary weight and
             // common secondary/tertiary weights; the slots from 2 up to
             // endIndex follow two per continuation ce.
             ce = (((m_utilStringBuffer_.charAt(0) << 8)
                        // Primary weight
                        | m_utilStringBuffer_.charAt(1))
                                     << RuleBasedCollator.CE_PRIMARY_SHIFT_)
                        //  Secondary weight
                        | (RuleBasedCollator.BYTE_COMMON_
                           << RuleBasedCollator.CE_SECONDARY_SHIFT_)
                        | RuleBasedCollator.BYTE_COMMON_; // Tertiary weight.
             int i = 2; // Reset the index into the buffer.

             m_CEBuffer_[0] = ce;
             m_CEBufferSize_ = 1;
             m_CEBufferOffset_ = 1;
             while (i < endIndex)
             {
                 // high byte of the primary, plus a low byte when one is left
                 int primWeight = m_utilStringBuffer_.charAt(i ++) << 8;
                 if (i < endIndex) {
                     primWeight |= m_utilStringBuffer_.charAt(i ++);
                 }
                 m_CEBuffer_[m_CEBufferSize_ ++]
                     = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
                       | RuleBasedCollator.CE_CONTINUATION_MARKER_;
             }
             return ce;
         }

         // no numeric mode, we'll just switch to whatever we stashed and
         // continue
         // find the offset to expansion table
         return collator.m_expansion_[getExpansionOffset(collator, ce)];
     }

     /**
      * Gets the next implicit ce for codepoints
      * @param codepoint current codepoint
      * @return implicit ce
      */
     private int nextImplicit(int codepoint)
     {
         if (UCharacter.isLegal(codepoint)) {
             final int implicit =
                 RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
             // The upper half of the implicit weight becomes the primary of
             // the first element, combined with the fixed 0x05/0x05 lower
             // bytes.
             final int first = (implicit & RuleBasedCollator.CE_PRIMARY_MASK_)
                               | 0x00000505;
             m_CEBuffer_[0] = first;
             // The lower half moves up into the primary of a second element
             // whose low byte is 0xC0.
             m_CEBuffer_[1] = ((implicit & 0x0000FFFF) << 16) | 0x000000C0;
             // two elements buffered, the first one is returned immediately
             m_CEBufferSize_ = 2;
             m_CEBufferOffset_ = 1;
             return first;
         }
         // synwee to check with vladimir on the range of isNonChar()
         // illegal code value, use completely ignorable!
         return IGNORABLE;
     }

     /**
      * Returns the next ce associated with the following surrogate characters
      * @param ch current character
      * @return ce
      */
     private int nextSurrogate(char ch)
     {
         int ch_int = nextChar();
         // Test the int result before narrowing it to a char. The rest of
         // this class checks nextChar() against UCharacterIterator.DONE;
         // comparing the narrowed value with CharacterIterator.DONE (U+FFFF)
         // cannot tell "end of text" apart from a genuine U+FFFF in the
         // text, which would then be consumed without being backed up over.
         if (ch_int == UCharacterIterator.DONE) {
             // Ran off the end of the text. The position is left alone, as
             // before, and the lone lead surrogate is completely ignorable.
             return IGNORABLE;
         }
         char nextch = (char)ch_int;
         if (UTF16.isTrailSurrogate(nextch)) {
             // well-formed surrogate pair, compute its implicit ce
             int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
             return nextImplicit(codepoint);
         }
         // Not a trail surrogate: un-read it so it gets processed on its own.
         previousChar(); // reverts back to the original position
         return IGNORABLE; // completely ignorable
     }

     /**
      * Returns the next ce for a hangul character, this is an implicit
      * calculation
      * @param collator current collator
      * @param ch current character
      * @return hangul ce
      */
     private int nextHangul(RuleBasedCollator collator, char ch)
     {
         // Split the syllable arithmetically into its jamo. Every
         // intermediate value is narrowed to char, mirroring 16 bit
         // arithmetic.
         final char syllableIndex = (char)(ch - HANGUL_SBASE_);
         final char leadVowelIndex = (char)(syllableIndex / HANGUL_TCOUNT_);

         final char trailJamo =
             (char)(HANGUL_TBASE_ + syllableIndex % HANGUL_TCOUNT_);
         final char vowelJamo =
             (char)(HANGUL_VBASE_ + leadVowelIndex % HANGUL_VCOUNT_);
         final char leadJamo =
             (char)(HANGUL_LBASE_ + leadVowelIndex / HANGUL_VCOUNT_);
         // a trail jamo equal to the base value means "no trailing consonant"
         final boolean hasTrail = trailJamo != HANGUL_TBASE_;

         m_CEBufferSize_ = 0;
         if (collator.m_isJamoSpecial_) {
             // Jamo is Special
             // Since Hanguls pass the FCD check, it is guaranteed that we
             // won't be in the normalization buffer if something like this
             // happens
             // Move Jamos into normalization buffer
             m_buffer_.append(leadJamo);
             m_buffer_.append(vowelJamo);
             if (hasTrail) {
                 m_buffer_.append(trailJamo);
             }
             // Indicate where to continue in main input string after
             // exhausting the buffer
             m_FCDLimit_ = m_source_.getIndex();
             m_FCDStart_ = m_FCDLimit_ - 1;
             return IGNORABLE;
         }

         // FAST PATH: look the jamo up directly and stash their ces; the
         // first one is returned, the rest stay in the ce buffer
         m_CEBuffer_[0] = collator.m_trie_.getLeadValue(leadJamo);
         m_CEBuffer_[1] = collator.m_trie_.getLeadValue(vowelJamo);
         m_CEBufferSize_ = 2;
         if (hasTrail) {
             m_CEBuffer_[2] = collator.m_trie_.getLeadValue(trailJamo);
             m_CEBufferSize_ = 3;
         }
         m_CEBufferOffset_ = 1;
         return m_CEBuffer_[0];
     }

     /**
      * <p>Special CE management. Expansions, contractions etc...</p>
      * @param collator can be plain UCA
      * @param ce current ce
      * @param ch current character
      * @return next special ce
      */
     private int nextSpecial(RuleBasedCollator collator, int ce, char ch)
     {
         int codepoint = ch;
         // This is to handle recursive looping: take over the cached entry
         // backup if there is one and clear the field while it is in use,
         // otherwise work with a fresh object.
         Backup entrybackup = m_utilSpecialEntryBackUp_;
         if (entrybackup == null) {
             entrybackup = new Backup();
         }
         else {
             m_utilSpecialEntryBackUp_ = null;
         }
         backupInternalState(entrybackup);
         try { // the finally clause always hands the backup object back
             // Most tags return straight away. Only the cases that end in a
             // break come back here, and they go round again for as long as
             // the ce they produced is itself special.
             do {
                 switch (RuleBasedCollator.getTag(ce)) {
                 case CE_NOT_FOUND_TAG_:
                     // impossible case for icu4j
                     return ce;
                 case RuleBasedCollator.CE_SURROGATE_TAG_: {
                     if (isEnd()) {
                         return IGNORABLE;
                     }
                     backupInternalState(m_utilSpecialBackUp_);
                     char trail = (char)nextChar();
                     ce = nextSurrogate(collator, ce, trail);
                     // calculate the supplementary code point value,
                     // if surrogate was not tailored we go one more round
                     codepoint =
                         UCharacterProperty.getRawSupplementary(ch, trail);
                     break;
                 }
                 case CE_SPEC_PROC_TAG_:
                     ce = nextSpecialPrefix(collator, ce, entrybackup);
                     break;
                 case CE_CONTRACTION_TAG_:
                     ce = nextContraction(collator, ce);
                     break;
                 case CE_DIGIT_TAG_:
                     ce = nextDigit(collator, ce, codepoint);
                     break;
                 case CE_LONG_PRIMARY_TAG_:
                     return nextLongPrimary(ce);
                 case CE_EXPANSION_TAG_:
                     return nextExpansion(collator, ce);
                 // various implicits optimization
                 case CE_CJK_IMPLICIT_TAG_:
                     // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
                 case CE_IMPLICIT_TAG_:
                     // everything that is not defined
                     return nextImplicit(codepoint);
                 case CE_LEAD_SURROGATE_TAG_:
                     // D800-DBFF
                     return nextSurrogate(ch);
                 case CE_TRAIL_SURROGATE_TAG_:
                     // DC00-DFFF broken surrogate
                     return IGNORABLE;
                 case CE_HANGUL_SYLLABLE_TAG_:
                     // AC00-D7AF
                     return nextHangul(collator, ch);
                 case CE_CHARSET_TAG_:
                     // not yet implemented probably after 1.8
                     return CE_NOT_FOUND_;
                 default:
                     // synwee todo, throw exception or something here.
                     ce = IGNORABLE;
                 }
             } while (RuleBasedCollator.isSpecial(ce));
         }
         finally {
             m_utilSpecialEntryBackUp_ = entrybackup;
         }
         return ce;
     }

     /**
      * Special processing is getting a CE that is preceded by a certain prefix.
      * Currently this is only needed for optimizing Japanese length and
      * iteration marks. When we encounter a special processing tag, we go
      * backwards and try to see if we have a match. Contraction tables are
      * used, so the whole process is not unlike contraction. Prefix data is
      * stored backwards in the table.
      * @param collator current collator
      * @param ce current ce
      * @return previous ce
      */
     private int previousSpecialPrefix(RuleBasedCollator collator, int ce)
     {
         // The prefix lookup only peeks backwards: the iterator state saved
         // here is put back just before returning.
         backupInternalState(m_utilSpecialBackUp_);
         while (true) {
             // position ourselves at the beginning of the contraction
             // sequence
             int offset = getContractionOffset(collator, ce);
             // the entry slot holds the ce to use when no prefix matches
             int entryoffset = offset;
             if (isBackwardsStart()) {
                 // nothing precedes us, so there can be no prefix
                 ce = collator.m_contractionCE_[offset];
                 break;
             }
             char prevch = (char)previousChar();
             while (prevch > collator.m_contractionIndex_[offset]) {
                 // since contraction codepoints are ordered, we skip all that
                 // are smaller
                 offset ++;
             }
             if (prevch == collator.m_contractionIndex_[offset]) {
                 // the preceding char is in the table, take its ce
                 ce = collator.m_contractionCE_[offset];
             }
             else {
                 // If there is a completely ignorable code point in the middle
                 // of a prefix, we need to act as if it's not there.
                 // Assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef)
                 // are set to zero.
                 // Lone surrogates cannot be set to zero as it would break
                 // other processing.
                 // Each "continue" below restarts the loop with ce unchanged,
                 // i.e. the same table is searched again with the char before
                 // the one that is being ignored.
                 int isZeroCE = collator.m_trie_.getLeadValue(prevch);
                 // it's easy for BMP code points
                 if (isZeroCE == 0) {
                     continue;
                 }
                 else if (UTF16.isTrailSurrogate(prevch)
                          || UTF16.isLeadSurrogate(prevch)) {
                     // for supplementary code points, we have to check the
                     // next one
                     // situations where we are going to ignore
                     // 1. beginning of the string: prevch is a lone surrogate
                     // 2. prevch is a lone surrogate
                     // 3. prevch is a trail surrogate in a valid surrogate
                     //    sequence that is explicitly set to zero.
                     if (!isBackwardsStart()) {
                         char lead = (char)previousChar();
                         if (UTF16.isLeadSurrogate(lead)) {
                             isZeroCE = collator.m_trie_.getLeadValue(lead);
                             if (RuleBasedCollator.getTag(isZeroCE)
                                 == RuleBasedCollator.CE_SURROGATE_TAG_) {
                                 int finalCE = collator.m_trie_.getTrailValue(
                                                                       isZeroCE,
                                                                       prevch);
                                 if (finalCE == 0) {
                                     // this is a real, assigned completely
                                     // ignorable code point
                                     continue;
                                 }
                             }
                         }
                         else {
                             nextChar(); // revert to original offset
                             // lone surrogate, completely ignorable
                             continue;
                         }
                         // the pair is not ignorable: step forward again over
                         // the lead that was read and treat this as a miss
                         nextChar(); // revert to original offset
                     }
                     else {
                          // lone surrogate at the beginning, completely
                          // ignorable
                          continue;
                     }
                 }

                 // char was not in the table. prefix not found
                 ce = collator.m_contractionCE_[entryoffset];
             }

             if (!isSpecialPrefixTag(ce)) {
                 // The ce picked above is not a prefix ce, so the lookup is
                 // finished (either the prefix was found or there is none).
                 // Break out of the loop, this ce will end up being returned.
                 break;
             }
             // otherwise ce is another prefix ce: go round again with it
         }
         updateInternalState(m_utilSpecialBackUp_);
         return ce;
     }

     /**
      * Retrieves the previous contraction ce. To ensure that the backwards and
      * forwards iteration matches, we take the current region of most possible
      * match and pass it through the forward iteration. This will ensure that
      * the obstinate problem of overlapping contractions will not occur.
      * @param collator current collator
      * @param ce current ce
      * @param ch current character
      * @return previous contraction ce
      */
     private int previousContraction(RuleBasedCollator collator, int ce, char ch)
     {
         m_utilStringBuffer_.setLength(0);
         // since we might encounter normalized characters (from the thai
         // processing) we can't use peekCharacter() here.
         char prevch = (char)previousChar();
         boolean atStart = false;
         // TODO: address the comment above - maybe now we *can* use peekCharacter
         // Walk backwards while the current char is unsafe, collecting the
         // chars in source order; prevch is always one char ahead of ch.
         while (collator.isUnsafe(ch)) {
             m_utilStringBuffer_.insert(0, ch);
             ch = prevch;
             if (isBackwardsStart()) {
                 atStart = true;
                 break;
             }
             prevch = (char)previousChar();
         }
         if (!atStart) {
             // undo the previousChar() if we didn't reach the beginning
             nextChar();
         }
         // adds the initial base character to the string
         m_utilStringBuffer_.insert(0, ch);

         // a new collation element iterator is used to simplify things, since
         // using the current collation element iterator will mean that the
         // forward and backwards iteration will share and change the same
         // buffers. it is going to be painful.
         int originaldecomp = collator.getDecomposition();
         // for faster access, since string would have been normalized above
         collator.setDecomposition(Collator.NO_DECOMPOSITION);
         try {
             if (m_utilColEIter_ == null) {
                 m_utilColEIter_ = new CollationElementIterator(
                                                 m_utilStringBuffer_.toString(),
                                                 collator);
             }
             else {
                 m_utilColEIter_.m_collator_ = collator;
                 m_utilColEIter_.setText(m_utilStringBuffer_.toString());
             }
             // run the collected segment forwards and keep all of its ces
             ce = m_utilColEIter_.next();
             m_CEBufferSize_ = 0;
             while (ce != NULLORDER) {
                 if (m_CEBufferSize_ == m_CEBuffer_.length) {
                     // Increasing cebuffer size. Allocating and copying an
                     // int array raises no exception that could sensibly be
                     // handled here, so no catch block surrounds it.
                     int[] tempbuffer = new int[m_CEBuffer_.length + 50];
                     System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0,
                                      m_CEBuffer_.length);
                     m_CEBuffer_ = tempbuffer;
                 }
                 m_CEBuffer_[m_CEBufferSize_ ++] = ce;
                 ce = m_utilColEIter_.next();
             }
         }
         finally {
             // The collator belongs to the caller: always give it its
             // decomposition mode back, even if the nested iteration failed.
             collator.setDecomposition(originaldecomp);
         }
         // backwards iteration hands the buffer out from its last element
         m_CEBufferOffset_ = m_CEBufferSize_ - 1;
         return m_CEBuffer_[m_CEBufferOffset_];
     }

     /**
      * Returns the previous long primary ces
      * @param ce long primary ce
      * @return previous long primary ces
      */
     private int previousLongPrimary(int ce)
     {
         // Same two elements as in forward iteration: the leading element
         // built from the two upper payload bytes, with common secondary and
         // tertiary weights ...
         m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8)
                          | (CE_BYTE_COMMON_ << 8)
                          | CE_BYTE_COMMON_;
         // ... followed by the continuation carrying the lowest payload byte.
         final int continuation = ((ce & 0xFF) << 24)
                                  | RuleBasedCollator.CE_CONTINUATION_MARKER_;
         m_CEBuffer_[1] = continuation;
         m_CEBufferSize_ = 2;
         // going backwards, the last buffered element is handed out first
         m_CEBufferOffset_ = 1;
         return continuation;
     }

     /**
      * Returns the previous expansion ces
      * @param collator current collator
      * @param ce current ce
      * @return previous expansion ce
      */
     private int previousExpansion(RuleBasedCollator collator, int ce)
     {
         // find the offset to expansion table
         final int base = getExpansionOffset(collator, ce);
         final int declared = getExpansionCount(ce);
         int copied = 0;
         if (declared == 0) {
             // no count stored in the ce: the elements are zero-terminated
             while (collator.m_expansion_[base + copied] != 0) {
                 m_CEBuffer_[copied] = collator.m_expansion_[base + copied];
                 copied ++;
             }
         }
         else {
             // less than 16 elements in expansion, the count is in the ce
             while (copied < declared) {
                 m_CEBuffer_[copied] = collator.m_expansion_[base + copied];
                 copied ++;
             }
         }
         m_CEBufferSize_ = copied;
         // going backwards, the last element is handed out first
         m_CEBufferOffset_ = copied - 1;
         return m_CEBuffer_[m_CEBufferOffset_];
     }

     /**
      * Gets the collation element for a digit while iterating backwards.
      * <p>When numeric collation is enabled on this iterator's collator, the
      * whole run of digits that ends at <code>ch</code> is consumed (walking
      * backwards through the source) and packed, two decimal digits per
      * byte, into a sequence of collation elements that is stored in the CE
      * buffer; the last element of that buffer is returned. Otherwise the
      * element stored in the expansion table of <code>collator</code> is
      * returned.</p>
      * @param collator collator whose expansion table is used when numeric
      *                 collation is not enabled
      * @param ce current collation element
      * @param ch current UTF-16 code unit; if it is a trail surrogate the
      *           preceding lead surrogate is combined with it
      * @return digit collation element
      */
     private int previousDigit(RuleBasedCollator collator, int ce, char ch)
     {
         // We do a check to see if we want to collate digits as numbers; if so
         // we generate a custom collation key. Otherwise we pull out the value
         // stored in the expansion table.
         if (m_collator_.m_isNumericCollation_) {
             int leadingZeroIndex = 0;
             int collateVal = 0;
             boolean nonZeroValReached = false;

             // set the initial length of the scratch buffer: two header
             // slots plus one slot for the first digit pair
             m_utilStringBuffer_.setLength(3);

             // We parse the source string until we hit a char that's NOT a
             // digit. This might be slow because we have to handle
             // surrogates...
             int char32 = ch;
             if (UTF16.isTrailSurrogate(ch)) {
                 if (!isBackwardsStart()) {
                     char lead = (char)previousChar();
                     if (UTF16.isLeadSurrogate(lead)) {
                         char32 = UCharacterProperty.getRawSupplementary(lead,
                                                                         ch);
                     }
                     else {
                         // unpaired trail surrogate, undo the look-behind
                         goForwardOne();
                     }
                 }
             }
             int digVal = UCharacter.digit(char32);
             int digIndx = 0;
             for (;;) {
                 // Make sure we have enough space.
                 if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
                     m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
                                                   << 1);
                 }
                 // Skipping over "trailing" zeroes but we still add to digIndx.
                 if (digVal != 0 || nonZeroValReached) {
                     if (digVal != 0 && !nonZeroValReached) {
                         nonZeroValReached = true;
                     }

                     // We parse the digit string into base 100 numbers (this
                     // fits into a byte).
                     // We only add to the buffer in twos, thus if we are
                     // parsing an odd character, that serves as the 'tens'
                     // digit while if we are parsing an even one, that is
                     // the 'ones' digit. We dump the parsed base 100 value
                     // (collateVal) into a buffer. We multiply each collateVal
                     // by 2 (to give us room) and add 6. The last byte we
                     // subtract 1 to ensure it is less than all the other
                     // bytes.
                     // Since we're doing this in reverse we want to put the
                     // first digit encountered into the ones place and the
                     // second digit encountered into the tens place.

                     if (digIndx % 2 == 1) {
                         collateVal += digVal * 10;

                         // This removes leading zeroes.
                         if (collateVal == 0 && leadingZeroIndex == 0) {
                             leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
                         }
                         else if (leadingZeroIndex != 0) {
                             leadingZeroIndex = 0;
                         }

                         m_utilStringBuffer_.setCharAt(((digIndx - 1) >>> 1) + 2,
                                                 (char)((collateVal << 1) + 6));
                         collateVal = 0;
                     }
                     else {
                         collateVal = digVal;
                     }
                 }
                 digIndx ++;

                 if (!isBackwardsStart()) {
                     backupInternalState(m_utilSpecialBackUp_);
                     char32 = previousChar();
                     // ch has to track the code unit that was just read, so
                     // that the surrogate check below looks at it and not at
                     // the code unit this method was originally called with
                     ch = (char)char32;
                     if (UTF16.isTrailSurrogate(ch)) {
                         if (!isBackwardsStart()) {
                             char lead = (char)previousChar();
                             if (UTF16.isLeadSurrogate(lead)) {
                                 char32
                                     = UCharacterProperty.getRawSupplementary(
                                                                     lead, ch);
                             }
                             else {
                                 updateInternalState(m_utilSpecialBackUp_);
                             }
                         }
                     }

                     digVal = UCharacter.digit(char32);
                     if (digVal == -1) {
                         // not a digit, put it back and stop
                         updateInternalState(m_utilSpecialBackUp_);
                         break;
                     }
                 }
                 else {
                     break;
                 }
             }

             if (!nonZeroValReached) {
                 // only zeroes were seen, emit a single zero pair
                 digIndx = 2;
                 m_utilStringBuffer_.setCharAt(2, (char)6);
             }

             if (digIndx % 2 != 0) {
                 if (collateVal == 0 && leadingZeroIndex == 0) {
                     // This removes the leading 0 in a odd number sequence of
                     // numbers e.g. avery001
                     leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
                 }
                 else {
                     // this is not a leading 0, we add it in
                     m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
                                                 (char)((collateVal << 1) + 6));
                     digIndx ++;
                 }
             }

             int endIndex = leadingZeroIndex != 0 ? leadingZeroIndex
                                                  : ((digIndx >>> 1) + 2);
             digIndx = ((endIndex - 2) << 1) + 1; // removing initial zeros
             // Subtract one off of the last byte.
             // Really the first byte here, but it's reversed...
             m_utilStringBuffer_.setCharAt(2,
                                     (char)(m_utilStringBuffer_.charAt(2) - 1));
             // We want to skip over the first two slots in the buffer.
             // The first slot is reserved for the header byte
             // CODAN_PLACEHOLDER.
             // The second slot is for the sign/exponent byte:
             // 0x80 + (decimalPos/2) & 7f.
             m_utilStringBuffer_.setCharAt(0,
                                     (char)RuleBasedCollator.CODAN_PLACEHOLDER);
             m_utilStringBuffer_.setCharAt(1,
                                     (char)(0x80 + ((digIndx >>> 1) & 0x7F)));

             // Now transfer the collation key to the CE buffer. The first
             // element carries the two header slots as its primary weight,
             // the digit pairs follow as continuation elements, read from
             // the end of the scratch buffer towards slot 2.
             m_CEBufferSize_ = 0;
             m_CEBuffer_[m_CEBufferSize_ ++]
                         = (((m_utilStringBuffer_.charAt(0) << 8)
                             // Primary weight
                             | m_utilStringBuffer_.charAt(1))
                               << RuleBasedCollator.CE_PRIMARY_SHIFT_)
                             // Secondary weight
                             | (RuleBasedCollator.BYTE_COMMON_
                                << RuleBasedCollator.CE_SECONDARY_SHIFT_)
                             // Tertiary weight.
                             | RuleBasedCollator.BYTE_COMMON_;
             int i = endIndex - 1; // Reset the index into the buffer.
             while (i >= 2) {
                 int primWeight = m_utilStringBuffer_.charAt(i --) << 8;
                 if (i >= 2) {
                     primWeight |= m_utilStringBuffer_.charAt(i --);
                 }
                 m_CEBuffer_[m_CEBufferSize_ ++]
                     = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
                       | RuleBasedCollator.CE_CONTINUATION_MARKER_;
             }
             // iterating backwards, so hand out the last element first
             m_CEBufferOffset_ = m_CEBufferSize_ - 1;
             return m_CEBuffer_[m_CEBufferOffset_];
         }
         else {
             return collator.m_expansion_[getExpansionOffset(collator, ce)];
         }
     }

     /**
      * Decomposes a precomposed Hangul syllable arithmetically into its
      * lead, vowel and (optional) trail Jamo while iterating backwards.
      * If the collator does not treat Jamo specially the Jamo collation
      * elements are placed into the CE buffer and the last one is returned;
      * otherwise the Jamo are appended to the normalization buffer and
      * IGNORABLE is returned.
      * @param collator current collator
      * @param ch current character, a Hangul syllable
      * @return previous hangul ce
      */
     private int previousHangul(RuleBasedCollator collator, char ch)
     {
         // index of the syllable relative to the syllable base; the cast
         // keeps the value within 16 bits
         int syllableIndex = (char)(ch - HANGUL_SBASE_);
         int leadVowelIndex = syllableIndex / HANGUL_TCOUNT_;

         char lead = (char)(HANGUL_LBASE_ + leadVowelIndex / HANGUL_VCOUNT_);
         char vowel = (char)(HANGUL_VBASE_ + leadVowelIndex % HANGUL_VCOUNT_);
         char trail = (char)(HANGUL_TBASE_ + syllableIndex % HANGUL_TCOUNT_);
         // a trail equal to the trail base means the syllable has no trail
         boolean hasTrail = trail != HANGUL_TBASE_;

         m_CEBufferSize_ = 0;
         if (collator.m_isJamoSpecial_) {
             // Since Hanguls pass the FCD check, it is guaranteed that we
             // won't be in the normalization buffer if something like this
             // happens.
             // Move Jamos into normalization buffer
             m_buffer_.append(lead);
             m_buffer_.append(vowel);
             if (hasTrail) {
                 m_buffer_.append(trail);
             }

             m_FCDStart_ = m_source_.getIndex();
             m_FCDLimit_ = m_FCDStart_ + 1;
             return IGNORABLE;
         }

         // plain case, look the Jamo up directly
         m_CEBuffer_[m_CEBufferSize_ ++] = collator.m_trie_.getLeadValue(lead);
         m_CEBuffer_[m_CEBufferSize_ ++] = collator.m_trie_.getLeadValue(vowel);
         if (hasTrail) {
             m_CEBuffer_[m_CEBufferSize_ ++]
                 = collator.m_trie_.getLeadValue(trail);
         }
         m_CEBufferOffset_ = m_CEBufferSize_ - 1;
         return m_CEBuffer_[m_CEBufferOffset_];
     }

     /**
      * Generates the pair of implicit collation elements for a code point
      * while iterating backwards. Both elements are stored in the CE buffer
      * and the second one is returned, leaving the first one to be fetched
      * next.
      * @param codepoint current codepoint
      * @return second of the two implicit ces, or IGNORABLE if the code
      *         point is not legal
      */
     private int previousImplicit(int codepoint)
     {
         if (UCharacter.isLegal(codepoint)) {
             int implicit
                 = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(
                                                                   codepoint);
             // upper half of the implicit weight plus 0x05 in each of the
             // two low bytes
             int firstCE = (implicit & RuleBasedCollator.CE_PRIMARY_MASK_)
                           | 0x00000505;
             // lower half of the implicit weight moved into the primary
             // position, low byte 0xC0
             int secondCE = ((implicit & 0x0000FFFF) << 16) | 0x000000C0;

             m_CEBufferSize_ = 2;
             m_CEBufferOffset_ = 1;
             m_CEBuffer_[0] = firstCE;
             m_CEBuffer_[1] = secondCE;
             return secondCE;
         }
         // illegal code value, completely ignorable!
         return IGNORABLE;
     }

     /**
      * Gets the collation element for a trail surrogate while iterating
      * backwards. If the preceding code unit is a lead surrogate the pair
      * is treated as one supplementary code point and given implicit
      * collation elements; otherwise the trail surrogate is ignorable.
      * @param ch current character, a trail surrogate
      * @return previous surrogate ce
      */
     private int previousSurrogate(char ch)
     {
         // at the start of the string there is nothing to pair with, wrong
         // place to be at
         if (!isBackwardsStart()) {
             char before = (char)previousChar();
             // Handles Han and Supplementary characters here.
             if (UTF16.isLeadSurrogate(before)) {
                 int supplementary
                     = UCharacterProperty.getRawSupplementary(before, ch);
                 return previousImplicit(supplementary);
             }
             // not a pair, step forward again over the code unit just read
             if (before != CharacterIterator.DONE) {
                 nextChar();
             }
         }
         return IGNORABLE; // completely ignorable
     }

     /**
      * <p>Resolves a special collation element while iterating backwards,
      * dispatching on its tag: expansions, contractions, digits, Hangul,
      * surrogates, implicits etc.</p>
      * <p>Most tags return directly. Prefix, contraction (at the start of
      * the text), digit and unknown tags produce a new ce, which is
      * processed again for as long as it is itself special.</p>
      * @param collator can be plain UCA
      * @param ce current ce
      * @param ch current character
      * @return previous special ce
      */
     private int previousSpecial(RuleBasedCollator collator, int ce, char ch)
     {
         do {
             switch (RuleBasedCollator.getTag(ce)) {
             case CE_NOT_FOUND_TAG_:  // this tag always returns
                 return ce;
             case RuleBasedCollator.CE_SURROGATE_TAG_:
                 // essentially a disengaged lead surrogate. a broken
                 // sequence was encountered and this is an error
                 return IGNORABLE;
             case CE_SPEC_PROC_TAG_:
                 ce = previousSpecialPrefix(collator, ce);
                 break;
             case CE_CONTRACTION_TAG_:
                 // may loop for first character e.g. "0x0f71" for english
                 if (!isBackwardsStart()) {
                     return previousContraction(collator, ce, ch);
                 }
                 // start of string or this is not the end of any contraction
                 ce = collator.m_contractionCE_[
                                         getContractionOffset(collator, ce)];
                 break;
             case CE_LONG_PRIMARY_TAG_:
                 return previousLongPrimary(ce);
             case CE_EXPANSION_TAG_: // always returns
                 return previousExpansion(collator, ce);
             case CE_DIGIT_TAG_:
                 ce = previousDigit(collator, ce, ch);
                 break;
             case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
                 return previousHangul(collator, ch);
             case CE_LEAD_SURROGATE_TAG_:  // D800-DBFF
                 return IGNORABLE; // broken surrogate sequence
             case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF
                 return previousSurrogate(ch);
             case CE_CJK_IMPLICIT_TAG_:
                 // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
             case CE_IMPLICIT_TAG_:
                 // everything that is not defined
                 // UCA is filled with these. Tailorings are NOT_FOUND
                 return previousImplicit(ch);
             case CE_CHARSET_TAG_: // this tag always returns
                 return CE_NOT_FOUND_;
             default: // unknown tag
                 ce = IGNORABLE;
             }
             // only prefix, contraction, digit and unknown tags get here
         } while (RuleBasedCollator.isSpecial(ce));
         return ce;
     }

     /**
      * Gets the implicit primary weight for a code point.
      * NOTE: the implementation below is commented out and kept for
      * reference only.
      * @param cp codepoint
      * @return left justified primary key
      */
 //    private static final int getImplicitPrimary(int cp)
 //    {
 //        cp = swapCJK(cp);
 //
 //        //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
 //        // we now have a range of numbers from 0 to 21FFFF.
 //        // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
 //        // we must leave a gap of 01 between all values of the last byte, so
 //        // the last byte has 126 values (3 byte case)
 //        // we shift so that HAN all has the same first primary, for
 //        // compression.
 //        // for the 4 byte case, we make the gap as large as we can fit.
 //        // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
 //        // Four byte forms (most supplementaries) are EF xx xx xx (with a gap
 //        // of LAST2_MULTIPLIER == 14)
 //
 //        int last0 = cp - RuleBasedCollator.IMPLICIT_4BYTE_BOUNDARY_;
 //        if (last0 < 0) {
 //            int last1 = cp / RuleBasedCollator.LAST_COUNT_;
 //            last0 = cp % RuleBasedCollator.LAST_COUNT_;
 //
 //            int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
 //            last1 %= RuleBasedCollator.OTHER_COUNT_;
 //            return RuleBasedCollator.IMPLICIT_BASE_3BYTE_ + (last2 << 24)
 //                   + (last1 << 16)
 //                   + ((last0 * RuleBasedCollator.LAST_MULTIPLIER_) << 8);
 //        }
 //        else {
 //            int last1 = last0 / RuleBasedCollator.LAST_COUNT2_;
 //            last0 %= RuleBasedCollator.LAST_COUNT2_;
 //
 //            int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
 //            last1 %= RuleBasedCollator.OTHER_COUNT_;
 //
 //            int last3 = last2 / RuleBasedCollator.OTHER_COUNT_;
 //            last2 %= RuleBasedCollator.OTHER_COUNT_;
 //            return RuleBasedCollator.IMPLICIT_BASE_4BYTE_ + (last3 << 24)
 //                   + (last2 << 16) + (last1 << 8)
 //                   + (last0 * RuleBasedCollator.LAST2_MULTIPLIER_);
 //        }
 //    }

 //    /**
 //     * Swapping CJK characters for implicit ces
 //     * @param cp codepoint CJK
 //     * @return swapped result
 //     */
 //    private static final int swapCJK(int cp)
 //    {
 //        if (cp >= CJK_BASE_) {
 //            if (cp < CJK_LIMIT_) {
 //                return cp - CJK_BASE_;
 //            }
 //            if (cp < CJK_COMPAT_USED_BASE_) {
 //                return cp + NON_CJK_OFFSET_;
 //            }
 //            if (cp < CJK_COMPAT_USED_LIMIT_) {
 //                return cp - CJK_COMPAT_USED_BASE_ + (CJK_LIMIT_ - CJK_BASE_);
 //            }
 //            if (cp < CJK_B_BASE_) {
 //                return cp + NON_CJK_OFFSET_;
 //            }
 //            if (cp < CJK_B_LIMIT_) {
 //                return cp; // non-BMP-CJK
 //            }
 //            return cp + NON_CJK_OFFSET_; // non-CJK
 //        }
 //        if (cp < CJK_A_BASE_) {
 //            return cp + NON_CJK_OFFSET_;
 //        }
 //        if (cp < CJK_A_LIMIT_) {
 //            return cp - CJK_A_BASE_ + (CJK_LIMIT_ - CJK_BASE_)
 //                   + (CJK_COMPAT_USED_LIMIT_ - CJK_COMPAT_USED_BASE_);
 //        }
 //        return cp + NON_CJK_OFFSET_; // non-CJK
 //    }

     /**
      * Looks at the character that lies a given distance away from the
      * current position of the source iterator, leaving the iterator
      * position as it was.
      * No error checking and does not access the normalization buffer
      * - caller beware!
      * @param offset offset from current position which character is to be
      *               retrieved
      * @return character at current position + offset
      */
     private char peekCharacter(int offset)
     {
         if (offset == 0) {
             // nothing to move, read in place
             return (char)m_source_.current();
         }
         int savedIndex = m_source_.getIndex();
         m_source_.setIndex(savedIndex + offset);
         char peeked = (char)m_source_.current();
         // put the iterator back where it was
         m_source_.setIndex(savedIndex);
         return peeked;
     }

     /**
      * Steps back by one position without normalizing, which makes this
      * cheaper than previousChar. Depending on whether the buffer offset is
      * in use, either the buffer offset or the source index is decremented.
      * Boundary checks are not performed, so the caller has to be sure that
      * stepping back does not leave the source limits.
      * Use only with nextChar() and never call this API twice in a row without
      * nextChar() in the middle.
      */
     private void goBackOne()
     {
         if (m_bufferOffset_ < 0) {
             // buffer offset not in use, move the source iterator
             int sourceIndex = m_source_.getIndex();
             m_source_.setIndex(sourceIndex - 1);
         }
         else {
             m_bufferOffset_ --;
         }
     }

     /**
      * Steps forward by one position without normalizing, which makes this
      * cheaper than nextChar. Depending on whether the buffer offset is in
      * use, either the buffer offset or the source index is incremented.
      * Boundary checks are not performed, so the caller has to be sure that
      * stepping forward does not leave the source limits.
      * Use only with previousChar() and never call this API twice in a row
      * without previousChar() in the middle.
      */
     private void goForwardOne()
     {
         if (m_bufferOffset_ >= 0) {
             // we are in the buffer; per the original author the buffer
             // offset will never be 0 here
             m_bufferOffset_ ++;
         }
         else {
             // we're working on the source and not normalizing. fast path.
             // note Thai pre-vowel reordering uses buffer too
             int sourceIndex = m_source_.getIndex();
             m_source_.setIndex(sourceIndex + 1);
         }
     }
 }