icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java - external/github.com/unicode-org/icu - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
  *******************************************************************************
  * Copyright (C) 2005-2016 International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */

 package com.ibm.icu.text;

 import static com.ibm.icu.impl.CharacterIteration.DONE32;
 import static com.ibm.icu.impl.CharacterIteration.next32;
 import static com.ibm.icu.impl.CharacterIteration.nextTrail32;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.text.CharacterIterator;
 import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.CharacterIteration;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUDebug;
 import com.ibm.icu.impl.RBBIDataWrapper;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.util.CodePointTrie;

 /**
  * Rule Based Break Iterator
  * This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
  *
  * @stable ICU 2.0
  */
 public class RuleBasedBreakIterator extends BreakIterator {
     //=======================================================================
     // Constructors & Factories
     //=======================================================================

     /**
      * private constructor
      */
     private RuleBasedBreakIterator() {
         fDictionaryCharCount  = 0;
         synchronized(gAllBreakEngines) {
             fBreakEngines = new ArrayList<>(gAllBreakEngines);
         }
     }

     /**
      * Create a break iterator from a precompiled set of break rules.
      *
      * Creating a break iterator from the binary rules is much faster than
      * creating one from source rules.
      *
      * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
      * Binary break iterator rules are not guaranteed to be compatible between
      * different versions of ICU.
      *
      * @param is an input stream supplying the compiled binary rules.
      * @throws IOException if there is an error while reading the rules from the InputStream.
      * @see    #compileRules(String, OutputStream)
      * @stable ICU 4.8
      */
     public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
         RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
         This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is));
         This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize];
         return This;
     }

     /**
      * Create a break iterator from a precompiled set of break rules.
      *
      * Creating a break iterator from the binary rules is much faster than
      * creating one from source rules.
      *
      * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
      * Binary break iterator rules are not guaranteed to be compatible between
      * different versions of ICU.
      *
      * @param bytes a buffer supplying the compiled binary rules.
      * @throws IOException if there is an error while reading the rules from the buffer.
      * @see    #compileRules(String, OutputStream)
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException {
         RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
         This.fRData = RBBIDataWrapper.get(bytes);
         This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize];
         return This;
     }

     /**
      * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
      * @param rules The break rules to be used.
      * @stable ICU 2.2
      */
     public RuleBasedBreakIterator(String rules)  {
         this();
         try {
             ByteArrayOutputStream ruleOS = new ByteArrayOutputStream();
             compileRules(rules, ruleOS);
             fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray()));
             fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize];
         } catch (IOException e) {
             ///CLOVER:OFF
             // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler,
             //  causing bogus compiled rules to be produced, but with no compile error raised.
             RuntimeException rte = new RuntimeException("RuleBasedBreakIterator rule compilation internal error: "
                     + e.getMessage());
             throw rte;
             ///CLOVER:ON
         }
     }

     //=======================================================================
     // Boilerplate
     //=======================================================================

     /**
      * Clones this iterator.
      * @return A newly-constructed RuleBasedBreakIterator with the same
      * behavior as this one.
      * @stable ICU 2.0
      */
     @Override
     public Object clone()  {
         RuleBasedBreakIterator result;
         result = (RuleBasedBreakIterator)super.clone();
         if (fText != null) {
             result.fText = (CharacterIterator)(fText.clone());
         }
         synchronized (gAllBreakEngines)  {
             result.fBreakEngines = new ArrayList<>(gAllBreakEngines);
         }
         result.fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize];
         result.fBreakCache = result.new BreakCache(fBreakCache);
         result.fDictionaryCache = result.new DictionaryCache(fDictionaryCache);
         return result;
     }


     /**
      * Returns true if both BreakIterators are of the same class, have the same
      * rules, and iterate over the same text.
      * @stable ICU 2.0
      */
     @Override
     public boolean equals(Object that) {
         if (that == null) {
             return false;
         }
         if (this == that) {
             return true;
         }
         try {
             RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
             if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
                 return false;
             }
             if (fRData != null && other.fRData != null &&
                     (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
                 return false;
             }
             if (fText == null && other.fText == null) {
                 return true;
             }
             if (fText == null || other.fText == null || !fText.equals(other.fText)) {
                 return false;
             }
             return fPosition == other.fPosition;
         }
         catch(ClassCastException e) {
             return false;
         }
      }

     /**
      * Returns the description (rules) used to create this iterator.
      * (In ICU4C, the same function is RuleBasedBreakIterator::getRules())
      * @stable ICU 2.0
      */
     @Override
     public String toString() {
         String retStr = "";
         if (fRData != null) {
             retStr =  fRData.fRuleSource;
         }
         return retStr;
     }

     /**
      * Compute a hashcode for this BreakIterator
      * @return A hash code
      * @stable ICU 2.0
      */
     @Override
     public int hashCode()
     {
         return fRData.fRuleSource.hashCode();
     }


     private static final int  START_STATE = 1;     // The state number of the starting state
     private static final int  STOP_STATE  = 0;     // The state-transition value indicating "stop"

     // RBBIRunMode - the state machine runs an extra iteration at the beginning and end
     //               of user text.  A variable with this enum type keeps track of where we
     //               are.  The state machine only fetches user text input while in RUN mode.
     private static final int  RBBI_START  = 0;
     private static final int  RBBI_RUN    = 1;
     private static final int  RBBI_END    = 2;

     /**
      * The character iterator through which this BreakIterator accesses the text.
      */
     private CharacterIterator   fText = new java.text.StringCharacterIterator("");

     /**
      * The rule data for this BreakIterator instance.
      * Not intended for public use. Declared public for testing purposes only.
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public RBBIDataWrapper    fRData;

     /**
      *  The iteration state - current position, rule status for the current position,
      *                        and whether the iterator ran off the end, yielding UBRK_DONE.
      *                        Current position is pinned to be 0 < position <= text.length.
      *                        Current position is always set to a boundary.
      *
      *  The current  position of the iterator. Pinned, 0 < fPosition <= text.length.
      *  Never has the value UBRK_DONE (-1).
      */
     private int                fPosition;

     /**
      * Index of the Rule {tag} values for the most recent match.
      */
     private int                fRuleStatusIndex;

     /**
      * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
      */
     private boolean            fDone;

     /**
      *  Array of look-ahead tentative results.
      */
     private int[]              fLookAheadMatches;

     /**
      *   Cache of previously determined boundary positions.
      */
     private BreakCache         fBreakCache = new BreakCache();


     /**
      * Counter for the number of characters encountered with the "dictionary"
      *   flag set.  Normal RBBI iterators don't use it, although the code
      *   for updating it is live.  Dictionary Based break iterators (a subclass
      *   of us) access this field directly.
      * @internal
      */
     private int fDictionaryCharCount;

     private DictionaryCache     fDictionaryCache = new DictionaryCache();

     /**
      * ICU debug argument name for RBBI
      */
     private static final String RBBI_DEBUG_ARG = "rbbi";

     /**
      * Debugging flag.  Trace operation of state machine when true.
      */
     private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG)
             && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0;

     /**
      * The "default" break engine - just skips over ranges of dictionary words,
      * producing no breaks. Should only be used if characters need to be handled
      * by a dictionary but we have no dictionary implementation for them.
      *
      * Only one instance; shared by all break iterators.
      */
     private static final UnhandledBreakEngine gUnhandledBreakEngine;

     /**
      * List of all known break engines, common for all break iterators.
      * Lazily updated as break engines are needed, because instantiation of
      * break engines is expensive.
      *
      * Because gAllBreakEngines can be referenced concurrently from different
      * BreakIterator instances, all access is synchronized.
      */
     private static final List<LanguageBreakEngine> gAllBreakEngines;

     static {
         gUnhandledBreakEngine = new UnhandledBreakEngine();
         gAllBreakEngines = new ArrayList<>();
         gAllBreakEngines.add(gUnhandledBreakEngine);
     }

     /**
      * List of all known break engines. Similar to gAllBreakEngines, but local to a
      * break iterator, allowing it to be used without synchronization.
      */
     private List<LanguageBreakEngine> fBreakEngines;

     /**
      * Dump the contents of the state table and character classes for this break iterator.
      * For debugging only.
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public void dump(java.io.PrintStream out) {
         if (out == null) {
             out = System.out;
         }
         this.fRData.dump(out);
     }

     /**
      * Compile a set of source break rules into the binary state tables used
      * by the break iterator engine.  Creating a break iterator from precompiled
      * rules is much faster than creating one from source rules.
      *
      * Binary break rules are not guaranteed to be compatible between different
      * versions of ICU.
      *
      *
      * @param rules  The source form of the break rules
      * @param ruleBinary  An output stream to receive the compiled rules.
      * @throws IOException If there is an error writing the output.
      * @see #getInstanceFromCompiledRules(InputStream)
      * @stable ICU 4.8
      */
     public static void compileRules(String rules, OutputStream ruleBinary) throws IOException {
         RBBIRuleBuilder.compileRules(rules, ruleBinary);
     }

     //=======================================================================
     // BreakIterator overrides
     //=======================================================================

     /**
      * Sets the current iteration position to the beginning of the text.
      * (i.e., the CharacterIterator's starting offset).
      * @return The offset of the beginning of the text.
      * @stable ICU 2.0
      */
     @Override
     public int first() {
         if (fText == null) {
             return BreakIterator.DONE;
         }
         fText.first();
         int start =  fText.getIndex();
         if (!fBreakCache.seek(start)) {
             fBreakCache.populateNear(start);
         }
         fBreakCache.current();
         assert(fPosition == start);
         return fPosition;
     }

     /**
      * Sets the current iteration position to the end of the text.
      * (i.e., the CharacterIterator's ending offset).
      * @return The text's past-the-end offset.
      * @stable ICU 2.0
      */
     @Override
     public int last() {
         if (fText == null) {
             return BreakIterator.DONE;
         }
         int endPos = fText.getEndIndex();
         boolean endShouldBeBoundary = isBoundary(endPos);      // Has side effect of setting iterator position.
         assert(endShouldBeBoundary);
         if (fPosition != endPos) {
             assert(fPosition == endPos);
         }
         return endPos;
     }

     /**
      * Advances the iterator either forward or backward the specified number of steps.
      * Negative values move backward, and positive values move forward.  This is
      * equivalent to repeatedly calling next() or previous().
      * @param n The number of steps to move.  The sign indicates the direction
      * (negative is backwards, and positive is forwards).
      * @return The character offset of the boundary position n boundaries away from
      * the current one.
      * @stable ICU 2.0
      */
     @Override
     public int next(int n) {
         int result = 0;
         if (n > 0) {
             for (; n > 0 && result != DONE; --n) {
                 result = next();
             }
         } else if (n < 0) {
             for (; n < 0 && result != DONE; ++n) {
                 result = previous();
             }
         } else {
             result = current();
         }
         return result;
     }

     /**
      * Advances the iterator to the next boundary position.
      * @return The position of the first boundary after this one.
      * @stable ICU 2.0
      */
     @Override
     public int next() {
         fBreakCache.next();
         return fDone ? DONE : fPosition;
     }

     /**
      * Moves the iterator backwards, to the boundary preceding the current one.
      * @return The position of the boundary position immediately preceding the starting position.
      * @stable ICU 2.0
      */
     @Override
     public int previous() {
         fBreakCache.previous();
         return fDone ? DONE : fPosition;
     }

     /**
      * Sets the iterator to refer to the first boundary position following
      * the specified position.
      * @param startPos The position from which to begin searching for a break position.
      * @return The position of the first break after the current position.
      * @stable ICU 2.0
      */
     @Override
     public int following(int startPos) {
         // if the supplied position is before the beginning, return the
         // text's starting offset
         if (startPos < fText.getBeginIndex()) {
             return first();
         }

         // Move requested offset to a code point start. It might be between a lead and trail surrogate.
         // Or it may be beyond the end of the text.
         startPos = CISetIndex32(fText, startPos);
         fBreakCache.following(startPos);
         return fDone ? DONE : fPosition;
     }


     /**
      * Sets the iterator to refer to the last boundary position before the
      * specified position.
      * @param offset The position to begin searching for a break from.
      * @return The position of the last boundary before the starting position.
      * @stable ICU 2.0
      */
     @Override
     public int preceding(int offset) {
         if (fText == null || offset > fText.getEndIndex()) {
             return last();
         } else if (offset < fText.getBeginIndex()) {
             return first();
         }

         // Move requested offset to a code point start. It might be between a lead and trail surrogate.
         // int adjustedOffset = CISetIndex32(fText, offset);    // TODO: restore to match ICU4C behavior.
         int adjustedOffset = offset;
         fBreakCache.preceding(adjustedOffset);
         return fDone ? DONE : fPosition;

     }


     /**
      * Throw IllegalArgumentException unless begin &lt;= offset &lt; end.
      * @stable ICU 2.0
      */
     protected static final void checkOffset(int offset, CharacterIterator text) {
         if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
             throw new IllegalArgumentException("offset out of bounds");
         }
     }


     /**
      * Returns true if the specified position is a boundary position.  As a side
      * effect, leaves the iterator pointing to the first boundary position at
      * or after "offset".
      * @param offset the offset to check.
      * @return True if "offset" is a boundary position.
      * @stable ICU 2.0
      */
     @Override
     public boolean isBoundary(int offset) {
         // TODO: behavior difference with ICU4C, which considers out-of-range offsets
         //       to not be boundaries, and to not be errors.
         checkOffset(offset, fText);

         // Adjust offset to be on a code point boundary and not beyond the end of the text.
         // Note that isBoundary() is always false for offsets that are not on code point boundaries.
         // But we still need the side effect of leaving iteration at the following boundary.
         int adjustedOffset = CISetIndex32(fText, offset);

         boolean result = false;
         if (fBreakCache.seek(adjustedOffset) || fBreakCache.populateNear(adjustedOffset)) {
             result = (fBreakCache.current() == offset);
         }

         if (!result) {
             // Not on a boundary. isBoundary() must leave iterator on the following boundary.
             // fBreakCache.seek(), above, left us on the preceding boundary, so advance one.
             next();
         }
         return result;

     }

     /**
      * Returns the current iteration position.  Note that DONE is never
      * returned from this function; if iteration has run to the end of a
      * string, current() will return the length of the string while
      * next() will return BreakIterator.DONE).
      * @return The current iteration position.
      * @stable ICU 2.0
      */
     @Override
     public int current() {
         return (fText != null) ? fPosition : BreakIterator.DONE;
     }


     /**
      * Return the status tag from the break rule that determined the boundary at
      * the current iteration position.  The values appear in the rule source
      * within brackets, {123}, for example.  For rules that do not specify a
      * status, a default value of 0 is returned.  If more than one rule applies,
      * the numerically largest of the possible status values is returned.
      * <p>
      * Of the standard types of ICU break iterators, only the word and line break
      * iterator provides status values.  The values are defined in
      * class RuleBasedBreakIterator, and allow distinguishing between words
      * that contain alphabetic letters, "words" that appear to be numbers,
      * punctuation and spaces, words containing ideographic characters, and
      * more.  Call <code>getRuleStatus</code> after obtaining a boundary
      * position from <code>next()</code>, <code>previous()</code>, or
      * any other break iterator functions that returns a boundary position.
      * <p>
      * Note that <code>getRuleStatus()</code> returns the value corresponding to
      * <code>current()</code> index even after <code>next()</code> has returned DONE.
      * <p>

      * @return the status from the break rule that determined the boundary
      * at the current iteration position.
      *
      * @stable ICU 60
      */

     @Override
     public int  getRuleStatus() {
         //   Status records have this form:
         //           Count N         <--  fLastRuleStatusIndex points here.
         //           Status val 0
         //           Status val 1
         //              ...
         //           Status val N-1  <--  the value we need to return
         //   The status values are sorted in ascending order.
         //   This function returns the last (largest) of the array of status values.
         int  idx = fRuleStatusIndex + fRData.fStatusTable[fRuleStatusIndex];
         int  tagVal = fRData.fStatusTable[idx];
         return tagVal;
     }

     /**
      * Get the status (tag) values from the break rule(s) that determined the boundary
      * at the current iteration position.  The values appear in the rule source
      * within brackets, {123}, for example.  The default status value for rules
      * that do not explicitly provide one is zero.
      * <p>
      * The status values used by the standard ICU break rules are defined
      * as public constants in class RuleBasedBreakIterator.
      * <p>
      * If the size  of the output array is insufficient to hold the data,
      *  the output will be truncated to the available length.  No exception
      *  will be thrown.
      *
      * @param fillInArray an array to be filled in with the status values.
      * @return          The number of rule status values from the rules that determined
      *                  the boundary at the current iteration position.
      *                  In the event that the array is too small, the return value
      *                  is the total number of status values that were available,
      *                  not the reduced number that were actually returned.
      * @stable ICU 60
      */
     @Override
     public int getRuleStatusVec(int[] fillInArray) {
         int numStatusVals = fRData.fStatusTable[fRuleStatusIndex];
         if (fillInArray != null) {
             int numToCopy = Math.min(numStatusVals, fillInArray.length);
             for (int i=0; i<numToCopy; i++) {
                 fillInArray[i] = fRData.fStatusTable[fRuleStatusIndex + i + 1];
             }
         }
         return numStatusVals;
     }

     /**
      * Returns a CharacterIterator over the text being analyzed.
      * <p>
      * <b><i>Caution:</i></b>The state of the returned CharacterIterator
      * must not be modified in any way while the BreakIterator is still in use.
      * Doing so will lead to undefined behavior of the BreakIterator.
      * Clone the returned CharacterIterator first and work with that.
      * <p>
      * The returned CharacterIterator is a reference
      * to the <b>actual iterator being used</b> by the BreakIterator.
      * No guarantees are made about the current position
      * of this iterator when it is returned; it may differ from the
      * BreakIterators current position.  If you need to move that
      * position to examine the text, clone this function's return value first.
      * @return An iterator over the text being analyzed.
      * @stable ICU 2.0
      */
     @Override
     public CharacterIterator getText() {
         return fText;
     }

     /**
      * Set the iterator to analyze a new piece of text.  This function resets
      * the current iteration position to the beginning of the text.
      * (The old iterator is dropped.)
      * <p>
      * <b><i>Caution:</i></b> The supplied CharacterIterator is used
      * directly by the BreakIterator, and must not be altered in any
      * way by code outside of the BreakIterator.
      * Doing so will lead to undefined behavior of the BreakIterator.
      *
      * @param newText An iterator over the text to analyze.
      * @stable ICU 2.0
      */
     @Override
     public void setText(CharacterIterator newText) {
         if (newText != null) {
             fBreakCache.reset(newText.getBeginIndex(), 0);
         } else {
             fBreakCache.reset();
         }
         fDictionaryCache.reset();
         fText = newText;
         this.first();
     }

      /**
      * Control debug, trace and dump options.
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ?
                                            ICUDebug.value(RBBI_DEBUG_ARG) : null;


     private LanguageBreakEngine getLanguageBreakEngine(int c) {

         // We have a dictionary character.
         // Does an already instantiated break engine handle it?
         for (LanguageBreakEngine candidate : fBreakEngines) {
             if (candidate.handles(c)) {
                 return candidate;
             }
         }

         synchronized (gAllBreakEngines) {
             // This break iterator's list of break engines didn't handle the character.
             // Check the global list, another break iterator may have instantiated the
             // desired engine.
             for (LanguageBreakEngine candidate : gAllBreakEngines) {
                 if (candidate.handles(c)) {
                     fBreakEngines.add(candidate);
                     return candidate;
                 }
             }

             // The global list doesn't have an existing engine, build one.
             int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
             if (script == UScript.KATAKANA || script == UScript.HIRAGANA) {
                 // Katakana, Hiragana and Han are handled by the same dictionary engine.
                 // Fold them together for mapping from script -> engine.
                 script = UScript.HAN;
             }

             LanguageBreakEngine eng;
             try {
                 switch (script) {
                 case UScript.THAI:
                     eng = new ThaiBreakEngine();
                     break;
                 case UScript.LAO:
                     eng = new LaoBreakEngine();
                     break;
                 case UScript.MYANMAR:
                     eng = new BurmeseBreakEngine();
                     break;
                 case UScript.KHMER:
                     eng = new KhmerBreakEngine();
                     break;
                 case UScript.HAN:
                     eng = new CjkBreakEngine(false);
                      break;
                 case UScript.HANGUL:
                     eng = new CjkBreakEngine(true);
                     break;
                 default:
                     gUnhandledBreakEngine.handleChar(c);
                     eng = gUnhandledBreakEngine;
                     break;
                 }
             } catch (IOException e) {
                 eng = null;
             }

             if (eng != null && eng != gUnhandledBreakEngine) {
                 gAllBreakEngines.add(eng);
                 fBreakEngines.add(eng);
             }
             return eng;
         }   // end synchronized(gAllBreakEngines)
     }

     /**
      * The State Machine Engine for moving forward is here.
      * This function is the heart of the RBBI run time engine.
      *
      * Input
      *    fPosition, the position in the text to begin from.
      * Output
      *    fPosition:           the boundary following the starting position.
      *    fDictionaryCharCount the number of dictionary characters encountered.
      *                         If > 0, the segment will be further subdivided
      *    fRuleStatusIndex     Info from the state table indicating which rules caused the boundary.
      *
      * @return the new iterator position
      *
      * A note on supplementary characters and the position of underlying
      * Java CharacterIterator:   Normally, a character iterator is positioned at
      * the char most recently returned by next().  Within this function, when
      * a supplementary char is being processed, the char iterator is left
      * sitting on the trail surrogate, in the middle of the code point.
      * This is different from everywhere else, where an iterator always
      * points at the lead surrogate of a supplementary.
      */
     private int handleNext() {
         if (TRACE) {
             System.out.println("Handle Next   pos      char  state category");
         }

         // handleNext always sets the break tag value.
         // Set the default for it.
         fRuleStatusIndex  = 0;
         fDictionaryCharCount = 0;

         // caches for quicker access
         CharacterIterator text = fText;
         CodePointTrie trie = fRData.fTrie;

         char[] stateTable  = fRData.fFTable.fTable;
         int initialPosition = fPosition;
         text.setIndex(initialPosition);
         int result          = initialPosition;

         // Set up the starting char
         int c = text.current();
         if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
             c = nextTrail32(text, c);
             if (c == DONE32) {
                 fDone = true;
                 return BreakIterator.DONE;
             }
         }

         // Set the initial state for the state machine
         int state           = START_STATE;
         int row             = fRData.getRowIndex(state);
         short category      = 3;
         int flagsState      = fRData.fFTable.fFlags;
         int dictStart       = fRData.fFTable.fDictCategoriesStart;
         int mode            = RBBI_RUN;
         if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
             category = 2;
             mode     = RBBI_START;
             if (TRACE) {
                 System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                 System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                 System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
             }
         }

         // loop until we reach the end of the text or transition to state 0
         while (state != STOP_STATE) {
             if (c == DONE32) {
                 // Reached end of input string.
                 if (mode == RBBI_END) {
                     // We have already run the loop one last time with the
                     // character set to the pseudo {eof} value. Now it is time
                     // to unconditionally bail out.
                     break;
                 }
                 // Run the loop one last time with the fake end-of-input character category
                 mode = RBBI_END;
                 category = 1;
             }
             else if (mode == RBBI_RUN) {
                 // Get the char category.  An incoming category of 1 or 2 mens that
                 //      we are preset for doing the beginning or end of input, and
                 //      that we shouldn't get a category from an actual text input character.
                 //

                 // look up the current character's character category, which tells us
                 // which column in the state table to look at.
                 //
                 category = (short) trie.get(c);

                 // Check for categories that require word dictionary handling.
                 if (category >= dictStart) {
                     fDictionaryCharCount++;
                 }

                 if (TRACE) {
                     System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                     System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                     System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
                 }

                 // Advance to the next character.
                 // If this is a beginning-of-input loop iteration, don't advance.
                 //    The next iteration will be processing the first real input character.
                 c = text.next();
                 if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
                     c = nextTrail32(text, c);
                 }
             }
             else {
                 mode = RBBI_RUN;
             }

             // look up a state transition in the state table
             state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
             row   = fRData.getRowIndex(state);
             int accepting = stateTable[row + RBBIDataWrapper.ACCEPTING];
             if (accepting == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
                 // Match found, common case
                 result = text.getIndex();
                 if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
                     // The iterator has been left in the middle of a surrogate pair.
                     // We want the start of it.
                     result--;
                 }

                 //  Remember the break status (tag) values.
                 fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
             } else if (accepting > RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
                 // Lookahead match is completed
                 int lookaheadResult = fLookAheadMatches[accepting];
                 if (lookaheadResult >= 0) {
                     fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
                     fPosition = lookaheadResult;
                     return lookaheadResult;
                 }
             }


             // If we are at the position of the '/' in a look-ahead (hard break) rule;
             // record the current position, to be returned later, if the full rule matches.
             // TODO: Move this check before the previous check of fAccepting.
             //       This would enable hard-break rules with no following context.
             //       But there are line break test failures when trying this. Investigate.
             //       Issue ICU-20837
             int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
             if (rule != 0) {
                 int  pos = text.getIndex();
                 if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
                     // The iterator has been left in the middle of a surrogate pair.
                     // We want the beginning  of it.
                     pos--;
                 }
                 fLookAheadMatches[rule] = pos;
             }


         }        // End of state machine main loop

         // The state machine is done.  Check whether it found a match...

         // If the iterator failed to advance in the match engine force it ahead by one.
         // This indicates a defect in the break rules, which should always match
         // at least one character.

         if (result == initialPosition) {
             if (TRACE) {
                 System.out.println("Iterator did not move. Advancing by 1.");
             }
             text.setIndex(initialPosition);
             next32(text);
             result = text.getIndex();
             fRuleStatusIndex = 0;
         }

         // Leave the iterator at our result position.
         //   (we may have advanced beyond the last accepting position chasing after
         //    longer matches that never completed.)
         fPosition = result;

         if (TRACE) {
             System.out.println("result = " + result);
         }
         return result;
     }

     /**
      * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
      * This locates a "Safe Position" from which the forward break rules
      * will operate correctly. A Safe Position is not necessarily a boundary itself.
      *
      * The logic of this function is very similar to handleNext(), above, but simpler
      * because the safe table does not require as many options.
      *
      * @param fromPosition the position in the input text to begin the iteration.
      * @internal
      */
     private int handleSafePrevious(int fromPosition) {
         char            state;
         short           category = 0;
         int             result = 0;

         // caches for quicker access
         CharacterIterator text = fText;
         CodePointTrie trie = fRData.fTrie;
         char[] stateTable  = fRData.fRTable.fTable;

         CISetIndex32(text, fromPosition);
         if (TRACE) {
             System.out.print("Handle Previous   pos   char  state category");
         }

         // if we're already at the start of the text, return DONE.
         if (text.getIndex() == text.getBeginIndex()) {
             return BreakIterator.DONE;
         }

         //  Set the initial state for the state machine
         int c = CharacterIteration.previous32(text);
         state = START_STATE;
         int row = fRData.getRowIndex(state);

         // loop until we reach the start of the text or transition to state 0
         //
         for (; c != DONE32; c = CharacterIteration.previous32(text)) {

             // look up the current character's character category, which tells us
             // which column in the state table to look at.
             //
             //  And off the dictionary flag bit. For reverse iteration it is not used.
             category = (short) trie.get(c);
             if (TRACE) {
                 System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                 System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                 System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
             }

             // State Transition - move machine to its next state
             //
             assert(category < fRData.fHeader.fCatCount);
             state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
             row   = fRData.getRowIndex(state);

             if (state == STOP_STATE) {
                 // This is the normal exit from the lookup state machine.
                 // Transition to state zero means we have found a safe point.
                 break;
             }
         }

         // The state machine is done.
         result = text.getIndex();
         if (TRACE) {
             System.out.println("result = " + result);
         }
         return result;
     }

     /**
      * Set the index of a CharacterIterator.
      * Pin the index to the valid range range of BeginIndex <= index <= EndIndex.
      * If the index points to a trail surrogate of a supplementary character, adjust it
      * to the start (lead surrogate) index.
      *
      * @param ci A CharacterIterator to set
      * @param index the index to set
      * @return the resulting index, possibly pinned or adjusted.
      */
     private static int CISetIndex32(CharacterIterator ci, int index) {
         if (index <= ci.getBeginIndex()) {
             ci.first();
         } else if (index >= ci.getEndIndex()) {
             ci.setIndex(ci.getEndIndex());
         } else if (Character.isLowSurrogate(ci.setIndex(index))) {
             if (!Character.isHighSurrogate(ci.previous())) {
                 ci.next();
             }
         }
         return ci.getIndex();
     }

     /** DictionaryCache  stores the boundaries obtained from a run of dictionary characters.
      *                 Dictionary boundaries are moved first to this cache, then from here
      *                 to the main BreakCache, where they may inter-leave with non-dictionary
      *                 boundaries. The public BreakIterator API always fetches directly
      *                 from the main BreakCache, not from here.
      *
      *                 In common situations, the number of boundaries in a single dictionary run
      *                 should be quite small, it will be terminated by punctuation, spaces,
      *                 or any other non-dictionary characters. The main BreakCache may end
      *                 up with boundaries from multiple dictionary based runs.
      *
      *                 The boundaries are stored in a simple ArrayList (vector), with the
      *                 assumption that they will be accessed sequentially.
      */
     class DictionaryCache  {

          void reset() {
              fPositionInCache = -1;
              fStart = 0;
              fLimit = 0;
              fFirstRuleStatusIndex = 0;
              fOtherRuleStatusIndex = 0;
              fBreaks.removeAllElements();
          };

          boolean following(int fromPos) {
              if (fromPos >= fLimit || fromPos < fStart) {
                  fPositionInCache = -1;
                  return false;
              }

              // Sequential iteration, move from previous boundary to the following

              int r = 0;
              if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
                  ++fPositionInCache;
                  if (fPositionInCache >= fBreaks.size()) {
                      fPositionInCache = -1;
                      return false;
                  }
                  r = fBreaks.elementAt(fPositionInCache);
                  assert(r > fromPos);
                  fBoundary = r;
                  fStatusIndex = fOtherRuleStatusIndex;
                  return true;
              }

              // Random indexing. Linear search for the boundary following the given position.

              for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
                  r= fBreaks.elementAt(fPositionInCache);
                  if (r > fromPos) {
                      fBoundary = r;
                      fStatusIndex = fOtherRuleStatusIndex;
                      return true;
                  }
              }

              // Internal error. fStart <= fromPos < fLimit, but no cached boundary.
              assert(false);
              fPositionInCache = -1;
              return false;
          };

          boolean preceding(int fromPos) {
              if (fromPos <= fStart || fromPos > fLimit) {
                  fPositionInCache = -1;
                  return false;
              }

              if (fromPos == fLimit) {
                  fPositionInCache = fBreaks.size() - 1;
                  if (fPositionInCache >= 0) {
                      assert(fBreaks.elementAt(fPositionInCache) == fromPos);
                  }
              }

              int r;
              if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
                  --fPositionInCache;
                  r = fBreaks.elementAt(fPositionInCache);
                  assert(r < fromPos);
                  fBoundary = r;
                  fStatusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
                  return true;
              }

              if (fPositionInCache == 0) {
                  fPositionInCache = -1;
                  return false;
              }

              for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
                  r = fBreaks.elementAt(fPositionInCache);
                  if (r < fromPos) {
                      fBoundary = r;
                      fStatusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
                      return true;
                  }
              }
              assert(false);
              fPositionInCache = -1;
              return false;
          };

         /**
          * Populate the cache with the dictionary based boundaries within a region of text.
          * @param startPos  The start position of a range of text
          * @param endPos    The end position of a range of text
          * @param firstRuleStatus The rule status index that applies to the break at startPos
          * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
          * @internal
          */
         void populateDictionary(int startPos, int endPos,
                                 int firstRuleStatus, int otherRuleStatus) {
             if ((endPos - startPos) <= 1) {
                 return;
             }

             reset();
             fFirstRuleStatusIndex = firstRuleStatus;
             fOtherRuleStatusIndex = otherRuleStatus;

             int rangeStart = startPos;
             int rangeEnd = endPos;

             int         category;
             int         current;
             int         foundBreakCount = 0;

             // Loop through the text, looking for ranges of dictionary characters.
             // For each span, find the appropriate break engine, and ask it to find
             // any breaks within the span.

             fText.setIndex(rangeStart);
             int     c = CharacterIteration.current32(fText);
             category = (short)fRData.fTrie.get(c);
             int dictStart = fRData.fFTable.fDictCategoriesStart;

             while(true) {
                 while((current = fText.getIndex()) < rangeEnd && (category < dictStart)) {
                     c = CharacterIteration.next32(fText);    // pre-increment
                     category = (short)fRData.fTrie.get(c);
                 }
                 if (current >= rangeEnd) {
                     break;
                 }

                 // We now have a dictionary character. Get the appropriate language object
                 // to deal with it.
                 LanguageBreakEngine lbe = getLanguageBreakEngine(c);

                 // Ask the language object if there are any breaks. It will add them to the cache and
                 // leave the text pointer on the other side of its range, ready to search for the next one.
                 if (lbe != null) {
                     foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, fBreaks);
                 }

                 // Reload the loop variables for the next go-round
                 c = CharacterIteration.current32(fText);
                 category = (short)fRData.fTrie.get(c);
             }

             // If we found breaks, ensure that the first and last entries are
             // the original starting and ending position. And initialize the
             // cache iteration position to the first entry.

             // System.out.printf("foundBreakCount = %d%n", foundBreakCount);
             if (foundBreakCount > 0) {
                 assert(foundBreakCount == fBreaks.size());
                 if (startPos < fBreaks.elementAt(0)) {
                     // The dictionary did not place a boundary at the start of the segment of text.
                     // Add one now. This should not commonly happen, but it would be easy for interactions
                     // of the rules for dictionary segments and the break engine implementations to
                     // inadvertently cause it. Cover it here, just in case.
                     fBreaks.offer(startPos);
                 }
                 if (endPos > fBreaks.peek()) {
                     fBreaks.push(endPos);
                 }
                 fPositionInCache = 0;
                 // Note: Dictionary matching may extend beyond the original limit.
                 fStart = fBreaks.elementAt(0);
                 fLimit = fBreaks.peek();
             } else {
                 // there were no language-based breaks, even though the segment contained
                 // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
                 // for this range will fail, and the calling code will fall back to the rule based boundaries.
             }

         };


         DictionaryCache() {
             fPositionInCache = -1;
             fBreaks = new DictionaryBreakEngine.DequeI();
         }

         /**
          * copy constructor. Used by RuleBasedBreakIterator.clone().
          *
          * @param src the source object to be copied.
          */
         DictionaryCache(DictionaryCache src)  {
             try {
                 fBreaks = (DictionaryBreakEngine.DequeI)src.fBreaks.clone();
             }
             catch (CloneNotSupportedException e) {
                 throw new RuntimeException(e);
             }
             fPositionInCache      = src.fPositionInCache;
             fStart                = src.fStart;
             fLimit                = src.fLimit;
             fFirstRuleStatusIndex = src.fFirstRuleStatusIndex;
             fOtherRuleStatusIndex = src.fOtherRuleStatusIndex;
             fBoundary             = src.fBoundary;
             fStatusIndex          = src.fStatusIndex;
         }

         // A data structure containing the boundaries themselves. Essentially a vector of raw ints.
         DictionaryBreakEngine.DequeI fBreaks;
         int             fPositionInCache;       // Index in fBreaks of last boundary returned by following()
         //                                      //    or preceding(). Optimizes sequential access.
         int             fStart;                 // Text position of first boundary in cache.
         int             fLimit;                 // Last boundary in cache. Which is the limit of the
         //                                      //    text segment being handled by the dictionary.
         int             fFirstRuleStatusIndex;  // Rule status info for first boundary.
         int             fOtherRuleStatusIndex;  // Rule status info for 2nd through last boundaries.
         int             fBoundary;              // Current boundary. Set by preceding(), following().
         int             fStatusIndex;           // Current rule status index. Set by preceding, following().
     };


 /*
  * class BreakCache
  *
  * Cache of break boundary positions and rule status values.
  * Break iterator API functions, next(), previous(), etc., will use cached results
  * when possible, and otherwise cache new results as they are obtained.
  *
  * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
  *
  * The cache is implemented as a single circular buffer.
  */

 /*
  * size of the circular cache buffer.
  */

 class BreakCache {

     BreakCache() {
         reset();
     };

     void  reset(int pos, int ruleStatus) {
         fStartBufIdx = 0;
         fEndBufIdx = 0;
         fTextIdx = pos;
         fBufIdx = 0;
         fBoundaries[0] = pos;
         fStatuses[0] = (short)ruleStatus;
     }

     void  reset() {reset(0, 0); };

     void  next() {
         if (fBufIdx == fEndBufIdx) {
             fDone = !populateFollowing();
             fPosition = fTextIdx;
             fRuleStatusIndex = fStatuses[fBufIdx];
         } else {
             fBufIdx = modChunkSize(fBufIdx + 1);
             fTextIdx = fPosition = fBoundaries[fBufIdx];
             fRuleStatusIndex = fStatuses[fBufIdx];
         }
     };

     void  previous() {
         int initialBufIdx = fBufIdx;
         if (fBufIdx == fStartBufIdx) {
             // At start of cache. Prepend to it.
             populatePreceding();
         } else {
             // Cache already holds the next boundary
             fBufIdx = modChunkSize(fBufIdx - 1);
             fTextIdx = fBoundaries[fBufIdx];
         }
         fDone = (fBufIdx == initialBufIdx);
         fPosition = fTextIdx;
         fRuleStatusIndex = fStatuses[fBufIdx];
         return;
     };

     // Move the iteration state to the position following the startPosition.
     // Input position must be pinned to the input length.
     void following(int startPos) {
         if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) {
             // startPos is in the cache. Do a next() from that position.
             // TODO: an awkward set of interactions with bi->fDone
             //       seek() does not clear it; it can't because of interactions with populateNear().
             //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
             //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
             fDone = false;
             next();
         }

     };

     void  preceding(int startPos) {
         if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) {
             if (startPos == fTextIdx) {
                 previous();
             } else {
                 // seek() leaves the BreakCache positioned at the preceding boundary
                 //        if the requested position is between two bounaries.
                 // current() pushes the BreakCache position out to the BreakIterator itself.
                 assert(startPos > fTextIdx);
                 current();
             }
         }
         return;
     };

     /**
      * Update the state of the public BreakIterator (fBI) to reflect the
      * current state of the break iterator cache (this).
      */
     int current() {
         fPosition = fTextIdx;
         fRuleStatusIndex = fStatuses[fBufIdx];
         fDone = false;
         return fTextIdx;
     };

     /**
      * Add boundaries to the cache near the specified position.
      * The given position need not be a boundary itself.
      * The input position must be within the range of the text, and
      * on a code point boundary.
      * If the requested position is a break boundary, leave the iteration
      * position on it.
      * If the requested position is not a boundary, leave the iteration
      * position on the preceding boundary and include both the the
      * preceding and following boundaries in the cache.
      * Additional boundaries, either preceding or following, may be added
      * to the cache as a side effect.
      *
      * Return false if the operation failed.
      */
     boolean populateNear(int position) {
         assert(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);

         // Find a boundary somewhere in the vicinity of the requested position.
         // Depending on the safe rules and the text data, it could be either before, at, or after
         // the requested position.


         // If the requested position is not near already cached positions, clear the existing cache,
         // find a near-by boundary and begin new cache contents there.

         if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
             int aBoundary = fText.getBeginIndex();
             int ruleStatusIndex = 0;
             if (position > aBoundary + 20) {
                 int backupPos = handleSafePrevious(position);
                 if (backupPos > aBoundary) {
                     // Advance to the boundary following the backup position.
                     // There is a complication: the safe reverse rules identify pairs of code points
                     // that are safe. If advancing from the safe point moves forwards by less than
                     // two code points, we need to advance one more time to ensure that the boundary
                     // is good, including a correct rules status value.
                     //
                     fPosition = backupPos;
                     aBoundary = handleNext();
                     if (aBoundary == backupPos + 1 ||
                             (aBoundary == backupPos + 2 &&
                             Character.isHighSurrogate(fText.setIndex(backupPos)) &&
                             Character.isLowSurrogate(fText.next()))) {
                         // The initial handleNext() only advanced by a single code point. Go again.
                         // Safe rules identify safe pairs.
                         aBoundary = handleNext();
                     }
                 }
                 ruleStatusIndex = fRuleStatusIndex;
             }
             reset(aBoundary, ruleStatusIndex);               // Reset cache to hold aBoundary as a single starting point.
         }

         // Fill in boundaries between existing cache content and the new requested position.

         if (fBoundaries[fEndBufIdx] < position) {
             // The last position in the cache precedes the requested position.
             // Add following position(s) to the cache.
             while (fBoundaries[fEndBufIdx] < position) {
                 if (!populateFollowing()) {
                     assert false;
                     return false;
                 }
             }
             fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
             fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
             while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
                 previous();
             }
             return true;
         }

         if (fBoundaries[fStartBufIdx] > position) {
             // The first position in the cache is beyond the requested position.
             // back up more until we get a boundary <= the requested position.
             while (fBoundaries[fStartBufIdx] > position) {
                 populatePreceding();
             }
             fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
             fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
             while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
                 next();
             }
             if (fTextIdx > position) {
                 // If position is not itself a boundary, the next() loop above will overshoot.
                 // Back up one, leaving cache position at the boundary preceding the requested position.
                 previous();
             }
             return true;
         }

         assert fTextIdx == position;
         return true;

     };

     /**
      *  Add boundary(s) to the cache following the current last boundary.
      *  Return false if at the end of the text, and no more boundaries can be added.
      *  Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
      */
     boolean populateFollowing() {
         int fromPosition = fBoundaries[fEndBufIdx];
         int fromRuleStatusIdx = fStatuses[fEndBufIdx];
         int pos = 0;
         int ruleStatusIdx = 0;

         if (fDictionaryCache.following(fromPosition)) {
             addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
             return true;
         }

         fPosition = fromPosition;
         pos = handleNext();
         if (pos == BreakIterator.DONE) {
             return false;
         }

         ruleStatusIdx = fRuleStatusIndex;
         if (fDictionaryCharCount > 0) {
             // The text segment obtained from the rules includes dictionary characters.
             // Subdivide it, with subdivided results going into the dictionary cache.
             fDictionaryCache.populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
             if (fDictionaryCache.following(fromPosition)) {
                 addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
                 return true;
                 // TODO: may want to move a sizable chunk of the dictionary cache to the break cache at this point.
                 //       But be careful with interactions with populateNear().
             }
         }

         // Rule based segment did not include dictionary characters.
         // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
         //    meaning that we didn't take the return, above.
         // Add its end point to the cache.
         addFollowing(pos, ruleStatusIdx, UpdateCachePosition);

         // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
         //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
         //
         for (int count=0; count<6; ++count) {
             pos = handleNext();
             if (pos == BreakIterator.DONE || fDictionaryCharCount > 0) {
                 break;
             }
             addFollowing(pos, fRuleStatusIndex, RetainCachePosition);
         }
         return true;
     };

     /**
      *  Add one or more boundaries to the cache preceding the first currently cached boundary.
      *  Leave the iteration position on the first added boundary.
      *  Return false if no boundaries could be added (if at the start of the text.)
      */
     boolean populatePreceding() {
         int textBegin = fText.getBeginIndex();
         int fromPosition = fBoundaries[fStartBufIdx];
         if (fromPosition == textBegin) {
             return false;
         }

         int position = textBegin;
         int positionStatusIdx = 0;

         if (fDictionaryCache.preceding(fromPosition)) {
             addPreceding(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
             return true;
         }

         int backupPosition = fromPosition;

         // Find a boundary somewhere preceding the first already-cached boundary
         do {
             backupPosition = backupPosition - 30;
             if (backupPosition <= textBegin) {
                 backupPosition = textBegin;
             } else {
                 backupPosition = handleSafePrevious(backupPosition);
             }
             if (backupPosition == BreakIterator.DONE || backupPosition == textBegin) {
                 position = textBegin;
                 positionStatusIdx = 0;
             } else {
                 // Advance to the boundary following the backup position.
                 // There is a complication: the safe reverse rules identify pairs of code points
                 // that are safe. If advancing from the safe point moves forwards by less than
                 // two code points, we need to advance one more time to ensure that the boundary
                 // is good, including a correct rules status value.
                 //
                 fPosition = backupPosition;  // TODO: pass starting position in a clearer way.
                 position = handleNext();
                 if (position == backupPosition + 1 ||
                         (position == backupPosition + 2 &&
                         Character.isHighSurrogate(fText.setIndex(backupPosition)) &&
                         Character.isLowSurrogate(fText.next()))) {
                     // The initial handleNext() only advanced by a single code point. Go again.
                     // Safe rules identify safe pairs.
                     position = handleNext();
                 }
                 positionStatusIdx = fRuleStatusIndex;
             }
         } while (position >= fromPosition);

         // Find boundaries between the one we just located and the first already-cached boundary
         // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.

         fSideBuffer.removeAllElements();
         fSideBuffer.push(position);
         fSideBuffer.push(positionStatusIdx);

         do {
             int prevPosition = fPosition = position;
             int prevStatusIdx = positionStatusIdx;
             position = handleNext();
             positionStatusIdx = fRuleStatusIndex;
             if (position == BreakIterator.DONE) {
                 break;
             }

             boolean segmentHandledByDictionary = false;
             if (fDictionaryCharCount != 0) {
                 // Segment from the rules includes dictionary characters.
                 // Subdivide it, with subdivided results going into the dictionary cache.
                 int dictSegEndPosition = position;
                 fDictionaryCache.populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
                 while (fDictionaryCache.following(prevPosition)) {
                     position = fDictionaryCache.fBoundary;
                     positionStatusIdx = fDictionaryCache.fStatusIndex;
                     segmentHandledByDictionary = true;
                     assert(position > prevPosition);
                     if (position >= fromPosition) {
                         break;
                     }
                     assert(position <= dictSegEndPosition);
                     fSideBuffer.push(position);
                     fSideBuffer.push(positionStatusIdx);
                     prevPosition = position;
                 }
                 assert(position==dictSegEndPosition || position>=fromPosition);
             }

             if (!segmentHandledByDictionary && position < fromPosition) {
                 fSideBuffer.push(position);
                 fSideBuffer.push(positionStatusIdx);
             }
         } while (position < fromPosition);

         // Move boundaries from the side buffer to the main circular buffer.
         boolean success = false;
         if (!fSideBuffer.isEmpty()) {
             positionStatusIdx = fSideBuffer.pop();
             position = fSideBuffer.pop();
             addPreceding(position, positionStatusIdx, UpdateCachePosition);
             success = true;
         }

         while (!fSideBuffer.isEmpty()) {
             positionStatusIdx = fSideBuffer.pop();
             position = fSideBuffer.pop();
             if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
                 // No space in circular buffer to hold a new preceding result while
                 // also retaining the current cache (iteration) position.
                 // Bailing out is safe; the cache will refill again if needed.
                 break;
             }
         }
         return success;
     };


     static final boolean RetainCachePosition = false;
     static final boolean UpdateCachePosition = true;

     /**
      * Add the boundary following the current position.
      * The current position can be left as it was, or changed to the newly added boundary,
      * as specified by the update parameter.
      */
     void addFollowing(int position, int ruleStatusIdx, boolean update) {
         assert(position > fBoundaries[fEndBufIdx]);
         assert(ruleStatusIdx <= Short.MAX_VALUE);
         int nextIdx = modChunkSize(fEndBufIdx + 1);
         if (nextIdx == fStartBufIdx) {
             fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
         }
         fBoundaries[nextIdx] = position;
         fStatuses[nextIdx] = (short)ruleStatusIdx;
         fEndBufIdx = nextIdx;
         if (update == UpdateCachePosition) {
             // Set current position to the newly added boundary.
             fBufIdx = nextIdx;
             fTextIdx = position;
         } else {
             // Retaining the original cache position.
             // Check if the added boundary wraps around the buffer, and would over-write the original position.
             // It's the responsibility of callers of this function to not add too many.
             assert(nextIdx != fBufIdx);
         }

     };


     /**
      * Add the boundary preceding the current position.
      * The current position can be left as it was, or changed to the newly added boundary,
      * as specified by the update parameter.
      */
     boolean addPreceding(int position, int ruleStatusIdx, boolean update) {
         assert(position < fBoundaries[fStartBufIdx]);
         assert(ruleStatusIdx <= Short.MAX_VALUE);
         int nextIdx = modChunkSize(fStartBufIdx - 1);
         if (nextIdx == fEndBufIdx) {
             if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
                 // Failure. The insertion of the new boundary would claim the buffer position that is the
                 // current iteration position. And we also want to retain the current iteration position.
                 // (The buffer is already completely full of entries that precede the iteration position.)
                 return false;
             }
             fEndBufIdx = modChunkSize(fEndBufIdx - 1);
         }
         fBoundaries[nextIdx] = position;
         fStatuses[nextIdx] = (short)ruleStatusIdx;
         fStartBufIdx = nextIdx;
         if (update == UpdateCachePosition) {
             fBufIdx = nextIdx;
             fTextIdx = position;
         }
         return true;
     };

     /**
      *  Set the cache position to the specified position, or, if the position
      *  falls between to cached boundaries, to the preceding boundary.
      *  Fails if the requested position is outside of the range of boundaries currently held by the cache.
      *  The startPosition must be on a code point boundary.
      *
      *  Return true if successful, false if the specified position is after
      *  the last cached boundary or before the first.
      */
     boolean seek(int pos) {
         if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
             return false;
         }
         if (pos == fBoundaries[fStartBufIdx]) {
             // Common case: seek(0), from BreakIterator::first()
             fBufIdx = fStartBufIdx;
             fTextIdx = fBoundaries[fBufIdx];
             return true;
         }
         if (pos == fBoundaries[fEndBufIdx]) {
             fBufIdx = fEndBufIdx;
             fTextIdx = fBoundaries[fBufIdx];
             return true;
         }

         int min = fStartBufIdx;
         int max = fEndBufIdx;
         while (min != max) {
             int probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
             probe = modChunkSize(probe);
             if (fBoundaries[probe] > pos) {
                 max = probe;
             } else {
                 min = modChunkSize(probe + 1);
             }
         }
         assert(fBoundaries[max] > pos);
         fBufIdx = modChunkSize(max - 1);
         fTextIdx = fBoundaries[fBufIdx];
         assert(fTextIdx <= pos);
         return true;

     };


     /**
      * copy constructor, used from RuleBasedBreakIterator.clone().
      *
      * @param src
      */
     BreakCache(BreakCache src)  {
         fStartBufIdx = src.fStartBufIdx;
         fEndBufIdx = src.fEndBufIdx;
         fTextIdx = src.fTextIdx;
         fBufIdx = src.fBufIdx;
         fBoundaries = src.fBoundaries.clone();
         fStatuses = src.fStatuses.clone();
         fSideBuffer = new DictionaryBreakEngine.DequeI();  // Transient, no need to clone contents.
     }

     void dumpCache() {
         System.out.printf("fTextIdx:%d   fBufIdx:%d%n", fTextIdx, fBufIdx);
         for (int i=fStartBufIdx; ; i=modChunkSize(i+1)) {
             System.out.printf("%d  %d%n", i, fBoundaries[i]);
             if (i == fEndBufIdx) {
                 break;
             }
         }
     };

     private final int   modChunkSize(int index) { return index & (CACHE_SIZE - 1); };

     static final int CACHE_SIZE = 128;
     // static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");

     int                 fStartBufIdx;
     int                 fEndBufIdx;    // inclusive

     int                 fTextIdx;
     int                 fBufIdx;

     int[]               fBoundaries = new int[CACHE_SIZE];
     short[]             fStatuses = new short[CACHE_SIZE];

     DictionaryBreakEngine.DequeI   fSideBuffer = new DictionaryBreakEngine.DequeI();
 };


 }