| // © 2016 and later: Unicode, Inc. and others. | 
 | // License & terms of use: http://www.unicode.org/copyright.html | 
 | /* | 
 | ********************************************************************** | 
 | * Copyright (c) 2003-2011, International Business Machines | 
 | * Corporation and others.  All Rights Reserved. | 
 | ********************************************************************** | 
 | * Author: Alan Liu | 
 | * Created: September 24 2003 | 
 | * Since: ICU 2.8 | 
 | ********************************************************************** | 
 | */ | 
 | #ifndef _RULEITER_H_ | 
 | #define _RULEITER_H_ | 
 |  | 
 | #include "unicode/uobject.h" | 
 |  | 
 | U_NAMESPACE_BEGIN | 
 |  | 
 | class UnicodeString; | 
 | class ParsePosition; | 
 | class SymbolTable; | 
 |  | 
 | /** | 
 |  * An iterator that returns 32-bit code points.  This class is deliberately | 
 |  * <em>not</em> related to any of the ICU character iterator classes | 
 |  * in order to minimize complexity. | 
 |  * | 
 |  * Depending on the option bits passed to next(), the iterator can expand | 
 |  * variables (PARSE_VARIABLES), expand escape sequences (PARSE_ESCAPES), and | 
 |  * skip whitespace (SKIP_WHITESPACE). | 
 |  * | 
 |  * The text and the parse position are held as reference members, so the | 
 |  * objects they refer to must stay valid for as long as the iterator is used. | 
 |  * @author Alan Liu | 
 |  * @since ICU 2.8 | 
 |  */ | 
 | class RuleCharacterIterator : public UMemory { | 
 |  | 
 |     // TODO: Ideas for later.  (Do not implement if not needed, lest the | 
 |     // code coverage numbers go down due to unused methods.) | 
 |     // 1. Add a copy constructor, operator==() method. | 
 |     // 2. Rather than return DONE, throw an exception if the end | 
 |     // is reached -- this is an alternate usage model, probably not useful. | 
 |  | 
 | private: | 
 |     /** | 
 |      * Text being iterated.  Held by reference; the string is not copied. | 
 |      */     | 
 |     const UnicodeString& text; | 
 |  | 
 |     /** | 
 |      * Position of iterator.  Held as a non-const reference (presumably to | 
 |      * the ParsePosition passed to the constructor -- confirm against the | 
 |      * implementation). | 
 |      */ | 
 |     ParsePosition& pos; | 
 |  | 
 |     /** | 
 |      * Symbol table used to parse and dereference variables.  May be 0 | 
 |      * (null). | 
 |      */ | 
 |     const SymbolTable* sym; | 
 |       | 
 |     /** | 
 |      * Current variable expansion, or 0 if none.  inVariable() reports | 
 |      * whether this pointer is non-null. | 
 |      */ | 
 |     const UnicodeString* buf; | 
 |  | 
 |     /** | 
 |      * Position within buf.  Meaningless if buf == 0. | 
 |      */ | 
 |     int32_t bufPos; | 
 |  | 
 | public: | 
 |     /** | 
 |      * Value returned when there are no more characters to iterate. | 
 |      * See the return value of next(). | 
 |      */ | 
 |     enum { DONE = -1 }; | 
 |  | 
 |     /** | 
 |      * Bitmask option to enable parsing of variable names.  If (options & | 
 |      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to | 
 |      * its value.  Variables are parsed using the SymbolTable API. | 
 |      */ | 
 |     enum { PARSE_VARIABLES = 1 }; | 
 |  | 
 |     /** | 
 |      * Bitmask option to enable parsing of escape sequences.  If (options & | 
 |      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded | 
 |      * to its value.  Escapes are parsed using Utility.unescapeAt(). | 
 |      */ | 
 |     enum { PARSE_ESCAPES   = 2 }; | 
 |  | 
 |     /** | 
 |      * Bitmask option to enable skipping of whitespace.  If (options & | 
 |      * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently | 
 |      * skipped, as if they were not present in the input. | 
 |      */ | 
 |     enum { SKIP_WHITESPACE = 4 }; | 
 |  | 
 |     /** | 
 |      * Constructs an iterator over the given text, starting at the given | 
 |      * position. | 
 |      * @param text the text to be iterated | 
 |      * @param sym the symbol table, or null if there is none.  If sym is null, | 
 |      * then variables will not be dereferenced, even if the PARSE_VARIABLES | 
 |      * option is set. | 
 |      * @param pos upon input, the index of the next character to return.  If a | 
 |      * variable has been dereferenced, then pos will <em>not</em> increment as | 
 |      * characters of the variable value are iterated. | 
 |      */ | 
 |     RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym, | 
 |                           ParsePosition& pos); | 
 |       | 
 |     /** | 
 |      * Returns true if this iterator has no more characters to return. | 
 |      * @return true if there are no more characters to return | 
 |      */ | 
 |     UBool atEnd() const; | 
 |  | 
 |     /** | 
 |      * Returns the next character using the given options, or DONE if there | 
 |      * are no more characters, and advances the position to the next | 
 |      * character. | 
 |      * @param options one or more of the following options, bitwise-OR-ed | 
 |      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. | 
 |      * @param isEscaped output parameter set to TRUE if the character | 
 |      * was escaped | 
 |      * @param ec input-output error code.  An error will only be set by | 
 |      * this routine if options includes PARSE_VARIABLES and an unknown | 
 |      * variable name is seen, or if options includes PARSE_ESCAPES and | 
 |      * an invalid escape sequence is seen. | 
 |      * @return the current 32-bit code point, or DONE | 
 |      */ | 
 |     UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec); | 
 |  | 
 |     /** | 
 |      * Returns true if this iterator is currently within a variable expansion. | 
 |      * Defined inline after the class as a test of whether buf is non-null. | 
 |      * @return true if a variable expansion is in progress | 
 |      */ | 
 |     inline UBool inVariable() const; | 
 |  | 
 |     /** | 
 |      * An opaque object representing the position of a RuleCharacterIterator. | 
 |      * Its fields (buf, pos, bufPos) are private; only RuleCharacterIterator, | 
 |      * which is declared a friend, can access them. | 
 |      * | 
 |      * The struct declares no constructor or member initializers, so its | 
 |      * fields are not initialized by default construction; set it with | 
 |      * getPos() before passing it to setPos(). | 
 |      */ | 
 |     struct Pos : public UMemory { | 
 |     private: | 
 |         const UnicodeString* buf; | 
 |         int32_t pos; | 
 |         int32_t bufPos; | 
 |         friend class RuleCharacterIterator; | 
 |     }; | 
 |  | 
 |     /** | 
 |      * Sets an object which, when later passed to setPos(), will | 
 |      * restore this iterator's position.  Usage idiom: | 
 |      * | 
 |      * RuleCharacterIterator iterator = ...; | 
 |      * RuleCharacterIterator::Pos pos; | 
 |      * iterator.getPos(pos); | 
 |      * for (;;) { | 
 |      *   iterator.getPos(pos); | 
 |      *   int c = iterator.next(...); | 
 |      *   ... | 
 |      * } | 
 |      * iterator.setPos(pos); | 
 |      * | 
 |      * @param p a position object to be set to this iterator's | 
 |      * current position. | 
 |      */ | 
 |     void getPos(Pos& p) const; | 
 |  | 
 |     /** | 
 |      * Restores this iterator to the position it had when getPos() | 
 |      * set the given object. | 
 |      * @param p a position object previously set by getPos() | 
 |      */ | 
 |     void setPos(const Pos& p); | 
 |  | 
 |     /** | 
 |      * Skips ahead past any ignored characters, as indicated by the given | 
 |      * options.  This is useful in conjunction with the lookahead() method. | 
 |      * | 
 |      * Currently, this only has an effect for SKIP_WHITESPACE. | 
 |      * @param options one or more of the following options, bitwise-OR-ed | 
 |      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. | 
 |      */ | 
 |     void skipIgnored(int32_t options); | 
 |  | 
 |     /** | 
 |      * Returns a string containing the remainder of the characters to be | 
 |      * returned by this iterator, without any option processing.  If the | 
 |      * iterator is currently within a variable expansion, this will only | 
 |      * extend to the end of the variable expansion.  This method is provided | 
 |      * so that iterators may interoperate with string-based APIs.  The typical | 
 |      * sequence of calls is to call skipIgnored(), then call lookahead(), then | 
 |      * parse the string returned by lookahead(), then call jumpahead() to | 
 |      * resynchronize the iterator. | 
 |      * @param result a string to receive the characters to be returned | 
 |      * by future calls to next() | 
 |      * @param maxLookAhead The maximum to copy into the result.  Defaults | 
 |      * to -1.  NOTE(review): presumably -1 means "no limit" -- confirm | 
 |      * against the implementation. | 
 |      * @return a reference to result | 
 |      */ | 
 |     UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const; | 
 |  | 
 |     /** | 
 |      * Advances the position by the given number of 16-bit code units. | 
 |      * This is useful in conjunction with the lookahead() method. | 
 |      * @param count the number of 16-bit code units to jump over | 
 |      */ | 
 |     void jumpahead(int32_t count); | 
 |  | 
 |     /** | 
 |      * Returns a string representation of this object, consisting of the | 
 |      * characters being iterated, with a '|' marking the current position. | 
 |      * Position within an expanded variable is <em>not</em> indicated. | 
 |      * @param result output parameter to receive a string | 
 |      * representation of this object | 
 |      * | 
 |      * NOTE: the declaration that follows is commented out, so this method | 
 |      * is not currently part of the class. | 
 |      */ | 
 | //    UnicodeString& toString(UnicodeString& result) const; | 
 |       | 
 | private: | 
 |     /** | 
 |      * Returns the current 32-bit code point without parsing escapes, parsing | 
 |      * variables, or skipping whitespace. | 
 |      * @return the current 32-bit code point | 
 |      */ | 
 |     UChar32 _current() const; | 
 |       | 
 |     /** | 
 |      * Advances the position by the given amount. | 
 |      * @param count the number of 16-bit code units to advance past | 
 |      */ | 
 |     void _advance(int32_t count); | 
 | }; | 
 |  | 
 | // Inline definition of inVariable(): the iterator is inside a variable | 
 | // expansion exactly when an expansion buffer is set (buf is non-null). | 
 | inline UBool RuleCharacterIterator::inVariable() const { | 
 |     const UBool expanding = (buf != 0); | 
 |     return expanding; | 
 | } | 
 |  | 
 | U_NAMESPACE_END | 
 |  | 
 | #endif // _RULEITER_H_ | 
 | //eof |