|  | // © 2016 and later: Unicode, Inc. and others. | 
|  | // License & terms of use: http://www.unicode.org/copyright.html | 
|  | /* | 
|  | ********************************************************************** | 
|  | * Copyright (c) 2003-2011, International Business Machines | 
|  | * Corporation and others.  All Rights Reserved. | 
|  | ********************************************************************** | 
|  | * Author: Alan Liu | 
|  | * Created: September 24 2003 | 
|  | * Since: ICU 2.8 | 
|  | ********************************************************************** | 
|  | */ | 
|  | #include "ruleiter.h" | 
|  | #include "unicode/parsepos.h" | 
|  | #include "unicode/symtable.h" | 
|  | #include "unicode/unistr.h" | 
|  | #include "unicode/utf16.h" | 
|  | #include "patternprops.h" | 
|  |  | 
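/*
 * Longest escape that next() looks ahead for, counted in UTF-16 code units:
 * \U87654321 is 10 units, and a surrogate pair written as \ud800\udc00 is 12.
 */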
|  | #define MAX_U_NOTATION_LEN 12 | 
|  |  | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, | 
|  | ParsePosition& thePos) : | 
|  | text(theText), | 
|  | pos(thePos), | 
|  | sym(theSym), | 
|  | buf(0), | 
|  | bufPos(0) | 
|  | {} | 
|  |  | 
|  | UBool RuleCharacterIterator::atEnd() const { | 
|  | return buf == 0 && pos.getIndex() == text.length(); | 
|  | } | 
|  |  | 
|  | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | 
|  | if (U_FAILURE(ec)) return DONE; | 
|  |  | 
|  | UChar32 c = DONE; | 
|  | isEscaped = false; | 
|  |  | 
|  | for (;;) { | 
|  | c = _current(); | 
|  | _advance(U16_LENGTH(c)); | 
|  |  | 
|  | if (c == SymbolTable::SYMBOL_REF && buf == 0 && | 
|  | (options & PARSE_VARIABLES) != 0 && sym != 0) { | 
|  | UnicodeString name = sym->parseReference(text, pos, text.length()); | 
|  | // If name is empty there was an isolated SYMBOL_REF; | 
|  | // return it.  Caller must be prepared for this. | 
|  | if (name.length() == 0) { | 
|  | break; | 
|  | } | 
|  | bufPos = 0; | 
|  | buf = sym->lookup(name); | 
|  | if (buf == 0) { | 
|  | ec = U_UNDEFINED_VARIABLE; | 
|  | return DONE; | 
|  | } | 
|  | // Handle empty variable value | 
|  | if (buf->length() == 0) { | 
|  | buf = 0; | 
|  | } | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | 
|  | UnicodeString tempEscape; | 
|  | int32_t offset = 0; | 
|  | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); | 
|  | jumpahead(offset); | 
|  | isEscaped = true; | 
|  | if (c < 0) { | 
|  | ec = U_MALFORMED_UNICODE_ESCAPE; | 
|  | return DONE; | 
|  | } | 
|  | } | 
|  |  | 
|  | break; | 
|  | } | 
|  |  | 
|  | return c; | 
|  | } | 
|  |  | 
|  | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | 
|  | p.buf = buf; | 
|  | p.pos = pos.getIndex(); | 
|  | p.bufPos = bufPos; | 
|  | } | 
|  |  | 
|  | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | 
|  | buf = p.buf; | 
|  | pos.setIndex(p.pos); | 
|  | bufPos = p.bufPos; | 
|  | } | 
|  |  | 
|  | void RuleCharacterIterator::skipIgnored(int32_t options) { | 
|  | if ((options & SKIP_WHITESPACE) != 0) { | 
|  | for (;;) { | 
|  | UChar32 a = _current(); | 
|  | if (!PatternProps::isWhiteSpace(a)) break; | 
|  | _advance(U16_LENGTH(a)); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { | 
|  | if (maxLookAhead < 0) { | 
|  | maxLookAhead = 0x7FFFFFFF; | 
|  | } | 
|  | if (buf != 0) { | 
|  | buf->extract(bufPos, maxLookAhead, result); | 
|  | } else { | 
|  | text.extract(pos.getIndex(), maxLookAhead, result); | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | void RuleCharacterIterator::jumpahead(int32_t count) { | 
|  | _advance(count); | 
|  | } | 
|  |  | 
|  | /* | 
|  | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { | 
|  | int32_t b = pos.getIndex(); | 
|  | text.extract(0, b, result); | 
|  | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index | 
|  | } | 
|  | */ | 
|  |  | 
|  | UChar32 RuleCharacterIterator::_current() const { | 
|  | if (buf != 0) { | 
|  | return buf->char32At(bufPos); | 
|  | } else { | 
|  | int i = pos.getIndex(); | 
|  | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; | 
|  | } | 
|  | } | 
|  |  | 
|  | void RuleCharacterIterator::_advance(int32_t count) { | 
|  | if (buf != 0) { | 
|  | bufPos += count; | 
|  | if (bufPos == buf->length()) { | 
|  | buf = 0; | 
|  | } | 
|  | } else { | 
|  | pos.setIndex(pos.getIndex() + count); | 
|  | if (pos.getIndex() > text.length()) { | 
|  | pos.setIndex(text.length()); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | U_NAMESPACE_END | 
|  |  | 
|  | //eof |