| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2010-2012, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| * file name: bytestrieiterator.cpp |
| * encoding: UTF-8 |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2010nov03 |
| * created by: Markus W. Scherer |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "unicode/bytestrie.h" |
| #include "unicode/stringpiece.h" |
| #include "charstr.h" |
| #include "uvectr32.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, |
| UErrorCode &errorCode) |
| : bytes_(static_cast<const uint8_t *>(trieBytes)), |
| pos_(bytes_), initialPos_(bytes_), |
| remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
| str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into |
| // a public API header for which we would want it to depend only on |
| // other public headers. |
| // Unlike BytesTrie itself, its Iterator performs memory allocations anyway |
| // via the CharString and UVector32 implementations, so this additional |
| // cost is minimal. |
| str_=new CharString(); |
| stack_=new UVector32(errorCode); |
| if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| } |
| } |
| |
| BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, |
| UErrorCode &errorCode) |
| : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), |
| remainingMatchLength_(trie.remainingMatchLength_), |
| initialRemainingMatchLength_(trie.remainingMatchLength_), |
| str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| str_=new CharString(); |
| stack_=new UVector32(errorCode); |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| if(str_==NULL || stack_==NULL) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
| if(length>=0) { |
| // Pending linear-match node, append remaining bytes to str_. |
| ++length; |
| if(maxLength_>0 && length>maxLength_) { |
| length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
| } |
| str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); |
| pos_+=length; |
| remainingMatchLength_-=length; |
| } |
| } |
| |
| BytesTrie::Iterator::~Iterator() { |
| delete str_; |
| delete stack_; |
| } |
| |
| BytesTrie::Iterator & |
| BytesTrie::Iterator::reset() { |
| pos_=initialPos_; |
| remainingMatchLength_=initialRemainingMatchLength_; |
| int32_t length=remainingMatchLength_+1; // Remaining match length. |
| if(maxLength_>0 && length>maxLength_) { |
| length=maxLength_; |
| } |
| str_->truncate(length); |
| pos_+=length; |
| remainingMatchLength_-=length; |
| stack_->setSize(0); |
| return *this; |
| } |
| |
| UBool |
| BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
| |
| UBool |
| BytesTrie::Iterator::next(UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { |
| return FALSE; |
| } |
| const uint8_t *pos=pos_; |
| if(pos==NULL) { |
| if(stack_->isEmpty()) { |
| return FALSE; |
| } |
| // Pop the state off the stack and continue with the next outbound edge of |
| // the branch node. |
| int32_t stackSize=stack_->size(); |
| int32_t length=stack_->elementAti(stackSize-1); |
| pos=bytes_+stack_->elementAti(stackSize-2); |
| stack_->setSize(stackSize-2); |
| str_->truncate(length&0xffff); |
| length=(int32_t)((uint32_t)length>>16); |
| if(length>1) { |
| pos=branchNext(pos, length, errorCode); |
| if(pos==NULL) { |
| return TRUE; // Reached a final value. |
| } |
| } else { |
| str_->append((char)*pos++, errorCode); |
| } |
| } |
| if(remainingMatchLength_>=0) { |
| // We only get here if we started in a pending linear-match node |
| // with more than maxLength remaining bytes. |
| return truncateAndStop(); |
| } |
| for(;;) { |
| int32_t node=*pos++; |
| if(node>=kMinValueLead) { |
| // Deliver value for the byte sequence so far. |
| UBool isFinal=(UBool)(node&kValueIsFinal); |
| value_=readValue(pos, node>>1); |
| if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { |
| pos_=NULL; |
| } else { |
| pos_=skipValue(pos, node); |
| } |
| return TRUE; |
| } |
| if(maxLength_>0 && str_->length()==maxLength_) { |
| return truncateAndStop(); |
| } |
| if(node<kMinLinearMatch) { |
| if(node==0) { |
| node=*pos++; |
| } |
| pos=branchNext(pos, node+1, errorCode); |
| if(pos==NULL) { |
| return TRUE; // Reached a final value. |
| } |
| } else { |
| // Linear-match node, append length bytes to str_. |
| int32_t length=node-kMinLinearMatch+1; |
| if(maxLength_>0 && str_->length()+length>maxLength_) { |
| str_->append(reinterpret_cast<const char *>(pos), |
| maxLength_-str_->length(), errorCode); |
| return truncateAndStop(); |
| } |
| str_->append(reinterpret_cast<const char *>(pos), length, errorCode); |
| pos+=length; |
| } |
| } |
| } |
| |
| StringPiece |
| BytesTrie::Iterator::getString() const { |
| return str_ == NULL ? StringPiece() : str_->toStringPiece(); |
| } |
| |
| UBool |
| BytesTrie::Iterator::truncateAndStop() { |
| pos_=NULL; |
| value_=-1; // no real value for str |
| return TRUE; |
| } |
| |
| // Branch node, needs to take the first outbound edge and push state for the rest. |
| const uint8_t * |
| BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { |
| while(length>kMaxBranchLinearSubNodeLength) { |
| ++pos; // ignore the comparison byte |
| // Push state for the greater-or-equal edge. |
| stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); |
| stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); |
| // Follow the less-than edge. |
| length>>=1; |
| pos=jumpByDelta(pos); |
| } |
| // List of key-value pairs where values are either final values or jump deltas. |
| // Read the first (key, value) pair. |
| uint8_t trieByte=*pos++; |
| int32_t node=*pos++; |
| UBool isFinal=(UBool)(node&kValueIsFinal); |
| int32_t value=readValue(pos, node>>1); |
| pos=skipValue(pos, node); |
| stack_->addElement((int32_t)(pos-bytes_), errorCode); |
| stack_->addElement(((length-1)<<16)|str_->length(), errorCode); |
| str_->append((char)trieByte, errorCode); |
| if(isFinal) { |
| pos_=NULL; |
| value_=value; |
| return NULL; |
| } else { |
| return pos+value; |
| } |
| } |
| |
| U_NAMESPACE_END |