/*
**********************************************************************
*   Copyright (C) 1999-2001 International Business Machines Corporation   *
*   and others. All rights reserved.                                 *
**********************************************************************
*   Date        Name        Description
*   11/11/99    rgillam     Complete port from Java.
**********************************************************************
*/
|  |  | 
|  | #include "unicode/rbbi.h" | 
|  | #include "unicode/schriter.h" | 
|  | #include "rbbi_tbl.h" | 
|  | #include "filestrm.h" | 
|  | #include "cmemory.h" | 
|  |  | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | /** | 
|  | * A token used as a character-category value to identify ignore characters | 
|  | */ | 
|  | const int8_t | 
|  | RuleBasedBreakIterator::UBRK_IGNORE = -1; | 
|  |  | 
|  | /** | 
|  | * The state number of the starting state | 
|  | */ | 
|  | const int16_t | 
|  | RuleBasedBreakIterator::START_STATE = 1; | 
|  |  | 
|  | /** | 
|  | * The state-transition value indicating "stop" | 
|  | */ | 
|  | const int16_t | 
|  | RuleBasedBreakIterator::STOP_STATE = 0; | 
|  |  | 
|  | /** | 
|  | * Class ID.  (value is irrelevant; address is important) | 
|  | */ | 
|  | const char | 
|  | RuleBasedBreakIterator::fgClassID = 0; | 
|  |  | 
//=======================================================================
// constructors
//=======================================================================
|  |  | 
|  | /** | 
|  | * Constructs a RuleBasedBreakIterator that uses the already-created | 
|  | * tables object that is passed in as a parameter. | 
|  | */ | 
|  | RuleBasedBreakIterator::RuleBasedBreakIterator(RuleBasedBreakIteratorTables* adoptTables) | 
|  | : text(NULL), | 
|  | tables(adoptTables) | 
|  | { | 
|  | } | 
|  |  | 
|  | // This constructor uses the udata interface to create a BreakIterator whose | 
|  | // internal tables live in a memory-mapped file.  "image" is a pointer to the | 
|  | // beginning of that file. | 
|  | RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* image) | 
|  | : text(NULL), | 
|  | tables(image != NULL ? new RuleBasedBreakIteratorTables(image) : NULL) | 
|  | { | 
|  | if (tables != NULL) | 
|  | tables->addReference(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Copy constructor.  Will produce a collator with the same behavior, | 
|  | * and which iterates over the same text, as the one passed in. | 
|  | */ | 
|  | RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& that) | 
|  | : BreakIterator(), // The copy constructor is private :( | 
|  | text(that.text->clone()), | 
|  | tables(that.tables) | 
|  | { | 
|  | tables->addReference(); | 
|  | } | 
|  |  | 
//=======================================================================
// boilerplate
//=======================================================================
|  | /** | 
|  | * Destructor | 
|  | */ | 
|  | RuleBasedBreakIterator::~RuleBasedBreakIterator() { | 
|  | delete text; | 
|  | tables->removeReference(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Assignment operator.  Sets this iterator to have the same behavior, | 
|  | * and iterate over the same text, as the one passed in. | 
|  | */ | 
|  | RuleBasedBreakIterator& | 
|  | RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { | 
|  | delete text; | 
|  | text = that.text->clone(); | 
|  |  | 
|  | tables->removeReference(); | 
|  | tables = that.tables; | 
|  | tables->addReference(); | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns a newly-constructed RuleBasedBreakIterator with the same | 
|  | * behavior, and iterating over the same text, as this one. | 
|  | */ | 
|  | BreakIterator* | 
|  | RuleBasedBreakIterator::clone(void) const { | 
|  | return new RuleBasedBreakIterator(*this); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Equality operator.  Returns TRUE if both BreakIterators are of the | 
|  | * same class, have the same behavior, and iterate over the same text. | 
|  | */ | 
|  | UBool | 
|  | RuleBasedBreakIterator::operator==(const BreakIterator& that) const { | 
|  | if (that.getDynamicClassID() != getDynamicClassID()) | 
|  | return FALSE; | 
|  |  | 
|  |  | 
|  | const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&)that; | 
|  | return (that2.text == text || *that2.text == *text) | 
|  | && (that2.tables == tables || *that2.tables == *tables); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Compute a hash code for this BreakIterator | 
|  | * @return A hash code | 
|  | */ | 
|  | int32_t | 
|  | RuleBasedBreakIterator::hashCode(void) const { | 
|  | return tables->hashCode(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the description used to create this iterator | 
|  | */ | 
|  | const UnicodeString& | 
|  | RuleBasedBreakIterator::getRules() const { | 
|  | return tables->getRules(); | 
|  | } | 
|  |  | 
//=======================================================================
// BreakIterator overrides
//=======================================================================
|  |  | 
|  | /** | 
|  | * Return a CharacterIterator over the text being analyzed.  This version | 
|  | * of this method returns the actual CharacterIterator we're using internally. | 
|  | * Changing the state of this iterator can have undefined consequences.  If | 
|  | * you need to change it, clone it first. | 
|  | * @return An iterator over the text being analyzed. | 
|  | */ | 
|  | const CharacterIterator& | 
|  | RuleBasedBreakIterator::getText() const { | 
|  | RuleBasedBreakIterator* nonConstThis = (RuleBasedBreakIterator*)this; | 
|  |  | 
|  | // The iterator is initialized pointing to no text at all, so if this | 
|  | // function is called while we're in that state, we have to fudge an | 
|  | // an iterator to return. | 
|  | if (nonConstThis->text == NULL) | 
|  | nonConstThis->text = new StringCharacterIterator(""); | 
|  | return *nonConstThis->text; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Set the iterator to analyze a new piece of text.  This function resets | 
|  | * the current iteration position to the beginning of the text. | 
|  | * @param newText An iterator over the text to analyze. | 
|  | */ | 
|  | void | 
|  | RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { | 
|  | reset(); | 
|  | delete text; | 
|  | text = newText; | 
|  | text->first(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Set the iterator to analyze a new piece of text.  This function resets | 
|  | * the current iteration position to the beginning of the text. | 
|  | * @param newText An iterator over the text to analyze. | 
|  | */ | 
|  | void | 
|  | RuleBasedBreakIterator::setText(const UnicodeString& newText) { | 
|  | reset(); | 
|  | if (text != NULL && text->getDynamicClassID() | 
|  | == StringCharacterIterator::getStaticClassID()) { | 
|  | ((StringCharacterIterator*)text)->setText(newText); | 
|  | } | 
|  | else { | 
|  | delete text; | 
|  | text = new StringCharacterIterator(newText); | 
|  | text->first(); | 
|  | } | 
|  | } | 
|  |  | 
|  | #ifdef ICU_ENABLE_DEPRECATED_BREAKITERATOR | 
|  | /** | 
|  | * Returns a newly-created CharacterIterator that the caller is to take | 
|  | * ownership of. | 
|  | * THIS FUNCTION SHOULD NOT BE HERE.  IT'S HERE BECAUSE BreakIterator DEFINES | 
|  | * IT AS PURE VIRTUAL, FORCING RBBI TO IMPLEMENT IT.  IT SHOULD BE REMOVED | 
|  | * FROM *BOTH* CLASSES. | 
|  | */ | 
|  | CharacterIterator* | 
|  | RuleBasedBreakIterator::createText() const { | 
|  | if (text == NULL) | 
|  | return new StringCharacterIterator(""); | 
|  | else | 
|  | return text->clone(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Set the iterator to analyze a new piece of text.  This function resets | 
|  | * the current iteration position to the beginning of the text. | 
|  | * @param newText The text to analyze. | 
|  | * THIS FUNCTION SHOULD NOT BE HERE.  IT'S HERE BECAUSE BreakIterator DEFINES | 
|  | * IT AS PURE VIRTUAL, FORCING RBBI TO IMPLEMENT IT.  IT SHOULD BE REMOVED | 
|  | * FROM *BOTH* CLASSES. | 
|  | */ | 
|  | void | 
|  | RuleBasedBreakIterator::setText(const UnicodeString* newText) { | 
|  | setText(*newText); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | /** | 
|  | * Sets the current iteration position to the beginning of the text. | 
|  | * (i.e., the CharacterIterator's starting offset). | 
|  | * @return The offset of the beginning of the text. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::first(void) { | 
|  | reset(); | 
|  | if (text == NULL) | 
|  | return BreakIterator::DONE; | 
|  |  | 
|  | text->first(); | 
|  | return text->getIndex(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Sets the current iteration position to the end of the text. | 
|  | * (i.e., the CharacterIterator's ending offset). | 
|  | * @return The text's past-the-end offset. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::last(void) { | 
|  | reset(); | 
|  | if (text == NULL) | 
|  | return BreakIterator::DONE; | 
|  |  | 
|  | // I'm not sure why, but t.last() returns the offset of the last character, | 
|  | // rather than the past-the-end offset | 
|  |  | 
|  | int32_t pos = text->endIndex(); | 
|  | text->setIndex(pos); | 
|  | return pos; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Advances the iterator either forward or backward the specified number of steps. | 
|  | * Negative values move backward, and positive values move forward.  This is | 
|  | * equivalent to repeatedly calling next() or previous(). | 
|  | * @param n The number of steps to move.  The sign indicates the direction | 
|  | * (negative is backwards, and positive is forwards). | 
|  | * @return The character offset of the boundary position n boundaries away from | 
|  | * the current one. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::next(int32_t n) { | 
|  | int32_t result = current(); | 
|  | while (n > 0) { | 
|  | result = handleNext(); | 
|  | --n; | 
|  | } | 
|  | while (n < 0) { | 
|  | result = previous(); | 
|  | ++n; | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Advances the iterator to the next boundary position. | 
|  | * @return The position of the first boundary after this one. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::next(void) { | 
|  | return handleNext(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Advances the iterator backwards, to the last boundary preceding this one. | 
|  | * @return The position of the last boundary position preceding this one. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::previous(void) { | 
|  | // if we're already sitting at the beginning of the text, return DONE | 
|  | if (text == NULL || current() == text->startIndex()) | 
|  | return BreakIterator::DONE; | 
|  |  | 
|  | // set things up.  handlePrevious() will back us up to some valid | 
|  | // break position before the current position (we back our internal | 
|  | // iterator up one step to prevent handlePrevious() from returning | 
|  | // the current position), but not necessarily the last one before | 
|  | // where we started | 
|  | int32_t start = current(); | 
|  | text->previous(); | 
|  | int32_t lastResult = handlePrevious(); | 
|  | int32_t result = lastResult; | 
|  |  | 
|  | // iterate forward from the known break position until we pass our | 
|  | // starting point.  The last break position before the starting | 
|  | // point is our return value | 
|  | while (result != BreakIterator::DONE && result < start) { | 
|  | lastResult = result; | 
|  | result = handleNext(); | 
|  | } | 
|  |  | 
|  | // set the current iteration position to be the last break position | 
|  | // before where we started, and then return that value | 
|  | text->setIndex(lastResult); | 
|  | return lastResult; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Sets the iterator to refer to the first boundary position following | 
|  | * the specified position. | 
|  | * @offset The position from which to begin searching for a break position. | 
|  | * @return The position of the first break after the current position. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::following(int32_t offset) { | 
|  | // if the offset passed in is already past the end of the text, | 
|  | // just return DONE; if it's before the beginning, return the | 
|  | // text's starting offset | 
|  | if (text == NULL || offset >= text->endIndex()) { | 
|  | return BreakIterator::DONE; | 
|  | } | 
|  | else if (offset < text->startIndex()) { | 
|  | return text->startIndex(); | 
|  | } | 
|  |  | 
|  | // otherwise, set our internal iteration position (temporarily) | 
|  | // to the position passed in.  If this is the _beginning_ position, | 
|  | // then we can just use next() to get our return value | 
|  | text->setIndex(offset); | 
|  | if (offset == text->startIndex()) | 
|  | return handleNext(); | 
|  |  | 
|  | // otherwise, we have to sync up first.  Use handlePrevious() to back | 
|  | // us up to a known break position before the specified position (if | 
|  | // we can determine that the specified position is a break position, | 
|  | // we don't back up at all).  This may or may not be the last break | 
|  | // position at or before our starting position.  Advance forward | 
|  | // from here until we've passed the starting position.  The position | 
|  | // we stop on will be the first break position after the specified one. | 
|  | int32_t result = handlePrevious(); | 
|  | while (result != BreakIterator::DONE && result <= offset) | 
|  | result = handleNext(); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Sets the iterator to refer to the last boundary position before the | 
|  | * specified position. | 
|  | * @offset The position to begin searching for a break from. | 
|  | * @return The position of the last boundary before the starting position. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::preceding(int32_t offset) { | 
|  | // if the offset passed in is already past the end of the text, | 
|  | // just return DONE; if it's before the beginning, return the | 
|  | // text's starting offset | 
|  | if (text == NULL || offset > text->endIndex()) { | 
|  | return BreakIterator::DONE; | 
|  | } | 
|  | else if (offset < text->startIndex()) { | 
|  | return text->startIndex(); | 
|  | } | 
|  |  | 
|  | // if we start by updating the current iteration position to the | 
|  | // position specified by the caller, we can just use previous() | 
|  | // to carry out this operation | 
|  | text->setIndex(offset); | 
|  | return previous(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns true if the specfied position is a boundary position.  As a side | 
|  | * effect, leaves the iterator pointing to the first boundary position at | 
|  | * or after "offset". | 
|  | * @param offset the offset to check. | 
|  | * @return True if "offset" is a boundary position. | 
|  | */ | 
|  | UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { | 
|  | // the beginning index of the iterator is always a boundary position by definition | 
|  | if (text == NULL || offset == text->startIndex()) { | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | // out-of-range indexes are never boundary positions | 
|  | else if (offset < text->startIndex() || offset > text->endIndex()) { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | // otherwise, we can use following() on the position before the specified | 
|  | // one and return true of the position we get back is the one the user | 
|  | // specified | 
|  | else | 
|  | return following(offset - 1) == offset; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Returns the current iteration position. | 
|  | * @return The current iteration position. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::current(void) const { | 
|  | return (text != NULL) ? text->getIndex() : BreakIterator::DONE; | 
|  | } | 
|  |  | 
//=======================================================================
// implementation
//=======================================================================
|  |  | 
|  | /** | 
|  | * This method is the actual implementation of the next() method.  All iteration | 
|  | * vectors through here.  This method initializes the state machine to state 1 | 
|  | * and advances through the text character by character until we reach the end | 
|  | * of the text or the state machine transitions to state 0.  We update our return | 
|  | * value every time the state machine passes through a possible end state. | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::handleNext(void) { | 
|  | // if we're already at the end of the text, return DONE. | 
|  | if (text == NULL || tables == NULL || text->getIndex() == text->endIndex()) | 
|  | return BreakIterator::DONE; | 
|  |  | 
|  | // no matter what, we always advance at least one character forward | 
|  | int32_t result = text->getIndex() + 1; | 
|  | int32_t lookaheadResult = 0; | 
|  |  | 
|  | // begin in state 1 | 
|  | int32_t state = START_STATE; | 
|  | int32_t category; | 
|  | UChar c = text->current(); | 
|  | UChar lastC = c; | 
|  | int32_t lastCPos = 0; | 
|  |  | 
|  |  | 
|  | // loop until we reach the end of the text or transition to state 0 | 
|  | while (c != CharacterIterator::DONE && state != STOP_STATE) { | 
|  |  | 
|  | // look up the current character's character category (which tells us | 
|  | // which column in the state table to look at) | 
|  | category = tables->lookupCategory(c, this); | 
|  |  | 
|  | // if the character isn't an ignore character, look up a state | 
|  | // transition in the state table | 
|  | if (category != UBRK_IGNORE) { | 
|  | state = tables->lookupState(state, category); | 
|  | } | 
|  |  | 
|  | // if the state we've just transitioned to is a lookahead state, | 
|  | // (but not also an end state), save its position.  If it's | 
|  | // both a lookahead state and an end state, update the break position | 
|  | // to the last saved lookup-state position | 
|  | if (tables->isLookaheadState(state)) { | 
|  | if (tables->isEndState(state)) { | 
|  | if (lookaheadResult > 0) { | 
|  | result = lookaheadResult; | 
|  | } | 
|  | else { | 
|  | result = text->getIndex() + 1; | 
|  | } | 
|  | } | 
|  | else { | 
|  | lookaheadResult = text->getIndex() + 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // otherwise, if the state we've just transitioned to is an accepting state, | 
|  | // update our return value to be the current iteration position | 
|  | else { | 
|  | if (tables->isEndState(state)) { | 
|  | result = text->getIndex() + 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // keep track of the last "real" character we saw.  If this character isn't an | 
|  | // ignore character, take note of it and its position in the text | 
|  | if (category != UBRK_IGNORE && state != STOP_STATE) { | 
|  | lastC = c; | 
|  | lastCPos = text->getIndex(); | 
|  | } | 
|  | c = text->next(); | 
|  | } | 
|  |  | 
|  | // if we've run off the end of the text, and the very last character took us into | 
|  | // a lookahead state, advance the break position to the lookahead position | 
|  | // (the theory here is that if there are no characters at all after the lookahead | 
|  | // position, that always matches the lookahead criteria) | 
|  | if (c == CharacterIterator::DONE && lookaheadResult == text->endIndex()) { | 
|  | result = lookaheadResult; | 
|  | } | 
|  |  | 
|  | // if the last character we saw before the one that took us into the stop state | 
|  | // was a mandatory breaking character, then the break position goes right after it | 
|  | // (this is here so that breaks come before, rather than after, a string of | 
|  | // ignore characters when they follow a mandatory break character) | 
|  | else if (lastC == 0x0a || lastC == 0x0d || lastC == 0x0c || lastC == 0x2028 | 
|  | || lastC == 0x2029) { | 
|  | result = lastCPos + 1; | 
|  | } | 
|  |  | 
|  | text->setIndex(result); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * This method backs the iterator back up to a "safe position" in the text. | 
|  | * This is a position that we know, without any context, must be a break position. | 
|  | * The various calling methods then iterate forward from this safe position to | 
|  | * the appropriate position to return.  (For more information, see the description | 
|  | * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) | 
|  | */ | 
|  | int32_t RuleBasedBreakIterator::handlePrevious(void) { | 
|  | if (text == NULL || tables == NULL) | 
|  | return 0; | 
|  |  | 
|  | int32_t state = START_STATE; | 
|  | int32_t category = 0; | 
|  | int32_t lastCategory = 0; | 
|  | UChar c = text->current(); | 
|  |  | 
|  | // loop until we reach the beginning of the text or transition to state 0 | 
|  | while (c != CharacterIterator::DONE && state != STOP_STATE) { | 
|  |  | 
|  | // save the last character's category and look up the current | 
|  | // character's category | 
|  | lastCategory = category; | 
|  | category = tables->lookupCategory(c, this); | 
|  |  | 
|  | // if the current character isn't an ignore character, look up a | 
|  | // state transition in the backwards state table | 
|  | if (category != UBRK_IGNORE) | 
|  | state = tables->lookupBackwardState(state, category); | 
|  |  | 
|  | // then advance one character backwards | 
|  | c = text->previous(); | 
|  | } | 
|  |  | 
|  | // if we didn't march off the beginning of the text, we're either one or two | 
|  | // positions away from the real break position.  (One because of the call to | 
|  | // previous() at the end of the loop above, and another because the character | 
|  | // that takes us into the stop state will always be the character BEFORE | 
|  | // the break position.) | 
|  | if (c != CharacterIterator::DONE) { | 
|  | if (lastCategory != UBRK_IGNORE) | 
|  | text->setIndex(text->getIndex() + 2); | 
|  | else | 
|  | text->next(); | 
|  | } | 
|  |  | 
|  | return text->getIndex(); | 
|  | } | 
|  |  | 
|  | void | 
|  | RuleBasedBreakIterator::reset() | 
|  | { | 
|  | // Base-class version of this function is a no-op. | 
|  | // Subclasses may override with their own reset behavior. | 
|  | } | 
|  |  | 
|  | // internal type for BufferClone | 
|  | struct bufferCloneStructUChar | 
|  | { | 
|  | uint8_t bi   [sizeof(RuleBasedBreakIterator)] ; | 
|  | uint8_t text [sizeof(UCharCharacterIterator)] ; | 
|  | }; | 
|  |  | 
|  | struct bufferCloneStructString | 
|  | { | 
|  | uint8_t bi   [sizeof(RuleBasedBreakIterator)] ; | 
|  | uint8_t text [sizeof(StringCharacterIterator)] ; | 
|  | }; | 
|  |  | 
|  | BreakIterator *  RuleBasedBreakIterator::createBufferClone(void *stackBuffer, | 
|  | int32_t &BufferSize, | 
|  | UErrorCode &status) | 
|  | { | 
|  | RuleBasedBreakIterator * localIterator; | 
|  | int32_t bufferSizeNeeded = 0; | 
|  | UBool IterIsUChar = FALSE; | 
|  | UBool IterIsString = FALSE; | 
|  | char *stackBufferChars = (char *)stackBuffer; | 
|  |  | 
|  | if (U_FAILURE(status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Pointers on 64-bit platforms need to be aligned | 
|  | * on a 64-bit boundry in memory. | 
|  | */ | 
|  | if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { | 
|  | int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); | 
|  | BufferSize -= offsetUp; | 
|  | stackBufferChars += offsetUp; | 
|  | } | 
|  | stackBuffer = (void *)stackBufferChars; | 
|  |  | 
|  | if (text == NULL) | 
|  | { | 
|  | bufferSizeNeeded = (int32_t) sizeof(RuleBasedBreakIterator); | 
|  | } | 
|  | else if (text->getDynamicClassID() == StringCharacterIterator::getStaticClassID()) | 
|  | { | 
|  | bufferSizeNeeded = (int32_t) sizeof(struct bufferCloneStructString); | 
|  | IterIsString = TRUE; | 
|  | } | 
|  | else if (text->getDynamicClassID() == UCharCharacterIterator::getStaticClassID()) | 
|  | { | 
|  | bufferSizeNeeded = (int32_t) sizeof(struct bufferCloneStructUChar); | 
|  | IterIsUChar = TRUE; | 
|  | } | 
|  | else | 
|  | { | 
|  | // code has changed - time to make a real CharacterIterator::CreateBufferClone() | 
|  | } | 
|  | if (BufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ | 
|  | BufferSize = bufferSizeNeeded; | 
|  | return 0; | 
|  | } | 
|  | if (BufferSize < bufferSizeNeeded || !stackBuffer) | 
|  | { | 
|  | /* allocate one here...*/ | 
|  | localIterator = new RuleBasedBreakIterator(*this); | 
|  | status = U_SAFECLONE_ALLOCATED_ERROR; | 
|  | return localIterator; | 
|  | } | 
|  | if (IterIsUChar) { | 
|  | struct bufferCloneStructUChar * localClone | 
|  | = (struct bufferCloneStructUChar  *)stackBuffer; | 
|  | localIterator = (RuleBasedBreakIterator *)&localClone->bi; | 
|  | uprv_memcpy(localIterator, this, sizeof(RuleBasedBreakIterator)); | 
|  | uprv_memcpy(&localClone->text, text, sizeof(UCharCharacterIterator)); | 
|  | localIterator->text = (CharacterIterator *) &localClone->text; | 
|  | } else if (IterIsString) { | 
|  | struct bufferCloneStructString * localClone | 
|  | = (struct bufferCloneStructString  *)stackBuffer; | 
|  | localIterator = (RuleBasedBreakIterator *)&localClone->bi; | 
|  | uprv_memcpy(localIterator, this, sizeof(RuleBasedBreakIterator)); | 
|  | uprv_memcpy(&localClone->text, text, sizeof(StringCharacterIterator)); | 
|  | localIterator->text = (CharacterIterator *)&localClone->text; | 
|  | } else { | 
|  | RuleBasedBreakIterator * localClone | 
|  | = (RuleBasedBreakIterator *)stackBuffer; | 
|  | localIterator = localClone; | 
|  | uprv_memcpy(localIterator, this, sizeof(RuleBasedBreakIterator)); | 
|  | } | 
|  |  | 
|  | localIterator->fBufferClone = TRUE; | 
|  |  | 
|  | return localIterator; | 
|  | } | 
|  |  | 
|  | #ifdef RBBI_DEBUG | 
|  | void RuleBasedBreakIterator::debugDumpTables() const { | 
|  | tables->debugDumpTables(); | 
|  | } | 
|  | #endif | 
|  |  | 
|  |  | 
|  | U_NAMESPACE_END | 
|  |  |