|  | /* | 
|  | ******************************************************************************* | 
|  | * Copyright (C) 1996-1999, International Business Machines Corporation and    * | 
|  | * others. All Rights Reserved.                                                * | 
|  | ******************************************************************************* | 
|  | * | 
|  | * File tblcoll.cpp | 
|  | * | 
|  | * Created by: Helena Shih | 
|  | * | 
|  | * Modification History: | 
|  | * | 
|  | *  Date        Name        Description | 
|  | *  2/5/97      aliu        Added streamIn and streamOut methods.  Added | 
|  | *                          constructor which reads RuleBasedCollator object from | 
|  | *                          a binary file.  Added writeToFile method which streams | 
|  | *                          RuleBasedCollator out to a binary file.  The streamIn | 
|  | *                          and streamOut methods use istream and ostream objects | 
|  | *                          in binary mode. | 
|  | *  2/11/97     aliu        Moved declarations out of for loop initializer. | 
|  | *                          Added Mac compatibility #ifdef for ios::nocreate. | 
|  | *  2/12/97     aliu        Modified to use TableCollationData sub-object to | 
|  | *                          hold invariant data. | 
|  | *  2/13/97     aliu        Moved several methods into this class from Collation. | 
|  | *                          Added a private RuleBasedCollator(Locale&) constructor, | 
|  | *                          to be used by Collator::getInstance().  General | 
|  | *                          clean up.  Made use of UErrorCode variables consistent. | 
|  | *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy | 
|  | *                          constructor and getDynamicClassID. | 
|  | *  3/5/97      aliu        Changed compaction cycle to improve performance.  We | 
|  | *                          use the maximum allowable value which is kBlockCount. | 
|  | *                          Modified getRules() to load rules dynamically.  Changed | 
|  | *                          constructFromFile() call to accomodate this (added | 
|  | *                          parameter to specify whether binary loading is to | 
|  | *                          take place). | 
|  | * 05/06/97     helena      Added memory allocation error check. | 
|  | *  6/20/97     helena      Java class name change. | 
|  | *  6/23/97     helena      Adding comments to make code more readable. | 
|  | * 09/03/97     helena      Added createCollationKeyValues(). | 
|  | * 06/26/98     erm         Changes for CollationKeys using byte arrays. | 
|  | * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java | 
|  | * 04/23/99     stephen     Removed EDecompositionMode, merged with | 
|  | *                          Normalizer::EMode | 
|  | * 06/14/99     stephen     Removed kResourceBundleSuffix | 
|  | * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx | 
|  | *                          files are no longer used. | 
|  | * 11/02/99     helena      Collator performance enhancements.  Special case | 
|  | *                          for NO_OP situations. | 
|  | * 11/17/99     srl         More performance enhancements. Inlined some internal functions. | 
|  | * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator | 
|  | *                          to implementation file. | 
|  | ******************************************************************************* | 
|  | */ | 
|  |  | 
|  | #include "ucmp32.h" | 
|  | #include "tcoldata.h" | 
|  |  | 
|  | #include "unicode/tblcoll.h" | 
|  |  | 
|  | #include "unicode/coleitr.h" | 
|  | #include "unicode/locid.h" | 
|  | #include "unicode/unicode.h" | 
|  | #include "tables.h" | 
|  | #include "unicode/normlzr.h" | 
|  | #include "mergecol.h" | 
|  | #include "unicode/resbund.h" | 
|  | #include "filestrm.h" | 
|  | #include "umemstrm.h" | 
|  |  | 
|  | #ifdef _DEBUG | 
|  | #include "unistrm.h" | 
|  | #endif | 
|  |  | 
|  | #include "compitr.h" | 
|  |  | 
|  | #include <string.h> | 
|  |  | 
|  | #include "unicode/ustring.h" | 
|  |  | 
|  | #include "cmemory.h" | 
|  |  | 
|  |  | 
|  | class RuleBasedCollatorStreamer | 
|  | { | 
|  | public: | 
|  | static void streamIn(RuleBasedCollator* collator, FileStream* is); | 
|  | static void streamOut(const RuleBasedCollator* collator, FileStream* os); | 
|  | static void streamIn(RuleBasedCollator* collator, UMemoryStream* is); | 
|  | static void streamOut(const RuleBasedCollator* collator, UMemoryStream* os); | 
|  | }; | 
|  |  | 
|  | //=========================================================================================== | 
//  The following diagram shows the data structure of the RuleBasedCollator object.
//  Suppose we have the rule below, where 'o-umlaut' is the Unicode character 0x00F6:
//  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
//  The rule says that the 'ch' ligatures sort with 'c' with only a tertiary difference,
//  and that 'o-umlaut' always sorts as if it were expanded with 'e'.
|  | // | 
|  | // mapping table                       contracting list                  expanding list | 
|  | // (contains all unicode char | 
|  | //  entries)                         ___     _____________         _________________________ | 
|  | //   ________                   |==>|_*_|-->|'c'  |v('c') |   |==>|v('o')|v('umlaut')|v('e')| | 
|  | //  |_\u0001_|--> v('\u0001')   |   |_:_|   |-------------|   |   |-------------------------| | 
|  | //  |_\u0002_|--> v('\u0002')   |   |_:_|   |'ch' |v('ch')|   |   |             :           | | 
|  | //  |____:___|                  |   |_:_|   |-------------|   |   |-------------------------| | 
|  | //  |____:___|                  |           |'cH' |v('cH')|   |   |             :           | | 
|  | //  |__'a'___|--> v('a')        |           |-------------|   |   |-------------------------| | 
|  | //  |__'b'___|--> v('b')        |           |'Ch' |v('Ch')|   |   |             :           | | 
|  | //  |____:___|                  |           |-------------|   |   |-------------------------| | 
|  | //  |____:___|                  |           |'CH' |v('CH')|   |   |             :           | | 
|  | //  |___'c'__|-------------------            -------------    |   |-------------------------| | 
|  | //  |____:___|                                                |   |             :           | | 
|  | //  |o-umlaut|------------------------------------------------    |_________________________| | 
|  | //  |____:___| | 
|  | // | 
|  | // | 
|  | // Noted by Helena Shih on 6/23/97 with pending design changes (slimming collation). | 
|  | //============================================================================================ | 
|  |  | 
|  | const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;             // need look up in .commit() | 
|  | const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;       // Expand index follows | 
|  | const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;     // contract indexes follows | 
|  | const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;              // unmapped character values | 
|  | const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000; // primary strength increment | 
|  | const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100; // secondary strength increment | 
|  | const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001; // tertiary strength increment | 
|  | const int32_t RuleBasedCollator::MAXIGNORABLE = 0x00010000;          // maximum ignorable char order value | 
|  | const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;      // mask off anything but primary order | 
|  | const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;    // mask off anything but secondary order | 
|  | const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;     // mask off anything but tertiary order | 
|  | const int32_t RuleBasedCollator::SECONDARYRESETMASK = 0x0000ffff;    // mask off secondary and tertiary order | 
|  | const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;         // mask off ignorable char order | 
|  | const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000; // use only the primary difference | 
|  | const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;  // use only the primary and secondary difference | 
|  | const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;             // primary order shift | 
|  | const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;            // secondary order shift | 
|  | const int32_t RuleBasedCollator::SORTKEYOFFSET = 1;                  // minimum sort key offset | 
|  | const int32_t RuleBasedCollator::CONTRACTCHAROVERFLOW = 0x7FFFFFFF;  // Indicates the char is a contract char | 
|  |  | 
|  | const int16_t RuleBasedCollator::FILEID = 0x5443;                    // unique file id for parity check | 
|  | const char* RuleBasedCollator::kFilenameSuffix = ".col";             // binary collation file extension | 
|  | char  RuleBasedCollator::fgClassID = 0; // Value is irrelevant       // class id | 
|  |  | 
|  | //////////////////////////////////////////////////////////////////////// | 
|  | // NormalizerIterator | 
|  | // | 
|  | // This class is essentially a duplicate of CollationElementIterator, | 
|  | // stripped down for speed.  It is declared here so we can incorporate | 
|  | // internal classes as subobjects, as well as just to hide it from the | 
|  | // public interface. | 
|  | //////////////////////////////////////////////////////////////////////// | 
|  |  | 
|  | /* Internal class for quick iteration over the text. | 
|  | 100% pure inline code | 
|  | */ | 
|  | class NormalizerIterator { | 
|  | public: | 
|  | Normalizer *cursor; | 
|  | VectorOfInt *bufferAlias; | 
|  | VectorOfInt *reorderBuffer; | 
|  | VectorOfInt ownBuffer; | 
|  | UChar*      text; | 
|  | int32_t     expIndex; | 
|  | int32_t     textLen; | 
|  | UTextOffset  currentOffset; | 
|  |  | 
|  | NormalizerIterator(void); | 
|  | NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode); | 
|  | ~NormalizerIterator(void); | 
|  | void setText(const UChar* source, int32_t length, UErrorCode& status); | 
|  | void setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status); | 
|  |  | 
|  | UChar current(void) const; | 
|  | UChar next(void); | 
|  | void reset(void); | 
|  | }; | 
|  |  | 
|  | inline | 
|  | NormalizerIterator::NormalizerIterator() : | 
|  | cursor(0), | 
|  | bufferAlias(0), | 
|  | reorderBuffer(0), | 
|  | ownBuffer(2), | 
|  | text(0), | 
|  | textLen(0), | 
|  | currentOffset(0), | 
|  | expIndex(0) | 
|  | { | 
|  | } | 
|  |  | 
|  | inline | 
|  | NormalizerIterator::NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode) : | 
|  | cursor(0), | 
|  | bufferAlias(0), | 
|  | reorderBuffer(0), | 
|  | ownBuffer(2), | 
|  | text(0), | 
|  | textLen(0), | 
|  | currentOffset(0), | 
|  | expIndex(0) | 
|  | { | 
|  | if (mode == Normalizer::NO_OP) { | 
|  | text = (UChar*)source; | 
|  | textLen = length; | 
|  | currentOffset = 0; | 
|  | } else { | 
|  | cursor = new Normalizer(source, length, mode); | 
|  |  | 
|  | } | 
|  | } | 
|  |  | 
|  | inline | 
|  | NormalizerIterator::~NormalizerIterator() | 
|  | { | 
|  | if (cursor != 0) { | 
|  | delete cursor; | 
|  | cursor = 0; | 
|  | } | 
|  | if (reorderBuffer != 0) { | 
|  | delete reorderBuffer; | 
|  | } | 
|  | } | 
|  |  | 
|  | inline | 
|  | void | 
|  | NormalizerIterator::setText(const UChar* source, int32_t length, UErrorCode& status) | 
|  | { | 
|  | if (cursor == 0) { | 
|  | text = (UChar*)source; | 
|  | textLen = length; | 
|  | currentOffset = 0; | 
|  |  | 
|  | } else { | 
|  | text = 0; | 
|  | cursor->setText(source, length, status); | 
|  | } | 
|  | bufferAlias = 0; | 
|  | currentOffset = 0; | 
|  | } | 
|  |  | 
|  | /* You can only set mode after the comparision of two strings is completed. | 
|  | Setting the mode in the middle of a comparison is not allowed. | 
|  | */ | 
|  | inline | 
|  | void | 
|  |  | 
|  | NormalizerIterator::setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status) | 
|  | { | 
|  | if(mode != Normalizer::NO_OP) | 
|  | { | 
|  | /* DO have a mode -  will need a normalizer object */ | 
|  | if(cursor != NULL) | 
|  | { | 
|  | /* Just modify the existing cursor */ | 
|  | cursor->setMode(mode); | 
|  | cursor->setText(source, length, status); | 
|  | } | 
|  | else | 
|  | { | 
|  | cursor = new Normalizer(source, length, mode); | 
|  | } | 
|  |  | 
|  | /* RESET the old data */ | 
|  | text = 0; | 
|  | textLen = 0; | 
|  | } | 
|  | else | 
|  | { | 
|  | /* NO_OP mode.. */ | 
|  | if(cursor != NULL) | 
|  | { /* get rid of the old cursor */ | 
|  | delete cursor; | 
|  | cursor = 0; | 
|  | } | 
|  |  | 
|  | text = (UChar*)source; | 
|  | textLen = length; | 
|  | } | 
|  | currentOffset = 0; /* always */ | 
|  |  | 
|  | bufferAlias = 0; | 
|  | } | 
|  |  | 
|  | inline | 
|  | UChar | 
|  | NormalizerIterator::current(void) const | 
|  | { | 
|  | if (text != 0) { | 
|  | if(currentOffset >= textLen) | 
|  | { | 
|  | return Normalizer::DONE; | 
|  | } | 
|  | else | 
|  | { | 
|  | return text[currentOffset]; | 
|  | } | 
|  | } | 
|  |  | 
|  | return (UChar)cursor->current(); | 
|  | } | 
|  |  | 
|  |  | 
|  | inline | 
|  | UChar | 
|  | NormalizerIterator::next(void) | 
|  | { | 
|  | if (text != 0) { | 
|  | return ((currentOffset < textLen) ? text[++currentOffset] : Normalizer::DONE); | 
|  | } | 
|  | return (UChar)cursor->next(); | 
|  | } | 
|  |  | 
|  | inline | 
|  | void | 
|  | NormalizerIterator::reset(void) | 
|  | { | 
|  | currentOffset = 0; | 
|  | if(cursor) | 
|  | { | 
|  | cursor->reset(); | 
|  | } | 
|  | } | 
|  |  | 
|  | //================ Some inline definitions of implementation functions........ ======== | 
|  | /** | 
|  | * A clone of CollationElementIterator::makeReorderedBuffer, trimmed down | 
|  | * to only handle forward. | 
|  | */ | 
|  | inline VectorOfInt* | 
|  | RuleBasedCollator::makeReorderedBuffer(NormalizerIterator* cursor, | 
|  | UChar colFirst, | 
|  | int32_t lastValue, | 
|  | VectorOfInt* lastExpansion) const { | 
|  | VectorOfInt* result; | 
|  |  | 
|  | int32_t firstValue = ucmp32_get(data->mapping, colFirst); | 
|  | if (firstValue >= CONTRACTCHARINDEX) { | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | firstValue = nextContractChar(cursor, colFirst, status); | 
|  | } | 
|  |  | 
|  | VectorOfInt* firstExpansion = NULL; | 
|  | if (firstValue >= EXPANDCHARINDEX) { | 
|  | firstExpansion = getExpandValueList(firstValue); | 
|  | } | 
|  |  | 
|  | if (firstExpansion == NULL && lastExpansion == NULL) { | 
|  | cursor->ownBuffer.at(0) = firstValue; | 
|  | cursor->ownBuffer.at(1) = lastValue; | 
|  | result = &cursor->ownBuffer; | 
|  | } | 
|  | else { | 
|  | int32_t firstLength = firstExpansion==NULL? 1 : firstExpansion->size(); | 
|  | int32_t lastLength = lastExpansion==NULL? 1 : lastExpansion->size(); | 
|  | if (cursor->reorderBuffer == NULL) { | 
|  | cursor->reorderBuffer = new VectorOfInt(firstLength+lastLength); | 
|  | } | 
|  | // reorderdBuffer gets reused for the life of this object. | 
|  | // Since its internal buffer only grows, there is a danger | 
|  | // that it will get really, really big, and never shrink.  If | 
|  | // this is actually happening, insert code here to check for | 
|  | // the condition.  Something along the lines of: | 
|  | //! else if (reorderBuffer->size() >= 256 && | 
|  | //!          (firstLength+lastLength) < 16) { | 
|  | //!     delete reorderBuffer; | 
|  | //!     reorderBuffer = new VectorOfInt(firstLength+lastLength); | 
|  | //! } | 
|  | // The specific numeric values need to be determined | 
|  | // empirically. [aliu] | 
|  | result = cursor->reorderBuffer; | 
|  |  | 
|  | if (firstExpansion == NULL) { | 
|  | result->atPut(0, firstValue); | 
|  | } | 
|  | else { | 
|  | // System.arraycopy(firstExpansion, 0, result, 0, firstLength); | 
|  | *result = *firstExpansion; | 
|  | } | 
|  |  | 
|  | if (lastExpansion == NULL) { | 
|  | result->atPut(firstLength, lastValue); | 
|  | } | 
|  | else { | 
|  | // System.arraycopy(lastExpansion, 0, result, firstLength, lastLength); | 
|  | for (int32_t i=0; i<lastLength; ++i) { | 
|  | result->atPut(firstLength + i, lastExpansion->at(i)); | 
|  | } | 
|  | } | 
|  | result->setSize(firstLength+lastLength); | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | inline int32_t | 
|  | RuleBasedCollator::strengthOrder(int32_t value) const | 
|  | { | 
|  | if (getStrength() == PRIMARY) | 
|  | { | 
|  | return (value & PRIMARYDIFFERENCEONLY); | 
|  | } else if (getStrength() == SECONDARY) | 
|  | { | 
|  | return (value & SECONDARYDIFFERENCEONLY); | 
|  | } | 
|  | return value; | 
|  | } | 
|  |  | 
|  |  | 
|  | inline int32_t | 
|  | RuleBasedCollator::getStrengthOrder(NormalizerIterator* cursor, | 
|  | UErrorCode status) const | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return CollationElementIterator::NULLORDER; | 
|  | } | 
|  |  | 
|  | if (cursor->bufferAlias != NULL) | 
|  | { | 
|  | // bufferAlias needs a bit of an explanation. | 
|  | // When we hit an expanding character in the text, we call the order's | 
|  | // getExpandValues method to retrieve an array of the orderings for all | 
|  | // of the characters in the expansion (see the end of this method). | 
|  | // The first ordering is returned, and an alias to the orderings array | 
|  | // is saved so that the remaining orderings can be returned on subsequent | 
|  | // calls to next.  So, if the expanding buffer is not exhausted, | 
|  | // all we have to do here is return the next ordering in the buffer. | 
|  | if (cursor->expIndex < cursor->bufferAlias->size()) | 
|  | { | 
|  | //_L((stderr, "next from [%08X] from bufferAlias\n", this)); | 
|  | return strengthOrder(cursor->bufferAlias->at(cursor->expIndex++)); | 
|  | } | 
|  | else | 
|  | { | 
|  | cursor->bufferAlias = NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | UChar ch = cursor->current(); | 
|  | cursor->next(); | 
|  |  | 
|  | //_L((stderr, "Next from [%08X] = [%04X], [%c]\n", cursor, (int)ch & 0xFFFF, (char)(ch & 0xFF))); | 
|  |  | 
|  | if (ch == Normalizer::DONE) { | 
|  | return CollationElementIterator::NULLORDER; | 
|  | } | 
|  | // Ask the collator for this character's ordering. | 
|  | int32_t value = ucmp32_get(data->mapping, ch); | 
|  |  | 
|  | if (value == UNMAPPED) | 
|  | { | 
|  | // Returned an "unmapped" flag and save the character so it can be | 
|  | // returned next time this method is called. | 
|  | if (ch == 0x0000) return ch; // \u0000 is not valid in C++'s UnicodeString | 
|  | cursor->ownBuffer.at(0) = CollationElementIterator::UNMAPPEDCHARVALUE; | 
|  | cursor->ownBuffer.at(1) = ch << 16; | 
|  | cursor->bufferAlias = &cursor->ownBuffer; | 
|  |  | 
|  | } else { | 
|  |  | 
|  | if (value >= CONTRACTCHARINDEX) | 
|  | { | 
|  | value = nextContractChar(cursor, ch, status); | 
|  | } | 
|  |  | 
|  | if (value >= EXPANDCHARINDEX) { | 
|  | cursor->bufferAlias = getExpandValueList(value); | 
|  | } | 
|  |  | 
|  | if (CollationElementIterator::isThaiPreVowel(ch)) { | 
|  | UChar consonant = cursor->current(); | 
|  | if (CollationElementIterator::isThaiBaseConsonant(consonant)) { | 
|  | cursor->next(); | 
|  | cursor->bufferAlias = makeReorderedBuffer(cursor, consonant, value, | 
|  | cursor->bufferAlias); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (cursor->bufferAlias != NULL) { | 
|  | cursor->expIndex = 1; | 
|  | value = cursor->bufferAlias->at(0); | 
|  | } | 
|  |  | 
|  | return strengthOrder(value); | 
|  | } | 
|  |  | 
|  | // ==================== End inlines ============================================ | 
|  |  | 
|  |  | 
|  | //=============================================================================== | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator() | 
|  | : Collator(), | 
|  | isOverIgnore(FALSE), | 
|  | mPattern(0), | 
|  | //      sourceCursor(0), | 
|  | //targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | data(0), | 
|  | dataIsOwned(FALSE) | 
|  | { | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(const  RuleBasedCollator&  that) | 
|  | : Collator(that), | 
|  | isOverIgnore(that.isOverIgnore), | 
|  | mPattern(0), | 
|  | //      sourceCursor(0), | 
|  | //targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | dataIsOwned(FALSE), | 
|  | data(that.data) // Alias the data pointer | 
|  | { | 
|  | } | 
|  |  | 
|  | UBool | 
|  | RuleBasedCollator::operator==(const Collator& that) const | 
|  | { | 
|  | if (this == &that) | 
|  | { | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | if (this->getDynamicClassID() != that.getDynamicClassID()) | 
|  | { | 
|  | return FALSE;  // not the same class | 
|  | } | 
|  |  | 
|  | if (!Collator::operator==(that)) | 
|  | { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | 
|  |  | 
|  | if (isOverIgnore != thatAlias.isOverIgnore) | 
|  | { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | if (data != thatAlias.data) | 
|  | { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | RuleBasedCollator& | 
|  | RuleBasedCollator::operator=(const  RuleBasedCollator& that) | 
|  | { | 
|  | if (this != &that) | 
|  | { | 
|  | Collator::operator=(that); | 
|  | isOverIgnore = that.isOverIgnore; | 
|  |  | 
|  | if (dataIsOwned) | 
|  | { | 
|  | delete data; | 
|  | } | 
|  |  | 
|  | data = 0; | 
|  | delete mPattern; | 
|  | mPattern = 0; | 
|  | dataIsOwned = FALSE; | 
|  | data = that.data; | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules, | 
|  | UErrorCode&      status) | 
|  | : Collator(), | 
|  | isOverIgnore(FALSE), | 
|  | mPattern(0), | 
|  | //      sourceCursor(0), | 
|  | ///      targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | data(0), | 
|  | dataIsOwned(FALSE) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | constructFromRules(rules, status); | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules, | 
|  | ECollationStrength collationStrength, | 
|  | UErrorCode&      status) | 
|  | : Collator(collationStrength, Normalizer::NO_OP), | 
|  | isOverIgnore(FALSE), | 
|  | mPattern(0), | 
|  | //    sourceCursor(0), | 
|  | //    targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | data(0), | 
|  | dataIsOwned(FALSE) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  | constructFromRules(rules, status); | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules, | 
|  | Normalizer::EMode decompositionMode, | 
|  | UErrorCode&      status) | 
|  | : Collator(TERTIARY, decompositionMode), | 
|  | isOverIgnore(FALSE), | 
|  | mPattern(0), | 
|  | //    sourceCursor(0), | 
|  | //    targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | data(0), | 
|  | dataIsOwned(FALSE) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | constructFromRules(rules, status); | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules, | 
|  | ECollationStrength collationStrength, | 
|  | Normalizer::EMode decompositionMode, | 
|  | UErrorCode&      status) | 
|  | : Collator(collationStrength, decompositionMode), | 
|  | isOverIgnore(FALSE), | 
|  | mPattern(0), | 
|  | //      sourceCursor(0), | 
|  | //targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | data(0), | 
|  | dataIsOwned(FALSE) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | constructFromRules(rules, status); | 
|  | } | 
|  |  | 
|  | void RuleBasedCollator::constructFromRules(const UnicodeString& rules, | 
|  | UErrorCode& status) | 
|  | { | 
|  | // Construct this collator's ruleset from its string representation | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (rules.isBogus()) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (dataIsOwned) | 
|  | { | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  |  | 
|  | isOverIgnore = FALSE; | 
|  | setStrength(Collator::TERTIARY); | 
|  |  | 
|  | data = new TableCollationData; | 
|  | if (data->isBogus()) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | delete data; | 
|  | data = 0; | 
|  | return; | 
|  | } | 
|  |  | 
|  | // We constructed the data using the build method, so we own it. | 
|  | dataIsOwned = TRUE; | 
|  |  | 
|  | // Now that we've got all the buffers allocated, do the actual work | 
|  | mPattern = 0; | 
|  | build(rules, status); | 
|  | } | 
|  |  | 
|  | void | 
|  | RuleBasedCollator::constructFromFile(const char* fileName, | 
|  | UErrorCode& status) | 
|  | { | 
|  | // This method tries to read in a flattened RuleBasedCollator that | 
|  | // has been previously streamed out using the streamOut() method. | 
|  | // The 'fileName' parameter should contain a full pathname valid on | 
|  | // the local environment. | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (dataIsOwned) | 
|  | { | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  |  | 
|  | mPattern = 0; | 
|  | isOverIgnore = FALSE; | 
|  | setStrength(Collator::TERTIARY); // This is the default strength | 
|  |  | 
|  | FileStream* ifs = T_FileStream_open(fileName, "rb"); | 
|  | if (ifs == 0) { | 
|  | status = U_FILE_ACCESS_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | // The streamIn function does the actual work here... | 
|  | RuleBasedCollatorStreamer::streamIn(this, ifs); | 
|  |  | 
|  | if (!T_FileStream_error(ifs)) | 
|  | { | 
|  | status = U_ZERO_ERROR; | 
|  | } | 
|  | else if (data && data->isBogus()) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  | else | 
|  | { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  |  | 
|  | #ifdef COLLDEBUG | 
|  | fprintf(stderr, "binary read %s size %d, %s\n", fileName, T_FileStream_size(ifs), u_errorName(status)); | 
|  | #endif | 
|  |  | 
|  | // We constructed the data when streaming it in, so we own it | 
|  | dataIsOwned = TRUE; | 
|  |  | 
|  | T_FileStream_close(ifs); | 
|  | } | 
|  |  | 
|  | const char * | 
|  | RuleBasedCollator::constructFromBundle(const Locale & name, | 
|  | UErrorCode& status) | 
|  | { | 
|  | // This method tries to locate binary collation data which has been | 
|  | // previously streamed to a binary object "%%Collation" in a | 
|  | // resource bundle. If the data is found, it is cached. | 
|  | // cache is checked before actually streaming in data | 
|  | // resource bundle fallback mechanism is used. | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (dataIsOwned) | 
|  | { | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  | const char* realName = 0; | 
|  |  | 
|  | mPattern = 0; | 
|  | isOverIgnore = FALSE; | 
|  | setStrength(Collator::TERTIARY); // This is the default strength | 
|  |  | 
|  | ResourceBundle rb((char *)0, name, status); | 
|  | if(U_SUCCESS(status)) { | 
|  | ResourceBundle binary = rb.get("%%Collation", status); //This is the bundle that actually contains the collation data | 
|  | realName = binary.getName(); | 
|  | if(U_SUCCESS(status)) { | 
|  | UErrorCode intStatus = U_ZERO_ERROR; | 
|  | constructFromCache(realName, intStatus); // check whether we already have this data in cache | 
|  | if(U_SUCCESS(intStatus)) { | 
|  | return realName; | 
|  | } | 
|  | int32_t inDataLen = 0; | 
|  | const uint8_t *inData = binary.getBinary(inDataLen, status); //This got us the real binary data | 
|  |  | 
|  | UMemoryStream *ifs = uprv_mstrm_openBuffer(inData, inDataLen); | 
|  |  | 
|  | if (ifs == 0) { | 
|  | status = U_FILE_ACCESS_ERROR; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // The streamIn function does the actual work here... | 
|  | RuleBasedCollatorStreamer::streamIn(this, ifs); | 
|  |  | 
|  | if (!uprv_mstrm_error(ifs)) { | 
|  | } | 
|  | else if (data && data->isBogus()) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | delete data; | 
|  | data = 0; | 
|  | } else { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  |  | 
|  | // We constructed the data when streaming it in, so we own it | 
|  | dataIsOwned = TRUE; | 
|  |  | 
|  | uprv_mstrm_close(ifs); | 
|  | addToCache(realName); // add the newly constructed data to cache | 
|  | return realName; | 
|  | } else { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | return 0; | 
|  | } | 
|  | } else { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | RuleBasedCollator::RuleBasedCollator(   const Locale& desiredLocale, | 
|  | UErrorCode& status) | 
|  | : Collator(), | 
|  | isOverIgnore(FALSE), | 
|  | dataIsOwned(FALSE), | 
|  | data(0), | 
|  | //      sourceCursor(0), | 
|  | //targetCursor(0), | 
|  | cursor1(0), | 
|  | cursor2(0), | 
|  | mPattern(0) | 
|  | { | 
|  |  | 
|  |  | 
|  | if (U_FAILURE(status)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Try to load, in order: | 
|  | // 1. The desired locale's collation. | 
|  | // 2. A fallback of the desired locale. | 
|  | // 3. The default locale's collation. | 
|  | // 4. A fallback of the default locale. | 
|  | // 5. The default collation rules, which contains en_US collation rules. | 
|  |  | 
|  | // To reiterate, we try: | 
|  | // Specific: | 
|  | //  language+country+variant | 
|  | //  language+country | 
|  | //  language | 
|  | // Default: | 
|  | //  language+country+variant | 
|  | //  language+country | 
|  | //  language | 
|  | // Root: (aka DEFAULTRULES) | 
|  | // steps 1-5 are handled by resource bundle fallback mechanism. | 
|  | // however, in a very unprobable situation that no resource bundle | 
|  | // data exists, step 5 is repeated with hardcoded default rules. | 
|  |  | 
|  | const char *locName = constructFromBundle(desiredLocale, status);  /*!*/ | 
|  | data->desiredLocale = desiredLocale; | 
|  |  | 
|  | if (U_SUCCESS(status)) { | 
|  | data->realLocaleName = locName; | 
|  | if(status != U_USING_DEFAULT_ERROR) { | 
|  | setDecomposition(Normalizer::NO_OP); | 
|  | } | 
|  | } else { | 
|  | UErrorCode intStatus = U_ZERO_ERROR; | 
|  | constructFromCache(ResourceBundle::kDefaultFilename, intStatus); | 
|  | if(U_FAILURE(intStatus)) { | 
|  | intStatus = U_ZERO_ERROR; | 
|  | constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus); | 
|  | if (intStatus == U_ZERO_ERROR) { | 
|  | status = U_USING_DEFAULT_ERROR; | 
|  | } else { | 
|  | status = intStatus;     // bubble back | 
|  | } | 
|  |  | 
|  | if (status == U_MEMORY_ALLOCATION_ERROR) { | 
|  | return; | 
|  | } | 
|  | } | 
|  | data->realLocaleName = ResourceBundle::kDefaultFilename; | 
|  | setDecomposition(Normalizer::NO_OP); | 
|  | addToCache(ResourceBundle::kDefaultFilename); | 
|  | } | 
|  | return; | 
|  | } | 
|  |  | 
|  | void | 
|  | RuleBasedCollator::constructFromFile(   const Locale&           locale, | 
|  | const UnicodeString&    localeFileName, | 
|  | UBool                  tryBinaryFile, | 
|  | UErrorCode&              status) | 
|  | { | 
|  | // constructFromFile (re)builds this collator's data by reading from a | 
|  | // file.  It does not employ the usual FILE search mechanism with | 
|  | // locales, default locales, and base locales.  Instead, it tries to | 
|  | // look only in files with the given localeFileName.  The original note says it does, | 
|  | // however, employ the LOCALE search mechanism.  NOTE(review): the locale parameter is never referenced in this body -- confirm. | 
|  |  | 
|  | // The original note says this method maintains the binary collation files:  if a collation | 
|  | // is not present in binary form, but is present in text form (in a | 
|  | // resource bundle file), it will be loaded in text form, and then | 
|  | // written to disk.  NOTE(review): no write to disk is visible in this function -- presumably done elsewhere; confirm. | 
|  |  | 
|  | // If tryBinaryFile is true, then try to load from the binary file first.  All results are reported through status; a failing status on entry returns at once. | 
|  |  | 
|  | if(U_FAILURE(status)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if(dataIsOwned) { /* owned data is discarded up front; it is replaced by whatever gets loaded below */ | 
|  | delete data; | 
|  | data = 0; | 
|  | } | 
|  |  | 
|  | if(tryBinaryFile) { | 
|  | char *binaryFilePath = createPathName(UnicodeString(u_getDataDirectory(),""), | 
|  | localeFileName, UnicodeString(kFilenameSuffix,"")); | 
|  |  | 
|  | // Try to load up the collation from a binary file first; success or an out-of-memory failure ends the work here | 
|  | constructFromFile(binaryFilePath, status); | 
|  | #ifdef COLLDEBUG | 
|  | cerr << localeFileName  << kFilenameSuffix << " binary load " << u_errorName(status) << endl; | 
|  | #endif | 
|  | if(U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) { | 
|  | delete [] binaryFilePath; | 
|  | return; | 
|  | } | 
|  | if(status == U_FILE_ACCESS_ERROR) { /* only this failure is cleared; any other error code stays in status for the text load below */ | 
|  | status = U_ZERO_ERROR; | 
|  | } | 
|  | delete [] binaryFilePath; | 
|  | } | 
|  |  | 
|  | // Now try to load it up from a resource bundle text source file (bundle name = localeFileName, looked up in the data directory) | 
|  | UnicodeString dataDir = UnicodeString(u_getDataDirectory(),""); | 
|  |  | 
|  | char *ch; | 
|  | ch = new char[localeFileName.size() + 1]; | 
|  | ch[localeFileName.extract(0, 0x7fffffff, ch, "")] = 0; /* terminate the extracted name at the index extract() returns */ | 
|  | ResourceBundle bundle(dataDir, ch, status); | 
|  |  | 
|  | delete [] ch; | 
|  |  | 
|  | // if there is no resource bundle file for the given locale, break out | 
|  | if(U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | #ifdef COLLDEBUG | 
|  | cerr << localeFileName << " ascii load " << u_errorName(status) << endl; | 
|  | #endif | 
|  |  | 
|  | // check and see if this resource bundle contains collation data: the "Sequence" string under "CollationElements"; either one missing yields U_MISSING_RESOURCE_ERROR | 
|  |  | 
|  | UnicodeString colString; | 
|  | UErrorCode intStatus = U_ZERO_ERROR; | 
|  |  | 
|  | ResourceBundle colElems = bundle.get("CollationElements", intStatus); | 
|  | if (U_FAILURE(intStatus)) | 
|  | { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | return; | 
|  | } | 
|  | colString = colElems.getStringEx("Sequence", intStatus); | 
|  |  | 
|  | if(U_FAILURE(intStatus)) { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | if(colString.isBogus()) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Having loaded the rule string from the resource bundle text file, | 
|  | // prepend DEFAULTRULES to it and build the collator from the merged | 
|  | // rules.  If that fails (other than out of memory), use the default rules alone. | 
|  |  | 
|  | colString.insert(0, DEFAULTRULES); | 
|  | if(colString.isBogus()) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | constructFromRules(colString, intStatus); | 
|  | if(intStatus == U_MEMORY_ALLOCATION_ERROR) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | if(intStatus != U_ZERO_ERROR)  { | 
|  | status = U_USING_DEFAULT_ERROR; | 
|  |  | 
|  | // predefined tables should contain correct grammar; if even they fail to build, that error replaces U_USING_DEFAULT_ERROR | 
|  | intStatus = U_ZERO_ERROR; | 
|  | constructFromRules(DEFAULTRULES, intStatus); | 
|  | if(intStatus != U_ZERO_ERROR) { | 
|  | status = intStatus; | 
|  | } | 
|  | } | 
|  |  | 
|  | #ifdef COLLDEBUG | 
|  | cerr << localeFileName << " ascii load " << (U_SUCCESS(status) ? "OK" : "Failed") << " - try= " << (tryBinaryFile?"true":"false") << endl; | 
|  | #endif | 
|  |  | 
|  | } | 
|  |  | 
|  | // Destructor.  Frees the collation data only when this object owns it, then | 
|  | // the two NormalizerIterator cursors and the merged rule pattern. | 
|  | RuleBasedCollator::~RuleBasedCollator() | 
|  | { | 
|  |     if (dataIsOwned) | 
|  |     { | 
|  |         delete data; | 
|  |     } | 
|  |     data = 0; | 
|  |  | 
|  |     // delete on a null pointer does nothing, so the cursors need no guard. | 
|  |     delete cursor1; | 
|  |     cursor1 = 0; | 
|  |  | 
|  |     delete cursor2; | 
|  |     cursor2 = 0; | 
|  |  | 
|  |     delete mPattern; | 
|  |     mPattern = 0; | 
|  | } | 
|  |  | 
|  | // Returns a newly allocated copy of this collator, made with the copy | 
|  | // constructor.  NOTE(review): presumably the caller owns the result -- confirm. | 
|  | Collator* | 
|  | RuleBasedCollator::clone() const | 
|  | { | 
|  |     RuleBasedCollator *copy = new RuleBasedCollator(*this); | 
|  |     return copy; | 
|  | } | 
|  |  | 
|  | // Create a CollationElementIterator object that will iterate over the elements | 
|  | // in a string, using the collation rules defined in this RuleBasedCollator. | 
|  | // Returns the new iterator, or NULL when its constructor reports a failure. | 
|  | CollationElementIterator* | 
|  | RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const | 
|  | { | 
|  |     UErrorCode status = U_ZERO_ERROR; | 
|  |     CollationElementIterator *newCursor = new CollationElementIterator(source, this, status); | 
|  |  | 
|  |     if (U_FAILURE(status)) | 
|  |     { | 
|  |         // The object was allocated even though construction failed; | 
|  |         // release it here, because the caller only ever sees NULL. | 
|  |         delete newCursor; | 
|  |         return NULL; | 
|  |     } | 
|  |  | 
|  |     return newCursor; | 
|  | } | 
|  |  | 
|  | // Create a CollationElementIterator object that will iterate over the elements | 
|  | // supplied by a CharacterIterator, using the collation rules defined in this | 
|  | // RuleBasedCollator. | 
|  | // Returns the new iterator, or NULL when its constructor reports a failure. | 
|  | CollationElementIterator* | 
|  | RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const | 
|  | { | 
|  |     UErrorCode status = U_ZERO_ERROR; | 
|  |     CollationElementIterator *newCursor = new CollationElementIterator(source, this, status); | 
|  |  | 
|  |     if (U_FAILURE(status)) | 
|  |     { | 
|  |         // The object was allocated even though construction failed; | 
|  |         // release it here, because the caller only ever sees NULL. | 
|  |         delete newCursor; | 
|  |         return NULL; | 
|  |     } | 
|  |  | 
|  |     return newCursor; | 
|  | } | 
|  |  | 
|  | // Return a string representation of this collator's rules (a reference to data->ruleTable). | 
|  | // The string can later be passed to the constructor that takes a | 
|  | // UnicodeString argument, which will construct a collator that's | 
|  | // functionally identical to this one. | 
|  | // You can also allow users to edit the string in order to change the collation data, or print it out for inspection. | 
|  | // The rule table is filled lazily: from mPattern if present (mPattern is then deleted), else from the text file on disk, else from DEFAULTRULES. | 
|  |  | 
|  | const UnicodeString& | 
|  | RuleBasedCollator::getRules() const | 
|  | { | 
|  | if (mPattern != 0) | 
|  | { | 
|  | MergeCollation*& nonConstMPattern = *(MergeCollation**)&mPattern; /* writable alias of mPattern, needed because this method is const */ | 
|  | mPattern->emitPattern(data->ruleTable); | 
|  | data->isRuleTableLoaded = TRUE; | 
|  | delete nonConstMPattern; | 
|  | nonConstMPattern = 0; | 
|  | } | 
|  | else if (!data->isRuleTableLoaded) | 
|  | { | 
|  | // At this point the caller wants the rules, but the rule table data | 
|  | // is not loaded.  Furthermore, there is no mPattern object to load | 
|  | // the rules from.  Therefore, we fetch the rules off the disk into a temporary collator. | 
|  | // Notice that we pass in a tryBinaryFile value of FALSE, since | 
|  | // by design the binary file has NO rules in it! | 
|  | //UErrorCode status = U_ZERO_ERROR; | 
|  | //RuleBasedCollator temp(data->realLocaleName, status); | 
|  | RuleBasedCollator temp; | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | temp.constructFromFile(data->desiredLocale, data->realLocaleName, FALSE, status); | 
|  |  | 
|  | // We must check that mPattern is nonzero here, or we run the risk | 
|  | // of an infinite loop: temp.getRules() with a null mPattern and an unloaded rule table would take this same branch again. | 
|  | if (U_SUCCESS(status) && temp.mPattern != 0) | 
|  | { | 
|  | data->ruleTable = temp.getRules(); | 
|  | data->isRuleTableLoaded = TRUE; | 
|  | #ifdef _DEBUG | 
|  | //              // the following is useful for specific debugging purposes | 
|  | //               UnicodeString name; | 
|  | //               cerr << "Table collation rules loaded dynamically for " | 
|  | //                   << data->desiredLocale.getName(name) | 
|  | //                   << " at " | 
|  | //                   << data->realLocaleName | 
|  | //                   << ", " << dec << data->ruleTable.size() << " characters" | 
|  | //                   << endl; | 
|  | #endif | 
|  | } | 
|  | else | 
|  | { | 
|  | #ifdef _DEBUG | 
|  | //              UnicodeString name; | 
|  | //              cerr << "Unable to load table collation rules dynamically for " | 
|  | //                  << data->desiredLocale.getName(name) | 
|  | //                  << " at " | 
|  | //                  << data->realLocaleName | 
|  | //                  << endl; | 
|  | //              cerr << "Status " << u_errorName(status) << ", mPattern " << temp.mPattern << endl; | 
|  | #endif | 
|  | /* SRL: this had to be added because we now have the situation where | 
|  | DEFAULT is loaded from a binary file w/ no rules; fall back to building temp from DEFAULTRULES. */ | 
|  | UErrorCode intStatus = U_ZERO_ERROR; | 
|  | temp.constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus); | 
|  |  | 
|  | if(U_SUCCESS(intStatus) && (temp.mPattern != 0)) | 
|  | { | 
|  | data->ruleTable = temp.getRules(); | 
|  | data->isRuleTableLoaded = TRUE; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return data->ruleTable; /* if every attempt above failed, the table is returned in whatever state it already had */ | 
|  | } | 
|  |  | 
|  |  | 
|  | // Compares the leading parts of source and target: from each string at most | 
|  | // `length` UChars (or the whole string, when it is shorter) are extracted and | 
|  | // the two extracts are handed to the two-string compare(). | 
|  | Collator::EComparisonResult | 
|  | RuleBasedCollator::compare( const UnicodeString& source, | 
|  |                             const UnicodeString& target, | 
|  |                             int32_t length) const | 
|  | { | 
|  |     const UTextOffset start = 0; | 
|  |  | 
|  |     // Clamp the requested length to what each string actually holds. | 
|  |     const int32_t sourceCount = uprv_min(length, source.length()); | 
|  |     const int32_t targetCount = uprv_min(length, target.length()); | 
|  |  | 
|  |     UnicodeString sourcePrefix; | 
|  |     source.extract(start, sourceCount, sourcePrefix); | 
|  |  | 
|  |     UnicodeString targetPrefix; | 
|  |     target.extract(start, targetCount, targetPrefix); | 
|  |  | 
|  |     return RuleBasedCollator::compare(sourcePrefix, targetPrefix); | 
|  | } | 
|  |  | 
|  | Collator::EComparisonResult | 
|  | RuleBasedCollator::compare(const   UChar* source, | 
|  | int32_t sourceLength, | 
|  | const   UChar*  target, | 
|  | int32_t targetLength) const | 
|  | { | 
|  | // Collates source against target one collation element at a time.  Shortcut: two null pointers, or two zero lengths, are EQUAL. | 
|  | if (((source == 0) && (target == 0)) || | 
|  | ((sourceLength == 0) && (targetLength == 0))) | 
|  | { | 
|  | return Collator::EQUAL; | 
|  | } | 
|  |  | 
|  | Collator::EComparisonResult result = Collator::EQUAL; | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  |  | 
|  | if (cursor1 == NULL) | 
|  | { | 
|  | ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLength, getDecomposition()); /* created on first use and kept in the object, hence the const cast */ | 
|  | } | 
|  | else | 
|  | { | 
|  | cursor1->setModeAndText(getDecomposition(), source, sourceLength, status); | 
|  | } | 
|  |  | 
|  | if ( /*cursor1->cursor == NULL ||*/ U_FAILURE(status)) | 
|  | { | 
|  | return Collator::EQUAL; /* this method has no status out-parameter: a failure is reported as EQUAL */ | 
|  | } | 
|  |  | 
|  | if (cursor2 == NULL) | 
|  | { | 
|  | ((RuleBasedCollator *)this)->cursor2 = new NormalizerIterator(target, targetLength, getDecomposition()); | 
|  | } | 
|  | else | 
|  | { | 
|  | cursor2->setModeAndText(getDecomposition(), target, targetLength, status); | 
|  | } | 
|  |  | 
|  | if (/*cursor2 == NULL ||*/ U_FAILURE(status)) | 
|  | { | 
|  | return Collator::EQUAL; | 
|  | } | 
|  |  | 
|  | int32_t sOrder, tOrder; | 
|  | //    int32_t sOrder = CollationElementIterator::NULLORDER, tOrder = CollationElementIterator::NULLORDER; | 
|  | UBool gets = TRUE, gett = TRUE; /* whether to fetch a new source / target element on the next pass */ | 
|  | UBool initialCheckSecTer = getStrength() >= Collator::SECONDARY; | 
|  | UBool checkSecTer = initialCheckSecTer; | 
|  | UBool checkTertiary = getStrength() >= Collator::TERTIARY; | 
|  | UBool isFrenchSec = data->isFrenchSec; | 
|  | uint32_t pSOrder, pTOrder; | 
|  |  | 
|  | while(TRUE) | 
|  | { | 
|  | // Get the next collation element in each of the strings, unless | 
|  | // we've been requested to skip it. | 
|  | if (gets) | 
|  | { | 
|  | sOrder = getStrengthOrder((NormalizerIterator*)cursor1, status); | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return Collator::EQUAL; | 
|  | } | 
|  | } | 
|  |  | 
|  | gets = TRUE; | 
|  |  | 
|  | if (gett) | 
|  | { | 
|  | tOrder = getStrengthOrder((NormalizerIterator*)cursor2, status); | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return Collator::EQUAL; | 
|  | } | 
|  | } | 
|  |  | 
|  | gett = TRUE; | 
|  |  | 
|  | // If we've hit the end of one of the strings, jump out of the loop | 
|  | if ((sOrder == CollationElementIterator::NULLORDER)|| | 
|  | (tOrder == CollationElementIterator::NULLORDER)) | 
|  | { | 
|  | break; | 
|  | } | 
|  |  | 
|  | // If there's no difference at this position, we can skip to the | 
|  | // next one. | 
|  | pSOrder = CollationElementIterator::primaryOrder(sOrder); | 
|  | pTOrder = CollationElementIterator::primaryOrder(tOrder); | 
|  | if (sOrder == tOrder) | 
|  | { | 
|  | if (isFrenchSec && pSOrder != 0) | 
|  | { | 
|  | if (!checkSecTer) | 
|  | { | 
|  | // in french, a secondary difference more to the right is stronger, | 
|  | // so accents have to be checked with each base element | 
|  | checkSecTer = initialCheckSecTer; | 
|  |  | 
|  | // but tertiary differences are less important than the first | 
|  | // secondary difference, so checking tertiary remains disabled | 
|  | checkTertiary = FALSE; | 
|  | } | 
|  | } | 
|  |  | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // Compare primary differences first. | 
|  | if (pSOrder != pTOrder) | 
|  | { | 
|  | if (sOrder == 0) | 
|  | { | 
|  | // The entire source element is ignorable. | 
|  | // Skip to the next source element, but don't fetch another target element. | 
|  | gett = FALSE; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (tOrder == 0) | 
|  | { | 
|  | gets = FALSE; /* mirror case: the entire target element is ignorable, so keep the current source element */ | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // The source and target elements aren't ignorable, but it's still possible | 
|  | // for the primary component of one of the elements to be ignorable.... | 
|  | if (pSOrder == 0)  // primary order in source is ignorable | 
|  | { | 
|  | // The source's primary is ignorable, but the target's isn't.  We treat ignorables | 
|  | // as a secondary difference, so remember that we found one. | 
|  | if (checkSecTer) | 
|  | { | 
|  | result = Collator::GREATER;  // (strength is SECONDARY) | 
|  | checkSecTer = FALSE; | 
|  | } | 
|  |  | 
|  | // Skip to the next source element, but don't fetch another target element. | 
|  | gett = FALSE; | 
|  | } | 
|  | else if (pTOrder == 0) | 
|  | { | 
|  | // record differences - see the comment above. | 
|  | if (checkSecTer) | 
|  | { | 
|  | result = Collator::LESS;  // (strength is SECONDARY) | 
|  | checkSecTer = FALSE; | 
|  | } | 
|  |  | 
|  | // Skip to the next target element, but don't fetch another source element. | 
|  | gets = FALSE; | 
|  | } | 
|  | else | 
|  | { | 
|  | // Neither of the orders is ignorable, and we already know that the primary | 
|  | // orders are different because of the (pSOrder != pTOrder) test above. | 
|  | // Record the difference and stop the comparison. | 
|  | if (pSOrder < pTOrder) | 
|  | { | 
|  | return Collator::LESS;  // (strength is PRIMARY) | 
|  | } | 
|  |  | 
|  | return Collator::GREATER;  // (strength is PRIMARY) | 
|  | } | 
|  | } | 
|  | else | 
|  | { // else of if ( pSOrder != pTOrder ) | 
|  | // primary order is the same, but complete order is different. So there | 
|  | // are no base elements at this point, only ignorables (Since the strings are | 
|  | // normalized) | 
|  |  | 
|  | if (checkSecTer) | 
|  | { | 
|  | // a secondary or tertiary difference may still matter | 
|  | uint32_t secSOrder = CollationElementIterator::secondaryOrder(sOrder); | 
|  | uint32_t secTOrder = CollationElementIterator::secondaryOrder(tOrder); | 
|  |  | 
|  | if (secSOrder != secTOrder) | 
|  | { | 
|  | // there is a secondary difference | 
|  | result = (secSOrder < secTOrder) ? Collator::LESS : Collator::GREATER; | 
|  | // (strength is SECONDARY) | 
|  | checkSecTer = FALSE; | 
|  | // (even in french, only the first secondary difference within | 
|  | //  a base character matters) | 
|  | } | 
|  | else | 
|  | { | 
|  | if (checkTertiary) | 
|  | { | 
|  | // a tertiary difference may still matter | 
|  | uint32_t terSOrder = CollationElementIterator::tertiaryOrder(sOrder); | 
|  | uint32_t terTOrder = CollationElementIterator::tertiaryOrder(tOrder); | 
|  |  | 
|  | if (terSOrder != terTOrder) | 
|  | { | 
|  | // there is a tertiary difference | 
|  | result = (terSOrder < terTOrder) ? Collator::LESS : Collator::GREATER; | 
|  | // (strength is TERTIARY) | 
|  | checkTertiary = FALSE; | 
|  | } | 
|  | } | 
|  | } | 
|  | } // if (checkSecTer) | 
|  |  | 
|  | }  // if ( pSOrder != pTOrder ) | 
|  | } // while() | 
|  |  | 
|  | if (sOrder != CollationElementIterator::NULLORDER) | 
|  | { | 
|  | // (tOrder must be CollationElementIterator::NULLORDER, | 
|  | //  since this point is only reached when sOrder or tOrder is NULLORDER.) | 
|  | // The source string has more elements, but the target string hasn't. | 
|  | do | 
|  | { | 
|  | if (CollationElementIterator::primaryOrder(sOrder) != 0) | 
|  | { | 
|  | // We found an additional non-ignorable base character in the source string. | 
|  | // This is a primary difference, so the source is greater | 
|  | return Collator::GREATER; // (strength is PRIMARY) | 
|  | } | 
|  |  | 
|  | if (CollationElementIterator::secondaryOrder(sOrder) != 0) | 
|  | { | 
|  | // Additional secondary elements mean the source string is greater | 
|  | if (checkSecTer) | 
|  | { | 
|  | result = Collator::GREATER;  // (strength is SECONDARY) | 
|  | checkSecTer = FALSE; | 
|  | } | 
|  | } | 
|  | } | 
|  | while ((sOrder = getStrengthOrder(cursor1, status)) != CollationElementIterator::NULLORDER); /* NOTE(review): status is not checked in this tail loop */ | 
|  | } | 
|  | else if (tOrder != CollationElementIterator::NULLORDER) | 
|  | { | 
|  | // The target string has more elements, but the source string hasn't. | 
|  | do | 
|  | { | 
|  | if (CollationElementIterator::primaryOrder(tOrder) != 0) | 
|  | { | 
|  | // We found an additional non-ignorable base character in the target string. | 
|  | // This is a primary difference, so the source is less | 
|  | return Collator::LESS; // (strength is PRIMARY) | 
|  | } | 
|  |  | 
|  | if (CollationElementIterator::secondaryOrder(tOrder) != 0) | 
|  | { | 
|  | // Additional secondary elements in the target mean the source string is less | 
|  | if (checkSecTer) | 
|  | { | 
|  | result = Collator::LESS;  // (strength is SECONDARY) | 
|  | checkSecTer = FALSE; | 
|  | } | 
|  | } | 
|  | } | 
|  | while ((tOrder = getStrengthOrder(cursor2, status)) != CollationElementIterator::NULLORDER); /* NOTE(review): status is not checked in this tail loop */ | 
|  | } | 
|  |  | 
|  |  | 
|  | // For IDENTICAL comparisons, we use a bitwise character comparison | 
|  | // of the normalized strings as a tiebreaker if all else is equal | 
|  | // NOTE: The java code compares result with 0, and | 
|  | // puts the result of the string comparison directly into result | 
|  | if (result == Collator::EQUAL && getStrength() == IDENTICAL) | 
|  | { | 
|  | #if 0 | 
|  | // ******** for the  UChar normalization interface. | 
|  | // It doesn't work much faster, and the code was broken | 
|  | // so it's commented out. --srl | 
|  | //          UChar sourceDecomp[1024], targetDecomp[1024]; | 
|  | //      int32_t sourceDecompLength = 1024; | 
|  | //      int32_t targetDecompLength = 1024; | 
|  |  | 
|  | //          int8_t comparison; | 
|  | //      Normalizer::EMode decompMode = getDecomposition(); | 
|  |  | 
|  | //      if (decompMode != Normalizer::NO_OP) | 
|  | //        { | 
|  | //          Normalizer::normalize(source, sourceLength, decompMode, | 
|  | //                    0, sourceDecomp, sourceDecompLength, status); | 
|  |  | 
|  | //          Normalizer::normalize(target, targetLength, decompMode, | 
|  | //                    0, targetDecomp, targetDecompLength, status); | 
|  |  | 
|  | //          comparison = u_strcmp(sourceDecomp,targetDecomp); | 
|  | //        } | 
|  | //      else | 
|  | //        { | 
|  | //          comparison = u_strcmp(source, target); /* ! */ | 
|  | //        } | 
|  |  | 
|  | #else | 
|  |  | 
|  | UnicodeString sourceDecomp, targetDecomp; | 
|  |  | 
|  | int8_t comparison; | 
|  |  | 
|  | Normalizer::normalize(source, getDecomposition(), | 
|  | 0, sourceDecomp,  status); /* NOTE(review): source is passed without sourceLength; presumably this relies on the buffer being NUL-terminated -- confirm */ | 
|  |  | 
|  | Normalizer::normalize(target, getDecomposition(), | 
|  | 0, targetDecomp,  status); /* NOTE(review): same for target / targetLength; status is not checked afterwards */ | 
|  |  | 
|  | comparison = sourceDecomp.compare(targetDecomp); | 
|  | #endif | 
|  |  | 
|  | if (comparison < 0) | 
|  | { | 
|  | result = Collator::LESS; | 
|  | } | 
|  | else if (comparison == 0) | 
|  | { | 
|  | result = Collator::EQUAL; | 
|  | } | 
|  | else | 
|  | { | 
|  | result = Collator::GREATER; | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  |  | 
|  | // Returns the collation order of the longest contracting sequence that | 
|  | // starts with ch and continues with the characters found at the cursor. | 
|  | // The cursor is advanced past every character that extends the match; | 
|  | // status is not touched by this method. | 
|  | int32_t | 
|  | RuleBasedCollator::nextContractChar(NormalizerIterator *cursor, | 
|  |                                     UChar ch, | 
|  |                                     UErrorCode& status) const | 
|  | { | 
|  |     // key is a member used as scratch space; this method is const, so | 
|  |     // take a writable reference to it once. | 
|  |     UnicodeString &sequence = (UnicodeString&)key; | 
|  |  | 
|  |     // Entry 0 of the contract list is the ordering of the single character. | 
|  |     VectorOfPToContractElement *contractList = getContractValues(ch); | 
|  |     int32_t bestOrder = ((EntryPair *)contractList->at(0))->value; | 
|  |  | 
|  |     sequence.remove(); | 
|  |     sequence += ch; | 
|  |  | 
|  |     // Keep extending the sequence for as long as it is still a known entry. | 
|  |     for (;;) | 
|  |     { | 
|  |         ch = cursor->current(); | 
|  |         if (ch == Normalizer::DONE) | 
|  |         { | 
|  |             break; | 
|  |         } | 
|  |  | 
|  |         sequence += ch; | 
|  |  | 
|  |         const int32_t index = getEntry(contractList, key, TRUE); | 
|  |         if (index == UNMAPPED) | 
|  |         { | 
|  |             break; | 
|  |         } | 
|  |  | 
|  |         cursor->next(); | 
|  |         bestOrder = ((EntryPair *)contractList->at(index))->value; | 
|  |     } | 
|  |  | 
|  |     return bestOrder; | 
|  | } | 
|  |  | 
|  | // Compare two strings using this collator.  This simply forwards the | 
|  | // UChar buffers and lengths of both strings to the UChar-based compare(). | 
|  | Collator::EComparisonResult | 
|  | RuleBasedCollator::compare(const UnicodeString& source, | 
|  |                            const UnicodeString& target) const | 
|  | { | 
|  |     const UChar *sourceChars = source.getUChars(); | 
|  |     const int32_t sourceCount = source.length(); | 
|  |     const UChar *targetChars = target.getUChars(); | 
|  |     const int32_t targetCount = target.length(); | 
|  |  | 
|  |     return compare(sourceChars, sourceCount, targetChars, targetCount); | 
|  | } | 
|  |  | 
|  | // Retrieve a collation key for the specified string | 
|  | // The key can be compared with other collation keys using a bitwise comparison | 
|  | // (e.g. memcmp) to find the ordering of their respective source strings. | 
|  | // This is handy when doing a sort, where each sort key must be compared | 
|  | // many times. | 
|  | // | 
|  | // The basic algorithm here is to find all of the collation elements for each | 
|  | // character in the source string, convert them to an ASCII representation, | 
|  | // and put them into the collation key.  But it's trickier than that. | 
|  | // Each collation element in a string has three components: primary ('A' vs 'B'), | 
|  | // secondary ('u' vs 'ü'), and tertiary ('A' vs 'a'), and a primary difference | 
|  | // at the end of a string takes precedence over a secondary or tertiary | 
|  | // difference earlier in the string. | 
|  | // | 
|  | // To account for this, we put all of the primary orders at the beginning of the | 
|  | // key, followed by the secondary and tertiary orders. Each set of orders is | 
|  | // terminated by nulls so that a key for a string which is an initial substring of | 
|  | // another string will compare less without any special case. | 
|  | // | 
|  | // Here's a hypothetical example, with the collation element represented as | 
|  | // a three-digit number, one digit for primary, one for secondary, etc. | 
|  | // | 
|  | // String:              A     a     B    É | 
|  | // Collation Elements: 101   100   201  511 | 
|  | // Collation Key:      1125<null>0001<null>1011<null> | 
|  | // | 
|  | // To make things even trickier, secondary differences (accent marks) are compared | 
|  | // starting at the *end* of the string in languages with French secondary ordering. | 
|  | // But when comparing the accent marks on a single base character, they are compared | 
|  | // from the beginning.  To handle this, we reverse all of the accents that belong | 
|  | // to each base character, then we reverse the entire string of secondary orderings | 
|  | // at the end. | 
|  | // | 
|  | // UnicodeString convenience overload: forwards the string's UChar buffer and | 
|  | // length to the UChar-based getCollationKey() of this class and returns its result. | 
|  | CollationKey& | 
|  | RuleBasedCollator::getCollationKey( const   UnicodeString&  source, | 
|  |                                     CollationKey&   sortkey, | 
|  |                                     UErrorCode&      status) const | 
|  | { | 
|  |     const UChar *sourceChars = source.getUChars(); | 
|  |     const int32_t sourceCount = source.length(); | 
|  |  | 
|  |     return RuleBasedCollator::getCollationKey(sourceChars, sourceCount, sortkey, status); | 
|  | } | 
|  |  | 
|  | CollationKey& | 
|  | RuleBasedCollator::getCollationKey( const   UChar*  source, | 
|  | int32_t sourceLen, | 
|  | CollationKey&   sortkey, | 
|  | UErrorCode&      status) const | 
|  | { /* Fills sortkey for source in two passes (count the entries, then store them) and returns sortkey. */ | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | status = U_ILLEGAL_ARGUMENT_ERROR; /* NOTE(review): this replaces the caller's incoming error code */ | 
|  | return sortkey.setToBogus(); | 
|  | } | 
|  |  | 
|  | if ((!source) || (sourceLen == 0)) | 
|  | { | 
|  | return sortkey.reset(); /* null or empty input: the key is reset, status is left untouched */ | 
|  | } | 
|  |  | 
|  | if (cursor1 == NULL) | 
|  | { | 
|  | ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLen, getDecomposition()); /* created on first use and kept in the object, hence the const cast */ | 
|  | } | 
|  | else | 
|  | { | 
|  | cursor1->setModeAndText(getDecomposition(), source,sourceLen, status); | 
|  | } | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return sortkey.setToBogus(); | 
|  | } | 
|  |  | 
|  | UBool  compareSec   = (getStrength() >= Collator::SECONDARY); | 
|  | UBool  compareTer   = (getStrength() >= Collator::TERTIARY); | 
|  | UBool  compareIdent = (getStrength() == Collator::IDENTICAL); | 
|  | int32_t order        = 0; | 
|  | int32_t totalPrimary = 0; | 
|  | int32_t totalSec     = 0; | 
|  | int32_t totalTer     = 0; | 
|  | int32_t totalIdent     = 0; | 
|  | UnicodeString decomp; | 
|  |  | 
|  | // first pass: iterate over the source, counting primary, secondary, and tertiary entries | 
|  | while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) != | 
|  | CollationElementIterator::NULLORDER) | 
|  | { | 
|  | int32_t secOrder = CollationElementIterator::secondaryOrder(order); | 
|  | int32_t terOrder = CollationElementIterator::tertiaryOrder(order); | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return sortkey.setToBogus(); | 
|  | } | 
|  |  | 
|  | if (! CollationElementIterator::isIgnorable(order)) | 
|  | { | 
|  | totalPrimary += 1; | 
|  |  | 
|  | if (compareSec) | 
|  | { | 
|  | totalSec += 1; | 
|  | } | 
|  |  | 
|  | if (compareTer) | 
|  | { | 
|  | totalTer += 1; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | if (compareSec && secOrder != 0) | 
|  | { | 
|  | totalSec += 1; | 
|  | } | 
|  |  | 
|  | if (compareTer && terOrder != 0) | 
|  | { | 
|  | totalTer += 1; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // count the null terminators after the entries | 
|  | totalPrimary += 1; | 
|  |  | 
|  | if (compareSec) | 
|  | { | 
|  | totalSec += 1; | 
|  | } | 
|  |  | 
|  | if (compareTer) | 
|  | { | 
|  | totalTer += 1; | 
|  | } | 
|  |  | 
|  | if (compareIdent) | 
|  | { | 
|  | Normalizer::normalize(source, getDecomposition(), // SRL: ??  NOTE(review): sourceLen is not passed here; presumably source must be NUL-terminated -- confirm | 
|  | 0, decomp, status); | 
|  |  | 
|  | if (U_SUCCESS(status)) | 
|  | { | 
|  | totalIdent = decomp.length() + 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Compute total number of bytes to hold the entries | 
|  | // and make sure the key can hold them (every count is doubled here and in the offsets below, so an entry presumably takes two bytes) | 
|  | uint32_t size   = 2 * (totalPrimary + totalSec + totalTer + totalIdent); | 
|  |  | 
|  | sortkey.ensureCapacity(size); | 
|  |  | 
|  | if (sortkey.isBogus()) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return sortkey; | 
|  | } | 
|  |  | 
|  | int32_t primaryCursor = 0; | 
|  | int32_t secCursor     = 2 * totalPrimary; | 
|  | int32_t secBase       = secCursor; | 
|  | int32_t preSecIgnore  = secBase; /* start of the secondary entries written since the last non-ignorable element */ | 
|  | int32_t terCursor     = secCursor + (2 * totalSec); | 
|  | int32_t identCursor      = terCursor + (2 * totalTer); | 
|  |  | 
|  | // reset source to the beginning | 
|  | cursor1->reset(); | 
|  |  | 
|  | // second pass: now iterate over the source computing the actual entries | 
|  | while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) != CollationElementIterator::NULLORDER) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return sortkey.reset(); /* NOTE(review): the first pass uses setToBogus() for the same kind of failure */ | 
|  | } | 
|  |  | 
|  | int32_t primaryOrder = CollationElementIterator::primaryOrder(order); | 
|  | int32_t secOrder     = CollationElementIterator::secondaryOrder(order); | 
|  | int32_t terOrder     = CollationElementIterator::tertiaryOrder(order); | 
|  |  | 
|  | if (! CollationElementIterator::isIgnorable(order)) | 
|  | { | 
|  | primaryCursor = sortkey.storeBytes(primaryCursor, primaryOrder + SORTKEYOFFSET); | 
|  |  | 
|  | if (compareSec) | 
|  | { | 
|  | if (data->isFrenchSec && (preSecIgnore < secCursor)) | 
|  | { | 
|  | sortkey.reverseBytes(preSecIgnore, secCursor); /* French: pre-reverse the accents of the previous base character */ | 
|  | } | 
|  |  | 
|  | secCursor = sortkey.storeBytes(secCursor, secOrder + SORTKEYOFFSET); | 
|  |  | 
|  | preSecIgnore = secCursor; | 
|  | } | 
|  |  | 
|  | if (compareTer) | 
|  | { | 
|  | terCursor = sortkey.storeBytes(terCursor, terOrder + SORTKEYOFFSET); | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | if (compareSec && secOrder != 0) | 
|  | { | 
|  | secCursor = sortkey.storeBytes(secCursor, secOrder + data->maxSecOrder + SORTKEYOFFSET); /* ignorable element: its value is shifted up by maxSecOrder */ | 
|  | } | 
|  |  | 
|  | if (compareTer && terOrder != 0) | 
|  | { | 
|  | terCursor = sortkey.storeBytes(terCursor, terOrder + data->maxTerOrder + SORTKEYOFFSET); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // append 0 at the end of each portion. | 
|  | sortkey.storeBytes(primaryCursor, 0); | 
|  |  | 
|  | if (compareSec) | 
|  | { | 
|  | if (data->isFrenchSec) | 
|  | { | 
|  | if (preSecIgnore < secCursor) | 
|  | { | 
|  | sortkey.reverseBytes(preSecIgnore, secCursor); | 
|  | } | 
|  |  | 
|  | sortkey.reverseBytes(secBase, secCursor); /* then reverse the whole secondary section */ | 
|  | } | 
|  |  | 
|  | sortkey.storeBytes(secCursor, 0); | 
|  | } | 
|  |  | 
|  | if (compareTer) | 
|  | { | 
|  | sortkey.storeBytes(terCursor, 0); | 
|  | } | 
|  |  | 
|  | if (compareIdent) | 
|  | { | 
|  | sortkey.storeUnicodeString(identCursor, decomp); /* NOTE(review): if normalize() failed above, totalIdent stayed 0 and no room was reserved for decomp -- confirm this is safe */ | 
|  | } | 
|  |  | 
|  | //    Debugging - print out the sortkey [--srl] | 
|  | //      { | 
|  | //        const uint8_t *bytes; | 
|  | //        int32_t xcount; | 
|  | //        bytes = sortkey.getByteArray(xcount); | 
|  | //        //      fprintf(stderr, "\n\n-  [%02X] [%02X]\n\n", (int)(bytes[0]&0xFF), (int)(bytes[1]&0xFF) ); | 
|  | //      } | 
|  |  | 
|  | return sortkey; | 
|  | } | 
|  |  | 
|  |  | 
|  | // Build this collator's rule tables (data->mapping and mPattern) based on a string representation of the rules. | 
|  | // See the big diagram at the top of this file for an overview of how the tables | 
|  | // are organized.  Returns at once if status already indicates a failure; all errors are reported through status. | 
|  | void | 
|  | RuleBasedCollator::build(const UnicodeString&   pattern, | 
|  | UErrorCode&      status) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // This array maps Unicode characters to their collation ordering | 
|  | data->mapping = ucmp32_open(UNMAPPED); | 
|  |  | 
|  | if (data->mapping->fBogus) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | Collator::ECollationStrength aStrength = Collator::IDENTICAL; /* not used in this function */ | 
|  | UBool isSource = TRUE; /* not used in this function */ | 
|  | int32_t i = 0; | 
|  | UnicodeString lastGroupChars; /* not used in this function */ | 
|  | UnicodeString expChars; | 
|  | UnicodeString groupChars; | 
|  |  | 
|  | if (pattern.length() == 0) | 
|  | { | 
|  | status = U_INVALID_FORMAT_ERROR; | 
|  | return; /* NOTE(review): data->mapping opened above is left in place on this path -- confirm it is released elsewhere */ | 
|  | } | 
|  |  | 
|  | // Build the merged collation entries | 
|  | // Since rules can be specified in any order in the string | 
|  | // (e.g. "c , C < d , D < e , E .... C < CH") | 
|  | // this splits all of the rules in the string out into separate | 
|  | // objects and then sorts them.  In the above example, it merges the | 
|  | // "C < CH" rule in just before the "C < D" rule. | 
|  |  | 
|  | mPattern = new MergeCollation(pattern, getDecomposition(), status); | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | ucmp32_close(data->mapping); | 
|  | data->mapping = 0; | 
|  | delete mPattern; | 
|  | mPattern = 0; | 
|  | return; | 
|  | } | 
|  |  | 
|  | int32_t order = 0; | 
|  |  | 
|  | // Walk through each entry, advancing the running order value by the entry's strength | 
|  | for (i = 0; i < mPattern->getCount(); ++i) | 
|  | { | 
|  | const PatternEntry* entry = mPattern->getItemAt(i); | 
|  | groupChars.remove(); | 
|  | expChars.remove(); | 
|  |  | 
|  | // if entry is valid | 
|  | if (entry != NULL) | 
|  | { | 
|  | entry->getChars(groupChars); | 
|  |  | 
|  | // check if french secondary needs to be turned on: a trailing '@' (0x0040) requests it and is stripped from the characters | 
|  | if ((groupChars.length() > 1) && | 
|  | (groupChars[groupChars.length()-1] == 0x0040)) | 
|  | { | 
|  | data->isFrenchSec = TRUE; | 
|  | groupChars.remove(groupChars.length()-1); | 
|  | } | 
|  |  | 
|  | order = increment((Collator::ECollationStrength)entry->getStrength(), order); | 
|  |  | 
|  | if (entry->getExtension(expChars).length() != 0) | 
|  | { | 
|  | // encountered an expanding character, where one character on input | 
|  | // expands to several sort elements (e.g. 'ö' --> 'o' 'e') | 
|  | addExpandOrder(groupChars, expChars, order, status); | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; /* NOTE(review): unlike the MergeCollation failure above, mapping and mPattern are not released here */ | 
|  | } | 
|  | } | 
|  | else if (groupChars.length() > 1) | 
|  | { | 
|  | // encountered a contracting character, where several characters on input | 
|  | // contract into one sort order.  For example, "ch" is treated as a single | 
|  | // character in traditional Spanish sorting. | 
|  | addContractOrder(groupChars, order, status); | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | // Nothing out of the ordinary -- one character maps to one sort order | 
|  | addOrder(groupChars[0], order, status); /* NOTE(review): assumes groupChars is not empty here -- confirm */ | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // add expanding entries for pre-composed characters | 
|  | addComposedChars(); | 
|  |  | 
|  | // Fill in all the expanding chars values | 
|  | commit(); | 
|  |  | 
|  | // Compact the data mapping table | 
|  | ucmp32_compact(data->mapping, 1); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Add expanding entries for pre-composed unicode characters so that this | 
|  | * collator can be used reasonably well with decomposition turned off. | 
|  | */ | 
|  | void RuleBasedCollator::addComposedChars() | 
|  | { | 
|  | UnicodeString buf; | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  |  | 
|  | // Iterate through all of the pre-composed characters in Unicode | 
|  | ComposedCharIter iter; | 
|  | UnicodeString decomp; | 
|  |  | 
|  | while (iter.hasNext()) | 
|  | { | 
|  | UChar c = iter.next(); | 
|  |  | 
|  | if (getCharOrder(c) == UNMAPPED) | 
|  | { | 
|  | // | 
|  | // We don't already have an ordering for this pre-composed character. | 
|  | // | 
|  | // First, see if the decomposed string is already in our | 
|  | // tables as a single contracting-string ordering. | 
|  | // If so, just map the precomposed character to that order. | 
|  | // | 
|  | // TODO: What we should really be doing here is trying to find the | 
|  | // longest initial substring of the decomposition that is present | 
|  | // in the tables as a contracting character sequence, and find its | 
|  | // ordering.  Then do this recursively with the remaining chars | 
|  | // so that we build a list of orderings, and add that list to | 
|  | // the expansion table. | 
|  | // That would be more correct but also significantly slower, so | 
|  | // I'm not totally sure it's worth doing. | 
|  | // | 
|  | iter.getDecomposition(decomp); | 
|  | int contractOrder = getContractOrder(decomp); | 
|  |  | 
|  | if (contractOrder != UNMAPPED) | 
|  | { | 
|  | addOrder(c, contractOrder, status); | 
|  | } | 
|  | else | 
|  | { | 
|  | // | 
|  | // We don't have a contracting ordering for the entire string | 
|  | // that results from the decomposition, but if we have orders | 
|  | // for each individual character, we can add an expanding | 
|  | // table entry for the pre-composed character | 
|  | // | 
|  | UBool allThere = TRUE; | 
|  | int32_t i; | 
|  |  | 
|  | for (i = 0; i < decomp.length(); i += 1) | 
|  | { | 
|  | if (getCharOrder(decomp[i]) == UNMAPPED) | 
|  | { | 
|  | allThere = FALSE; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (allThere) | 
|  | { | 
|  | buf.remove(); | 
|  | buf += c; | 
|  | addExpandOrder(buf, decomp, UNMAPPED, status); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // When the expanding character tables are built by addExpandOrder, | 
|  | // it doesn't know what the final ordering of each character | 
|  | // in the expansion will be.  Instead, it just puts the raw character | 
|  | // code into the table, adding CHARINDEX as a flag.  Now that we've | 
|  | // finished building the mapping table, we can go back and look up | 
|  | // that character to see what its real collation order is and | 
|  | // stick that into the expansion table.  That lets us avoid doing | 
|  | // a two-stage lookup later. | 
|  |  | 
|  | void | 
|  | RuleBasedCollator::commit() | 
|  | { | 
|  | // if there are any expanding characters | 
|  | if (data->expandTable != NULL) | 
|  | { | 
|  | int32_t i; | 
|  | for (i = 0; i < data->expandTable->size(); i += 1) | 
|  | { | 
|  | VectorOfInt* valueList = data->expandTable->at(i); | 
|  | int32_t j; | 
|  | for (j = 0; j < valueList->size(); j++) | 
|  | { | 
|  | // found a expanding character | 
|  | // the expanding char value is not filled in yet | 
|  | if ((valueList->at(j) < EXPANDCHARINDEX) && | 
|  | (valueList->at(j) > CHARINDEX)) | 
|  | { | 
|  | // Get the real values for the non-filled entry | 
|  | UChar ch = (UChar)(valueList->at(j) - CHARINDEX); | 
|  | int32_t realValue = ucmp32_get(data->mapping, ch); | 
|  |  | 
|  | if (realValue == UNMAPPED) | 
|  | { | 
|  | // The real value is still unmapped, maybe it'signorable | 
|  | valueList->atPut(j, IGNORABLEMASK & ch); | 
|  | } | 
|  | // fill in the value | 
|  | else | 
|  | { | 
|  | valueList->atPut(j, realValue); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | *  Increment of the last order based on the comparison level. | 
|  | */ | 
|  | int32_t | 
|  | RuleBasedCollator::increment(Collator::ECollationStrength aStrength, int32_t lastValue) | 
|  | { | 
|  | switch(aStrength) | 
|  | { | 
|  | case Collator::PRIMARY: | 
|  | // increment priamry order  and mask off secondary and tertiary difference | 
|  | lastValue += PRIMARYORDERINCREMENT; | 
|  | lastValue &= PRIMARYORDERMASK; | 
|  | isOverIgnore = TRUE; | 
|  | break; | 
|  |  | 
|  | case Collator::SECONDARY: | 
|  | // increment secondary order and mask off tertiary difference | 
|  | lastValue += SECONDARYORDERINCREMENT; | 
|  | lastValue &= SECONDARYDIFFERENCEONLY; | 
|  |  | 
|  | // record max # of ignorable chars with secondary difference | 
|  | if (isOverIgnore == FALSE) | 
|  | { | 
|  | data->maxSecOrder += 1; | 
|  | } | 
|  | break; | 
|  |  | 
|  | case Collator::TERTIARY: | 
|  | // increment tertiary order | 
|  | lastValue += TERTIARYORDERINCREMENT; | 
|  |  | 
|  | // record max # of ignorable chars with tertiary difference | 
|  | if (isOverIgnore == FALSE) | 
|  | { | 
|  | data->maxTerOrder += 1; | 
|  | } | 
|  | break; | 
|  |  | 
|  | // case IDENTICAL? | 
|  | } | 
|  |  | 
|  | return lastValue; | 
|  | } | 
|  |  | 
|  | // Adds a character and its designated order into the collation table. | 
|  | // This is the simple case, with no expansion or contraction | 
|  | void | 
|  | RuleBasedCollator::addOrder(UChar ch, | 
|  | int32_t anOrder, | 
|  | UErrorCode& status) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // try to find the order of the char in the mapping table | 
|  | int32_t order = ucmp32_get(data->mapping, ch); | 
|  |  | 
|  | if (order >= CONTRACTCHARINDEX) | 
|  | { | 
|  | // There's already an entry for this character that points to a contracting | 
|  | // character table.  Instead of adding the character directly to the mapping | 
|  | // table, we must add it to the contract table instead. | 
|  | key.remove(); | 
|  | key += ch; | 
|  | if (key.isBogus()) | 
|  | { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | addContractOrder(key, anOrder, status); | 
|  | } | 
|  | else | 
|  | { | 
|  | // add the entry to the mapping table, the same later entry replaces the previous one | 
|  | ucmp32_set(data->mapping, ch, anOrder); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Add an expanding-character entry to the table. | 
|  | void | 
|  | RuleBasedCollator::addExpandOrder(  const   UnicodeString& contractChars, | 
|  | const   UnicodeString& expandChars, | 
|  | int32_t anOrder, | 
|  | UErrorCode& status) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Create an expansion table entry | 
|  | int32_t tableIndex = addExpansion(anOrder, expandChars); | 
|  |  | 
|  | // And add its index into the main mapping table | 
|  | if (contractChars.length() > 1) | 
|  | { | 
|  | addContractOrder(contractChars, tableIndex, status); | 
|  | } | 
|  | else | 
|  | { | 
|  | addOrder(contractChars[0], tableIndex, status); | 
|  | } | 
|  | } | 
|  |  | 
|  | int32_t RuleBasedCollator::addExpansion(int32_t anOrder, const UnicodeString &expandChars) | 
|  | { | 
|  | if (data->expandTable == NULL) | 
|  | { | 
|  | data->expandTable = new VectorOfPToExpandTable(); | 
|  |  | 
|  | if (data->expandTable == NULL) | 
|  | { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | // If anOrder is valid, we want to add it at the beginning of the list | 
|  | int32_t offset = (anOrder == UNMAPPED) ? 0 : 1; | 
|  |  | 
|  | VectorOfInt *valueList = new VectorOfInt(expandChars.length() + offset); | 
|  |  | 
|  | if (offset == 1) | 
|  | { | 
|  | valueList->atPut(0, anOrder); | 
|  | } | 
|  |  | 
|  | int32_t i; | 
|  | for (i = 0; i < expandChars.length(); i += 1) | 
|  | { | 
|  | UChar ch = expandChars[i]; | 
|  | int32_t mapValue = getCharOrder(ch); | 
|  |  | 
|  | if (mapValue != UNMAPPED) | 
|  | { | 
|  | valueList->atPut(i + offset, mapValue); | 
|  | } | 
|  | else | 
|  | { | 
|  | // can't find it in the table, will be filled in by commit(). | 
|  | valueList->atPut(i + offset, CHARINDEX + (int32_t)ch); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Add the expanding char list into the expansion table. | 
|  | int32_t tableIndex = EXPANDCHARINDEX + data->expandTable->size(); | 
|  | data->expandTable->atPut(data->expandTable->size(), valueList); | 
|  |  | 
|  | return tableIndex; | 
|  | } | 
|  |  | 
|  | // Add a string of characters that contracts into a single ordering. | 
|  | void | 
|  | RuleBasedCollator::addContractOrder(const   UnicodeString& groupChars, | 
|  | int32_t anOrder, | 
|  | UBool fwd, | 
|  | UErrorCode& status) | 
|  | { | 
|  | if (U_FAILURE(status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (data->contractTable == NULL) | 
|  | { | 
|  | data->contractTable = new VectorOfPToContractTable(); | 
|  | if (data->contractTable->isBogus()) | 
|  | { | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | // See if the initial character of the string already has a contract table. | 
|  | // e.g. for "ch", look for 'c'. | 
|  | int32_t entry = ucmp32_get(data->mapping, groupChars[0]); | 
|  | VectorOfPToContractElement *entryTable = getContractValues(entry - CONTRACTCHARINDEX); | 
|  |  | 
|  | if (entryTable == NULL) | 
|  | { | 
|  | // We need to create a new table of contract entries for this base char | 
|  | int32_t tableIndex = CONTRACTCHARINDEX + data->contractTable->size(); | 
|  | EntryPair *pair = NULL; | 
|  | UnicodeString substring; | 
|  |  | 
|  | entryTable = new VectorOfPToContractElement(); | 
|  | if (entryTable->isBogus()) | 
|  | { | 
|  | delete entryTable; | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | data->contractTable->atPut(data->contractTable->size(), entryTable); | 
|  | if (data->contractTable->isBogus()) | 
|  | { | 
|  | delete entryTable; | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  |  | 
|  | // Add the initial character's current ordering first. then | 
|  | // update its mapping to point to this contract table | 
|  | groupChars.extract(0, 1, substring); | 
|  | if (substring.isBogus()) | 
|  | { | 
|  | delete entryTable; | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | pair = new EntryPair(substring, entry); | 
|  |  | 
|  | entryTable->atPut(0, pair); | 
|  | if (entryTable->isBogus()) | 
|  | { | 
|  | delete entryTable; | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | ucmp32_set(data->mapping, groupChars[0], tableIndex); | 
|  | } | 
|  |  | 
|  | // Now add (or replace) this string in the table | 
|  | int32_t index = getEntry(entryTable, groupChars, fwd); | 
|  |  | 
|  | if (index != UNMAPPED) | 
|  | { | 
|  | EntryPair *pair = (EntryPair *) entryTable->at(index); | 
|  | pair->value = anOrder; | 
|  | } | 
|  | else | 
|  | { | 
|  | EntryPair *pair = new EntryPair(groupChars, anOrder, fwd); | 
|  |  | 
|  | entryTable->atPut(entryTable->size(), pair); | 
|  | } | 
|  |  | 
|  | // If this was a forward mapping for a contracting string, also add a | 
|  | // reverse mapping for it, so that CollationElementIterator::previous | 
|  | // can work right | 
|  | if (fwd) | 
|  | { | 
|  | UnicodeString reverse(groupChars); | 
|  |  | 
|  | if (reverse.isBogus()) | 
|  | { | 
|  | delete entryTable; | 
|  | delete data->contractTable; | 
|  | data->contractTable = NULL; | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | addContractOrder(reverse.reverse(), anOrder, FALSE, status); | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * If the given string has been specified as a contracting string | 
|  | * in this collation table, return its ordering. | 
|  | * Otherwise return UNMAPPED. | 
|  | */ | 
|  | int32_t RuleBasedCollator::getContractOrder(const UnicodeString &groupChars) const | 
|  | { | 
|  | int32_t result = UNMAPPED; | 
|  |  | 
|  | if (data->contractTable != NULL) | 
|  | { | 
|  | VectorOfPToContractElement *entryTable = getContractValues(groupChars[0]); | 
|  |  | 
|  | if (entryTable != NULL) | 
|  | { | 
|  | int32_t index = getEntry(entryTable, groupChars, TRUE); | 
|  |  | 
|  | if (index != UNMAPPED) | 
|  | { | 
|  | EntryPair *pair = entryTable->at(index); | 
|  |  | 
|  | result = pair->value; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | int32_t RuleBasedCollator::getCharOrder(UChar ch) const | 
|  | { | 
|  | int32_t order = ucmp32_get(data->mapping, ch); | 
|  |  | 
|  | if (order >= CONTRACTCHARINDEX) | 
|  | { | 
|  | VectorOfPToContractElement *groupList = getContractValues(order - CONTRACTCHARINDEX); | 
|  | EntryPair *pair = groupList->at(0); | 
|  |  | 
|  | order = pair->value; | 
|  | } | 
|  |  | 
|  | return order; | 
|  | } | 
|  |  | 
|  | // Create a hash code for this collation.  Just hash the main rule table -- | 
|  | // that should be good enough for almost any use. | 
|  | int32_t | 
|  | RuleBasedCollator::hashCode() const | 
|  | { | 
|  | int32_t         value = 0; | 
|  | int32_t         c; | 
|  | int32_t         count = getRules().length(); | 
|  | UTextOffset      pos = count - 1; | 
|  |  | 
|  | if (count > 64) | 
|  | { | 
|  | count = 64; // only hash upto limit | 
|  | } | 
|  |  | 
|  | int16_t i = 0; | 
|  |  | 
|  | while (i < count) | 
|  | { | 
|  | c = data->ruleTable[pos]; | 
|  | value = ((value << (c & 0x0f)) ^ (c << 8)) + (c ^ value); | 
|  | i += 1; | 
|  | pos -= 1; | 
|  | } | 
|  |  | 
|  | if (value == 0) | 
|  | { | 
|  | value = 1; | 
|  | } | 
|  |  | 
|  | return value; | 
|  | } | 
|  |  | 
|  | // find the contracting char entry in the list | 
|  | int32_t | 
|  | RuleBasedCollator::getEntry(VectorOfPToContractElement* list, | 
|  | const UnicodeString& name, | 
|  | UBool fwd) | 
|  | { | 
|  | int32_t i; | 
|  |  | 
|  | if (list != NULL) | 
|  | { | 
|  | for (i = 0; i < list->size(); i += 1) | 
|  | { | 
|  | EntryPair *pair = list->at(i); | 
|  |  | 
|  | if ((pair != NULL) && (pair->fwd == fwd) && (pair->entryName == name)) | 
|  | { | 
|  | return i; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return RuleBasedCollator::UNMAPPED; | 
|  | } | 
|  |  | 
|  | // look for the contracting list entry with the beginning char | 
|  | VectorOfPToContractElement* | 
|  | RuleBasedCollator::getContractValues(UChar ch) const | 
|  | { | 
|  | int32_t index = ucmp32_get(data->mapping, ch); | 
|  | return getContractValues(index - CONTRACTCHARINDEX); | 
|  | } | 
|  |  | 
|  | // look for the contracting list entry with the index | 
|  | VectorOfPToContractElement* | 
|  | RuleBasedCollator::getContractValues(int32_t    index) const | 
|  | { | 
|  | if (data->contractTable != NULL) | 
|  | { | 
|  | if (index >= 0) | 
|  | { | 
|  | return data->contractTable->at(index); | 
|  | } | 
|  | } | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Return the maximum length of any expansion sequences that end | 
|  | * with the specified comparison order. | 
|  | * | 
|  | * @param order a collation order returned by previous or next. | 
|  | * @return the maximum length of any expansion seuences ending | 
|  | *         with the specified order. | 
|  | * | 
|  | * @see CollationElementIterator#getMaxExpansion | 
|  | */ | 
|  | int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | 
|  | { | 
|  | int32_t result = 1; | 
|  |  | 
|  | if (data->expandTable != NULL) | 
|  | { | 
|  | // Right now this does a linear search through the entire | 
|  | // expandsion table.  If a collator had a large number of expansions, | 
|  | // this could cause a performance problem, but in practice that | 
|  | // rarely happens | 
|  | int32_t i; | 
|  | for (i = 0; i < data->expandTable->size(); i += 1) | 
|  | { | 
|  | VectorOfInt *valueList = data->expandTable->at(i); | 
|  | int32_t length = valueList->size(); | 
|  |  | 
|  | if (length > result && valueList->at(length-1) == order) | 
|  | { | 
|  | result = length; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | *  Get the entry of hash table of the expanding string in the collation | 
|  | *  table. | 
|  | *  @param offset the index of the expanding string value list | 
|  | */ | 
|  | VectorOfInt *RuleBasedCollator::getExpandValueList(int32_t order) const | 
|  | { | 
|  | return data->expandTable->at(order - EXPANDCHARINDEX); | 
|  | } | 
|  |  | 
|  |  | 
|  |  | 
|  | void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, UMemoryStream* is) | 
|  | { | 
|  | if (!uprv_mstrm_error(is)) | 
|  | { | 
|  | // Check that this is the correct file type | 
|  | int16_t id; | 
|  |  | 
|  | uprv_mstrm_read(is, &id, sizeof(id)); | 
|  | if (id != collator->FILEID) | 
|  | { | 
|  | // This isn't the right type of file.  Mark the ios | 
|  | // as failing and return. | 
|  | uprv_mstrm_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Stream in large objects | 
|  | char isNull; | 
|  |  | 
|  | uprv_mstrm_read(is, &isNull, sizeof(isNull)); | 
|  | if (isNull) | 
|  | { | 
|  | delete collator->data; | 
|  | collator->data = NULL; | 
|  | } | 
|  | else | 
|  | { | 
|  | if (collator->data == NULL) | 
|  | { | 
|  | collator->data = new TableCollationData; | 
|  | } | 
|  |  | 
|  | collator->data->streamIn(is); | 
|  | if (collator->data->isBogus()) { | 
|  | uprv_mstrm_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Verify that the end marker is present | 
|  | uprv_mstrm_read(is, &id, sizeof(id)); | 
|  | if (id != collator->FILEID) | 
|  | { | 
|  | // This isn't the right type of file.  Mark the ios | 
|  | // as failing and return. | 
|  | uprv_mstrm_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Reset other data members | 
|  | collator->isOverIgnore = FALSE; | 
|  | collator->lastChar = 0; | 
|  | delete collator->mPattern; | 
|  | collator->mPattern = 0; | 
|  | collator->key.remove(); | 
|  | collator->dataIsOwned = TRUE; | 
|  | } | 
|  | } | 
|  |  | 
|  | void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, UMemoryStream* os) | 
|  | { | 
|  | if (!uprv_mstrm_error(os)) | 
|  | { | 
|  | // We use a 16-bit ID code to identify this file. | 
|  | int16_t id = collator->FILEID; | 
|  | uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id)); | 
|  |  | 
|  | // Stream out the data | 
|  | char isNull; | 
|  | isNull = (collator->data == 0); | 
|  | uprv_mstrm_write(os, (uint8_t*)&isNull, sizeof(isNull)); | 
|  |  | 
|  | if (!isNull) | 
|  | { | 
|  | collator->data->streamOut(os); | 
|  | } | 
|  |  | 
|  | // Write out the ID to indicate the end | 
|  | uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id)); | 
|  | } | 
|  | } | 
|  |  | 
|  | void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, FileStream* is) | 
|  | { | 
|  | if (!T_FileStream_error(is)) | 
|  | { | 
|  | // Check that this is the correct file type | 
|  | int16_t id; | 
|  |  | 
|  | T_FileStream_read(is, &id, sizeof(id)); | 
|  | if (id != collator->FILEID) | 
|  | { | 
|  | // This isn't the right type of file.  Mark the ios | 
|  | // as failing and return. | 
|  | T_FileStream_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Stream in large objects | 
|  | char isNull; | 
|  |  | 
|  | T_FileStream_read(is, &isNull, sizeof(isNull)); | 
|  | if (isNull) | 
|  | { | 
|  | delete collator->data; | 
|  | collator->data = NULL; | 
|  | } | 
|  | else | 
|  | { | 
|  | if (collator->data == NULL) | 
|  | { | 
|  | collator->data = new TableCollationData; | 
|  | } | 
|  |  | 
|  | collator->data->streamIn(is); | 
|  | if (collator->data->isBogus()) { | 
|  | T_FileStream_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Verify that the end marker is present | 
|  | T_FileStream_read(is, &id, sizeof(id)); | 
|  | if (id != collator->FILEID) | 
|  | { | 
|  | // This isn't the right type of file.  Mark the ios | 
|  | // as failing and return. | 
|  | T_FileStream_setError(is); // force the stream to set its error flag | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Reset other data members | 
|  | collator->isOverIgnore = FALSE; | 
|  | collator->lastChar = 0; | 
|  | delete collator->mPattern; | 
|  | collator->mPattern = 0; | 
|  | collator->key.remove(); | 
|  | collator->dataIsOwned = TRUE; | 
|  | } | 
|  | } | 
|  |  | 
|  | void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, FileStream* os) | 
|  | { | 
|  | if (!T_FileStream_error(os)) | 
|  | { | 
|  | // We use a 16-bit ID code to identify this file. | 
|  | int16_t id = collator->FILEID; | 
|  | T_FileStream_write(os, &id, sizeof(id)); | 
|  |  | 
|  | // Stream out the data | 
|  | char isNull; | 
|  | isNull = (collator->data == 0); | 
|  | T_FileStream_write(os, &isNull, sizeof(isNull)); | 
|  |  | 
|  | if (!isNull) | 
|  | { | 
|  | collator->data->streamOut(os); | 
|  | } | 
|  |  | 
|  | // Write out the ID to indicate the end | 
|  | T_FileStream_write(os, &id, sizeof(id)); | 
|  | } | 
|  | } | 
|  |  | 
|  | UBool RuleBasedCollator::writeToFile(const char* fileName) const | 
|  | { | 
|  | FileStream* ofs = T_FileStream_open(fileName, "wb"); | 
|  | if (ofs != 0) | 
|  | { | 
|  | RuleBasedCollatorStreamer::streamOut(this, ofs); | 
|  | } | 
|  |  | 
|  | #ifdef COLLDEBUG | 
|  | fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs), | 
|  | (!T_FileStream_error(ofs) ? ", OK" : ", FAIL")); | 
|  | #endif | 
|  |  | 
|  | UBool err = T_FileStream_error(ofs) == 0; | 
|  |  | 
|  | T_FileStream_close(ofs); | 
|  | return err; | 
|  | } | 
|  | /* | 
|  | UBool RuleBasedCollator::prepareForBundle() const | 
|  | { | 
|  | UMemoryStream* ofs = uprv_mstrm_openNew(0); | 
|  | if (ofs != 0) | 
|  | { | 
|  | RuleBasedCollatorStreamer::streamOut(this, ofs); | 
|  | } | 
|  |  | 
|  | #ifdef COLLDEBUG | 
|  | fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs), | 
|  | (!T_FileStream_error(ofs) ? ", OK" : ", FAIL")); | 
|  | #endif | 
|  |  | 
|  | UBool err = uprv_mstrm_error(ofs) == 0; | 
|  |  | 
|  | uprv_mstrm_close(ofs); | 
|  |  | 
|  | return err; | 
|  | } | 
|  | */ | 
|  |  | 
|  | void RuleBasedCollator::addToCache(const UnicodeString& key) | 
|  | { | 
|  | // This method doesn't add the RuleBasedCollator itself to the cache.  Instead, | 
|  | // it adds the given RuleBasedCollator's data object to the TableCollationData | 
|  | // cache, and marks it as non-owned in the given RuleBasedCollator object. | 
|  | TableCollationData::addToCache(key, data); | 
|  | dataIsOwned = FALSE; | 
|  | } | 
|  |  | 
|  | void | 
|  | RuleBasedCollator::constructFromCache(const UnicodeString& key, | 
|  | UErrorCode& status) | 
|  | { | 
|  | // Attempt to construct this RuleBasedCollator object from cached TableCollationData. | 
|  | // If no such data is in the cache, return false. | 
|  | if (U_FAILURE(status)) return; | 
|  | if (dataIsOwned) | 
|  | { | 
|  | delete data; | 
|  | data = NULL; | 
|  | } | 
|  |  | 
|  | isOverIgnore = FALSE; | 
|  | lastChar = 0; | 
|  | mPattern = 0; | 
|  | setStrength(Collator::TERTIARY); | 
|  |  | 
|  | dataIsOwned = FALSE; | 
|  | data = TableCollationData::findInCache(key); | 
|  | if (data == NULL) | 
|  | { | 
|  | status = U_MISSING_RESOURCE_ERROR; | 
|  | } | 
|  | } | 
|  |  | 
|  | char* | 
|  | RuleBasedCollator::createPathName(  const UnicodeString&    prefix, | 
|  | const UnicodeString&    name, | 
|  | const UnicodeString&    suffix) | 
|  | { | 
|  | // Concatenate three elements to form a file name, and return it. | 
|  |  | 
|  | UnicodeString   workingName(prefix); | 
|  | int32_t         size; | 
|  | char*           returnVal; | 
|  |  | 
|  | workingName += name; | 
|  | workingName += suffix; | 
|  |  | 
|  | size = workingName.length(); | 
|  | returnVal = new char[size + 1]; | 
|  | workingName.extract(0, size, returnVal, ""); | 
|  | returnVal[size] = 0; | 
|  |  | 
|  | return returnVal; | 
|  | } | 
|  |  | 
|  | void | 
|  | RuleBasedCollator::chopLocale(UnicodeString& localeName) | 
|  | { | 
|  | // chopLocale removes the final element from a locale string. | 
|  | // For instance, "de_CH" becomes "de", and "de" becomes "". | 
|  | // "" remains "". | 
|  |  | 
|  | int32_t     size = localeName.length(); | 
|  | int32_t     i; | 
|  |  | 
|  | for (i = size - 1; i > 0; i--) | 
|  | { | 
|  | if (localeName[i] == 0x005F) | 
|  | { | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (i < 0) | 
|  | { | 
|  | i = 0; | 
|  | } | 
|  |  | 
|  | localeName.remove(i, size - i); | 
|  | } | 
|  |  | 
|  |  | 
|  | uint8_t * | 
|  | RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &status) | 
|  | { | 
|  | UMemoryStream *memdata = 0; | 
|  | uint8_t *data = 0; | 
|  |  | 
|  | if(U_FAILURE(status)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | memdata = uprv_mstrm_openNew(0); | 
|  |  | 
|  | if (memdata != 0) { | 
|  | RuleBasedCollatorStreamer::streamOut(this, memdata); | 
|  | } | 
|  |  | 
|  | UBool err = uprv_mstrm_error(memdata) == 0; | 
|  |  | 
|  |  | 
|  | data = (uint8_t *)uprv_malloc(memdata->fPos); | 
|  | if(data == 0) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | uprv_mstrm_close(memdata); | 
|  | length = 0; | 
|  | return 0; | 
|  | } else { | 
|  | uprv_memcpy(data, memdata->fStart, memdata->fPos); | 
|  | length = memdata->fPos; | 
|  | uprv_mstrm_close(memdata); | 
|  | return data; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | //eof |