source/i18n/tblcoll.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 * Copyright (C) 1996-1999, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * File tblcoll.cpp
 *
 * Created by: Helena Shih
 *
 * Modification History:
 *
 *  Date        Name        Description
 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
 *                          constructor which reads RuleBasedCollator object from
 *                          a binary file.  Added writeToFile method which streams
 *                          RuleBasedCollator out to a binary file.  The streamIn
 *                          and streamOut methods use istream and ostream objects
 *                          in binary mode.
 *  2/11/97     aliu        Moved declarations out of for loop initializer.
 *                          Added Mac compatibility #ifdef for ios::nocreate.
 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
 *                          hold invariant data.
 *  2/13/97     aliu        Moved several methods into this class from Collation.
 *                          Added a private RuleBasedCollator(Locale&) constructor,
 *                          to be used by Collator::getInstance().  General
 *                          clean up.  Made use of UErrorCode variables consistent.
 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
 *                          constructor and getDynamicClassID.
 *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
 *                          use the maximum allowable value which is kBlockCount.
 *                          Modified getRules() to load rules dynamically.  Changed
 *                          constructFromFile() call to accomodate this (added
 *                          parameter to specify whether binary loading is to
 *                          take place).
 * 05/06/97     helena      Added memory allocation error check.
 *  6/20/97     helena      Java class name change.
 *  6/23/97     helena      Adding comments to make code more readable.
 * 09/03/97     helena      Added createCollationKeyValues().
 * 06/26/98     erm         Changes for CollationKeys using byte arrays.
 * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
 * 04/23/99     stephen     Removed EDecompositionMode, merged with
 *                          Normalizer::EMode
 * 06/14/99     stephen     Removed kResourceBundleSuffix
 * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
 *                          files are no longer used.
 * 11/02/99     helena      Collator performance enhancements.  Special case
 *                          for NO_OP situations.
 * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
 *                          to implementation file.
 *******************************************************************************
 */

 #include "ucmp32.h"
 #include "tcoldata.h"

 #include "unicode/tblcoll.h"

 #include "unicode/coleitr.h"
 #include "unicode/locid.h"
 #include "unicode/unicode.h"
 #include "tables.h"
 #include "unicode/normlzr.h"
 #include "mergecol.h"
 #include "unicode/resbund.h"
 #include "filestrm.h"
 #include "umemstrm.h"

 #ifdef _DEBUG
 #include "unistrm.h"
 #endif

 #include "compitr.h"

 #include <string.h>

 #include "unicode/ustring.h"

 #include "cmemory.h"


 class RuleBasedCollatorStreamer
 {
 public:
    static void streamIn(RuleBasedCollator* collator, FileStream* is);
    static void streamOut(const RuleBasedCollator* collator, FileStream* os);
    static void streamIn(RuleBasedCollator* collator, UMemoryStream* is);
    static void streamOut(const RuleBasedCollator* collator, UMemoryStream* os);
 };

 //===========================================================================================
 //  The following diagram shows the data structure of the RuleBasedCollator object.
 //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
 //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
 //  What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
 //  sorts 'o-umlaut' as if it's always expanded with 'e'.
 //
 // mapping table                       contracting list                  expanding list
 // (contains all unicode char
 //  entries)                         ___     _____________         _________________________
 //   ________                   |==>|_*_|-->|'c'  |v('c') |   |==>|v('o')|v('umlaut')|v('e')|
 //  |_\u0001_|--> v('\u0001')   |   |_:_|   |-------------|   |   |-------------------------|
 //  |_\u0002_|--> v('\u0002')   |   |_:_|   |'ch' |v('ch')|   |   |             :           |
 //  |____:___|                  |   |_:_|   |-------------|   |   |-------------------------|
 //  |____:___|                  |           |'cH' |v('cH')|   |   |             :           |
 //  |__'a'___|--> v('a')        |           |-------------|   |   |-------------------------|
 //  |__'b'___|--> v('b')        |           |'Ch' |v('Ch')|   |   |             :           |
 //  |____:___|                  |           |-------------|   |   |-------------------------|
 //  |____:___|                  |           |'CH' |v('CH')|   |   |             :           |
 //  |___'c'__|-------------------            -------------    |   |-------------------------|
 //  |____:___|                                                |   |             :           |
 //  |o-umlaut|------------------------------------------------    |_________________________|
 //  |____:___|
 //
 //
 // Noted by Helena Shih on 6/23/97 with pending design changes (slimming collation).
 //============================================================================================

 const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;             // need look up in .commit()
 const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;       // Expand index follows
 const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;     // contract indexes follows
 const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;              // unmapped character values
 const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000; // primary strength increment
 const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100; // secondary strength increment
 const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001; // tertiary strength increment
 const int32_t RuleBasedCollator::MAXIGNORABLE = 0x00010000;          // maximum ignorable char order value
 const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;      // mask off anything but primary order
 const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;    // mask off anything but secondary order
 const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;     // mask off anything but tertiary order
 const int32_t RuleBasedCollator::SECONDARYRESETMASK = 0x0000ffff;    // mask off secondary and tertiary order
 const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;         // mask off ignorable char order
 const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000; // use only the primary difference
 const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;  // use only the primary and secondary difference
 const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;             // primary order shift
 const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;            // secondary order shift
 const int32_t RuleBasedCollator::SORTKEYOFFSET = 1;                  // minimum sort key offset
 const int32_t RuleBasedCollator::CONTRACTCHAROVERFLOW = 0x7FFFFFFF;  // Indicates the char is a contract char

 const int16_t RuleBasedCollator::FILEID = 0x5443;                    // unique file id for parity check
 const char* RuleBasedCollator::kFilenameSuffix = ".col";             // binary collation file extension
 char  RuleBasedCollator::fgClassID = 0; // Value is irrelevant       // class id

 ////////////////////////////////////////////////////////////////////////
 // NormalizerIterator
 //
 // This class is essentially a duplicate of CollationElementIterator,
 // stripped down for speed.  It is declared here so we can incorporate
 // internal classes as subobjects, as well as just to hide it from the
 // public interface.
 ////////////////////////////////////////////////////////////////////////

 /* Internal class for quick iteration over the text.
    100% pure inline code
 */
 class NormalizerIterator {
 public:
     Normalizer *cursor;
     VectorOfInt *bufferAlias;
     VectorOfInt *reorderBuffer;
     VectorOfInt ownBuffer;
     UChar*      text;
     int32_t     expIndex;
     int32_t     textLen;
     UTextOffset  currentOffset;

     NormalizerIterator(void);
     NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode);
     ~NormalizerIterator(void);
     void setText(const UChar* source, int32_t length, UErrorCode& status);
     void setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status);

     UChar current(void) const;
     UChar next(void);
     void reset(void);
 };

 inline
 NormalizerIterator::NormalizerIterator() :
     cursor(0),
     bufferAlias(0),
     reorderBuffer(0),
     ownBuffer(2),
     text(0),
     textLen(0),
     currentOffset(0),
     expIndex(0)
 {
 }

 inline
 NormalizerIterator::NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode) :
     cursor(0),
     bufferAlias(0),
     reorderBuffer(0),
     ownBuffer(2),
     text(0),
     textLen(0),
     currentOffset(0),
     expIndex(0)
 {
     if (mode == Normalizer::NO_OP) {
         text = (UChar*)source;
         textLen = length;
         currentOffset = 0;
     } else {
         cursor = new Normalizer(source, length, mode);

     }
 }

 inline
 NormalizerIterator::~NormalizerIterator()
 {
     if (cursor != 0) {
         delete cursor;
         cursor = 0;
     }
     if (reorderBuffer != 0) {
         delete reorderBuffer;
     }
 }

 inline
 void
 NormalizerIterator::setText(const UChar* source, int32_t length, UErrorCode& status)
 {
     if (cursor == 0) {
         text = (UChar*)source;
         textLen = length;
         currentOffset = 0;

     } else {
         text = 0;
         cursor->setText(source, length, status);
     }
     bufferAlias = 0;
     currentOffset = 0;
 }

 /* You can only set mode after the comparision of two strings is completed.
    Setting the mode in the middle of a comparison is not allowed.
    */
 inline
 void

 NormalizerIterator::setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status)
 {
     if(mode != Normalizer::NO_OP)
     {
         /* DO have a mode -  will need a normalizer object */
         if(cursor != NULL)
         {
             /* Just modify the existing cursor */
             cursor->setMode(mode);
             cursor->setText(source, length, status);
         }
         else
         {
             cursor = new Normalizer(source, length, mode);
         }

         /* RESET the old data */
         text = 0;
         textLen = 0;
     }
     else
     {
         /* NO_OP mode.. */
         if(cursor != NULL)
         { /* get rid of the old cursor */
             delete cursor;
             cursor = 0;
         }

         text = (UChar*)source;
         textLen = length;
     }
     currentOffset = 0; /* always */

     bufferAlias = 0;
 }

 inline
 UChar
 NormalizerIterator::current(void) const
 {
     if (text != 0) {
         if(currentOffset >= textLen)
         {
             return Normalizer::DONE;
         }
         else
         {
             return text[currentOffset];
         }
     }

     return (UChar)cursor->current();
 }


 inline
 UChar
 NormalizerIterator::next(void)
 {
     if (text != 0) {
         return ((currentOffset < textLen) ? text[++currentOffset] : Normalizer::DONE);
     }
     return (UChar)cursor->next();
 }

 inline
 void
 NormalizerIterator::reset(void)
 {
     currentOffset = 0;
     if(cursor)
     {
         cursor->reset();
     }
 }

 //================ Some inline definitions of implementation functions........ ========
 /**
  * A clone of CollationElementIterator::makeReorderedBuffer, trimmed down
  * to only handle forward.
  */
 inline VectorOfInt*
 RuleBasedCollator::makeReorderedBuffer(NormalizerIterator* cursor,
                                        UChar colFirst,
                                        int32_t lastValue,
                                        VectorOfInt* lastExpansion) const {
     VectorOfInt* result;

     int32_t firstValue = ucmp32_get(data->mapping, colFirst);
     if (firstValue >= CONTRACTCHARINDEX) {
         UErrorCode status = U_ZERO_ERROR;
         firstValue = nextContractChar(cursor, colFirst, status);
     }

     VectorOfInt* firstExpansion = NULL;
     if (firstValue >= EXPANDCHARINDEX) {
         firstExpansion = getExpandValueList(firstValue);
     }

     if (firstExpansion == NULL && lastExpansion == NULL) {
         cursor->ownBuffer.at(0) = firstValue;
         cursor->ownBuffer.at(1) = lastValue;
         result = &cursor->ownBuffer;
     }
     else {
         int32_t firstLength = firstExpansion==NULL? 1 : firstExpansion->size();
         int32_t lastLength = lastExpansion==NULL? 1 : lastExpansion->size();
         if (cursor->reorderBuffer == NULL) {
             cursor->reorderBuffer = new VectorOfInt(firstLength+lastLength);
         }
         // reorderdBuffer gets reused for the life of this object.
         // Since its internal buffer only grows, there is a danger
         // that it will get really, really big, and never shrink.  If
         // this is actually happening, insert code here to check for
         // the condition.  Something along the lines of:
         //! else if (reorderBuffer->size() >= 256 &&
         //!          (firstLength+lastLength) < 16) {
         //!     delete reorderBuffer;
         //!     reorderBuffer = new VectorOfInt(firstLength+lastLength);
         //! }
         // The specific numeric values need to be determined
         // empirically. [aliu]
         result = cursor->reorderBuffer;

         if (firstExpansion == NULL) {
             result->atPut(0, firstValue);
         }
         else {
             // System.arraycopy(firstExpansion, 0, result, 0, firstLength);
             *result = *firstExpansion;
         }

         if (lastExpansion == NULL) {
             result->atPut(firstLength, lastValue);
         }
         else {
             // System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
             for (int32_t i=0; i<lastLength; ++i) {
                 result->atPut(firstLength + i, lastExpansion->at(i));
             }
         }
         result->setSize(firstLength+lastLength);
     }

     return result;
 }


 inline int32_t
 RuleBasedCollator::strengthOrder(int32_t value) const
 {
     if (getStrength() == PRIMARY)
     {
         return (value & PRIMARYDIFFERENCEONLY);
     } else if (getStrength() == SECONDARY)
     {
         return (value & SECONDARYDIFFERENCEONLY);
     }
     return value;
 }


 inline int32_t
 RuleBasedCollator::getStrengthOrder(NormalizerIterator* cursor,
                                     UErrorCode status) const
 {
     if (U_FAILURE(status))
     {
         return CollationElementIterator::NULLORDER;
     }

     if (cursor->bufferAlias != NULL)
     {
         // bufferAlias needs a bit of an explanation.
         // When we hit an expanding character in the text, we call the order's
         // getExpandValues method to retrieve an array of the orderings for all
         // of the characters in the expansion (see the end of this method).
         // The first ordering is returned, and an alias to the orderings array
         // is saved so that the remaining orderings can be returned on subsequent
         // calls to next.  So, if the expanding buffer is not exhausted,
         // all we have to do here is return the next ordering in the buffer.
         if (cursor->expIndex < cursor->bufferAlias->size())
         {
             //_L((stderr, "next from [%08X] from bufferAlias\n", this));
             return strengthOrder(cursor->bufferAlias->at(cursor->expIndex++));
         }
         else
         {
             cursor->bufferAlias = NULL;
         }
     }

     UChar ch = cursor->current();
     cursor->next();

     //_L((stderr, "Next from [%08X] = [%04X], [%c]\n", cursor, (int)ch & 0xFFFF, (char)(ch & 0xFF)));

     if (ch == Normalizer::DONE) {
         return CollationElementIterator::NULLORDER;
     }
     // Ask the collator for this character's ordering.
     int32_t value = ucmp32_get(data->mapping, ch);

     if (value == UNMAPPED)
     {
         // Returned an "unmapped" flag and save the character so it can be
         // returned next time this method is called.
         if (ch == 0x0000) return ch; // \u0000 is not valid in C++'s UnicodeString
         cursor->ownBuffer.at(0) = CollationElementIterator::UNMAPPEDCHARVALUE;
         cursor->ownBuffer.at(1) = ch << 16;
         cursor->bufferAlias = &cursor->ownBuffer;

     } else {

         if (value >= CONTRACTCHARINDEX)
         {
             value = nextContractChar(cursor, ch, status);
         }

         if (value >= EXPANDCHARINDEX) {
             cursor->bufferAlias = getExpandValueList(value);
         }

         if (CollationElementIterator::isThaiPreVowel(ch)) {
             UChar consonant = cursor->current();
             if (CollationElementIterator::isThaiBaseConsonant(consonant)) {
                 cursor->next();
                 cursor->bufferAlias = makeReorderedBuffer(cursor, consonant, value,
                                                           cursor->bufferAlias);
             }
         }
     }

     if (cursor->bufferAlias != NULL) {
         cursor->expIndex = 1;
         value = cursor->bufferAlias->at(0);
     }

     return strengthOrder(value);
 }

 // ==================== End inlines ============================================


 //===============================================================================

 RuleBasedCollator::RuleBasedCollator()
     : Collator(),
       isOverIgnore(FALSE),
       mPattern(0),
       //      sourceCursor(0),
       //targetCursor(0),
       cursor1(0),
       cursor2(0),
       data(0),
       dataIsOwned(FALSE)
 {
 }

 RuleBasedCollator::RuleBasedCollator(const  RuleBasedCollator&  that)
     : Collator(that),
       isOverIgnore(that.isOverIgnore),
       mPattern(0),
       //      sourceCursor(0),
       //targetCursor(0),
       cursor1(0),
       cursor2(0),
       dataIsOwned(FALSE),
       data(that.data) // Alias the data pointer
 {
 }

 UBool
 RuleBasedCollator::operator==(const Collator& that) const
 {
     if (this == &that)
     {
         return TRUE;
     }

     if (this->getDynamicClassID() != that.getDynamicClassID())
     {
         return FALSE;  // not the same class
     }

     if (!Collator::operator==(that))
     {
         return FALSE;
     }

     RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;

     if (isOverIgnore != thatAlias.isOverIgnore)
     {
         return FALSE;
     }

     if (data != thatAlias.data)
     {
         return FALSE;
     }

     return TRUE;
 }

 RuleBasedCollator&
 RuleBasedCollator::operator=(const  RuleBasedCollator& that)
 {
     if (this != &that)
     {
         Collator::operator=(that);
         isOverIgnore = that.isOverIgnore;

         if (dataIsOwned)
         {
             delete data;
         }

         data = 0;
         delete mPattern;
         mPattern = 0;
         dataIsOwned = FALSE;
         data = that.data;
     }

     return *this;
 }

 RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                                         UErrorCode&      status)
     : Collator(),
       isOverIgnore(FALSE),
       mPattern(0),
       //      sourceCursor(0),
       ///      targetCursor(0),
       cursor1(0),
       cursor2(0),
       data(0),
       dataIsOwned(FALSE)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     constructFromRules(rules, status);
 }

 RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                      ECollationStrength collationStrength,
                      UErrorCode&      status)
   : Collator(collationStrength, Normalizer::NO_OP),
     isOverIgnore(FALSE),
     mPattern(0),
     //    sourceCursor(0),
     //    targetCursor(0),
       cursor1(0),
       cursor2(0),
     data(0),
     dataIsOwned(FALSE)
 {
     if (U_FAILURE(status))
     {
         return;
     }
     constructFromRules(rules, status);
 }

 RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                      Normalizer::EMode decompositionMode,
                      UErrorCode&      status)
   : Collator(TERTIARY, decompositionMode),
     isOverIgnore(FALSE),
     mPattern(0),
     //    sourceCursor(0),
     //    targetCursor(0),
       cursor1(0),
       cursor2(0),
     data(0),
     dataIsOwned(FALSE)
 {
   if (U_FAILURE(status))
     {
       return;
     }

   constructFromRules(rules, status);
 }

 RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                      ECollationStrength collationStrength,
                      Normalizer::EMode decompositionMode,
                      UErrorCode&      status)
   : Collator(collationStrength, decompositionMode),
       isOverIgnore(FALSE),
       mPattern(0),
     //      sourceCursor(0),
     //targetCursor(0),
       cursor1(0),
       cursor2(0),
       data(0),
       dataIsOwned(FALSE)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     constructFromRules(rules, status);
 }

 void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
                                         UErrorCode& status)
 {
     // Construct this collator's ruleset from its string representation
     if (U_FAILURE(status))
     {
         return;
     }

     if (rules.isBogus())
     {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

     if (dataIsOwned)
     {
         delete data;
         data = 0;
     }

     isOverIgnore = FALSE;
     setStrength(Collator::TERTIARY);

     data = new TableCollationData;
     if (data->isBogus())
     {
         status = U_MEMORY_ALLOCATION_ERROR;
         delete data;
         data = 0;
         return;
     }

     // We constructed the data using the build method, so we own it.
     dataIsOwned = TRUE;

     // Now that we've got all the buffers allocated, do the actual work
     mPattern = 0;
     build(rules, status);
 }

 void
 RuleBasedCollator::constructFromFile(const char* fileName,
                                   UErrorCode& status)
 {
     // This method tries to read in a flattened RuleBasedCollator that
     // has been previously streamed out using the streamOut() method.
     // The 'fileName' parameter should contain a full pathname valid on
     // the local environment.

     if (U_FAILURE(status))
     {
         return;
     }

     if (dataIsOwned)
     {
         delete data;
         data = 0;
     }

     mPattern = 0;
     isOverIgnore = FALSE;
     setStrength(Collator::TERTIARY); // This is the default strength

     FileStream* ifs = T_FileStream_open(fileName, "rb");
     if (ifs == 0) {
         status = U_FILE_ACCESS_ERROR;
         return;
     }

     // The streamIn function does the actual work here...
     RuleBasedCollatorStreamer::streamIn(this, ifs);

     if (!T_FileStream_error(ifs))
     {
         status = U_ZERO_ERROR;
     }
     else if (data && data->isBogus())
     {
         status = U_MEMORY_ALLOCATION_ERROR;
         delete data;
         data = 0;
     }
     else
     {
         status = U_MISSING_RESOURCE_ERROR;
         delete data;
         data = 0;
     }

 #ifdef COLLDEBUG
     fprintf(stderr, "binary read %s size %d, %s\n", fileName, T_FileStream_size(ifs), u_errorName(status));
 #endif

     // We constructed the data when streaming it in, so we own it
     dataIsOwned = TRUE;

     T_FileStream_close(ifs);
 }

 const char *
 RuleBasedCollator::constructFromBundle(const Locale & name,
                                   UErrorCode& status)
 {
   // This method tries to locate binary collation data which has been
   // previously streamed to a binary object "%%Collation" in a
   // resource bundle. If the data is found, it is cached.
   // cache is checked before actually streaming in data
   // resource bundle fallback mechanism is used.

     if (U_FAILURE(status))
     {
         return 0;
     }

     if (dataIsOwned)
     {
         delete data;
         data = 0;
     }
     const char* realName = 0;

     mPattern = 0;
     isOverIgnore = FALSE;
     setStrength(Collator::TERTIARY); // This is the default strength

     ResourceBundle rb((char *)0, name, status);
     if(U_SUCCESS(status)) {
       ResourceBundle binary = rb.get("%%Collation", status); //This is the bundle that actually contains the collation data
       realName = binary.getName();
       if(U_SUCCESS(status)) {
 	UErrorCode intStatus = U_ZERO_ERROR;
 	constructFromCache(realName, intStatus); // check whether we already have this data in cache
 	if(U_SUCCESS(intStatus)) {
 	  return realName;
 	}
 	int32_t inDataLen = 0;
 	const uint8_t *inData = binary.getBinary(inDataLen, status); //This got us the real binary data

 	UMemoryStream *ifs = uprv_mstrm_openBuffer(inData, inDataLen);

 	if (ifs == 0) {
 	  status = U_FILE_ACCESS_ERROR;
 	  return 0;
 	}

 	// The streamIn function does the actual work here...
 	RuleBasedCollatorStreamer::streamIn(this, ifs);

 	if (!uprv_mstrm_error(ifs)) {
 	}
 	else if (data && data->isBogus()) {
 	  status = U_MEMORY_ALLOCATION_ERROR;
 	  delete data;
 	  data = 0;
 	} else {
 	  status = U_MISSING_RESOURCE_ERROR;
 	  delete data;
 	  data = 0;
 	}

 	// We constructed the data when streaming it in, so we own it
 	dataIsOwned = TRUE;

 	uprv_mstrm_close(ifs);
 	addToCache(realName); // add the newly constructed data to cache
 	return realName;
       } else {
 	status = U_MISSING_RESOURCE_ERROR;
 	return 0;
       }
     } else {
         return 0;
     }
 }

 RuleBasedCollator::RuleBasedCollator(   const Locale& desiredLocale,
                                 UErrorCode& status)
     : Collator(),
       isOverIgnore(FALSE),
       dataIsOwned(FALSE),
       data(0),
       //      sourceCursor(0),
       //targetCursor(0),
       cursor1(0),
       cursor2(0),
       mPattern(0)
 {


   if (U_FAILURE(status)) {
     return;
   }

   // Try to load, in order:
   // 1. The desired locale's collation.
   // 2. A fallback of the desired locale.
   // 3. The default locale's collation.
   // 4. A fallback of the default locale.
   // 5. The default collation rules, which contains en_US collation rules.

   // To reiterate, we try:
   // Specific:
   //  language+country+variant
   //  language+country
   //  language
   // Default:
   //  language+country+variant
   //  language+country
   //  language
   // Root: (aka DEFAULTRULES)
   // steps 1-5 are handled by resource bundle fallback mechanism.
   // however, in a very unprobable situation that no resource bundle
   // data exists, step 5 is repeated with hardcoded default rules.

   const char *locName = constructFromBundle(desiredLocale, status);  /*!*/
   data->desiredLocale = desiredLocale;

   if (U_SUCCESS(status)) {
     data->realLocaleName = locName;
     if(status != U_USING_DEFAULT_ERROR) {
       setDecomposition(Normalizer::NO_OP);
     }
   } else {
     UErrorCode intStatus = U_ZERO_ERROR;
     constructFromCache(ResourceBundle::kDefaultFilename, intStatus);
     if(U_FAILURE(intStatus)) {
       intStatus = U_ZERO_ERROR;
       constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus);
       if (intStatus == U_ZERO_ERROR) {
 	status = U_USING_DEFAULT_ERROR;
       } else {
 	status = intStatus;     // bubble back
       }

       if (status == U_MEMORY_ALLOCATION_ERROR) {
 	return;
       }
     }
     data->realLocaleName = ResourceBundle::kDefaultFilename;
     setDecomposition(Normalizer::NO_OP);
     addToCache(ResourceBundle::kDefaultFilename);
   }
   return;
 }

 void
 RuleBasedCollator::constructFromFile(   const Locale&           locale,
                                     const UnicodeString&    localeFileName,
                                     UBool                  tryBinaryFile,
                                     UErrorCode&              status)
 {
   // constructFromFile creates a collation object by reading from a
   // file.  It does not employ the usual FILE search mechanism with
   // locales, default locales, and base locales.  Instead, it tries to
   // look only in files with the given localFileName.  It does,
   // however, employ the LOCALE search mechanism.

   // This method maintains the binary collation files.  If a collation
   // is not present in binary form, but is present in text form (in a
   // resource bundle file), it will be loaded in text form, and then
   // written to disk.

   // If tryBinaryFile is true, then try to load from the binary file first.

   if(U_FAILURE(status)) {
     return;
   }

   if(dataIsOwned) {
     delete data;
     data = 0;
   }

     if(tryBinaryFile) {
       char *binaryFilePath = createPathName(UnicodeString(u_getDataDirectory(),""),
 					    localeFileName, UnicodeString(kFilenameSuffix,""));

         // Try to load up the collation from a binary file first
         constructFromFile(binaryFilePath, status);
         #ifdef COLLDEBUG
             cerr << localeFileName  << kFilenameSuffix << " binary load " << u_errorName(status) << endl;
         #endif
         if(U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) {
             delete [] binaryFilePath;
             return;
         }
         if(status == U_FILE_ACCESS_ERROR) {
             status = U_ZERO_ERROR;
         }
         delete [] binaryFilePath;
     }

   // Now try to load it up from a resource bundle text source file
   UnicodeString dataDir = UnicodeString(u_getDataDirectory(),"");

     char *ch;
     ch = new char[localeFileName.size() + 1];
     ch[localeFileName.extract(0, 0x7fffffff, ch, "")] = 0;
     ResourceBundle bundle(dataDir, ch, status);

     delete [] ch;

   // if there is no resource bundle file for the give locale, break out
   if(U_FAILURE(status))
   {
       return;
   }

     #ifdef COLLDEBUG
         cerr << localeFileName << " ascii load " << u_errorName(status) << endl;
     #endif

     // check and see if this resource bundle contains collation data

     UnicodeString colString;
     UErrorCode intStatus = U_ZERO_ERROR;

     ResourceBundle colElems = bundle.get("CollationElements", intStatus);
     if (U_FAILURE(intStatus))
     {
         status = U_MISSING_RESOURCE_ERROR;
         return;
     }
     colString = colElems.getStringEx("Sequence", intStatus);

     if(U_FAILURE(intStatus)) {
         status = U_MISSING_RESOURCE_ERROR;
         return;
     }

     if(colString.isBogus()) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

   // Having loaded the collation from the resource bundle text file,
   // now retrieve the CollationElements tagged data, merged with the
   // default rules.  If that fails, use the default rules alone.

   colString.insert(0, DEFAULTRULES);
   if(colString.isBogus()) {
     status = U_MEMORY_ALLOCATION_ERROR;
     return;
   }

   constructFromRules(colString, intStatus);
   if(intStatus == U_MEMORY_ALLOCATION_ERROR) {
     status = U_MEMORY_ALLOCATION_ERROR;
     return;
   }

   if(intStatus != U_ZERO_ERROR)  {
     status = U_USING_DEFAULT_ERROR;

     // predefined tables should contain correct grammar
     intStatus = U_ZERO_ERROR;
     constructFromRules(DEFAULTRULES, intStatus);
     if(intStatus != U_ZERO_ERROR) {
       status = intStatus;
     }
   }

 #ifdef COLLDEBUG
   cerr << localeFileName << " ascii load " << (U_SUCCESS(status) ? "OK" : "Failed") << " - try= " << (tryBinaryFile?"true":"false") << endl;
 #endif

 }

 RuleBasedCollator::~RuleBasedCollator()
 {
     if (dataIsOwned)
     {
         delete data;
     }

     data = 0;

     //    delete sourceCursor;
     //    sourceCursor = 0;

     //    delete targetCursor;
     //    targetCursor = 0;

     if (cursor1 != NULL) {
         delete cursor1;
         cursor1 = 0;
     }
     if (cursor2 != NULL) {
         delete cursor2;
         cursor2 = 0;
     }

     delete mPattern;
     mPattern = 0;
 }

 Collator*
 RuleBasedCollator::clone() const
 {
     return new RuleBasedCollator(*this);
 }

 // Create a CollationElementIterator object that will iterator over the elements
 // in a string, using the collation rules defined in this RuleBasedCollator
 CollationElementIterator*
 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const
 {
     UErrorCode status = U_ZERO_ERROR;
     CollationElementIterator *newCursor = 0;

     newCursor = new CollationElementIterator(source, this, status);
     if (U_FAILURE(status))
     {
         return NULL;
     }

     return newCursor;
 }

 // Create a CollationElementIterator object that will iterator over the elements
 // in a string, using the collation rules defined in this RuleBasedCollator
 CollationElementIterator*
 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const
 {
     UErrorCode status = U_ZERO_ERROR;
     CollationElementIterator *newCursor = 0;

     newCursor = new CollationElementIterator(source, this, status);
     if (U_FAILURE(status))
     {
         return NULL;
     }

     return newCursor;
 }

 // Return a string representation of this collator's rules.
 // The string can later be passed to the constructor that takes a
 // UnicodeString argument, which will construct a collator that's
 // functionally identical to this one.
 // You can also allow users to edit the string in order to change
 // the collation data, or you can print it out for inspection, or whatever.

 const UnicodeString&
 RuleBasedCollator::getRules() const
 {
     if (mPattern != 0)
     {
         MergeCollation*& nonConstMPattern = *(MergeCollation**)&mPattern;
         mPattern->emitPattern(data->ruleTable);
         data->isRuleTableLoaded = TRUE;
         delete nonConstMPattern;
         nonConstMPattern = 0;
     }
     else if (!data->isRuleTableLoaded)
     {
         // At this point the caller wants the rules, but the rule table data
         // is not loaded.  Furthermore, there is no mPattern object to load
         // the rules from.  Therefore, we fetch the rules off the disk.
         // Notice that we pass in a tryBinaryFile value of FALSE, since
         // by design the binary file has NO rules in it!
         //UErrorCode status = U_ZERO_ERROR;
         //RuleBasedCollator temp(data->realLocaleName, status);
         RuleBasedCollator temp;
         UErrorCode status = U_ZERO_ERROR;
         temp.constructFromFile(data->desiredLocale, data->realLocaleName, FALSE, status);

         // We must check that mPattern is nonzero here, or we run the risk
         // of an infinite loop.
         if (U_SUCCESS(status) && temp.mPattern != 0)
         {
             data->ruleTable = temp.getRules();
             data->isRuleTableLoaded = TRUE;
 #ifdef _DEBUG
 //              // the following is useful for specific debugging purposes
 //               UnicodeString name;
 //               cerr << "Table collation rules loaded dynamically for "
 //                   << data->desiredLocale.getName(name)
 //                   << " at "
 //                   << data->realLocaleName
 //                   << ", " << dec << data->ruleTable.size() << " characters"
 //                   << endl;
 #endif
         }
         else
         {
 #ifdef _DEBUG
 //              UnicodeString name;
 //              cerr << "Unable to load table collation rules dynamically for "
 //                  << data->desiredLocale.getName(name)
 //                  << " at "
 //                  << data->realLocaleName
 //                  << endl;
 //              cerr << "Status " << u_errorName(status) << ", mPattern " << temp.mPattern << endl;
 #endif
 	    /* SRL have to add this because we now have the situation where
 	       DEFAULT is loaded from a binary file w/ no rules. */
 	    UErrorCode intStatus = U_ZERO_ERROR;
 	    temp.constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus);

 	    if(U_SUCCESS(intStatus) && (temp.mPattern != 0))
 	      {
 		data->ruleTable = temp.getRules();
 		data->isRuleTableLoaded = TRUE;
 	      }
         }
     }

     return data->ruleTable;
 }


 Collator::EComparisonResult
 RuleBasedCollator::compare( const UnicodeString& source,
                             const UnicodeString& target,
                             int32_t length) const
 {
     UnicodeString source_togo;
     UnicodeString target_togo;
     UTextOffset begin=0;

     source.extract(begin, uprv_min(length,source.length()), source_togo);
     target.extract(begin, uprv_min(length,target.length()), target_togo);
     return (RuleBasedCollator::compare(source_togo, target_togo));
 }

 Collator::EComparisonResult
 RuleBasedCollator::compare(const   UChar* source,
                       int32_t sourceLength,
                       const   UChar*  target,
                       int32_t targetLength) const
 {
     // check if source and target are valid strings
     if (((source == 0) && (target == 0)) ||
         ((sourceLength == 0) && (targetLength == 0)))
     {
         return Collator::EQUAL;
     }

     Collator::EComparisonResult result = Collator::EQUAL;
     UErrorCode status = U_ZERO_ERROR;

     if (cursor1 == NULL)
     {
         ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLength, getDecomposition());
     }
     else
     {
         cursor1->setModeAndText(getDecomposition(), source, sourceLength, status);
     }

     if ( /*cursor1->cursor == NULL ||*/ U_FAILURE(status))
     {
         return Collator::EQUAL;
     }

     if (cursor2 == NULL)
     {
         ((RuleBasedCollator *)this)->cursor2 = new NormalizerIterator(target, targetLength, getDecomposition());
     }
     else
     {
         cursor2->setModeAndText(getDecomposition(), target, targetLength, status);
     }

     if (/*cursor2 == NULL ||*/ U_FAILURE(status))
     {
         return Collator::EQUAL;
     }

     int32_t sOrder, tOrder;
     //    int32_t sOrder = CollationElementIterator::NULLORDER, tOrder = CollationElementIterator::NULLORDER;
     UBool gets = TRUE, gett = TRUE;
     UBool initialCheckSecTer = getStrength() >= Collator::SECONDARY;
     UBool checkSecTer = initialCheckSecTer;
     UBool checkTertiary = getStrength() >= Collator::TERTIARY;
     UBool isFrenchSec = data->isFrenchSec;
     uint32_t pSOrder, pTOrder;

     while(TRUE)
     {
         // Get the next collation element in each of the strings, unless
         // we've been requested to skip it.
         if (gets)
         {
             sOrder = getStrengthOrder((NormalizerIterator*)cursor1, status);

             if (U_FAILURE(status))
             {
                 return Collator::EQUAL;
             }
         }

         gets = TRUE;

         if (gett)
         {
             tOrder = getStrengthOrder((NormalizerIterator*)cursor2, status);

             if (U_FAILURE(status))
             {
                 return Collator::EQUAL;
             }
         }

         gett = TRUE;

         // If we've hit the end of one of the strings, jump out of the loop
         if ((sOrder == CollationElementIterator::NULLORDER)||
             (tOrder == CollationElementIterator::NULLORDER))
         {
             break;
         }

         // If there's no difference at this position, we can skip to the
         // next one.
         pSOrder = CollationElementIterator::primaryOrder(sOrder);
         pTOrder = CollationElementIterator::primaryOrder(tOrder);
         if (sOrder == tOrder)
         {
             if (isFrenchSec && pSOrder != 0)
             {
                 if (!checkSecTer)
                 {
                     // in french, a secondary difference more to the right is stronger,
                     // so accents have to be checked with each base element
                     checkSecTer = initialCheckSecTer;

                     // but tertiary differences are less important than the first
                     // secondary difference, so checking tertiary remains disabled
                     checkTertiary = FALSE;
                 }
             }

             continue;
         }

         // Compare primary differences first.
         if (pSOrder != pTOrder)
         {
             if (sOrder == 0)
             {
                 // The entire source element is ignorable.
                 // Skip to the next source element, but don't fetch another target element.
                 gett = FALSE;
                 continue;
             }

             if (tOrder == 0)
             {
                 gets = FALSE;
                 continue;
             }

             // The source and target elements aren't ignorable, but it's still possible
             // for the primary component of one of the elements to be ignorable....
             if (pSOrder == 0)  // primary order in source is ignorable
             {
                 // The source's primary is ignorable, but the target's isn't.  We treat ignorables
                 // as a secondary difference, so remember that we found one.
                 if (checkSecTer)
                 {
                     result = Collator::GREATER;  // (strength is SECONDARY)
                     checkSecTer = FALSE;
                 }

                 // Skip to the next source element, but don't fetch another target element.
                 gett = FALSE;
             }
             else if (pTOrder == 0)
             {
                 // record differences - see the comment above.
                 if (checkSecTer)
                 {
                     result = Collator::LESS;  // (strength is SECONDARY)
                     checkSecTer = FALSE;
                 }

                 // Skip to the next target element, but don't fetch another source element.
                 gets = FALSE;
             }
             else
             {
                 // Neither of the orders is ignorable, and we already know that the primary
                 // orders are different because of the (pSOrder != pTOrder) test above.
                 // Record the difference and stop the comparison.
                 if (pSOrder < pTOrder)
                 {
                     return Collator::LESS;  // (strength is PRIMARY)
                 }

                 return Collator::GREATER;  // (strength is PRIMARY)
             }
         }
         else
         { // else of if ( pSOrder != pTOrder )
             // primary order is the same, but complete order is different. So there
             // are no base elements at this point, only ignorables (Since the strings are
             // normalized)

             if (checkSecTer)
             {
                 // a secondary or tertiary difference may still matter
                 uint32_t secSOrder = CollationElementIterator::secondaryOrder(sOrder);
                 uint32_t secTOrder = CollationElementIterator::secondaryOrder(tOrder);

                 if (secSOrder != secTOrder)
                 {
                     // there is a secondary difference
                     result = (secSOrder < secTOrder) ? Collator::LESS : Collator::GREATER;
                                             // (strength is SECONDARY)
                     checkSecTer = FALSE;
                     // (even in french, only the first secondary difference within
                     //  a base character matters)
                 }
                 else
                 {
                     if (checkTertiary)
                     {
                         // a tertiary difference may still matter
                         uint32_t terSOrder = CollationElementIterator::tertiaryOrder(sOrder);
                         uint32_t terTOrder = CollationElementIterator::tertiaryOrder(tOrder);

                         if (terSOrder != terTOrder)
                         {
                             // there is a tertiary difference
                             result = (terSOrder < terTOrder) ? Collator::LESS : Collator::GREATER;
                                             // (strength is TERTIARY)
                             checkTertiary = FALSE;
                         }
                     }
                 }
             } // if (checkSecTer)

         }  // if ( pSOrder != pTOrder )
     } // while()

     if (sOrder != CollationElementIterator::NULLORDER)
     {
         // (tOrder must be CollationElementIterator::NULLORDER,
         //  since this point is only reached when sOrder or tOrder is NULLORDER.)
         // The source string has more elements, but the target string hasn't.
         do
         {
             if (CollationElementIterator::primaryOrder(sOrder) != 0)
             {
                 // We found an additional non-ignorable base character in the source string.
                 // This is a primary difference, so the source is greater
                 return Collator::GREATER; // (strength is PRIMARY)
             }

             if (CollationElementIterator::secondaryOrder(sOrder) != 0)
             {
                 // Additional secondary elements mean the source string is greater
                 if (checkSecTer)
                 {
                     result = Collator::GREATER;  // (strength is SECONDARY)
                     checkSecTer = FALSE;
                 }
             }
         }
         while ((sOrder = getStrengthOrder(cursor1, status)) != CollationElementIterator::NULLORDER);
     }
     else if (tOrder != CollationElementIterator::NULLORDER)
     {
         // The target string has more elements, but the source string hasn't.
         do
         {
             if (CollationElementIterator::primaryOrder(tOrder) != 0)
             {
                 // We found an additional non-ignorable base character in the target string.
                 // This is a primary difference, so the source is less
                 return Collator::LESS; // (strength is PRIMARY)
             }

             if (CollationElementIterator::secondaryOrder(tOrder) != 0)
             {
                 // Additional secondary elements in the target mean the source string is less
                 if (checkSecTer)
                 {
                     result = Collator::LESS;  // (strength is SECONDARY)
                     checkSecTer = FALSE;
                 }
             }
         }
         while ((tOrder = getStrengthOrder(cursor2, status)) != CollationElementIterator::NULLORDER);
     }


     // For IDENTICAL comparisons, we use a bitwise character comparison
     // as a tiebreaker if all else is equal
     // NOTE: The java code compares result with 0, and
     // puts the result of the string comparison directly into result
     if (result == Collator::EQUAL && getStrength() == IDENTICAL)
     {
 #if 0
       // ******** for the  UChar normalization interface.
       // It doesn't work much faster, and the code was broken
       // so it's commented out. --srl
 //          UChar sourceDecomp[1024], targetDecomp[1024];
 //  	int32_t sourceDecompLength = 1024;
 //  	int32_t targetDecompLength = 1024;

 //          int8_t comparison;
 //  	Normalizer::EMode decompMode = getDecomposition();

 //  	if (decompMode != Normalizer::NO_OP)
 //  	  {
 //  	    Normalizer::normalize(source, sourceLength, decompMode,
 //  				  0, sourceDecomp, sourceDecompLength, status);

 //  	    Normalizer::normalize(target, targetLength, decompMode,
 //  				  0, targetDecomp, targetDecompLength, status);

 //  	    comparison = u_strcmp(sourceDecomp,targetDecomp);
 //  	  }
 //  	else
 //  	  {
 //  	    comparison = u_strcmp(source, target); /* ! */
 //  	  }

 #else

 	UnicodeString sourceDecomp, targetDecomp;

         int8_t comparison;

         Normalizer::normalize(source, getDecomposition(),
                       0, sourceDecomp,  status);

         Normalizer::normalize(target, getDecomposition(),
                       0, targetDecomp,  status);

         comparison = sourceDecomp.compare(targetDecomp);
 #endif

         if (comparison < 0)
         {
             result = Collator::LESS;
         }
         else if (comparison == 0)
         {
             result = Collator::EQUAL;
         }
         else
         {
             result = Collator::GREATER;
         }
     }

     return result;
 }


 int32_t
 RuleBasedCollator::nextContractChar(NormalizerIterator *cursor,
                                     UChar ch,
                                     UErrorCode& status) const
 {
     // First get the ordering of this single character
     VectorOfPToContractElement *list = getContractValues(ch);
     EntryPair *pair = (EntryPair *)list->at(0);
     int32_t order = pair->value;

     // Now iterate through the chars following it and
     // look for the longest match
     ((UnicodeString&)key).remove();
     ((UnicodeString&)key) += ch;

     while ((ch = cursor->current()) != Normalizer::DONE)
     {
         ((UnicodeString&)key) += ch;

         int32_t n = getEntry(list, key, TRUE);

         if (n == UNMAPPED)
         {
             break;
         }
         cursor->next();

         pair = (EntryPair *)list->at(n);
         order = pair->value;
     }

     return order;
 }

 // Compare two strings using this collator
 Collator::EComparisonResult
 RuleBasedCollator::compare(const UnicodeString& source,
                         const UnicodeString& target) const
 {
     return compare(source.getUChars(), source.length(), target.getUChars(), target.length());
 }

 // Retrieve a collation key for the specified string
 // The key can be compared with other collation keys using a bitwise comparison
 // (e.g. memcmp) to find the ordering of their respective source strings.
 // This is handy when doing a sort, where each sort key must be compared
 // many times.
 //
 // The basic algorithm here is to find all of the collation elements for each
 // character in the source string, convert them to an ASCII representation,
 // and put them into the collation key.  But it's trickier than that.
 // Each collation element in a string has three components: primary ('A' vs 'B'),
 // secondary ('u' vs 'ü'), and tertiary ('A' vs 'a'), and a primary difference
 // at the end of a string takes precedence over a secondary or tertiary
 // difference earlier in the string.
 //
 // To account for this, we put all of the primary orders at the beginning of the
 // string, followed by the secondary and tertiary orders. Each set of orders is
 // terminated by nulls so that a key for a string which is a initial substring of
 // another key will compare less without any special case.
 //
 // Here's a hypothetical example, with the collation element represented as
 // a three-digit number, one digit for primary, one for secondary, etc.
 //
 // String:              A     a     B    É
 // Collation Elements: 101   100   201  511
 // Collation Key:      1125<null>0001<null>1011<null>
 //
 // To make things even trickier, secondary differences (accent marks) are compared
 // starting at the *end* of the string in languages with French secondary ordering.
 // But when comparing the accent marks on a single base character, they are compared
 // from the beginning.  To handle this, we reverse all of the accents that belong
 // to each base character, then we reverse the entire string of secondary orderings
 // at the end.
 //
 CollationKey&
 RuleBasedCollator::getCollationKey( const   UnicodeString&  source,
                                     CollationKey&   sortkey,
                                     UErrorCode&      status) const
 {
     return RuleBasedCollator::getCollationKey(source.getUChars(), source.length(), sortkey, status);
 }

 CollationKey&
 RuleBasedCollator::getCollationKey( const   UChar*  source,
                                     int32_t sourceLen,
                                     CollationKey&   sortkey,
                                     UErrorCode&      status) const
 {
     if (U_FAILURE(status))
     {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return sortkey.setToBogus();
     }

     if ((!source) || (sourceLen == 0))
     {
         return sortkey.reset();
     }

     if (cursor1 == NULL)
     {
       ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLen, getDecomposition());
     }
     else
     {
       cursor1->setModeAndText(getDecomposition(), source,sourceLen, status);
     }

     if (U_FAILURE(status))
     {
         return sortkey.setToBogus();
     }

     UBool  compareSec   = (getStrength() >= Collator::SECONDARY);
     UBool  compareTer   = (getStrength() >= Collator::TERTIARY);
     UBool  compareIdent = (getStrength() == Collator::IDENTICAL);
     int32_t order        = 0;
     int32_t totalPrimary = 0;
     int32_t totalSec     = 0;
     int32_t totalTer     = 0;
     int32_t totalIdent     = 0;
     UnicodeString decomp;

     // iterate over the source, counting primary, secondary, and tertiary entries
     while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) !=
 	                                      CollationElementIterator::NULLORDER)
     {
         int32_t secOrder = CollationElementIterator::secondaryOrder(order);
         int32_t terOrder = CollationElementIterator::tertiaryOrder(order);

         if (U_FAILURE(status))
         {
             return sortkey.setToBogus();
         }

         if (! CollationElementIterator::isIgnorable(order))
         {
             totalPrimary += 1;

             if (compareSec)
             {
                 totalSec += 1;
             }

             if (compareTer)
             {
                 totalTer += 1;
             }
         }
         else
         {
             if (compareSec && secOrder != 0)
             {
                 totalSec += 1;
             }

             if (compareTer && terOrder != 0)
             {
                 totalTer += 1;
             }
         }
     }

     // count the null bytes after the entires
     totalPrimary += 1;

     if (compareSec)
     {
         totalSec += 1;
     }

     if (compareTer)
     {
         totalTer += 1;
     }

     if (compareIdent)
     {
       Normalizer::normalize(source, getDecomposition(), // SRL: ??
                 0, decomp, status);

         if (U_SUCCESS(status))
         {
             totalIdent = decomp.length() + 1;
         }
     }

     // Compute total number of bytes to hold the entries
     // and make sure the key can hold them
     uint32_t size   = 2 * (totalPrimary + totalSec + totalTer + totalIdent);

     sortkey.ensureCapacity(size);

     if (sortkey.isBogus())
     {
         status = U_MEMORY_ALLOCATION_ERROR;
         return sortkey;
     }

     int32_t primaryCursor = 0;
     int32_t secCursor     = 2 * totalPrimary;
     int32_t secBase       = secCursor;
     int32_t preSecIgnore  = secBase;
     int32_t terCursor     = secCursor + (2 * totalSec);
     int32_t identCursor      = terCursor + (2 * totalTer);

     // reset source to the beginning
     cursor1->reset();

     // now iterate over the source computing the actual entries
     while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) != CollationElementIterator::NULLORDER)
     {
         if (U_FAILURE(status))
         {
             return sortkey.reset();
         }

         int32_t primaryOrder = CollationElementIterator::primaryOrder(order);
         int32_t secOrder     = CollationElementIterator::secondaryOrder(order);
         int32_t terOrder     = CollationElementIterator::tertiaryOrder(order);

         if (! CollationElementIterator::isIgnorable(order))
         {
             primaryCursor = sortkey.storeBytes(primaryCursor, primaryOrder + SORTKEYOFFSET);

             if (compareSec)
             {
                 if (data->isFrenchSec && (preSecIgnore < secCursor))
                 {
                     sortkey.reverseBytes(preSecIgnore, secCursor);
                 }

                 secCursor = sortkey.storeBytes(secCursor, secOrder + SORTKEYOFFSET);

                 preSecIgnore = secCursor;
             }

             if (compareTer)
             {
                 terCursor = sortkey.storeBytes(terCursor, terOrder + SORTKEYOFFSET);
             }
         }
         else
         {
             if (compareSec && secOrder != 0)
             {
                 secCursor = sortkey.storeBytes(secCursor, secOrder + data->maxSecOrder + SORTKEYOFFSET);
             }

             if (compareTer && terOrder != 0)
             {
                 terCursor = sortkey.storeBytes(terCursor, terOrder + data->maxTerOrder + SORTKEYOFFSET);
             }
         }
     }

     // append 0 at the end of each portion.
     sortkey.storeBytes(primaryCursor, 0);

     if (compareSec)
     {
         if (data->isFrenchSec)
         {
             if (preSecIgnore < secCursor)
             {
                 sortkey.reverseBytes(preSecIgnore, secCursor);
             }

             sortkey.reverseBytes(secBase, secCursor);
         }

         sortkey.storeBytes(secCursor, 0);
     }

     if (compareTer)
     {
         sortkey.storeBytes(terCursor, 0);
     }

     if (compareIdent)
     {
         sortkey.storeUnicodeString(identCursor, decomp);
     }

     //    Debugging - print out the sortkey [--srl]
 //      {
 //        const uint8_t *bytes;
 //        int32_t xcount;
 //        bytes = sortkey.getByteArray(xcount);
 //        //      fprintf(stderr, "\n\n-  [%02X] [%02X]\n\n", (int)(bytes[0]&0xFF), (int)(bytes[1]&0xFF) );
 //      }

     return sortkey;
 }


 // Build this collator's rule tables based on a string representation of the rules
 // See the big diagram at the top of this file for an overview of how the tables
 // are organized.
 void
 RuleBasedCollator::build(const UnicodeString&   pattern,
                             UErrorCode&      status)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     // This array maps Unicode characters to their collation ordering
     data->mapping = ucmp32_open(UNMAPPED);

     if (data->mapping->fBogus)
     {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

     Collator::ECollationStrength aStrength = Collator::IDENTICAL;
     UBool isSource = TRUE;
     int32_t i = 0;
     UnicodeString lastGroupChars;
     UnicodeString expChars;
     UnicodeString groupChars;

     if (pattern.length() == 0)
     {
         status = U_INVALID_FORMAT_ERROR;
         return;
     }

     // Build the merged collation entries
     // Since rules can be specified in any order in the string
     // (e.g. "c , C < d , D < e , E .... C < CH")
     // this splits all of the rules in the string out into separate
     // objects and then sorts them.  In the above example, it merges the
     // "C < CH" rule in just before the "C < D" rule.

     mPattern = new MergeCollation(pattern, getDecomposition(), status);
     if (U_FAILURE(status))
     {
         ucmp32_close(data->mapping);
         data->mapping = 0;
         delete mPattern;
         mPattern = 0;
         return;
     }

     int32_t order = 0;

     // Walk through each entry
     for (i = 0; i < mPattern->getCount(); ++i)
     {
         const PatternEntry* entry = mPattern->getItemAt(i);
         groupChars.remove();
         expChars.remove();

         // if entry is valid
         if (entry != NULL)
         {
             entry->getChars(groupChars);

             // check if french secondary needs to be turned on
             if ((groupChars.length() > 1) &&
                 (groupChars[groupChars.length()-1] == 0x0040))
             {
                 data->isFrenchSec = TRUE;
                 groupChars.remove(groupChars.length()-1);
             }

             order = increment((Collator::ECollationStrength)entry->getStrength(), order);

             if (entry->getExtension(expChars).length() != 0)
             {
                 // encountered an expanding character, where one character on input
                 // expands to several sort elements (e.g. 'ö' --> 'o' 'e')
                 addExpandOrder(groupChars, expChars, order, status);
                 if (U_FAILURE(status))
                 {
                     return;
                 }
             }
             else if (groupChars.length() > 1)
             {
                 // encountered a contracting character, where several characters on input
                 // contract into one sort order.  For example, "ch" is treated as a single
                 // character in traditional Spanish sorting.
                 addContractOrder(groupChars, order, status);
                 if (U_FAILURE(status))
                 {
                     return;
                 }
             }
             else
             {
                 // Nothing out of the ordinary -- one character maps to one sort order
                 addOrder(groupChars[0], order, status);
                 if (U_FAILURE(status))
                 {
                     return;
                 }
             }
         }
     }

     // add expanding entries for pre-composed characters
     addComposedChars();

     // Fill in all the expanding chars values
     commit();

     // Compact the data mapping table
     ucmp32_compact(data->mapping, 1);
 }

 /**
  * Add expanding entries for pre-composed unicode characters so that this
  * collator can be used reasonably well with decomposition turned off.
  */
  void RuleBasedCollator::addComposedChars()
  {
     UnicodeString buf;
     UErrorCode status = U_ZERO_ERROR;

     // Iterate through all of the pre-composed characters in Unicode
     ComposedCharIter iter;
     UnicodeString decomp;

     while (iter.hasNext())
     {
         UChar c = iter.next();

         if (getCharOrder(c) == UNMAPPED)
         {
             //
             // We don't already have an ordering for this pre-composed character.
             //
             // First, see if the decomposed string is already in our
             // tables as a single contracting-string ordering.
             // If so, just map the precomposed character to that order.
             //
             // TODO: What we should really be doing here is trying to find the
             // longest initial substring of the decomposition that is present
             // in the tables as a contracting character sequence, and find its
             // ordering.  Then do this recursively with the remaining chars
             // so that we build a list of orderings, and add that list to
             // the expansion table.
             // That would be more correct but also significantly slower, so
             // I'm not totally sure it's worth doing.
             //
             iter.getDecomposition(decomp);
             int contractOrder = getContractOrder(decomp);

             if (contractOrder != UNMAPPED)
             {
                 addOrder(c, contractOrder, status);
             }
             else
             {
                 //
                 // We don't have a contracting ordering for the entire string
                 // that results from the decomposition, but if we have orders
                 // for each individual character, we can add an expanding
                 // table entry for the pre-composed character
                 //
                 UBool allThere = TRUE;
                 int32_t i;

                 for (i = 0; i < decomp.length(); i += 1)
                 {
                     if (getCharOrder(decomp[i]) == UNMAPPED)
                     {
                         allThere = FALSE;
                         break;
                     }
                 }

                 if (allThere)
                 {
                     buf.remove();
                     buf += c;
                     addExpandOrder(buf, decomp, UNMAPPED, status);
                 }
             }
         }
     }
 }

 // When the expanding character tables are built by addExpandOrder,
 // it doesn't know what the final ordering of each character
 // in the expansion will be.  Instead, it just puts the raw character
 // code into the table, adding CHARINDEX as a flag.  Now that we've
 // finished building the mapping table, we can go back and look up
 // that character to see what its real collation order is and
 // stick that into the expansion table.  That lets us avoid doing
 // a two-stage lookup later.

 void
 RuleBasedCollator::commit()
 {
     // if there are any expanding characters
     if (data->expandTable != NULL)
     {
         int32_t i;
         for (i = 0; i < data->expandTable->size(); i += 1)
         {
             VectorOfInt* valueList = data->expandTable->at(i);
             int32_t j;
             for (j = 0; j < valueList->size(); j++)
             {
                 // found a expanding character
                 // the expanding char value is not filled in yet
                 if ((valueList->at(j) < EXPANDCHARINDEX) &&
                     (valueList->at(j) > CHARINDEX))
                 {
                     // Get the real values for the non-filled entry
                     UChar ch = (UChar)(valueList->at(j) - CHARINDEX);
                     int32_t realValue = ucmp32_get(data->mapping, ch);

                     if (realValue == UNMAPPED)
                     {
                         // The real value is still unmapped, maybe it'signorable
                         valueList->atPut(j, IGNORABLEMASK & ch);
                     }
                     // fill in the value
                     else
                     {
                         valueList->atPut(j, realValue);
                     }
                 }
             }
         }
     }
  }

 /**
  *  Increment of the last order based on the comparison level.
  */
 int32_t
 RuleBasedCollator::increment(Collator::ECollationStrength aStrength, int32_t lastValue)
 {
     switch(aStrength)
     {
     case Collator::PRIMARY:
         // increment priamry order  and mask off secondary and tertiary difference
         lastValue += PRIMARYORDERINCREMENT;
         lastValue &= PRIMARYORDERMASK;
         isOverIgnore = TRUE;
         break;

     case Collator::SECONDARY:
         // increment secondary order and mask off tertiary difference
         lastValue += SECONDARYORDERINCREMENT;
         lastValue &= SECONDARYDIFFERENCEONLY;

         // record max # of ignorable chars with secondary difference
         if (isOverIgnore == FALSE)
         {
             data->maxSecOrder += 1;
         }
         break;

     case Collator::TERTIARY:
         // increment tertiary order
         lastValue += TERTIARYORDERINCREMENT;

         // record max # of ignorable chars with tertiary difference
         if (isOverIgnore == FALSE)
         {
             data->maxTerOrder += 1;
         }
         break;

   // case IDENTICAL?
     }

     return lastValue;
 }

 // Adds a character and its designated order into the collation table.
 // This is the simple case, with no expansion or contraction
 void
 RuleBasedCollator::addOrder(UChar ch,
                          int32_t anOrder,
                          UErrorCode& status)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     // try to find the order of the char in the mapping table
     int32_t order = ucmp32_get(data->mapping, ch);

     if (order >= CONTRACTCHARINDEX)
     {
         // There's already an entry for this character that points to a contracting
         // character table.  Instead of adding the character directly to the mapping
         // table, we must add it to the contract table instead.
         key.remove();
         key += ch;
         if (key.isBogus())
         {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }

         addContractOrder(key, anOrder, status);
     }
     else
     {
         // add the entry to the mapping table, the same later entry replaces the previous one
         ucmp32_set(data->mapping, ch, anOrder);
     }
 }

 // Add an expanding-character entry to the table.
 void
 RuleBasedCollator::addExpandOrder(  const   UnicodeString& contractChars,
                                 const   UnicodeString& expandChars,
                                 int32_t anOrder,
                                 UErrorCode& status)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     // Create an expansion table entry
     int32_t tableIndex = addExpansion(anOrder, expandChars);

     // And add its index into the main mapping table
     if (contractChars.length() > 1)
     {
         addContractOrder(contractChars, tableIndex, status);
     }
     else
     {
         addOrder(contractChars[0], tableIndex, status);
     }
 }

 int32_t RuleBasedCollator::addExpansion(int32_t anOrder, const UnicodeString &expandChars)
 {
     if (data->expandTable == NULL)
     {
         data->expandTable = new VectorOfPToExpandTable();

         if (data->expandTable == NULL)
         {
             return 0;
         }
     }

     // If anOrder is valid, we want to add it at the beginning of the list
     int32_t offset = (anOrder == UNMAPPED) ? 0 : 1;

     VectorOfInt *valueList = new VectorOfInt(expandChars.length() + offset);

     if (offset == 1)
     {
         valueList->atPut(0, anOrder);
     }

     int32_t i;
     for (i = 0; i < expandChars.length(); i += 1)
     {
         UChar ch = expandChars[i];
         int32_t mapValue = getCharOrder(ch);

         if (mapValue != UNMAPPED)
         {
             valueList->atPut(i + offset, mapValue);
         }
         else
         {
             // can't find it in the table, will be filled in by commit().
             valueList->atPut(i + offset, CHARINDEX + (int32_t)ch);
         }
     }

     // Add the expanding char list into the expansion table.
     int32_t tableIndex = EXPANDCHARINDEX + data->expandTable->size();
     data->expandTable->atPut(data->expandTable->size(), valueList);

     return tableIndex;
 }

 // Add a string of characters that contracts into a single ordering.
 void
 RuleBasedCollator::addContractOrder(const   UnicodeString& groupChars,
                                     int32_t anOrder,
                                     UBool fwd,
                                     UErrorCode& status)
 {
     if (U_FAILURE(status))
     {
         return;
     }

     if (data->contractTable == NULL)
     {
         data->contractTable = new VectorOfPToContractTable();
         if (data->contractTable->isBogus())
         {
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
     }

     // See if the initial character of the string already has a contract table.
     // e.g. for "ch", look for 'c'.
     int32_t entry = ucmp32_get(data->mapping, groupChars[0]);
     VectorOfPToContractElement *entryTable = getContractValues(entry - CONTRACTCHARINDEX);

     if (entryTable == NULL)
     {
         // We need to create a new table of contract entries for this base char
         int32_t tableIndex = CONTRACTCHARINDEX + data->contractTable->size();
         EntryPair *pair = NULL;
         UnicodeString substring;

         entryTable = new VectorOfPToContractElement();
         if (entryTable->isBogus())
         {
             delete entryTable;
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }

         data->contractTable->atPut(data->contractTable->size(), entryTable);
         if (data->contractTable->isBogus())
         {
             delete entryTable;
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }


         // Add the initial character's current ordering first. then
         // update its mapping to point to this contract table
         groupChars.extract(0, 1, substring);
         if (substring.isBogus())
         {
             delete entryTable;
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }

         pair = new EntryPair(substring, entry);

         entryTable->atPut(0, pair);
         if (entryTable->isBogus())
         {
             delete entryTable;
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }

         ucmp32_set(data->mapping, groupChars[0], tableIndex);
     }

     // Now add (or replace) this string in the table
     int32_t index = getEntry(entryTable, groupChars, fwd);

     if (index != UNMAPPED)
     {
         EntryPair *pair = (EntryPair *) entryTable->at(index);
         pair->value = anOrder;
     }
     else
     {
         EntryPair *pair = new EntryPair(groupChars, anOrder, fwd);

         entryTable->atPut(entryTable->size(), pair);
     }

     // If this was a forward mapping for a contracting string, also add a
     // reverse mapping for it, so that CollationElementIterator::previous
     // can work right
     if (fwd)
     {
         UnicodeString reverse(groupChars);

         if (reverse.isBogus())
         {
             delete entryTable;
             delete data->contractTable;
             data->contractTable = NULL;
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }

         addContractOrder(reverse.reverse(), anOrder, FALSE, status);
     }
 }

 /**
  * If the given string has been specified as a contracting string
  * in this collation table, return its ordering.
  * Otherwise return UNMAPPED.
  */
  int32_t RuleBasedCollator::getContractOrder(const UnicodeString &groupChars) const
 {
     int32_t result = UNMAPPED;

     if (data->contractTable != NULL)
     {
         VectorOfPToContractElement *entryTable = getContractValues(groupChars[0]);

         if (entryTable != NULL)
         {
             int32_t index = getEntry(entryTable, groupChars, TRUE);

             if (index != UNMAPPED)
             {
                 EntryPair *pair = entryTable->at(index);

                 result = pair->value;
             }
         }
     }

     return result;
 }

 int32_t RuleBasedCollator::getCharOrder(UChar ch) const
 {
     int32_t order = ucmp32_get(data->mapping, ch);

     if (order >= CONTRACTCHARINDEX)
     {
         VectorOfPToContractElement *groupList = getContractValues(order - CONTRACTCHARINDEX);
         EntryPair *pair = groupList->at(0);

         order = pair->value;
     }

     return order;
 }

 // Create a hash code for this collation.  Just hash the main rule table --
 // that should be good enough for almost any use.
 int32_t
 RuleBasedCollator::hashCode() const
 {
     int32_t         value = 0;
     int32_t         c;
     int32_t         count = getRules().length();
     UTextOffset      pos = count - 1;

     if (count > 64)
     {
         count = 64; // only hash upto limit
     }

     int16_t i = 0;

     while (i < count)
     {
         c = data->ruleTable[pos];
         value = ((value << (c & 0x0f)) ^ (c << 8)) + (c ^ value);
         i += 1;
         pos -= 1;
     }

     if (value == 0)
     {
         value = 1;
     }

     return value;
 }

 // find the contracting char entry in the list
 int32_t
 RuleBasedCollator::getEntry(VectorOfPToContractElement* list,
                          const UnicodeString& name,
                          UBool fwd)
 {
     int32_t i;

     if (list != NULL)
     {
         for (i = 0; i < list->size(); i += 1)
         {
             EntryPair *pair = list->at(i);

             if ((pair != NULL) && (pair->fwd == fwd) && (pair->entryName == name))
             {
                 return i;
             }
         }
     }

     return RuleBasedCollator::UNMAPPED;
 }

 // look for the contracting list entry with the beginning char
 VectorOfPToContractElement*
 RuleBasedCollator::getContractValues(UChar ch) const
 {
     int32_t index = ucmp32_get(data->mapping, ch);
     return getContractValues(index - CONTRACTCHARINDEX);
 }

 // look for the contracting list entry with the index
 VectorOfPToContractElement*
 RuleBasedCollator::getContractValues(int32_t    index) const
 {
     if (data->contractTable != NULL)
     {
         if (index >= 0)
         {
             return data->contractTable->at(index);
         }
     }
     return NULL;
 }

 /**
   * Return the maximum length of any expansion sequences that end
   * with the specified comparison order.
   *
   * @param order a collation order returned by previous or next.
   * @return the maximum length of any expansion seuences ending
   *         with the specified order.
   *
   * @see CollationElementIterator#getMaxExpansion
   */
 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
 {
     int32_t result = 1;

     if (data->expandTable != NULL)
     {
         // Right now this does a linear search through the entire
         // expandsion table.  If a collator had a large number of expansions,
         // this could cause a performance problem, but in practice that
         // rarely happens
         int32_t i;
         for (i = 0; i < data->expandTable->size(); i += 1)
         {
             VectorOfInt *valueList = data->expandTable->at(i);
             int32_t length = valueList->size();

             if (length > result && valueList->at(length-1) == order)
             {
                 result = length;
             }
         }
     }

     return result;
 }

 /**
  *  Get the entry of hash table of the expanding string in the collation
  *  table.
  *  @param offset the index of the expanding string value list
  */
 VectorOfInt *RuleBasedCollator::getExpandValueList(int32_t order) const
 {
     return data->expandTable->at(order - EXPANDCHARINDEX);
 }


 void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, UMemoryStream* is)
 {
     if (!uprv_mstrm_error(is))
     {
         // Check that this is the correct file type
         int16_t id;

         uprv_mstrm_read(is, &id, sizeof(id));
         if (id != collator->FILEID)
         {
             // This isn't the right type of file.  Mark the ios
             // as failing and return.
             uprv_mstrm_setError(is); // force the stream to set its error flag
             return;
         }

         // Stream in large objects
         char isNull;

         uprv_mstrm_read(is, &isNull, sizeof(isNull));
         if (isNull)
         {
             delete collator->data;
             collator->data = NULL;
         }
         else
         {
             if (collator->data == NULL)
             {
                 collator->data = new TableCollationData;
             }

             collator->data->streamIn(is);
             if (collator->data->isBogus()) {
                 uprv_mstrm_setError(is); // force the stream to set its error flag
                 return;
             }
         }

         // Verify that the end marker is present
         uprv_mstrm_read(is, &id, sizeof(id));
         if (id != collator->FILEID)
         {
             // This isn't the right type of file.  Mark the ios
             // as failing and return.
             uprv_mstrm_setError(is); // force the stream to set its error flag
             return;
         }

         // Reset other data members
         collator->isOverIgnore = FALSE;
         collator->lastChar = 0;
         delete collator->mPattern;
         collator->mPattern = 0;
         collator->key.remove();
         collator->dataIsOwned = TRUE;
     }
 }

 void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, UMemoryStream* os)
 {
     if (!uprv_mstrm_error(os))
     {
         // We use a 16-bit ID code to identify this file.
         int16_t id = collator->FILEID;
         uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id));

         // Stream out the data
         char isNull;
         isNull = (collator->data == 0);
         uprv_mstrm_write(os, (uint8_t*)&isNull, sizeof(isNull));

         if (!isNull)
         {
             collator->data->streamOut(os);
         }

         // Write out the ID to indicate the end
         uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id));
     }
 }

 void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, FileStream* is)
 {
     if (!T_FileStream_error(is))
     {
         // Check that this is the correct file type
         int16_t id;

         T_FileStream_read(is, &id, sizeof(id));
         if (id != collator->FILEID)
         {
             // This isn't the right type of file.  Mark the ios
             // as failing and return.
             T_FileStream_setError(is); // force the stream to set its error flag
             return;
         }

         // Stream in large objects
         char isNull;

         T_FileStream_read(is, &isNull, sizeof(isNull));
         if (isNull)
         {
             delete collator->data;
             collator->data = NULL;
         }
         else
         {
             if (collator->data == NULL)
             {
                 collator->data = new TableCollationData;
             }

             collator->data->streamIn(is);
             if (collator->data->isBogus()) {
                 T_FileStream_setError(is); // force the stream to set its error flag
                 return;
             }
         }

         // Verify that the end marker is present
         T_FileStream_read(is, &id, sizeof(id));
         if (id != collator->FILEID)
         {
             // This isn't the right type of file.  Mark the ios
             // as failing and return.
             T_FileStream_setError(is); // force the stream to set its error flag
             return;
         }

         // Reset other data members
         collator->isOverIgnore = FALSE;
         collator->lastChar = 0;
         delete collator->mPattern;
         collator->mPattern = 0;
         collator->key.remove();
         collator->dataIsOwned = TRUE;
     }
 }

 void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, FileStream* os)
 {
     if (!T_FileStream_error(os))
     {
         // We use a 16-bit ID code to identify this file.
         int16_t id = collator->FILEID;
         T_FileStream_write(os, &id, sizeof(id));

         // Stream out the data
         char isNull;
         isNull = (collator->data == 0);
         T_FileStream_write(os, &isNull, sizeof(isNull));

         if (!isNull)
         {
             collator->data->streamOut(os);
         }

         // Write out the ID to indicate the end
         T_FileStream_write(os, &id, sizeof(id));
     }
 }

 UBool RuleBasedCollator::writeToFile(const char* fileName) const
 {
     FileStream* ofs = T_FileStream_open(fileName, "wb");
     if (ofs != 0)
     {
         RuleBasedCollatorStreamer::streamOut(this, ofs);
     }

 #ifdef COLLDEBUG
     fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs),
         (!T_FileStream_error(ofs) ? ", OK" : ", FAIL"));
 #endif

     UBool err = T_FileStream_error(ofs) == 0;

     T_FileStream_close(ofs);
     return err;
 }
 /*
 UBool RuleBasedCollator::prepareForBundle() const
 {
     UMemoryStream* ofs = uprv_mstrm_openNew(0);
     if (ofs != 0)
     {
         RuleBasedCollatorStreamer::streamOut(this, ofs);
     }

 #ifdef COLLDEBUG
     fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs),
         (!T_FileStream_error(ofs) ? ", OK" : ", FAIL"));
 #endif

     UBool err = uprv_mstrm_error(ofs) == 0;

     uprv_mstrm_close(ofs);

     return err;
 }
 */

 void RuleBasedCollator::addToCache(const UnicodeString& key)
 {
     // This method doesn't add the RuleBasedCollator itself to the cache.  Instead,
     // it adds the given RuleBasedCollator's data object to the TableCollationData
     // cache, and marks it as non-owned in the given RuleBasedCollator object.
     TableCollationData::addToCache(key, data);
     dataIsOwned = FALSE;
 }

 void
 RuleBasedCollator::constructFromCache(const UnicodeString& key,
                                    UErrorCode& status)
 {
     // Attempt to construct this RuleBasedCollator object from cached TableCollationData.
     // If no such data is in the cache, return false.
     if (U_FAILURE(status)) return;
     if (dataIsOwned)
     {
         delete data;
         data = NULL;
     }

     isOverIgnore = FALSE;
     lastChar = 0;
     mPattern = 0;
     setStrength(Collator::TERTIARY);

     dataIsOwned = FALSE;
     data = TableCollationData::findInCache(key);
     if (data == NULL)
     {
         status = U_MISSING_RESOURCE_ERROR;
     }
 }

 char*
 RuleBasedCollator::createPathName(  const UnicodeString&    prefix,
                                 const UnicodeString&    name,
                                 const UnicodeString&    suffix)
 {
     // Concatenate three elements to form a file name, and return it.

     UnicodeString   workingName(prefix);
     int32_t         size;
     char*           returnVal;

     workingName += name;
     workingName += suffix;

     size = workingName.length();
     returnVal = new char[size + 1];
     workingName.extract(0, size, returnVal, "");
     returnVal[size] = 0;

     return returnVal;
 }

 void
 RuleBasedCollator::chopLocale(UnicodeString& localeName)
 {
     // chopLocale removes the final element from a locale string.
     // For instance, "de_CH" becomes "de", and "de" becomes "".
     // "" remains "".

     int32_t     size = localeName.length();
     int32_t     i;

     for (i = size - 1; i > 0; i--)
     {
         if (localeName[i] == 0x005F)
         {
             break;
         }
     }

     if (i < 0)
     {
        i = 0;
     }

     localeName.remove(i, size - i);
 }


 uint8_t *
 RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &status)
 {
   UMemoryStream *memdata = 0;
   uint8_t *data = 0;

   if(U_FAILURE(status)) {
     return NULL;
   }

       memdata = uprv_mstrm_openNew(0);

       if (memdata != 0) {
         RuleBasedCollatorStreamer::streamOut(this, memdata);
       }

       UBool err = uprv_mstrm_error(memdata) == 0;


     data = (uint8_t *)uprv_malloc(memdata->fPos);
     if(data == 0) {
       status = U_MEMORY_ALLOCATION_ERROR;
       uprv_mstrm_close(memdata);
       length = 0;
       return 0;
     } else {
       uprv_memcpy(data, memdata->fStart, memdata->fPos);
       length = memdata->fPos;
       uprv_mstrm_close(memdata);
       return data;
     }
 }


 //eof