source/i18n/tblcoll.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 * Copyright (C) {1996-2001}, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

 /**
 * File tblcoll.cpp
 *
 * Created by: Helena Shih
 *
 * Modification History:
 *
 *  Date        Name        Description
 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
 *                          constructor which reads RuleBasedCollator object from
 *                          a binary file.  Added writeToFile method which streams
 *                          RuleBasedCollator out to a binary file.  The streamIn
 *                          and streamOut methods use istream and ostream objects
 *                          in binary mode.
 *  2/11/97     aliu        Moved declarations out of for loop initializer.
 *                          Added Mac compatibility #ifdef for ios::nocreate.
 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
 *                          hold invariant data.
 *  2/13/97     aliu        Moved several methods into this class from Collation.
 *                          Added a private RuleBasedCollator(Locale&) constructor,
 *                          to be used by Collator::getInstance().  General
 *                          clean up.  Made use of UErrorCode variables consistent.
 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
 *                          constructor and getDynamicClassID.
 *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
 *                          use the maximum allowable value which is kBlockCount.
 *                          Modified getRules() to load rules dynamically.  Changed
 *                          constructFromFile() call to accomodate this (added
 *                          parameter to specify whether binary loading is to
 *                          take place).
 * 05/06/97     helena      Added memory allocation error check.
 *  6/20/97     helena      Java class name change.
 *  6/23/97     helena      Adding comments to make code more readable.
 * 09/03/97     helena      Added createCollationKeyValues().
 * 06/26/98     erm         Changes for CollationKeys using byte arrays.
 * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
 * 04/23/99     stephen     Removed EDecompositionMode, merged with
 *                          Normalizer::EMode
 * 06/14/99     stephen     Removed kResourceBundleSuffix
 * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
 *                          files are no longer used.
 * 11/02/99     helena      Collator performance enhancements.  Special case
 *                          for NO_OP situations.
 * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
 *                          to implementation file.
 * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
 */

 #include "ucol_imp.h"
 #include "unicode/tblcoll.h"
 #include "unicode/coleitr.h"
 #include "uhash.h"
 #include "unicode/resbund.h"
 #include "cmemory.h"

 #ifdef _DEBUG
   #include "unistrm.h"
 #endif

 /* global variable ---------------------------------------------------------- */

 /*
 synwee : using another name for this
 const uint32_t tblcoll_STACK_BUFFER_LENGTH_ = 1024;
 */
 #define STACK_BUFFER_LENGTH_ 1024

 /* forward declarations ----------------------------------------------------- */

 U_CAPI UChar forwardCharIteratorGlue(void *iterator);

 /* RuleBasedCollator declaration ----------------------------------------- */

 /* ---------------------------------------------------------------------------
 The following diagram shows the data structure of the RuleBasedCollator  object.
 Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
 "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O;
 'o-umlaut'/E, 'O-umlaut'/E ...".
 The rule sorts the 'ch' ligatures and 'c' with only a tertiary difference,
 and sorts 'o-umlaut' as if it were always expanded with a following 'e'.

 mapping table                 contracting list             expanding list
 (contains all unicode
 char entries)               ___    ____________      _________________________
  ________                |=>|_*_|->|'c' |v('c') | |=>|v('o')|v('umlaut')|v('e')|
 |_\u0001_|-> v('\u0001') |  |_:_|  |------------| |  |-------------------------|
 |_\u0002_|-> v('\u0002') |  |_:_|  |'ch'|v('ch')| |  |             :           |
 |____:___|               |  |_:_|  |------------| |  |-------------------------|
 |____:___|               |         |'cH'|v('cH')| |  |             :           |
 |__'a'___|-> v('a')      |         |------------| |  |-------------------------|
 |__'b'___|-> v('b')      |         |'Ch'|v('Ch')| |  |             :           |
 |____:___|               |         |------------| |  |-------------------------|
 |____:___|               |         |'CH'|v('CH')| |  |             :           |
 |___'c'__|----------------          ------------  |  |-------------------------|
 |____:___|                                        |  |             :           |
 |o-umlaut|----------------------------------------   |_________________________|
 |____:___|

 --------------------------------------------------------------------------- */

 /* public RuleBasedCollator constructor ---------------------------------- */

 /**
 * Copy constructor.
 *
 * The new object shares the source object's UCollator handle and rule string
 * by pointer; nothing is duplicated.  dataIsOwned is set to FALSE, so the
 * destructor of the copy neither closes the collator nor deletes the rule
 * string.
 *
 * NOTE(review): the copy is presumably only usable while the object that
 * owns the shared data is still alive - confirm against callers of clone().
 *
 * @param that the collator whose collator handle and rule string are shared
 */
 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) :
               Collator(that), dataIsOwned(FALSE), ucollator(that.ucollator),
               urulestring(that.urulestring)
 {
 }

 /**
 * Constructs a collator from a rule string, using the default normalization
 * mode and the default strength.
 *
 * The rules are copied into a NUL-terminated UChar buffer (on the stack when
 * they fit, on the heap otherwise) and handed to ucol_openRules().  On
 * success the object owns the opened collator and a UnicodeString copy of
 * the rules reported by ucol_getRules().  On any failure the collator handle
 * and the rule string pointer are left NULL and nothing is owned.
 *
 * @param rules  the collation rules to build the collator from
 * @param status in/out error code; nothing is done if it already indicates a
 *               failure.  Set to U_MEMORY_ALLOCATION_ERROR if the temporary
 *               buffer or the rule string cannot be allocated.
 */
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                            UErrorCode& status) :
                                            dataIsOwned(FALSE), ucollator(0),
                                            urulestring(0)
 {
   if (U_FAILURE(status))
     return;

   int32_t length = rules.length();

   UChar ucharrules[STACK_BUFFER_LENGTH_];
   UChar *pucharrules = ucharrules;

   if (length >= STACK_BUFFER_LENGTH_)
   {
     pucharrules = new UChar[length + 1];
     if (pucharrules == NULL)
     {
       status = U_MEMORY_ALLOCATION_ERROR;
       return;
     }
   }

   rules.extract(0, length, pucharrules);
   pucharrules[length] = 0;

   ucollator = ucol_openRules(pucharrules, length, UCOL_DEFAULT_NORMALIZATION,
                              UCOL_DEFAULT_STRENGTH, &status);

   if (U_SUCCESS(status))
   {
     const UChar *r = ucol_getRules(ucollator, &length);
     if (length > 0) {
         urulestring = new UnicodeString(r, length);
     }
     else {
         urulestring = new UnicodeString();
     }

     if (urulestring == NULL)
     {
       /* do not leak the collator that was just opened */
       ucol_close(ucollator);
       ucollator = NULL;
       status = U_MEMORY_ALLOCATION_ERROR;
     }
     else
     {
       dataIsOwned = TRUE;
     }
   }

   /* released last: the temporary buffer stays valid while rules are read */
   if (pucharrules != ucharrules)
     delete[] pucharrules;
 }

 /**
 * Constructs a collator from a rule string with an explicit collation
 * strength and the default normalization mode.
 *
 * The rules are copied into a NUL-terminated UChar buffer (on the stack when
 * they fit, on the heap otherwise) and handed to ucol_openRules().  On
 * success the object owns the opened collator and a UnicodeString copy of
 * the rules reported by ucol_getRules().  On any failure the collator handle
 * and the rule string pointer are left NULL and nothing is owned.
 *
 * @param rules             the collation rules to build the collator from
 * @param collationStrength strength, converted with getUCollationStrength()
 * @param status            in/out error code; nothing is done if it already
 *                          indicates a failure.  Set to
 *                          U_MEMORY_ALLOCATION_ERROR if the temporary buffer
 *                          or the rule string cannot be allocated.
 */
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                       ECollationStrength collationStrength,
                       UErrorCode& status) : dataIsOwned(FALSE), ucollator(0),
                                             urulestring(0)
 {
   if (U_FAILURE(status))
     return;

   int32_t length = rules.length();

   UChar ucharrules[STACK_BUFFER_LENGTH_];
   UChar *pucharrules = ucharrules;

   if (length >= STACK_BUFFER_LENGTH_)
   {
     pucharrules = new UChar[length + 1];
     if (pucharrules == NULL)
     {
       status = U_MEMORY_ALLOCATION_ERROR;
       return;
     }
   }

   rules.extract(0, length, pucharrules);
   pucharrules[length] = 0;

   UCollationStrength strength = getUCollationStrength(collationStrength);
   ucollator = ucol_openRules(pucharrules, length, UCOL_DEFAULT_NORMALIZATION,
                              strength, &status);

   if (U_SUCCESS(status))
   {
     const UChar *r = ucol_getRules(ucollator, &length);
     if (length > 0) {
         urulestring = new UnicodeString(r, length);
     }
     else {
         urulestring = new UnicodeString();
     }

     if (urulestring == NULL)
     {
       /* do not leak the collator that was just opened */
       ucol_close(ucollator);
       ucollator = NULL;
       status = U_MEMORY_ALLOCATION_ERROR;
     }
     else
     {
       dataIsOwned = TRUE;
     }
   }

   /* released last: the temporary buffer stays valid while rules are read */
   if (pucharrules != ucharrules)
     delete[] pucharrules;
 }

 /**
 * Constructs a collator from a rule string with an explicit decomposition
 * mode and the default strength.
 *
 * The rules are copied into a NUL-terminated UChar buffer (on the stack when
 * they fit, on the heap otherwise) and handed to ucol_openRules().  On
 * success the object owns the opened collator and a UnicodeString copy of
 * the rules reported by ucol_getRules().  On any failure the collator handle
 * and the rule string pointer are left NULL and nothing is owned.
 *
 * @param rules             the collation rules to build the collator from
 * @param decompositionMode normalization mode, converted with
 *                          Normalizer::getUNormalizationMode()
 * @param status            in/out error code; nothing is done if it already
 *                          indicates a failure.  Set to
 *                          U_MEMORY_ALLOCATION_ERROR if the temporary buffer
 *                          or the rule string cannot be allocated.
 */
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                      Normalizer::EMode decompositionMode,
                                      UErrorCode& status) :
                                      dataIsOwned(FALSE), ucollator(0),
                                      urulestring(0)
 {
   if (U_FAILURE(status))
     return;

   int32_t length = rules.length();

   UChar ucharrules[STACK_BUFFER_LENGTH_];
   UChar *pucharrules = ucharrules;

   if (length >= STACK_BUFFER_LENGTH_)
   {
     pucharrules = new UChar[length + 1];
     if (pucharrules == NULL)
     {
       status = U_MEMORY_ALLOCATION_ERROR;
       return;
     }
   }

   rules.extract(0, length, pucharrules);
   pucharrules[length] = 0;

   UNormalizationMode mode = Normalizer::getUNormalizationMode(
                                                    decompositionMode, status);
   ucollator = ucol_openRules(pucharrules, length, mode,
                              UCOL_DEFAULT_STRENGTH, &status);

   if (U_SUCCESS(status))
   {
     const UChar *r = ucol_getRules(ucollator, &length);
     if (length > 0) {
         urulestring = new UnicodeString(r, length);
     }
     else {
         urulestring = new UnicodeString();
     }

     if (urulestring == NULL)
     {
       /* do not leak the collator that was just opened */
       ucol_close(ucollator);
       ucollator = NULL;
       status = U_MEMORY_ALLOCATION_ERROR;
     }
     else
     {
       dataIsOwned = TRUE;
     }
   }

   /* released last: the temporary buffer stays valid while rules are read */
   if (pucharrules != ucharrules)
     delete[] pucharrules;
 }

 /**
 * Constructs a collator from a rule string with an explicit collation
 * strength and an explicit decomposition mode.
 *
 * The rules are copied into a NUL-terminated UChar buffer (on the stack when
 * they fit, on the heap otherwise) and handed to ucol_openRules().  On
 * success the object owns the opened collator and a UnicodeString copy of
 * the rules reported by ucol_getRules().  On any failure the collator handle
 * and the rule string pointer are left NULL and nothing is owned.
 *
 * @param rules             the collation rules to build the collator from
 * @param collationStrength strength, converted with getUCollationStrength()
 * @param decompositionMode normalization mode, converted with
 *                          Normalizer::getUNormalizationMode()
 * @param status            in/out error code; nothing is done if it already
 *                          indicates a failure.  Set to
 *                          U_MEMORY_ALLOCATION_ERROR if the temporary buffer
 *                          or the rule string cannot be allocated.
 */
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                       ECollationStrength collationStrength,
                       Normalizer::EMode decompositionMode,
                       UErrorCode& status) : dataIsOwned(FALSE), ucollator(0),
                                             urulestring(0)
 {
   if (U_FAILURE(status))
     return;

   int32_t length = rules.length();

   UChar ucharrules[STACK_BUFFER_LENGTH_];
   UChar *pucharrules = ucharrules;

   if (length >= STACK_BUFFER_LENGTH_)
   {
     pucharrules = new UChar[length + 1];
     if (pucharrules == NULL)
     {
       status = U_MEMORY_ALLOCATION_ERROR;
       return;
     }
   }

   rules.extract(0, length, pucharrules);
   pucharrules[length] = 0;

   UCollationStrength strength = getUCollationStrength(collationStrength);
   UNormalizationMode mode = Normalizer::getUNormalizationMode(
                                                    decompositionMode, status);
   ucollator = ucol_openRules(pucharrules, length, mode, strength, &status);
   if (U_SUCCESS(status))
   {
     const UChar *r = ucol_getRules(ucollator, &length);
     if (length > 0) {
         urulestring = new UnicodeString(r, length);
     }
     else {
         urulestring = new UnicodeString();
     }

     if (urulestring == NULL)
     {
       /* do not leak the collator that was just opened */
       ucol_close(ucollator);
       ucollator = NULL;
       status = U_MEMORY_ALLOCATION_ERROR;
     }
     else
     {
       dataIsOwned = TRUE;
     }
   }

   /* released last: the temporary buffer stays valid while rules are read */
   if (pucharrules != ucharrules)
     delete[] pucharrules;
 }


 /* RuleBasedCollator public destructor ----------------------------------- */

 /**
 * Destructor.  The underlying collator is closed and the rule string deleted
 * only when this object owns them; in every case the collator pointer is
 * cleared afterwards.
 */
 RuleBasedCollator::~RuleBasedCollator()
 {
   if (!dataIsOwned)
   {
     /* borrowed data: only forget the handle */
     ucollator = NULL;
     return;
   }

   ucol_close(ucollator);
   delete urulestring;
   ucollator = NULL;
 }

 /* RuleBaseCollator public methods --------------------------------------- */

 /**
 * Equality test.  Two collators compare equal when the base-class comparison
 * already succeeds, or when both have the same dynamic class ID and refer to
 * the very same underlying UCollator handle.
 *
 * @param that the collator to compare against
 * @return TRUE if the collators are considered equal, FALSE otherwise
 */
 UBool RuleBasedCollator::operator==(const Collator& that) const
 {
   /* the base class only checks for address equality */
   if (Collator::operator==(that))
     return TRUE;

   /* objects of a different class never match */
   if (getDynamicClassID() != that.getDynamicClassID())
     return FALSE;

   /*
   synwee : the collation data itself is not compared, only the handles
   */
   const RuleBasedCollator& other =
                             static_cast<const RuleBasedCollator&>(that);

   return (ucollator == other.ucollator) ? TRUE : FALSE;
 }

 /**
 * Assignment operator.  Any data owned by this object is released first;
 * afterwards this object shares the collator handle and rule string of the
 * right-hand side by pointer and no longer owns its data.
 *
 * @param that the collator to share data with
 * @return a reference to this object
 */
 RuleBasedCollator& RuleBasedCollator::operator=(
                                               const RuleBasedCollator& that)
 {
   /* self-assignment leaves everything as it is */
   if (this == &that)
     return *this;

   if (dataIsOwned)
   {
     ucol_close(ucollator);
     ucollator = NULL;
     delete urulestring;
   }

   /* from here on the data is borrowed from the right-hand side */
   ucollator = that.ucollator;
   urulestring = that.urulestring;
   dataIsOwned = FALSE;

   return *this;
 }

 /**
 * Creates a heap-allocated copy of this collator through the copy
 * constructor, so the copy shares this object's collator handle and rule
 * string instead of duplicating them.
 *
 * @return the newly allocated collator; the caller receives the raw pointer
 */
 Collator* RuleBasedCollator::clone() const
 {
   return new RuleBasedCollator(*this);
 }

 /**
 * Create a CollationElementIterator object that will iterate over the
 * elements in a string, using the collation rules defined in this
 * RuleBasedCollator.
 *
 * @param source the string to iterate over
 * @return the newly allocated iterator, or NULL if its construction reported
 *         a failure (the partially constructed iterator is deleted in that
 *         case instead of being leaked)
 */
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                            (const UnicodeString& source) const
 {
   UErrorCode status = U_ZERO_ERROR;
   CollationElementIterator *result = new CollationElementIterator(source, this,
                                                                   status);

   if (U_FAILURE(status))
   {
     /* the caller only sees NULL, so the object must be released here */
     delete result;
     return NULL;
   }

   return result;
 }

 /**
 * Create a CollationElementIterator object that will iterate over the
 * elements supplied by a character iterator, using the collation rules
 * defined in this RuleBasedCollator.
 *
 * @param source the character iterator providing the text
 * @return the newly allocated iterator, or NULL if its construction reported
 *         a failure (the partially constructed iterator is deleted in that
 *         case instead of being leaked)
 */
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                        (const CharacterIterator& source) const
 {
   UErrorCode status = U_ZERO_ERROR;
   CollationElementIterator *result = new CollationElementIterator(source, this,
                                                                   status);

   if (U_FAILURE(status))
   {
     /* the caller only sees NULL, so the object must be released here */
     delete result;
     return NULL;
   }

   return result;
 }

 /**
 * Return a string representation of this collator's rules. The string can
 * later be passed to the constructor that takes a UnicodeString argument,
 * which will construct a collator that's functionally identical to this one.
 * You can also allow users to edit the string in order to change the collation
 * data, or you can print it out for inspection, or whatever.
 *
 * The returned reference designates the UnicodeString held through
 * urulestring; no copy is made.
 *
 * NOTE(review): urulestring is dereferenced unconditionally - confirm that
 * every construction path sets it before getRules() can be reached.
 */
 const UnicodeString& RuleBasedCollator::getRules() const
 {
   return (*urulestring);
 }

 /**
 * Compares at most the first length code units of two strings.  The leading
 * part of each string (limited to the string's own length) is extracted into
 * a temporary and the two temporaries are compared with the two-argument
 * compare().
 */
 Collator::EComparisonResult RuleBasedCollator::compare(
                                                const UnicodeString& source,
                                                const UnicodeString& target,
                                                int32_t length) const
 {
   const UTextOffset start = 0;
   UnicodeString sourcePrefix;
   UnicodeString targetPrefix;

   source.extract(start, uprv_min(length, source.length()), sourcePrefix);
   target.extract(start, uprv_min(length, target.length()), targetPrefix);

   return compare(sourcePrefix, targetPrefix);
 }

 /**
 * Compares two UChar strings by passing them, with their lengths, to
 * ucol_strcoll() on the underlying collator and converting the outcome with
 * getEComparisonResult().
 *
 * @param source       first string
 * @param sourceLength length of the first string, forwarded unchanged
 * @param target       second string
 * @param targetLength length of the second string, forwarded unchanged
 * @return the converted comparison result
 */
 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
                                                        int32_t sourceLength,
                                                        const UChar* target,
                                                        int32_t targetLength)
                                                        const
 {
   return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
                                                      target, targetLength));
 }

 /**
 * Compare two strings using this collator.  Both strings are copied into
 * NUL-terminated UChar buffers (stack buffers when they fit, heap buffers
 * otherwise) and passed to the UChar-based compare() overload.
 */
 Collator::EComparisonResult RuleBasedCollator::compare(
                                              const UnicodeString& source,
                                              const UnicodeString& target) const
 {
   const uint32_t srcLength = source.length();
   const uint32_t tgtLength = target.length();

   UChar srcStack[STACK_BUFFER_LENGTH_];
   UChar tgtStack[STACK_BUFFER_LENGTH_];

   /* use the heap when the text plus its terminator does not fit */
   UChar *srcText = (srcLength >= STACK_BUFFER_LENGTH_)
                      ? new UChar[srcLength + 1] : srcStack;
   UChar *tgtText = (tgtLength >= STACK_BUFFER_LENGTH_)
                      ? new UChar[tgtLength + 1] : tgtStack;

   source.extract(0, srcLength, srcText);
   srcText[srcLength] = 0;
   target.extract(0, tgtLength, tgtText);
   tgtText[tgtLength] = 0;

   const EComparisonResult outcome = compare(srcText, srcLength,
                                             tgtText, tgtLength);

   if (srcText != srcStack)
     delete[] srcText;

   if (tgtText != tgtStack)
     delete[] tgtText;

   return outcome;
 }

 /**
 * Retrieve a collation key for the specified string. The key can be compared
 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
 * the ordering of their respective source strings. This is handy when doing a
 * sort, where each sort key must be compared many times.
 *
 * The basic algorithm here is to find all of the collation elements for each
 * character in the source string, convert them to an ASCII representation, and
 * put them into the collation key.  But it's trickier than that. Each
 * collation element in a string has three components: primary ('A' vs 'B'),
 * secondary ('u' vs 'ü'), and tertiary ('A' vs 'a'), and a primary difference
 * at the end of a string takes precedence over a secondary or tertiary
 * difference earlier in the string.
 *
 * To account for this, we put all of the primary orders at the beginning of
 * the string, followed by the secondary and tertiary orders. Each set of
 * orders is terminated by nulls so that a key for a string which is a initial
 * substring of another key will compare less without any special case.
 *
 * Here's a hypothetical example, with the collation element represented as a
 * three-digit number, one digit for primary, one for secondary, etc.
 *
 * String:              A     a     B    É
 * Collation Elements: 101   100   201  511
 * Collation Key:      1125<null>0001<null>1011<null>
 *
 * To make things even trickier, secondary differences (accent marks) are
 * compared starting at the *end* of the string in languages with French
 * secondary ordering. But when comparing the accent marks on a single base
 * character, they are compared from the beginning. To handle this, we reverse
 * all of the accents that belong to each base character, then we reverse the
 * entire string of secondary orderings at the end.
 */
 CollationKey& RuleBasedCollator::getCollationKey(
                                                   const UnicodeString& source,
                                                   CollationKey& sortkey,
                                                   UErrorCode& status) const
 {
   /* copy the text into a NUL-terminated buffer, on the stack if it fits */
   const uint32_t textLength = source.length();
   UChar stackBuffer[STACK_BUFFER_LENGTH_];
   UChar *text = (textLength >= STACK_BUFFER_LENGTH_)
                   ? new UChar[textLength + 1] : stackBuffer;

   source.extract(0, textLength, text);
   text[textLength] = 0;

   /* the UChar-based overload does the actual work */
   CollationKey& key = getCollationKey(text, textLength, sortkey, status);

   if (text != stackBuffer)
     delete[] text;

   return key;
 }

 /**
 * Computes the sort key of a UChar string with ucol_getSortKey() and lets
 * sortkey adopt the resulting byte buffer.
 *
 * @param source    the text to build the key for
 * @param sourceLen length of the text, forwarded to ucol_getSortKey()
 * @param sortkey   receives the key; set to bogus on error and reset when the
 *                  source is NULL or empty
 * @param status    in/out error code.  If it already indicates a failure it
 *                  is replaced by U_ILLEGAL_ARGUMENT_ERROR (existing
 *                  behavior, kept for callers).  Set to
 *                  U_MEMORY_ALLOCATION_ERROR if the key buffer cannot be
 *                  allocated.
 * @return sortkey
 */
 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                     int32_t sourceLen,
                                                     CollationKey& sortkey,
                                                     UErrorCode& status) const
 {
   if (U_FAILURE(status))
   {
     status = U_ILLEGAL_ARGUMENT_ERROR;
     return sortkey.setToBogus();
   }

   if ((!source) || (sourceLen == 0))
     return sortkey.reset();

   /*
   * have to use malloc, lowest denomination, since adopt can be used by
   * a c return value.
   */
   int32_t capacity = UCOL_MAX_BUFFER;
   uint8_t *result = (uint8_t *)uprv_malloc(capacity * sizeof(uint8_t));
   if (result == NULL)
   {
     status = U_MEMORY_ALLOCATION_ERROR;
     return sortkey.setToBogus();
   }

   int32_t resLen = ucol_getSortKey(ucollator, source, sourceLen, result,
                                    capacity);
   if (resLen > capacity)
   {
     /* the key did not fit: retry once with a buffer of the reported size */
     uprv_free(result);
     capacity = resLen;
     result = (uint8_t *)uprv_malloc(capacity * sizeof(uint8_t));
     if (result == NULL)
     {
       status = U_MEMORY_ALLOCATION_ERROR;
       return sortkey.setToBogus();
     }

     resLen = ucol_getSortKey(ucollator, source, sourceLen, result, capacity);
     if (resLen > capacity)
       resLen = capacity; /* never let the key claim unallocated bytes */
   }

   sortkey.adopt(result, resLen);

   return sortkey;
 }

 /**
  * Return the maximum length of any expansion sequences that end with the
  * specified comparison order.
  * @param order a collation order returned by previous or next.
  * @return the maximum length of any expansion sequences ending with the
  *         specified order or 1 if collation order does not occur at the end of any
  *         expansion sequence.
  * @see CollationElementIterator#getMaxExpansion
  */
 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
 {
   /* presumably filled in by the macro below - see its definition to confirm */
   uint8_t result;
   UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
   return result;
 }

 /**
 * Forwards to ucol_cloneRuleData() on the underlying collator.
 *
 * @param length passed by address to ucol_cloneRuleData()
 * @param status passed by address to ucol_cloneRuleData(); not inspected here
 * @return whatever ucol_cloneRuleData() returns
 *
 * NOTE(review): ownership of the returned buffer is decided by
 * ucol_cloneRuleData() - confirm who has to release it.
 */
 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
                                               UErrorCode &status)
 {
   return ucol_cloneRuleData(ucollator, &length, &status);
 }

 /**
 * Sets an attribute on the underlying collator through ucol_setAttribute().
 * Nothing happens when status already indicates a failure.
 *
 * @param attr   the attribute to change
 * @param value  the new attribute value
 * @param status in/out error code
 */
 void RuleBasedCollator::setAttribute(UColAttribute attr,
                                      UColAttributeValue value,
                                      UErrorCode &status)
 {
   if (U_SUCCESS(status))
   {
     ucol_setAttribute(ucollator, attr, value, &status);
   }
 }

 /**
 * Reads an attribute from the underlying collator through
 * ucol_getAttribute().
 *
 * @param attr   the attribute to query
 * @param status in/out error code
 * @return the attribute value, or UCOL_DEFAULT when status already indicates
 *         a failure on entry
 */
 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                       UErrorCode &status)
 {
   return U_FAILURE(status) ? UCOL_DEFAULT
                            : ucol_getAttribute(ucollator, attr, &status);
 }

 /**
 * Creates an independent collator: the underlying UCollator is duplicated
 * with ucol_safeClone() and the rules are copied into a new UnicodeString.
 * The returned object owns both.
 *
 * @return the new collator, or NULL if cloning or any allocation failed; on
 *         failure everything created so far is released again
 */
 Collator* RuleBasedCollator::safeClone(void)
 {
   UErrorCode intStatus = U_ZERO_ERROR;
   int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
   UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
                                    &intStatus);
   if (U_FAILURE(intStatus) || ucol == NULL)
     return NULL;

   int32_t length = 0;
   const UChar *rules = ucol_getRules(ucollator, &length);

   /* same empty-rules handling as the constructors */
   UnicodeString *r = NULL;
   if (length > 0) {
       r = new UnicodeString(rules, length);
   }
   else {
       r = new UnicodeString();
   }

   if (r == NULL)
   {
     ucol_close(ucol);
     return NULL;
   }

   RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
   if (result == NULL)
   {
     /* nobody took ownership, so release both pieces here */
     delete r;
     ucol_close(ucol);
     return NULL;
   }

   result->dataIsOwned = TRUE;
   return result;
 }

 /**
 * Compares the text delivered by two forward character iterators.  Both
 * iterators are handed to ucol_strcollinc() by address, each paired with
 * forwardCharIteratorGlue as the character-fetching callback, and the
 * outcome is converted with getEComparisonResult().
 *
 * @param source iterator over the first text
 * @param target iterator over the second text
 * @return the converted comparison result
 */
 Collator::EComparisonResult RuleBasedCollator::compare(
                                               ForwardCharacterIterator &source,
                                               ForwardCharacterIterator &target)
 {
   return getEComparisonResult(
     ucol_strcollinc(ucollator, forwardCharIteratorGlue, &source,
                     forwardCharIteratorGlue, &target));
 }

 /**
 * Writes the sort key of a string into a caller-supplied byte buffer.  The
 * text is first copied into a NUL-terminated UChar buffer (on the stack when
 * it fits, on the heap otherwise) and then passed to ucol_getSortKey().
 *
 * @param source       the text to build the key for
 * @param result       destination buffer, forwarded to ucol_getSortKey()
 * @param resultLength size of the destination buffer, forwarded unchanged
 * @return the value returned by ucol_getSortKey()
 */
 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                          uint8_t *result, int32_t resultLength)
                                          const
 {
   const uint32_t textLength = source.length();
   UChar stackBuffer[STACK_BUFFER_LENGTH_];
   UChar *text = (textLength >= STACK_BUFFER_LENGTH_)
                   ? new UChar[textLength + 1] : stackBuffer;

   source.extract(0, textLength, text);
   text[textLength] = 0;

   const int32_t keyLength = ucol_getSortKey(ucollator, text, textLength,
                                             result, resultLength);

   if (text != stackBuffer)
     delete[] text;

   return keyLength;
 }

 /**
 * Writes the sort key of a UChar string into a caller-supplied byte buffer
 * by forwarding all arguments to ucol_getSortKey() on the underlying
 * collator.
 *
 * @param source       the text to build the key for
 * @param sourceLength length of the text, forwarded unchanged
 * @param result       destination buffer
 * @param resultLength size of the destination buffer
 * @return the value returned by ucol_getSortKey()
 */
 int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                          int32_t sourceLength, uint8_t *result,
                                          int32_t resultLength) const
 {
   return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
 }

 /**
 * Returns the collation strength by reading the UCOL_STRENGTH attribute of
 * the underlying collator and converting it with getECollationStrength().
 * The error code of the attribute query is kept in a local variable and is
 * not reported to the caller.
 */
 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
 {
   UErrorCode intStatus = U_ZERO_ERROR;
   return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
                                &intStatus));
 }

 /**
 * Sets the collation strength: the value is converted with
 * getUCollationStrength() and stored as the UCOL_STRENGTH attribute of the
 * underlying collator.  The error code of that call is local and is not
 * reported to the caller.
 *
 * @param newStrength the strength to use from now on
 */
 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
 {
   UErrorCode ignoredStatus = U_ZERO_ERROR;

   ucol_setAttribute(ucollator, UCOL_STRENGTH,
                     getUCollationStrength(newStrength), &ignoredStatus);
 }

 /**
 * Compute a hash code for this collator.  Only the rule text reported by
 * ucol_getRules() is hashed -- that should be good enough for almost any
 * use.
 */
 int32_t RuleBasedCollator::hashCode() const
 {
   int32_t ruleLength;
   const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);

   return uhash_hashUCharsN(ruleText, ruleLength);
 }

 /**
 * Set the decomposition mode of the Collator object.  The mode is converted
 * with Normalizer::getUNormalizationMode() and passed to
 * ucol_setNormalization().  The error code of the conversion is kept in a
 * local variable; it is neither checked nor reported to the caller.
 * @param mode the new decomposition mode
 * @see Collator#getDecomposition
 */
 void RuleBasedCollator::setDecomposition(Normalizer::EMode  mode)
 {
   UErrorCode status = U_ZERO_ERROR;
   ucol_setNormalization(ucollator, Normalizer::getUNormalizationMode(mode,
                                                                      status));
 }

 /**
 * Get the decomposition mode of the Collator object.  The mode is read with
 * ucol_getNormalization() and converted with
 * Normalizer::getNormalizerEMode().  The error code of the conversion is
 * kept in a local variable and is not reported to the caller.
 * @return the decomposition mode
 * @see Collator#setDecomposition
 */
 Normalizer::EMode RuleBasedCollator::getDecomposition(void) const
 {
   UErrorCode status = U_ZERO_ERROR;
   return Normalizer::getNormalizerEMode(ucol_getNormalization(ucollator),
                                                               status);
 }

 // RuleBaseCollatorNew private constructor ----------------------------------

 /**
 * Default (private) constructor.  Creates an empty collator object that owns
 * nothing: both the collator handle and the rule string pointer start out
 * NULL, so no indeterminate pointer can later be copied or dereferenced.
 */
 RuleBasedCollator::RuleBasedCollator() : dataIsOwned(FALSE), ucollator(0),
                                          urulestring(0)
 {
 }

 /**
 * Wraps an existing collator handle and rule string.  The pointers are
 * stored as given and the object is marked as not owning them; callers that
 * want ownership set dataIsOwned afterwards.
 *
 * @param collator the UCollator handle to wrap
 * @param rule     the rule string to expose through getRules()
 */
 RuleBasedCollator::RuleBasedCollator(UCollator *collator,
                                      UnicodeString *rule) :
                                      dataIsOwned(FALSE), ucollator(collator),
                                      urulestring(rule)
 {
 }

 /**
 * Constructs the collator for a locale.
 *
 * The collator is opened with setUCollator() for the desired locale; if that
 * fails, a second attempt is made with ResourceBundle::kDefaultFilename.  On
 * success the object owns the opened collator and a UnicodeString copy of
 * the rules reported by ucol_getRules().  If both attempts fail, the failure
 * of the second attempt is reported and the object is left empty (NULL
 * collator handle, NULL rule string, nothing owned).
 *
 * @param desiredLocale the locale whose collation is wanted
 * @param status        in/out error code; nothing is done if it already
 *                      indicates a failure.  Set to
 *                      U_MEMORY_ALLOCATION_ERROR if the rule string cannot
 *                      be allocated.
 */
 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
                                            UErrorCode& status) :
                                      dataIsOwned(FALSE), ucollator(0),
                                      urulestring(0)
 {
   if (U_FAILURE(status))
     return;

   /*
   Try to load, in order:
    1. The desired locale's collation.
    2. A fallback of the desired locale.
    3. The default locale's collation.
    4. A fallback of the default locale.
    5. The default collation rules, which contains en_US collation rules.

    To reiterate, we try:
    Specific:
     language+country+variant
     language+country
     language
    Default:
     language+country+variant
     language+country
     language
    Root: (aka DEFAULTRULES)
    Steps 1-5 are handled by the resource bundle fallback mechanism.
    In the very improbable situation that no resource bundle data exists at
    all, there is nothing to build a collator from: no hard-coded default
    rules are available in this constructor, so the failure is reported.
   */

   setUCollator(desiredLocale, status);

   if (U_FAILURE(status))
   {
     /* second chance with the default resource bundle */
     status = U_ZERO_ERROR;

     setUCollator(ResourceBundle::kDefaultFilename, status);
   }

   /*
   Without a collator there are no rules to read.  The failure must not be
   masked as a warning, otherwise a NULL collator would be queried and later
   closed as if it were owned.
   */
   if (U_FAILURE(status) || ucollator == NULL)
     return;

   int32_t length;
   const UChar *r = ucol_getRules(ucollator, &length);
   if (length > 0) {
       urulestring = new UnicodeString(r, length);
   }
   else {
       urulestring = new UnicodeString();
   }

   if (urulestring == NULL)
   {
     /* do not leak the collator that was just opened */
     ucol_close(ucollator);
     ucollator = NULL;
     status = U_MEMORY_ALLOCATION_ERROR;
     return;
   }

   dataIsOwned = TRUE;
 }

 /* RuleBasedCollator private data members -------------------------------- */

 /* needs a look up in .commit() */
 const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
 /* expand index follows */
 const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
 /* contract index follows */
 const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
 /* unmapped character values */
 const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
 /* primary strength increment */
 const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
 /* secondary strength increment */
 const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
 /* tertiary strength increment */
 const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
 /* mask off anything but primary order */
 const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
 /* mask off anything but secondary order */
 const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
 /* mask off anything but tertiary order */
 const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
 /* mask off ignorable char order */
 const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
 /* use only the primary difference */
 const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
 /* use only the primary and secondary difference */
 const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
 /* primary order shift */
 const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
 /* secondary order shift */
 const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
 /* starting value for collation elements */
 const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
 /* testing mask for primary low element */
 const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
 /* resetting value for secondaries and tertiaries */
 const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
 /* resetting value for tertiaries */
 const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;

 /* NOTE(review): presumably the value of a primary ignorable - confirm */
 const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;

 /* unique file id for parity check */
 const int16_t RuleBasedCollator::FILEID = 0x5443;
 /* binary collation file extension */
 const char* RuleBasedCollator::kFilenameSuffix = ".col";
 /* class id; the value is irrelevant */
 char  RuleBasedCollator::fgClassID = 0;

 /* other methods not belonging to any classes ------------------------------- */

 /**
 * Callback handed to ucol_strcollinc(): fetches the next code unit from a
 * ForwardCharacterIterator passed as an opaque pointer.
 *
 * @param iterator pointer to a ForwardCharacterIterator
 * @return the code unit returned by nextPostInc(), or 0xFFFF when the
 *         iterator reports ForwardCharacterIterator::DONE
 */
 U_CAPI UChar forwardCharIteratorGlue(void *iterator)
 {
   ForwardCharacterIterator *charIter =
                               (ForwardCharacterIterator *)iterator;
   const UChar next = charIter->nextPostInc();

   return (next == ForwardCharacterIterator::DONE) ? (UChar)0xFFFF : next;
 }