// source/i18n/plurrule.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 * Copyright (C) 2007-2008, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 *
 * File PLURRULE.CPP
 *
 * Modification History:
 *
 *   Date        Name        Description
 *******************************************************************************
 */


 #include "unicode/uniset.h"
 #include "unicode/utypes.h"
 #include "unicode/plurrule.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "hash.h"
 #include "mutex.h"
 #include "plurrule_impl.h"
 #include "putilimp.h"
 #include "ucln_in.h"
 #include "ustrfmt.h"
 #include "locutil.h"

 #if !UCONFIG_NO_FORMATTING

 // gPluralRuleLocaleHash is a global hash table that maps a locale name to
 // a pointer to the RuleChain holding that locale's plural rules.
 // gPluralRuleLocaleHash is built only once and resides in memory until the
 // end of the application. We will remove the gPluralRuleLocaleHash table
 // when we move the plural rules data to a resource bundle in the ICU 4.0
 // release.  If Valgrind reports that the memory is still reachable, please
 // ignore it.
 static Hashtable *gPluralRuleLocaleHash=NULL;


 U_NAMESPACE_BEGIN

 // Number of elements of a statically sized array, converted to int32_t.
 // The argument must be a real array: for a pointer the sizeof ratio is
 // unrelated to the number of elements.
 #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])

 // TODO: Plural rule data - will move to ResourceBundle.
 // Each entry is a NUL-terminated string of the form
 //     <rule description> '/' <comma-separated locale names>
 // getRuleData() splits an entry at the first '/', parses the left part as a
 // rule description and registers the resulting rules for every locale named
 // in the right part. The comment above each entry spells out its text.
 #define NUMBER_PLURAL_RULES 13
 static const UChar uCharPluralRules[NUMBER_PLURAL_RULES][128] = {
  // other: n/ja,ko,tr,vi
  {LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,SLASH,LOW_J,LOW_A,COMMA,LOW_K,LOW_O,COMMA,LOW_T,
   LOW_R,COMMA,LOW_V,LOW_I, 0},
   // one: n is 1/da,de,el,en,eo,es,et,fi,fo,he,hu,it,nb,nl,nn,no,pt,sv
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SLASH,LOW_D,LOW_A,COMMA,LOW_D,
   LOW_E,COMMA,LOW_E,LOW_L,COMMA,LOW_E,LOW_N,COMMA,LOW_E,LOW_O,COMMA,LOW_E,LOW_S,COMMA,LOW_E,LOW_T,
   COMMA,LOW_F,LOW_I,COMMA,LOW_F,LOW_O,COMMA,LOW_H,LOW_E,COMMA,LOW_H,LOW_U,COMMA,LOW_I,LOW_T,COMMA,
   LOW_N,LOW_B,COMMA,LOW_N,LOW_L,COMMA,LOW_N,LOW_N,COMMA,LOW_N,LOW_O,COMMA,LOW_P,LOW_T,COMMA,LOW_S,
   LOW_V, 0},
   // one: n in 0..1/fr,pt_BR
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_ZERO,DOT,DOT,U_ONE,SLASH,LOW_F,
   LOW_R,COMMA,LOW_P,LOW_T,LOWLINE,CAP_B,CAP_R, 0},
   // zero: n is 0; one: n mod 10 is 1 and n mod 100 is not 11/lv
  {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O,
   LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S,SPACE,
   U_ONE,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,
   LOW_I,LOW_S,SPACE,LOW_N,LOW_O,LOW_T,SPACE,U_ONE,U_ONE,SLASH,LOW_L,LOW_V, 0},
   // one: n is 1; two: n is 2/ga
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W,
   LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SLASH,LOW_G,LOW_A, 0},
   // zero: n is 0; one: n is 1; zero: n mod 100 in 1..19/ro
  {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O,
   LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_Z,LOW_E,LOW_R,
   LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,
   U_ONE,DOT,DOT,U_ONE,U_NINE,SLASH,LOW_R,LOW_O, 0},
   // other: n mod 100 in 11..19; one: n mod 10 is 1; few: n mod 10 in 2..9/lt
  {LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,
   SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_ONE,DOT,DOT,U_ONE,U_NINE,SEMI_COLON,SPACE,LOW_O,LOW_N,LOW_E,COLON,
   SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,
   SPACE,LOW_F,LOW_E,LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,
   LOW_N,SPACE,U_TWO,DOT,DOT,U_NINE,SLASH,LOW_L,LOW_T, 0},
  // one: n mod 10 is 1 and n mod 100 is not 11; few: n mod 10 in 2..4
  // and n mod 100 not in 12..14/hr,ru,sr,uk
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S,
   SPACE,U_ONE,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,
   SPACE,LOW_I,LOW_S,SPACE,LOW_N,LOW_O,LOW_T,SPACE,U_ONE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W,
   COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_TWO,DOT,
   DOT,U_FOUR,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,
   U_ZERO,U_ZERO,SPACE,LOW_N,LOW_O,LOW_T,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_TWO,DOT,DOT,U_ONE,U_FOUR,
   SLASH,LOW_H,LOW_R,COMMA,LOW_R,LOW_U,COMMA,LOW_S,LOW_R,COMMA,LOW_U,LOW_K, 0},
   // one: n is 1; few: n in 2..4/cs,sk
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E,
   LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_TWO,DOT,DOT,U_FOUR,SLASH,LOW_C,LOW_S,COMMA,
   LOW_S,LOW_K, 0},
   // one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14/pl
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E,
   LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_TWO,
   DOT,DOT,U_FOUR,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,
   U_ZERO,SPACE,LOW_N,LOW_O,LOW_T,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_TWO,DOT,DOT,U_ONE,U_FOUR,SLASH,
   LOW_P,LOW_L, 0},
   // one: n mod 100 is 1; two: n mod 100 is 2; few: n mod 100 in 3..4/sl
  {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,
   LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,
   SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W,COLON,
   SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_THREE,DOT,
   DOT,U_FOUR,SLASH,LOW_S,LOW_L, 0},
   // zero: n is 0; one: n is 1; two: n is 2; few: n is 3..10; many: n in 11..99/ar
  {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O,
   LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W,LOW_O,
   COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W,COLON,SPACE,
   LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_THREE,DOT,DOT,U_ONE,U_ZERO,SEMI_COLON,SPACE,LOW_M,LOW_A,LOW_N,
   LOW_Y,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_ONE,DOT,DOT,U_NINE,U_NINE,SLASH,LOW_A,
   LOW_R, 0},
 };

 // Plural category keywords, as NUL-terminated UChar strings.
 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
 // "other: n" - the rule description used when no locale-specific rules apply.
 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
 // Reserved words of the rule syntax, matched by RuleParser::getKeyType().
 static const UChar PK_IN[]={LOW_I,LOW_N,0};
 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
 static const UChar PK_IS[]={LOW_I,LOW_S,0};
 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
 static const UChar PK_OR[]={LOW_O,LOW_R,0};
 // The variable name "n" that stands for the number being classified.
 static const UChar PK_VAR_N[]={LOW_N,0};

 // ICU RTTI boilerplate for the two UObject-derived classes defined here.
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)

 /**
  * Constructs a PluralRules object without any rules of its own.
  *
  * Attaches the object to the shared locale hash table and makes sure the
  * built-in rule data has been loaded into that table.
  *
  * @param status  In/out error code. Set to U_MEMORY_ALLOCATION_ERROR when the
  *                rule parser or the shared hash table is not available.
  */
 PluralRules::PluralRules(UErrorCode& status)
 :
     fLocaleStringsHash(NULL),
     mRules(NULL),
     mParser(new RuleParser())
 {
     // getRuleData() parses through mParser, so a failed allocation has to be
     // reported here instead of being dereferenced later.
     if (mParser == NULL && U_SUCCESS(status)) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     initHashtable(status);
     // getRuleData() also dereferences fLocaleStringsHash unconditionally.
     if (fLocaleStringsHash == NULL && U_SUCCESS(status)) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     if (U_SUCCESS(status)) {
         getRuleData(status);
     }
 }

 /**
  * Copy constructor. All copying is delegated to operator=.
  *
  * mParser starts out NULL because operator= deletes the current parser and
  * allocates a fresh one; allocating one here as well would be wasted work.
  */
 PluralRules::PluralRules(const PluralRules& other)
 : UObject(other),
     fLocaleStringsHash(NULL),
     mRules(NULL),
     mParser(NULL)
 {
     *this=other;
 }

 /**
  * Destructor. Releases the rule chain and the parser owned by this object.
  * The locale hash table is not touched here.
  */
 PluralRules::~PluralRules()
 {
     delete mRules;
     delete mParser;
 }

 /**
  * Creates a heap-allocated copy of this object via the copy constructor.
  *
  * @return the new copy.
  */
 PluralRules*
 PluralRules::clone() const {
     PluralRules *duplicate = new PluralRules(*this);
     return duplicate;
 }

 /**
  * Assignment operator. Replaces this object's rule chain with a deep copy of
  * other's rule chain, shares other's locale hash table and recreates the
  * parser.
  *
  * @param other  the object to copy from; it may have no rules (mRules NULL).
  * @return *this
  */
 PluralRules&
 PluralRules::operator=(const PluralRules& other) {
     if (this != &other) {
         // Build the copy first; a source without rules yields NULL instead
         // of dereferencing a NULL pointer.
         RuleChain *copiedRules = NULL;
         if (other.mRules != NULL) {
             copiedRules = new RuleChain(*other.mRules);
         }
         delete mRules;
         mRules = copiedRules;
         fLocaleStringsHash=other.fLocaleStringsHash;
         delete mParser;
         mParser = new RuleParser();
     }

     return *this;
 }

 /**
  * Creates a PluralRules object from a textual rule description.
  *
  * @param description  the rule description, e.g. "one: n is 1". It is not
  *                     modified.
  * @param status       In/out error code; set on syntax errors or when an
  *                     allocation fails.
  * @return a new PluralRules object owned by the caller, or NULL on failure.
  */
 PluralRules* U_EXPORT2
 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
     RuleChain   rules;

     PluralRules *newRules = new PluralRules(status);
     if (newRules == NULL) {
         if (U_SUCCESS(status)) {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
         return NULL;
     }
     if (U_SUCCESS(status)) {
         // parseDescription() takes a non-const reference; hand it a private
         // copy rather than casting away the caller's const.
         UnicodeString workingCopy(description);
         newRules->parseDescription(workingCopy, rules, status);
         if (U_SUCCESS(status)) {
             newRules->addRules(rules, status);
         }
     }
     if (U_FAILURE(status)) {
         delete newRules;
         return NULL;
     }
     return newRules;
 }

 /**
  * Creates a PluralRules object from the default description "other: n".
  *
  * @param status  In/out error code, passed through to createRules().
  * @return the result of createRules() for the default description.
  */
 PluralRules* U_EXPORT2
 PluralRules::createDefaultRules(UErrorCode& status) {
     const UnicodeString defaultDescription(PLURAL_DEFAULT_RULE);
     return createRules(defaultDescription, status);
 }

 /**
  * Creates the PluralRules object for a locale.
  *
  * Looks the locale name up in the shared hash table; when there is no entry,
  * walks up the chain of parent locales and uses the first one that has
  * rules. Falls back to the default rules when nothing matches.
  *
  * @param locale  the locale whose rules are wanted.
  * @param status  In/out error code.
  * @return a new PluralRules object owned by the caller, or NULL on failure.
  */
 PluralRules* U_EXPORT2
 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
     RuleChain *locRules;

     PluralRules *newRules = new PluralRules(status);
     if (newRules == NULL) {
         if (U_SUCCESS(status)) {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
         return NULL;
     }
     if (U_FAILURE(status)) {
         delete newRules;
         return NULL;
     }
     UnicodeString localeName;
     LocaleUtility::initNameFromLocale(locale, localeName);
     {
         Mutex lock;
         locRules = (RuleChain *) (newRules->fLocaleStringsHash->get(localeName));
     }
     if (locRules == NULL) {
         // Check parent locales.
         char parentLocale[ULOC_FULLNAME_CAPACITY];
         int32_t localeNameLen=0;
         // Bounded copy: the locale name is not guaranteed to fit the buffer.
         uprv_strncpy(parentLocale, locale.getName(), ULOC_FULLNAME_CAPACITY-1);
         parentLocale[ULOC_FULLNAME_CAPACITY-1]=0;
         while ((localeNameLen=uloc_getParent(parentLocale, parentLocale, ULOC_FULLNAME_CAPACITY, &status)) > 0) {
             // Look up the parent that was just computed, not the original
             // name (which is already known to be missing).
             UnicodeString parentName(parentLocale, localeNameLen, US_INV);
             Mutex lock;
             locRules = (RuleChain *) (newRules->fLocaleStringsHash->get(parentName));
             if (locRules != NULL) {
                 break;
             }
         }
     }
     if (locRules==NULL) {
         delete newRules; // Remove newRules to avoid memory leak since it is not needed anymore.
         return createRules(PLURAL_DEFAULT_RULE, status);
     }

     newRules->addRules(*locRules, status);
     if (U_FAILURE(status)) {
         delete newRules;
         return NULL;
     }
     return newRules;
 }

 /**
  * Returns the keyword of the first rule that the number satisfies.
  *
  * @param number  the number to classify.
  * @return the result of the rule chain, or the PLURAL_DEFAULT_RULE string
  *         when this object has no rules.
  *         NOTE(review): that fallback is the description "other: n", not the
  *         bare keyword "other" - confirm this is intended.
  */
 UnicodeString
 PluralRules::select(int32_t number) const {
     if (mRules != NULL) {
         return mRules->select(number);
     }
     return PLURAL_DEFAULT_RULE;
 }

 /**
  * Returns a new keyword enumeration owned by the caller.
  *
  * NOTE(review): nothing in this file adds elements to the enumeration's
  * keyword list, so it appears to be empty here - confirm.
  *
  * @param status  In/out error code; nothing is created if it already
  *                indicates a failure.
  * @return the new enumeration, or NULL if status indicated a failure.
  */
 StringEnumeration*
 PluralRules::getKeywords(UErrorCode& status) const {
     if (U_FAILURE(status)) {
         return NULL;
     }
     return new PluralKeywordEnumeration(status);
 }


 /**
  * Tells whether the given string is a keyword of these rules.
  *
  * @param keyword  the string to test.
  * @return the answer of the rule chain; without rules, TRUE only if keyword
  *         equals the PLURAL_DEFAULT_RULE string.
  */
 UBool
 PluralRules::isKeyword(const UnicodeString& keyword) const {
     if (mRules != NULL) {
         return mRules->isKeyword(keyword);
     }
     return (UBool)( keyword == PLURAL_DEFAULT_RULE );
 }

 /**
  * Returns the keyword "other".
  */
 UnicodeString
 PluralRules::getKeywordOther() const {
     return UnicodeString(PLURAL_KEYWORD_OTHER);
 }

 /**
  * Equality operator.
  *
  * Two objects are equal when their keyword lists contain the same keywords,
  * their repeat limits are the same and select() gives the same keyword for
  * every number below that limit.
  *
  * @param other  the object to compare with.
  * @return TRUE if both objects are equal by the criteria above.
  */
 UBool
 PluralRules::operator==(const PluralRules& other) const  {
     if ( this == &other ) {
         return TRUE;
     }

     UErrorCode status= U_ZERO_ERROR;
     StringEnumeration* myKeywordList = getKeywords(status);
     StringEnumeration* otherKeywordList =other.getKeywords(status);

     // A failure to obtain either list makes the objects incomparable.
     UBool sameList = (UBool)(U_SUCCESS(status) &&
                              myKeywordList != NULL && otherKeywordList != NULL);
     if (sameList && myKeywordList->count(status)!=otherKeywordList->count(status)) {
         sameList = FALSE;
     }
     if (sameList) {
         const UnicodeString *ptrKeyword;
         myKeywordList->reset(status);
         while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
             if (!other.isKeyword(*ptrKeyword)) {
                 sameList = FALSE;
             }
         }
         otherKeywordList->reset(status);
         while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
             if (!this->isKeyword(*ptrKeyword))  {
                 sameList = FALSE;
             }
         }
     }
     // Release the enumerations on every path, including the mismatch paths.
     delete myKeywordList;
     delete otherKeywordList;
     if (!sameList) {
         return FALSE;
     }

     // getRepeatLimit() needs a rule chain on both sides.
     if (mRules == NULL || other.mRules == NULL) {
         return (UBool)(mRules == other.mRules);
     }
     int32_t limit=this->getRepeatLimit();
     if (limit != other.getRepeatLimit()) {
         return FALSE;
     }
     UnicodeString myKeyword, otherKeyword;
     for (int32_t i=0; i<limit; ++i) {
         myKeyword = this->select(i);
         otherKeyword = other.select(i);
         if (myKeyword!=otherKeyword) {
             return FALSE;
         }
     }
     return TRUE;
 }

 /**
  * Loads the built-in rule table into the locale hash table.
  *
  * Each table entry is split at its first '/': the part before it is parsed
  * as a rule description, the part after it is a list of locale names, and
  * the parsed rules are registered for each of those locales. Processing
  * stops at the first entry without a '/' or as soon as status fails.
  *
  * @param status  In/out error code.
  */
 void
 PluralRules::getRuleData(UErrorCode& status) {
     UChar slash = (UChar)0x002F;
     UnicodeString description;
     UnicodeString localeList;
     UnicodeString oneLocale;

     for (int32_t row=0; row<NUMBER_PLURAL_RULES && U_SUCCESS(status); row++) {
         RuleChain   parsedRules;
         UnicodeString entry = UnicodeString(uCharPluralRules[row]);
         int32_t split = entry.indexOf(slash);
         if ( split < 0 ) {
             break;
         }
         description=UnicodeString(entry, 0, split);
         localeList=UnicodeString(entry, split+1);
         parseDescription(description, parsedRules, status);
         // Register the same rules under every locale named in the entry.
         int32_t cursor=0;
         while (cursor < localeList.length() && U_SUCCESS(status)) {
             getNextLocale(localeList, &cursor, oneLocale);
             addRules(oneLocale, parsedRules, TRUE, status);
         }
     }
 }

 /**
  * Parses a rule description such as
  * "one: n mod 10 is 1 and n mod 100 is not 11; few: n in 2..4"
  * into a chain of rules.
  *
  * Every keyword starts a new RuleChain node (the first one uses 'rules'
  * itself). Within a rule, "or" starts a new OrConstraint and "and" appends
  * an AndConstraint to the current one.
  *
  * @param data    the rule description; it is not modified.
  * @param rules   head of the chain that receives the parsed rules.
  * @param status  In/out error code; set on illegal characters, unexpected
  *                tokens or allocation failures.
  */
 void
 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
 {
     int32_t ruleIndex=0;
     UnicodeString token;
     tokenType type=none;
     tokenType prevType=none;
     RuleChain *ruleChain=NULL;
     AndConstraint *curAndConstraint=NULL;
     OrConstraint *orNode=NULL;

     if (U_FAILURE(status)) {
         return;
     }

     // Lower-case a private copy, so the caller's string stays untouched, and
     // use the root locale so the result does not depend on the default
     // locale (e.g. Turkish maps 'I' to a dotless i).
     UnicodeString ruleData(data);
     ruleData.toLower(Locale(""));
     while (ruleIndex< ruleData.length()) {
         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
         if (U_FAILURE(status)) {
             return;
         }
         mParser->checkSyntax(prevType, type, status);
         if (U_FAILURE(status)) {
             return;
         }
         // checkSyntax() only accepts a keyword as the first token, so
         // ruleChain and curAndConstraint are set before any other case runs.
         switch (type) {
         case tAnd:
             curAndConstraint = curAndConstraint->add();
             break;
         case tOr:
             // Extend the rule being parsed, not the first rule of the chain.
             orNode=ruleChain->ruleHeader;
             while (orNode->next != NULL) {
                 orNode = orNode->next;
             }
             orNode->next= new OrConstraint();
             if (orNode->next == NULL) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 return;
             }
             orNode=orNode->next;
             orNode->next=NULL;
             curAndConstraint = orNode->add();
             break;
         case tIs:
             curAndConstraint->rangeHigh=-1;
             break;
         case tNot:
             curAndConstraint->notIn=TRUE;
             break;
         case tIn:
             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
             break;
         case tNumber:
             if ( (curAndConstraint->op==AndConstraint::MOD)&&
                  (curAndConstraint->opNum == -1 ) ) {
                 // First number after "mod": the modulus.
                 curAndConstraint->opNum=getNumberValue(token);
             }
             else {
                 // Otherwise the low bound first, then the high bound.
                 if (curAndConstraint->rangeLow == -1) {
                     curAndConstraint->rangeLow=getNumberValue(token);
                 }
                 else {
                     curAndConstraint->rangeHigh=getNumberValue(token);
                 }
             }
             break;
         case tMod:
             curAndConstraint->op=AndConstraint::MOD;
             break;
         case tKeyword:
             if (ruleChain==NULL) {
                 ruleChain = &rules;
             }
             else {
                 while (ruleChain->next!=NULL){
                     ruleChain=ruleChain->next;
                 }
                 ruleChain->next=new RuleChain();
                 if (ruleChain->next == NULL) {
                     status = U_MEMORY_ALLOCATION_ERROR;
                     return;
                 }
                 ruleChain=ruleChain->next;
             }
             orNode = ruleChain->ruleHeader = new OrConstraint();
             if (orNode == NULL) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 return;
             }
             curAndConstraint = orNode->add();
             ruleChain->keyword = token;
             break;
         default:
             break;
         }
         // Once the first keyword has been seen, a NULL constraint can only
         // come from a failed allocation in one of the add() calls.
         if (curAndConstraint == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
         prevType=type;
     }
 }

 /**
  * Converts a token of decimal digits to its numeric value.
  *
  * @param token  the digits, as invariant characters.
  * @return the value; INT32_MAX if the token does not fit the conversion
  *         buffer or the value is larger than INT32_MAX.
  */
 int32_t
 PluralRules::getNumberValue(const UnicodeString& token) const {
     char digits[128];
     const int32_t capacity = ARRAY_SIZE(digits);

     // extract() returns the length of the whole string even when it does not
     // fit, so the result must be checked before it is used as an index.
     int32_t length = token.extract(0, token.length(), digits, capacity, US_INV);
     if (length < 0 || length >= capacity) {
         return INT32_MAX;
     }
     digits[length]='\0';

     // strtol() saturates instead of overflowing like atoi() may.
     long value = strtol(digits, NULL, 10);
     if (value > (long)INT32_MAX) {
         return INT32_MAX;
     }
     return (int32_t)value;
 }


 void
 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
     int32_t i=*curIndex;

     localeName.remove();
     while (i< localeData.length()) {
        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
            break;
        }
        i++;
     }

     while (i< localeData.length()) {
        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
            break;
        }
        localeName+=localeData.charAt(i++);
     }
     *curIndex=i;
 }


 /**
  * Returns the repeat limit of the rule chain.
  *
  * @return the limit stored in the rule chain, or 0 when this object has no
  *         rules.
  */
 int32_t
 PluralRules::getRepeatLimit() const {
     if (mRules == NULL) {
         return 0;
     }
     return mRules->getRepeatLimit();
 }

 /**
  * Attaches this object to the global locale hash table, creating the table
  * on first use.
  *
  * @param status  In/out error code. Set to U_MEMORY_ALLOCATION_ERROR if the
  *                table cannot be allocated. No table is created while status
  *                indicates a failure.
  */
 void
 PluralRules::initHashtable(UErrorCode& status) {
     if (fLocaleStringsHash!=NULL) {
         return;
     }

     Mutex lock;
     if (gPluralRuleLocaleHash == NULL) {
         if (U_FAILURE(status)) {
             // Constructing with a failed status would publish an unusable
             // table for the rest of the process.
             return;
         }
         // This static PluralRule hashtable resides in memory until end of application.
         Hashtable *table = new Hashtable(TRUE, status);
         if (table == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
         if (U_FAILURE(status)) {
             delete table;
             return;
         }
         gPluralRuleLocaleHash = table;
     }
     fLocaleStringsHash = gPluralRuleLocaleHash;
 }

 /**
  * Installs a copy of the given rules as this object's own rule chain.
  * Forwards to the four-argument overload with mLocaleName and without
  * adding anything to the hash table.
  *
  * @param rules   the rules to copy.
  * @param status  In/out error code.
  */
 void
 PluralRules::addRules(RuleChain& rules, UErrorCode& status) {
     const UBool addToHash = FALSE;
     addRules(mLocaleName, rules, addToHash, status);
 }

 void
 PluralRules::addRules(const UnicodeString& localeName, RuleChain& rules, UBool addToHash, UErrorCode& status) {
     RuleChain *newRule = new RuleChain(rules);
     if ( addToHash )
     {
         {
             Mutex lock;
             if ( (RuleChain *)fLocaleStringsHash->get(localeName) == NULL ) {
                 fLocaleStringsHash->put(localeName, newRule, status);
             }
             else {
                 delete newRule;
                 return;
             }
         }
     }
     else {
         this->mRules=newRule;
     }
     newRule->setRepeatLimit();
 }

 /**
  * Creates an empty constraint: no operation, modulus and both range bounds
  * unset (-1), not negated, no successor.
  */
 AndConstraint::AndConstraint()
 {
     this->op = AndConstraint::NONE;
     this->opNum = -1;
     this->rangeLow = -1;
     this->rangeHigh = -1;
     this->notIn = FALSE;
     this->next = NULL;
 }


 /**
  * Copy constructor. Copies the fields and recursively duplicates the
  * constraints linked through 'next'.
  */
 AndConstraint::AndConstraint(const AndConstraint& other)
 {
     op = other.op;
     opNum = other.opNum;
     rangeLow = other.rangeLow;
     rangeHigh = other.rangeHigh;
     notIn = other.notIn;
     next = (other.next != NULL) ? new AndConstraint(*other.next) : NULL;
 }

 /**
  * Destructor. Deletes the rest of the list linked through 'next'.
  */
 AndConstraint::~AndConstraint()
 {
     // Deleting a NULL pointer is a no-op, so no explicit check is needed.
     delete next;
 }


 /**
  * Tests a number against this single constraint.
  *
  * With op == MOD the number is first reduced modulo opNum. The value then
  * has to equal rangeLow (no upper bound), or lie within
  * [rangeLow, rangeHigh]; a constraint with neither bound always matches.
  * 'notIn' inverts the outcome.
  *
  * @param number  the number to test.
  * @return TRUE if the constraint holds. A constraint with modulus 0 never
  *         holds.
  */
 UBool
 AndConstraint::isFulfilled(int32_t number) {
     int32_t value=number;

     if ( op == MOD ) {
         if ( opNum == 0 ) {
             // "n mod 0" has no value; avoid the division by zero.
             return FALSE;
         }
         // Anything modulo -1 is 0; spelled out because INT32_MIN % -1
         // overflows.
         value = (opNum == -1) ? 0 : (value % opNum);
     }

     UBool matches;
     if ( rangeHigh == -1 ) {
         // No upper bound: empty rule, or an exact comparison.
         matches = (UBool)( (rangeLow == -1) || (value == rangeLow) );
     }
     else {
         matches = (UBool)( (rangeLow <= value) && (value <= rangeHigh) );
     }
     return (UBool)( notIn ? !matches : matches );
 }

 /**
  * Folds this constraint into a running maximum.
  *
  * @param maxLimit  the maximum found so far.
  * @return the larger of maxLimit and: the modulus for a MOD constraint,
  *         else rangeLow when there is no upper bound, else rangeHigh.
  */
 int32_t
 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
     if ( op == MOD ) {
         return uprv_max(opNum, maxLimit);
     }
     if ( rangeHigh == -1 ) {
         return uprv_max(rangeLow, maxLimit);
     }
     return uprv_max(rangeHigh, maxLimit);
 }


 /**
  * Creates a new empty constraint and links it as the successor of this one.
  *
  * @return the new constraint.
  */
 AndConstraint*
 AndConstraint::add()
 {
     AndConstraint *created = new AndConstraint();
     this->next = created;
     return created;
 }

 /**
  * Creates an OrConstraint without a child constraint and without successor.
  */
 OrConstraint::OrConstraint()
 {
     this->childNode = NULL;
     this->next = NULL;
 }

 /**
  * Copy constructor. Deep-copies the child AndConstraint list and the
  * OrConstraints linked through 'next'.
  */
 OrConstraint::OrConstraint(const OrConstraint& other)
 {
     childNode = (other.childNode != NULL) ? new AndConstraint(*(other.childNode)) : NULL;
     next = (other.next != NULL) ? new OrConstraint(*(other.next)) : NULL;
 }

 /**
  * Destructor. Deletes the child AndConstraint list, then the rest of the
  * OrConstraint list.
  */
 OrConstraint::~OrConstraint()
 {
     // Deleting a NULL pointer is a no-op, so no explicit checks are needed.
     delete childNode;
     delete next;
 }

 /**
  * Gives the last OrConstraint of the list a new, empty AndConstraint as its
  * child.
  *
  * @return the new AndConstraint.
  */
 AndConstraint*
 OrConstraint::add()
 {
     // Walk to the end of the list; its 'next' is NULL by construction.
     OrConstraint *tail=this;
     while (tail->next!=NULL) {
         tail = tail->next;
     }
     tail->childNode = new AndConstraint();
     return tail->childNode;
 }

 /**
  * Tests a number against this list of alternatives.
  *
  * An alternative matches when every AndConstraint in its child list holds;
  * the list matches as soon as one alternative does.
  *
  * @param number  the number to test.
  * @return TRUE if at least one alternative matches.
  */
 UBool
 OrConstraint::isFulfilled(int32_t number) {
     UBool matched=FALSE;

     for (OrConstraint *orRule=this; orRule!=NULL && !matched; orRule=orRule->next) {
         // An alternative holds until one of its constraints fails.
         matched=TRUE;
         for (AndConstraint *andRule=orRule->childNode; andRule!=NULL && matched; andRule=andRule->next) {
             matched = andRule->isFulfilled(number);
         }
     }

     return matched;
 }


 /**
  * Creates an empty rule: no constraints, no successor, repeat limit 0.
  */
 RuleChain::RuleChain()
 {
     this->ruleHeader = NULL;
     this->next = NULL;
     this->repeatLimit = 0;
 }

 /**
  * Copy constructor. Copies keyword and repeat limit and deep-copies both the
  * constraint tree and the rules linked through 'next'.
  */
 RuleChain::RuleChain(const RuleChain& other)
 {
     repeatLimit = other.repeatLimit;
     keyword = other.keyword;
     ruleHeader = (other.ruleHeader != NULL) ? new OrConstraint(*(other.ruleHeader)) : NULL;
     next = (other.next != NULL) ? new RuleChain(*other.next) : NULL;
 }

 /**
  * Destructor. Deletes the following rules, then this rule's constraints.
  */
 RuleChain::~RuleChain()
 {
     // Deleting a NULL pointer is a no-op, so no explicit checks are needed.
     delete next;
     delete ruleHeader;
 }

 /**
  * Finds the first rule in the chain whose constraints the number fulfils.
  *
  * @param number  the number to classify.
  * @return the keyword of that rule, or "other" if no rule matches.
  */
 UnicodeString
 RuleChain::select(int32_t number) const {
     for (const RuleChain *rule=this; rule != NULL; rule=rule->next) {
         if ( rule->ruleHeader != NULL && rule->ruleHeader->isFulfilled(number) ) {
             return rule->keyword;
         }
     }
     return PLURAL_KEYWORD_OTHER;
 }

 void
 RuleChain::dumpRules(UnicodeString& result) {
     UChar digitString[16];

     if ( ruleHeader != NULL ) {
         result +=  keyword;
         OrConstraint* orRule=ruleHeader;
         while ( orRule != NULL ) {
             AndConstraint* andRule=orRule->childNode;
             while ( andRule != NULL ) {
                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
                     result += UNICODE_STRING_SIMPLE(" n is ");
                     if (andRule->notIn) {
                         result += UNICODE_STRING_SIMPLE("not ");
                     }
                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
                     result += UnicodeString(digitString);
                 }
                 else {
                     if (andRule->op==AndConstraint::MOD) {
                         result += UNICODE_STRING_SIMPLE("  n mod ");
                         uprv_itou(digitString,16, andRule->opNum,10,0);
                         result += UnicodeString(digitString);
                     }
                     else {
                         result += UNICODE_STRING_SIMPLE("  n ");
                     }
                     if (andRule->rangeHigh==-1) {
                         if (andRule->notIn) {
                             result += UNICODE_STRING_SIMPLE(" is not ");
                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
                             result += UnicodeString(digitString);
                         }
                         else {
                             result += UNICODE_STRING_SIMPLE(" is ");
                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
                             result += UnicodeString(digitString);
                         }
                     }
                     else {
                         if (andRule->notIn) {
                             result += UNICODE_STRING_SIMPLE("  not in ");
                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
                             result += UnicodeString(digitString);
                             result += UNICODE_STRING_SIMPLE(" .. ");
                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
                             result += UnicodeString(digitString);
                         }
                         else {
                             result += UNICODE_STRING_SIMPLE(" in ");
                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
                             result += UnicodeString(digitString);
                             result += UNICODE_STRING_SIMPLE(" .. ");
                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
                         }
                     }
                 }
                 if ( (andRule=andRule->next) != NULL) {
                     result += PK_AND;
                 }
             }
             if ( (orRule = orRule->next) != NULL ) {
                 result += PK_OR;
             }
         }
     }
     if ( next != NULL ) {
         next->dumpRules(result);
     }
 }

 /**
  * Returns the repeat limit last stored by setRepeatLimit() (0 for a freshly
  * constructed rule).
  */
 int32_t
 RuleChain::getRepeatLimit () {
     return this->repeatLimit;
 }

 void
 RuleChain::setRepeatLimit () {
     int32_t limit=0;

     if ( next != NULL ) {
         next->setRepeatLimit();
         limit = next->repeatLimit;
     }

     if ( ruleHeader != NULL ) {
         OrConstraint* orRule=ruleHeader;
         while ( orRule != NULL ) {
             AndConstraint* andRule=orRule->childNode;
             while ( andRule != NULL ) {
                 limit = andRule->updateRepeatLimit(limit);
                 andRule = andRule->next;
             }
             orRule = orRule->next;
         }
     }
     repeatLimit = limit;
 }

 /**
  * Copies the keyword of this rule and of every following rule into an array.
  *
  * @param capacityOfKeywords  capacity of the array; a keyword is only stored
  *                            while arraySize is below capacityOfKeywords-1.
  * @param keywords            the array to fill.
  * @param arraySize           in/out: number of entries in use.
  * @return U_ZERO_ERROR if every keyword was stored, U_BUFFER_OVERFLOW_ERROR
  *         as soon as one does not fit.
  */
 UErrorCode
 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
     for (const RuleChain *rule=this; rule != NULL; rule=rule->next) {
         if ( arraySize >= capacityOfKeywords-1 ) {
             return U_BUFFER_OVERFLOW_ERROR;
         }
         keywords[arraySize++]=rule->keyword;
     }
     return U_ZERO_ERROR;
 }

 /**
  * Tells whether any rule in the chain carries the given keyword.
  *
  * @param keywordParam  the keyword to look for.
  * @return TRUE if this rule or a following rule has that keyword.
  */
 UBool
 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
     for (const RuleChain *rule=this; rule != NULL; rule=rule->next) {
         if ( rule->keyword == keywordParam ) {
             return TRUE;
         }
     }
     return FALSE;
 }


 /**
  * Builds the two character sets used by isValidKeyword(): the characters
  * allowed at the start of a keyword and those allowed after it.
  * The error code of the set constructors is not examined.
  */
 RuleParser::RuleParser()
 {
     UErrorCode err=U_ZERO_ERROR;
     idStartFilter = new UnicodeSet(UNICODE_STRING_SIMPLE("[[a-z]]"), err);
     idContinueFilter = new UnicodeSet(UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]"), err);
 }

 /**
  * Destructor. Releases the two character sets created by the constructor.
  */
 RuleParser::~RuleParser()
 {
     delete idStartFilter;
     delete idContinueFilter;
 }

 /**
  * Checks that a token may follow the previous token.
  *
  * @param prevType  type of the previous token ('none' at the start).
  * @param curType   type of the current token.
  * @param status    In/out error code; set to U_UNEXPECTED_TOKEN when the
  *                  combination is not allowed. Nothing is checked if it
  *                  already indicates a failure.
  */
 void
 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
 {
     if (U_FAILURE(status)) {
         return;
     }

     UBool allowed;
     switch(prevType) {
     case none:
     case tSemiColon:
         // A rule starts with its keyword.
         allowed = (UBool)(curType==tKeyword);
         break;
     case tVariableN :
         allowed = (UBool)(curType == tIs || curType == tMod || curType == tIn || curType == tNot);
         break;
     case tZero:
     case tOne:
     case tTwo:
     case tFew:
     case tMany:
     case tOther:
     case tKeyword:
         allowed = (UBool)(curType == tColon);
         break;
     case tColon :
         allowed = (UBool)(curType == tVariableN);
         break;
     case tIs:
         allowed = (UBool)(curType == tNumber || curType == tNot);
         break;
     case tNot:
         allowed = (UBool)(curType == tNumber || curType == tIn);
         break;
     case tMod:
     case tDot:
     case tIn:
     case tAnd:
     case tOr:
         allowed = (UBool)(curType == tNumber || curType == tVariableN);
         break;
     case tNumber:
         allowed = (UBool)(curType == tDot || curType == tSemiColon || curType == tIs ||
                           curType == tNot || curType == tIn || curType == tAnd ||
                           curType == tOr);
         break;
     default:
         allowed = FALSE;
         break;
     }

     if (!allowed) {
         status = U_UNEXPECTED_TOKEN;
     }
 }

 /**
  * Scans the next token of a rule description.
  *
  * @param ruleData   the (lower-cased) rule description.
  * @param ruleIndex  in: index at which scanning starts; out: index at which
  *                   the next call should start.
  * @param token      receives the text of a word or number token; it is not
  *                   written for ':', ';' and ".." tokens.
  * @param type       receives the token type. It is classified per character
  *                   by inRange() and, for words and numbers, refined by
  *                   getKeyType().
  * @param status     set to U_ILLEGAL_CHARACTER for a character that inRange()
  *                   rejects, or to U_UNEXPECTED_TOKEN for a character type
  *                   this scanner does not handle or a word that getKeyType()
  *                   rejects.
  */
 void
 RuleParser::getNextToken(const UnicodeString& ruleData,
                          int32_t *ruleIndex,
                          UnicodeString& token,
                          tokenType& type,
                          UErrorCode &status)
 {
     int32_t curIndex= *ruleIndex;
     UChar ch;
     // Type of the characters collected so far (none, tLetter, tNumber or tDot).
     tokenType prevType=none;

     while (curIndex<ruleData.length()) {
         ch = ruleData.charAt(curIndex);
         if ( !inRange(ch, type) ) {
             status = U_ILLEGAL_CHARACTER;
             return;
         }
         switch (type) {
         case tSpace:
             if ( *ruleIndex != curIndex ) { // letter
                 // A space ends the word or number collected so far.
                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                 *ruleIndex=curIndex;
                 type=prevType;
                 getKeyType(token, type, status);
                 return;
             }
             else {
                 // Nothing collected yet: skip the space.
                 *ruleIndex=*ruleIndex+1;
             }
             break; // consecutive space
         case tColon:
         case tSemiColon:
             if ( *ruleIndex != curIndex ) {
                 // The delimiter ends the pending token; the delimiter itself
                 // is returned by the next call.
                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                 *ruleIndex=curIndex;
                 type=prevType;
                 getKeyType(token, type, status);
                 return;
             }
             else {
                 // The delimiter is the token.
                 *ruleIndex=curIndex+1;
                 return;
             }
         case tLetter:
              // Letters are accumulated; both paths just continue scanning.
              if ((type==prevType)||(prevType==none)) {
                 prevType=type;
                 break;
              }
              break;
         case tNumber:
              if ((type==prevType)||(prevType==none)) {
                 prevType=type;
                 break;
              }
              else {
                 // A digit right after characters of another type: returns
                 // with type tNumber without writing 'token'.
                 *ruleIndex=curIndex+1;
                 return;
              }
          case tDot:
              if (prevType==none) {  // first dot
                 // 'continue' skips the curIndex++ below, so the same dot is
                 // examined again, now with prevType == tDot.
                 prevType=type;
                 continue;
              }
              else {
                  if ( *ruleIndex != curIndex ) {
                     // The dot ends the pending token.
                     token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                     *ruleIndex=curIndex;  // letter
                     type=prevType;
                     getKeyType(token, type, status);
                     return;
                  }
                  else {  // two consecutive dots
                     // Steps over two characters; the second one is not
                     // checked to be a dot.
                     *ruleIndex=curIndex+2;
                     return;
                  }
              }
              break;
          default:
              status = U_UNEXPECTED_TOKEN;
              return;
         }
         curIndex++;
     }
     // End of input: whatever was being collected is the last token.
     if ( curIndex>=ruleData.length() ) {
         if ( (type == tLetter)||(type == tNumber) ) {
             token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
             getKeyType(token, type, status);
         }
         *ruleIndex = ruleData.length();
     }
 }

 /**
  * Classifies a single character of a rule description.
  *
  * @param ch    the character.
  * @param type  receives tLetter, tNumber, tColon, tSpace, tSemiColon or tDot
  *              for an accepted character and 'none' for any other character;
  *              it is left unchanged for an upper-case letter.
  * @return TRUE if the character is accepted.
  */
 UBool
 RuleParser::inRange(UChar ch, tokenType& type) {
     // we assume all characters are in lower case already.
     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
         return FALSE;
     }

     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
         type = tLetter;
     }
     else if ((ch>=U_ZERO) && (ch<=U_NINE)) {
         type = tNumber;
     }
     else if (ch == COLON) {
         type = tColon;
     }
     else if (ch == SPACE) {
         type = tSpace;
     }
     else if (ch == SEMI_COLON) {
         type = tSemiColon;
     }
     else if (ch == DOT) {
         type = tDot;
     }
     else {
         type = none;
         return FALSE;
     }
     return TRUE;
 }


 /**
  * Refines the type of a word token.
  *
  * @param token    the token text.
  * @param keyType  in: the scanner's type; out: unchanged for tNumber,
  *                 otherwise the type of the reserved word ("n", "is", "and",
  *                 "in", "not", "mod", "or") or tKeyword for any other valid
  *                 keyword.
  * @param status   set to U_UNEXPECTED_TOKEN if the token is neither a
  *                 reserved word nor a valid keyword.
  */
 void
 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
 {
     if ( keyType==tNumber) {
         return;
     }
     if (token==PK_VAR_N) {
         keyType = tVariableN;
         return;
     }
     if (token==PK_IS) {
         keyType = tIs;
         return;
     }
     if (token==PK_AND) {
         keyType = tAnd;
         return;
     }
     if (token==PK_IN) {
         keyType = tIn;
         return;
     }
     if (token==PK_NOT) {
         keyType = tNot;
         return;
     }
     if (token==PK_MOD) {
         keyType = tMod;
         return;
     }
     if (token==PK_OR) {
         keyType = tOr;
         return;
     }
     if ( isValidKeyword(token) ) {
         keyType = tKeyword;
         return;
     }
     status = U_UNEXPECTED_TOKEN;
 }

 /**
  * Tells whether a token has the form of a keyword: a first character from
  * idStartFilter followed only by characters from idContinueFilter.
  *
  * @param token  the token to test.
  * @return TRUE if the token is a well-formed keyword, FALSE otherwise
  *         (including for an empty token).
  */
 UBool
 RuleParser::isValidKeyword(const UnicodeString& token) {
     if ( token.length()==0 ) {
         return FALSE;
     }
     if ( idStartFilter->contains(token.charAt(0) )!=TRUE ) {
         return FALSE;
     }
     for (int32_t idx=1; idx< token.length(); idx++) {
         if (idContinueFilter->contains(token.charAt(idx))== FALSE) {
             return FALSE;
         }
     }
     return TRUE;
 }

 /**
  * Creates an enumeration positioned at the start of its keyword list.
  *
  * @param status  error code handed to the keyword list's constructor.
  */
 PluralKeywordEnumeration::PluralKeywordEnumeration(UErrorCode& status) :
 fKeywordNames(status)
 {
     this->pos = 0;
 }

 /**
  * Returns the next keyword and advances the position.
  *
  * @param status  nothing is returned if it indicates a failure.
  * @return the keyword at the current position, or NULL at the end of the
  *         list or on failure.
  */
 const UnicodeString*
 PluralKeywordEnumeration::snext(UErrorCode& status) {
     if (U_FAILURE(status) || pos >= fKeywordNames.size()) {
         return NULL;
     }
     return (const UnicodeString*)fKeywordNames.elementAt(pos++);
 }

 /**
  * Moves the enumeration back to its first keyword. The status is not used.
  */
 void
 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
     this->pos = 0;
 }

 /**
  * Returns the number of keywords in the list. The status is not used.
  */
 int32_t
 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
     const int32_t total = fKeywordNames.size();
     return total;
 }

 /**
  * Destructor. Deletes every keyword string stored in the list.
  */
 PluralKeywordEnumeration::~PluralKeywordEnumeration()
 {
     for (int32_t idx=0; idx<fKeywordNames.size(); ++idx) {
         // Deleting a NULL element is a no-op.
         delete (UnicodeString *)fKeywordNames.elementAt(idx);
     }
 }

 U_NAMESPACE_END


 #endif /* #if !UCONFIG_NO_FORMATTING */

 //eof