source/i18n/rbnf.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 * Copyright (C) 1997-2004, International Business Machines Corporation and others. All Rights Reserved.
 *******************************************************************************
 */

 #include "unicode/rbnf.h"

 #if U_HAVE_RBNF

 #include "unicode/normlzr.h"
 #include "unicode/tblcoll.h"
 #include "unicode/uchar.h"
 #include "unicode/ucol.h"
 #include "unicode/uloc.h"
 #include "unicode/unum.h"
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
 #include "unicode/udata.h"
 #include "nfrs.h"

 #include "cmemory.h"
 #include "cstring.h"
 #include "util.h"

 // debugging
 // #define DEBUG

 #ifdef DEBUG
 #include "stdio.h"
 #endif

 // Resource package name handed to ures_open() when the predefined rule
 // descriptions are loaded.
 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

 // Prefix tested by the format(..., ruleSetName, ...) overloads: a name that
 // begins with it is rejected with U_ILLEGAL_ARGUMENT_ERROR.
 static const UChar gPercentPercent[] =
 {
     0x25, 0x25, 0
 }; /* "%%" */

 // All urbnf objects are created through openRules, so we init all of the
 // Unicode string constants required by rbnf, nfrs, or nfr here.
 static const UChar gLenientParse[] =
 {
     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
 }; /* "%%lenient-parse:" */
 static const UChar gSemiColon = 0x003B; /* ';' */
 static const UChar gSemiPercent[] =
 {
     0x3B, 0x25, 0
 }; /* ";%" */

 // kHalfMaxDouble is 2^22 as a double, so kMaxDouble is 2^44.  kMaxDouble is
 // the third argument given to NFRuleSet::parse() by
 // RuleBasedNumberFormat::parse().
 #define kSomeNumberOfBitsDiv2 22
 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)

 U_NAMESPACE_BEGIN

 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)

 /**
  * Abstract, reference-counted source of rule set names, display locale
  * names and per-locale display names for rule sets.
  *
  * Lifetime is managed with ref()/unref(): the destructor is protected and
  * the object deletes itself from unref() when the count drops to zero.
  */
 class LocalizationInfo : public UObject {
 protected:
     virtual ~LocalizationInfo() {}
     uint32_t refcount;

 public:
     // A new object starts with no references.
     LocalizationInfo() : refcount(0) {}

     // Adds one reference and returns this object.
     LocalizationInfo* ref(void) {
         refcount += 1;
         return this;
     }

     // Drops one reference and deletes the object when none remain.  A call
     // made while the count is already zero does nothing.  Always returns
     // NULL.
     LocalizationInfo* unref(void) {
         if (refcount != 0) {
             --refcount;
             if (refcount == 0) {
                 delete this;
             }
         }
         return NULL;
     }

     // Content comparison against another instance (which may be NULL).
     virtual UBool operator==(const LocalizationInfo* rhs) const;
     inline  UBool operator!=(const LocalizationInfo* rhs) const {
         return !(*this == rhs);
     }

     // Accessors supplied by concrete subclasses.
     virtual int32_t getNumberOfRuleSets(void) const = 0;
     virtual const UChar* getRuleSetName(int32_t index) const = 0;
     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
     virtual const UChar* getLocaleName(int32_t index) const = 0;
     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;

     // Linear searches over the accessors above; both return -1 when the
     // name is not found.
     virtual int32_t indexForLocale(const UChar* locale) const;
     virtual int32_t indexForRuleSet(const UChar* ruleset) const;

     virtual UClassID getDynamicClassID() const = 0;
     static UClassID getStaticClassID(void);
 };

 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)

 // Equality test for two NUL-terminated UChar strings.
 // Identical pointers (two NULLs included) compare equal; a NULL pointer
 // never equals a non-NULL string.
 static UBool
 streq(const UChar* lhs, const UChar* rhs) {
     if (lhs == rhs) {
         return TRUE;
     }
     if (lhs == NULL || rhs == NULL) {
         return FALSE;
     }
     return u_strcmp(lhs, rhs) == 0;
 }

 // Content comparison.  Two infos are equal when they list the same rule set
 // names in the same order and the same display locales (in any order), each
 // locale carrying the same display name for every rule set.
 // A NULL rhs is never equal.
 UBool
 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
     if (rhs == NULL) {
         return FALSE;
     }
     if (rhs == this) {
         return TRUE;
     }

     const int32_t ruleSetCount = getNumberOfRuleSets();
     if (ruleSetCount != rhs->getNumberOfRuleSets()) {
         return FALSE;
     }
     for (int32_t r = 0; r < ruleSetCount; ++r) {
         if (!streq(getRuleSetName(r), rhs->getRuleSetName(r))) {
             return FALSE;
         }
     }

     const int32_t localeCount = getNumberOfDisplayLocales();
     if (localeCount != rhs->getNumberOfDisplayLocales()) {
         return FALSE;
     }
     for (int32_t l = 0; l < localeCount; ++l) {
         const UChar* localeName = getLocaleName(l);
         const int32_t other = rhs->indexForLocale(localeName);
         // When rhs lacks the locale, other is -1 and rhs->getLocaleName(-1)
         // is compared against our name by streq().
         if (!streq(localeName, rhs->getLocaleName(other))) {
             return FALSE;
         }
         for (int32_t r = 0; r < ruleSetCount; ++r) {
             if (!streq(getDisplayName(l, r), rhs->getDisplayName(other, r))) {
                 return FALSE;
             }
         }
     }
     return TRUE;
 }

 // Returns the index of the first display locale whose name equals `locale`
 // according to streq(), or -1 when there is none.
 int32_t
 LocalizationInfo::indexForLocale(const UChar* locale) const {
     int32_t index = 0;
     while (index < getNumberOfDisplayLocales()) {
         if (streq(locale, getLocaleName(index))) {
             return index;
         }
         ++index;
     }
     return -1;
 }

 // Returns the index of the first rule set whose name equals `ruleset`, or
 // -1 when `ruleset` is NULL or no rule set has that name.
 int32_t
 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
     if (ruleset == NULL) {
         return -1;
     }
     int32_t index = 0;
     while (index < getNumberOfRuleSets()) {
         if (streq(ruleset, getRuleSetName(index))) {
             return index;
         }
         ++index;
     }
     return -1;
 }


 typedef void (*Fn_Deleter)(void*);

 class VArray {
     void** buf;
     int32_t cap;
     int32_t size;
     Fn_Deleter deleter;
 public:
     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}

     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}

     ~VArray() {
         if (deleter) {
             for (int i = 0; i < size; ++i) {
                 (*deleter)(buf[i]);
             }
         }
         uprv_free(buf);
     }

     int32_t length() {
         return size;
     }

     void add(void* elem, UErrorCode& status) {
         if (U_SUCCESS(status)) {
             if (size == cap) {
                 if (cap == 0) {
                     cap = 1;
                 } else if (cap < 256) {
                     cap *= 2;
                 } else {
                     cap += 256;
                 }
                 if (buf == NULL) {
                     buf = (void**)uprv_malloc(cap * sizeof(void*));
                 } else {
                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
                 }
                 if (buf == NULL) {
                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
                     status = U_MEMORY_ALLOCATION_ERROR;
                     return;
                 }
                 void* start = &buf[size];
                 size_t count = (cap - size) * sizeof(void*);
                 uprv_memset(start, 0, count); // fill with nulls, just because
             }
             buf[size++] = elem;
         }
     }

     void** release(void) {
         void** result = buf;
         buf = NULL;
         cap = 0;
         size = 0;
         return result;
     }
 };

 class LocDataParser;

 /**
  * LocalizationInfo built from a localization string by LocDataParser.
  *
  * Table layout as used by the accessors: data[0][i] is the name of rule
  * set i; data[l+1][0] is the name of display locale l and data[l+1][r+1]
  * is that locale's display name for rule set r.
  */
 class StringLocalizationInfo : public LocalizationInfo {
     UChar* info;          // character storage, released by the destructor
     UChar*** data;        // NULL-terminated table, released by the destructor
     int32_t numRuleSets;
     int32_t numLocales;

     friend class LocDataParser;

     // Private: instances are made by LocDataParser (see create()).
     StringLocalizationInfo(UChar* text, UChar*** table, int32_t ruleSetCount, int32_t localeCount)
         : info(text)
         , data(table)
         , numRuleSets(ruleSetCount)
         , numLocales(localeCount)
     {
     }

 public:
     // Parses `info`; returns NULL for an empty string (no error) or on
     // failure (status set).
     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

     virtual ~StringLocalizationInfo();

     virtual int32_t getNumberOfRuleSets(void) const {
         return numRuleSets;
     }
     virtual const UChar* getRuleSetName(int32_t index) const;

     virtual int32_t getNumberOfDisplayLocales(void) const {
         return numLocales;
     }
     virtual const UChar* getLocaleName(int32_t index) const;
     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

     virtual UClassID getDynamicClassID() const;
     static UClassID getStaticClassID(void);

 private:
     void init(UErrorCode& status) const;
 };


 // Code units that have syntactic meaning in a localization string.
 enum {
     OPEN_ANGLE = 0x003c, /* '<' opens an array */
     CLOSE_ANGLE = 0x003e, /* '>' closes an array */
     COMMA = 0x002c, /* ',' separates elements */
     TICK = 0x0027, /* '\'' single-quote string delimiter */
     QUOTE = 0x0022, /* '"' double-quote string delimiter */
     SPACE = 0x0020 /* ' ' */
 };

 /**
  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
  *
  * The parser works destructively on the buffer it is given: nextString()
  * overwrites the character following a string with NUL and remembers the
  * overwritten character in `ch` (0xffff means "nothing remembered").
  */
 class LocDataParser {
     UChar* data;        // start of the buffer being parsed
     const UChar* e;     // one past the last character of the buffer
     UChar* p;           // current position
     UChar ch;           // character overwritten at *p, or 0xffff
     UParseError& pe;
     UErrorCode& ec;

 public:
     LocDataParser(UParseError& parseError, UErrorCode& status)
         : data(NULL)
         , e(NULL)
         , p(NULL)
         , ch(0xffff)
         , pe(parseError)
         , ec(status)
     {
     }
     ~LocDataParser() {}

     /*
     * Parses len characters starting at _data.  On success the returned
     * StringLocalizationInfo adopts _data.  On failure _data is freed, the
     * parse error and status passed to the constructor are set, and NULL is
     * returned.
     */
     StringLocalizationInfo* parse(UChar* _data, int32_t len);

 private:

     // Steps to the next character and forgets any remembered character.
     void inc(void) {
         ++p;
         ch = 0xffff;
     }

     // TRUE when input remains and the current character -- either the
     // remembered one or the one in the buffer -- is c.
     UBool check(UChar c) {
         return p < e && (ch == c || *p == c);
     }

     // Like check(), but also consumes the character on a match.
     UBool checkInc(UChar c) {
         if (!check(c)) {
             return FALSE;
         }
         inc();
         return TRUE;
     }

     // Consumes rule white space, looking at the remembered character first.
     void skipWhitespace(void) {
         while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) {
             inc();
         }
     }

     // TRUE when c appears in the NUL-terminated list.  A list that begins
     // with SPACE additionally matches any rule white space.  Because the
     // scan stops on the terminator, c == 0 always matches.
     UBool inList(UChar c, const UChar* list) const {
         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) {
             return TRUE;
         }
         const UChar* q = list;
         while (*q && *q != c) {
             ++q;
         }
         return *q == c;
     }

     void parseError(const char* msg);

     StringLocalizationInfo* doParse(void);

     UChar** nextArray(int32_t& requiredLength);
     UChar*  nextString(void);
 };

 // Reports a parse error through parseError() and returns NULL from the
 // enclosing function.  The message is only forwarded in DEBUG builds.
 // Wrapped in do { } while (0) so the macro expands to exactly one statement
 // and remains correct in an unbraced if/else; invoke it as `ERROR("...");`.
 #ifdef DEBUG
 #define ERROR(msg) do { parseError(msg); return NULL; } while (0)
 #else
 #define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
 #endif


 // Terminator set chosen by nextString() for a string opened with QUOTE.
 static const UChar DQUOTE_STOPLIST[] = {
     QUOTE, 0
 };

 // Terminator set chosen by nextString() for a string opened with TICK.
 static const UChar SQUOTE_STOPLIST[] = {
     TICK, 0
 };

 // Terminator set chosen by nextString() for an unquoted string.  The leading
 // SPACE makes LocDataParser::inList() accept any rule white space too.
 static const UChar NOQUOTE_STOPLIST[] = {
     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
 };

 // Fn_Deleter-compatible callback: hands the element to uprv_free().
 static void
 DeleteFn(void* block) {
     uprv_free(block);
 }

 // Entry point of the parser.  Validates the arguments, resets the parse
 // error, points the parser at the buffer and runs doParse().
 // _data is freed here on every early-exit path; a NULL buffer or a
 // non-positive length yields U_ILLEGAL_ARGUMENT_ERROR.
 StringLocalizationInfo*
 LocDataParser::parse(UChar* _data, int32_t len) {
     if (U_FAILURE(ec)) {
         if (_data != NULL) {
             uprv_free(_data);
         }
         return NULL;
     }

     pe.line = 0;
     pe.offset = -1;
     pe.postContext[0] = 0;
     pe.preContext[0] = 0;

     if (_data == NULL || len <= 0) {
         ec = U_ILLEGAL_ARGUMENT_ERROR;
         if (_data != NULL) {
             uprv_free(_data);
         }
         return NULL;
     }

     data = _data;
     p = _data;
     e = _data + len;
     ch = 0xffff;

     return doParse();
 }


 /**
  * Parses the outer array: '<' inner-array { ',' inner-array } '>' followed
  * only by white space.
  *
  * On success returns a new StringLocalizationInfo that owns both the
  * character buffer and the table of inner arrays.  On any failure returns
  * NULL; parseError() (reached through ERROR) frees the character buffer and
  * sets the status, and the inner arrays collected so far are released by
  * the local VArray's deleter.
  */
 StringLocalizationInfo*
 LocDataParser::doParse(void) {
     skipWhitespace();
     if (!checkInc(OPEN_ANGLE)) {
         ERROR("Missing open angle");
     }

     VArray array(DeleteFn);
     UBool mightHaveNext = TRUE;
     int32_t requiredLength = -1;
     while (mightHaveNext) {
         mightHaveNext = FALSE;
         UChar** elem = nextArray(requiredLength);
         skipWhitespace();
         UBool haveComma = check(COMMA);
         if (elem) {
             array.add(elem, ec);
             if (U_FAILURE(ec)) {
                 // add() stored nothing, so the VArray will not free elem.
                 uprv_free(elem);
                 ERROR("Unknown error");
             }
             if (haveComma) {
                 inc();
                 mightHaveNext = TRUE;
             }
         } else if (haveComma) {
             ERROR("Unexpected character");
         }
     }

     skipWhitespace();
     if (!checkInc(CLOSE_ANGLE)) {
         if (check(OPEN_ANGLE)) {
             ERROR("Missing comma in outer array");
         } else {
             ERROR("Missing close angle bracket in outer array");
         }
     }

     skipWhitespace();
     if (p != e) {
         ERROR("Extra text after close of localization data");
     }

     array.add(NULL, ec);
     if (U_FAILURE(ec)) {
         ERROR("Unknown error");
     }

     int32_t numLocs = array.length() - 2; // subtract first, NULL
     UChar*** result = (UChar***)array.release();

     StringLocalizationInfo* locInfo =
         new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
     if (locInfo == NULL) {
         // Nothing adopted the table or the buffer: free the table here and
         // let parseError() free the buffer.
         for (UChar*** q = result; *q != NULL; ++q) {
             uprv_free(*q);
         }
         uprv_free(result);
         ec = U_MEMORY_ALLOCATION_ERROR;
         ERROR("Unknown error");
     }
     return locInfo;
 }

 // Parses one inner array: '<' string { ',' string } '>'.
 // Returns a NULL-terminated, uprv_malloc'ed array of pointers into the
 // character buffer, or NULL on error.
 // The first array parsed (requiredLength == -1) fixes requiredLength at its
 // string count plus two; every later array must contain exactly one more
 // string than the first, otherwise U_ILLEGAL_ARGUMENT_ERROR is set.
 UChar**
 LocDataParser::nextArray(int32_t& requiredLength) {
     if (U_FAILURE(ec)) {
         return NULL;
     }

     skipWhitespace();
     if (!checkInc(OPEN_ANGLE)) {
         ERROR("Missing open angle");
     }

     VArray array;
     for (;;) {
         UChar* elem = nextString();
         skipWhitespace();
         const UBool haveComma = check(COMMA);
         if (elem == NULL) {
             if (haveComma) {
                 ERROR("Unexpected comma");
             }
             break;
         }
         array.add(elem, ec);
         if (!haveComma) {
             break;
         }
         inc();
     }

     skipWhitespace();
     if (!checkInc(CLOSE_ANGLE)) {
         if (check(OPEN_ANGLE)) {
             ERROR("Missing close angle bracket in inner array");
         }
         ERROR("Missing comma in inner array");
     }

     array.add(NULL, ec);
     if (U_FAILURE(ec)) {
         ERROR("Unknown Error");
     }

     // length() now includes the terminating NULL.
     if (requiredLength == -1) {
         requiredLength = array.length() + 1;
     } else if (array.length() != requiredLength) {
         ec = U_ILLEGAL_ARGUMENT_ERROR;
         ERROR("Array not of required length");
     }

     return (UChar**)array.release();
 }

 // Parses one string, either quoted with ' or " or unquoted.
 // The string is terminated in place: the character following it is replaced
 // by NUL and remembered in ch.  Returns a pointer into the buffer, or NULL
 // when there is no string at the current position or an error was reported.
 UChar*
 LocDataParser::nextString() {
     skipWhitespace();
     if (p >= e) {
         // ok for there to be no next string
         return NULL;
     }

     const UChar first = *p;
     const UBool quoted = first == QUOTE || first == TICK;
     const UChar* stoplist = NOQUOTE_STOPLIST;
     if (quoted) {
         inc();
         stoplist = (first == QUOTE) ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
     }

     UChar* const begin = p;
     while (p < e && !inList(*p, stoplist)) {
         ++p;
     }
     if (p == e) {
         ERROR("Unexpected end of data");
     }

     UChar* text = NULL;
     const UChar stop = *p;
     if (p > begin) {
         ch = stop;
         *p = 0x0;     // terminate by writing to data
         text = begin; // just point into data
     }

     if (quoted) {
         if (stop != first) {
             ERROR("Missing matching quote");
         }
         if (p == begin) {
             ERROR("Empty string");
         }
         inc();
     } else if (stop == OPEN_ANGLE || stop == TICK || stop == QUOTE) {
         ERROR("Unexpected character in string");
     }

     return text;
 }

 void
 LocDataParser::parseError(const char* /*str*/) {
     if (!data) {
         return;
     }

     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
     if (start < data)
         start = data;
     for (UChar* x = p; --x >= start;)
         if (!*x) {
             start = x+1;
             break;
         }
     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
     if (limit > e)
         limit = e;
     u_strncpy(pe.preContext, start, p-start);
     pe.preContext[p-start] = 0;
     u_strncpy(pe.postContext, p, limit-p);
     pe.postContext[limit-p] = 0;
     pe.offset = p - data;

 #ifdef DEBUG
     fprintf(stderr, "%s at or near character %d: ", str, p-data);

     UnicodeString msg;
     msg.append(start, p - start);
     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
     msg.append(p, limit-p);
     msg.append("'");

     char buf[128];
     int32_t len = msg.extract(0, msg.length(), buf, 128);
     if (len >= 128) {
         buf[127] = 0;
     } else {
         buf[len] = 0;
     }
     fprintf(stderr, "%s\n", buf);
     fflush(stderr);
 #endif

     uprv_free(data);
     data = NULL;
     p = NULL;
     e = NULL;

     if (U_SUCCESS(ec)) {
         ec = U_PARSE_ERROR;
     }
 }

 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)

 // Copies `info` into a freshly allocated buffer and parses it with
 // LocDataParser.  Returns NULL without touching status for an empty string,
 // and NULL with status set when allocation or parsing fails.
 StringLocalizationInfo*
 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
     if (U_FAILURE(status)) {
         return NULL;
     }

     const int32_t len = info.length();
     if (len == 0) {
         return NULL; // no error;
     }

     UChar* buffer = (UChar*)uprv_malloc(len * sizeof(UChar));
     if (buffer == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
     }

     info.extract(buffer, len, status);
     if (U_SUCCESS(status)) {
         status = U_ZERO_ERROR; // clear warning about non-termination
     }

     // parse() takes over the buffer, including when status is a failure.
     LocDataParser parser(perror, status);
     return parser.parse(buffer, len);
 }

 // Frees each inner array of the NULL-terminated table, the table itself,
 // and the character buffer.  The strings inside the inner arrays are not
 // freed individually: they simply point into the character buffer.
 // A NULL table is tolerated.
 StringLocalizationInfo::~StringLocalizationInfo() {
     if (data != NULL) {
         for (UChar*** p = data; *p != NULL; ++p) {
             uprv_free(*p);
         }
         uprv_free(data);
     }
     if (info != NULL) {
         uprv_free(info);
     }
 }


 // Name of rule set `index` (stored in the first row of the table), or NULL
 // when index is out of range.
 const UChar*
 StringLocalizationInfo::getRuleSetName(int32_t index) const {
     if (index < 0 || index >= getNumberOfRuleSets()) {
         return NULL;
     }
     return data[0][index];
 }

 // Name of display locale `index` (first entry of row index+1), or NULL when
 // index is out of range.
 const UChar*
 StringLocalizationInfo::getLocaleName(int32_t index) const {
     if (index < 0 || index >= getNumberOfDisplayLocales()) {
         return NULL;
     }
     return data[index+1][0];
 }

 // Display name of rule set `ruleIndex` in display locale `localeIndex`, or
 // NULL when either index is out of range.
 const UChar*
 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
     const UBool localeOk = localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales();
     if (localeOk && ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
         return data[localeIndex+1][ruleIndex+1];
     }
     return NULL;
 }

 // ----------

 // Builds a formatter from a rule description, a localization string and an
 // explicit locale.  The localization string is parsed first and the result
 // (possibly NULL) is forwarded to init().
 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                              const UnicodeString& locs,
                                              const Locale& alocale,
                                              UParseError& perror,
                                              UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(alocale),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     init(description, StringLocalizationInfo::create(locs, perror, status), perror, status);
 }

 // Builds a formatter from a rule description and a localization string,
 // using the default locale.
 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                              const UnicodeString& locs,
                                              UParseError& perror,
                                              UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(Locale::getDefault()),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     init(description, StringLocalizationInfo::create(locs, perror, status), perror, status);
 }

 // Builds a formatter from a rule description, an already constructed
 // LocalizationInfo (may be NULL) and an explicit locale.
 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                              LocalizationInfo* info,
                                              const Locale& alocale,
                                              UParseError& perror,
                                              UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(alocale),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     init(description, info, perror, status);
 }

 // Builds a formatter from a rule description only: default locale, no
 // localization info.
 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                              UParseError& perror,
                                              UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(Locale::getDefault()),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     init(description, NULL, perror, status);
 }

 // Builds a formatter from a rule description and an explicit locale, with
 // no localization info.
 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                              const Locale& aLocale,
                                              UParseError& perror,
                                              UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(aLocale),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     init(description, NULL, perror, status);
 }

 // Builds one of the predefined formatters (spellout, ordinal or duration)
 // for a locale by loading the matching rule description from the rbnf
 // resource bundle.  Any other tag sets U_ILLEGAL_ARGUMENT_ERROR.
 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
     : ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(alocale),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     if (U_FAILURE(status)) {
         return;
     }

     // Resource key holding the rules for the requested kind of formatter.
     const char* fmt_tag = "";
     if (tag == URBNF_SPELLOUT) {
         fmt_tag = "SpelloutRules";
     } else if (tag == URBNF_ORDINAL) {
         fmt_tag = "OrdinalRules";
     } else if (tag == URBNF_DURATION) {
         fmt_tag = "DurationRules";
     } else {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }

     // TODO: read localization info from resource
     LocalizationInfo* locinfo = NULL;

     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
     if (U_SUCCESS(status)) {
         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));

         int32_t len = 0;
         const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
         UnicodeString desc(description, len);

         UParseError perror;
         init(desc, locinfo, perror, status);
     }
     ures_close(nfrb);
 }

 // Copy constructor: starts from an empty state and then reuses the
 // assignment operator to copy rhs.
 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
     : NumberFormat(rhs),
       ruleSets(NULL),
       defaultRuleSet(NULL),
       locale(rhs.locale),
       collator(NULL),
       decimalFormatSymbols(NULL),
       lenient(FALSE),
       lenientParseRules(NULL),
       localizations(NULL)
 {
     *this = rhs;
 }

 // --------

 // Assignment: discards the current state and rebuilds this formatter from
 // rhs's rules, locale, leniency and localization info.
 // Self-assignment is a no-op; without the guard dispose() would destroy the
 // very rule sets that are read back through rhs.getRules().
 // Errors raised by init() cannot be reported from here and are dropped.
 RuleBasedNumberFormat&
 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 {
     if (this == &rhs) {
         return *this;
     }

     // Copy the state held by the base class as well.
     NumberFormat::operator=(rhs);

     UErrorCode status = U_ZERO_ERROR;
     dispose();
     locale = rhs.locale;
     lenient = rhs.lenient;

     UnicodeString rules = rhs.getRules();
     UParseError perror;
     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);

     return *this;
 }

 // Destructor: all cleanup is delegated to dispose().
 RuleBasedNumberFormat::~RuleBasedNumberFormat()
 {
     dispose();
 }

 // Creates a copy by re-parsing this formatter's rules with the same
 // localization info and locale, then copying the lenient flag.
 // Returns NULL when allocation fails or the rules cannot be re-parsed.
 Format*
 RuleBasedNumberFormat::clone(void) const
 {
     UErrorCode status = U_ZERO_ERROR;
     UParseError perror;
     UnicodeString rules = getRules();

     RuleBasedNumberFormat* copy =
         new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
     if (copy == NULL) {
         return NULL;
     }
     if (U_FAILURE(status)) {
         delete copy;
         return NULL;
     }

     copy->lenient = lenient;
     return copy;
 }

 // Two formatters are equal when `other` is exactly a RuleBasedNumberFormat
 // with the same locale, lenient flag and localization info, and both hold
 // equal rule sets in the same order.
 UBool
 RuleBasedNumberFormat::operator==(const Format& other) const
 {
     if (&other == this) {
         return TRUE;
     }
     if (other.getDynamicClassID() != getStaticClassID()) {
         return FALSE;
     }

     const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
     if (!(locale == rhs.locale) || lenient != rhs.lenient) {
         return FALSE;
     }

     // Localization info: both absent, or both present and equal.
     if (localizations == NULL) {
         if (rhs.localizations != NULL) {
             return FALSE;
         }
     } else if (rhs.localizations == NULL || !(*localizations == rhs.localizations)) {
         return FALSE;
     }

     NFRuleSet** mine = ruleSets;
     NFRuleSet** theirs = rhs.ruleSets;
     if (mine == NULL) {
         return theirs == NULL;
     }
     if (theirs == NULL) {
         return FALSE;
     }

     // Walk both NULL-terminated lists in step while the entries match.
     while (*mine && *theirs && (**mine == **theirs)) {
         ++mine;
         ++theirs;
     }
     return *theirs == NULL && *mine == NULL;
 }

 // Returns the text obtained by letting every rule set, in order, append its
 // rules to one string.  Empty when there are no rule sets.
 UnicodeString
 RuleBasedNumberFormat::getRules() const
 {
     UnicodeString result;
     if (ruleSets == NULL) {
         return result;
     }
     NFRuleSet** p = ruleSets;
     while (*p != NULL) {
         (*p)->appendRules(result);
         ++p;
     }
     return result;
 }

 // Name of the index-th rule set.  With localization info present the name
 // comes from there; otherwise it is the index-th public rule set in
 // declaration order.  Returns an empty string when there is no such public
 // rule set.
 UnicodeString
 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 {
     if (localizations != NULL) {
         UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
         return string;
     }

     if (ruleSets != NULL) {
         int32_t publicSeen = 0;
         for (NFRuleSet** p = ruleSets; *p != NULL; ++p) {
             NFRuleSet* rs = *p;
             if (!rs->isPublic()) {
                 continue;
             }
             if (publicSeen == index) {
                 UnicodeString result;
                 rs->getName(result);
                 return result;
             }
             ++publicSeen;
         }
     }

     UnicodeString empty;
     return empty;
 }

 // Number of rule set names available: taken from the localization info when
 // present, otherwise the count of public rule sets.
 int32_t
 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 {
     if (localizations != NULL) {
         return localizations->getNumberOfRuleSets();
     }

     int32_t publicCount = 0;
     if (ruleSets != NULL) {
         for (NFRuleSet** p = ruleSets; *p != NULL; ++p) {
             if ((*p)->isPublic()) {
                 publicCount += 1;
             }
         }
     }
     return publicCount;
 }

 // Number of display locales in the localization info, or 0 when there is
 // no localization info.
 int32_t
 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
     return localizations != NULL ? localizations->getNumberOfDisplayLocales() : 0;
 }

 /**
  * Returns the index-th display locale of the localization info as a Locale.
  *
  * @param index  zero-based display locale index.
  * @param status set to U_ILLEGAL_ARGUMENT_ERROR when there is no
  *               localization info or index is out of range, and to
  *               U_MEMORY_ALLOCATION_ERROR when a temporary buffer for a
  *               long locale name cannot be allocated.
  * @return the locale; a default-constructed Locale on error.
  */
 Locale
 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
         char buffer[64];
         int32_t cap = name.length() + 1;
         char* bp = buffer;
         if (cap > (int32_t)sizeof(buffer)) {
             // Name does not fit the stack buffer; fall back to the heap.
             bp = (char*)uprv_malloc(cap);
             if (bp == NULL) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 Locale noLocale;
                 return noLocale;
             }
         }
         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
         Locale retLocale(bp);
         if (bp != buffer) {
             uprv_free(bp);
         }
         return retLocale;
     }
     status = U_ILLEGAL_ARGUMENT_ERROR;
     Locale retLocale;
     return retLocale;
 }

 // Display name of rule set `index` for localeParam.  The locale's base name
 // is looked up in the localization info and, failing that, progressively
 // shortened at underscores until a match is found.  When nothing matches,
 // the plain rule set name is returned.  Returns a bogus string when there
 // is no localization info or index is out of range.
 UnicodeString
 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
     if (!localizations || index < 0 || index >= localizations->getNumberOfRuleSets()) {
         UnicodeString bogus;
         bogus.setToBogus();
         return bogus;
     }

     UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
     int32_t len = localeName.length();
     UChar* localeStr = localeName.getBuffer(len + 1);
     while (len >= 0) {
         // Try the current prefix of the locale name.
         localeStr[len] = 0;
         const int32_t ix = localizations->indexForLocale(localeStr);
         if (ix >= 0) {
             UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
             return name;
         }

         // Trim the trailing portion, then skip over omitted sections
         // (consecutive underscores).
         --len;
         while (len > 0 && localeStr[len] != 0x005f) { // underscore
             --len;
         }
         while (len > 0 && localeStr[len-1] == 0x005F) {
             --len;
         }
     }

     UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
     return name;
 }

 // Display name lookup by rule set name: resolves the name to an index in
 // the localization info and defers to the index-based overload.  Returns a
 // bogus string when there is no localization info.
 UnicodeString
 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
     if (localizations == NULL) {
         UnicodeString bogus;
         bogus.setToBogus();
         return bogus;
     }

     // Work on a copy: getTerminatedBuffer() is called on a non-const string.
     UnicodeString rsn(ruleSetName);
     const int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
     return getRuleSetDisplayName(ix, localeParam);
 }

 // Returns the rule set called `name`.  Returns NULL untouched when status
 // is already a failure or there are no rule sets; returns NULL and sets
 // U_ILLEGAL_ARGUMENT_ERROR when no rule set has that name.
 NFRuleSet*
 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
 {
     if (U_FAILURE(status) || ruleSets == NULL) {
         return NULL;
     }

     for (NFRuleSet** p = ruleSets; *p != NULL; ++p) {
         if ((*p)->isNamed(name)) {
             return *p;
         }
     }

     status = U_ILLEGAL_ARGUMENT_ERROR;
     return NULL;
 }

 // Formats a 32-bit integer with the default rule set, appending to
 // toAppendTo.  Nothing is appended when there is no default rule set.
 UnicodeString&
 RuleBasedNumberFormat::format(int32_t number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
     if (defaultRuleSet != NULL) {
         const int32_t insertAt = toAppendTo.length();
         defaultRuleSet->format((int64_t)number, toAppendTo, insertAt);
     }
     return toAppendTo;
 }


 // Formats a 64-bit integer with the default rule set, appending to
 // toAppendTo.  Nothing is appended when there is no default rule set.
 UnicodeString&
 RuleBasedNumberFormat::format(int64_t number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
     if (defaultRuleSet != NULL) {
         const int32_t insertAt = toAppendTo.length();
         defaultRuleSet->format(number, toAppendTo, insertAt);
     }
     return toAppendTo;
 }


 // Formats a double with the default rule set, appending to toAppendTo.
 // Nothing is appended when there is no default rule set.
 UnicodeString&
 RuleBasedNumberFormat::format(double number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
     if (defaultRuleSet != NULL) {
         const int32_t insertAt = toAppendTo.length();
         defaultRuleSet->format(number, toAppendTo, insertAt);
     }
     return toAppendTo;
 }


 // Formats a 32-bit integer with the named rule set, appending to
 // toAppendTo.  A name starting with "%%" is rejected with
 // U_ILLEGAL_ARGUMENT_ERROR, as is a name findRuleSet() cannot resolve.
 UnicodeString&
 RuleBasedNumberFormat::format(int32_t number,
                               const UnicodeString& ruleSetName,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */,
                               UErrorCode& status) const
 {
     if (U_FAILURE(status)) {
         return toAppendTo;
     }
     if (ruleSetName.indexOf(gPercentPercent) == 0) {
         // Can't use internal rule set
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return toAppendTo;
     }

     NFRuleSet* rs = findRuleSet(ruleSetName, status);
     if (rs != NULL) {
         rs->format((int64_t)number, toAppendTo, toAppendTo.length());
     }
     return toAppendTo;
 }


 // Formats a 64-bit integer with the named rule set, appending to
 // toAppendTo.  A name starting with "%%" is rejected with
 // U_ILLEGAL_ARGUMENT_ERROR, as is a name findRuleSet() cannot resolve.
 UnicodeString&
 RuleBasedNumberFormat::format(int64_t number,
                               const UnicodeString& ruleSetName,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */,
                               UErrorCode& status) const
 {
     if (U_FAILURE(status)) {
         return toAppendTo;
     }
     if (ruleSetName.indexOf(gPercentPercent) == 0) {
         // Can't use internal rule set
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return toAppendTo;
     }

     NFRuleSet* rs = findRuleSet(ruleSetName, status);
     if (rs != NULL) {
         rs->format(number, toAppendTo, toAppendTo.length());
     }
     return toAppendTo;
 }


 // make linker happy
 // Explicit forwarder: formatting a Formattable is handled entirely by the
 // NumberFormat implementation.
 UnicodeString&
 RuleBasedNumberFormat::format(const Formattable& obj,
                               UnicodeString& toAppendTo,
                               FieldPosition& pos,
                               UErrorCode& status) const
 {
     return NumberFormat::format(obj, toAppendTo, pos, status);
 }

 // Formats a double with the named rule set, appending to toAppendTo.
 // A name starting with "%%" is rejected with U_ILLEGAL_ARGUMENT_ERROR, as
 // is a name findRuleSet() cannot resolve.
 UnicodeString&
 RuleBasedNumberFormat::format(double number,
                               const UnicodeString& ruleSetName,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */,
                               UErrorCode& status) const
 {
     if (U_FAILURE(status)) {
         return toAppendTo;
     }
     if (ruleSetName.indexOf(gPercentPercent) == 0) {
         // Can't use internal rule set
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return toAppendTo;
     }

     NFRuleSet* rs = findRuleSet(ruleSetName, status);
     if (rs != NULL) {
         rs->format(number, toAppendTo, toAppendTo.length());
     }
     return toAppendTo;
 }

 /**
  * Parses text starting at parsePosition with every public rule set and
  * keeps the result of whichever rule set consumed the most text.
  *
  * @param text           the text to parse
  * @param result         receives the parsed value; a double result that is
  *                       integral and fits in an int32_t is stored as a long
  * @param parsePosition  on input, the index at which to start parsing; on
  *                       output, advanced by the number of code units
  *                       consumed (left unchanged if nothing could be parsed)
  */
 void
 RuleBasedNumberFormat::parse(const UnicodeString& text,
                              Formattable& result,
                              ParsePosition& parsePosition) const
 {
     if (!ruleSets) {
         parsePosition.setErrorIndex(0);
         return;
     }

     // parse a copy of the text that begins at the caller's start index, so
     // all positions below are relative to that index
     UnicodeString workingText(text, parsePosition.getIndex());

     // best (longest) match seen so far
     ParsePosition high_pp(0);
     Formattable high_result;

     for (NFRuleSet** p = ruleSets; *p; ++p) {
         NFRuleSet *rp = *p;
         if (rp->isPublic()) {
             ParsePosition working_pp(0);
             Formattable working_result;

             rp->parse(workingText, working_pp, kMaxDouble, working_result);
             if (working_pp.getIndex() > high_pp.getIndex()) {
                 high_pp = working_pp;
                 high_result = working_result;

                 // nothing can beat a rule set that consumed everything
                 if (high_pp.getIndex() == workingText.length()) {
                     break;
                 }
             }
         }
     }

     // translate the relative position back into the caller's coordinates
     parsePosition.setIndex(parsePosition.getIndex() + high_pp.getIndex());
     if (high_pp.getIndex() > 0) {
         parsePosition.setErrorIndex(-1);
     }
     result = high_result;
     if (result.getType() == Formattable::kDouble) {
         const double d = result.getDouble();
         // Converting a double that does not fit into an int32_t is undefined
         // behavior, so the range has to be checked before the cast (the
         // comparisons are also false for NaN).
         if (d >= -2147483648.0 && d <= 2147483647.0) {
             int32_t r = (int32_t)d;
             if ((double)r == d) {
                 result.setLong(r);
             }
         }
     }
 }

 #if !UCONFIG_NO_COLLATION

 // Turns lenient parsing on or off.  Turning it off also discards any
 // collator that was created for lenient parsing.
 void
 RuleBasedNumberFormat::setLenient(UBool enabled)
 {
     lenient = enabled;
     if (enabled) {
         return;
     }

     // deleting a NULL pointer is a no-op, so no separate check is needed
     delete collator;
     collator = NULL;
 }

 #endif

 void
 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
     if (U_SUCCESS(status)) {
         if (ruleSetName.isEmpty()) {
           if (localizations) {
               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
               defaultRuleSet = findRuleSet(name, status);
           } else {
             initDefaultRuleSet();
           }
         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
             status = U_ILLEGAL_ARGUMENT_ERROR;
         } else {
             NFRuleSet* result = findRuleSet(ruleSetName, status);
             if (result != NULL) {
                 defaultRuleSet = result;
             }
         }
     }
 }

 // Returns the name of the default rule set, or a bogus string when there
 // is no default rule set or the default rule set is not public.
 UnicodeString
 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   UnicodeString name;
   if (defaultRuleSet == NULL || !defaultRuleSet->isPublic()) {
     name.setToBogus();
     return name;
   }
   defaultRuleSet->getName(name);
   return name;
 }

 /**
  * Resets defaultRuleSet to the last public rule set in ruleSets.
  *
  * If none of the rule sets is public, the last rule set is used.  If
  * there are no rule sets at all, defaultRuleSet is left NULL.
  */
 void
 RuleBasedNumberFormat::initDefaultRuleSet()
 {
     defaultRuleSet = NULL;
     if (!ruleSets) {
       return;
     }

     // ruleSets is NULL-terminated: walk to the terminator
     NFRuleSet**p = &ruleSets[0];
     while (*p) {
         ++p;
     }

     // An allocated but empty list (e.g. after init() gave up part-way)
     // has no last element; stepping back would read before the array.
     if (p == ruleSets) {
         return;
     }

     // start with the last rule set, then search backwards for a public one
     defaultRuleSet = *--p;
     if (!defaultRuleSet->isPublic()) {
         while (p != ruleSets) {
             if ((*--p)->isPublic()) {
                 defaultRuleSet = *p;
                 break;
             }
         }
     }
 }


 /**
  * Builds the formatter's rule sets from a textual rule description.
  *
  * The description is stripped of the whitespace that follows each
  * semicolon, any "%%lenient-parse:" section is moved into
  * lenientParseRules, the remaining text is split at ";%" into one
  * NFRuleSet per rule set, and finally the default rule set is chosen.
  *
  * @param rules              the rule description
  * @param localizationInfos  optional localization data (may be NULL); a
  *                           reference is taken, and every rule set name it
  *                           lists is looked up in the rules
  * @param pErr               currently unused
  * @param status             in/out error code; nothing is done if it
  *                           already indicates a failure on entry.  Set to
  *                           U_MEMORY_ALLOCATION_ERROR if the description is
  *                           empty or an allocation fails.
  */
 void
 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                             UParseError& /* pErr */, UErrorCode& status)
 {
     // TODO: implement UParseError
     // Note: this can leave ruleSets == NULL, so remaining code should check
     if (U_FAILURE(status)) {
         return;
     }

     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

     UnicodeString description(rules);
     if (!description.length()) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

     // start by stripping the whitespace that follows each semicolon in
     // the description (i.e. the whitespace in front of every rule).
     // This allows us to look for rule-set boundaries by searching for
     // ";%" without having to worry about whitespace between the ; and
     // the %
     stripWhitespace(description);

     // check to see if there's a set of lenient-parse rules.  If there
     // is, pull them out into our temporary holding place for them,
     // and delete them from the description before the real description-
     // parsing code sees them
     int32_t lp = description.indexOf(gLenientParse);
     if (lp != -1) {
         // we've got to make sure we're not in the middle of a rule
         // (where "%%lenient-parse" would actually get treated as
         // rule text)
         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
             // locate the beginning and end of the actual collation
             // rules (there may be whitespace between the name and
             // the first token in the description)
             int lpEnd = description.indexOf(gSemiPercent, lp);

             if (lpEnd == -1) {
                 // no further rule set follows: the section runs to the
                 // end of the description, whose final character is not
                 // copied (presumably the closing semicolon)
                 lpEnd = description.length() - 1;
             }
             int lpStart = lp + u_strlen(gLenientParse);
             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
                 ++lpStart;
             }

             // copy out the lenient-parse rules and delete them
             // from the description
             lenientParseRules = new UnicodeString();
             /* test for NULL */
             if (lenientParseRules == 0) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 return;
             }
             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

             description.remove(lp, lpEnd + 1 - lp);
         }
     }

     // pre-flight parsing the description and count the number of
     // rule sets (";%" marks the end of one rule set and the beginning
     // of the next)
     int numRuleSets = 0;
     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
         ++numRuleSets;
         ++p;
     }
     // the text after the last ";%" is a rule set too, so there is
     // always at least one
     ++numRuleSets;

     // our rule list is an array of the appropriate size, plus one slot
     // for the terminating NULL
     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
     /* test for NULL */
     if (ruleSets == 0) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

     for (int i = 0; i <= numRuleSets; ++i) {
         ruleSets[i] = NULL;
     }

     // divide up the descriptions into individual rule-set descriptions
     // and store them in a temporary array.  At each step, we also
     // new up a rule set, but all this does is initialize its name
     // and remove it from its description.  We can't actually parse
     // the rest of the descriptions and finish initializing everything
     // because we have to know the names and locations of all the rule
     // sets before we can actually set everything up
     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
     /* test for NULL */
     if (ruleSetDescriptions == 0) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }

     {
         int curRuleSet = 0;
         int32_t start = 0;
         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
             /* test for NULL */
             if (ruleSets[curRuleSet] == 0) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 // the temporary descriptions are owned by this function
                 delete[] ruleSetDescriptions;
                 return;
             }
             ++curRuleSet;
             start = p + 1;
         }
         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
         /* test for NULL */
         if (ruleSets[curRuleSet] == 0) {
             status = U_MEMORY_ALLOCATION_ERROR;
             // the temporary descriptions are owned by this function
             delete[] ruleSetDescriptions;
             return;
         }
     }

     // now we can take note of the formatter's default rule set, which
     // is the last public rule set in the description (it's the last
     // rather than the first so that a user can create a new formatter
     // from an existing formatter and change its default behavior just
     // by appending more rule sets to the end)

     // {dlf} Initialization of a fraction rule set requires the default rule
     // set to be known.  For purposes of initialization, this is always the
     // last public rule set, no matter what the localization data says.
     initDefaultRuleSet();

     // finally, we can go back through the temporary descriptions
     // list and finish setting up the substructure
     {
         for (int i = 0; i < numRuleSets; i++) {
             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
         }
     }

     delete[] ruleSetDescriptions;

     // Now that the rules are initialized, the 'real' default rule
     // set can be adjusted by the localization data.

     // The C code keeps the localization array as is, rather than building
     // a separate array of the public rule set names, so we have less work
     // to do here-- but we still need to check the names.

     if (localizationInfos) {
         // confirm the names, if any aren't in the rules, that's an error
         // it is ok if the rules contain public rule sets that are not in this list
         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
             NFRuleSet* rs = findRuleSet(name, status);
             if (rs == NULL) {
                 break; // error
             }
             // the first localized name selects the default rule set
             if (i == 0) {
                 defaultRuleSet = rs;
             }
         }
     } else {
         defaultRuleSet = getDefaultRuleSet();
     }
 }

 /**
  * Removes the rule whitespace at the start of the description and after
  * every semicolon, in place.  Each piece of text is copied from its first
  * non-whitespace character up to and including the next semicolon;
  * whitespace elsewhere is kept.
  *
  * @param description  the rule description; replaced by the stripped text
  */
 void
 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
 {
     UnicodeString stripped;
     const int32_t len = description.length();

     int32_t pos = 0;
     while (pos < len) {
         // skip the whitespace in front of the next rule
         while (pos < len && uprv_isRuleWhiteSpace(description.charAt(pos))) {
             ++pos;
         }

         int32_t semi = description.indexOf(gSemiColon, pos);
         if (semi == -1) {
             // no further semicolon: the remainder is the last rule
             stripped.append(description, pos, len - pos);
             break;
         }

         // keep everything up to and including the semicolon, then
         // continue right behind it
         stripped.append(description, pos, semi + 1 - pos);
         pos = semi + 1;
     }

     description.setTo(stripped);
 }


 void
 RuleBasedNumberFormat::dispose()
 {
     if (ruleSets) {
         for (NFRuleSet** p = ruleSets; *p; ++p) {
             delete *p;
         }
         uprv_free(ruleSets);
         ruleSets = NULL;
     }

 #if !UCONFIG_NO_COLLATION
     delete collator;
 #endif
     collator = NULL;

     delete decimalFormatSymbols;
     decimalFormatSymbols = NULL;

     delete lenientParseRules;
     lenientParseRules = NULL;

     if (localizations) localizations = localizations->unref();
 }


 //-----------------------------------------------------------------------
 // package-internal API
 //-----------------------------------------------------------------------

 /**
  * Returns the collator to use for lenient parsing.  The collator is lazily created:
  * this function creates it the first time it's called while lenient parsing is on.
  * @return The collator to use for lenient parsing, or null if lenient parsing
  * is turned off, there are no rule sets, or the collator could not be created.
 */
 Collator*
 RuleBasedNumberFormat::getCollator() const
 {
 #if !UCONFIG_NO_COLLATION
     if (!ruleSets) {
         return NULL;
     }

     // lazy-evaluate the collator
     if (collator == NULL && lenient) {
         // create a default collator based on the formatter's locale,
         // then pull out that collator's rules, append any additional
         // rules specified in the description, and create a _new_
         // collator based on the combination of those rules

         UErrorCode status = U_ZERO_ERROR;

         Collator* temp = Collator::createInstance(locale, status);
         if (U_SUCCESS(status) && temp != NULL &&
             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {

             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
             if (lenientParseRules) {
                 UnicodeString rules(newCollator->getRules());
                 rules.append(*lenientParseRules);

                 newCollator = new RuleBasedCollator(rules, status);
                 // a failed allocation must not be dereferenced below
                 if (newCollator == NULL) {
                     status = U_MEMORY_ALLOCATION_ERROR;
                 }
             } else {
                 // the default collator itself is kept, so it must not
                 // be deleted at the end
                 temp = NULL;
             }
             if (U_SUCCESS(status)) {
                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
                 // cast away const
                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
             } else {
                 delete newCollator;
             }
         }
         delete temp;
     }
 #endif

     // if lenient-parse mode is off, this will be null
     // (see setLenient())
     return collator;
 }


 /**
  * Returns the DecimalFormatSymbols object shared by all DecimalFormat
  * instances owned by this formatter.  The object is created for the
  * formatter's locale on first use and cached afterwards.
  * @return The shared DecimalFormatSymbols object, or null if it could
  * not be created.
 */
 DecimalFormatSymbols*
 RuleBasedNumberFormat::getDecimalFormatSymbols() const
 {
     // already created by an earlier call
     if (decimalFormatSymbols != NULL) {
         return decimalFormatSymbols;
     }

     UErrorCode ec = U_ZERO_ERROR;
     DecimalFormatSymbols* fresh = new DecimalFormatSymbols(locale, ec);
     if (U_FAILURE(ec)) {
         // construction failed: nothing is cached
         delete fresh;
         return decimalFormatSymbols;
     }

     // cast away const to fill in the cache
     ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = fresh;
     return decimalFormatSymbols;
 }

 U_NAMESPACE_END

 /* U_HAVE_RBNF */
 #endif