source/i18n/nfrs.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 *   Copyright (C) 1997-2001, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   file name:  nfrs.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 * Modification history
 * Date        Name      Comments
 * 10/11/2001  Doug      Ported from ICU4J
 */

 #include "nfrs.h"
 #include "nfrule.h"
 #include "nfrlist.h"
 #include "unicode/uchar.h"

 #ifdef RBNF_DEBUG
 #include "cmemory.h"
 #endif

 U_NAMESPACE_BEGIN

 #if 0
 // euclid's algorithm works with doubles
 // note, doubles only get us up to one quadrillion or so, which
 // isn't as much range as we get with longs.  We probably still
 // want either 64-bit math, or BigInteger.

 static llong
 util_lcm(llong x, llong y)
 {
     x.abs();
     y.abs();

     if (x == 0 || y == 0) {
         return 0;
     } else {
         do {
             if (x < y) {
                 llong t = x; x = y; y = t;
             }
             x -= y * (x/y);
         } while (x != 0);

         return y;
     }
 }

 #else
 /**
  * Calculates the least common multiple of x and y.
  */
 static llong
 util_lcm(llong x, llong y)
 {
     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
     // vol. 2, 1st ed., pp. 298-299
     llong x1 = x;
     llong y1 = y;

     int p2 = 0;
     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
         ++p2;
         x1 >>= 1;
         y1 >>= 1;
     }

     llong t;
     if ((x1 & 1) == 1) {
         t = -y1;
     } else {
         t = x1;
     }

     while (t != 0) {
         while ((t & 1) == 0) {
             t >>= 1;
         }
         if (t > 0) {
             x1 = t;
         } else {
             y1 = -t;
         }
         t = x1 - y1;
     }

     llong gcd = x1 << p2;

     // x * y == gcd(x, y) * lcm(x, y)
     return x / gcd * y;
 }
 #endif

 static const UChar gPercent = 0x0025;
 static const UChar gColon = 0x003a;
 static const UChar gSemicolon = 0x003b;
 static const UChar gLineFeed = 0x000a;

 static const UChar gFourSpaces[] =
 {
     0x20, 0x20, 0x20, 0x20, 0
 }; /* "    " */
 static const UChar gPercentPercent[] =
 {
     0x25, 0x25, 0
 }; /* "%%" */

 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
   : name()
   , rules(0)
   , negativeNumberRule(NULL)
   , fIsFractionRuleSet(FALSE)
   , fIsPublic(FALSE)
 {
     for (int i = 0; i < 3; ++i) {
         fractionRules[i] = NULL;
     }

     if (U_FAILURE(status)) {
         return;
     }

     UnicodeString& description = descriptions[index]; // !!! make sure index is valid

     // if the description begins with a rule set name (the rule set
     // name can be omitted in formatter descriptions that consist
     // of only one rule set), copy it out into our "name" member
     // and delete it from the description
     if (description.charAt(0) == gPercent) {
         UTextOffset pos = description.indexOf(gColon);
         if (pos == -1) {
             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
             status = U_PARSE_ERROR;
         } else {
             name.setTo(description, 0, pos);
             while (pos < description.length() && u_isWhitespace(description.charAt(++pos))) {
             }
             description.remove(0, pos);
         }
     } else {
         name.setTo("%default");
     }

     if (description.length() == 0) {
         // throw new IllegalArgumentException("Empty rule set description");
         status = U_PARSE_ERROR;
     }

     fIsPublic = name.indexOf(gPercentPercent) != 0;

     // all of the other members of NFRuleSet are initialized
     // by parseRules()
 }

 void
 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
 {
     // start by creating a Vector whose elements are Strings containing
     // the descriptions of the rules (one rule per element).  The rules
     // are separated by semicolons (there's no escape facility: ALL
     // semicolons are rule delimiters)

     if (U_FAILURE(status)) {
         return;
     }

     // dlf - the original code kept a separate description array for no reason,
     // so I got rid of it.  The loop was too complex so I simplified it.

     UnicodeString currentDescription;
     UTextOffset oldP = 0;
     while (oldP < description.length()) {
         UTextOffset p = description.indexOf(gSemicolon, oldP);
         if (p == -1) {
             p = description.length();
         }
         currentDescription.setTo(description, oldP, p - oldP);
         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
         oldP = p + 1;
     }

     // for rules that didn't specify a base value, their base values
     // were initialized to 0.  Make another pass through the list and
     // set all those rules' base values.  We also remove any special
     // rules from the list and put them into their own member variables
     llong defaultBaseValue = (int32_t)0;

     // (this isn't a for loop because we might be deleting items from
     // the vector-- we want to make sure we only increment i when
     // we _didn't_ delete aything from the vector)
     uint32_t i = 0;
     while (i < rules.size()) {
         NFRule* rule = rules[i];

         switch (rule->getType()) {
             // if the rule's base value is 0, fill in a default
             // base value (this will be 1 plus the preceding
             // rule's base value for regular rule sets, and the
             // same as the preceding rule's base value in fraction
             // rule sets)
         case NFRule::kNoBase:
             rule->setBaseValue(defaultBaseValue);
             if (!isFractionRuleSet()) {
                 ++defaultBaseValue;
             }
             ++i;
             break;

             // if it's the negative-number rule, copy it into its own
             // data member and delete it from the list
         case NFRule::kNegativeNumberRule:
             negativeNumberRule = rules.remove(i);
             break;

             // if it's the improper fraction rule, copy it into the
             // correct element of fractionRules
         case NFRule::kImproperFractionRule:
             fractionRules[0] = rules.remove(i);
             break;

             // if it's the proper fraction rule, copy it into the
             // correct element of fractionRules
         case NFRule::kProperFractionRule:
             fractionRules[1] = rules.remove(i);
             break;

             // if it's the master rule, copy it into the
             // correct element of fractionRules
         case NFRule::kMasterRule:
             fractionRules[2] = rules.remove(i);
             break;

             // if it's a regular rule that already knows its base value,
             // check to make sure the rules are in order, and update
             // the default base value for the next rule
         default:
             if (rule->getBaseValue() < defaultBaseValue) {
                 // throw new IllegalArgumentException("Rules are not in order");
                 status = U_PARSE_ERROR;
                 return;
             }
             defaultBaseValue = rule->getBaseValue();
             if (!isFractionRuleSet()) {
                 ++defaultBaseValue;
             }
             ++i;
             break;
         }
     }
 }

 NFRuleSet::~NFRuleSet()
 {
     delete negativeNumberRule;
     delete fractionRules[0];
     delete fractionRules[1];
     delete fractionRules[2];
 }

 UBool
 util_equalRules(const NFRule* rule1, const NFRule* rule2)
 {
     if (rule1) {
         if (rule2) {
             return *rule1 == *rule2;
         }
     } else if (!rule2) {
         return TRUE;
     }
     return FALSE;
 }

 UBool
 NFRuleSet::operator==(const NFRuleSet& rhs) const
 {
     if (rules.size() == rhs.rules.size() &&
         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
         name == rhs.name &&
         util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
         util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
         util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
         util_equalRules(fractionRules[2], rhs.fractionRules[2])) {

         for (uint32_t i = 0; i < rules.size(); ++i) {
             if (*rules[i] != *rhs.rules[i]) {
                 return FALSE;
             }
         }
         return TRUE;
     }
     return FALSE;
 }

 void
 NFRuleSet::format(llong number, UnicodeString& toAppendTo, int32_t pos) const
 {
     NFRule *rule = findNormalRule(number);
     rule->doFormat(number, toAppendTo, pos);
 }

 void
 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
 {
     NFRule *rule = findDoubleRule(number);
     rule->doFormat(number, toAppendTo, pos);
 }

 NFRule*
 NFRuleSet::findDoubleRule(double number) const
 {
     // if this is a fraction rule set, use findFractionRuleSetRule()
     if (isFractionRuleSet()) {
         return findFractionRuleSetRule(number);
     }

     // if the number is negative, return the negative number rule
     // (if there isn't a negative-number rule, we pretend it's a
     // positive number)
     if (number < 0) {
         if (negativeNumberRule) {
             return  negativeNumberRule;
         } else {
             number = -number;
         }
     }

     // if the number isn't an integer, we use one of the fraction rules...
     if (number != uprv_floor(number)) {
         // if the number is between 0 and 1, return the proper
         // fraction rule
         if (number < 1 && fractionRules[1]) {
             return fractionRules[1];
         }
         // otherwise, return the improper fraction rule
         else if (fractionRules[0]) {
             return fractionRules[0];
         }
     }

     // if there's a master rule, use it to format the number
     if (fractionRules[2]) {
         return fractionRules[2];
     }

     // and if we haven't yet returned a rule, use findNormalRule()
     // to find the applicable rule
     llong r = number + 0.5;
     return findNormalRule(r);
 }

 NFRule *
 NFRuleSet::findNormalRule(llong number) const
 {
     // if this is a fraction rule set, use findFractionRuleSetRule()
     // to find the rule (we should only go into this clause if the
     // value is 0)
     if (fIsFractionRuleSet) {
         return findFractionRuleSetRule(number.asDouble());
     }

     // if the number is negative, return the negative-number rule
     // (if there isn't one, pretend the number is positive)
     if (number < 0) {
         if (negativeNumberRule) {
             return negativeNumberRule;
         } else {
             number = -number;
         }
     }

     // we have to repeat the preceding two checks, even though we
     // do them in findRule(), because the version of format() that
     // takes a long bypasses findRule() and goes straight to this
     // function.  This function does skip the fraction rules since
     // we know the value is an integer (it also skips the master
     // rule, since it's considered a fraction rule.  Skipping the
     // master rule in this function is also how we avoid infinite
     // recursion)

 	// {dlf} unfortunately this fails if there are no rules except
 	// special rules.  If there are no rules, use the master rule.

     // binary-search the rule list for the applicable rule
     // (a rule is used for all values from its base value to
     // the next rule's base value)
     int32_t hi = rules.size();
 	if (hi > 0) {
 		int32_t lo = 0;

 		while (lo < hi) {
 			int32_t mid = (lo + hi) / 2;
 			if (rules[mid]->getBaseValue() == number) {
 				return rules[mid];
 			}
 			else if (rules[mid]->getBaseValue() > number) {
 				hi = mid;
 			}
 			else {
 				lo = mid + 1;
 			}
 		}
 		NFRule *result = rules[hi - 1];

 		// use shouldRollBack() to see whether we need to invoke the
 		// rollback rule (see shouldRollBack()'s documentation for
 		// an explanation of the rollback rule).  If we do, roll back
 		// one rule and return that one instead of the one we'd normally
 		// return
 		if (result->shouldRollBack(number.asDouble())) {
 			result = rules[hi - 2];
 		}
 	     return result;
 	}
 	// else use the master rule
 	return fractionRules[2];
 }

 /**
  * If this rule is a fraction rule set, this function is used by
  * findRule() to select the most appropriate rule for formatting
  * the number.  Basically, the base value of each rule in the rule
  * set is treated as the denominator of a fraction.  Whichever
  * denominator can produce the fraction closest in value to the
  * number passed in is the result.  If there's a tie, the earlier
  * one in the list wins.  (If there are two rules in a row with the
  * same base value, the first one is used when the numerator of the
  * fraction would be 1, and the second rule is used the rest of the
  * time.
  * @param number The number being formatted (which will always be
  * a number between 0 and 1)
  * @return The rule to use to format this number
  */
 NFRule*
 NFRuleSet::findFractionRuleSetRule(double number) const
 {
     // the obvious way to do this (multiply the value being formatted
     // by each rule's base value until you get an integral result)
     // doesn't work because of rounding error.  This method is more
     // accurate

     // find the least common multiple of the rules' base values
     // and multiply this by the number being formatted.  This is
     // all the precision we need, and we can do all of the rest
     // of the math using integer arithmetic
     llong leastCommonMultiple = rules[0]->getBaseValue();
     llong numerator;
     {
         for (uint32_t i = 1; i < rules.size(); ++i) {
             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
         }
         numerator = number * leastCommonMultiple.asDouble() + 0.5;
     }
     // for each rule, do the following...
     llong tempDifference;
     llong difference(uprv_maxMantissa());
     int32_t winner = 0;
     for (uint32_t i = 0; i < rules.size(); ++i) {
         // "numerator" is the numerator of the fraction if the
         // denominator is the LCD.  The numerator if the rule's
         // base value is the denominator is "numerator" times the
         // base value divided bythe LCD.  Here we check to see if
         // that's an integer, and if not, how close it is to being
         // an integer.
         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;


         // normalize the result of the above calculation: we want
         // the numerator's distance from the CLOSEST multiple
         // of the LCD
         if (leastCommonMultiple - tempDifference < tempDifference) {
             tempDifference = leastCommonMultiple - tempDifference;
         }

         // if this is as close as we've come, keep track of how close
         // that is, and the line number of the rule that did it.  If
         // we've scored a direct hit, we don't have to look at any more
         // rules
         if (tempDifference < difference) {
             difference = tempDifference;
             winner = i;
             if (difference == 0) {
                 break;
             }
         }
     }

     // if we have two successive rules that both have the winning base
     // value, then the first one (the one we found above) is used if
     // the numerator of the fraction is 1 and the second one is used if
     // the numerator of the fraction is anything else (this lets us
     // do things like "one third"/"two thirds" without haveing to define
     // a whole bunch of extra rule sets)
     if ((unsigned)(winner + 1) < rules.size() &&
         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
         double n = rules[winner]->getBaseValue().asDouble() * number;
         if (n < 0.5 || n >= 2) {
             ++winner;
         }
     }

     // finally, return the winning rule
     return rules[winner];
 }

 /**
  * Parses a string.  Matches the string to be parsed against each
  * of its rules (with a base value less than upperBound) and returns
  * the value produced by the rule that matched the most charcters
  * in the source string.
  * @param text The string to parse
  * @param parsePosition The initial position is ignored and assumed
  * to be 0.  On exit, this object has been updated to point to the
  * first character position this rule set didn't consume.
  * @param upperBound Limits the rules that can be allowed to match.
  * Only rules whose base values are strictly less than upperBound
  * are considered.
  * @return The numerical result of parsing this string.  This will
  * be the matching rule's base value, composed appropriately with
  * the results of matching any of its substitutions.  The object
  * will be an instance of Long if it's an integral value; otherwise,
  * it will be an instance of Double.  This function always returns
  * a valid object: If nothing matched the input string at all,
  * this function returns new Long(0), and the parse position is
  * left unchanged.
  */
 #ifdef RBNF_DEBUG
 #include <stdio.h>

 static void dumpUS(FILE* f, const UnicodeString& us) {
   int len = us.length();
   char* buf = new char[len+1];
   us.extract(0, len, buf);
   buf[len] = 0;
   fprintf(f, "%s", buf);
   delete[] buf;
 }
 #endif

 UBool
 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
 {
     // try matching each rule in the rule set against the text being
     // parsed.  Whichever one matches the most characters is the one
     // that determines the value we return.

     result.setLong(0);

     // dump out if there's no text to parse
     if (text.length() == 0) {
         return 0;
     }

     ParsePosition highWaterMark;
     ParsePosition workingPos = pos;

 #ifdef RBNF_DEBUG
     fprintf(stderr, "<nfrs> %x '", this);
     dumpUS(stderr, name);
     fprintf(stderr, "' text '");
     dumpUS(stderr, text);
     fprintf(stderr, "'\n");
     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
 #endif

     // start by trying the negative number rule (if there is one)
     if (negativeNumberRule) {
         Formattable tempResult;
 #ifdef RBNF_DEBUG
         fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
 #endif
         UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
 #ifdef RBNF_DEBUG
         fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
 #endif
         if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
             result = tempResult;
             highWaterMark = workingPos;
         }
         workingPos = pos;
     }
 #ifdef RBNF_DEBUG
     fprintf(stderr, "<nfrs> continue fractional with text '");
     dumpUS(stderr, text);
     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
 #endif
     // then try each of the fraction rules
     {
         for (int i = 0; i < 3; i++) {
             if (fractionRules[i]) {
                 Formattable tempResult;
                 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                     result = tempResult;
                     highWaterMark = workingPos;
                 }
                 workingPos = pos;
             }
         }
     }
 #ifdef RBNF_DEBUG
     fprintf(stderr, "<nfrs> continue other with text '");
     dumpUS(stderr, text);
     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
 #endif

     // finally, go through the regular rules one at a time.  We start
     // at the end of the list because we want to try matching the most
     // sigificant rule first (this helps ensure that we parse
     // "five thousand three hundred six" as
     // "(five thousand) (three hundred) (six)" rather than
     // "((five thousand three) hundred) (six)").  Skip rules whose
     // base values are higher than the upper bound (again, this helps
     // limit ambiguity by making sure the rules that match a rule's
     // are less significant than the rule containing the substitutions)/
     {
         llong ub(upperBound);
 #ifdef RBNF_DEBUG
         {
             char ubstr[64];
             ub.lltoa(ubstr, 64);
             fprintf(stderr, "ub: %g, ll: %s(%x/%x)\n", upperBound, ubstr, ub.hi, ub.lo);
         }
 #endif
         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                 continue;
             }
             Formattable tempResult;
             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                 result = tempResult;
                 highWaterMark = workingPos;
             }
             workingPos = pos;
         }
     }
 #ifdef RBNF_DEBUG
     fprintf(stderr, "<nfrs> exit\n");
 #endif
     // finally, update the parse postion we were passed to point to the
     // first character we didn't use, and return the result that
     // corresponds to that string of characters
     pos = highWaterMark;

     return 1;
 }

 void
 NFRuleSet::appendRules(UnicodeString& result) const
 {
     // the rule set name goes first...
     result.append(name);
     result.append(gColon);
     result.append(gLineFeed);

     // followed by the regular rules...
     for (uint32_t i = 0; i < rules.size(); i++) {
         result.append(gFourSpaces);
         rules[i]->appendRuleText(result);
         result.append(gLineFeed);
     }

     // followed by the special rules (if they exist)
     if (negativeNumberRule) {
         result.append(gFourSpaces);
         negativeNumberRule->appendRuleText(result);
         result.append(gLineFeed);
     }

     {
         for (uint32_t i = 0; i < 3; ++i) {
             if (fractionRules[i]) {
                 result.append(gFourSpaces);
                 fractionRules[i]->appendRuleText(result);
                 result.append(gLineFeed);
             }
         }
     }
 }

 U_NAMESPACE_END
	/*
	******************************************************************************
	* Copyright (C) 1997-2001, International Business Machines
	* Corporation and others. All Rights Reserved.
	******************************************************************************
	* file name: nfrs.cpp
	* encoding: US-ASCII
	* tab size: 8 (not used)
	* indentation:4
	*
	* Modification history
	* Date Name Comments
	* 10/11/2001 Doug Ported from ICU4J
	*/

	#include "nfrs.h"
	#include "nfrule.h"
	#include "nfrlist.h"
	#include "unicode/uchar.h"

	#ifdef RBNF_DEBUG
	#include "cmemory.h"
	#endif

	U_NAMESPACE_BEGIN

	#if 0
	// euclid's algorithm works with doubles
	// note, doubles only get us up to one quadrillion or so, which
	// isn't as much range as we get with longs. We probably still
	// want either 64-bit math, or BigInteger.

	static llong
	util_lcm(llong x, llong y)
	{
	x.abs();
	y.abs();

	if (x == 0 \|\| y == 0) {
	return 0;
	} else {
	do {
	if (x < y) {
	llong t = x; x = y; y = t;
	}
	x -= y * (x/y);
	} while (x != 0);

	return y;
	}
	}

	#else
	/**
	* Calculates the least common multiple of x and y.
	*/
	static llong
	util_lcm(llong x, llong y)
	{
	// binary gcd algorithm from Knuth, "The Art of Computer Programming,"
	// vol. 2, 1st ed., pp. 298-299
	llong x1 = x;
	llong y1 = y;

	int p2 = 0;
	while ((x1 & 1) == 0 && (y1 & 1) == 0) {
	++p2;
	x1 >>= 1;
	y1 >>= 1;
	}

	llong t;
	if ((x1 & 1) == 1) {
	t = -y1;
	} else {
	t = x1;
	}

	while (t != 0) {
	while ((t & 1) == 0) {
	t >>= 1;
	}
	if (t > 0) {
	x1 = t;
	} else {
	y1 = -t;
	}
	t = x1 - y1;
	}

	llong gcd = x1 << p2;

	// x * y == gcd(x, y) * lcm(x, y)
	return x / gcd * y;
	}
	#endif

	static const UChar gPercent = 0x0025;
	static const UChar gColon = 0x003a;
	static const UChar gSemicolon = 0x003b;
	static const UChar gLineFeed = 0x000a;

	static const UChar gFourSpaces[] =
	{
	0x20, 0x20, 0x20, 0x20, 0
	}; /* " " */
	static const UChar gPercentPercent[] =
	{
	0x25, 0x25, 0
	}; /* "%%" */

	NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
	: name()
	, rules(0)
	, negativeNumberRule(NULL)
	, fIsFractionRuleSet(FALSE)
	, fIsPublic(FALSE)
	{
	for (int i = 0; i < 3; ++i) {
	fractionRules[i] = NULL;
	}

	if (U_FAILURE(status)) {
	return;
	}

	UnicodeString& description = descriptions[index]; // !!! make sure index is valid

	// if the description begins with a rule set name (the rule set
	// name can be omitted in formatter descriptions that consist
	// of only one rule set), copy it out into our "name" member
	// and delete it from the description
	if (description.charAt(0) == gPercent) {
	UTextOffset pos = description.indexOf(gColon);
	if (pos == -1) {
	// throw new IllegalArgumentException("Rule set name doesn't end in colon");
	status = U_PARSE_ERROR;
	} else {
	name.setTo(description, 0, pos);
	while (pos < description.length() && u_isWhitespace(description.charAt(++pos))) {
	}
	description.remove(0, pos);
	}
	} else {
	name.setTo("%default");
	}

	if (description.length() == 0) {
	// throw new IllegalArgumentException("Empty rule set description");
	status = U_PARSE_ERROR;
	}

	fIsPublic = name.indexOf(gPercentPercent) != 0;

	// all of the other members of NFRuleSet are initialized
	// by parseRules()
	}

	void
	NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
	{
	// start by creating a Vector whose elements are Strings containing
	// the descriptions of the rules (one rule per element). The rules
	// are separated by semicolons (there's no escape facility: ALL
	// semicolons are rule delimiters)

	if (U_FAILURE(status)) {
	return;
	}

	// dlf - the original code kept a separate description array for no reason,
	// so I got rid of it. The loop was too complex so I simplified it.

	UnicodeString currentDescription;
	UTextOffset oldP = 0;
	while (oldP < description.length()) {
	UTextOffset p = description.indexOf(gSemicolon, oldP);
	if (p == -1) {
	p = description.length();
	}
	currentDescription.setTo(description, oldP, p - oldP);
	NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
	oldP = p + 1;
	}

	// for rules that didn't specify a base value, their base values
	// were initialized to 0. Make another pass through the list and
	// set all those rules' base values. We also remove any special
	// rules from the list and put them into their own member variables
	llong defaultBaseValue = (int32_t)0;

	// (this isn't a for loop because we might be deleting items from
	// the vector-- we want to make sure we only increment i when
	// we _didn't_ delete aything from the vector)
	uint32_t i = 0;
	while (i < rules.size()) {
	NFRule* rule = rules[i];

	switch (rule->getType()) {
	// if the rule's base value is 0, fill in a default
	// base value (this will be 1 plus the preceding
	// rule's base value for regular rule sets, and the
	// same as the preceding rule's base value in fraction
	// rule sets)
	case NFRule::kNoBase:
	rule->setBaseValue(defaultBaseValue);
	if (!isFractionRuleSet()) {
	++defaultBaseValue;
	}
	++i;
	break;

	// if it's the negative-number rule, copy it into its own
	// data member and delete it from the list
	case NFRule::kNegativeNumberRule:
	negativeNumberRule = rules.remove(i);
	break;

	// if it's the improper fraction rule, copy it into the
	// correct element of fractionRules
	case NFRule::kImproperFractionRule:
	fractionRules[0] = rules.remove(i);
	break;

	// if it's the proper fraction rule, copy it into the
	// correct element of fractionRules
	case NFRule::kProperFractionRule:
	fractionRules[1] = rules.remove(i);
	break;

	// if it's the master rule, copy it into the
	// correct element of fractionRules
	case NFRule::kMasterRule:
	fractionRules[2] = rules.remove(i);
	break;

	// if it's a regular rule that already knows its base value,
	// check to make sure the rules are in order, and update
	// the default base value for the next rule
	default:
	if (rule->getBaseValue() < defaultBaseValue) {
	// throw new IllegalArgumentException("Rules are not in order");
	status = U_PARSE_ERROR;
	return;
	}
	defaultBaseValue = rule->getBaseValue();
	if (!isFractionRuleSet()) {
	++defaultBaseValue;
	}
	++i;
	break;
	}
	}
	}

	NFRuleSet::~NFRuleSet()
	{
	delete negativeNumberRule;
	delete fractionRules[0];
	delete fractionRules[1];
	delete fractionRules[2];
	}

	UBool
	util_equalRules(const NFRule* rule1, const NFRule* rule2)
	{
	if (rule1) {
	if (rule2) {
	return rule1 == rule2;
	}
	} else if (!rule2) {
	return TRUE;
	}
	return FALSE;
	}

	UBool
	NFRuleSet::operator==(const NFRuleSet& rhs) const
	{
	if (rules.size() == rhs.rules.size() &&
	fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
	name == rhs.name &&
	util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
	util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
	util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
	util_equalRules(fractionRules[2], rhs.fractionRules[2])) {

	for (uint32_t i = 0; i < rules.size(); ++i) {
	if (rules[i] != rhs.rules[i]) {
	return FALSE;
	}
	}
	return TRUE;
	}
	return FALSE;
	}

	void
	NFRuleSet::format(llong number, UnicodeString& toAppendTo, int32_t pos) const
	{
	NFRule *rule = findNormalRule(number);
	rule->doFormat(number, toAppendTo, pos);
	}

	void
	NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
	{
	NFRule *rule = findDoubleRule(number);
	rule->doFormat(number, toAppendTo, pos);
	}

	NFRule*
	NFRuleSet::findDoubleRule(double number) const
	{
	// if this is a fraction rule set, use findFractionRuleSetRule()
	if (isFractionRuleSet()) {
	return findFractionRuleSetRule(number);
	}

	// if the number is negative, return the negative number rule
	// (if there isn't a negative-number rule, we pretend it's a
	// positive number)
	if (number < 0) {
	if (negativeNumberRule) {
	return negativeNumberRule;
	} else {
	number = -number;
	}
	}

	// if the number isn't an integer, we use one of the fraction rules...
	if (number != uprv_floor(number)) {
	// if the number is between 0 and 1, return the proper
	// fraction rule
	if (number < 1 && fractionRules[1]) {
	return fractionRules[1];
	}
	// otherwise, return the improper fraction rule
	else if (fractionRules[0]) {
	return fractionRules[0];
	}
	}

	// if there's a master rule, use it to format the number
	if (fractionRules[2]) {
	return fractionRules[2];
	}

	// and if we haven't yet returned a rule, use findNormalRule()
	// to find the applicable rule
	llong r = number + 0.5;
	return findNormalRule(r);
	}

	NFRule *
	NFRuleSet::findNormalRule(llong number) const
	{
	// if this is a fraction rule set, use findFractionRuleSetRule()
	// to find the rule (we should only go into this clause if the
	// value is 0)
	if (fIsFractionRuleSet) {
	return findFractionRuleSetRule(number.asDouble());
	}

	// if the number is negative, return the negative-number rule
	// (if there isn't one, pretend the number is positive)
	if (number < 0) {
	if (negativeNumberRule) {
	return negativeNumberRule;
	} else {
	number = -number;
	}
	}

	// we have to repeat the preceding two checks, even though we
	// do them in findRule(), because the version of format() that
	// takes a long bypasses findRule() and goes straight to this
	// function. This function does skip the fraction rules since
	// we know the value is an integer (it also skips the master
	// rule, since it's considered a fraction rule. Skipping the
	// master rule in this function is also how we avoid infinite
	// recursion)

	// {dlf} unfortunately this fails if there are no rules except
	// special rules. If there are no rules, use the master rule.

	// binary-search the rule list for the applicable rule
	// (a rule is used for all values from its base value to
	// the next rule's base value)
	int32_t hi = rules.size();
	if (hi > 0) {
	int32_t lo = 0;

	while (lo < hi) {
	int32_t mid = (lo + hi) / 2;
	if (rules[mid]->getBaseValue() == number) {
	return rules[mid];
	}
	else if (rules[mid]->getBaseValue() > number) {
	hi = mid;
	}
	else {
	lo = mid + 1;
	}
	}
	NFRule *result = rules[hi - 1];

	// use shouldRollBack() to see whether we need to invoke the
	// rollback rule (see shouldRollBack()'s documentation for
	// an explanation of the rollback rule). If we do, roll back
	// one rule and return that one instead of the one we'd normally
	// return
	if (result->shouldRollBack(number.asDouble())) {
	result = rules[hi - 2];
	}
	return result;
	}
	// else use the master rule
	return fractionRules[2];
	}

	/**
	* If this rule is a fraction rule set, this function is used by
	* findRule() to select the most appropriate rule for formatting
	* the number. Basically, the base value of each rule in the rule
	* set is treated as the denominator of a fraction. Whichever
	* denominator can produce the fraction closest in value to the
	* number passed in is the result. If there's a tie, the earlier
	* one in the list wins. (If there are two rules in a row with the
	* same base value, the first one is used when the numerator of the
	* fraction would be 1, and the second rule is used the rest of the
	* time.
	* @param number The number being formatted (which will always be
	* a number between 0 and 1)
	* @return The rule to use to format this number
	*/
	NFRule*
	NFRuleSet::findFractionRuleSetRule(double number) const
	{
	// the obvious way to do this (multiply the value being formatted
	// by each rule's base value until you get an integral result)
	// doesn't work because of rounding error. This method is more
	// accurate

	// find the least common multiple of the rules' base values
	// and multiply this by the number being formatted. This is
	// all the precision we need, and we can do all of the rest
	// of the math using integer arithmetic
	llong leastCommonMultiple = rules[0]->getBaseValue();
	llong numerator;
	{
	for (uint32_t i = 1; i < rules.size(); ++i) {
	leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
	}
	numerator = number * leastCommonMultiple.asDouble() + 0.5;
	}
	// for each rule, do the following...
	llong tempDifference;
	llong difference(uprv_maxMantissa());
	int32_t winner = 0;
	for (uint32_t i = 0; i < rules.size(); ++i) {
	// "numerator" is the numerator of the fraction if the
	// denominator is the LCD. The numerator if the rule's
	// base value is the denominator is "numerator" times the
	// base value divided bythe LCD. Here we check to see if
	// that's an integer, and if not, how close it is to being
	// an integer.
	tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;


	// normalize the result of the above calculation: we want
	// the numerator's distance from the CLOSEST multiple
	// of the LCD
	if (leastCommonMultiple - tempDifference < tempDifference) {
	tempDifference = leastCommonMultiple - tempDifference;
	}

	// if this is as close as we've come, keep track of how close
	// that is, and the line number of the rule that did it. If
	// we've scored a direct hit, we don't have to look at any more
	// rules
	if (tempDifference < difference) {
	difference = tempDifference;
	winner = i;
	if (difference == 0) {
	break;
	}
	}
	}

	// if we have two successive rules that both have the winning base
	// value, then the first one (the one we found above) is used if
	// the numerator of the fraction is 1 and the second one is used if
	// the numerator of the fraction is anything else (this lets us
	// do things like "one third"/"two thirds" without haveing to define
	// a whole bunch of extra rule sets)
	if ((unsigned)(winner + 1) < rules.size() &&
	rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
	double n = rules[winner]->getBaseValue().asDouble() * number;
	if (n < 0.5 \|\| n >= 2) {
	++winner;
	}
	}

	// finally, return the winning rule
	return rules[winner];
	}

	/**
	* Parses a string. Matches the string to be parsed against each
	* of its rules (with a base value less than upperBound) and returns
	* the value produced by the rule that matched the most charcters
	* in the source string.
	* @param text The string to parse
	* @param parsePosition The initial position is ignored and assumed
	* to be 0. On exit, this object has been updated to point to the
	* first character position this rule set didn't consume.
	* @param upperBound Limits the rules that can be allowed to match.
	* Only rules whose base values are strictly less than upperBound
	* are considered.
	* @return The numerical result of parsing this string. This will
	* be the matching rule's base value, composed appropriately with
	* the results of matching any of its substitutions. The object
	* will be an instance of Long if it's an integral value; otherwise,
	* it will be an instance of Double. This function always returns
	* a valid object: If nothing matched the input string at all,
	* this function returns new Long(0), and the parse position is
	* left unchanged.
	*/
	#ifdef RBNF_DEBUG
	#include <stdio.h>

	static void dumpUS(FILE* f, const UnicodeString& us) {
	int len = us.length();
	char* buf = new char[len+1];
	us.extract(0, len, buf);
	buf[len] = 0;
	fprintf(f, "%s", buf);
	delete[] buf;
	}
	#endif

	UBool
	NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
	{
	// try matching each rule in the rule set against the text being
	// parsed. Whichever one matches the most characters is the one
	// that determines the value we return.

	result.setLong(0);

	// dump out if there's no text to parse
	if (text.length() == 0) {
	return 0;
	}

	ParsePosition highWaterMark;
	ParsePosition workingPos = pos;

	#ifdef RBNF_DEBUG
	fprintf(stderr, "<nfrs> %x '", this);
	dumpUS(stderr, name);
	fprintf(stderr, "' text '");
	dumpUS(stderr, text);
	fprintf(stderr, "'\n");
	fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
	#endif

	// start by trying the negative number rule (if there is one)
	if (negativeNumberRule) {
	Formattable tempResult;
	#ifdef RBNF_DEBUG
	fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
	#endif
	UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
	#ifdef RBNF_DEBUG
	fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
	#endif
	if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
	result = tempResult;
	highWaterMark = workingPos;
	}
	workingPos = pos;
	}
	#ifdef RBNF_DEBUG
	fprintf(stderr, "<nfrs> continue fractional with text '");
	dumpUS(stderr, text);
	fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
	#endif
	// then try each of the fraction rules
	{
	for (int i = 0; i < 3; i++) {
	if (fractionRules[i]) {
	Formattable tempResult;
	UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
	if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
	result = tempResult;
	highWaterMark = workingPos;
	}
	workingPos = pos;
	}
	}
	}
	#ifdef RBNF_DEBUG
	fprintf(stderr, "<nfrs> continue other with text '");
	dumpUS(stderr, text);
	fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
	#endif

	// finally, go through the regular rules one at a time. We start
	// at the end of the list because we want to try matching the most
	// sigificant rule first (this helps ensure that we parse
	// "five thousand three hundred six" as
	// "(five thousand) (three hundred) (six)" rather than
	// "((five thousand three) hundred) (six)"). Skip rules whose
	// base values are higher than the upper bound (again, this helps
	// limit ambiguity by making sure the rules that match a rule's
	// are less significant than the rule containing the substitutions)/
	{
	llong ub(upperBound);
	#ifdef RBNF_DEBUG
	{
	char ubstr[64];
	ub.lltoa(ubstr, 64);
	fprintf(stderr, "ub: %g, ll: %s(%x/%x)\n", upperBound, ubstr, ub.hi, ub.lo);
	}
	#endif
	for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
	if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
	continue;
	}
	Formattable tempResult;
	UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
	if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
	result = tempResult;
	highWaterMark = workingPos;
	}
	workingPos = pos;
	}
	}
	#ifdef RBNF_DEBUG
	fprintf(stderr, "<nfrs> exit\n");
	#endif
	// finally, update the parse postion we were passed to point to the
	// first character we didn't use, and return the result that
	// corresponds to that string of characters
	pos = highWaterMark;

	return 1;
	}

	void
	NFRuleSet::appendRules(UnicodeString& result) const
	{
	// the rule set name goes first...
	result.append(name);
	result.append(gColon);
	result.append(gLineFeed);

	// followed by the regular rules...
	for (uint32_t i = 0; i < rules.size(); i++) {
	result.append(gFourSpaces);
	rules[i]->appendRuleText(result);
	result.append(gLineFeed);
	}

	// followed by the special rules (if they exist)
	if (negativeNumberRule) {
	result.append(gFourSpaces);
	negativeNumberRule->appendRuleText(result);
	result.append(gLineFeed);
	}

	{
	for (uint32_t i = 0; i < 3; ++i) {
	if (fractionRules[i]) {
	result.append(gFourSpaces);
	fractionRules[i]->appendRuleText(result);
	result.append(gLineFeed);
	}
	}
	}
	}

	U_NAMESPACE_END