blob: 95b690572026e142e1342b35e3ade9e1cb9f7942 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.Utility;
/**
* A collection of rules used by a RuleBasedNumberFormat to format and
* parse numbers. It is the responsibility of a RuleSet to select an
* appropriate rule for formatting a particular number and dispatch
* control to it, and to arbitrate between different rules when parsing
* a number.
*/
final class NFRuleSet {
//-----------------------------------------------------------------------
// data members
//-----------------------------------------------------------------------
/**
* The rule set's name
*/
private String name;
/**
* The rule set's regular rules
*/
private NFRule[] rules;
/**
* The rule set's negative-number rule
*/
private NFRule negativeNumberRule = null;
/**
* The rule set's fraction rules: element 0 is the proper fraction
* (0.x) rule, element 1 is the improper fraction (x.x) rule, and
* element 2 is the master (x.0) rule.
*/
private NFRule[] fractionRules = new NFRule[3];
/**
* True if the rule set is a fraction rule set. A fraction rule set
* is a rule set that is used to format the fractional part of a
* number. It is called from a >> substitution in another rule set's
* fraction rule, and is only called upon to format values between
* 0 and 1. A fraction rule set has different rule-selection
* behavior than a regular rule set.
*/
private boolean isFractionRuleSet = false;
/**
* True if the rule set is parseable.
*/
private boolean isParseable = true;
/**
* Used to limit recursion for bad rule sets.
*/
private int recursionCount = 0;
/**
* Limit of recursion.
*/
private static final int RECURSION_LIMIT = 50;
//-----------------------------------------------------------------------
// construction
//-----------------------------------------------------------------------
/*
* Constructs a rule set.
* @param descriptions An array of Strings representing rule set
* descriptions. On exit, this rule set's entry in the array will
* have been stripped of its rule set name and any trailing whitespace.
* @param index The index into "descriptions" of the description
* for the rule to be constructed
*/
public NFRuleSet(String[] descriptions, int index) throws IllegalArgumentException {
String description = descriptions[index];
if (description.length() == 0) {
throw new IllegalArgumentException("Empty rule set description");
}
// if the description begins with a rule set name (the rule set
// name can be omitted in formatter descriptions that consist
// of only one rule set), copy it out into our "name" member
// and delete it from the description
if (description.charAt(0) == '%') {
int pos = description.indexOf(':');
if (pos == -1) {
throw new IllegalArgumentException("Rule set name doesn't end in colon");
}
else {
name = description.substring(0, pos);
while (pos < description.length() && PatternProps.isWhiteSpace(description.charAt(++pos))) {
}
description = description.substring(pos);
descriptions[index] = description;
}
}
else {
// if the description doesn't begin with a rule set name, its
// name is "%default"
name = "%default";
}
if (description.length() == 0) {
throw new IllegalArgumentException("Empty rule set description");
}
if ( name.endsWith("@noparse")) {
name = name.substring(0,name.length()-8); // Remove the @noparse from the name
isParseable = false;
}
// all of the other members of NFRuleSet are initialized
// by parseRules()
}
/**
* Construct the subordinate data structures used by this object.
* This function is called by the RuleBasedNumberFormat constructor
* after all the rule sets have been created to actually parse
* the description and build rules from it. Since any rule set
* can refer to any other rule set, we have to have created all of
* them before we can create anything else.
* @param description The textual description of this rule set
* @param owner The formatter that owns this rule set
*/
public void parseRules(String description,
RuleBasedNumberFormat owner) {
// start by creating a Vector whose elements are Strings containing
// the descriptions of the rules (one rule per element). The rules
// are separated by semicolons (there's no escape facility: ALL
// semicolons are rule delimiters)
List<String> ruleDescriptions = new ArrayList<String>();
int oldP = 0;
int p = description.indexOf(';');
while (oldP != -1) {
if (p != -1) {
ruleDescriptions.add(description.substring(oldP, p));
oldP = p + 1;
} else {
if (oldP < description.length()) {
ruleDescriptions.add(description.substring(oldP));
}
oldP = p;
}
p = description.indexOf(';', p + 1);
}
// now go back through and build a vector of the rules themselves
// (the number of elements in the description list isn't necessarily
// the number of rules-- some descriptions may expend into two rules)
List<NFRule> tempRules = new ArrayList<NFRule>();
// we keep track of the rule before the one we're currently working
// on solely to support >>> substitutions
NFRule predecessor = null;
for (String ruleDescription : ruleDescriptions) {
// makeRules (a factory method on NFRule) will return either
// a single rule or an array of rules. Either way, add them
// to our rule vector
Object temp = NFRule.makeRules(ruleDescription,
this, predecessor, owner);
if (temp instanceof NFRule) {
tempRules.add((NFRule) temp);
predecessor = (NFRule) temp;
}
else if (temp instanceof NFRule[]) {
NFRule[] rulesToAdd = (NFRule[]) temp;
for (int j = 0; j < rulesToAdd.length; j++) {
tempRules.add(rulesToAdd[j]);
predecessor = rulesToAdd[j];
}
}
}
// now we can bag the description list
ruleDescriptions = null;
// for rules that didn't specify a base value, their base values
// were initialized to 0. Make another pass through the list and
// set all those rules' base values. We also remove any special
// rules from the list and put them into their own member variables
long defaultBaseValue = 0;
// (this isn't a for loop because we might be deleting items from
// the vector-- we want to make sure we only increment i when
// we _didn't_ delete anything from the vector)
int i = 0;
while (i < tempRules.size()) {
NFRule rule = tempRules.get(i);
switch ((int)rule.getBaseValue()) {
case 0:
// if the rule's base value is 0, fill in a default
// base value (this will be 1 plus the preceding
// rule's base value for regular rule sets, and the
// same as the preceding rule's base value in fraction
// rule sets)
rule.setBaseValue(defaultBaseValue);
if (!isFractionRuleSet) {
++defaultBaseValue;
}
++i;
break;
case NFRule.NEGATIVE_NUMBER_RULE:
// if it's the negative-number rule, copy it into its own
// data member and delete it from the list
negativeNumberRule = rule;
tempRules.remove(i);
break;
case NFRule.IMPROPER_FRACTION_RULE:
// if it's the improper fraction rule, copy it into the
// correct element of fractionRules
fractionRules[0] = rule;
tempRules.remove(i);
break;
case NFRule.PROPER_FRACTION_RULE:
// if it's the proper fraction rule, copy it into the
// correct element of fractionRules
fractionRules[1] = rule;
tempRules.remove(i);
break;
case NFRule.MASTER_RULE:
// if it's the master rule, copy it into the
// correct element of fractionRules
fractionRules[2] = rule;
tempRules.remove(i);
break;
default:
// if it's a regular rule that already knows its base value,
// check to make sure the rules are in order, and update
// the default base value for the next rule
if (rule.getBaseValue() < defaultBaseValue) {
throw new IllegalArgumentException("Rules are not in order, base: " +
rule.getBaseValue() + " < " + defaultBaseValue);
}
defaultBaseValue = rule.getBaseValue();
if (!isFractionRuleSet) {
++defaultBaseValue;
}
++i;
break;
}
}
// finally, we can copy the rules from the vector into a
// fixed-length array
rules = new NFRule[tempRules.size()];
tempRules.toArray(rules);
}
/**
* Flags this rule set as a fraction rule set. This function is
* called during the construction process once we know this rule
* set is a fraction rule set. We don't know a rule set is a
* fraction rule set until we see it used somewhere. This function
* is not ad must not be called at any time other than during
* construction of a RuleBasedNumberFormat.
*/
public void makeIntoFractionRuleSet() {
isFractionRuleSet = true;
}
//-----------------------------------------------------------------------
// boilerplate
//-----------------------------------------------------------------------
/**
* Compares two rule sets for equality.
* @param that The other rule set
* @return true if the two rule sets are functionally equivalent.
*/
public boolean equals(Object that) {
// if different classes, they're not equal
if (!(that instanceof NFRuleSet)) {
return false;
} else {
// otherwise, compare the members one by one...
NFRuleSet that2 = (NFRuleSet)that;
if (!name.equals(that2.name)
|| !Utility.objectEquals(negativeNumberRule, that2.negativeNumberRule)
|| !Utility.objectEquals(fractionRules[0], that2.fractionRules[0])
|| !Utility.objectEquals(fractionRules[1], that2.fractionRules[1])
|| !Utility.objectEquals(fractionRules[2], that2.fractionRules[2])
|| rules.length != that2.rules.length
|| isFractionRuleSet != that2.isFractionRuleSet)
{
return false;
}
// ...then compare the rule lists...
for (int i = 0; i < rules.length; i++) {
if (!rules[i].equals(that2.rules[i])) {
return false;
}
}
// ...and if we make it here, they're equal
return true;
}
}
public int hashCode() {
assert false : "hashCode not designed";
return 42;
}
/**
* Builds a textual representation of a rule set.
* @return A textual representation of a rule set. This won't
* necessarily be the same description that the rule set was
* constructed with, but it will produce the same results.
*/
public String toString() {
StringBuilder result = new StringBuilder();
// the rule set name goes first...
result.append(name).append(":\n");
// followed by the regular rules...
for (int i = 0; i < rules.length; i++) {
result.append(" ").append(rules[i].toString()).append("\n");
}
// followed by the special rules (if they exist)
if (negativeNumberRule != null) {
result.append(" ").append(negativeNumberRule.toString()).append("\n");
}
if (fractionRules[0] != null) {
result.append(" ").append(fractionRules[0].toString()).append("\n");
}
if (fractionRules[1] != null) {
result.append(" ").append(fractionRules[1].toString()).append("\n");
}
if (fractionRules[2] != null) {
result.append(" ").append(fractionRules[2].toString()).append("\n");
}
return result.toString();
}
//-----------------------------------------------------------------------
// simple accessors
//-----------------------------------------------------------------------
/**
* Says whether this rule set is a fraction rule set.
* @return true if this rule is a fraction rule set; false if it isn't
*/
public boolean isFractionSet() {
return isFractionRuleSet;
}
/**
* Returns the rule set's name
* @return The rule set's name
*/
public String getName() {
return name;
}
/**
* Return true if the rule set is public.
* @return true if the rule set is public
*/
public boolean isPublic() {
return !name.startsWith("%%");
}
/**
* Return true if the rule set can be used for parsing.
* @return true if the rule set can be used for parsing.
*/
public boolean isParseable() {
return isParseable;
}
//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------
/**
* Formats a long. Selects an appropriate rule and dispatches
* control to it.
* @param number The number being formatted
* @param toInsertInto The string where the result is to be placed
* @param pos The position in toInsertInto where the result of
* this operation is to be inserted
*/
public void format(long number, StringBuffer toInsertInto, int pos) {
NFRule applicableRule = findNormalRule(number);
if (++recursionCount >= RECURSION_LIMIT) {
recursionCount = 0;
throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
}
applicableRule.doFormat(number, toInsertInto, pos);
--recursionCount;
}
/**
* Formats a double. Selects an appropriate rule and dispatches
* control to it.
* @param number The number being formatted
* @param toInsertInto The string where the result is to be placed
* @param pos The position in toInsertInto where the result of
* this operation is to be inserted
*/
public void format(double number, StringBuffer toInsertInto, int pos) {
NFRule applicableRule = findRule(number);
if (++recursionCount >= RECURSION_LIMIT) {
recursionCount = 0;
throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
}
applicableRule.doFormat(number, toInsertInto, pos);
--recursionCount;
}
/**
* Selects an appropriate rule for formatting the number.
* @param number The number being formatted.
* @return The rule that should be used to format it
*/
private NFRule findRule(double number) {
// if this is a fraction rule set, use findFractionRuleSetRule()
if (isFractionRuleSet) {
return findFractionRuleSetRule(number);
}
// if the number is negative, return the negative number rule
// (if there isn't a negative-number rule, we pretend it's a
// positive number)
if (number < 0) {
if (negativeNumberRule != null) {
return negativeNumberRule;
} else {
number = -number;
}
}
// if the number isn't an integer, we use one f the fraction rules...
if (number != Math.floor(number)) {
// if the number is between 0 and 1, return the proper
// fraction rule
if (number < 1 && fractionRules[1] != null) {
return fractionRules[1];
}
// otherwise, return the improper fraction rule
else if (fractionRules[0] != null) {
return fractionRules[0];
}
}
// if there's a master rule, use it to format the number
if (fractionRules[2] != null) {
return fractionRules[2];
} else {
// and if we haven't yet returned a rule, use findNormalRule()
// to find the applicable rule
return findNormalRule(Math.round(number));
}
}
/**
* If the value passed to findRule() is a positive integer, findRule()
* uses this function to select the appropriate rule. The result will
* generally be the rule with the highest base value less than or equal
* to the number. There is one exception to this: If that rule has
* two substitutions and a base value that is not an even multiple of
* its divisor, and the number itself IS an even multiple of the rule's
* divisor, then the result will be the rule that preceded the original
* result in the rule list. (This behavior is known as the "rollback
* rule", and is used to handle optional text: a rule with optional
* text is represented internally as two rules, and the rollback rule
* selects appropriate between them. This avoids things like "two
* hundred zero".)
* @param number The number being formatted
* @return The rule to use to format this number
*/
private NFRule findNormalRule(long number) {
// if this is a fraction rule set, use findFractionRuleSetRule()
// to find the rule (we should only go into this clause if the
// value is 0)
if (isFractionRuleSet) {
return findFractionRuleSetRule(number);
}
// if the number is negative, return the negative-number rule
// (if there isn't one, pretend the number is positive)
if (number < 0) {
if (negativeNumberRule != null) {
return negativeNumberRule;
} else {
number = -number;
}
}
// we have to repeat the preceding two checks, even though we
// do them in findRule(), because the version of format() that
// takes a long bypasses findRule() and goes straight to this
// function. This function does skip the fraction rules since
// we know the value is an integer (it also skips the master
// rule, since it's considered a fraction rule. Skipping the
// master rule in this function is also how we avoid infinite
// recursion)
// binary-search the rule list for the applicable rule
// (a rule is used for all values from its base value to
// the next rule's base value)
int lo = 0;
int hi = rules.length;
if (hi > 0) {
while (lo < hi) {
int mid = (lo + hi) >>> 1;
long ruleBaseValue = rules[mid].getBaseValue();
if (ruleBaseValue == number) {
return rules[mid];
}
else if (ruleBaseValue > number) {
hi = mid;
}
else {
lo = mid + 1;
}
}
if (hi == 0) { // bad rule set
throw new IllegalStateException("The rule set " + name + " cannot format the value " + number);
}
NFRule result = rules[hi - 1];
// use shouldRollBack() to see whether we need to invoke the
// rollback rule (see shouldRollBack()'s documentation for
// an explanation of the rollback rule). If we do, roll back
// one rule and return that one instead of the one we'd normally
// return
if (result.shouldRollBack(number)) {
if (hi == 1) { // bad rule set
throw new IllegalStateException("The rule set " + name + " cannot roll back from the rule '" +
result + "'");
}
result = rules[hi - 2];
}
return result;
}
// else use the master rule
return fractionRules[2];
}
/**
* If this rule is a fraction rule set, this function is used by
* findRule() to select the most appropriate rule for formatting
* the number. Basically, the base value of each rule in the rule
* set is treated as the denominator of a fraction. Whichever
* denominator can produce the fraction closest in value to the
* number passed in is the result. If there's a tie, the earlier
* one in the list wins. (If there are two rules in a row with the
* same base value, the first one is used when the numerator of the
* fraction would be 1, and the second rule is used the rest of the
* time.
* @param number The number being formatted (which will always be
* a number between 0 and 1)
* @return The rule to use to format this number
*/
private NFRule findFractionRuleSetRule(double number) {
// the obvious way to do this (multiply the value being formatted
// by each rule's base value until you get an integral result)
// doesn't work because of rounding error. This method is more
// accurate
// find the least common multiple of the rules' base values
// and multiply this by the number being formatted. This is
// all the precision we need, and we can do all of the rest
// of the math using integer arithmetic
long leastCommonMultiple = rules[0].getBaseValue();
for (int i = 1; i < rules.length; i++) {
leastCommonMultiple = lcm(leastCommonMultiple, rules[i].getBaseValue());
}
long numerator = Math.round(number * leastCommonMultiple);
// for each rule, do the following...
long tempDifference;
long difference = Long.MAX_VALUE;
int winner = 0;
for (int i = 0; i < rules.length; i++) {
// "numerator" is the numerator of the fraction is the
// denominator is the LCD. The numerator if the the rule's
// base value is the denominator is "numerator" times the
// base value divided by the LCD. Here we check to see if
// that's an integer, and if not, how close it is to being
// an integer.
tempDifference = numerator * rules[i].getBaseValue() % leastCommonMultiple;
// normalize the result of the above calculation: we want
// the numerator's distance from the CLOSEST multiple
// of the LCD
if (leastCommonMultiple - tempDifference < tempDifference) {
tempDifference = leastCommonMultiple - tempDifference;
}
// if this is as close as we've come, keep track of how close
// that is, and the line number of the rule that did it. If
// we've scored a direct hit, we don't have to look at any more
// rules
if (tempDifference < difference) {
difference = tempDifference;
winner = i;
if (difference == 0) {
break;
}
}
}
// if we have two successive rules that both have the winning base
// value, then the first one (the one we found above) is used if
// the numerator of the fraction is 1 and the second one is used if
// the numerator of the fraction is anything else (this lets us
// do things like "one third"/"two thirds" without having to define
// a whole bunch of extra rule sets)
if (winner + 1 < rules.length
&& rules[winner + 1].getBaseValue() == rules[winner].getBaseValue()) {
if (Math.round(number * rules[winner].getBaseValue()) < 1
|| Math.round(number * rules[winner].getBaseValue()) >= 2) {
++winner;
}
}
// finally, return the winning rule
return rules[winner];
}
/**
* Calculates the least common multiple of x and y.
*/
private static long lcm(long x, long y) {
// binary gcd algorithm from Knuth, "The Art of Computer Programming,"
// vol. 2, 1st ed., pp. 298-299
long x1 = x;
long y1 = y;
int p2 = 0;
while ((x1 & 1) == 0 && (y1 & 1) == 0) {
++p2;
x1 >>= 1;
y1 >>= 1;
}
long t;
if ((x1 & 1) == 1) {
t = -y1;
} else {
t = x1;
}
while (t != 0) {
while ((t & 1) == 0) {
t >>= 1;
}
if (t > 0) {
x1 = t;
} else {
y1 = -t;
}
t = x1 - y1;
}
long gcd = x1 << p2;
// x * y == gcd(x, y) * lcm(x, y)
return x / gcd * y;
}
//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------
/**
* Parses a string. Matches the string to be parsed against each
* of its rules (with a base value less than upperBound) and returns
* the value produced by the rule that matched the most characters
* in the source string.
* @param text The string to parse
* @param parsePosition The initial position is ignored and assumed
* to be 0. On exit, this object has been updated to point to the
* first character position this rule set didn't consume.
* @param upperBound Limits the rules that can be allowed to match.
* Only rules whose base values are strictly less than upperBound
* are considered.
* @return The numerical result of parsing this string. This will
* be the matching rule's base value, composed appropriately with
* the results of matching any of its substitutions. The object
* will be an instance of Long if it's an integral value; otherwise,
* it will be an instance of Double. This function always returns
* a valid object: If nothing matched the input string at all,
* this function returns new Long(0), and the parse position is
* left unchanged.
*/
public Number parse(String text, ParsePosition parsePosition, double upperBound) {
// try matching each rule in the rule set against the text being
// parsed. Whichever one matches the most characters is the one
// that determines the value we return.
ParsePosition highWaterMark = new ParsePosition(0);
Number result = Long.valueOf(0);
Number tempResult = null;
// dump out if there's no text to parse
if (text.length() == 0) {
return result;
}
// start by trying the negative number rule (if there is one)
if (negativeNumberRule != null) {
tempResult = negativeNumberRule.doParse(text, parsePosition, false, upperBound);
if (parsePosition.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark.setIndex(parsePosition.getIndex());
}
// commented out because the error-index API on ParsePosition isn't there in 1.1.x
// if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
// highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
// }
parsePosition.setIndex(0);
}
// then try each of the fraction rules
for (int i = 0; i < 3; i++) {
if (fractionRules[i] != null) {
tempResult = fractionRules[i].doParse(text, parsePosition, false, upperBound);
if (parsePosition.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark.setIndex(parsePosition.getIndex());
}
// commented out because the error-index API on ParsePosition isn't there in 1.1.x
// if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
// highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
// }
parsePosition.setIndex(0);
}
}
// finally, go through the regular rules one at a time. We start
// at the end of the list because we want to try matching the most
// significant rule first (this helps ensure that we parse
// "five thousand three hundred six" as
// "(five thousand) (three hundred) (six)" rather than
// "((five thousand three) hundred) (six)"). Skip rules whose
// base values are higher than the upper bound (again, this helps
// limit ambiguity by making sure the rules that match a rule's
// are less significant than the rule containing the substitutions)/
for (int i = rules.length - 1; i >= 0 && highWaterMark.getIndex() < text.length(); i--) {
if (!isFractionRuleSet && rules[i].getBaseValue() >= upperBound) {
continue;
}
tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound);
if (parsePosition.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark.setIndex(parsePosition.getIndex());
}
// commented out because the error-index API on ParsePosition isn't there in 1.1.x
// if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
// highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
// }
parsePosition.setIndex(0);
}
// finally, update the parse position we were passed to point to the
// first character we didn't use, and return the result that
// corresponds to that string of characters
parsePosition.setIndex(highWaterMark.getIndex());
// commented out because the error-index API on ParsePosition isn't there in 1.1.x
// if (parsePosition.getIndex() == 0) {
// parsePosition.setErrorIndex(highWaterMark.getErrorIndex());
// }
return result;
}
}