// blob: 7178aa1086c24b4d820eafff76ea0c899858495d [file] [log] [blame]
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.CurrencyPluralInfoAffixProvider;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.DecimalFormatProperties.ParseMode;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo;
import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
import com.ibm.icu.impl.number.RoundingUtils;
import com.ibm.icu.number.NumberFormatter.GroupingStrategy;
import com.ibm.icu.number.Scale;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.ULocale;
/**
* Primary number parsing implementation class.
*
* @author sffc
*
*/
public class NumberParserImpl {
/**
 * Builds a parser for a locale and pattern string using mostly default settings: the currency is
 * fixed to USD, grouping uses the AUTO strategy, and the padding character is "@". Intended for
 * tests, not production.
 *
 * @param locale
 *            The locale supplying the symbols and grouping data.
 * @param pattern
 *            The decimal format pattern from which the affix matchers are generated.
 * @param parseFlags
 *            The parser settings defined in the PARSE_FLAG_* fields.
 * @return A frozen parser.
 */
public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {
    final NumberParserImpl simpleParser = new NumberParserImpl(parseFlags);
    final Currency usd = Currency.getInstance("USD");
    final DecimalFormatSymbols localeSymbols = DecimalFormatSymbols.getInstance(locale);
    final IgnorablesMatcher ignorablesMatcher = IgnorablesMatcher.getInstance(parseFlags);

    final AffixTokenMatcherFactory tokenFactory = new AffixTokenMatcherFactory();
    tokenFactory.currency = usd;
    tokenFactory.symbols = localeSymbols;
    tokenFactory.ignorables = ignorablesMatcher;
    tokenFactory.locale = locale;
    tokenFactory.parseFlags = parseFlags;

    // The affix matchers derived from the pattern go first.
    final ParsedPatternInfo parsedPattern = PatternStringParser.parseToPatternInfo(pattern);
    AffixMatcher.createMatchers(parsedPattern, simpleParser, tokenFactory, ignorablesMatcher, parseFlags);

    final Grouper autoGrouper = Grouper.forStrategy(GroupingStrategy.AUTO)
            .withLocaleData(locale, parsedPattern);

    // The remaining matchers, listed in the order in which they are registered.
    final NumberParseMatcher[] standardMatchers = {
            ignorablesMatcher,
            DecimalMatcher.getInstance(localeSymbols, autoGrouper, parseFlags),
            MinusSignMatcher.getInstance(localeSymbols, false),
            PlusSignMatcher.getInstance(localeSymbols, false),
            PercentMatcher.getInstance(localeSymbols),
            PermilleMatcher.getInstance(localeSymbols),
            NanMatcher.getInstance(localeSymbols, parseFlags),
            InfinityMatcher.getInstance(localeSymbols),
            PaddingMatcher.getInstance("@"),
            ScientificMatcher.getInstance(localeSymbols, autoGrouper),
            CombinedCurrencyMatcher.getInstance(usd, localeSymbols, parseFlags),
            new RequireNumberValidator(),
    };
    for (NumberParseMatcher standardMatcher : standardMatchers) {
        simpleParser.addMatcher(standardMatcher);
    }

    simpleParser.freeze();
    return simpleParser;
}
/**
 * One-shot parse: builds a non-currency parser from the properties, runs a greedy parse of the
 * whole input starting at index 0, and reports the outcome through {@code ppos}. Used for
 * testing, not production.
 *
 * @param input
 *            The string to parse.
 * @param ppos
 *            Receives the end index on success, or the error index on failure.
 * @param properties
 *            The property bag used to build the parser.
 * @param symbols
 *            The locale's symbols.
 * @return The parsed number, or null if the parse did not succeed.
 */
public static Number parseStatic(
        String input,
        ParsePosition ppos,
        DecimalFormatProperties properties,
        DecimalFormatSymbols symbols) {
    final NumberParserImpl oneShotParser = createParserFromProperties(properties, symbols, false);
    final ParsedNumber outcome = new ParsedNumber();
    oneShotParser.parse(input, true, outcome);

    if (!outcome.success()) {
        ppos.setErrorIndex(outcome.charEnd);
        return null;
    }
    ppos.setIndex(outcome.charEnd);
    return outcome.getNumber();
}
/**
 * One-shot currency parse: builds a currency parser from the properties, runs a greedy parse of
 * the whole input starting at index 0, and reports the outcome through {@code ppos}. Used for
 * testing, not production.
 *
 * @param input
 *            The string to parse.
 * @param ppos
 *            Receives the end index on success, or the error index on failure.
 * @param properties
 *            The property bag used to build the parser.
 * @param symbols
 *            The locale's symbols.
 * @return The parsed number paired with the currency that was matched, or null if the parse did
 *         not succeed.
 */
public static CurrencyAmount parseStaticCurrency(
        String input,
        ParsePosition ppos,
        DecimalFormatProperties properties,
        DecimalFormatSymbols symbols) {
    final NumberParserImpl oneShotParser = createParserFromProperties(properties, symbols, true);
    final ParsedNumber outcome = new ParsedNumber();
    oneShotParser.parse(input, true, outcome);

    if (!outcome.success()) {
        ppos.setErrorIndex(outcome.charEnd);
        return null;
    }
    ppos.setIndex(outcome.charEnd);
    // A successful currency parse is expected to have recorded a currency code.
    assert outcome.currencyCode != null;
    final Currency matchedCurrency = Currency.getInstance(outcome.currencyCode);
    return new CurrencyAmount(outcome.getNumber(), matchedCurrency);
}
/**
 * Creates a non-currency parser for the given locale, built from the properties of the pattern
 * "0" and the locale's decimal format symbols.
 *
 * @param loc
 *            The locale whose symbols should be used.
 * @return The parser produced by {@link #createParserFromProperties}.
 */
public static NumberParserImpl createDefaultParserForLocale(ULocale loc) {
    final DecimalFormatSymbols localeSymbols = DecimalFormatSymbols.getInstance(loc);
    return createParserFromProperties(
            PatternStringParser.parseToProperties("0"),
            localeSymbols,
            false);
}
/**
* Creates a parser from the given DecimalFormatProperties. This is the endpoint used by
* DecimalFormat in production code.
*
* <p>The method first derives the parse flags from the properties, then registers the matchers
* in a fixed order (affixes, currency, percent/permille, signs, NaN/infinity, padding,
* ignorables, decimal, scientific), then the validators and the optional multiplier handler,
* and finally freezes the parser.
*
* @param properties
* The property bag.
* @param symbols
* The locale's symbols.
* @param parseCurrency
* True to force a currency match and use monetary separators; false otherwise.
* @return An immutable parser object.
*/
public static NumberParserImpl createParserFromProperties(
DecimalFormatProperties properties,
DecimalFormatSymbols symbols,
boolean parseCurrency) {
ULocale locale = symbols.getULocale();
// Affixes come from the CurrencyPluralInfo when the properties carry one, and from the
// plain properties otherwise.
AffixPatternProvider affixProvider;
if (properties.getCurrencyPluralInfo() == null) {
affixProvider = new PropertiesAffixPatternProvider(properties);
} else {
affixProvider = new CurrencyPluralInfoAffixProvider(properties.getCurrencyPluralInfo(), properties);
}
Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
Grouper grouper = Grouper.forProperties(properties);
// Translate the individual properties into the PARSE_FLAG_* bit field.
int parseFlags = 0;
if (!properties.getParseCaseSensitive()) {
parseFlags |= ParsingUtils.PARSE_FLAG_IGNORE_CASE;
}
if (properties.getParseIntegerOnly()) {
parseFlags |= ParsingUtils.PARSE_FLAG_INTEGER_ONLY;
}
if (properties.getParseToBigDecimal()) {
parseFlags |= ParsingUtils.PARSE_FLAG_FORCE_BIG_DECIMAL;
}
if (properties.getSignAlwaysShown()) {
parseFlags |= ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED;
}
// Strict mode switches on all of the strictness flags; lenient mode instead sets the
// unpaired-affixes flag.
if (isStrict) {
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES;
parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX;
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES;
} else {
parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
}
// A non-positive primary grouping size is treated as "grouping disabled".
if (grouper.getPrimary() <= 0) {
parseFlags |= ParsingUtils.PARSE_FLAG_GROUPING_DISABLED;
}
// Monetary separators apply when a currency parse was requested or the affixes contain a
// currency sign.
if (parseCurrency || affixProvider.hasCurrencySign()) {
parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
}
if (!parseCurrency) {
parseFlags |= ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES;
}
NumberParserImpl parser = new NumberParserImpl(parseFlags);
IgnorablesMatcher ignorables = IgnorablesMatcher.getInstance(parseFlags);
// The factory is handed to AffixMatcher.createMatchers below together with the parser.
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.currency = currency;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
factory.parseFlags = parseFlags;
//////////////////////
/// AFFIX MATCHERS ///
//////////////////////
// Build the AffixMatchers from the affix provider; the parser is passed in, presumably so
// that the generated matchers can be registered on it.
AffixMatcher.createMatchers(affixProvider, parser, factory, ignorables, parseFlags);
////////////////////////
/// CURRENCY MATCHER ///
////////////////////////
// Same condition as the one that sets PARSE_FLAG_MONETARY_SEPARATORS above.
if (parseCurrency || affixProvider.hasCurrencySign()) {
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags));
}
///////////////
/// PERCENT ///
///////////////
// ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern,
// and to maintain regressive behavior, divide by 100 even if no percent sign is present.
// These standalone matchers are registered in lenient mode only.
if (!isStrict && affixProvider.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
parser.addMatcher(PercentMatcher.getInstance(symbols));
}
if (!isStrict && affixProvider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
parser.addMatcher(PermilleMatcher.getInstance(symbols));
}
///////////////////////////////
/// OTHER STANDARD MATCHERS ///
///////////////////////////////
// Standalone plus/minus sign matchers are registered in lenient mode only.
if (!isStrict) {
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
}
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(InfinityMatcher.getInstance(symbols));
// A dedicated padding matcher is added only when a pad string is set and the ignorables
// set does not already contain it.
String padString = properties.getPadString();
if (padString != null && !ignorables.getSet().contains(padString)) {
parser.addMatcher(PaddingMatcher.getInstance(padString));
}
parser.addMatcher(ignorables);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
// NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter
if (!properties.getParseNoExponent() || properties.getMinimumExponentDigits() > 0) {
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
}
//////////////////
/// VALIDATORS ///
//////////////////
// A number is always required; affixes are required in strict mode and a currency is
// required when parseCurrency is set.
parser.addMatcher(new RequireNumberValidator());
if (isStrict) {
parser.addMatcher(new RequireAffixValidator());
}
if (parseCurrency) {
parser.addMatcher(new RequireCurrencyValidator());
}
if (properties.getDecimalPatternMatchRequired()) {
// The pattern counts as having a decimal separator when the separator is always shown
// or when fraction digits are allowed.
boolean patternHasDecimalSeparator = properties.getDecimalSeparatorAlwaysShown()
|| properties.getMaximumFractionDigits() != 0;
parser.addMatcher(RequireDecimalSeparatorValidator.getInstance(patternHasDecimalSeparator));
}
// The multiplier takes care of scaling percentages.
Scale multiplier = RoundingUtils.scaleFromProperties(properties);
if (multiplier != null) {
parser.addMatcher(new MultiplierParseHandler(multiplier));
}
// No further matchers may be added after this point.
parser.freeze();
return parser;
}
// Bit field of ParsingUtils.PARSE_FLAG_* settings; fixed at construction time.
private final int parseFlags;
// Matchers in registration order; appended to by addMatcher/addMatchers.
private final List<NumberParseMatcher> matchers;
// Set by freeze(). The add methods assert that it is false; parse() asserts that it is true.
private boolean frozen;
/**
 * Constructs an unfrozen parser that has no matchers registered yet.
 *
 * @param parseFlags
 *            The parser settings defined in the PARSE_FLAG_* fields.
 */
public NumberParserImpl(int parseFlags) {
    this.parseFlags = parseFlags;
    this.matchers = new ArrayList<>();
    this.frozen = false;
}
/**
 * Appends a single matcher to the end of the matcher list. Must not be called once the parser
 * has been frozen (checked by assertion only).
 *
 * @param matcher
 *            The matcher to register.
 */
public void addMatcher(NumberParseMatcher matcher) {
    assert !frozen;
    matchers.add(matcher);
}
/**
 * Appends every matcher of the given collection to the end of the matcher list. Must not be
 * called once the parser has been frozen (checked by assertion only).
 *
 * @param matchers
 *            The matchers to register.
 */
public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
    assert !frozen;
    // The parameter shadows the field, so the field has to be qualified explicitly.
    final List<NumberParseMatcher> registered = this.matchers;
    registered.addAll(matchers);
}
/**
 * Marks the parser as frozen. Parsing asserts that the parser is frozen, and the add methods
 * assert that it is not.
 */
public void freeze() {
    this.frozen = true;
}
/**
 * @return The PARSE_FLAG_* settings this parser was constructed with.
 */
public int getParseFlags() {
    return this.parseFlags;
}
/**
 * Convenience overload that parses the input starting at index 0.
 *
 * @param input
 *            The string to parse.
 * @param greedy
 *            Whether to use the greedy code path.
 * @param result
 *            Output variable to store results.
 */
public void parse(String input, boolean greedy, ParsedNumber result) {
    final int fromBeginning = 0;
    this.parse(input, fromBeginning, greedy, result);
}
/**
 * Main entry point of the parsing code path. Runs either the greedy or the longest-match
 * strategy over the input and then gives every matcher, and finally the result itself, a chance
 * to post-process.
 *
 * @param input
 *            The string to parse. Declared as String rather than CharSequence to enforce
 *            assumptions about immutability (a CharSequence is not guaranteed to be immutable).
 * @param start
 *            The index into the string at which to start parsing.
 * @param greedy
 *            Whether to use the faster but potentially less accurate greedy code path.
 * @param result
 *            Output variable to store results.
 */
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
    assert frozen;
    assert start >= 0 && start < input.length();

    final boolean foldCase = (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) != 0;
    final StringSegment remaining = new StringSegment(input, foldCase);
    remaining.adjustOffset(start);

    if (greedy) {
        parseGreedy(remaining, result);
    } else {
        // The recursion counter counts upwards and stops the recursion when it reaches 0.
        // Seeding it with 1 means it never reaches 0; seeding it with -100 gives an arbitrary
        // safety limit of 100 levels.
        final boolean unboundedRecursion =
                (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION) != 0;
        final int recursionSeed = unboundedRecursion ? 1 : -100;
        parseLongestRecursive(remaining, result, recursionSeed);
    }

    for (NumberParseMatcher registered : matchers) {
        registered.postProcess(result);
    }
    result.postProcess();
}
/**
 * Greedy strategy: walks the matcher list in order, and as soon as a matcher consumes any
 * input, accepts that match and restarts from the first matcher. Stops when the segment is
 * empty or when no matcher is able to advance.
 *
 * @param segment
 *            The remaining input; its offset is advanced by successful matches.
 * @param result
 *            Output variable updated by the matchers.
 */
private void parseGreedy(StringSegment segment, ParsedNumber result) {
    // Note: deliberately iterative rather than recursive in order to avoid stack overflow.
    int index = 0;
    while (index < matchers.size()) {
        // Base case: nothing left to consume.
        if (segment.length() == 0) {
            return;
        }

        final NumberParseMatcher current = matchers.get(index);
        boolean advanced = false;
        if (current.smokeTest(segment)) {
            final int offsetBefore = segment.getOffset();
            current.match(segment, result);
            advanced = segment.getOffset() != offsetBefore;
        }

        // Greedy heuristic: after a successful match start over with the first matcher;
        // otherwise (smoke test failed or nothing was consumed) try the next one.
        index = advanced ? 0 : index + 1;
    }
    // NOTE: If we get here, the greedy parse completed without consuming the entire string.
}
/**
 * Non-greedy strategy: for every matcher that passes its smoke test, tries prefixes of
 * increasing length (one code point at a time), recurses on the remainder whenever a prefix was
 * consumed completely, and keeps the best candidate in {@code result}.
 *
 * @param segment
 *            The remaining input; its offset and length are restored after every attempt.
 * @param result
 *            Holds the state to start from and receives the best candidate found.
 * @param recursionLevels
 *            Counter that is incremented on each recursive call; the recursion stops when it
 *            is 0.
 */
private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) {
    // Base case (nothing left to consume) and safety against stack overflow.
    if (segment.length() == 0 || recursionLevels == 0) {
        return;
    }

    // TODO: Give a nice way for the matcher to reset the ParsedNumber?
    final ParsedNumber baseline = new ParsedNumber();
    baseline.copyFrom(result);
    final ParsedNumber attempt = new ParsedNumber();
    final int startOffset = segment.getOffset();

    for (NumberParseMatcher matcher : matchers) {
        if (!matcher.smokeTest(segment)) {
            continue;
        }

        // In a non-greedy parse, we attempt all possible matches and pick the best.
        int prefixLength = 0;
        boolean wantsMore = true;
        while (wantsMore && prefixLength < segment.length()) {
            prefixLength += Character.charCount(segment.codePointAt(prefixLength));

            // Run the matcher on a prefix of the current length, starting from the baseline.
            attempt.copyFrom(baseline);
            segment.setLength(prefixLength);
            wantsMore = matcher.match(segment, attempt);
            segment.resetLength();

            // If the entire prefix was consumed, recurse on what follows it.
            if (segment.getOffset() - startOffset == prefixLength) {
                parseLongestRecursive(segment, attempt, recursionLevels + 1);
                if (attempt.isBetterThan(result)) {
                    result.copyFrom(attempt);
                }
            }

            // The segment is re-used, so rewind it. This has no effect if the matcher did not
            // consume any chars. The loop ends once the matcher no longer wants to see the
            // next char.
            segment.setOffset(startOffset);
        }
    }
}
/**
 * @return A debug representation listing the registered matchers.
 */
@Override
public String toString() {
    final StringBuilder description = new StringBuilder("<NumberParserImpl matchers=");
    description.append(matchers.toString());
    description.append(">");
    return description.toString();
}
}