// © 2018 and later: Unicode, Inc. and others.
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| |
| #include "numbertest.h" |
| #include "numparse_impl.h" |
| #include "static_unicode_sets.h" |
| #include "unicode/dcfmtsym.h" |
| #include "unicode/testlog.h" |
| |
| #include <cmath> |
| #include <numparse_affixes.h> |
| |
| void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { |
| if (exec) { |
| logln("TestSuite NumberParserTest: "); |
| } |
| TESTCASE_AUTO_BEGIN; |
| TESTCASE_AUTO(testBasic); |
| TESTCASE_AUTO(testSeriesMatcher); |
| TESTCASE_AUTO(testCombinedCurrencyMatcher); |
| TESTCASE_AUTO(testAffixPatternMatcher); |
| TESTCASE_AUTO(test20360_BidiOverflow); |
| TESTCASE_AUTO(testInfiniteRecursion); |
| TESTCASE_AUTO_END; |
| } |
| |
| void NumberParserTest::testBasic() { |
| IcuTestErrorCode status(*this, "testBasic"); |
| |
| static const struct TestCase { |
| int32_t flags; |
| const char16_t* inputString; |
| const char16_t* patternString; |
| int32_t expectedCharsConsumed; |
| double expectedResultDouble; |
| } cases[] = {{3, u"51423", u"0", 5, 51423.}, |
| {3, u"51423x", u"0", 5, 51423.}, |
| {3, u" 51423", u"0", 6, 51423.}, |
| {3, u"51423 ", u"0", 5, 51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"0", 10, 51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏx", u"0", 10, 51423.}, |
| {3, u" ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"0", 11, 51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ ", u"0", 10, 51423.}, |
| {7, u"51,423", u"#,##,##0", 6, 51423.}, |
| {7, u" 51,423", u"#,##,##0", 7, 51423.}, |
| {7, u"51,423 ", u"#,##,##0", 6, 51423.}, |
| {7, u"51,423,", u"#,##,##0", 6, 51423.}, |
| {7, u"51,423,,", u"#,##,##0", 6, 51423.}, |
| {7, u"51,423.5", u"#,##,##0", 8, 51423.5}, |
| {7, u"51,423.5,", u"#,##,##0", 8, 51423.5}, |
| {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5}, |
| {7, u"51,423.5.", u"#,##,##0", 8, 51423.5}, |
| {7, u"51,423.5..", u"#,##,##0", 8, 51423.5}, |
| {7, u"๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u"#,##,##0", 11, 51423.}, |
| {7, u"๐ณ,๐ด๐ต,๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u"#,##,##0", 19, 78951423.}, |
| {7, u"๐ณ๐ด,๐ต๐ฑ๐ญ.๐ฐ๐ฎ๐ฏ", u"#,##,##0", 18, 78951.423}, |
| {7, u"๐ณ๐ด,๐ฌ๐ฌ๐ฌ", u"#,##,##0", 11, 78000.}, |
| {7, u"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฌ๐ฌ", u"#,##,##0", 18, 78000.}, |
| {7, u"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u"#,##,##0", 18, 78000.023}, |
| {7, u"๐ณ๐ด.๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u"#,##,##0", 11, 78.}, |
| {7, u"1,", u"#,##,##0", 1, 1.}, |
| {7, u"1,,", u"#,##,##0", 1, 1.}, |
| {7, u"1.,", u"#,##,##0", 2, 1.}, |
| {3, u"1,.", u"#,##,##0", 3, 1.}, |
| {7, u"1..", u"#,##,##0", 2, 1.}, |
| {3, u",1", u"#,##,##0", 2, 1.}, |
| {3, u"1,1", u"#,##,##0", 1, 1.}, |
| {3, u"1,1,", u"#,##,##0", 1, 1.}, |
| {3, u"1,1,,", u"#,##,##0", 1, 1.}, |
| {3, u"1,1,1", u"#,##,##0", 1, 1.}, |
| {3, u"1,1,1,", u"#,##,##0", 1, 1.}, |
| {3, u"1,1,1,,", u"#,##,##0", 1, 1.}, |
| {3, u"-51423", u"0", 6, -51423.}, |
| {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after |
| {3, u"+51423", u"0", 6, 51423.}, |
| {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after |
| {3, u"%51423", u"0", 6, 51423.}, |
| {3, u"51423%", u"0", 6, 51423.}, |
| {3, u"51423%%", u"0", 6, 51423.}, |
| {3, u"โฐ51423", u"0", 6, 51423.}, |
| {3, u"51423โฐ", u"0", 6, 51423.}, |
| {3, u"51423โฐโฐ", u"0", 6, 51423.}, |
| {3, u"โ", u"0", 1, INFINITY}, |
| {3, u"-โ", u"0", 2, -INFINITY}, |
| {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? |
| {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? |
| {3, u"a51423US dollars", u"a0ยคยคยค", 16, 51423.}, |
| {3, u"a 51423 US dollars", u"a0ยคยคยค", 18, 51423.}, |
| {3, u"514.23 USD", u"ยค0", 10, 514.23}, |
| {3, u"514.23 GBP", u"ยค0", 10, 514.23}, |
| {3, u"a ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u"a0b", 14, 51423.}, |
| {3, u"-a ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u"a0b", 15, -51423.}, |
| {3, u"a -๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u"a0b", 15, -51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"[0];(0)", 10, 51423.}, |
| {3, u"[๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"[0];(0)", 11, 51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ]", u"[0];(0)", 11, 51423.}, |
| {3, u"[๐ฑ๐ญ๐ฐ๐ฎ๐ฏ]", u"[0];(0)", 12, 51423.}, |
| {3, u"(๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"[0];(0)", 11, -51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ)", u"[0];(0)", 11, -51423.}, |
| {3, u"(๐ฑ๐ญ๐ฐ๐ฎ๐ฏ)", u"[0];(0)", 12, -51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"{0};{0}", 10, 51423.}, |
| {3, u"{๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"{0};{0}", 11, 51423.}, |
| {3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ}", u"{0};{0}", 11, 51423.}, |
| {3, u"{๐ฑ๐ญ๐ฐ๐ฎ๐ฏ}", u"{0};{0}", 12, 51423.}, |
| {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number |
| {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b" |
| {3, u"๐ฑ.๐ญ๐ฐ๐ฎE๐ฏ", u"0", 12, 5142.}, |
| {3, u"๐ฑ.๐ญ๐ฐ๐ฎE-๐ฏ", u"0", 13, 0.005142}, |
| {3, u"๐ฑ.๐ญ๐ฐ๐ฎe-๐ฏ", u"0", 13, 0.005142}, |
| {3, u"5.142e+3", u"0", 8, 5142.0 }, |
| {3, u"5.142\u200Ee+3", u"0", 9, 5142.0}, |
| {3, u"5.142e\u200E+3", u"0", 9, 5142.0}, |
| {3, u"5.142e+\u200E3", u"0", 9, 5142.0}, |
| {7, u"5,142.50 Canadian dollars", u"#,##,##0 ยคยคยค", 25, 5142.5}, |
| {3, u"a$ b5", u"a ยค b0", 5, 5.0}, |
| {3, u"๐บ1.23", u"๐บ0;๐ป0", 6, 1.23}, |
| {3, u"๐ป1.23", u"๐บ0;๐ป0", 6, -1.23}, |
| {3, u".00", u"0", 3, 0.0}, |
| {3, u" 1,234", u"a0", 35, 1234.}, // should not hang |
| {3, u"NaN", u"0", 3, NAN}, |
| {3, u"NaN E5", u"0", 6, NAN}, |
| {3, u"0", u"0", 1, 0.0}}; |
| |
| parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; |
| for (auto& cas : cases) { |
| UnicodeString inputString(cas.inputString); |
| UnicodeString patternString(cas.patternString); |
| LocalPointer<const NumberParserImpl> parser( |
| NumberParserImpl::createSimpleParser( |
| Locale("en"), patternString, parseFlags, status)); |
| if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { |
| continue; |
| } |
| UnicodeString message = |
| UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString(); |
| |
| if (0 != (cas.flags & 0x01)) { |
| // Test greedy code path |
| ParsedNumber resultObject; |
| parser->parse(inputString, true, resultObject, status); |
| assertTrue("Greedy Parse failed: " + message, resultObject.success()); |
| assertEquals("Greedy Parse failed: " + message, |
| cas.expectedCharsConsumed, resultObject.charEnd); |
| assertEquals("Greedy Parse failed: " + message, |
| cas.expectedResultDouble, resultObject.getDouble(status)); |
| } |
| |
| if (0 != (cas.flags & 0x02)) { |
| // Test slow code path |
| ParsedNumber resultObject; |
| parser->parse(inputString, false, resultObject, status); |
| assertTrue("Non-Greedy Parse failed: " + message, resultObject.success()); |
| assertEquals( |
| "Non-Greedy Parse failed: " + message, |
| cas.expectedCharsConsumed, |
| resultObject.charEnd); |
| assertEquals( |
| "Non-Greedy Parse failed: " + message, |
| cas.expectedResultDouble, |
| resultObject.getDouble(status)); |
| } |
| |
| if (0 != (cas.flags & 0x04)) { |
| // Test with strict separators |
| parser.adoptInstead( |
| NumberParserImpl::createSimpleParser( |
| Locale("en"), |
| patternString, |
| parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE, |
| status)); |
| ParsedNumber resultObject; |
| parser->parse(inputString, true, resultObject, status); |
| assertTrue("Strict Parse failed: " + message, resultObject.success()); |
| assertEquals("Strict Parse failed: " + message, |
| cas.expectedCharsConsumed, resultObject.charEnd); |
| assertEquals("Strict Parse failed: " + message, |
| cas.expectedResultDouble, resultObject.getDouble(status)); |
| } |
| } |
| } |
| |
| void NumberParserTest::testSeriesMatcher() { |
| IcuTestErrorCode status(*this, "testSeriesMatcher"); |
| |
| DecimalFormatSymbols symbols("en", status); |
| if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { |
| return; |
| } |
| PlusSignMatcher m0(symbols, false); |
| MinusSignMatcher m1(symbols, false); |
| IgnorablesMatcher m2(0); |
| PercentMatcher m3(symbols); |
| IgnorablesMatcher m4(0); |
| |
| ArraySeriesMatcher::MatcherArray matchers(5, status); |
| status.assertSuccess(); |
| matchers[0] = &m0; |
| matchers[1] = &m1; |
| matchers[2] = &m2; |
| matchers[3] = &m3; |
| matchers[4] = &m4; |
| ArraySeriesMatcher series(matchers, 5); |
| |
| assertFalse("", series.smokeTest(StringSegment(u"x", false))); |
| assertFalse("", series.smokeTest(StringSegment(u"-", false))); |
| assertTrue("", series.smokeTest(StringSegment(u"+", false))); |
| |
| static const struct TestCase { |
| const char16_t* input; |
| int32_t expectedOffset; |
| bool expectedMaybeMore; |
| } cases[] = {{u"", 0, true}, |
| {u" ", 0, false}, |
| {u"$", 0, false}, |
| {u"+", 0, true}, |
| {u" +", 0, false}, |
| {u"+-", 0, true}, |
| {u"+ -", 0, false}, |
| {u"+- ", 0, true}, |
| {u"+- $", 0, false}, |
| {u"+-%", 3, true}, |
| {u" +- % ", 0, false}, |
| {u"+- % ", 7, true}, |
| {u"+-%$", 3, false}}; |
| |
| for (auto& cas : cases) { |
| UnicodeString input(cas.input); |
| |
| StringSegment segment(input, false); |
| ParsedNumber result; |
| bool actualMaybeMore = series.match(segment, result, status); |
| int actualOffset = segment.getOffset(); |
| |
| assertEquals("'" + input + "'", cas.expectedOffset, actualOffset); |
| assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore); |
| } |
| } |
| |
| void NumberParserTest::testCombinedCurrencyMatcher() { |
| IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher"); |
| |
| IgnorablesMatcher ignorables(0); |
| Locale locale = Locale::getEnglish(); |
| |
| DecimalFormatSymbols dfs(locale, status); |
| if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { |
| return; |
| } |
| dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); |
| dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); |
| CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); |
| |
| AffixTokenMatcherSetupData affixSetupData = { |
| currencySymbols, {"en", status}, ignorables, "en", 0}; |
| AffixTokenMatcherWarehouse warehouse(&affixSetupData); |
| NumberParseMatcher& matcher = warehouse.currency(status); |
| affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY; |
| AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData); |
| NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status); |
| |
| static const struct TestCase { |
| const char16_t* input; |
| const char16_t* expectedCurrencyCode; |
| const char16_t* expectedNoForeignCurrencyCode; |
| } cases[]{{u"", u"", u""}, |
| {u"FOO", u"", u""}, |
| {u"USD", u"USD", u""}, |
| {u"$", u"USD", u""}, |
| {u"US dollars", u"USD", u""}, |
| {u"eu", u"", u""}, |
| {u"euros", u"EUR", u""}, |
| {u"ICU", u"ICU", u"ICU"}, |
| {u"IU$", u"ICU", u"ICU"}}; |
| for (auto& cas : cases) { |
| UnicodeString input(cas.input); |
| |
| { |
| StringSegment segment(input, false); |
| ParsedNumber result; |
| matcher.match(segment, result, status); |
| assertEquals( |
| "Parsing " + input, |
| cas.expectedCurrencyCode, |
| result.currencyCode); |
| assertEquals( |
| "Whole string on " + input, |
| cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(), |
| result.charEnd); |
| } |
| { |
| StringSegment segment(input, false); |
| ParsedNumber result; |
| matcherNoForeign.match(segment, result, status); |
| assertEquals( |
| "[no foreign] Parsing " + input, |
| cas.expectedNoForeignCurrencyCode, |
| result.currencyCode); |
| assertEquals( |
| "[no foreign] Whole string on " + input, |
| cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(), |
| result.charEnd); |
| } |
| } |
| } |
| |
| void NumberParserTest::testAffixPatternMatcher() { |
| IcuTestErrorCode status(*this, "testAffixPatternMatcher"); |
| Locale locale = Locale::getEnglish(); |
| IgnorablesMatcher ignorables(0); |
| |
| DecimalFormatSymbols dfs(locale, status); |
| dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); |
| dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); |
| CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); |
| |
| AffixTokenMatcherSetupData affixSetupData = { |
| currencySymbols, {"en", status}, ignorables, "en", 0}; |
| AffixTokenMatcherWarehouse warehouse(&affixSetupData); |
| |
| static const struct TestCase { |
| bool exactMatch; |
| const char16_t* affixPattern; |
| int32_t expectedMatcherLength; |
| const char16_t* sampleParseableString; |
| } cases[] = {{false, u"-", 1, u"-"}, |
| {false, u"+-%", 5, u"+-%"}, |
| {true, u"+-%", 3, u"+-%"}, |
| {false, u"ab c", 5, u"a bc"}, |
| {true, u"abc", 3, u"abc"}, |
| {false, u"hello-to+this%veryยคlongโฐstring", 59, u"hello-to+this%very USD longโฐstring"}}; |
| |
| for (auto& cas : cases) { |
| UnicodeString affixPattern(cas.affixPattern); |
| UnicodeString sampleParseableString(cas.sampleParseableString); |
| int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0; |
| |
| bool success; |
| AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern( |
| affixPattern, warehouse, parseFlags, &success, status); |
| if (!status.errDataIfFailureAndReset("Creation should be successful")) { |
| |
| // Check that the matcher has the expected number of children |
| assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length()); |
| |
| // Check that the matcher works on a sample string |
| StringSegment segment(sampleParseableString, false); |
| ParsedNumber result; |
| matcher.match(segment, result, status); |
| assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd); |
| } |
| } |
| } |
| |
| void NumberParserTest::test20360_BidiOverflow() { |
| IcuTestErrorCode status(*this, "test20360_BidiOverflow"); |
| UnicodeString inputString; |
| inputString.append(u'-'); |
| for (int32_t i=0; i<100000; i++) { |
| inputString.append(u'\u061C'); |
| } |
| inputString.append(u'5'); |
| |
| LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status)); |
| if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { |
| return; |
| } |
| |
| ParsedNumber resultObject; |
| parser->parse(inputString, true, resultObject, status); |
| assertTrue("Greedy Parse, success", resultObject.success()); |
| assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd); |
| assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status)); |
| |
| resultObject.clear(); |
| parser->parse(inputString, false, resultObject, status); |
| assertFalse("Non-Greedy Parse, success", resultObject.success()); |
| assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd); |
| } |
| |
| void NumberParserTest::testInfiniteRecursion() { |
| IcuTestErrorCode status(*this, "testInfiniteRecursion"); |
| UnicodeString inputString; |
| inputString.append(u'-'); |
| for (int32_t i=0; i<200; i++) { |
| inputString.append(u'\u061C'); |
| } |
| inputString.append(u'5'); |
| |
| LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status)); |
| if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { |
| return; |
| } |
| |
| ParsedNumber resultObject; |
| parser->parse(inputString, false, resultObject, status); |
| assertFalse("Default recursion limit, success", resultObject.success()); |
| assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd); |
| |
| parser.adoptInstead(NumberParserImpl::createSimpleParser( |
| "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status)); |
| resultObject.clear(); |
| parser->parse(inputString, false, resultObject, status); |
| assertTrue("Unlimited recursion, success", resultObject.success()); |
| assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd); |
| assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status)); |
| } |
| |
| |
| #endif |