| /* |
| ******************************************************************************* |
| * Copyright (C) 1996-2000, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| #include "itrbnf.h" |
| |
| #include "unicode/tblcoll.h" |
| #include "unicode/coleitr.h" |
| |
| // import com.ibm.text.RuleBasedNumberFormat; |
| // import com.ibm.test.TestFmwk; |
| |
| // import java.util.Locale; |
| // import java.text.NumberFormat; |
| |
// The current TESTCASE macro is not in ICU 1.8.1, so it is defined locally.
// It expands to one `case` of the switch in runIndexedTest(): the test's
// name is always stored in `name`, and when `exec` is set a banner plus an
// empty line are logged before the test method itself is invoked.
#define TESTCASE(id,test)                 \
    case id:                              \
        name = #test;                     \
        if (exec) {                       \
            logln(#test "---");           \
            logln(UnicodeString(""));     \
            test();                       \
        }                                 \
        break
| |
| void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) |
| { |
| if (exec) logln("TestSuite RuleBasedNumberFormat"); |
| switch (index) { |
| TESTCASE(0, TestEnglishSpellout); |
| TESTCASE(1, TestOrdinalAbbreviations); |
| TESTCASE(2, TestDurations); |
| TESTCASE(3, TestSpanishSpellout); |
| TESTCASE(4, TestFrenchSpellout); |
| TESTCASE(5, TestSwissFrenchSpellout); |
| TESTCASE(6, TestItalianSpellout); |
| TESTCASE(7, TestGermanSpellout); |
| TESTCASE(8, TestThaiSpellout); |
| default: |
| name = ""; |
| break; |
| } |
| } |
| |
| void |
| IntlTestRBNF::TestEnglishSpellout() |
| { |
| #if 0 |
| // temporary test code |
| int32_t result = 0; |
| UErrorCode status = U_ZERO_ERROR; |
| Collator* temp = Collator::createInstance(Locale::US, status); |
| if (U_SUCCESS(status) && |
| temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) { |
| |
| RuleBasedCollator* collator = (RuleBasedCollator*)temp; |
| UnicodeString rules(collator->getRules()); |
| UnicodeString tailoring("&'\\u0000' << ' ' << '-'\n"); |
| tailoring = tailoring.unescape(); |
| rules.append(tailoring); |
| |
| collator = new RuleBasedCollator(rules, status); |
| if (U_SUCCESS(status)) { |
| collator->setDecomposition(Normalizer::DECOMP); |
| |
| UnicodeString prefix(" hundred"); |
| UnicodeString str("hundred-fifty"); |
| |
| CollationElementIterator* strIter = collator->createCollationElementIterator(str); |
| CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix); |
| |
| // match collation elements between the strings |
| int32_t oStr = strIter->next(status); |
| int32_t oPrefix = prefixIter->next(status); |
| |
| while (oPrefix != CollationElementIterator::NULLORDER) { |
| // skip over ignorable characters in the target string |
| while (CollationElementIterator::primaryOrder(oStr) == 0 |
| && oStr != CollationElementIterator::NULLORDER) { |
| oStr = strIter->next(status); |
| } |
| |
| // skip over ignorable characters in the prefix |
| while (CollationElementIterator::primaryOrder(oPrefix) == 0 |
| && oPrefix != CollationElementIterator::NULLORDER) { |
| oPrefix = prefixIter->next(status); |
| } |
| |
| // if skipping over ignorables brought us to the end |
| // of the target string, we didn't match and return 0 |
| if (oStr == CollationElementIterator::NULLORDER) { |
| result = -1; |
| break; |
| } |
| |
| // if skipping over ignorables brought to the end of |
| // the prefix, we DID match: drop out of the loop |
| else if (oPrefix == CollationElementIterator::NULLORDER) { |
| break; |
| } |
| |
| // match collation elements from the two strings |
| // (considering only primary differences). If we |
| // get a mismatch, dump out and return 0 |
| if (CollationElementIterator::primaryOrder(oStr) |
| != CollationElementIterator::primaryOrder(oPrefix)) { |
| result = -1; |
| break; |
| |
| // otherwise, advance to the next character in each string |
| // and loop (we drop out of the loop when we exhaust |
| // collation elements in the prefix) |
| } else { |
| oStr = strIter->next(status); |
| oPrefix = prefixIter->next(status); |
| } |
| } |
| if (result == 0) { |
| result = strIter->getOffset(); |
| } |
| delete prefixIter; |
| delete strIter; |
| } |
| delete collator; |
| } |
| delete temp; |
| |
| printf("result: %d\n", result); |
| #endif |
| |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "one" }, |
| { "2", "two" }, |
| { "15", "fifteen" }, |
| { "20", "twenty" }, |
| { "23", "twenty-three" }, |
| { "73", "seventy-three" }, |
| { "88", "eighty-eight" }, |
| { "100", "one hundred" }, |
| { "106", "one hundred and six" }, |
| { "127", "one hundred and twenty-seven" }, |
| { "200", "two hundred" }, |
| { "579", "five hundred and seventy-nine" }, |
| { "1,000", "one thousand" }, |
| { "2,000", "two thousand" }, |
| { "3,004", "three thousand and four" }, |
| { "4,567", "four thousand five hundred and sixty-seven" }, |
| { "15,943", "fifteen thousand nine hundred and forty-three" }, |
| { "2,345,678", "two million, three hundred and forty-five thousand, six hundred and seventy-eight" }, |
| { "-36", "minus thirty-six" }, |
| { "234.567", "two hundred and thirty-four point five six seven" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| |
| formatter->setLenient(TRUE); |
| static const char* lpTestData[][2] = { |
| { "fifty-7", "57" }, |
| { " fifty-7", "57" }, |
| { " fifty-7", "57" }, |
| { "2 thousand six HUNDRED fifty-7", "2,657" }, |
| { "fifteen hundred and zero", "1,500" }, |
| { "FOurhundred thiRTY six", "436" }, |
| { NULL, NULL} |
| }; |
| doLenientParseTest(formatter, lpTestData); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestOrdinalAbbreviations() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_ORDINAL, Locale::US, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "1st" }, |
| { "2", "2nd" }, |
| { "3", "3rd" }, |
| { "4", "4th" }, |
| { "7", "7th" }, |
| { "10", "10th" }, |
| { "11", "11th" }, |
| { "13", "13th" }, |
| { "20", "20th" }, |
| { "21", "21st" }, |
| { "22", "22nd" }, |
| { "23", "23rd" }, |
| { "24", "24th" }, |
| { "33", "33rd" }, |
| { "102", "102nd" }, |
| { "312", "312th" }, |
| { "12,345", "12,345th" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, FALSE); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestDurations() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "3,600", "1:00:00" }, //move me and I fail |
| { "0", "0 sec." }, |
| { "1", "1 sec." }, |
| { "24", "24 sec." }, |
| { "60", "1:00" }, |
| { "73", "1:13" }, |
| { "145", "2:25" }, |
| { "666", "11:06" }, |
| // { "3,600", "1:00:00" }, |
| { "3,740", "1:02:20" }, |
| { "10,293", "2:51:33" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| |
| formatter->setLenient(TRUE); |
| static const char* lpTestData[][2] = { |
| { "2-51-33", "10,293" }, |
| { NULL, NULL} |
| }; |
| doLenientParseTest(formatter, lpTestData); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestSpanishSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "ES", ""), status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "uno" }, |
| { "6", "seis" }, |
| { "16", "diecis\\u00e9is" }, |
| { "20", "veinte" }, |
| { "24", "veinticuatro" }, |
| { "26", "veintis\\u00e9is" }, |
| { "73", "setenta y tres" }, |
| { "88", "ochenta y ocho" }, |
| { "100", "cien" }, |
| { "106", "ciento seis" }, |
| { "127", "ciento veintisiete" }, |
| { "200", "doscientos" }, |
| { "579", "quinientos setenta y nueve" }, |
| { "1,000", "mil" }, |
| { "2,000", "dos mil" }, |
| { "3,004", "tres mil cuatro" }, |
| { "4,567", "cuatro mil quinientos sesenta y siete" }, |
| { "15,943", "quince mil novecientos cuarenta y tres" }, |
| { "2,345,678", "dos mill\\u00f3n trescientos cuarenta y cinco mil seiscientos setenta y ocho"}, |
| { "-36", "menos treinta y seis" }, |
| { "234.567", "doscientos treinta y cuatro punto cinco seis siete" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestFrenchSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "un" }, |
| { "15", "quinze" }, |
| { "20", "vingt" }, |
| { "21", "vingt-et-un" }, |
| { "23", "vingt-trois" }, |
| { "62", "soixante-deux" }, |
| { "70", "soixante-dix" }, |
| { "71", "soixante et onze" }, |
| { "73", "soixante-treize" }, |
| { "80", "quatre-vingts" }, |
| { "88", "quatre-vingt-huit" }, |
| { "100", "cent" }, |
| { "106", "cent six" }, |
| { "127", "cent vingt-sept" }, |
| { "200", "deux cents" }, |
| { "579", "cinq cents soixante-dix-neuf" }, |
| { "1,000", "mille" }, |
| { "1,123", "onze cents vingt-trois" }, |
| { "1,594", "mille cinq cents quatre-vingt-quatorze" }, |
| { "2,000", "deux mille" }, |
| { "3,004", "trois mille quatre" }, |
| { "4,567", "quatre mille cinq cents soixante-sept" }, |
| { "15,943", "quinze mille neuf cents quarante-trois" }, |
| { "2,345,678", "deux million trois cents quarante-cinq mille six cents soixante-dix-huit" }, |
| { "-36", "moins trente-six" }, |
| { "234.567", "deux cents trente-quatre virgule cinq six sept" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| |
| formatter->setLenient(TRUE); |
| static const char* lpTestData[][2] = { |
| { "trente-un", "31" }, |
| { "un cents quatre vingt dix huit", "198" }, |
| { NULL, NULL} |
| }; |
| doLenientParseTest(formatter, lpTestData); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestSwissFrenchSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH", ""), status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "un" }, |
| { "15", "quinze" }, |
| { "20", "vingt" }, |
| { "21", "vingt-et-un" }, |
| { "23", "vingt-trois" }, |
| { "62", "soixante-deux" }, |
| { "70", "septante" }, |
| { "71", "septante-et-un" }, |
| { "73", "septante-trois" }, |
| { "80", "octante" }, |
| { "88", "octante-huit" }, |
| { "100", "cent" }, |
| { "106", "cent six" }, |
| { "127", "cent vingt-sept" }, |
| { "200", "deux cents" }, |
| { "579", "cinq cents septante-neuf" }, |
| { "1,000", "mille" }, |
| { "1,123", "onze cents vingt-trois" }, |
| { "1,594", "mille cinq cents nonante-quatre" }, |
| { "2,000", "deux mille" }, |
| { "3,004", "trois mille quatre" }, |
| { "4,567", "quatre mille cinq cents soixante-sept" }, |
| { "15,943", "quinze mille neuf cents quarante-trois" }, |
| { "2,345,678", "deux million trois cents quarante-cinq mille six cents septante-huit" }, |
| { "-36", "moins trente-six" }, |
| { "234.567", "deux cents trente-quatre virgule cinq six sept" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestItalianSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "uno" }, |
| { "15", "quindici" }, |
| { "20", "venti" }, |
| { "23", "ventitre" }, |
| { "73", "settantatre" }, |
| { "88", "ottantotto" }, |
| { "100", "cento" }, |
| { "106", "centosei" }, |
| { "108", "centotto" }, |
| { "127", "centoventisette" }, |
| { "181", "centottantuno" }, |
| { "200", "duecento" }, |
| { "579", "cinquecentosettantanove" }, |
| { "1,000", "mille" }, |
| { "2,000", "duemila" }, |
| { "3,004", "tremilaquattro" }, |
| { "4,567", "quattromilacinquecentosessantasette" }, |
| { "15,943", "quindicimilanovecentoquarantatre" }, |
| { "-36", "meno trentisei" }, |
| { "234.567", "duecentotrentiquattro virgola cinque sei sette" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestGermanSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "1", "eins" }, |
| { "15", "f\\u00fcnfzehn" }, |
| { "20", "zwanzig" }, |
| { "23", "dreiundzwanzig" }, |
| { "73", "dreiundsiebzig" }, |
| { "88", "achtundachtzig" }, |
| { "100", "hundert" }, |
| { "106", "hundertsechs" }, |
| { "127", "hundertsiebenundzwanzig" }, |
| { "200", "zweihundert" }, |
| { "579", "f\\u00fcnfhundertneunundsiebzig" }, |
| { "1,000", "tausend" }, |
| { "2,000", "zweitausend" }, |
| { "3,004", "dreitausendvier" }, |
| { "4,567", "viertausendf\\u00fcnfhundertsiebenundsechzig" }, |
| { "15,943", "f\\u00fcnfzehntausendneunhundertdreiundvierzig" }, |
| { "2,345,678", "zwei Millionen dreihundertf\\u00fcnfundvierzigtausendsechshundertachtundsiebzig" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| |
| formatter->setLenient(TRUE); |
| static const char* lpTestData[][2] = { |
| { "ein Tausend sechs Hundert fuenfunddreissig", "1,635" }, |
| { NULL, NULL} |
| }; |
| doLenientParseTest(formatter, lpTestData); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::TestThaiSpellout() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedNumberFormat* formatter |
| = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("th"), status); |
| |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not construct formatter"); |
| } else { |
| static const char* testData[][2] = { |
| { "0", "\\u0e28\\u0e39\\u0e19\\u0e22\\u0e4c" }, |
| { "1", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" }, |
| { "10", "\\u0e2a\\u0e34\\u0e1a" }, |
| { "11", "\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" }, |
| { "21", "\\u0e22\\u0e35\\u0e48\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" }, |
| { "101", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e23\\u0e49\\u0e2d\\u0e22\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" }, |
| { "1.234", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e08\\u0e38\\u0e14\\u0e2a\\u0e2d\\u0e07\\u0e2a\\u0e32\\u0e21\\u0e2a\\u0e35\\u0e48" }, |
| { NULL, NULL} |
| }; |
| |
| doTest(formatter, testData, TRUE); |
| } |
| delete formatter; |
| } |
| |
| void |
| IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing) |
| { |
| // man, error reporting would be easier with printf-style syntax for unicode string and formattable |
| |
| UErrorCode status = U_ZERO_ERROR; |
| NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status); |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not create NumberFormat"); |
| } else { |
| for (int i = 0; testData[i][0]; ++i) { |
| const char* numString = testData[i][0]; |
| const char* expectedWords = testData[i][1]; |
| |
| Formattable expectedNumber; |
| decFmt->parse(numString, expectedNumber, status); |
| if (U_FAILURE(status)) { |
| errln("FAIL: decFmt could not parse %s", numString); |
| break; |
| } else { |
| UnicodeString actualString; |
| FieldPosition pos; |
| formatter->format(expectedNumber, actualString/* , pos*/, status); |
| if (U_FAILURE(status)) { |
| UnicodeString msg = "Fail: formatter could not format "; |
| decFmt->format(expectedNumber, msg, status); |
| errln(msg); |
| break; |
| } else { |
| UnicodeString expectedString = UnicodeString(expectedWords).unescape(); |
| if (actualString != expectedString) { |
| UnicodeString msg = "FAIL: check failed for "; |
| decFmt->format(expectedNumber, msg, status); |
| msg.append(", expected "); |
| msg.append(expectedString); |
| msg.append(" but got "); |
| msg.append(actualString); |
| errln(msg); |
| break; |
| } else if (testParsing) { |
| Formattable parsedNumber; |
| formatter->parse(actualString, parsedNumber, status); |
| if (U_FAILURE(status)) { |
| UnicodeString msg = "FAIL: formatter could not parse "; |
| msg.append(actualString); |
| msg.append(" status code: " ); |
| char buffer[32]; |
| sprintf(buffer, "0x%x", status); |
| msg.append(buffer); |
| errln(msg); |
| break; |
| } else { |
| if (parsedNumber != expectedNumber) { |
| UnicodeString msg = "FAIL: parse failed for "; |
| msg.append(actualString); |
| msg.append(", expected "); |
| decFmt->format(expectedNumber, msg, status); |
| msg.append(", but got "); |
| decFmt->format(parsedNumber, msg, status); |
| errln(msg); |
| break; |
| } |
| } |
| } |
| } |
| } |
| } |
| delete decFmt; |
| } |
| } |
| |
| void |
| IntlTestRBNF::doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status); |
| if (U_FAILURE(status)) { |
| errln("FAIL: could not create NumberFormat"); |
| } else { |
| for (int i = 0; testData[i][0]; ++i) { |
| const char* spelledNumber = testData[i][0]; // spelled-out number |
| const char* asciiUSNumber = testData[i][1]; // number as ascii digits formatted for US locale |
| |
| UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape(); |
| Formattable actualNumber; |
| formatter->parse(spelledNumberString, actualNumber, status); |
| if (U_FAILURE(status)) { |
| UnicodeString msg = "FAIL: formatter could not parse "; |
| msg.append(spelledNumberString); |
| errln(msg); |
| break; |
| } else { |
| // I changed the logic of this test somewhat from Java-- instead of comparing the |
| // strings, I compare the Formattables. Hmmm, but the Formattables don't compare, |
| // so change it back. |
| |
| UnicodeString asciiUSNumberString = asciiUSNumber; |
| Formattable expectedNumber; |
| decFmt->parse(asciiUSNumberString, expectedNumber, status); |
| if (U_FAILURE(status)) { |
| UnicodeString msg = "FAIL: decFmt could not parse "; |
| msg.append(asciiUSNumberString); |
| errln(msg); |
| break; |
| } else { |
| UnicodeString actualNumberString; |
| UnicodeString expectedNumberString; |
| decFmt->format(actualNumber, actualNumberString, status); |
| decFmt->format(expectedNumber, expectedNumberString, status); |
| if (actualNumberString != expectedNumberString) { |
| UnicodeString msg = "FAIL: parsing"; |
| msg.append(asciiUSNumberString); |
| msg.append("\n"); |
| msg.append(" lenient parse failed for "); |
| msg.append(spelledNumberString); |
| msg.append(", expected "); |
| msg.append(expectedNumberString); |
| msg.append(", but got "); |
| msg.append(actualNumberString); |
| errln(msg); |
| break; |
| } |
| } |
| } |
| } |
| delete decFmt; |
| } |
| } |
| |