| // © 2018 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| #include <memory> |
| |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "localebuildertest.h" |
| #include "unicode/localebuilder.h" |
| #include "unicode/strenum.h" |
| |
| LocaleBuilderTest::LocaleBuilderTest() |
| { |
| } |
| |
| LocaleBuilderTest::~LocaleBuilderTest() |
| { |
| } |
| |
| void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) |
| { |
| TESTCASE_AUTO_BEGIN; |
| TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute); |
| TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed); |
| TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed); |
| TESTCASE_AUTO(TestLocaleBuilder); |
| TESTCASE_AUTO(TestLocaleBuilderBasic); |
| TESTCASE_AUTO(TestPosixCases); |
| TESTCASE_AUTO(TestSetExtensionOthers); |
| TESTCASE_AUTO(TestSetExtensionPU); |
| TESTCASE_AUTO(TestSetExtensionT); |
| TESTCASE_AUTO(TestSetExtensionU); |
| TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed); |
| TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed); |
| TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed); |
| TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed); |
| TESTCASE_AUTO(TestSetExtensionValidateTIllFormed); |
| TESTCASE_AUTO(TestSetExtensionValidateTWellFormed); |
| TESTCASE_AUTO(TestSetExtensionValidateUIllFormed); |
| TESTCASE_AUTO(TestSetExtensionValidateUWellFormed); |
| TESTCASE_AUTO(TestSetLanguageIllFormed); |
| TESTCASE_AUTO(TestSetLanguageWellFormed); |
| TESTCASE_AUTO(TestSetLocale); |
| TESTCASE_AUTO(TestSetRegionIllFormed); |
| TESTCASE_AUTO(TestSetRegionWellFormed); |
| TESTCASE_AUTO(TestSetScriptIllFormed); |
| TESTCASE_AUTO(TestSetScriptWellFormed); |
| TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey); |
| TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue); |
| TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed); |
| TESTCASE_AUTO(TestSetVariantIllFormed); |
| TESTCASE_AUTO(TestSetVariantWellFormed); |
| TESTCASE_AUTO_END; |
| } |
| |
| void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) { |
| UErrorCode status = U_ZERO_ERROR; |
| UErrorCode copyStatus = U_ZERO_ERROR; |
| UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR; |
| if (bld.copyErrorTo(copyStatus)) { |
| errln(msg, u_errorName(copyStatus)); |
| } |
| if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("Should always get the previous error and return FALSE"); |
| } |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln(msg, u_errorName(status)); |
| } |
| if (status != copyStatus) { |
| errln(msg, u_errorName(status)); |
| } |
| std::string tag = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status)) { |
| errln("loc.toLanguageTag() got Error: %s\n", |
| u_errorName(status)); |
| } |
| if (tag != expected) { |
| errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str()); |
| } |
| } |
| |
| void LocaleBuilderTest::TestLocaleBuilder() { |
| // The following test data are copy from |
| // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java |
| // "L": +1 = language |
| // "S": +1 = script |
| // "R": +1 = region |
| // "V": +1 = variant |
| // "K": +1 = Unicode locale key / +2 = Unicode locale type |
| // "A": +1 = Unicode locale attribute |
| // "E": +1 = extension letter / +2 = extension value |
| // "P": +1 = private use |
| // "U": +1 = ULocale |
| // "B": +1 = BCP47 language tag |
| // "C": Clear all |
| // "N": Clear extensions |
| // "D": +1 = Unicode locale attribute to be removed |
| // "X": indicates an exception must be thrown |
| // "T": +1 = expected language tag / +2 = expected locale string |
| const char* TESTCASES[][14] = { |
| {"L", "en", "R", "us", "T", "en-US", "en_US"}, |
| {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"}, |
| {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"}, |
| {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"}, |
| {"L", "123", "X"}, |
| {"R", "us", "T", "und-US", "_US"}, |
| {"R", "usa", "X"}, |
| {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"}, |
| {"R", "123", "L", "it", "R", "", "T", "it", "it"}, |
| {"R", "123", "L", "en", "T", "en-123", "en_123"}, |
| {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"}, |
| {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"}, |
| {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"}, |
| {"S", "latin", "X"}, |
| {"V", "1234", "L", "en", "T", "en-1234", "en__1234"}, |
| {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"}, |
| {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"}, |
| {"V", "1234", "L", "en", "V", "", "T", "en", "en"}, |
| {"V", "123", "X"}, |
| {"U", "en_US", "T", "en-US", "en_US"}, |
| {"U", "en_US_WIN", "X"}, |
| {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T", |
| "fr-FR-1606nict-u-ca-gregory-x-test", |
| "fr_FR_1606NICT@calendar=gregorian;x=test"}, |
| {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"}, |
| {"B", "und-CA", "T", "und-CA", "_CA"}, |
| // Blocked by ICU-20327 |
| // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var", |
| // "en_US_VAR@x=test"}, |
| {"B", "en-US-VAR", "X"}, |
| {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T", |
| "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"}, |
| {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T", |
| "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"}, |
| {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u", |
| "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory", |
| "ja_JP@attribute=attr1;calendar=gregorian"}, |
| {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn", |
| "en@colnumeric=yes"}, |
| {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai", |
| "th_TH@numbers=thai"}, |
| {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"}, |
| {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"}, |
| {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"}, |
| {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"}, |
| {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"}, |
| {"E", "a", "x", "X"}, |
| {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"}, |
| // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes. |
| // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true". |
| // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown, |
| // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform |
| // key = alphanum alpha |
| {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a", |
| "en@0a=yes;attribute=aaa-bbb"}, |
| {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu", |
| "fr_FR@x=yoshito-icu"}, |
| {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese", |
| "ja_JP@calendar=japanese"}, |
| {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T", |
| "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"}, |
| {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"}, |
| {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai", |
| "th@calendar=gregorian;numbers=thai"}, |
| {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc", |
| "en_US@timezone=America/New_York"}, |
| {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk", |
| "true", "T", "de-u-co-phonebk-kk-ks-level1", |
| "de@collation=phonebook;colnormalization=yes;colstrength=primary"}, |
| {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory", |
| "en_US@calendar=gregorian"}, |
| {"L", "en", "R", "US", "K", "cal", "gregory", "X"}, |
| {"L", "en", "R", "US", "K", "ca", "gregorian", "X"}, |
| {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn", |
| "en_US@colnumeric=yes"}, |
| {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"}, |
| {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"}, |
| {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T", |
| "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"}, |
| {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T", |
| "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"}, |
| {"L", "en", "A", "aa", "X"}, |
| {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"}, |
| }; |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) { |
| const char* (&testCase)[14] = TESTCASES[tidx]; |
| std::string actions; |
| for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) { |
| if (testCase[p] == nullptr) { |
| actions += " (nullptr)"; |
| break; |
| } |
| if (p > 0) actions += " "; |
| actions += testCase[p]; |
| } |
| int i = 0; |
| const char* method; |
| status = U_ZERO_ERROR; |
| bld.clear(); |
| while (true) { |
| status = U_ZERO_ERROR; |
| UErrorCode copyStatus = U_ZERO_ERROR; |
| method = testCase[i++]; |
| if (strcmp("L", method) == 0) { |
| bld.setLanguage(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("S", method) == 0) { |
| bld.setScript(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("R", method) == 0) { |
| bld.setRegion(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("V", method) == 0) { |
| bld.setVariant(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("K", method) == 0) { |
| const char* key = testCase[i++]; |
| const char* type = testCase[i++]; |
| bld.setUnicodeLocaleKeyword(key, type); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("A", method) == 0) { |
| bld.addUnicodeLocaleAttribute(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("E", method) == 0) { |
| const char* key = testCase[i++]; |
| const char* value = testCase[i++]; |
| bld.setExtension(key[0], value); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("P", method) == 0) { |
| bld.setExtension('x', testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("U", method) == 0) { |
| bld.setLocale(Locale(testCase[i++])); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("B", method) == 0) { |
| bld.setLanguageTag(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } |
| // clear / remove |
| else if (strcmp("C", method) == 0) { |
| bld.clear(); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("N", method) == 0) { |
| bld.clearExtensions(); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } else if (strcmp("D", method) == 0) { |
| bld.removeUnicodeLocaleAttribute(testCase[i++]); |
| bld.copyErrorTo(copyStatus); |
| bld.build(status); |
| } |
| // result |
| else if (strcmp("X", method) == 0) { |
| if (U_SUCCESS(status)) { |
| errln("FAIL: No error return - test case: %s", actions.c_str()); |
| } |
| } else if (strcmp("T", method) == 0) { |
| status = U_ZERO_ERROR; |
| Locale loc = bld.build(status); |
| if (status != copyStatus) { |
| errln("copyErrorTo not matching"); |
| } |
| if (U_FAILURE(status) || |
| strcmp(loc.getName(), testCase[i + 1]) != 0) { |
| errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(), |
| " for test case: ", actions.c_str()); |
| } |
| std::string langtag = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || langtag != testCase[i]) { |
| errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(), |
| " for test case: ", actions.c_str()); |
| } |
| break; |
| } else { |
| // Unknow test method |
| errln("Unknown test case method: There is an error in the test case data."); |
| break; |
| } |
| if (status != copyStatus) { |
| errln("copyErrorTo not matching"); |
| } |
| if (U_FAILURE(status)) { |
| if (strcmp("X", testCase[i]) == 0) { |
| // This failure is expected |
| break; |
| } else { |
| errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i, |
| " in test case: ", actions.c_str()); |
| break; |
| } |
| } |
| if (strcmp("T", method) == 0) { |
| break; |
| } |
| } // while(true) |
| } // for TESTCASES |
| } |
| |
| void LocaleBuilderTest::TestLocaleBuilderBasic() { |
| LocaleBuilder bld; |
| bld.setLanguage("zh"); |
| Verify(bld, "zh", "setLanguage('zh') got Error: %s\n"); |
| |
| bld.setScript("Hant"); |
| Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n"); |
| |
| bld.setRegion("SG"); |
| Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n"); |
| |
| bld.setRegion("HK"); |
| bld.setScript("Hans"); |
| Verify(bld, "zh-Hans-HK", |
| "setRegion('HK') and setScript('Hans') got Error: %s\n"); |
| |
| bld.setVariant("revised"); |
| Verify(bld, "zh-Hans-HK-revised", |
| "setVariant('revised') got Error: %s\n"); |
| |
| bld.setUnicodeLocaleKeyword("nu", "thai"); |
| Verify(bld, "zh-Hans-HK-revised-u-nu-thai", |
| "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n"); |
| |
| bld.setUnicodeLocaleKeyword("co", "pinyin"); |
| Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai", |
| "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n"); |
| |
| bld.setUnicodeLocaleKeyword("nu", "latn"); |
| Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn", |
| "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n"); |
| |
| bld.setUnicodeLocaleKeyword("nu", nullptr); |
| Verify(bld, "zh-Hans-HK-revised-u-co-pinyin", |
| "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n"); |
| |
| bld.setUnicodeLocaleKeyword("co", nullptr); |
| Verify(bld, "zh-Hans-HK-revised", |
| "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n"); |
| |
| bld.setScript(""); |
| Verify(bld, "zh-HK-revised", |
| "setScript('') got Error: %s\n"); |
| |
| bld.setVariant(""); |
| Verify(bld, "zh-HK", |
| "setVariant('') got Error: %s\n"); |
| |
| bld.setRegion(""); |
| Verify(bld, "zh", |
| "setRegion('') got Error: %s\n"); |
| } |
| |
| void LocaleBuilderTest::TestSetLanguageWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag |
| // unicode_language_subtag = alpha{2,3} | alpha{5,8}; |
| // ICUTC decided also support alpha{4} |
| static const char* wellFormedLanguages[] = { |
| "", |
| |
| // alpha{2} |
| "en", |
| "NE", |
| "eN", |
| "Ne", |
| |
| // alpha{3} |
| "aNe", |
| "zzz", |
| "AAA", |
| |
| // alpha{4} |
| "ABCD", |
| "abcd", |
| |
| // alpha{5} |
| "efgij", |
| "AbCAD", |
| "ZAASD", |
| |
| // alpha{6} |
| "efgijk", |
| "AADGFE", |
| "AkDfFz", |
| |
| // alpha{7} |
| "asdfads", |
| "ADSFADF", |
| "piSFkDk", |
| |
| // alpha{8} |
| "oieradfz", |
| "IADSFJKR", |
| "kkDSFJkR", |
| }; |
| for (const char* lang : wellFormedLanguages) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setLanguage(lang); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setLanguage(\"%s\") got Error: %s\n", |
| lang, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetLanguageIllFormed() { |
| static const char* illFormed[] = { |
| "a", |
| "z", |
| "A", |
| "F", |
| "2", |
| "0", |
| "9" |
| "{", |
| ".", |
| "[", |
| "]", |
| "\\", |
| |
| "e1", |
| "N2", |
| "3N", |
| "4e", |
| "e:", |
| "43", |
| "a9", |
| |
| "aN0", |
| "z1z", |
| "2zz", |
| "3A3", |
| "456", |
| "af)", |
| |
| // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321. |
| // "latn", |
| // "Arab", |
| // "LATN", |
| |
| "e)gij", |
| "Ab3AD", |
| "ZAAS8", |
| |
| "efgi[]", |
| "AA9GFE", |
| "7kD3Fz", |
| "as8fads", |
| "0DSFADF", |
| "'iSFkDk", |
| |
| "oieradf+", |
| "IADSFJK-", |
| "kkDSFJk0", |
| |
| // alpha{9} |
| "oieradfab", |
| "IADSFJKDE", |
| "kkDSFJkzf", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setLanguage(ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setLanguage(\"%s\") should fail but has no Error\n", ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetScriptWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag |
| // unicode_script_subtag = alpha{4} ; |
| static const char* wellFormedScripts[] = { |
| "", |
| |
| "Latn", |
| "latn", |
| "lATN", |
| "laTN", |
| "arBN", |
| "ARbn", |
| "adsf", |
| "aADF", |
| "BSVS", |
| "LATn", |
| }; |
| for (const char* script : wellFormedScripts) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setScript(script); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setScript(\"%s\") got Error: %s\n", |
| script, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetScriptIllFormed() { |
| static const char* illFormed[] = { |
| "a", |
| "z", |
| "A", |
| "F", |
| "2", |
| "0", |
| "9" |
| "{", |
| ".", |
| "[", |
| "]", |
| "\\", |
| |
| "e1", |
| "N2", |
| "3N", |
| "4e", |
| "e:", |
| "43", |
| "a9", |
| |
| "aN0", |
| "z1z", |
| "2zz", |
| "3A3", |
| "456", |
| "af)", |
| |
| "0atn", |
| "l1tn", |
| "lA2N", |
| "la4N", |
| "arB5", |
| "1234", |
| |
| "e)gij", |
| "Ab3AD", |
| "ZAAS8", |
| |
| "efgi[]", |
| "AA9GFE", |
| "7kD3Fz", |
| |
| "as8fads", |
| "0DSFADF", |
| "'iSFkDk", |
| |
| "oieradf+", |
| "IADSFJK-", |
| "kkDSFJk0", |
| |
| // alpha{9} |
| "oieradfab", |
| "IADSFJKDE", |
| "kkDSFJkzf", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setScript(ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setScript(\"%s\") should fail but has no Error\n", ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetRegionWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag |
| // unicode_region_subtag = (alpha{2} | digit{3}) |
| static const char* wellFormedRegions[] = { |
| "", |
| |
| // alpha{2} |
| "en", |
| "NE", |
| "eN", |
| "Ne", |
| |
| // digit{3} |
| "000", |
| "999", |
| "123", |
| "987" |
| }; |
| for (const char* region : wellFormedRegions) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setRegion(region); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setRegion(\"%s\") got Error: %s\n", |
| region, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetRegionIllFormed() { |
| static const char* illFormed[] = { |
| "a", |
| "z", |
| "A", |
| "F", |
| "2", |
| "0", |
| "9" |
| "{", |
| ".", |
| "[", |
| "]", |
| "\\", |
| |
| "e1", |
| "N2", |
| "3N", |
| "4e", |
| "e:", |
| "43", |
| "a9", |
| |
| "aN0", |
| "z1z", |
| "2zz", |
| "3A3", |
| "4.6", |
| "af)", |
| |
| "0atn", |
| "l1tn", |
| "lA2N", |
| "la4N", |
| "arB5", |
| "1234", |
| |
| "e)gij", |
| "Ab3AD", |
| "ZAAS8", |
| |
| "efgi[]", |
| "AA9GFE", |
| "7kD3Fz", |
| |
| "as8fads", |
| "0DSFADF", |
| "'iSFkDk", |
| |
| "oieradf+", |
| "IADSFJK-", |
| "kkDSFJk0", |
| |
| // alpha{9} |
| "oieradfab", |
| "IADSFJKDE", |
| "kkDSFJkzf", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setRegion(ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setRegion(\"%s\") should fail but has no Error\n", ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetVariantWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag |
| // (sep unicode_variant_subtag)* |
| // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ; |
| static const char* wellFormedVariants[] = { |
| "", |
| |
| // alphanum{5} |
| "efgij", |
| "AbCAD", |
| "ZAASD", |
| "0AASD", |
| "A1CAD", |
| "ef2ij", |
| "ads3X", |
| "owqF4", |
| |
| // alphanum{6} |
| "efgijk", |
| "AADGFE", |
| "AkDfFz", |
| "0ADGFE", |
| "A9DfFz", |
| "AADG7E", |
| |
| // alphanum{7} |
| "asdfads", |
| "ADSFADF", |
| "piSFkDk", |
| "a0dfads", |
| "ADSF3DF", |
| "piSFkD9", |
| |
| // alphanum{8} |
| "oieradfz", |
| "IADSFJKR", |
| "kkDSFJkR", |
| "0ADSFJKR", |
| "12345679", |
| |
| // digit alphanum{3} |
| "0123", |
| "1abc", |
| "20EF", |
| "30EF", |
| "8A03", |
| "3Ax3", |
| "9Axy", |
| |
| // (sep unicode_variant_subtag)* |
| "0123-4567", |
| "0ab3-ABCDE", |
| "9ax3-xByD9", |
| "9ax3-xByD9-adfk934a", |
| |
| "0123_4567", |
| "0ab3_ABCDE", |
| "9ax3_xByD9", |
| "9ax3_xByD9_adfk934a", |
| |
| "9ax3-xByD9_adfk934a", |
| "9ax3_xByD9-adfk934a", |
| }; |
| for (const char* variant : wellFormedVariants) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setVariant(variant); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setVariant(\"%s\") got Error: %s\n", |
| variant, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetVariantIllFormed() { |
| static const char* illFormed[] = { |
| "a", |
| "z", |
| "A", |
| "F", |
| "2", |
| "0", |
| "9" |
| "{", |
| ".", |
| "[", |
| "]", |
| "\\", |
| |
| "e1", |
| "N2", |
| "3N", |
| "4e", |
| "e:", |
| "43", |
| "a9", |
| "en", |
| "NE", |
| "eN", |
| "Ne", |
| |
| "aNe", |
| "zzz", |
| "AAA", |
| "aN0", |
| "z1z", |
| "2zz", |
| "3A3", |
| "4.6", |
| "af)", |
| "345", |
| "923", |
| |
| "Latn", |
| "latn", |
| "lATN", |
| "laTN", |
| "arBN", |
| "ARbn", |
| "adsf", |
| "aADF", |
| "BSVS", |
| "LATn", |
| "l1tn", |
| "lA2N", |
| "la4N", |
| "arB5", |
| "abc3", |
| "A3BC", |
| |
| "e)gij", |
| "A+3AD", |
| "ZAA=8", |
| |
| "efgi[]", |
| "AA9]FE", |
| "7k[3Fz", |
| |
| "as8f/ds", |
| "0DSFAD{", |
| "'iSFkDk", |
| |
| "oieradf+", |
| "IADSFJK-", |
| "k}DSFJk0", |
| |
| // alpha{9} |
| "oieradfab", |
| "IADSFJKDE", |
| "kkDSFJkzf", |
| "123456789", |
| |
| "-0123", |
| "-0123-4567", |
| "0123-4567-", |
| "-123-4567", |
| "_0123", |
| "_0123_4567", |
| "0123_4567_", |
| "_123_4567", |
| |
| "-abcde-figjk", |
| "abcde-figjk-", |
| "-abcde-figjk-", |
| "_abcde_figjk", |
| "abcde_figjk_", |
| "_abcde_figjk_", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setVariant(ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setVariant(\"%s\") should fail but has no Error\n", ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions |
| // keyword = key (sep type)? ; |
| // key = alphanum alpha ; |
| // type = alphanum{3,8} (sep alphanum{3,8})* ; |
| static const char* wellFormed_key_value[] = { |
| "aa", "123", |
| "3b", "zyzbcdef", |
| "0Z", "1ZB30zk9-abc", |
| "cZ", "2ck30zfZ-adsf023-234kcZ", |
| "ZZ", "Lant", |
| "ko", "", |
| }; |
| for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setUnicodeLocaleKeyword(wellFormed_key_value[i], |
| wellFormed_key_value[i + 1]); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n", |
| wellFormed_key_value[i], |
| wellFormed_key_value[i + 1], |
| u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() { |
| static const char* illFormed[] = { |
| "34", |
| "ab-cde", |
| "123", |
| "b3", |
| "zyzabcdef", |
| "Z0", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setUnicodeLocaleKeyword(ill, "abc"); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() { |
| static const char* illFormed[] = { |
| "34", |
| "ab-", |
| "-cd", |
| "-ef-", |
| "zyzabcdef", |
| "ab-abc", |
| "1ZB30zfk9-abc", |
| "2ck30zfk9-adsf023-234kcZ", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setUnicodeLocaleKeyword("ab", ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() { |
| LocaleBuilder bld; |
| UErrorCode status = U_ZERO_ERROR; |
| Locale loc = bld.setLanguage("fr") |
| .addUnicodeLocaleAttribute("abc") |
| .addUnicodeLocaleAttribute("aBc") |
| .addUnicodeLocaleAttribute("EFG") |
| .addUnicodeLocaleAttribute("efghi") |
| .addUnicodeLocaleAttribute("efgh") |
| .addUnicodeLocaleAttribute("efGhi") |
| .addUnicodeLocaleAttribute("EFg") |
| .addUnicodeLocaleAttribute("hijk") |
| .addUnicodeLocaleAttribute("EFG") |
| .addUnicodeLocaleAttribute("HiJK") |
| .addUnicodeLocaleAttribute("aBc") |
| .build(status); |
| if (U_FAILURE(status)) { |
| errln("addUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| std::string expected("fr-u-abc-efg-efgh-efghi-hijk"); |
| std::string actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove "efgh" in the middle with different casing. |
| loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| expected = "fr-u-abc-efg-efghi-hijk"; |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove non-existing attributes. |
| loc = bld.removeUnicodeLocaleAttribute("efgh").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove "abc" in the beginning with different casing. |
| loc = bld.removeUnicodeLocaleAttribute("ABC").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| expected = "fr-u-efg-efghi-hijk"; |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove non-existing substring in the end. |
| loc = bld.removeUnicodeLocaleAttribute("hij").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove "hijk" in the end with different casing. |
| loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| expected = "fr-u-efg-efghi"; |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove "efghi" in the end with different casing. |
| loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| expected = "fr-u-efg"; |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| // remove "efg" in as the only one, with different casing. |
| loc = bld.removeUnicodeLocaleAttribute("EFG").build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute() got Error: %s\n", |
| u_errorName(status)); |
| } |
| expected = "fr"; |
| actual = loc.toLanguageTag<std::string>(status); |
| if (U_FAILURE(status) || expected != actual) { |
| errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str()); |
| } |
| |
| } |
| |
| void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() { |
| // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions |
| // attribute = alphanum{3,8} ; |
| static const char* wellFormedAttributes[] = { |
| // alphanum{3} |
| "AbC", |
| "ZAA", |
| "0AA", |
| "x3A", |
| "xa8", |
| |
| // alphanum{4} |
| "AbCA", |
| "ZASD", |
| "0ASD", |
| "A3a4", |
| "zK90", |
| |
| // alphanum{5} |
| "efgij", |
| "AbCAD", |
| "ZAASD", |
| "0AASD", |
| "A1CAD", |
| "ef2ij", |
| "ads3X", |
| "owqF4", |
| |
| // alphanum{6} |
| "efgijk", |
| "AADGFE", |
| "AkDfFz", |
| "0ADGFE", |
| "A9DfFz", |
| "AADG7E", |
| |
| // alphanum{7} |
| "asdfads", |
| "ADSFADF", |
| "piSFkDk", |
| "a0dfads", |
| "ADSF3DF", |
| "piSFkD9", |
| |
| // alphanum{8} |
| "oieradfz", |
| "IADSFJKR", |
| "kkDSFJkR", |
| }; |
| LocaleBuilder bld; |
| for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) { |
| if (i % 5 == 0) { |
| bld.clear(); |
| } |
| UErrorCode status = U_ZERO_ERROR; |
| bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n", |
| wellFormedAttributes[i], u_errorName(status)); |
| } |
| if (i > 2) { |
| bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]); |
| loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n", |
| wellFormedAttributes[i - 1], u_errorName(status)); |
| } |
| bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]); |
| loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n", |
| wellFormedAttributes[i - 3], u_errorName(status)); |
| } |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() { |
| static const char* illFormed[] = { |
| "aa", |
| "34", |
| "ab-", |
| "-cd", |
| "-ef-", |
| "zyzabcdef", |
| "123456789", |
| "ab-abc", |
| "1ZB30zfk9-abc", |
| "2ck30zfk9-adsf023-234kcZ", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.addUnicodeLocaleAttribute(ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionU() { |
| LocaleBuilder bld; |
| bld.setLanguage("zh"); |
| Verify(bld, "zh", |
| "setLanguage(\"zh\") got Error: %s\n"); |
| |
| bld.setExtension('u', "co-stroke"); |
| Verify(bld, "zh-u-co-stroke", |
| "setExtension('u', \"co-stroke\") got Error: %s\n"); |
| |
| bld.setExtension('U', "ca-islamic"); |
| Verify(bld, "zh-u-ca-islamic", |
| "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n"); |
| |
| bld.setExtension('u', "ca-chinese"); |
| Verify(bld, "zh-u-ca-chinese", |
| "setExtension('u', \"ca-chinese\") got Error: %s\n"); |
| |
| bld.setExtension('U', "co-pinyin"); |
| Verify(bld, "zh-u-co-pinyin", |
| "setExtension('U', \"co-pinyin\") got Error: %s\n"); |
| |
| bld.setRegion("TW"); |
| Verify(bld, "zh-TW-u-co-pinyin", |
| "setRegion(\"TW\") got Error: %s\n"); |
| |
| bld.setExtension('U', ""); |
| Verify(bld, "zh-TW", |
| "setExtension('U', \"\") got Error: %s\n"); |
| |
| bld.setExtension('u', "abc-defg-kr-face"); |
| Verify(bld, "zh-TW-u-abc-defg-kr-face", |
| "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n"); |
| |
| bld.setExtension('U', "ca-japanese"); |
| Verify(bld, "zh-TW-u-ca-japanese", |
| "setExtension('U', \"ca-japanese\") got Error: %s\n"); |
| |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() { |
| static const char* wellFormedExtensions[] = { |
| // keyword |
| // keyword = key (sep type)? ; |
| // key = alphanum alpha ; |
| // type = alphanum{3,8} (sep alphanum{3,8})* ; |
| "3A", |
| "ZA", |
| "az-abc", |
| "zz-123", |
| "7z-12345678", |
| "kb-A234567Z", |
| // (sep keyword)+ |
| "1z-ZZ", |
| "2z-ZZ-123", |
| "3z-ZZ-123-cd", |
| "0z-ZZ-123-cd-efghijkl", |
| // attribute |
| "abc", |
| "456", |
| "87654321", |
| "ZABADFSD", |
| // (sep attribute)+ |
| "abc-ZABADFSD", |
| "123-ZABADFSD", |
| "K2K-12345678", |
| "K2K-12345678-zzz", |
| // (sep attribute)+ (sep keyword)* |
| "K2K-12345678-zz", |
| "K2K-12345678-zz-0z", |
| "K2K-12345678-9z-AZ-abc", |
| "K2K-12345678-zz-9A-234", |
| "K2K-12345678-zk0-abc-efg-zz-9k-234", |
| }; |
| for (const char* extension : wellFormedExtensions) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('u', extension); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setExtension('u', \"%s\") got Error: %s\n", |
| extension, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() { |
| static const char* illFormed[] = { |
| // bad key |
| "-", |
| "-ab", |
| "ab-", |
| "abc-", |
| "-abc", |
| "0", |
| "a", |
| "A0", |
| "z9", |
| "09", |
| "90", |
| // bad keyword |
| "AB-A0", |
| "AB-efg-A0", |
| "xy-123456789", |
| "AB-Aa-", |
| "AB-Aac-", |
| // bad attribute |
| "abcdefghi", |
| "abcdefgh-", |
| "abcdefgh-abcdefghi", |
| "abcdefgh-1", |
| "abcdefgh-a", |
| "abcdefgh-a2345678z", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('u', ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setExtension('u', \"%s\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionT() { |
| LocaleBuilder bld; |
| bld.setLanguage("fr"); |
| Verify(bld, "fr", |
| "setLanguage(\"fr\") got Error: %s\n"); |
| |
| bld.setExtension('T', "zh"); |
| Verify(bld, "fr-t-zh", |
| "setExtension('T', \"zh\") got Error: %s\n"); |
| |
| bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE"); |
| Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde", |
| "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n"); |
| |
| bld.setExtension('T', "a9-123"); |
| Verify(bld, "fr-t-a9-123", |
| "setExtension('T', \"a9-123\") got Error: %s\n"); |
| |
| bld.setRegion("MX"); |
| Verify(bld, "fr-MX-t-a9-123", |
| "setRegion(\"MX\") got Error: %s\n"); |
| |
| bld.setScript("Hans"); |
| Verify(bld, "fr-Hans-MX-t-a9-123", |
| "setScript(\"Hans\") got Error: %s\n"); |
| |
| bld.setVariant("9abc-abcde"); |
| Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123", |
| "setVariant(\"9abc-abcde\") got Error: %s\n"); |
| |
| bld.setExtension('T', ""); |
| Verify(bld, "fr-Hans-MX-9abc-abcde", |
| "bld.setExtension('T', \"\") got Error: %s\n"); |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() { |
| // ((sep tlang (sep tfield)*) | (sep tfield)+) |
| static const char* wellFormedExtensions[] = { |
| // tlang |
| // tlang = unicode_language_subtag (sep unicode_script_subtag)? |
| // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ; |
| // unicode_language_subtag |
| "en", |
| "abc", |
| "abcde", |
| "ABCDEFGH", |
| // unicode_language_subtag sep unicode_script_subtag |
| "en-latn", |
| "abc-arab", |
| "ABCDEFGH-Thai", |
| // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag |
| "en-latn-ME", |
| "abc-arab-RU", |
| "ABCDEFGH-Thai-TH", |
| "en-latn-409", |
| "abc-arab-123", |
| "ABCDEFGH-Thai-456", |
| // unicode_language_subtag sep unicode_region_subtag |
| "en-ME", |
| "abc-RU", |
| "ABCDEFGH-TH", |
| "en-409", |
| "abc-123", |
| "ABCDEFGH-456", |
| // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag |
| // sep (sep unicode_variant_subtag)* |
| "en-latn-ME-abcde", |
| "abc-arab-RU-3abc-abcdef", |
| "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef", |
| "en-latn-409-xafsa", |
| "abc-arab-123-ADASDF", |
| "ABCDEFGH-Thai-456-9sdf-ADASFAS", |
| // (sep tfield)+ |
| "A0-abcde", |
| "z9-abcde123", |
| "z9-abcde123-a1-abcde", |
| // tlang (sep tfield)* |
| "fr-A0-abcde", |
| "fr-FR-A0-abcde", |
| "fr-123-z9-abcde123-a1-abcde", |
| "fr-Latn-FR-z9-abcde123-a1-abcde", |
| "gab-Thai-TH-abcde-z9-abcde123-a1-abcde", |
| "gab-Thai-TH-0bde-z9-abcde123-a1-abcde", |
| }; |
| for (const char* extension : wellFormedExtensions) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('t', extension); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setExtension('t', \"%s\") got Error: %s\n", |
| extension, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() { |
| static const char* illFormed[] = { |
| "a", |
| "a-", |
| "0", |
| "9-", |
| "-9", |
| "-z", |
| "Latn", |
| "Latn-", |
| "en-", |
| "nob-", |
| "-z9", |
| "a3", |
| "a3-", |
| "3a", |
| "0z-", |
| "en-123-a1", |
| "en-TH-a1", |
| "gab-TH-a1", |
| "gab-Thai-a1", |
| "gab-Thai-TH-a1", |
| "gab-Thai-TH-0bde-a1", |
| "gab-Thai-TH-0bde-3b", |
| "gab-Thai-TH-0bde-z9-a1", |
| "gab-Thai-TH-0bde-z9-3b", |
| "gab-Thai-TH-0bde-z9-abcde123-3b", |
| "gab-Thai-TH-0bde-z9-abcde123-ab", |
| "gab-Thai-TH-0bde-z9-abcde123-ab", |
| "gab-Thai-TH-0bde-z9-abcde123-a1", |
| "gab-Thai-TH-0bde-z9-abcde123-a1-", |
| "gab-Thai-TH-0bde-z9-abcde123-a1-a", |
| "gab-Thai-TH-0bde-z9-abcde123-a1-ab", |
| // ICU-21408 |
| "root", |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('t', ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setExtension('t', \"%s\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionPU() { |
| LocaleBuilder bld; |
| bld.setLanguage("ar"); |
| Verify(bld, "ar", |
| "setLanguage(\"ar\") got Error: %s\n"); |
| |
| bld.setExtension('X', "a-b-c-d-e"); |
| Verify(bld, "ar-x-a-b-c-d-e", |
| "setExtension('X', \"a-b-c-d-e\") got Error: %s\n"); |
| |
| bld.setExtension('x', "0-1-2-3"); |
| Verify(bld, "ar-x-0-1-2-3", |
| "setExtension('x', \"0-1-2-3\") got Error: %s\n"); |
| |
| bld.setExtension('X', "0-12345678-x-x"); |
| Verify(bld, "ar-x-0-12345678-x-x", |
| "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n"); |
| |
| bld.setRegion("TH"); |
| Verify(bld, "ar-TH-x-0-12345678-x-x", |
| "setRegion(\"TH\") got Error: %s\n"); |
| |
| bld.setExtension('X', ""); |
| Verify(bld, "ar-TH", |
| "setExtension(\"X\") got Error: %s\n"); |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() { |
| // ((sep tlang (sep tfield)*) | (sep tfield)+) |
| static const char* wellFormedExtensions[] = { |
| "a", // Short subtag |
| "z", // Short subtag |
| "0", // Short subtag, digit |
| "9", // Short subtag, digit |
| "a-0", // Two short subtag, alpha and digit |
| "9-z", // Two short subtag, digit and alpha |
| "ab", |
| "abc", |
| "abcefghi", // Long subtag |
| "87654321", |
| "01", |
| "234", |
| "0a-ab-87654321", // Three subtags |
| "87654321-ab-00-3A", // Four subtabs |
| "a-9-87654321", // Three subtags with short and long subtags |
| "87654321-ab-0-3A", |
| }; |
| for (const char* extension : wellFormedExtensions) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('x', extension); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setExtension('x', \"%s\") got Error: %s\n", |
| extension, u_errorName(status)); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() { |
| static const char* illFormed[] = { |
| "123456789", // Too long |
| "abcdefghi", // Too long |
| "ab-123456789", // Second subtag too long |
| "abcdefghi-12", // First subtag too long |
| "a-ab-987654321", // Third subtag too long |
| "987654321-a-0-3", // First subtag too long |
| }; |
| for (const char* ill : illFormed) { |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension('x', ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setExtension('x', \"%s\") should fail but has no Error\n", |
| ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionOthers() { |
| LocaleBuilder bld; |
| bld.setLanguage("fr"); |
| Verify(bld, "fr", |
| "setLanguage(\"fr\") got Error: %s\n"); |
| |
| bld.setExtension('Z', "ab"); |
| Verify(bld, "fr-z-ab", |
| "setExtension('Z', \"ab\") got Error: %s\n"); |
| |
| bld.setExtension('0', "xyz12345-abcdefg"); |
| Verify(bld, "fr-0-xyz12345-abcdefg-z-ab", |
| "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n"); |
| |
| bld.setExtension('a', "01-12345678-ABcdef"); |
| Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab", |
| "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n"); |
| |
| bld.setRegion("TH"); |
| Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab", |
| "setRegion(\"TH\") got Error: %s\n"); |
| |
| bld.setScript("Arab"); |
| Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab", |
| "setRegion(\"Arab\") got Error: %s\n"); |
| |
| bld.setExtension('A', "97"); |
| Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab", |
| "setExtension('a', \"97\") got Error: %s\n"); |
| |
| bld.setExtension('a', ""); |
| Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab", |
| "setExtension('a', \"\") got Error: %s\n"); |
| |
| bld.setExtension('0', ""); |
| Verify(bld, "fr-Arab-TH-z-ab", |
| "setExtension('0', \"\") got Error: %s\n"); |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() { |
| static const char* wellFormedExtensions[] = { |
| "ab", |
| "abc", |
| "abcefghi", |
| "01", |
| "234", |
| "87654321", |
| "0a-ab-87654321", |
| "87654321-ab-00-3A", |
| }; |
| |
| const char * aToZ = "abcdefghijklmnopqrstuvwxyz"; |
| const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ)); |
| int32_t i = 0; |
| for (const char* extension : wellFormedExtensions) { |
| char ch = aToZ[i]; |
| i = (i + 1) % aToZLen; |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension(ch, extension); |
| Locale loc = bld.build(status); |
| if (U_FAILURE(status)) { |
| errln("setExtension('%c', \"%s\") got Error: %s\n", |
| ch, extension, u_errorName(status)); |
| } |
| } |
| |
| const char* someChars = |
| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?"; |
| const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars)); |
| for (int32_t i = 0; i < someCharsLen; i++) { |
| char ch = someChars[i]; |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]); |
| Locale loc = bld.build(status); |
| if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) { |
| if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') { |
| if (U_FAILURE(status)) { |
| errln("setExtension('%c', \"%s\") got Error: %s\n", |
| ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status)); |
| } |
| } |
| } else { |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setExtension('%c', \"%s\") should fail but has no Error\n", |
| ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]); |
| } |
| } |
| |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() { |
| static const char* illFormed[] = { |
| "0", // Too short |
| "a", // Too short |
| "123456789", // Too long |
| "abcdefghi", // Too long |
| "ab-123456789", // Second subtag too long |
| "abcdefghi-12", // First subtag too long |
| "a-ab-87654321", // Third subtag too long |
| "87654321-a-0-3", // First subtag too long |
| }; |
| const char * aToZ = "abcdefghijklmnopqrstuvwxyz"; |
| const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ)); |
| int32_t i = 0; |
| for (const char* ill : illFormed) { |
| char ch = aToZ[i]; |
| i = (i + 1) % aToZLen; |
| UErrorCode status = U_ZERO_ERROR; |
| LocaleBuilder bld; |
| bld.setExtension(ch, ill); |
| Locale loc = bld.build(status); |
| if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| errln("setExtension('%c', \"%s\") should fail but has no Error\n", |
| ch, ill); |
| } |
| } |
| } |
| |
| void LocaleBuilderTest::TestSetLocale() { |
| LocaleBuilder bld1, bld2; |
| UErrorCode status = U_ZERO_ERROR; |
| Locale l1 = bld1.setLanguage("en") |
| .setScript("Latn") |
| .setRegion("MX") |
| .setVariant("3456-abcde") |
| .addUnicodeLocaleAttribute("456") |
| .addUnicodeLocaleAttribute("123") |
| .setUnicodeLocaleKeyword("nu", "thai") |
| .setUnicodeLocaleKeyword("co", "stroke") |
| .setUnicodeLocaleKeyword("ca", "chinese") |
| .build(status); |
| if (U_FAILURE(status) || l1.isBogus()) { |
| errln("build got Error: %s\n", u_errorName(status)); |
| } |
| status = U_ZERO_ERROR; |
| Locale l2 = bld1.setLocale(l1).build(status); |
| if (U_FAILURE(status) || l2.isBogus()) { |
| errln("build got Error: %s\n", u_errorName(status)); |
| } |
| |
| if (l1 != l2) { |
| errln("Two locales should be the same, but one is '%s' and the other is '%s'", |
| l1.getName(), l2.getName()); |
| } |
| } |
| |
| void LocaleBuilderTest::TestPosixCases() { |
| UErrorCode status = U_ZERO_ERROR; |
| Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status); |
| if (U_FAILURE(status) || l1.isBogus()) { |
| errln("build got Error: %s\n", u_errorName(status)); |
| } |
| LocaleBuilder bld; |
| bld.setLanguage("en") |
| .setRegion("MX") |
| .setScript("Arab") |
| .setUnicodeLocaleKeyword("nu", "Thai") |
| .setExtension('x', "1"); |
| // All of above should be cleared by the setLocale call. |
| Locale l2 = bld.setLocale(l1).build(status); |
| if (U_FAILURE(status) || l2.isBogus()) { |
| errln("build got Error: %s\n", u_errorName(status)); |
| } |
| if (l1 != l2) { |
| errln("The result locale should be the set as the setLocale %s but got %s\n", |
| l1.toLanguageTag<std::string>(status).c_str(), |
| l2.toLanguageTag<std::string>(status).c_str()); |
| } |
| Locale posix("en-US-POSIX"); |
| if (posix != l2) { |
| errln("The result locale should be the set as %s but got %s\n", |
| posix.getName(), l2.getName()); |
| } |
| } |