ICU-20321 Fix ultag_isUnicodeLocaleKey
Return false when passed a two-character key of the form "alphanum digit" (second character is a digit).
Sync with UTS35
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
Address review feedback; add unit tests.
Fix Java too
add test cases to c++
Fix format
change test case
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index ce5ab9d..bdc0652 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -608,7 +608,7 @@
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
- if (len == 2 && _isAlphaNumericString(s, len)) {
+ if (len == 2 && (ISALPHA(*s) || ISNUMERIC(*s)) && ISALPHA(s[1])) {
return TRUE;
}
return FALSE;
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 2268486..99f9970 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -53,6 +53,7 @@
static void TestIsRightToLeft(void);
static void TestBadLocaleIDs(void);
static void TestBug20370(void);
+static void TestBug20321UnicodeLocaleKey(void);
void PrintDataTable();
@@ -268,6 +269,7 @@
TESTCASE(TestToLegacyType);
TESTCASE(TestBadLocaleIDs);
TESTCASE(TestBug20370);
+ TESTCASE(TestBug20321UnicodeLocaleKey);
}
@@ -6287,6 +6289,39 @@
}
}
+static void TestBug20321UnicodeLocaleKey(void)
+{
+    /* ICU-20321: per UTS #35, key = alphanum alpha; i.e. exactly two characters, a letter or digit then a letter. */
+    static const char* invalid[] = {
+        "a0",   /* second character is a digit */
+        "00",   /* second character is a digit */
+        "a@",   /* second character is not a letter */
+        "0@",   /* second character is not a letter */
+        "@a",   /* first character is not alphanumeric */
+        "@0",   /* first character is not alphanumeric and second is a digit */
+        "abc",  /* three characters */
+        "0bc",  /* three characters */
+    };
+    /* Every malformed key is expected to map to NULL. */
+    for (int i = 0; i < UPRV_LENGTHOF(invalid); i++) {
+        const char* bcpKey = uloc_toUnicodeLocaleKey(invalid[i]);
+        if (bcpKey != NULL) {
+            log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", invalid[i], bcpKey);
+        }
+    }
+    static const char* valid[] = {
+        "aa",   /* letter followed by letter */
+        "0a",   /* digit followed by letter */
+    };
+    /* Every well-formed key is expected to map to a non-NULL result. */
+    for (int i = 0; i < UPRV_LENGTHOF(valid); i++) {
+        const char* bcpKey = uloc_toUnicodeLocaleKey(valid[i]);
+        if (bcpKey == NULL) {
+            log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected!=NULL\n", valid[i]);
+        }
+    }
+}
+
static void TestToLegacyKey(void)
{
/* $IN specifies the result should be the input pointer itself */
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java
index bc27a49..c0e820f 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java
@@ -93,8 +93,9 @@
}
public static boolean isKey(String s) {
- // 2alphanum
- return (s.length() == 2) && AsciiUtil.isAlphaNumericString(s);
+ // UTS #35: key = alphanum alpha ; exactly two characters, a letter or digit followed by a letter.
+ return (s.length() == 2) && AsciiUtil.isAlphaNumeric(s.charAt(0)) &&
+ AsciiUtil.isAlpha(s.charAt(1));
}
public static boolean isTypeSubtag(String s) {
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
index 772d0ea..71002ba 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
@@ -82,12 +82,12 @@
{"E", "z", "ExtZ", "L", "en", "E", "z", null, "T", "en", "en"},
{"E", "a", "x", "X"},
{"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
- // Design limitation - typeless u extension keyword 00 below is interpreted as a boolean value true/yes.
+ // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
// With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
- // However, once the legacy keyword is translated back to BCP 47 u extension, key "00" is unknown,
+ // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
// so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
- // "yes" to "true", but it will break roundtrip conversion if BCP 47 u extension has "00-yes".
- {"L", "en", "E", "u", "bbb-aaa-00", "T", "en-u-aaa-bbb-00-yes", "en@00=yes;attribute=aaa-bbb"},
+ // "yes" to "true", but it will break roundtrip conversion if BCP 47 u extension has "0a-yes".
+ {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes", "en@0a=yes;attribute=aaa-bbb"},
{"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu", "fr_FR@x=yoshito-icu"},
{"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese", "ja_JP@calendar=japanese"},
{"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T", "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index ba772a1..8ffc478 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -4697,6 +4697,36 @@
}
@Test
+    public void TestBug20321UnicodeLocaleKey() {
+        // ICU-20321: per UTS #35, key = alphanum alpha ; exactly two characters, a letter or digit then a letter.
+        String[] INVALID = {
+            "a0",   // second character is a digit
+            "00",   // second character is a digit
+            "a@",   // second character is not a letter
+            "0@",   // second character is not a letter
+            "@a",   // first character is not alphanumeric
+            "@0",   // first character is not alphanumeric and second is a digit
+            "abc",  // three characters
+            "0bc",  // three characters
+        };
+
+        for (String invalid : INVALID) {  // every malformed key must map to null
+            String bcpKey = ULocale.toUnicodeLocaleKey(invalid);
+            assertNull("keyword=" + invalid, bcpKey);
+        }
+
+        String[] VALID = {
+            "aa",   // letter followed by letter
+            "0a",   // digit followed by letter
+        };
+
+        for (String valid : VALID) {  // every well-formed key must be returned unchanged
+            String bcpKey = ULocale.toUnicodeLocaleKey(valid);
+            assertEquals("keyword=" + valid, valid, bcpKey);
+        }
+    }
+
+ @Test
public void TestToLegacyKey() {
String[][] DATA = {
{"kb", "colbackwards"},