ICU-21414 Fix toLanguageTag x-private
Also fix ICU-21433: forLanguageTag when the tag has both variants and an
-x- (private use) extension
diff --git a/icu4c/source/common/loclikelysubtags.cpp b/icu4c/source/common/loclikelysubtags.cpp
index a031bfa..aa592e6 100644
--- a/icu4c/source/common/loclikelysubtags.cpp
+++ b/icu4c/source/common/loclikelysubtags.cpp
@@ -320,7 +320,8 @@
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
- // Private use language tag x-subtag-subtag...
+ // Private use language tag x-subtag-subtag... which CLDR changes to
+ // und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index b44e49b4..ee1cedf 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -1794,11 +1794,6 @@
return;
}
- /* Determine if variants already exists */
- if (ultag_getVariantsSize(langtag)) {
- posixVariant = TRUE;
- }
-
n = ultag_getExtensionsSize(langtag);
/* resolve locale keywords and reordering keys */
@@ -1806,6 +1801,11 @@
key = ultag_getExtensionKey(langtag, i);
type = ultag_getExtensionValue(langtag, i);
if (*key == LDMLEXT) {
+ /* Determine if variants already exist */
+ if (ultag_getVariantsSize(langtag)) {
+ posixVariant = TRUE;
+ }
+
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
if (U_FAILURE(*status)) {
break;
@@ -2711,8 +2711,7 @@
if (U_SUCCESS(tmpStatus)) {
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
/* return private use only tag */
- static const char PREFIX[] = { PRIVATEUSE, SEP };
- sink.Append(PREFIX, sizeof(PREFIX));
+ sink.Append("und-x-", 6);
sink.Append(buf.data(), buf.length());
done = TRUE;
} else if (strict) {
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 61cfd4c..7312453 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -6164,7 +6164,7 @@
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic", NULL},
{"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
{"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
- {"@x=elmer", "x-elmer", "x-elmer"},
+ {"@x=elmer", "und-x-elmer", "und-x-elmer"},
{"en@x=elmer", "en-x-elmer", "en-x-elmer"},
{"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
{"en_US@attribute=attr1-attr2;calendar=gregorian", "en-US-u-attr1-attr2-ca-gregory", "en-US-u-attr1-attr2-ca-gregory"},
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index 3538cc8..36b3728 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -5584,6 +5584,31 @@
Locale result_ext = Locale::forLanguageTag(tag_ext, status);
status.errIfFailureAndReset("\"%s\"", tag_ext);
assertEquals(tag_ext, loc_ext.getName(), result_ext.getName());
+
+ static const struct {
+ const char *inputTag; /* input */
+ const char *expectedID; /* expected forLanguageTag().getName() result */
+ } testCases[] = {
+ // ICU-21433: variants combined with a private use (-x-) extension
+ {"und-1994-biske-rozaj", "__1994_BISKE_ROZAJ"},
+ {"de-1994-biske-rozaj", "de__1994_BISKE_ROZAJ"},
+ {"und-x-private", "@x=private"},
+ {"de-1994-biske-rozaj-x-private", "de__1994_BISKE_ROZAJ@x=private"},
+ {"und-1994-biske-rozaj-x-private", "__1994_BISKE_ROZAJ@x=private"},
+ };
+ int32_t i;
+ for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
+ UErrorCode status = U_ZERO_ERROR;
+ std::string otag = testCases[i].inputTag;
+ std::string tag = Locale::forLanguageTag(otag.c_str(), status).getName();
+ if (tag != testCases[i].expectedID) {
+ errcheckln(status, "FAIL: %s should be forLanguageTag to %s but got %s - %s",
+ otag.c_str(),
+ testCases[i].expectedID,
+ tag.c_str(),
+ u_errorName(status));
+ }
+ }
}
void LocaleTest::TestToLanguageTag() {
@@ -5643,6 +5668,33 @@
std::string result_bogus = loc_bogus.toLanguageTag<std::string>(status);
assertEquals("bogus", U_ILLEGAL_ARGUMENT_ERROR, status.reset());
assertTrue(result_bogus.c_str(), result_bogus.empty());
+
+ static const struct {
+ const char *localeID; /* input */
+ const char *expectedID; /* expected toLanguageTag() result */
+ } testCases[] = {
+ /* ICU-21414 */
+ {"und-x-abc-private", "und-x-abc-private"},
+ {"und-x-private", "und-x-private"},
+ {"und-u-ca-roc-x-private", "und-u-ca-roc-x-private"},
+ {"und-US-x-private", "und-US-x-private"},
+ {"und-Latn-x-private", "und-Latn-x-private"},
+ {"und-1994-biske-rozaj", "und-1994-biske-rozaj"},
+ {"und-1994-biske-rozaj-x-private", "und-1994-biske-rozaj-x-private"},
+ };
+ int32_t i;
+ for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
+ UErrorCode status = U_ZERO_ERROR;
+ std::string otag = testCases[i].localeID;
+ std::string tag = Locale::forLanguageTag(otag.c_str(), status).toLanguageTag<std::string>(status);
+ if (tag != testCases[i].expectedID) {
+ errcheckln(status, "FAIL: %s should be toLanguageTag to %s but got %s - %s",
+ otag.c_str(),
+ testCases[i].expectedID,
+ tag.c_str(),
+ u_errorName(status));
+ }
+ }
}
/* ICU-20310 */
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
index e2f0a6b..91d70a3 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
@@ -189,8 +189,9 @@
String name = locale.getName(); // Faster than .toLanguageTag().
if (name.startsWith("@x=")) {
String tag = locale.toLanguageTag();
- assert tag.startsWith("x-");
- // Private use language tag x-subtag-subtag...
+ assert tag.startsWith("und-x-");
+ // Private use language tag x-subtag-subtag... which CLDR changes to
+ // und-x-subtag-subtag...
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
@@ -199,8 +200,9 @@
public LSR makeMaximizedLsrFrom(Locale locale) {
String tag = locale.toLanguageTag();
- if (tag.startsWith("x-")) {
- // Private use language tag x-subtag-subtag...
+ if (tag.startsWith("x-") || tag.startsWith("und-x-")) {
+ // Private use language tag x-subtag-subtag... which CLDR changes to
+ // und-x-subtag-subtag...
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index fee70ab..981a348 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -3544,9 +3544,10 @@
subtag = tag.getPrivateuse();
if (subtag.length() > 0) {
- if (buf.length() > 0) {
- buf.append(LanguageTag.SEP);
+ if (buf.length() == 0) {
+ buf.append(UNDEFINED_LANGUAGE);
}
+ buf.append(LanguageTag.SEP);
buf.append(LanguageTag.PRIVATEUSE).append(LanguageTag.SEP);
buf.append(LanguageTag.canonicalizePrivateuse(subtag));
}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index 0883273..f71b879 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -4092,7 +4092,7 @@
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic"},
{"en_US_POSIX", "en-US-u-va-posix"},
{"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix"},
- {"@x=elmer", "x-elmer"},
+ {"@x=elmer", "und-x-elmer"},
{"_US@x=elmer", "und-US-x-elmer"},
/* #12671 */
{"en@a=bar;attribute=baz", "en-a-bar-u-baz"},
@@ -4113,6 +4113,17 @@
{"de-u-co", "de-u-co"},
{"de@collation=yes", "de-u-co"},
{"cmn-hans-cn-u-ca-t-ca-x-t-u", "cmn-Hans-CN-t-ca-u-ca-x-t-u"},
+ /* ICU-21414 */
+ {"und-CN", "und-CN"},
+ {"und-Latn", "und-Latn"},
+ {"und-u-ca-roc", "und-u-ca-roc"},
+ {"und-x-abc-private", "und-x-abc-private"},
+ {"und-x-private", "und-x-private"},
+ {"und-u-ca-roc-x-private", "und-u-ca-roc-x-private"},
+ {"und-US-x-private", "und-US-x-private"},
+ {"und-Latn-x-private", "und-Latn-x-private"},
+ {"und-1994-biske-rozaj", "und-1994-biske-rozaj"},
+ {"und-1994-biske-rozaj-x-private", "und-1994-biske-rozaj-x-private"},
};
for (int i = 0; i < locale_to_langtag.length; i++) {
@@ -4266,6 +4277,12 @@
{"sl-1994-biske-rozaj", "sl__1994_BISKE_ROZAJ", NOERROR},
{"en-fonipa-scouse", "en__FONIPA_SCOUSE", NOERROR},
{"en-scouse-fonipa", "en__FONIPA_SCOUSE", NOERROR},
+ /* ICU-21433 */
+ {"und-1994-biske-rozaj", "__1994_BISKE_ROZAJ", NOERROR},
+ {"de-1994-biske-rozaj", "de__1994_BISKE_ROZAJ", NOERROR},
+ {"und-x-private", "@x=private", NOERROR},
+ {"de-1994-biske-rozaj-x-private", "de__1994_BISKE_ROZAJ@x=private", NOERROR},
+ {"und-1994-biske-rozaj-x-private", "__1994_BISKE_ROZAJ@x=private", NOERROR},
};
for (int i = 0; i < langtag_to_locale.length; i++) {