ICU-21402 replace sd and rg by subdivisionAlias
See #1475
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index feadbcb..7b259da 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -627,6 +627,17 @@
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
+
+ // Read the subdivisionAlias data from alias to
+ // strings+types+replacementIndexes.
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement subdivision.
+ void readSubdivisionAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
};
/**
@@ -647,6 +658,7 @@
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
+ // Maps a subdivision alias to its replacement string; looked up by
+ // AliasReplacer::replaceSubdivision().
+ const CharStringMap& subdivisionMap() const { return subdivision; }
static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
@@ -658,11 +670,13 @@
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
+ CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
+ subdivision(std::move(subdivisionMap)),
strings(strings) {
}
@@ -676,6 +690,7 @@
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
+ CharStringMap subdivision;
CharString* strings;
friend class AliasDataBuilder;
@@ -867,6 +882,34 @@
}
/**
+ * Read the subdivisionAlias data from alias into
+ * strings+types+replacementIndexes by delegating to readAlias().
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement subdivisions.
+ * In debug builds every type is asserted to be 3 to 8 characters long; the
+ * replacement strings are not checked here.
+ */
+void
+AliasDataBuilder::readSubdivisionAlias(
+    UResourceBundle* alias,
+    UniqueCharStrings* strings,
+    LocalMemory<const char*>& types,
+    LocalMemory<int32_t>& replacementIndexes,
+    int32_t &length,
+    UErrorCode &status)
+{
+#if U_DEBUG
+    // Sanity check on each alias key (debug builds only).
+    auto checkType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
+    };
+#else
+    // Parameter left unnamed: there is nothing to check in release builds.
+    auto checkType = [](const char*) {};
+#endif
+    // Replacement values are accepted as-is at load time.
+    auto checkReplacement = [](const UnicodeString&) { };
+
+    readAlias(alias, strings, types, replacementIndexes, length,
+              checkType, checkReplacement, status);
+}
+
+/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
*
@@ -905,12 +948,14 @@
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+ LocalUResourceBundlePointer subdivisionAlias(
+ ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
- variantLength = 0;
+ variantLength = 0, subdivisionLength = 0;
// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
@@ -955,6 +1000,16 @@
variantReplacementIndexes,
variantLength, status);
+ // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
+ // and strings
+ LocalMemory<const char*> subdivisionTypes;
+ LocalMemory<int32_t> subdivisionReplacementIndexes;
+ readSubdivisionAlias(subdivisionAlias.getAlias(),
+ &strings,
+ subdivisionTypes,
+ subdivisionReplacementIndexes,
+ subdivisionLength, status);
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -994,6 +1049,14 @@
status);
}
+ // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
+ CharStringMap subdivisionMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
+ subdivisionMap.put(subdivisionTypes[i],
+ strings.get(subdivisionReplacementIndexes[i]),
+ status);
+ }
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -1004,6 +1067,7 @@
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
+ std::move(subdivisionMap),
strings.orphanCharStrings());
if (data == nullptr) {
@@ -1105,6 +1169,9 @@
// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);
+
+ // Replace by using subdivisionAlias: looks up `subdivision` in the
+ // subdivision alias map and may overwrite it in place with the first
+ // space-separated replacement value. Returns false when status is already
+ // a failure or when the map has no entry for `subdivision`.
+ bool replaceSubdivision(CharString& subdivision, UErrorCode& status);
};
CharString&
@@ -1433,6 +1500,27 @@
return false;
}
+// Rewrites `subdivision` (the value of an "sd" or "rg" keyword) in place with
+// the first space-separated entry of its subdivisionAlias replacement.
+//
+// Returns true only when `subdivision` was actually rewritten. Returns false
+// when `status` is already a failure, when there is no alias entry, or when
+// the replacement is not 3..8 characters long (e.g. the 2-letter region
+// replacements tracked by CLDR-14312). Reporting "changed" for a skipped
+// replacement would make the caller count a modification that never happened.
+bool
+AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    const char *replacement = data->subdivisionMap().get(subdivision.data());
+    if (replacement == nullptr) {
+        // No alias data for this subdivision.
+        return false;
+    }
+    // The replacement may hold several candidates separated by spaces;
+    // only the first one is used.
+    const char* firstSpace = uprv_strchr(replacement, ' ');
+    size_t len = (firstSpace != nullptr) ?
+        (firstSpace - replacement) : uprv_strlen(replacement);
+    // Ignore len == 2, see CLDR-14312
+    if (len < 3 || len > 8) {
+        // Leave the value untouched and report that nothing was replaced.
+        return false;
+    }
+    subdivision.clear().append(replacement, static_cast<int32_t>(len), status);
+    return true;
+}
+
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
@@ -1495,7 +1583,6 @@
region = nullptr;
}
const char* variantsStr = locale.getVariant();
- const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
@@ -1559,11 +1646,42 @@
if (U_FAILURE(status)) { return false; }
// Nothing changed and we know the order of the vaiants are not change
// because we have no variant or only one.
- if (changed == 0 && variants.size() <= 1) {
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
if (extensionsStr != nullptr) {
+ changed = 0;
+ Locale temp(locale);
+ LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
+ if (U_SUCCESS(status) && !iter.isNull()) {
+ const char* key;
+ while ((key = iter->next(nullptr, status)) != nullptr) {
+ if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) {
+ CharString value;
+ CharStringByteSink valueSink(&value);
+ locale.getKeywordValue(key, valueSink, status);
+ if (U_FAILURE(status)) {
+ status = U_ZERO_ERROR;
+ continue;
+ }
+ if (replaceSubdivision(value, status)) {
+ changed++;
+ }
+ temp.setKeywordValue(key, value.data(), status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ }
+ }
+ }
+ if (changed != 0) {
+ extensionsStr = locale_getKeywordsStart(temp.getName());
+ }
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
@@ -1572,7 +1690,6 @@
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
U_ASSERT(changed == 0);
- U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index d12336b..4bb402c 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -4916,8 +4916,24 @@
// ICU-21344
{ "ku-Arab-NT", "ku-Arab-IQ"},
+ // ICU-21402
+ { "und-u-rg-no23", "und-u-rg-no50"},
+ { "und-u-rg-cn11", "und-u-rg-cnbj"},
+ { "und-u-rg-cz10a", "und-u-rg-cz110"},
+ { "und-u-rg-fra", "und-u-rg-frges"},
+ { "und-u-rg-frg", "und-u-rg-frges"},
+ { "und-u-rg-lud", "und-u-rg-lucl"},
+
+ { "und-NO-u-sd-no23", "und-NO-u-sd-no50"},
+ { "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"},
+ { "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"},
+ { "und-FR-u-sd-fra", "und-FR-u-sd-frges"},
+ { "und-FR-u-sd-frg", "und-FR-u-sd-frges"},
+ { "und-LU-u-sd-lud", "und-LU-u-sd-lucl"},
+
// ICU-21401
{ "cel-gaulish", "xtg"},
+
};
int32_t i;
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index 9faa3c4..3a19c29 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -1268,12 +1268,33 @@
// Nothing changed in this iteration, break out the loop
break;
} // while(1)
- if (changed) {
- String result = lscvToID(language, script, region,
+ if (extensions == null && !changed) {
+ return null;
+ }
+ String result = lscvToID(language, script, region,
((variants == null) ? "" : Utility.joinStrings("_", variants)));
- if (extensions != null) {
- result += extensions;
+ if (extensions != null) {
+ boolean keywordChanged = false;
+ ULocale temp = new ULocale(result + extensions);
+ Iterator<String> keywords = temp.getKeywords();
+ while (keywords != null && keywords.hasNext()) {
+ String key = keywords.next();
+ if (key.equals("rg") || key.equals("sd")) {
+ String value = temp.getKeywordValue(key);
+ String replacement = replaceSubdivision(value);
+ if (replacement != null) {
+ temp = temp.setKeywordValue(key, replacement);
+ keywordChanged = true;
+ }
+ }
}
+ if (keywordChanged) {
+ extensions = temp.getName().substring(temp.getBaseName().length());
+ changed = true;
+ }
+ result += extensions;
+ }
+ if (changed) {
return result;
}
// Nothing changed in any iteration of the loop.
@@ -1285,6 +1306,7 @@
private static Map<String, String> scriptAliasMap = null;
private static Map<String, List<String>> territoryAliasMap = null;
private static Map<String, String> variantAliasMap = null;
+ private static Map<String, String> subdivisionAliasMap = null;
/*
* Initializes the alias data from the ICU resource bundles. The alias
@@ -1302,6 +1324,7 @@
scriptAliasMap = new HashMap<>();
territoryAliasMap = new HashMap<>();
variantAliasMap = new HashMap<>();
+ subdivisionAliasMap = new HashMap<>();
UResourceBundle metadata = UResourceBundle.getBundleInstance(
ICUData.ICU_BASE_NAME, "metadata",
@@ -1311,6 +1334,7 @@
UResourceBundle scriptAlias = metadataAlias.get("script");
UResourceBundle territoryAlias = metadataAlias.get("territory");
UResourceBundle variantAlias = metadataAlias.get("variant");
+ UResourceBundle subdivisionAlias = metadataAlias.get("subdivision");
for (int i = 0 ; i < languageAlias.getSize(); i++) {
UResourceBundle res = languageAlias.get(i);
@@ -1369,6 +1393,22 @@
}
variantAliasMap.put(aliasFrom, aliasTo);
}
+        // Load subdivisionAlias: each entry's key is the alias and its
+        // "replacement" string is a space-separated list of which only the
+        // first item is kept.
+        for (int i = 0 ; i < subdivisionAlias.getSize(); i++) {
+            UResourceBundle res = subdivisionAlias.get(i);
+            String aliasFrom = res.getKey();
+            String aliasTo = res.get("replacement").getString().split(" ")[0];
+            if (aliasFrom.length() < 3 || aliasFrom.length() > 8) {
+                throw new IllegalArgumentException(
+                        "Incorrect key [" + aliasFrom + "] in alias:subdivision.");
+            }
+            if (aliasTo.length() < 3 || aliasTo.length() > 8) {
+                // Ignore replacement < 3 for now. see CLDR-14312
+                // throw new IllegalArgumentException(
+                //     "Incorrect value [" + aliasTo + "] in alias:subdivision.");
+                continue;
+            }
+            subdivisionAliasMap.put(aliasFrom, aliasTo);
+        }
aliasDataIsLoaded = true;
}
@@ -1591,6 +1631,11 @@
}
return false;
}
+
+ /**
+ * Looks up the replacement recorded in subdivisionAliasMap for the given
+ * subdivision value.
+ *
+ * @param subdivision the subdivision value to look up.
+ * @return the mapped replacement, or null when the map has no entry for it.
+ */
+ private String replaceSubdivision(String subdivision) {
+ return subdivisionAliasMap.get(subdivision);
+ }
+
};
/**
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index 2868ddd..06eef5f 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -5216,6 +5216,21 @@
// ICU-21344
Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT"));
+ // ICU-21402
+ Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23"));
+ Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11"));
+ Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a"));
+ Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra"));
+ Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg"));
+ Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud"));
+
+ Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23"));
+ Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11"));
+ Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a"));
+ Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra"));
+ Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg"));
+ Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud"));
+
// ICU-21401
Assert.assertEquals("xtg", canonicalTag("cel-gaulish"));
}