ICU-21402 replace sd and rg by subdivisionAlias

See #1475
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index feadbcb..7b259da 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -627,6 +627,17 @@
                           LocalMemory<const char*>& types,
                           LocalMemory<int32_t>& replacementIndexes,
                           int32_t &length, UErrorCode &status);
+
+    // Read the subdivisionAlias data from alias to
+    // strings+types+replacementIndexes.
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement subdivision.
+    void readSubdivisionAlias(UResourceBundle* alias,
+                          UniqueCharStrings* strings,
+                          LocalMemory<const char*>& types,
+                          LocalMemory<int32_t>& replacementIndexes,
+                          int32_t &length, UErrorCode &status);
 };
 
 /**
@@ -647,6 +658,7 @@
     const CharStringMap& scriptMap() const { return script; }
     const CharStringMap& territoryMap() const { return territory; }
     const CharStringMap& variantMap() const { return variant; }
+    const CharStringMap& subdivisionMap() const { return subdivision; }  // subdivision type -> replacement
 
     static void U_CALLCONV loadData(UErrorCode &status);
     static UBool U_CALLCONV cleanup();
@@ -658,11 +670,13 @@
               CharStringMap scriptMap,
               CharStringMap territoryMap,
               CharStringMap variantMap,
+              CharStringMap subdivisionMap,
               CharString* strings)
         : language(std::move(languageMap)),
           script(std::move(scriptMap)),
           territory(std::move(territoryMap)),
           variant(std::move(variantMap)),
+          subdivision(std::move(subdivisionMap)),
           strings(strings) {
     }
 
@@ -676,6 +690,7 @@
     CharStringMap script;
     CharStringMap territory;
     CharStringMap variant;
+    CharStringMap subdivision;
     CharString* strings;
 
     friend class AliasDataBuilder;
@@ -867,6 +882,34 @@
 }
 
 /**
+ * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement subdivisions.
+ */
+void
+AliasDataBuilder::readSubdivisionAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+    return readAlias(
+        alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+        [](const char* type) {  // debug builds: the type must be 3..8 chars long
+            U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
+        },
+#else
+        [](const char*) {},  // non-debug builds: no check on the type
+#endif
+        [](const UnicodeString&) { },  // presumably the replacement check; accepts anything
+        status);
+}
+
+/**
  * Initializes the alias data from the ICU resource bundles. The alias data
  * contains alias of language, country, script and variants.
  *
@@ -905,12 +948,14 @@
         ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
     LocalUResourceBundlePointer variantAlias(
         ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+    LocalUResourceBundlePointer subdivisionAlias(
+        ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
 
     if (U_FAILURE(status)) {
         return nullptr;
     }
     int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
-            variantLength = 0;
+            variantLength = 0, subdivisionLength = 0;
 
     // Read the languageAlias into languageTypes, languageReplacementIndexes
     // and strings
@@ -955,6 +1000,16 @@
                      variantReplacementIndexes,
                      variantLength, status);
 
+    // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
+    // and strings
+    LocalMemory<const char*> subdivisionTypes;
+    LocalMemory<int32_t> subdivisionReplacementIndexes;
+    readSubdivisionAlias(subdivisionAlias.getAlias(),
+                         &strings,
+                         subdivisionTypes,
+                         subdivisionReplacementIndexes,
+                         subdivisionLength, status);
+
     if (U_FAILURE(status)) {
         return nullptr;
     }
@@ -994,6 +1049,14 @@
                        status);
     }
 
+    // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
+    CharStringMap subdivisionMap(2, status);
+    for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
+        subdivisionMap.put(subdivisionTypes[i],
+                       strings.get(subdivisionReplacementIndexes[i]),
+                       status);
+    }
+
     if (U_FAILURE(status)) {
         return nullptr;
     }
@@ -1004,6 +1067,7 @@
         std::move(scriptMap),
         std::move(territoryMap),
         std::move(variantMap),
+        std::move(subdivisionMap),
         strings.orphanCharStrings());
 
     if (data == nullptr) {
@@ -1105,6 +1169,9 @@
 
     // Replace by using variantAlias.
     bool replaceVariant(UErrorCode& status);
+
+    // Replace subdivision, in place, by using subdivisionAlias.
+    bool replaceSubdivision(CharString& subdivision, UErrorCode& status);
 };
 
 CharString&
@@ -1433,6 +1500,27 @@
     return false;
 }
 
+// Replace subdivision, in place, with the first replacement listed for it in
+// subdivisionAlias. Returns true only when the value was actually rewritten.
+bool
+AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status)
+{
+    if (U_FAILURE(status)) { return false; }
+    const char *replacement = data->subdivisionMap().get(subdivision.data());
+    if (replacement == nullptr) {
+        return false;
+    }
+    // The data may list several space-separated replacements; use the first.
+    const char* firstSpace = uprv_strchr(replacement, ' ');
+    size_t len = (firstSpace != nullptr) ?
+        (firstSpace - replacement) : uprv_strlen(replacement);
+    // Ignore len == 2, see CLDR-14312. Nothing is replaced in that case, so it
+    // must not be reported as a change to the caller.
+    if (len < 3 || len > 8) { return false; }
+    subdivision.clear().append(replacement, (int32_t)len, status);
+    return U_SUCCESS(status);
+}
+
 CharString&
 AliasReplacer::outputToString(
     CharString& out, UErrorCode status)
@@ -1495,7 +1583,6 @@
         region = nullptr;
     }
     const char* variantsStr = locale.getVariant();
-    const char* extensionsStr = locale_getKeywordsStart(locale.getName());
     CharString variantsBuff(variantsStr, -1, status);
     if (!variantsBuff.isEmpty()) {
         if (U_FAILURE(status)) { return false; }
@@ -1559,11 +1646,42 @@
     if (U_FAILURE(status)) { return false; }
     // Nothing changed and we know the order of the vaiants are not change
     // because we have no variant or only one.
-    if (changed == 0 && variants.size() <= 1) {
+    const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+    if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
         return false;
     }
     outputToString(out, status);
+    if (U_FAILURE(status)) {
+        return false;
+    }
     if (extensionsStr != nullptr) {
+        changed = 0;
+        Locale temp(locale);
+        LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
+        if (U_SUCCESS(status) && !iter.isNull()) {
+            const char* key;
+            while ((key = iter->next(nullptr, status)) != nullptr) {
+                if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) {
+                    CharString value;
+                    CharStringByteSink valueSink(&value);
+                    locale.getKeywordValue(key, valueSink, status);
+                    if (U_FAILURE(status)) {
+                        status = U_ZERO_ERROR;
+                        continue;
+                    }
+                    if (replaceSubdivision(value, status)) {
+                        changed++;
+                    }
+                    temp.setKeywordValue(key, value.data(), status);
+                    if (U_FAILURE(status)) {
+                        return false;
+                    }
+                }
+            }
+        }
+        if (changed != 0) {
+            extensionsStr = locale_getKeywordsStart(temp.getName());
+        }
         out.append(extensionsStr, status);
     }
     if (U_FAILURE(status)) {
@@ -1572,7 +1690,6 @@
     // If the tag is not changed, return.
     if (uprv_strcmp(out.data(), locale.getName()) == 0) {
         U_ASSERT(changed == 0);
-        U_ASSERT(variants.size() > 1);
         out.clear();
         return false;
     }
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index d12336b..4bb402c 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -4916,8 +4916,24 @@
         // ICU-21344
         { "ku-Arab-NT", "ku-Arab-IQ"},
 
+        // ICU-21402
+        { "und-u-rg-no23", "und-u-rg-no50"},
+        { "und-u-rg-cn11", "und-u-rg-cnbj"},
+        { "und-u-rg-cz10a", "und-u-rg-cz110"},
+        { "und-u-rg-fra", "und-u-rg-frges"},
+        { "und-u-rg-frg", "und-u-rg-frges"},
+        { "und-u-rg-lud", "und-u-rg-lucl"},
+
+        { "und-NO-u-sd-no23", "und-NO-u-sd-no50"},
+        { "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"},
+        { "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"},
+        { "und-FR-u-sd-fra", "und-FR-u-sd-frges"},
+        { "und-FR-u-sd-frg", "und-FR-u-sd-frges"},
+        { "und-LU-u-sd-lud", "und-LU-u-sd-lucl"},
+
         // ICU-21401
         { "cel-gaulish", "xtg"},
+
     };
     int32_t i;
     for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index 9faa3c4..3a19c29 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -1268,12 +1268,33 @@
                 // Nothing changed in this iteration, break out the loop
                 break;
             }  // while(1)
-            if (changed) {
-                String result =  lscvToID(language, script, region,
+            if (extensions == null && !changed) {
+                return null;
+            }
+            String result =  lscvToID(language, script, region,
                     ((variants == null) ? "" : Utility.joinStrings("_", variants)));
-                if (extensions != null) {
-                    result += extensions;
+            if (extensions != null) {
+                boolean keywordChanged = false;
+                ULocale temp = new ULocale(result + extensions);
+                Iterator<String> keywords = temp.getKeywords();
+                while (keywords != null && keywords.hasNext()) {
+                    String key = keywords.next();
+                    if (key.equals("rg") || key.equals("sd")) {
+                        String value = temp.getKeywordValue(key);
+                        String replacement = replaceSubdivision(value);
+                        if (replacement != null) {
+                            temp = temp.setKeywordValue(key, replacement);
+                            keywordChanged = true;
+                        }
+                    }
                 }
+                if (keywordChanged) {
+                    extensions = temp.getName().substring(temp.getBaseName().length());
+                    changed = true;
+                }
+                result += extensions;
+            }
+            if (changed) {
                 return result;
             }
             // Nothing changed in any iteration of the loop.
@@ -1285,6 +1306,7 @@
         private static Map<String, String> scriptAliasMap = null;
         private static Map<String, List<String>> territoryAliasMap = null;
         private static Map<String, String> variantAliasMap = null;
+        private static Map<String, String> subdivisionAliasMap = null;
 
         /*
          * Initializes the alias data from the ICU resource bundles. The alias
@@ -1302,6 +1324,7 @@
             scriptAliasMap = new HashMap<>();
             territoryAliasMap = new HashMap<>();
             variantAliasMap = new HashMap<>();
+            subdivisionAliasMap = new HashMap<>();
 
             UResourceBundle metadata = UResourceBundle.getBundleInstance(
                 ICUData.ICU_BASE_NAME, "metadata",
@@ -1311,6 +1334,7 @@
             UResourceBundle scriptAlias = metadataAlias.get("script");
             UResourceBundle territoryAlias = metadataAlias.get("territory");
             UResourceBundle variantAlias = metadataAlias.get("variant");
+            UResourceBundle subdivisionAlias = metadataAlias.get("subdivision");
 
             for (int i = 0 ; i < languageAlias.getSize(); i++) {
                 UResourceBundle res = languageAlias.get(i);
@@ -1369,6 +1393,22 @@
                 }
                 variantAliasMap.put(aliasFrom, aliasTo);
             }
+            for (int i = 0 ; i < subdivisionAlias.getSize(); i++) {
+                UResourceBundle res = subdivisionAlias.get(i);
+                String aliasFrom = res.getKey();
+                // Only the first of the space-separated replacements is used.
+                String aliasTo = res.get("replacement").getString().split(" ")[0];
+                if (aliasFrom.length() < 3 || aliasFrom.length() > 8) {
+                    throw new IllegalArgumentException(
+                        "Incorrect key [" + aliasFrom + "] in alias:subdivision.");
+                }
+                if (aliasTo.length() < 3 || aliasTo.length() > 8) {
+                    // Skip out-of-range replacements instead of throwing:
+                    // replacements shorter than 3 exist for now, see CLDR-14312.
+                    continue;
+                }
+                subdivisionAliasMap.put(aliasFrom, aliasTo);
+            }
 
             aliasDataIsLoaded = true;
         }
@@ -1591,6 +1631,11 @@
             }
             return false;
         }
+
+        // Returns the subdivisionAliasMap entry for subdivision, or null if none.
+        private String replaceSubdivision(String subdivision) {
+            return subdivisionAliasMap.get(subdivision);
+        }
     };
 
     /**
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index 2868ddd..06eef5f 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -5216,6 +5216,21 @@
         // ICU-21344
         Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT"));
 
+        // ICU-21402
+        Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23"));
+        Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11"));
+        Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a"));
+        Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra"));
+        Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg"));
+        Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud"));
+
+        Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23"));
+        Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11"));
+        Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a"));
+        Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra"));
+        Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg"));
+        Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud"));
+
         // ICU-21401
         Assert.assertEquals("xtg", canonicalTag("cel-gaulish"));
     }