ICU-21236 Correct Locale canonicalization
See #1254
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index fe97edb..08684d8 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -42,6 +42,7 @@
#include "bytesinkutil.h"
#include "charstr.h"
+#include "charstrmap.h"
#include "cmemory.h"
#include "cstring.h"
#include "mutex.h"
@@ -51,7 +52,9 @@
#include "uhash.h"
#include "ulocimp.h"
#include "umutex.h"
+#include "uniquecharstr.h"
#include "ustr_imp.h"
+#include "uvector.h"
U_CDECL_BEGIN
static UBool U_CALLCONV locale_cleanup(void);
@@ -246,6 +249,7 @@
// '_'
// In the platform codepage.
#define SEP_CHAR '_'
+#define NULL_CHAR '\0'
Locale::~Locale()
{
@@ -500,38 +504,1110 @@
return (uprv_strcmp(other.fullName, fullName) == 0);
}
-#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
-
namespace {
-CharString& AppendLSCVE(CharString& out, const char* language, const char* script,
- const char* country, const char* variants, const char* extension,
- UErrorCode& status) {
- out.append(language, status);
- if (script && script[0] != '\0') {
- out.append('_', status);
- out.append(script, status);
+UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER;
+UHashtable *gKnownCanonicalized = nullptr;
+
+static const char* const KNOWN_CANONICALIZED[] = {
+ "c",
+ // Commonly used locales that are known to be already canonicalized.
+ "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
+ "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
+ "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
+ "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
+ "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
+ "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
+ "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
+ "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
+ "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
+ "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
+ "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
+ "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+ "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
+ "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
+ "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
+ "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
+ "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
+ "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
+ "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
+};
+
+// Cleanup callback (registered through ucln_common_registerCleanup() by
+// loadKnownCanonicalized): re-arms the init-once guard and releases the
+// table of known-canonicalized locale IDs.
+static UBool U_CALLCONV cleanupKnownCanonicalized() {
+    gKnownCanonicalizedInitOnce.reset();
+    if (gKnownCanonicalized != nullptr) {
+        uhash_close(gKnownCanonicalized);
+        // Clear the global so no dangling pointer survives the cleanup.
+        gKnownCanonicalized = nullptr;
+    }
+    return TRUE;
+}
+
+static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) { // init-once worker invoked from isKnownCanonicalizedLocale()
+ ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
+ cleanupKnownCanonicalized);
+ LocalUHashtablePointer newKnownCanonicalizedMap(
+ uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
+ for (int32_t i = 0;
+ U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+ i++) {
+ uhash_puti(newKnownCanonicalizedMap.getAlias(),
+ (void*)KNOWN_CANONICALIZED[i],
+ 1, &status); // value 1 marks membership; lookups test uhash_geti(...) != 0
}
- if (country && country[0] != '\0') {
- out.append('_', status);
- out.append(country, status);
+ if (U_FAILURE(status)) {
+ return; // the local smart pointer still owns the table at this point
}
- if (variants && variants[0] != '\0') {
- if ((script == nullptr || script[0] == '\0') &&
- (country == nullptr || country[0] == '\0')) {
- out.append('_', status);
+
+ gKnownCanonicalized = newKnownCanonicalizedMap.orphan(); // publish only after every entry was inserted successfully
+}
+
+class AliasData;
+
+/**
+ * A builder class that reads the alias tables and constructs the AliasData.
+ */
+class AliasDataBuilder {
+public:
+ AliasDataBuilder() {
+ }
+
+ // Build the AliasData from the "metadata" resource; returns nullptr on failure.
+ AliasData* build(UErrorCode &status);
+
+private:
+ void readAlias(UResourceBundle* alias, // shared worker behind the four read*Alias() helpers below
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length,
+ void (*checkType)(const char* type), // called with every alias key that is read
+ void (*checkReplacement)(const UnicodeString& replacement), // called with every replacement string that is read
+ UErrorCode &status);
+
+ // Read the languageAlias data from alias to
+ // strings+types+replacementIndexes
+ // The number of records will be stored into length.
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement language.
+ void readLanguageAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status);
+
+ // Read the scriptAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement script.
+ void readScriptAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
+
+ // Read the territoryAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement region(s).
+ void readTerritoryAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
+
+ // Read the variantAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement variant.
+ void readVariantAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
+};
+
+/**
+ * A class to hold the Alias Data: four lookup maps (language, script,
+ * territory, variant) from an alias key to its replacement string, plus the
+ * string storage handed over by the builder.
+ */
+class AliasData : public UMemory {
+public:
+    /**
+     * Returns the lazily loaded shared instance, or nullptr on failure.
+     *
+     * status is taken by reference so that a failure reported by loadData()
+     * is visible to the caller; with a by-value parameter the caller kept
+     * seeing success and went on to dereference a null pointer.
+     */
+    static const AliasData* singleton(UErrorCode& status) {
+        if (U_FAILURE(status)) {
+            // Do not attempt to load when the caller is already in error.
+            return nullptr;
+        }
+        umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
+        return U_SUCCESS(status) ? gSingleton : nullptr;
+    }
+
+    const CharStringMap& languageMap() const { return language; }
+    const CharStringMap& scriptMap() const { return script; }
+    const CharStringMap& territoryMap() const { return territory; }
+    const CharStringMap& variantMap() const { return variant; }
+
+    // Init-once worker: builds gSingleton and registers cleanup().
+    static void U_CALLCONV loadData(UErrorCode &status);
+    // Library cleanup callback: destroys gSingleton.
+    static UBool U_CALLCONV cleanup();
+
+    static UInitOnce gInitOnce;
+
+private:
+    // Takes over the four maps (moved in) and ownership of strings.
+    AliasData(CharStringMap languageMap,
+              CharStringMap scriptMap,
+              CharStringMap territoryMap,
+              CharStringMap variantMap,
+              CharString* strings)
+        : language(std::move(languageMap)),
+          script(std::move(scriptMap)),
+          territory(std::move(territoryMap)),
+          variant(std::move(variantMap)),
+          strings(strings) {
+    }
+
+    ~AliasData() {
+        delete strings;
+    }
+
+    static const AliasData* gSingleton;
+
+    CharStringMap language;
+    CharStringMap script;
+    CharStringMap territory;
+    CharStringMap variant;
+    CharString* strings;  // owned; released in the destructor
+
+    friend class AliasDataBuilder;
+};
+
+
+const AliasData* AliasData::gSingleton = nullptr;
+UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup callback: re-arms the init-once guard and destroys the
+// shared instance.
+UBool U_CALLCONV
+AliasData::cleanup()
+{
+    gInitOnce.reset();
+    delete gSingleton;
+    // Clear the pointer so singleton() can never hand out freed memory if a
+    // later re-initialization fails.
+    gSingleton = nullptr;
+    return TRUE;
+}
+
+/**
+ * Shared worker for the read*Alias() helpers.
+ *
+ * Iterates over the entries of alias. For each entry the key is stored in
+ * types[i] and the entry's "replacement" string is added to strings, with the
+ * returned index stored in replacementIndexes[i]. checkType and
+ * checkReplacement are invoked on every key / replacement that was read
+ * successfully. On return length holds the number of entries stored.
+ *
+ * Sets status to U_MEMORY_ALLOCATION_ERROR if the arrays cannot be allocated
+ * and stops at the first resource error.
+ */
+void
+AliasDataBuilder::readAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        void (*checkType)(const char* type),
+        void (*checkReplacement)(const UnicodeString& replacement),
+        UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    length = ures_getSize(alias);
+    if (length <= 0) {
+        // An empty table is not an error. allocateInsteadAndCopy() returns
+        // nullptr for a non-positive capacity, which would otherwise be
+        // misreported as an out-of-memory condition below.
+        length = 0;
+        return;
+    }
+    const char** rawTypes = types.allocateInsteadAndCopy(length);
+    if (rawTypes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
+    if (rawIndexes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    int32_t i = 0;
+    // Bound the loop by the allocated size as well as by the iterator.
+    while (i < length && ures_hasNext(alias)) {
+        LocalUResourceBundlePointer res(
+            ures_getNextResource(alias, nullptr, &status));
+        if (U_FAILURE(status)) {
+            // res is unusable; do not read a key from it.
+            return;
+        }
+        const char* aliasFrom = ures_getKey(res.getAlias());
+        UnicodeString aliasTo =
+            ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        checkType(aliasFrom);
+        checkReplacement(aliasTo);
+
+        rawTypes[i] = aliasFrom;
+        rawIndexes[i] = strings->add(aliasTo, status);
+        i++;
+    }
+    // Report only the entries that were actually filled in.
+    length = i;
+}
+
+/**
+ * Loads the languageAlias table through readAlias(): keys go to types, the
+ * index of each replacement language string (inside strings) goes to
+ * replacementIndexes, and length receives the number of records.
+ */
+void
+AliasDataBuilder::readLanguageAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    // Debug builds verify that every key has one of these shapes:
+    //   language_REGION_variant
+    //   language_REGION
+    //   language_variant
+    //   language
+    //   und_variant
+    auto verifyType = [](const char* type) {
+        Locale parsed(type);
+        // The key never carries a script.
+        U_ASSERT(parsed.getScript()[0] == '\0');
+        // When the language is und, the key carries no REGION.
+        U_ASSERT(parsed.getLanguage()[0] != '\0' || parsed.getCountry()[0] == '\0');
+    };
+#else
+    auto verifyType = [](const char*) {};
+#endif
+    // Replacements are accepted unchecked.
+    auto verifyReplacement = [](const UnicodeString&) {};
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Loads the scriptAlias table through readAlias(): keys go to types, the
+ * index of each replacement script string (inside strings) goes to
+ * replacementIndexes, and length receives the number of records.
+ */
+void
+AliasDataBuilder::readScriptAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    // Debug builds verify that both sides are four characters long.
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) == 4);
+    };
+    auto verifyReplacement = [](const UnicodeString& replacement) {
+        U_ASSERT(replacement.length() == 4);
+    };
+#else
+    auto verifyType = [](const char*) {};
+    auto verifyReplacement = [](const UnicodeString&) { };
+#endif
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Loads the territoryAlias table through readAlias(): keys go to types, the
+ * index of each replacement region string (inside strings) goes to
+ * replacementIndexes, and length receives the number of records.
+ */
+void
+AliasDataBuilder::readTerritoryAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    // Debug builds verify that the key is two or three characters long.
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
+    };
+#else
+    auto verifyType = [](const char*) {};
+#endif
+    // Replacements are accepted unchecked.
+    auto verifyReplacement = [](const UnicodeString&) { };
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Loads the variantAlias table through readAlias(): keys go to types, the
+ * index of each replacement variant string (inside strings) goes to
+ * replacementIndexes, and length receives the number of records.
+ */
+void
+AliasDataBuilder::readVariantAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    // Debug builds verify that both sides are 4..8 characters long and that
+    // a four-character value starts with a digit.
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
+        U_ASSERT(uprv_strlen(type) != 4 ||
+                 (type[0] >= '0' && type[0] <= '9'));
+    };
+    auto verifyReplacement = [](const UnicodeString& replacement) {
+        U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
+        U_ASSERT(replacement.length() != 4 ||
+                 (replacement.charAt(0) >= u'0' &&
+                  replacement.charAt(0) <= u'9'));
+    };
+#else
+    auto verifyType = [](const char*) {};
+    auto verifyReplacement = [](const UnicodeString&) { };
+#endif
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Init-once worker for AliasData::singleton(): registers the cleanup
+ * callback and builds the alias data (language, script, territory and
+ * variant aliases) through AliasDataBuilder.
+ *
+ * On failure gSingleton stays nullptr and status carries the error reported
+ * by the builder; U_MEMORY_ALLOCATION_ERROR is only substituted when the
+ * builder returned nullptr without reporting an error itself.
+ */
+void U_CALLCONV
+AliasData::loadData(UErrorCode &status)
+{
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+    UDate start = uprv_getRawUTCtime();
+#endif  // LOCALE_CANONICALIZATION_DEBUG
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
+    AliasDataBuilder builder;
+    gSingleton = builder.build(status);
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+    UDate end = uprv_getRawUTCtime();
+    printf("AliasData::loadData took total %f ms\n", end - start);
+#endif  // LOCALE_CANONICALIZATION_DEBUG
+    if (gSingleton == nullptr && U_SUCCESS(status)) {
+        // Keep a more specific error from build() (for example a missing
+        // resource) instead of masking it as an allocation failure.
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+/**
+ * Build the alias data from resources.
+ *
+ * Opens the "metadata" bundle, reads its alias/language, alias/script,
+ * alias/territory and alias/variant tables, and turns each of them into a
+ * CharStringMap from alias key to replacement string.
+ *
+ * Returns a newly allocated AliasData, or nullptr with status set when any
+ * step fails (including a failure while filling the maps).
+ */
+AliasData*
+AliasDataBuilder::build(UErrorCode &status) {
+    LocalUResourceBundlePointer metadata(
+        ures_openDirect(nullptr, "metadata", &status));
+    LocalUResourceBundlePointer metadataAlias(
+        ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
+    LocalUResourceBundlePointer languageAlias(
+        ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
+    LocalUResourceBundlePointer scriptAlias(
+        ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
+    LocalUResourceBundlePointer territoryAlias(
+        ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
+    LocalUResourceBundlePointer variantAlias(
+        ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+
+    int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
+            variantLength = 0;
+
+    // Read the languageAlias into languageTypes, languageReplacementIndexes
+    // and strings
+    UniqueCharStrings strings(status);
+    LocalMemory<const char*> languageTypes;
+    LocalMemory<int32_t> languageReplacementIndexes;
+    readLanguageAlias(languageAlias.getAlias(),
+                      &strings,
+                      languageTypes,
+                      languageReplacementIndexes,
+                      languagesLength,
+                      status);
+
+    // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
+    // and strings
+    LocalMemory<const char*> scriptTypes;
+    LocalMemory<int32_t> scriptReplacementIndexes;
+    readScriptAlias(scriptAlias.getAlias(),
+                    &strings,
+                    scriptTypes,
+                    scriptReplacementIndexes,
+                    scriptLength,
+                    status);
+
+    // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
+    // and strings
+    LocalMemory<const char*> territoryTypes;
+    LocalMemory<int32_t> territoryReplacementIndexes;
+    readTerritoryAlias(territoryAlias.getAlias(),
+                       &strings,
+                       territoryTypes,
+                       territoryReplacementIndexes,
+                       territoryLength, status);
+
+    // Read the variantAlias into variantTypes, variantReplacementIndexes
+    // and strings
+    LocalMemory<const char*> variantTypes;
+    LocalMemory<int32_t> variantReplacementIndexes;
+    readVariantAlias(variantAlias.getAlias(),
+                     &strings,
+                     variantTypes,
+                     variantReplacementIndexes,
+                     variantLength, status);
+
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    // We can only use strings after freezing it.
+    strings.freeze();
+
+    // Build the languageMap from languageTypes & languageReplacementIndexes
+    CharStringMap languageMap(490, status);
+    for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
+        languageMap.put(languageTypes[i],
+                        strings.get(languageReplacementIndexes[i]),
+                        status);
+    }
+
+    // Build the scriptMap from scriptTypes & scriptReplacementIndexes
+    CharStringMap scriptMap(1, status);
+    for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
+        scriptMap.put(scriptTypes[i],
+                      strings.get(scriptReplacementIndexes[i]),
+                      status);
+    }
+
+    // Build the territoryMap from territoryTypes & territoryReplacementIndexes
+    CharStringMap territoryMap(650, status);
+    for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
+        territoryMap.put(territoryTypes[i],
+                         strings.get(territoryReplacementIndexes[i]),
+                         status);
+    }
+
+    // Build the variantMap from variantTypes & variantReplacementIndexes.
+    CharStringMap variantMap(2, status);
+    for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
+        variantMap.put(variantTypes[i],
+                       strings.get(variantReplacementIndexes[i]),
+                       status);
+    }
+
+    // Never hand out a partially filled data set: the loops above stop at the
+    // first failing put(), so re-check before constructing the result.
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    // Take the string storage out of the builder first, so it can be released
+    // here if allocating the AliasData itself fails.
+    CharString* storage = strings.orphanCharStrings();
+    AliasData* data = new AliasData(
+        std::move(languageMap),
+        std::move(scriptMap),
+        std::move(territoryMap),
+        std::move(variantMap),
+        storage);
+    if (data == nullptr) {
+        delete storage;
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return data;
+}
+
+/**
+ * A class that finds the replacement values of locale fields by using AliasData.
+ */
+class AliasReplacer {
+public:
+ AliasReplacer(UErrorCode status) : // NOTE(review): status is taken by value, so an error set while constructing variants stays local - confirm intended
+ language(nullptr), script(nullptr), region(nullptr),
+ extensions(nullptr), variants(status),
+ data(nullptr) {
+ }
+ ~AliasReplacer() {
+ }
+
+ // Check the fields inside locale; if any field needs to be replaced,
+ // place the replaced locale ID in out and return true.
+ // Otherwise return false for no replacement or error.
+ bool replace(
+ const Locale& locale, CharString& out, UErrorCode status); // NOTE(review): by-value status - errors raised inside are not visible to the caller
+
+private:
+ const char* language;
+ const char* script;
+ const char* region;
+ const char* extensions;
+ UVector variants;
+
+ const AliasData* data;
+
+ inline bool notEmpty(const char* str) {
+ return str && str[0] != NULL_CHAR;
+ }
+
+ /**
+ * If replacement is neither null nor empty and input is null,
+ * return replacement.
+ * If replacement is neither null nor empty and input is not null, return input.
+ * If replacement is either null or empty and type is null,
+ * return input.
+ * Otherwise return null.
+ * replacement input type return
+ * AAA nullptr * AAA
+ * AAA BBB * BBB
+ * nullptr || "" CCC nullptr CCC
+ * nullptr || "" * DDD nullptr
+ */
+ inline const char* deleteOrReplace(
+ const char* input, const char* type, const char* replacement) {
+ return notEmpty(replacement) ?
+ ((input == nullptr) ? replacement : input) :
+ ((type == nullptr) ? input : nullptr);
+ }
+
+ inline bool same(const char* a, const char* b) { // null-safe string equality: two nulls are equal, null never equals non-null
+ if (a == nullptr && b == nullptr) {
+ return true;
}
- out.append('_', status);
- out.append(variants, status);
+ if ((a == nullptr && b != nullptr) ||
+ (a != nullptr && b == nullptr)) {
+ return false;
+ }
+ return uprv_strcmp(a, b) == 0;
}
- if (extension && extension[0] != '\0') {
- out.append(extension, status);
+
+ // Gather fields and generate locale ID into out.
+ CharString& outputToString(CharString& out, UErrorCode status);
+
+ // Generate the lookup key: language, then region and variant when non-empty, joined by SEP_CHAR.
+ CharString& generateKey(const char* language, const char* region,
+ const char* variant, CharString& out,
+ UErrorCode status);
+
+ void parseLanguageReplacement(const char* replacement, // splits replacement into its fields; buffers it allocates are added to toBeFreed
+ const char*& replaceLanguage,
+ const char*& replaceScript,
+ const char*& replaceRegion,
+ const char*& replaceVariant,
+ const char*& replaceExtensions,
+ UVector& toBeFreed,
+ UErrorCode& status);
+
+ // Replace by using languageAlias.
+ bool replaceLanguage(bool checkLanguage, bool checkRegion,
+ bool checkVariants, UVector& toBeFreed,
+ UErrorCode& status);
+
+ // Replace by using territoryAlias.
+ bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
+
+ // Replace by using scriptAlias.
+ bool replaceScript(UErrorCode& status);
+
+ // Replace by using variantAlias.
+ bool replaceVariant(UErrorCode& status);
+};
+
+// Builds the languageAlias lookup key in out: the language, followed by the
+// region and then the variant (each preceded by SEP_CHAR) when non-empty.
+CharString&
+AliasReplacer::generateKey(
+        const char* language, const char* region, const char* variant,
+        CharString& out, UErrorCode status)
+{
+    out.append(language, status);
+    const bool hasRegion = notEmpty(region);
+    if (hasRegion) {
+        out.append(SEP_CHAR, status);
+        out.append(region, status);
+    }
+    const bool hasVariant = notEmpty(variant);
+    if (hasVariant) {
+        out.append(SEP_CHAR, status);
+        out.append(variant, status);
     }
     return out;
 }
+/**
+ * Splits a languageAlias replacement ("lang", "lang_Scrp", "lang_RG",
+ * "lang_Scrp_RG_variant", ...) into its fields.
+ *
+ * When the replacement has a single field, replacedLanguage aliases
+ * replacement itself. Otherwise a private copy is made, added to toBeFreed,
+ * split in place at each '_', and the outputs point into that copy. Fields
+ * that are absent are set to nullptr. Whatever follows the last recognised
+ * field is reported through replacedExtensions.
+ *
+ * The outputs are left untouched only when status is already a failure on
+ * entry; on every later error path they hold defined values
+ * (replacedLanguage == replacement, everything else nullptr).
+ */
+void
+AliasReplacer::parseLanguageReplacement(
+        const char* replacement,
+        const char*& replacedLanguage,
+        const char*& replacedScript,
+        const char*& replacedRegion,
+        const char*& replacedVariant,
+        const char*& replacedExtensions,
+        UVector& toBeFreed,
+        UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Give every output a defined value before the first early return.
+    replacedLanguage = replacement;
+    replacedScript = replacedRegion = replacedVariant
+        = replacedExtensions = nullptr;
+    if (uprv_strchr(replacement, '_') == nullptr) {
+        // Single field: the whole replacement is the language.
+        return;
+    }
+    // We have multiple fields so we have to allocate and parse.
+    CharString* str = new CharString(
+        replacement, (int32_t)uprv_strlen(replacement), status);
+    if (str == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) {
+        delete str;
+        return;
+    }
+    toBeFreed.addElement(str, status);
+    if (U_FAILURE(status)) {
+        // The element was not stored, so the vector will not free it.
+        delete str;
+        return;
+    }
+    char* data = str->data();
+    replacedLanguage = (const char*) data;
+    char* endOfField = uprv_strchr(data, '_');
+    *endOfField = '\0';  // null terminate the language.
+    endOfField++;
+    const char* start = endOfField;
+    endOfField = (char*) uprv_strchr(start, '_');
+    size_t len = 0;
+    if (endOfField == nullptr) {
+        len = uprv_strlen(start);
+    } else {
+        len = endOfField - start;
+        *endOfField = '\0';  // null terminate it.
+    }
+    if (len == 4 && uprv_isASCIILetter(*start)) {
+        // Got a script
+        replacedScript = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        // Step past the terminator just written. A post-increment here would
+        // leave start on the NUL byte and every later field would be lost.
+        start = ++endOfField;
+        endOfField = (char*)uprv_strchr(start, '_');
+        if (endOfField == nullptr) {
+            len = uprv_strlen(start);
+        } else {
+            len = endOfField - start;
+            *endOfField = '\0';  // null terminate it.
+        }
+    }
+    if (len >= 2 && len <= 3) {
+        // Got a region
+        replacedRegion = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        start = ++endOfField;
+        endOfField = (char*)uprv_strchr(start, '_');
+        if (endOfField == nullptr) {
+            len = uprv_strlen(start);
+        } else {
+            len = endOfField - start;
+            *endOfField = '\0';  // null terminate it.
+        }
+    }
+    if (len >= 4) {
+        // Got a variant
+        replacedVariant = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        start = ++endOfField;
+    }
+    replacedExtensions = start;
+}
+
+/**
+ * Tries one languageAlias lookup shape, selected by the three flags:
+ * checkLanguage uses the current language (otherwise "und"), checkRegion adds
+ * the current region, and checkVariants tries each variant in turn.
+ *
+ * On the first lookup whose replacement actually changes a field, updates
+ * language / script / region / variants and returns true. Returns false when
+ * nothing matched, nothing changed, or status is a failure.
+ */
+bool
+AliasReplacer::replaceLanguage(
+        bool checkLanguage, bool checkRegion,
+        bool checkVariants, UVector& toBeFreed, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    if (    (checkRegion && region == nullptr) ||
+            (checkVariants && variants.size() == 0)) {
+        // Nothing to search.
+        return false;
+    }
+    int32_t variant_size = checkVariants ? variants.size() : 1;
+    // Since we may have more than one variant, we need to loop through them.
+    const char* searchLanguage = checkLanguage ? language : "und";
+    const char* searchRegion = checkRegion ? region : nullptr;
+    const char* searchVariant = nullptr;
+    for (int32_t variant_index = 0;
+            variant_index < variant_size;
+            variant_index++) {
+        if (checkVariants) {
+            U_ASSERT(variant_index < variant_size);
+            searchVariant = (const char*)(variants.elementAt(variant_index));
+        }
+
+        if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
+            // Do not consider ill-formed variant subtag.
+            searchVariant = nullptr;
+        }
+        CharString typeKey;
+        generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
+                    status);
+        if (U_FAILURE(status)) {
+            return false;
+        }
+        const char *replacement = data->languageMap().get(typeKey.data());
+        if (replacement == nullptr) {
+            // Found no replacement data.
+            continue;
+        }
+
+        // Initialised so that nothing indeterminate is ever read, even if
+        // the parser bails out early.
+        const char* replacedLanguage = nullptr;
+        const char* replacedScript = nullptr;
+        const char* replacedRegion = nullptr;
+        const char* replacedVariant = nullptr;
+        const char* replacedExtensions = nullptr;
+        parseLanguageReplacement(replacement,
+                                 replacedLanguage,
+                                 replacedScript,
+                                 replacedRegion,
+                                 replacedVariant,
+                                 replacedExtensions,
+                                 toBeFreed,
+                                 status);
+        if (U_FAILURE(status) || replacedLanguage == nullptr) {
+            // Parsing failed; the outputs must not be used.
+            return false;
+        }
+        replacedLanguage =
+            uprv_strcmp(replacedLanguage, "und") == 0 ?
+            language : replacedLanguage;
+        replacedScript = deleteOrReplace(script, nullptr, replacedScript);
+        replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
+        replacedVariant = deleteOrReplace(
+            searchVariant, searchVariant, replacedVariant);
+
+        if (    same(language, replacedLanguage) &&
+                same(script, replacedScript) &&
+                same(region, replacedRegion) &&
+                same(searchVariant, replacedVariant) &&
+                replacedExtensions == nullptr) {
+            // Replacement produces no changes.
+            continue;
+        }
+
+        language = replacedLanguage;
+        region = replacedRegion;
+        script = replacedScript;
+        if (searchVariant != nullptr) {
+            if (notEmpty(replacedVariant)) {
+                variants.setElementAt((void*)replacedVariant, variant_index);
+            } else {
+                variants.removeElementAt(variant_index);
+            }
+        }
+        if (replacedExtensions != nullptr) {
+            // TODO(ICU-21292)
+            // DO NOTHING
+            // UTS35 does not specify what we should do if we have extensions in the
+            // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
+            // extensions in their languageAlias:
+            //  i_default => en_x_i_default
+            //  i_enochian => und_x_i_enochian
+            //  i_mingo => see_x_i_mingo
+            //  zh_min => nan_x_zh_min
+            // But all of them are already changed by code inside ultag_parse() before
+            // hitting this code.
+        }
+
+        // Something changed by language alias data.
+        return true;
+    }
+    // Nothing changed by language alias data.
+    return false;
+}
+
+/**
+ * Replaces region using territoryAlias.
+ *
+ * A replacement may list several regions separated by spaces. In that case
+ * the likely region for the current language/script is used if it appears in
+ * the list, otherwise the first listed region. The chosen region is copied
+ * into a CharString that is added to toBeFreed.
+ *
+ * Returns true when region was changed; false when there is no region, no
+ * alias data for it, or status is a failure.
+ */
+bool
+AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    if (region == nullptr) {
+        // No region to search.
+        return false;
+    }
+    const char *replacement = data->territoryMap().get(region);
+    if (replacement == nullptr) {
+        // Found no replacement data for this region.
+        return false;
+    }
+    const char* replacedRegion = replacement;
+    const char* firstSpace = uprv_strchr(replacement, ' ');
+    if (firstSpace != nullptr) {
+        // If there is more than one region in the replacement,
+        // we need to check which one matches based on the language.
+        Locale l(language, nullptr, script);
+        l.addLikelySubtags(status);
+        const char* likelyRegion = l.getCountry();
+        CharString* item = nullptr;
+        if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
+            size_t len = uprv_strlen(likelyRegion);
+            const char* foundInReplacement = uprv_strstr(replacement,
+                                                         likelyRegion);
+            if (foundInReplacement != nullptr) {
+                // Assuming the case there are no three letter region code in
+                // the replacement of territoryAlias
+                U_ASSERT(foundInReplacement == replacement ||
+                         *(foundInReplacement-1) == ' ');
+                U_ASSERT(foundInReplacement[len] == ' ' ||
+                         foundInReplacement[len] == '\0');
+                item = new CharString(foundInReplacement, (int32_t)len, status);
+            }
+        }
+        if (item == nullptr) {
+            item = new CharString(replacement,
+                                  (int32_t)(firstSpace - replacement), status);
+        }
+        if (item == nullptr) {
+            if (U_SUCCESS(status)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+            return false;
+        }
+        if (U_FAILURE(status)) {
+            // Nobody else owns item yet; release it instead of leaking it.
+            delete item;
+            return false;
+        }
+        toBeFreed.addElement(item, status);
+        if (U_FAILURE(status)) {
+            // The element was not stored, so the vector will not free it.
+            delete item;
+            return false;
+        }
+        replacedRegion = item->data();
+    }
+    U_ASSERT(!same(region, replacedRegion));
+    region = replacedRegion;
+    // The region is changed by data in territory alias.
+    return true;
+}
+
+// Replaces script using scriptAlias. Returns true when script was changed,
+// false when there is no script, no alias data for it, or status is a failure.
+bool
+AliasReplacer::replaceScript(UErrorCode& status)
+{
+    if (U_FAILURE(status) || script == nullptr) {
+        // Either already in error or there is no script to search.
+        return false;
+    }
+    const char* canonicalScript = data->scriptMap().get(script);
+    if (canonicalScript != nullptr) {
+        U_ASSERT(!same(script, canonicalScript));
+        // The script is changed by data in script alias.
+        script = canonicalScript;
+        return true;
+    }
+    // Found no replacement data for this script.
+    return false;
+}
+
+// Replaces the first variant that has a differing variantAlias replacement.
+// Returns true when a variant was changed, false otherwise or when status is
+// a failure.
+bool
+AliasReplacer::replaceVariant(UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    // There may be several variants, so examine each of them in order.
+    for (int32_t index = 0; index < variants.size(); index++) {
+        const char* current = (const char*)(variants.elementAt(index));
+        const char* canonical = data->variantMap().get(current);
+        if (canonical == nullptr) {
+            // No alias data for this variant.
+            continue;
+        }
+        U_ASSERT((uprv_strlen(canonical) >= 5 &&
+                  uprv_strlen(canonical) <= 8) ||
+                 (uprv_strlen(canonical) == 4 &&
+                  canonical[0] >= '0' &&
+                  canonical[0] <= '9'));
+        if (same(current, canonical)) {
+            continue;
+        }
+        variants.setElementAt((void*)canonical, index);
+        // Special hack to handle hepburn-heploc => alalc97
+        const bool wasHeploc = uprv_strcmp(current, "heploc") == 0;
+        if (wasHeploc) {
+            for (int32_t other = 0; other < variants.size(); other++) {
+                const char* candidate =
+                    (const char*)(variants.elementAt(other));
+                if (uprv_strcmp(candidate, "hepburn") == 0) {
+                    variants.removeElementAt(other);
+                }
+            }
+        }
+        return true;
+    }
+    return false;
+}
+
+// Assembles the locale ID from the current fields into out: language, then
+// script and region when non-empty, then the sorted, upper-cased variants,
+// then the extensions.
+CharString&
+AliasReplacer::outputToString(
+        CharString& out, UErrorCode status)
+{
+    out.append(language, status);
+    const bool hasScript = notEmpty(script);
+    if (hasScript) {
+        out.append(SEP_CHAR, status);
+        out.append(script, status);
+    }
+    const bool hasRegion = notEmpty(region);
+    if (hasRegion) {
+        out.append(SEP_CHAR, status);
+        out.append(region, status);
+    }
+    if (variants.size() > 0) {
+        if (!hasScript && !hasRegion) {
+            // Extra separator when variants directly follow the language.
+            out.append(SEP_CHAR, status);
+        }
+        variants.sort([](UElement lhs, UElement rhs) -> int8_t {
+            return uprv_strcmp(
+                (const char*)lhs.pointer, (const char*)rhs.pointer);
+        }, status);
+        const int32_t firstVariantOffset = out.length();
+        for (int32_t index = 0; index < variants.size(); index++) {
+            const char* variant = (const char*)variants.elementAt(index);
+            out.append(SEP_CHAR, status);
+            out.append(variant, status);
+        }
+        // Upper-case everything appended for the variants.
+        T_CString_toUpperCase(out.data() + firstVariantOffset);
+    }
+    if (notEmpty(extensions)) {
+        CharString prefixed("und_", status);
+        prefixed.append(extensions, status);
+        Locale extensionLocale(prefixed.data());
+        // only support x extension inside CLDR for now.
+        U_ASSERT(extensions[0] == 'x');
+        out.append(extensionLocale.getName() + 1, status);
+    }
+    return out;
+}
+
+bool
+AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) // NOTE(review): status is by value - failures detected here never reach the caller; confirm UErrorCode& was intended
+{
+ data = AliasData::singleton(status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ out.clear();
+ language = locale.getLanguage();
+ if (!notEmpty(language)) {
+ language = nullptr;
+ }
+ script = locale.getScript();
+ if (!notEmpty(script)) {
+ script = nullptr;
+ }
+ region = locale.getCountry();
+ if (!notEmpty(region)) {
+ region = nullptr;
+ }
+ const char* variantsStr = locale.getVariant();
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ CharString variantsBuff(variantsStr, -1, status);
+ if (!variantsBuff.isEmpty()) {
+ if (U_FAILURE(status)) { return false; }
+ char* start = variantsBuff.data();
+ T_CString_toLowerCase(start);
+ char* end;
+ while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
+ U_SUCCESS(status)) {
+ *end = NULL_CHAR; // null terminate inside variantsBuff
+ variants.addElement(start, status); // stores a pointer into variantsBuff, which lives only for this call
+ start = end + 1;
+ }
+ variants.addElement(start, status);
+ }
+ if (U_FAILURE(status)) { return false; }
+
+ // Sort the variants
+ variants.sort([](UElement e1, UElement e2) -> int8_t {
+ return uprv_strcmp(
+ (const char*)e1.pointer, (const char*)e2.pointer);
+ }, status);
+
+ // A changed count to assert when looping too many times.
+ int changed = 0;
+ // A UVector to hold CharStrings allocated by the replace* methods;
+ // they are freed when it goes out of scope at the end of this function.
+ UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
+ nullptr, 10, status);
+ while (U_SUCCESS(status)) {
+ // Something is wrong with the data if we loop here more times than
+ // the bound asserted below.
+ U_ASSERT(changed < 5);
+ // From observation of key in data/misc/metadata.txt
+ // we know currently we only need to search in the following combination
+ // of fields for type in languageAlias:
+ // * lang_region_variant
+ // * lang_region
+ // * lang_variant
+ // * lang
+ // * und_variant
+ // This assumption is ensured by the U_ASSERT in readLanguageAlias
+ //
+ // lang REGION variant
+ if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
+ replaceLanguage(true, true, false, stringsToBeFreed, status) ||
+ replaceLanguage(true, false, true, stringsToBeFreed, status) ||
+ replaceLanguage(true, false, false, stringsToBeFreed, status) ||
+ replaceLanguage(false,false, true, stringsToBeFreed, status) ||
+ replaceTerritory(stringsToBeFreed, status) ||
+ replaceScript(status) ||
+ replaceVariant(status)) {
+ // Some value was changed by the alias data, try to match from the
+ // beginning again.
+ changed++;
+ continue;
+ }
+ // Nothing changed. Break out.
+ break;
+ } // while (U_SUCCESS(status))
+
+ if (U_FAILURE(status)) { return false; }
+ // Nothing changed and we know the order of the variants is unchanged
+ // because we have no variant or only one.
+ if (changed == 0 && variants.size() <= 1) {
+ return false;
+ }
+ outputToString(out, status);
+ if (extensionsStr != nullptr) {
+ out.append(extensionsStr, status);
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ // If the tag is not changed, return.
+ if (uprv_strcmp(out.data(), locale.getName()) == 0) {
+ U_ASSERT(changed == 0);
+ U_ASSERT(variants.size() > 1);
+ out.clear();
+ return false;
+ }
+ return true;
+}
+
+// Canonicalizes locale with a fresh AliasReplacer. Returns true when the
+// locale was changed, in which case the replaced value is stored in out.
+bool
+canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
+{
+    AliasReplacer aliasReplacer(status);
+    const bool changed = aliasReplacer.replace(locale, out, status);
+    return changed;
+}
+
+// Fast path for locales that are known to be canonical already, so that the
+// alias resources need not be loaded at startup until they are really needed.
+bool
+isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
+{
+    // The most frequent names are answered without touching the hash table.
+    static const char* const kMostCommon[] = { "c", "en", "en_US" };
+    for (const char* name : kMostCommon) {
+        if (uprv_strcmp(locale, name) == 0) {
+            return true;
+        }
+    }
+
+    // Everything else is looked up in the lazily built table of
+    // well-known canonicalized locales.
+    umtx_initOnce(gKnownCanonicalizedInitOnce,
+                  &loadKnownCanonicalized, status);
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    U_ASSERT(gKnownCanonicalized != nullptr);
+    const int32_t marker = uhash_geti(gKnownCanonicalized, locale);
+    return marker != 0;
+}
+
} // namespace
+// Test hook: exposes the KNOWN_CANONICALIZED table and stores its element
+// count in *length.
+U_CAPI const char* const*
+ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
+{
+    const int32_t entryCount = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+    *length = entryCount;
+    return KNOWN_CANONICALIZED;
+}
+
+// Test hook: returns true when canonicalizing localeName changes nothing and
+// no error occurred.
+U_CAPI bool
+ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
+{
+    Locale candidate(localeName);
+    UErrorCode status = U_ZERO_ERROR;
+    CharString scratch;
+    if (canonicalizeLocale(candidate, scratch, status)) {
+        // The locale needed a replacement, so it was not canonical.
+        return false;
+    }
+    return U_SUCCESS(status);
+}
+
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(const char* localeID, UBool canonicalize)
{
@@ -626,9 +1702,9 @@
uprv_memcpy(language, fullName, fieldLen[0]);
language[fieldLen[0]] = 0;
}
- if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
- ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
- ISASCIIALPHA(field[1][3])) {
+ if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
+ uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
+ uprv_isASCIILetter(field[1][3])) {
/* We have at least a script */
uprv_memcpy(script, field[1], fieldLen[1]);
script[fieldLen[1]] = 0;
@@ -656,193 +1732,18 @@
}
if (canonicalize) {
- UErrorCode status = U_ZERO_ERROR;
- // TODO: Try to use ResourceDataValue and ures_getValueWithFallback() etc.
- LocalUResourceBundlePointer metadata(ures_openDirect(NULL, "metadata", &status));
- LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(), "alias", NULL, &status));
- // Look up the metadata:alias:language:$key:replacement entries
- // key could be one of the following:
- // language
- // language_Script_REGION
- // language_REGION
- // language_variant
- do {
- // The resource structure looks like
- // metadata {
- // alias {
- // language {
- // art_lojban {
- // replacement{"jbo"}
- // }
- // ...
- // ks_Arab_IN {
- // replacement{"ks_IN"}
- // }
- // ...
- // no {
- // replacement{"nb"}
- // }
- // ....
- // zh_CN {
- // replacement{"zh_Hans_CN"}
- // }
- // }
- // ...
- // }
- // }
- LocalUResourceBundlePointer languageAlias(ures_getByKey(metadataAlias.getAlias(), "language", NULL, &status));
- if (U_FAILURE(status))
+ if (!isKnownCanonicalizedLocale(fullName, err)) {
+ CharString replaced;
+ // Not sure it is already canonicalized
+ if (canonicalizeLocale(*this, replaced, err)) {
+ U_ASSERT(U_SUCCESS(err));
+ // If need replacement, call init again.
+ init(replaced.data(), false);
+ }
+ if (U_FAILURE(err)) {
break;
- CharString temp;
- // Handle cases of key pattern "language _ variant"
- // ex: Map "art_lojban" to "jbo"
- const char* variants = getVariant();
- if (variants != nullptr && variants[0] != '\0') {
- const char* begin = variants;
- const char* end = begin;
- // We may have multiple variants, need to look at each of
- // them.
- for (;;) {
- status = U_ZERO_ERROR;
- end = uprv_strchr(begin, '_');
- int32_t len = (end == nullptr) ? int32_t(uprv_strlen(begin)) : int32_t(end - begin);
- temp.clear().append(getLanguage(), status).append("_", status).append(begin, len, status);
- LocalUResourceBundlePointer languageVariantAlias(
- ures_getByKey(languageAlias.getAlias(),
- temp.data(),
- NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(languageVariantAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- CharString newVar;
- if (begin != variants) {
- newVar.append(variants, static_cast<int32_t>(begin - variants - 1), status);
- }
- if (end != nullptr) {
- if (begin != variants) {
- newVar.append("_", status);
- }
- newVar.append(end + 1, status);
- }
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- (getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
- newVar.data(),
- uprv_strchr(fullName, '@'), status).data(), false);
- break;
- }
- if (end == nullptr) break;
- begin = end + 1;
- }
- } // End of handle language _ variant
- // Handle cases of key pattern "language _ Script _ REGION"
- // ex: Map "ks_Arab_IN" to "ks_IN"
- if (getScript() != nullptr && getScript()[0] != '\0' &&
- getCountry() != nullptr && getCountry()[0] != '\0') {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replacedAlias(
- ures_getByKey(languageAlias.getAlias(),
- AppendLSCVE(temp.clear(), getLanguage(), getScript(), getCountry(),
- nullptr, nullptr, status).data(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- l.getScript(),
- l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle language _ Script _ REGION
- // Handle cases of key pattern "language _ REGION"
- // ex: Map "zh_CN" to "zh_Hans_CN"
- if (getCountry() != nullptr && getCountry()[0] != '\0') {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replacedAlias(
- ures_getByKey(languageAlias.getAlias(),
- AppendLSCVE(temp.clear(), getLanguage(), nullptr, getCountry(),
- nullptr, nullptr, status).data(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle "language _ REGION"
- // Handle cases of key pattern "language"
- // ex: Map "no" to "nb"
- {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replaceLanguageAlias(ures_getByKey(languageAlias.getAlias(), getLanguage(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replaceLanguageAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- (getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle "language"
-
- // Look up the metadata:alias:territory:$key:replacement entries
- // key is region code.
- if (getCountry() != nullptr) {
- status = U_ZERO_ERROR;
- // The resource structure looks like
- // metadata {
- // alias {
- // ...
- // territory: {
- // 172 {
- // replacement{"RU AM AZ BY GE KG KZ MD TJ TM UA UZ"}
- // }
- // ...
- // 554 {
- // replacement{"NZ"}
- // }
- // }
- // }
- // }
- LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(), "territory", NULL, &status));
- LocalUResourceBundlePointer countryAlias(ures_getByKey(territoryAlias.getAlias(), getCountry(), NULL, &status));
- UnicodeString replacements(
- ures_getStringByKey(countryAlias.getAlias(), "replacement", nullptr, &status));
- if (U_SUCCESS(status)) {
- CharString replacedCountry;
- int32_t delPos = replacements.indexOf(' ');
- if (delPos == -1) {
- replacedCountry.appendInvariantChars(replacements, status);
- } else {
- Locale l(AppendLSCVE(temp.clear(), getLanguage(), nullptr, getScript(),
- nullptr, nullptr, status).data());
- l.addLikelySubtags(status);
- if (replacements.indexOf(UnicodeString(l.getCountry())) != -1) {
- replacedCountry.append(l.getCountry(), status);
- } else {
- replacedCountry.appendInvariantChars(replacements.getBuffer(), delPos, status);
- }
- }
- init(AppendLSCVE(temp.clear(),
- getLanguage(),
- getScript(),
- replacedCountry.data(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle REGION
- } while (0);
+ }
+ }
} // if (canonicalize) {
// successful end of init()
diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h
index b837fb9..44b73e9 100644
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@@ -38,6 +38,8 @@
UCLN_COMMON_SERVICE,
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
+ UCLN_COMMON_LOCALE_ALIAS,
+ UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_LIKELY_SUBTAGS,
UCLN_COMMON_LOCALE_DISTANCE,
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index b9070f8..5691fe9 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -298,4 +298,10 @@
U_CFUNC const char*
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+/* Function for testing purpose */
+U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
+
+// Return true if the value is already canonicalized.
+U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
+
#endif
diff --git a/icu4c/source/data/cldr-icu-readme.txt b/icu4c/source/data/cldr-icu-readme.txt
index 039550b..93b9ef3 100644
--- a/icu4c/source/data/cldr-icu-readme.txt
+++ b/icu4c/source/data/cldr-icu-readme.txt
@@ -192,6 +192,12 @@
cd $TOOLS_ROOT/cldr
ant copy-cldr-testdata
+# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
+# into icu4c/source/test/testdata/localeCanonicalization.txt
+# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
+# and add the following line to the beginning of these two files
+# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
+
# 5. Check which data files have modifications, which have been added or removed
# (if there are no changes, you may not need to proceed further). Make sure the
# list seems reasonable.
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index c5ed9fa..b807119 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -32,6 +32,8 @@
#include "putilimp.h"
#include "hash.h"
#include "locmap.h"
+#include "uparse.h"
+#include "ulocimp.h"
static const char* const rawData[33][8] = {
@@ -257,6 +259,8 @@
TESTCASE_AUTO(TestBug13554);
TESTCASE_AUTO(TestBug20410);
TESTCASE_AUTO(TestBug20900);
+ TESTCASE_AUTO(TestLocaleCanonicalizationFromFile);
+ TESTCASE_AUTO(TestKnownCanonicalizedListCorrect);
TESTCASE_AUTO(TestConstructorAcceptsBCP47);
TESTCASE_AUTO(TestForLanguageTag);
TESTCASE_AUTO(TestToLanguageTag);
@@ -4707,10 +4711,10 @@
} testCases[] = {
{ "ca_ES-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage",
"ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE",
- "ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
+ "ca_ES_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_WITH_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
- { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
- { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
+ { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
+ { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
@@ -4729,13 +4733,17 @@
{ "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" },
{ "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" },
{ "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
- { "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
+ { "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_B_NY" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
/* fleshing out canonicalization */
/* trim space and sort keywords, ';' is separator so not present at end in canonical form */
- { "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
+ { "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;",
+ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
+ "en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" },
/* already-canonical ids are not changed */
- { "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
+ { "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
+ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
+ "en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" },
/* norwegian is just too weird, if we handle things in their full generality */
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "nb_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
@@ -4776,13 +4784,13 @@
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
- { "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
+ { "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
- { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
- { "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
+ { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
+ { "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
/* PRE_EURO and EURO conversions don't affect other keywords */
@@ -4799,13 +4807,6 @@
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
for (j=0; j<3; ++j) {
- if (j==1 && logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
- if (uprv_strncmp(testCases[i].localeID, "zh_CN", 5) == 0 ||
- uprv_strncmp(testCases[i].localeID, "zh_TW", 5) == 0 ||
- uprv_strncmp(testCases[i].localeID, "uz-UZ", 5) == 0 ) {
- continue;
- }
- }
const char* expected = (j==1) ? testCases[i].canonicalID : testCases[i].getNameID;
Locale loc = _canonicalize(j, testCases[i].localeID);
const char* getName = loc.isBogus() ? "BOGUS" : loc.getName();
@@ -4858,17 +4859,18 @@
// also test with script, variants and extensions
{ "prs-Cyrl-1009-u-ca-roc", "fa-Cyrl-AF-1009-u-ca-roc" },
- // language _ country -> language _ script _ country
- { "pa-IN", "pa-Guru-IN" },
+ { "pa-IN", "pa-IN" },
// also test with script
{ "pa-Latn-IN", "pa-Latn-IN" },
// also test with variants and extensions
- { "pa-IN-5678-u-ca-hindi", "pa-Guru-IN-5678-u-ca-hindi" },
+ { "pa-IN-5678-u-ca-hindi", "pa-IN-5678-u-ca-hindi" },
- // language _ script _ country -> language _ country
- { "ky-Cyrl-KG", "ky-KG" },
+ { "ky-Cyrl-KG", "ky-Cyrl-KG" },
// also test with variants and extensions
- { "ky-Cyrl-KG-3456-u-ca-roc", "ky-KG-3456-u-ca-roc" },
+ { "ky-Cyrl-KG-3456-u-ca-roc", "ky-Cyrl-KG-3456-u-ca-roc" },
+
+ // Test replacement of scriptAlias
+ { "en-Qaai", "en-Zinh" },
// Test replacement of territoryAlias
// 554 has one replacement
@@ -4887,18 +4889,14 @@
{ "uz-Cyrl-172-5678-u-nu-latn", "uz-Cyrl-UZ-5678-u-nu-latn" },
// a language not used in this region
{ "fr-172", "fr-RU" },
+
+ // variant
+ { "ja-Latn-hepburn-heploc", "ja-Latn-alalc97"},
+
+ { "aaa-Fooo-SU", "aaa-Fooo-RU"},
};
int32_t i;
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
- if (logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
- if (uprv_strstr(testCases[i].localeID, "-BOKMAL") != 0 ||
- uprv_strstr(testCases[i].localeID, "-NYNORSK") != 0 ||
- uprv_strstr(testCases[i].localeID, "-SAAHO") != 0 ||
- uprv_strncmp(testCases[i].localeID, "pa-IN", 5) == 0 ||
- uprv_strncmp(testCases[i].localeID, "ky-Cyrl", 7) == 0 ) {
- continue;
- }
- }
UErrorCode status = U_ZERO_ERROR;
std::string otag = testCases[i].localeID;
Locale loc = Locale::forLanguageTag(otag.c_str(), status);
@@ -5351,6 +5349,73 @@
}
}
+// Defines LocalStdioFilePointer, used below to hold the FILE* returned by
+// fopen(); fclose is passed as its close function.
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
+// Data-driven test: reads localeCanonicalization.txt from the source test
+// data folder and, for every "<source> ; <expected>" line, checks that
+// Locale::createCanonical(<source>).toLanguageTag() equals <expected> with
+// every '_' replaced by '-'.
+void LocaleTest::TestLocaleCanonicalizationFromFile()
+{
+    IcuTestErrorCode status(*this, "TestLocaleCanonicalizationFromFile");
+    const char *sourceTestDataPath = getSourceTestData(status);
+    if (status.errIfFailureAndReset("unable to find the source/test/testdata "
+                                    "folder (getSourceTestData())")) {
+        return;
+    }
+    // Build the path in a std::string so that a long test data directory
+    // cannot overflow a fixed-size buffer.
+    std::string testPath(sourceTestDataPath);
+    testPath += "localeCanonicalization.txt";
+    LocalStdioFilePointer testFile(fopen(testPath.c_str(), "r"));
+    if (testFile.isNull()) {
+        errln("unable to open %s", testPath.c_str());
+        return;
+    }
+    // Format:
+    // <source locale identifier> ; <expected canonicalized locale identifier>
+    char line[256];
+    while (fgets(line, (int)sizeof(line), testFile.getAlias()) != nullptr) {
+        if (line[0] == '#') {
+            // ignore any line that starts with #
+            continue;
+        }
+        char *semi = strchr(line, ';');
+        if (semi == nullptr) {
+            // ignore any line without ;
+            continue;
+        }
+        *semi = '\0'; // null terminate at the position of the semicolon
+        // Trim both fields in place inside the line buffer.
+        const char* from = u_skipWhitespace((const char*)line);
+        u_rtrim((char*)from);
+        const char* to = u_skipWhitespace((const char*)semi + 1);
+        u_rtrim((char*)to);
+        std::string expect(to);
+        // Change the _ to - because the result is compared as a language tag.
+        std::transform(expect.begin(), expect.end(), expect.begin(),
+                       [](unsigned char c){ return c == '_' ? '-' : c; });
+
+        Locale loc = Locale::createCanonical(from);
+        std::string result = loc.toLanguageTag<std::string>(status);
+        const char* tag = loc.isBogus() ? "BOGUS" : result.c_str();
+        // Report the expected value (not the actual tag) in the "expected"
+        // slot of the failure message.
+        status.errIfFailureAndReset(
+            "FAIL: createCanonical(%s).toLanguageTag() expected \"%s\" locale is %s",
+            from, expect.c_str(), loc.getName());
+        std::string msg("createCanonical(");
+        msg += from;
+        msg += ") locale = ";
+        msg += loc.getName();
+        assertEquals(msg.c_str(), expect.c_str(), tag);
+    }
+}
+
+// Checks that every entry returned by
+// ulocimp_getKnownCanonicalizedLocaleForTest() is reported as canonicalized
+// by ulocimp_isCanonicalizedLocaleForTest().
+void LocaleTest::TestKnownCanonicalizedListCorrect()
+{
+    IcuTestErrorCode status(*this, "TestKnownCanonicalizedListCorrect");
+    int32_t count = 0;
+    const char* const* knownList =
+        ulocimp_getKnownCanonicalizedLocaleForTest(&count);
+    for (int32_t idx = 0; idx < count; ++idx) {
+        const char* localeName = knownList[idx];
+        std::string message("Known Canonicalized Locale is not canonicalized: ");
+        message += localeName;
+        assertTrue(message.c_str(),
+                   ulocimp_isCanonicalizedLocaleForTest(localeName));
+    }
+}
+
void LocaleTest::TestConstructorAcceptsBCP47() {
IcuTestErrorCode status(*this, "TestConstructorAcceptsBCP47");
diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index b217ce2..a3a1ebc 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -122,6 +122,8 @@
void TestBug13554();
void TestBug20410();
void TestBug20900();
+ void TestLocaleCanonicalizationFromFile();
+ void TestKnownCanonicalizedListCorrect();
void TestConstructorAcceptsBCP47();
void TestAddLikelySubtags();
diff --git a/icu4c/source/test/testdata/localeCanonicalization.txt b/icu4c/source/test/testdata/localeCanonicalization.txt
new file mode 100644
index 0000000..e41eaac
--- /dev/null
+++ b/icu4c/source/test/testdata/localeCanonicalization.txt
@@ -0,0 +1,1648 @@
+# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
+# Test data for locale identifier canonicalization
+# Copyright © 1991-2020 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/copyright.html
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+#
+# Format:
+# <source locale identifier> ; <expected canonicalized locale identifier>
+#
+# The data lines are divided into 4 sets:
+# explicit: a short list of explicit test cases.
+# fromAliases: test cases generated from the alias data.
+# decanonicalized: test cases generated by reversing the normalization process.
+# withIrrelevants: test cases generated from the others by adding irrelevant fields where possible,
+# to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include:
+# Language: aaa
+# Script: Adlm
+# Region: AC
+# Variant: fonipa
+######
+
+
+
+# explicit
+
+art_lojban ; jbo
+en_US_aaland ; en_US
+en_US_heploc ; en_US_alalc97
+en_US_polytoni ; en_US_polyton
+en_aaland ; en_AX
+en_arevela ; en
+en_arevmda_arevela ; en
+en_lojban ; en
+hy_arevela ; hy
+hy_arevmda ; hyw
+hy_arevmda_arevela ; hyw
+hye_arevmda ; hyw
+no_bokmal_nynorsk ; nb
+no_nynorsk_bokmal ; nb
+zh_guoyu_hakka_xiang ; hak
+zh_hakka_xiang ; hak
+
+# fromAliases
+
+aa_saaho ; ssy
+aam ; aas
+aar ; aa
+abk ; ab
+adp ; dz
+afr ; af
+aju ; jrb
+aka ; ak
+alb ; sq
+als ; sq
+amh ; am
+ara ; ar
+arb ; ar
+arg ; an
+arm ; hy
+asd ; snz
+asm ; as
+aue ; ktz
+ava ; av
+ave ; ae
+aym ; ay
+ayr ; ay
+ayx ; nun
+aze ; az
+azj ; az
+bak ; ba
+bam ; bm
+baq ; eu
+bcc ; bal
+bcl ; bik
+bel ; be
+ben ; bn
+bgm ; bcg
+bh ; bho
+bih ; bho
+bis ; bi
+bjd ; drl
+bod ; bo
+bos ; bs
+bre ; br
+bul ; bg
+bur ; my
+bxk ; luy
+bxr ; bua
+cat ; ca
+ccq ; rki
+cel_gaulish ; xtg
+ces ; cs
+cha ; ch
+che ; ce
+chi ; zh
+chu ; cu
+chv ; cv
+cjr ; mom
+cka ; cmr
+cld ; syr
+cmk ; xch
+cmn ; zh
+cnr ; sr_ME
+cor ; kw
+cos ; co
+coy ; pij
+cqu ; quh
+cre ; cr
+cwd ; cr
+cym ; cy
+cze ; cs
+dan ; da
+deu ; de
+dgo ; doi
+dhd ; mwr
+dik ; din
+diq ; zza
+dit ; dif
+div ; dv
+drh ; mn
+drw ; fa_AF
+dut ; nl
+dzo ; dz
+ekk ; et
+ell ; el
+emk ; man
+eng ; en
+epo ; eo
+esk ; ik
+est ; et
+eus ; eu
+ewe ; ee
+fao ; fo
+fas ; fa
+fat ; ak
+fij ; fj
+fin ; fi
+fra ; fr
+fre ; fr
+fry ; fy
+fuc ; ff
+ful ; ff
+gav ; dev
+gaz ; om
+gbo ; grb
+geo ; ka
+ger ; de
+gfx ; vaj
+ggn ; gvr
+gla ; gd
+gle ; ga
+glg ; gl
+glv ; gv
+gno ; gon
+gre ; el
+grn ; gn
+gti ; nyc
+gug ; gn
+guj ; gu
+guv ; duz
+gya ; gba
+hat ; ht
+hau ; ha
+hbs ; sr_Latn
+hdn ; hai
+hea ; hmn
+heb ; he
+her ; hz
+him ; srx
+hin ; hi
+hmo ; ho
+hrr ; jal
+hrv ; hr
+hun ; hu
+hye ; hy
+ibi ; opa
+ibo ; ig
+ice ; is
+ido ; io
+iii ; ii
+ike ; iu
+iku ; iu
+ile ; ie
+ilw ; gal
+in ; id
+ina ; ia
+ind ; id
+ipk ; ik
+isl ; is
+ita ; it
+iw ; he
+jav ; jv
+jeg ; oyb
+ji ; yi
+jpn ; ja
+jw ; jv
+kal ; kl
+kan ; kn
+kas ; ks
+kat ; ka
+kau ; kr
+kaz ; kk
+kgc ; tdf
+kgh ; kml
+khk ; mn
+khm ; km
+kik ; ki
+kin ; rw
+kir ; ky
+kmr ; ku
+knc ; kr
+kng ; kg
+knn ; kok
+koj ; kwv
+kom ; kv
+kon ; kg
+kor ; ko
+kpv ; kv
+krm ; bmf
+ktr ; dtp
+kua ; kj
+kur ; ku
+kvs ; gdj
+kwq ; yam
+kxe ; tvd
+kzj ; dtp
+kzt ; dtp
+lao ; lo
+lat ; la
+lav ; lv
+lbk ; bnc
+lii ; raq
+lim ; li
+lin ; ln
+lit ; lt
+llo ; ngt
+lmm ; rmx
+ltz ; lb
+lub ; lu
+lug ; lg
+lvs ; lv
+mac ; mk
+mah ; mh
+mal ; ml
+mao ; mi
+mar ; mr
+may ; ms
+meg ; cir
+mhr ; chm
+mkd ; mk
+mlg ; mg
+mlt ; mt
+mnk ; man
+mo ; ro
+mol ; ro
+mon ; mn
+mri ; mi
+msa ; ms
+mst ; mry
+mup ; raj
+mwj ; vaj
+mya ; my
+myd ; aog
+myt ; mry
+nad ; xny
+nau ; na
+nav ; nv
+nbl ; nr
+ncp ; kdz
+nde ; nd
+ndo ; ng
+nep ; ne
+nld ; nl
+nno ; nn
+nns ; nbr
+nnx ; ngv
+no ; nb
+no_bokmal ; nb
+no_nynorsk ; nn
+nob ; nb
+nor ; nb
+npi ; ne
+nts ; pij
+nya ; ny
+oci ; oc
+ojg ; oj
+oji ; oj
+ori ; or
+orm ; om
+ory ; or
+oss ; os
+oun ; vaj
+pan ; pa
+pbu ; ps
+pcr ; adx
+per ; fa
+pes ; fa
+pli ; pi
+plt ; mg
+pmc ; huw
+pmu ; phr
+pnb ; lah
+pol ; pl
+por ; pt
+ppa ; bfy
+ppr ; lcq
+prs ; fa_AF
+pry ; prt
+pus ; ps
+puz ; pub
+que ; qu
+quz ; qu
+rmy ; rom
+roh ; rm
+ron ; ro
+rum ; ro
+run ; rn
+rus ; ru
+sag ; sg
+san ; sa
+sca ; hle
+scc ; sr
+scr ; hr
+sgn_BR ; bzs
+sgn_CO ; csn
+sgn_DE ; gsg
+sgn_DK ; dsl
+sgn_FR ; fsl
+sgn_GB ; bfi
+sgn_GR ; gss
+sgn_IE ; isg
+sgn_IT ; ise
+sgn_JP ; jsl
+sgn_MX ; mfs
+sgn_NI ; ncs
+sgn_NL ; dse
+sgn_NO ; nsi
+sgn_PT ; psr
+sgn_SE ; swl
+sgn_US ; ase
+sgn_ZA ; sfs
+sh ; sr_Latn
+sin ; si
+skk ; oyb
+slk ; sk
+slo ; sk
+slv ; sl
+sme ; se
+smo ; sm
+sna ; sn
+snd ; sd
+som ; so
+sot ; st
+spa ; es
+spy ; kln
+sqi ; sq
+src ; sc
+srd ; sc
+srp ; sr
+ssw ; ss
+sun ; su
+swa ; sw
+swc ; sw_CD
+swe ; sv
+swh ; sw
+tah ; ty
+tam ; ta
+tat ; tt
+tdu ; dtp
+tel ; te
+tgk ; tg
+tgl ; fil
+tha ; th
+thc ; tpo
+thx ; oyb
+tib ; bo
+tie ; ras
+tir ; ti
+tkk ; twm
+tl ; fil
+tlw ; weo
+tmp ; tyj
+tne ; kak
+tnf ; fa_AF
+ton ; to
+tsf ; taj
+tsn ; tn
+tso ; ts
+ttq ; tmh
+tuk ; tk
+tur ; tr
+tw ; ak
+twi ; ak
+uig ; ug
+ukr ; uk
+umu ; del
+und_004 ; und_AF
+und_008 ; und_AL
+und_010 ; und_AQ
+und_012 ; und_DZ
+und_016 ; und_AS
+und_020 ; und_AD
+und_024 ; und_AO
+und_028 ; und_AG
+und_031 ; und_AZ
+und_032 ; und_AR
+und_036 ; und_AU
+und_040 ; und_AT
+und_044 ; und_BS
+und_048 ; und_BH
+und_050 ; und_BD
+und_051 ; und_AM
+und_052 ; und_BB
+und_056 ; und_BE
+und_060 ; und_BM
+und_062 ; und_034
+und_064 ; und_BT
+und_068 ; und_BO
+und_070 ; und_BA
+und_072 ; und_BW
+und_074 ; und_BV
+und_076 ; und_BR
+und_084 ; und_BZ
+und_086 ; und_IO
+und_090 ; und_SB
+und_092 ; und_VG
+und_096 ; und_BN
+und_100 ; und_BG
+und_104 ; und_MM
+und_108 ; und_BI
+und_112 ; und_BY
+und_116 ; und_KH
+und_120 ; und_CM
+und_124 ; und_CA
+und_132 ; und_CV
+und_136 ; und_KY
+und_140 ; und_CF
+und_144 ; und_LK
+und_148 ; und_TD
+und_152 ; und_CL
+und_156 ; und_CN
+und_158 ; und_TW
+und_162 ; und_CX
+und_166 ; und_CC
+und_170 ; und_CO
+und_172 ; und_RU
+und_174 ; und_KM
+und_175 ; und_YT
+und_178 ; und_CG
+und_180 ; und_CD
+und_184 ; und_CK
+und_188 ; und_CR
+und_191 ; und_HR
+und_192 ; und_CU
+und_196 ; und_CY
+und_200 ; und_CZ
+und_203 ; und_CZ
+und_204 ; und_BJ
+und_208 ; und_DK
+und_212 ; und_DM
+und_214 ; und_DO
+und_218 ; und_EC
+und_222 ; und_SV
+und_226 ; und_GQ
+und_230 ; und_ET
+und_231 ; und_ET
+und_232 ; und_ER
+und_233 ; und_EE
+und_234 ; und_FO
+und_238 ; und_FK
+und_239 ; und_GS
+und_242 ; und_FJ
+und_246 ; und_FI
+und_248 ; und_AX
+und_249 ; und_FR
+und_250 ; und_FR
+und_254 ; und_GF
+und_258 ; und_PF
+und_260 ; und_TF
+und_262 ; und_DJ
+und_266 ; und_GA
+und_268 ; und_GE
+und_270 ; und_GM
+und_275 ; und_PS
+und_276 ; und_DE
+und_278 ; und_DE
+und_280 ; und_DE
+und_288 ; und_GH
+und_292 ; und_GI
+und_296 ; und_KI
+und_300 ; und_GR
+und_304 ; und_GL
+und_308 ; und_GD
+und_312 ; und_GP
+und_316 ; und_GU
+und_320 ; und_GT
+und_324 ; und_GN
+und_328 ; und_GY
+und_332 ; und_HT
+und_334 ; und_HM
+und_336 ; und_VA
+und_340 ; und_HN
+und_344 ; und_HK
+und_348 ; und_HU
+und_352 ; und_IS
+und_356 ; und_IN
+und_360 ; und_ID
+und_364 ; und_IR
+und_368 ; und_IQ
+und_372 ; und_IE
+und_376 ; und_IL
+und_380 ; und_IT
+und_384 ; und_CI
+und_388 ; und_JM
+und_392 ; und_JP
+und_398 ; und_KZ
+und_400 ; und_JO
+und_404 ; und_KE
+und_408 ; und_KP
+und_410 ; und_KR
+und_414 ; und_KW
+und_417 ; und_KG
+und_418 ; und_LA
+und_422 ; und_LB
+und_426 ; und_LS
+und_428 ; und_LV
+und_430 ; und_LR
+und_434 ; und_LY
+und_438 ; und_LI
+und_440 ; und_LT
+und_442 ; und_LU
+und_446 ; und_MO
+und_450 ; und_MG
+und_454 ; und_MW
+und_458 ; und_MY
+und_462 ; und_MV
+und_466 ; und_ML
+und_470 ; und_MT
+und_474 ; und_MQ
+und_478 ; und_MR
+und_480 ; und_MU
+und_484 ; und_MX
+und_492 ; und_MC
+und_496 ; und_MN
+und_498 ; und_MD
+und_499 ; und_ME
+und_500 ; und_MS
+und_504 ; und_MA
+und_508 ; und_MZ
+und_512 ; und_OM
+und_516 ; und_NA
+und_520 ; und_NR
+und_524 ; und_NP
+und_528 ; und_NL
+und_530 ; und_CW
+und_531 ; und_CW
+und_532 ; und_CW
+und_533 ; und_AW
+und_534 ; und_SX
+und_535 ; und_BQ
+und_536 ; und_SA
+und_540 ; und_NC
+und_548 ; und_VU
+und_554 ; und_NZ
+und_558 ; und_NI
+und_562 ; und_NE
+und_566 ; und_NG
+und_570 ; und_NU
+und_574 ; und_NF
+und_578 ; und_NO
+und_580 ; und_MP
+und_581 ; und_UM
+und_582 ; und_FM
+und_583 ; und_FM
+und_584 ; und_MH
+und_585 ; und_PW
+und_586 ; und_PK
+und_591 ; und_PA
+und_598 ; und_PG
+und_600 ; und_PY
+und_604 ; und_PE
+und_608 ; und_PH
+und_612 ; und_PN
+und_616 ; und_PL
+und_620 ; und_PT
+und_624 ; und_GW
+und_626 ; und_TL
+und_630 ; und_PR
+und_634 ; und_QA
+und_638 ; und_RE
+und_642 ; und_RO
+und_643 ; und_RU
+und_646 ; und_RW
+und_652 ; und_BL
+und_654 ; und_SH
+und_659 ; und_KN
+und_660 ; und_AI
+und_662 ; und_LC
+und_663 ; und_MF
+und_666 ; und_PM
+und_670 ; und_VC
+und_674 ; und_SM
+und_678 ; und_ST
+und_682 ; und_SA
+und_686 ; und_SN
+und_688 ; und_RS
+und_690 ; und_SC
+und_694 ; und_SL
+und_702 ; und_SG
+und_703 ; und_SK
+und_704 ; und_VN
+und_705 ; und_SI
+und_706 ; und_SO
+und_710 ; und_ZA
+und_716 ; und_ZW
+und_720 ; und_YE
+und_724 ; und_ES
+und_728 ; und_SS
+und_729 ; und_SD
+und_732 ; und_EH
+und_736 ; und_SD
+und_740 ; und_SR
+und_744 ; und_SJ
+und_748 ; und_SZ
+und_752 ; und_SE
+und_756 ; und_CH
+und_760 ; und_SY
+und_762 ; und_TJ
+und_764 ; und_TH
+und_768 ; und_TG
+und_772 ; und_TK
+und_776 ; und_TO
+und_780 ; und_TT
+und_784 ; und_AE
+und_788 ; und_TN
+und_792 ; und_TR
+und_795 ; und_TM
+und_796 ; und_TC
+und_798 ; und_TV
+und_800 ; und_UG
+und_804 ; und_UA
+und_807 ; und_MK
+und_810 ; und_RU
+und_818 ; und_EG
+und_826 ; und_GB
+und_830 ; und_JE
+und_831 ; und_GG
+und_832 ; und_JE
+und_833 ; und_IM
+und_834 ; und_TZ
+und_840 ; und_US
+und_850 ; und_VI
+und_854 ; und_BF
+und_858 ; und_UY
+und_860 ; und_UZ
+und_862 ; und_VE
+und_876 ; und_WF
+und_882 ; und_WS
+und_886 ; und_YE
+und_887 ; und_YE
+und_890 ; und_RS
+und_891 ; und_RS
+und_894 ; und_ZM
+und_958 ; und_AA
+und_959 ; und_QM
+und_960 ; und_QN
+und_962 ; und_QP
+und_963 ; und_QQ
+und_964 ; und_QR
+und_965 ; und_QS
+und_966 ; und_QT
+und_967 ; und_EU
+und_968 ; und_QV
+und_969 ; und_QW
+und_970 ; und_QX
+und_971 ; und_QY
+und_972 ; und_QZ
+und_973 ; und_XA
+und_974 ; und_XB
+und_975 ; und_XC
+und_976 ; und_XD
+und_977 ; und_XE
+und_978 ; und_XF
+und_979 ; und_XG
+und_980 ; und_XH
+und_981 ; und_XI
+und_982 ; und_XJ
+und_983 ; und_XK
+und_984 ; und_XL
+und_985 ; und_XM
+und_986 ; und_XN
+und_987 ; und_XO
+und_988 ; und_XP
+und_989 ; und_XQ
+und_990 ; und_XR
+und_991 ; und_XS
+und_992 ; und_XT
+und_993 ; und_XU
+und_994 ; und_XV
+und_995 ; und_XW
+und_996 ; und_XX
+und_997 ; und_XY
+und_998 ; und_XZ
+und_999 ; und_ZZ
+und_AN ; und_CW
+und_BU ; und_MM
+und_CS ; und_RS
+und_CT ; und_KI
+und_DD ; und_DE
+und_DY ; und_BJ
+und_FQ ; und_AQ
+und_FX ; und_FR
+und_HV ; und_BF
+und_JT ; und_UM
+und_MI ; und_UM
+und_NH ; und_VU
+und_NQ ; und_AQ
+und_NT ; und_SA
+und_PC ; und_FM
+und_PU ; und_UM
+und_PZ ; und_PA
+und_QU ; und_EU
+und_Qaai ; und_Zinh
+und_RH ; und_ZW
+und_SU ; und_RU
+und_TP ; und_TL
+und_UK ; und_GB
+und_VD ; und_VN
+und_WK ; und_UM
+und_YD ; und_YE
+und_YU ; und_RS
+und_ZR ; und_CD
+und_aaland ; und_AX
+und_arevela ; und
+und_arevmda ; und
+und_bokmal ; und
+und_hakka ; und
+und_heploc ; und_alalc97
+und_lojban ; und
+und_nynorsk ; und
+und_polytoni ; und_polyton
+und_saaho ; und
+und_xiang ; und
+uok ; ema
+urd ; ur
+uzb ; uz
+uzn ; uz
+ven ; ve
+vie ; vi
+vol ; vo
+wel ; cy
+wln ; wa
+wol ; wo
+xba ; cax
+xho ; xh
+xia ; acn
+xkh ; waw
+xpe ; kpe
+xsj ; suj
+xsl ; den
+ybd ; rki
+ydd ; yi
+yid ; yi
+yma ; lrr
+ymt ; mtm
+yor ; yo
+yos ; zom
+yuu ; yug
+zai ; zap
+zh_guoyu ; zh
+zh_hakka ; hak
+zh_xiang ; hsn
+zha ; za
+zho ; zh
+zsm ; ms
+zul ; zu
+zyb ; za
+
+# decanonicalized
+
+aar_saaho ; ssy
+arm_arevela ; hy
+arm_arevela_arevmda ; hyw
+arm_arevmda ; hyw
+chi_guoyu ; zh
+chi_guoyu_hakka_xiang ; hak
+chi_hakka ; hak
+chi_hakka_xiang ; hak
+chi_xiang ; hsn
+cmn_guoyu ; zh
+cmn_guoyu_hakka_xiang ; hak
+cmn_hakka ; hak
+cmn_hakka_xiang ; hak
+cmn_xiang ; hsn
+en_840_aaland ; en_US
+en_840_heploc ; en_US_alalc97
+en_840_polytoni ; en_US_polyton
+eng_840_aaland ; en_US
+eng_840_heploc ; en_US_alalc97
+eng_840_polytoni ; en_US_polyton
+eng_US_aaland ; en_US
+eng_US_heploc ; en_US_alalc97
+eng_US_polytoni ; en_US_polyton
+eng_aaland ; en_AX
+eng_arevela ; en
+eng_arevela_arevmda ; en
+eng_lojban ; en
+hye_arevela ; hy
+hye_arevela_arevmda ; hyw
+sgn_076 ; bzs
+sgn_170 ; csn
+sgn_208 ; dsl
+sgn_249 ; fsl
+sgn_250 ; fsl
+sgn_276 ; gsg
+sgn_278 ; gsg
+sgn_280 ; gsg
+sgn_300 ; gss
+sgn_372 ; isg
+sgn_380 ; ise
+sgn_392 ; jsl
+sgn_484 ; mfs
+sgn_528 ; dse
+sgn_558 ; ncs
+sgn_578 ; nsi
+sgn_620 ; psr
+sgn_710 ; sfs
+sgn_752 ; swl
+sgn_826 ; bfi
+sgn_840 ; ase
+sgn_DD ; gsg
+sgn_FX ; fsl
+sgn_UK ; bfi
+zho_guoyu ; zh
+zho_guoyu_hakka_xiang ; hak
+zho_hakka ; hak
+zho_hakka_xiang ; hak
+zho_xiang ; hsn
+
+# withIrrelevants
+
+aa_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa
+aaa_Adlm_004_fonipa ; aaa_Adlm_AF_fonipa
+aaa_Adlm_008_fonipa ; aaa_Adlm_AL_fonipa
+aaa_Adlm_010_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_012_fonipa ; aaa_Adlm_DZ_fonipa
+aaa_Adlm_016_fonipa ; aaa_Adlm_AS_fonipa
+aaa_Adlm_020_fonipa ; aaa_Adlm_AD_fonipa
+aaa_Adlm_024_fonipa ; aaa_Adlm_AO_fonipa
+aaa_Adlm_028_fonipa ; aaa_Adlm_AG_fonipa
+aaa_Adlm_031_fonipa ; aaa_Adlm_AZ_fonipa
+aaa_Adlm_032_fonipa ; aaa_Adlm_AR_fonipa
+aaa_Adlm_036_fonipa ; aaa_Adlm_AU_fonipa
+aaa_Adlm_040_fonipa ; aaa_Adlm_AT_fonipa
+aaa_Adlm_044_fonipa ; aaa_Adlm_BS_fonipa
+aaa_Adlm_048_fonipa ; aaa_Adlm_BH_fonipa
+aaa_Adlm_050_fonipa ; aaa_Adlm_BD_fonipa
+aaa_Adlm_051_fonipa ; aaa_Adlm_AM_fonipa
+aaa_Adlm_052_fonipa ; aaa_Adlm_BB_fonipa
+aaa_Adlm_056_fonipa ; aaa_Adlm_BE_fonipa
+aaa_Adlm_060_fonipa ; aaa_Adlm_BM_fonipa
+aaa_Adlm_062_fonipa ; aaa_Adlm_034_fonipa
+aaa_Adlm_064_fonipa ; aaa_Adlm_BT_fonipa
+aaa_Adlm_068_fonipa ; aaa_Adlm_BO_fonipa
+aaa_Adlm_070_fonipa ; aaa_Adlm_BA_fonipa
+aaa_Adlm_072_fonipa ; aaa_Adlm_BW_fonipa
+aaa_Adlm_074_fonipa ; aaa_Adlm_BV_fonipa
+aaa_Adlm_076_fonipa ; aaa_Adlm_BR_fonipa
+aaa_Adlm_084_fonipa ; aaa_Adlm_BZ_fonipa
+aaa_Adlm_086_fonipa ; aaa_Adlm_IO_fonipa
+aaa_Adlm_090_fonipa ; aaa_Adlm_SB_fonipa
+aaa_Adlm_092_fonipa ; aaa_Adlm_VG_fonipa
+aaa_Adlm_096_fonipa ; aaa_Adlm_BN_fonipa
+aaa_Adlm_100_fonipa ; aaa_Adlm_BG_fonipa
+aaa_Adlm_104_fonipa ; aaa_Adlm_MM_fonipa
+aaa_Adlm_108_fonipa ; aaa_Adlm_BI_fonipa
+aaa_Adlm_112_fonipa ; aaa_Adlm_BY_fonipa
+aaa_Adlm_116_fonipa ; aaa_Adlm_KH_fonipa
+aaa_Adlm_120_fonipa ; aaa_Adlm_CM_fonipa
+aaa_Adlm_124_fonipa ; aaa_Adlm_CA_fonipa
+aaa_Adlm_132_fonipa ; aaa_Adlm_CV_fonipa
+aaa_Adlm_136_fonipa ; aaa_Adlm_KY_fonipa
+aaa_Adlm_140_fonipa ; aaa_Adlm_CF_fonipa
+aaa_Adlm_144_fonipa ; aaa_Adlm_LK_fonipa
+aaa_Adlm_148_fonipa ; aaa_Adlm_TD_fonipa
+aaa_Adlm_152_fonipa ; aaa_Adlm_CL_fonipa
+aaa_Adlm_156_fonipa ; aaa_Adlm_CN_fonipa
+aaa_Adlm_158_fonipa ; aaa_Adlm_TW_fonipa
+aaa_Adlm_162_fonipa ; aaa_Adlm_CX_fonipa
+aaa_Adlm_166_fonipa ; aaa_Adlm_CC_fonipa
+aaa_Adlm_170_fonipa ; aaa_Adlm_CO_fonipa
+aaa_Adlm_172_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_174_fonipa ; aaa_Adlm_KM_fonipa
+aaa_Adlm_175_fonipa ; aaa_Adlm_YT_fonipa
+aaa_Adlm_178_fonipa ; aaa_Adlm_CG_fonipa
+aaa_Adlm_180_fonipa ; aaa_Adlm_CD_fonipa
+aaa_Adlm_184_fonipa ; aaa_Adlm_CK_fonipa
+aaa_Adlm_188_fonipa ; aaa_Adlm_CR_fonipa
+aaa_Adlm_191_fonipa ; aaa_Adlm_HR_fonipa
+aaa_Adlm_192_fonipa ; aaa_Adlm_CU_fonipa
+aaa_Adlm_196_fonipa ; aaa_Adlm_CY_fonipa
+aaa_Adlm_200_fonipa ; aaa_Adlm_CZ_fonipa
+aaa_Adlm_203_fonipa ; aaa_Adlm_CZ_fonipa
+aaa_Adlm_204_fonipa ; aaa_Adlm_BJ_fonipa
+aaa_Adlm_208_fonipa ; aaa_Adlm_DK_fonipa
+aaa_Adlm_212_fonipa ; aaa_Adlm_DM_fonipa
+aaa_Adlm_214_fonipa ; aaa_Adlm_DO_fonipa
+aaa_Adlm_218_fonipa ; aaa_Adlm_EC_fonipa
+aaa_Adlm_222_fonipa ; aaa_Adlm_SV_fonipa
+aaa_Adlm_226_fonipa ; aaa_Adlm_GQ_fonipa
+aaa_Adlm_230_fonipa ; aaa_Adlm_ET_fonipa
+aaa_Adlm_231_fonipa ; aaa_Adlm_ET_fonipa
+aaa_Adlm_232_fonipa ; aaa_Adlm_ER_fonipa
+aaa_Adlm_233_fonipa ; aaa_Adlm_EE_fonipa
+aaa_Adlm_234_fonipa ; aaa_Adlm_FO_fonipa
+aaa_Adlm_238_fonipa ; aaa_Adlm_FK_fonipa
+aaa_Adlm_239_fonipa ; aaa_Adlm_GS_fonipa
+aaa_Adlm_242_fonipa ; aaa_Adlm_FJ_fonipa
+aaa_Adlm_246_fonipa ; aaa_Adlm_FI_fonipa
+aaa_Adlm_248_fonipa ; aaa_Adlm_AX_fonipa
+aaa_Adlm_249_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_250_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_254_fonipa ; aaa_Adlm_GF_fonipa
+aaa_Adlm_258_fonipa ; aaa_Adlm_PF_fonipa
+aaa_Adlm_260_fonipa ; aaa_Adlm_TF_fonipa
+aaa_Adlm_262_fonipa ; aaa_Adlm_DJ_fonipa
+aaa_Adlm_266_fonipa ; aaa_Adlm_GA_fonipa
+aaa_Adlm_268_fonipa ; aaa_Adlm_GE_fonipa
+aaa_Adlm_270_fonipa ; aaa_Adlm_GM_fonipa
+aaa_Adlm_275_fonipa ; aaa_Adlm_PS_fonipa
+aaa_Adlm_276_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_278_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_280_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_288_fonipa ; aaa_Adlm_GH_fonipa
+aaa_Adlm_292_fonipa ; aaa_Adlm_GI_fonipa
+aaa_Adlm_296_fonipa ; aaa_Adlm_KI_fonipa
+aaa_Adlm_300_fonipa ; aaa_Adlm_GR_fonipa
+aaa_Adlm_304_fonipa ; aaa_Adlm_GL_fonipa
+aaa_Adlm_308_fonipa ; aaa_Adlm_GD_fonipa
+aaa_Adlm_312_fonipa ; aaa_Adlm_GP_fonipa
+aaa_Adlm_316_fonipa ; aaa_Adlm_GU_fonipa
+aaa_Adlm_320_fonipa ; aaa_Adlm_GT_fonipa
+aaa_Adlm_324_fonipa ; aaa_Adlm_GN_fonipa
+aaa_Adlm_328_fonipa ; aaa_Adlm_GY_fonipa
+aaa_Adlm_332_fonipa ; aaa_Adlm_HT_fonipa
+aaa_Adlm_334_fonipa ; aaa_Adlm_HM_fonipa
+aaa_Adlm_336_fonipa ; aaa_Adlm_VA_fonipa
+aaa_Adlm_340_fonipa ; aaa_Adlm_HN_fonipa
+aaa_Adlm_344_fonipa ; aaa_Adlm_HK_fonipa
+aaa_Adlm_348_fonipa ; aaa_Adlm_HU_fonipa
+aaa_Adlm_352_fonipa ; aaa_Adlm_IS_fonipa
+aaa_Adlm_356_fonipa ; aaa_Adlm_IN_fonipa
+aaa_Adlm_360_fonipa ; aaa_Adlm_ID_fonipa
+aaa_Adlm_364_fonipa ; aaa_Adlm_IR_fonipa
+aaa_Adlm_368_fonipa ; aaa_Adlm_IQ_fonipa
+aaa_Adlm_372_fonipa ; aaa_Adlm_IE_fonipa
+aaa_Adlm_376_fonipa ; aaa_Adlm_IL_fonipa
+aaa_Adlm_380_fonipa ; aaa_Adlm_IT_fonipa
+aaa_Adlm_384_fonipa ; aaa_Adlm_CI_fonipa
+aaa_Adlm_388_fonipa ; aaa_Adlm_JM_fonipa
+aaa_Adlm_392_fonipa ; aaa_Adlm_JP_fonipa
+aaa_Adlm_398_fonipa ; aaa_Adlm_KZ_fonipa
+aaa_Adlm_400_fonipa ; aaa_Adlm_JO_fonipa
+aaa_Adlm_404_fonipa ; aaa_Adlm_KE_fonipa
+aaa_Adlm_408_fonipa ; aaa_Adlm_KP_fonipa
+aaa_Adlm_410_fonipa ; aaa_Adlm_KR_fonipa
+aaa_Adlm_414_fonipa ; aaa_Adlm_KW_fonipa
+aaa_Adlm_417_fonipa ; aaa_Adlm_KG_fonipa
+aaa_Adlm_418_fonipa ; aaa_Adlm_LA_fonipa
+aaa_Adlm_422_fonipa ; aaa_Adlm_LB_fonipa
+aaa_Adlm_426_fonipa ; aaa_Adlm_LS_fonipa
+aaa_Adlm_428_fonipa ; aaa_Adlm_LV_fonipa
+aaa_Adlm_430_fonipa ; aaa_Adlm_LR_fonipa
+aaa_Adlm_434_fonipa ; aaa_Adlm_LY_fonipa
+aaa_Adlm_438_fonipa ; aaa_Adlm_LI_fonipa
+aaa_Adlm_440_fonipa ; aaa_Adlm_LT_fonipa
+aaa_Adlm_442_fonipa ; aaa_Adlm_LU_fonipa
+aaa_Adlm_446_fonipa ; aaa_Adlm_MO_fonipa
+aaa_Adlm_450_fonipa ; aaa_Adlm_MG_fonipa
+aaa_Adlm_454_fonipa ; aaa_Adlm_MW_fonipa
+aaa_Adlm_458_fonipa ; aaa_Adlm_MY_fonipa
+aaa_Adlm_462_fonipa ; aaa_Adlm_MV_fonipa
+aaa_Adlm_466_fonipa ; aaa_Adlm_ML_fonipa
+aaa_Adlm_470_fonipa ; aaa_Adlm_MT_fonipa
+aaa_Adlm_474_fonipa ; aaa_Adlm_MQ_fonipa
+aaa_Adlm_478_fonipa ; aaa_Adlm_MR_fonipa
+aaa_Adlm_480_fonipa ; aaa_Adlm_MU_fonipa
+aaa_Adlm_484_fonipa ; aaa_Adlm_MX_fonipa
+aaa_Adlm_492_fonipa ; aaa_Adlm_MC_fonipa
+aaa_Adlm_496_fonipa ; aaa_Adlm_MN_fonipa
+aaa_Adlm_498_fonipa ; aaa_Adlm_MD_fonipa
+aaa_Adlm_499_fonipa ; aaa_Adlm_ME_fonipa
+aaa_Adlm_500_fonipa ; aaa_Adlm_MS_fonipa
+aaa_Adlm_504_fonipa ; aaa_Adlm_MA_fonipa
+aaa_Adlm_508_fonipa ; aaa_Adlm_MZ_fonipa
+aaa_Adlm_512_fonipa ; aaa_Adlm_OM_fonipa
+aaa_Adlm_516_fonipa ; aaa_Adlm_NA_fonipa
+aaa_Adlm_520_fonipa ; aaa_Adlm_NR_fonipa
+aaa_Adlm_524_fonipa ; aaa_Adlm_NP_fonipa
+aaa_Adlm_528_fonipa ; aaa_Adlm_NL_fonipa
+aaa_Adlm_530_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_531_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_532_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_533_fonipa ; aaa_Adlm_AW_fonipa
+aaa_Adlm_534_fonipa ; aaa_Adlm_SX_fonipa
+aaa_Adlm_535_fonipa ; aaa_Adlm_BQ_fonipa
+aaa_Adlm_536_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_540_fonipa ; aaa_Adlm_NC_fonipa
+aaa_Adlm_548_fonipa ; aaa_Adlm_VU_fonipa
+aaa_Adlm_554_fonipa ; aaa_Adlm_NZ_fonipa
+aaa_Adlm_558_fonipa ; aaa_Adlm_NI_fonipa
+aaa_Adlm_562_fonipa ; aaa_Adlm_NE_fonipa
+aaa_Adlm_566_fonipa ; aaa_Adlm_NG_fonipa
+aaa_Adlm_570_fonipa ; aaa_Adlm_NU_fonipa
+aaa_Adlm_574_fonipa ; aaa_Adlm_NF_fonipa
+aaa_Adlm_578_fonipa ; aaa_Adlm_NO_fonipa
+aaa_Adlm_580_fonipa ; aaa_Adlm_MP_fonipa
+aaa_Adlm_581_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_582_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_583_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_584_fonipa ; aaa_Adlm_MH_fonipa
+aaa_Adlm_585_fonipa ; aaa_Adlm_PW_fonipa
+aaa_Adlm_586_fonipa ; aaa_Adlm_PK_fonipa
+aaa_Adlm_591_fonipa ; aaa_Adlm_PA_fonipa
+aaa_Adlm_598_fonipa ; aaa_Adlm_PG_fonipa
+aaa_Adlm_600_fonipa ; aaa_Adlm_PY_fonipa
+aaa_Adlm_604_fonipa ; aaa_Adlm_PE_fonipa
+aaa_Adlm_608_fonipa ; aaa_Adlm_PH_fonipa
+aaa_Adlm_612_fonipa ; aaa_Adlm_PN_fonipa
+aaa_Adlm_616_fonipa ; aaa_Adlm_PL_fonipa
+aaa_Adlm_620_fonipa ; aaa_Adlm_PT_fonipa
+aaa_Adlm_624_fonipa ; aaa_Adlm_GW_fonipa
+aaa_Adlm_626_fonipa ; aaa_Adlm_TL_fonipa
+aaa_Adlm_630_fonipa ; aaa_Adlm_PR_fonipa
+aaa_Adlm_634_fonipa ; aaa_Adlm_QA_fonipa
+aaa_Adlm_638_fonipa ; aaa_Adlm_RE_fonipa
+aaa_Adlm_642_fonipa ; aaa_Adlm_RO_fonipa
+aaa_Adlm_643_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_646_fonipa ; aaa_Adlm_RW_fonipa
+aaa_Adlm_652_fonipa ; aaa_Adlm_BL_fonipa
+aaa_Adlm_654_fonipa ; aaa_Adlm_SH_fonipa
+aaa_Adlm_659_fonipa ; aaa_Adlm_KN_fonipa
+aaa_Adlm_660_fonipa ; aaa_Adlm_AI_fonipa
+aaa_Adlm_662_fonipa ; aaa_Adlm_LC_fonipa
+aaa_Adlm_663_fonipa ; aaa_Adlm_MF_fonipa
+aaa_Adlm_666_fonipa ; aaa_Adlm_PM_fonipa
+aaa_Adlm_670_fonipa ; aaa_Adlm_VC_fonipa
+aaa_Adlm_674_fonipa ; aaa_Adlm_SM_fonipa
+aaa_Adlm_678_fonipa ; aaa_Adlm_ST_fonipa
+aaa_Adlm_682_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_686_fonipa ; aaa_Adlm_SN_fonipa
+aaa_Adlm_688_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_690_fonipa ; aaa_Adlm_SC_fonipa
+aaa_Adlm_694_fonipa ; aaa_Adlm_SL_fonipa
+aaa_Adlm_702_fonipa ; aaa_Adlm_SG_fonipa
+aaa_Adlm_703_fonipa ; aaa_Adlm_SK_fonipa
+aaa_Adlm_704_fonipa ; aaa_Adlm_VN_fonipa
+aaa_Adlm_705_fonipa ; aaa_Adlm_SI_fonipa
+aaa_Adlm_706_fonipa ; aaa_Adlm_SO_fonipa
+aaa_Adlm_710_fonipa ; aaa_Adlm_ZA_fonipa
+aaa_Adlm_716_fonipa ; aaa_Adlm_ZW_fonipa
+aaa_Adlm_720_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_724_fonipa ; aaa_Adlm_ES_fonipa
+aaa_Adlm_728_fonipa ; aaa_Adlm_SS_fonipa
+aaa_Adlm_729_fonipa ; aaa_Adlm_SD_fonipa
+aaa_Adlm_732_fonipa ; aaa_Adlm_EH_fonipa
+aaa_Adlm_736_fonipa ; aaa_Adlm_SD_fonipa
+aaa_Adlm_740_fonipa ; aaa_Adlm_SR_fonipa
+aaa_Adlm_744_fonipa ; aaa_Adlm_SJ_fonipa
+aaa_Adlm_748_fonipa ; aaa_Adlm_SZ_fonipa
+aaa_Adlm_752_fonipa ; aaa_Adlm_SE_fonipa
+aaa_Adlm_756_fonipa ; aaa_Adlm_CH_fonipa
+aaa_Adlm_760_fonipa ; aaa_Adlm_SY_fonipa
+aaa_Adlm_762_fonipa ; aaa_Adlm_TJ_fonipa
+aaa_Adlm_764_fonipa ; aaa_Adlm_TH_fonipa
+aaa_Adlm_768_fonipa ; aaa_Adlm_TG_fonipa
+aaa_Adlm_772_fonipa ; aaa_Adlm_TK_fonipa
+aaa_Adlm_776_fonipa ; aaa_Adlm_TO_fonipa
+aaa_Adlm_780_fonipa ; aaa_Adlm_TT_fonipa
+aaa_Adlm_784_fonipa ; aaa_Adlm_AE_fonipa
+aaa_Adlm_788_fonipa ; aaa_Adlm_TN_fonipa
+aaa_Adlm_792_fonipa ; aaa_Adlm_TR_fonipa
+aaa_Adlm_795_fonipa ; aaa_Adlm_TM_fonipa
+aaa_Adlm_796_fonipa ; aaa_Adlm_TC_fonipa
+aaa_Adlm_798_fonipa ; aaa_Adlm_TV_fonipa
+aaa_Adlm_800_fonipa ; aaa_Adlm_UG_fonipa
+aaa_Adlm_804_fonipa ; aaa_Adlm_UA_fonipa
+aaa_Adlm_807_fonipa ; aaa_Adlm_MK_fonipa
+aaa_Adlm_810_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_818_fonipa ; aaa_Adlm_EG_fonipa
+aaa_Adlm_826_fonipa ; aaa_Adlm_GB_fonipa
+aaa_Adlm_830_fonipa ; aaa_Adlm_JE_fonipa
+aaa_Adlm_831_fonipa ; aaa_Adlm_GG_fonipa
+aaa_Adlm_832_fonipa ; aaa_Adlm_JE_fonipa
+aaa_Adlm_833_fonipa ; aaa_Adlm_IM_fonipa
+aaa_Adlm_834_fonipa ; aaa_Adlm_TZ_fonipa
+aaa_Adlm_840_fonipa ; aaa_Adlm_US_fonipa
+aaa_Adlm_850_fonipa ; aaa_Adlm_VI_fonipa
+aaa_Adlm_854_fonipa ; aaa_Adlm_BF_fonipa
+aaa_Adlm_858_fonipa ; aaa_Adlm_UY_fonipa
+aaa_Adlm_860_fonipa ; aaa_Adlm_UZ_fonipa
+aaa_Adlm_862_fonipa ; aaa_Adlm_VE_fonipa
+aaa_Adlm_876_fonipa ; aaa_Adlm_WF_fonipa
+aaa_Adlm_882_fonipa ; aaa_Adlm_WS_fonipa
+aaa_Adlm_886_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_887_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_890_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_891_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_894_fonipa ; aaa_Adlm_ZM_fonipa
+aaa_Adlm_958_fonipa ; aaa_Adlm_AA_fonipa
+aaa_Adlm_959_fonipa ; aaa_Adlm_QM_fonipa
+aaa_Adlm_960_fonipa ; aaa_Adlm_QN_fonipa
+aaa_Adlm_962_fonipa ; aaa_Adlm_QP_fonipa
+aaa_Adlm_963_fonipa ; aaa_Adlm_QQ_fonipa
+aaa_Adlm_964_fonipa ; aaa_Adlm_QR_fonipa
+aaa_Adlm_965_fonipa ; aaa_Adlm_QS_fonipa
+aaa_Adlm_966_fonipa ; aaa_Adlm_QT_fonipa
+aaa_Adlm_967_fonipa ; aaa_Adlm_EU_fonipa
+aaa_Adlm_968_fonipa ; aaa_Adlm_QV_fonipa
+aaa_Adlm_969_fonipa ; aaa_Adlm_QW_fonipa
+aaa_Adlm_970_fonipa ; aaa_Adlm_QX_fonipa
+aaa_Adlm_971_fonipa ; aaa_Adlm_QY_fonipa
+aaa_Adlm_972_fonipa ; aaa_Adlm_QZ_fonipa
+aaa_Adlm_973_fonipa ; aaa_Adlm_XA_fonipa
+aaa_Adlm_974_fonipa ; aaa_Adlm_XB_fonipa
+aaa_Adlm_975_fonipa ; aaa_Adlm_XC_fonipa
+aaa_Adlm_976_fonipa ; aaa_Adlm_XD_fonipa
+aaa_Adlm_977_fonipa ; aaa_Adlm_XE_fonipa
+aaa_Adlm_978_fonipa ; aaa_Adlm_XF_fonipa
+aaa_Adlm_979_fonipa ; aaa_Adlm_XG_fonipa
+aaa_Adlm_980_fonipa ; aaa_Adlm_XH_fonipa
+aaa_Adlm_981_fonipa ; aaa_Adlm_XI_fonipa
+aaa_Adlm_982_fonipa ; aaa_Adlm_XJ_fonipa
+aaa_Adlm_983_fonipa ; aaa_Adlm_XK_fonipa
+aaa_Adlm_984_fonipa ; aaa_Adlm_XL_fonipa
+aaa_Adlm_985_fonipa ; aaa_Adlm_XM_fonipa
+aaa_Adlm_986_fonipa ; aaa_Adlm_XN_fonipa
+aaa_Adlm_987_fonipa ; aaa_Adlm_XO_fonipa
+aaa_Adlm_988_fonipa ; aaa_Adlm_XP_fonipa
+aaa_Adlm_989_fonipa ; aaa_Adlm_XQ_fonipa
+aaa_Adlm_990_fonipa ; aaa_Adlm_XR_fonipa
+aaa_Adlm_991_fonipa ; aaa_Adlm_XS_fonipa
+aaa_Adlm_992_fonipa ; aaa_Adlm_XT_fonipa
+aaa_Adlm_993_fonipa ; aaa_Adlm_XU_fonipa
+aaa_Adlm_994_fonipa ; aaa_Adlm_XV_fonipa
+aaa_Adlm_995_fonipa ; aaa_Adlm_XW_fonipa
+aaa_Adlm_996_fonipa ; aaa_Adlm_XX_fonipa
+aaa_Adlm_997_fonipa ; aaa_Adlm_XY_fonipa
+aaa_Adlm_998_fonipa ; aaa_Adlm_XZ_fonipa
+aaa_Adlm_999_fonipa ; aaa_Adlm_ZZ_fonipa
+aaa_Adlm_AC_aaland_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_arevela_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_arevmda_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_bokmal_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_hakka ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_heploc ; aaa_Adlm_AC_alalc97_fonipa
+aaa_Adlm_AC_fonipa_lojban ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_nynorsk ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_polytoni ; aaa_Adlm_AC_fonipa_polyton
+aaa_Adlm_AC_fonipa_saaho ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_xiang ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AN_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_BU_fonipa ; aaa_Adlm_MM_fonipa
+aaa_Adlm_CS_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_CT_fonipa ; aaa_Adlm_KI_fonipa
+aaa_Adlm_DD_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_DY_fonipa ; aaa_Adlm_BJ_fonipa
+aaa_Adlm_FQ_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_FX_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_HV_fonipa ; aaa_Adlm_BF_fonipa
+aaa_Adlm_JT_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_MI_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_NH_fonipa ; aaa_Adlm_VU_fonipa
+aaa_Adlm_NQ_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_NT_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_PC_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_PU_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_PZ_fonipa ; aaa_Adlm_PA_fonipa
+aaa_Adlm_QU_fonipa ; aaa_Adlm_EU_fonipa
+aaa_Adlm_RH_fonipa ; aaa_Adlm_ZW_fonipa
+aaa_Adlm_SU_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_TP_fonipa ; aaa_Adlm_TL_fonipa
+aaa_Adlm_UK_fonipa ; aaa_Adlm_GB_fonipa
+aaa_Adlm_VD_fonipa ; aaa_Adlm_VN_fonipa
+aaa_Adlm_WK_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_YD_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_YU_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_ZR_fonipa ; aaa_Adlm_CD_fonipa
+aaa_Qaai_AC_fonipa ; aaa_Zinh_AC_fonipa
+aam_Adlm_AC_fonipa ; aas_Adlm_AC_fonipa
+aar_Adlm_AC_fonipa ; aa_Adlm_AC_fonipa
+aar_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa
+abk_Adlm_AC_fonipa ; ab_Adlm_AC_fonipa
+adp_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa
+afr_Adlm_AC_fonipa ; af_Adlm_AC_fonipa
+aju_Adlm_AC_fonipa ; jrb_Adlm_AC_fonipa
+aka_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+alb_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+als_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+amh_Adlm_AC_fonipa ; am_Adlm_AC_fonipa
+ara_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa
+arb_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa
+arg_Adlm_AC_fonipa ; an_Adlm_AC_fonipa
+arm_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+arm_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+arm_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+arm_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa
+art_Adlm_AC_fonipa_lojban ; jbo_Adlm_AC_fonipa
+asd_Adlm_AC_fonipa ; snz_Adlm_AC_fonipa
+asm_Adlm_AC_fonipa ; as_Adlm_AC_fonipa
+aue_Adlm_AC_fonipa ; ktz_Adlm_AC_fonipa
+ava_Adlm_AC_fonipa ; av_Adlm_AC_fonipa
+ave_Adlm_AC_fonipa ; ae_Adlm_AC_fonipa
+aym_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa
+ayr_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa
+ayx_Adlm_AC_fonipa ; nun_Adlm_AC_fonipa
+aze_Adlm_AC_fonipa ; az_Adlm_AC_fonipa
+azj_Adlm_AC_fonipa ; az_Adlm_AC_fonipa
+bak_Adlm_AC_fonipa ; ba_Adlm_AC_fonipa
+bam_Adlm_AC_fonipa ; bm_Adlm_AC_fonipa
+baq_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa
+bcc_Adlm_AC_fonipa ; bal_Adlm_AC_fonipa
+bcl_Adlm_AC_fonipa ; bik_Adlm_AC_fonipa
+bel_Adlm_AC_fonipa ; be_Adlm_AC_fonipa
+ben_Adlm_AC_fonipa ; bn_Adlm_AC_fonipa
+bgm_Adlm_AC_fonipa ; bcg_Adlm_AC_fonipa
+bh_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa
+bih_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa
+bis_Adlm_AC_fonipa ; bi_Adlm_AC_fonipa
+bjd_Adlm_AC_fonipa ; drl_Adlm_AC_fonipa
+bod_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa
+bos_Adlm_AC_fonipa ; bs_Adlm_AC_fonipa
+bre_Adlm_AC_fonipa ; br_Adlm_AC_fonipa
+bul_Adlm_AC_fonipa ; bg_Adlm_AC_fonipa
+bur_Adlm_AC_fonipa ; my_Adlm_AC_fonipa
+bxk_Adlm_AC_fonipa ; luy_Adlm_AC_fonipa
+bxr_Adlm_AC_fonipa ; bua_Adlm_AC_fonipa
+cat_Adlm_AC_fonipa ; ca_Adlm_AC_fonipa
+ccq_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa
+cel_Adlm_AC_fonipa_gaulish ; xtg_Adlm_AC_fonipa
+ces_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa
+cha_Adlm_AC_fonipa ; ch_Adlm_AC_fonipa
+che_Adlm_AC_fonipa ; ce_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+chu_Adlm_AC_fonipa ; cu_Adlm_AC_fonipa
+chv_Adlm_AC_fonipa ; cv_Adlm_AC_fonipa
+cjr_Adlm_AC_fonipa ; mom_Adlm_AC_fonipa
+cka_Adlm_AC_fonipa ; cmr_Adlm_AC_fonipa
+cld_Adlm_AC_fonipa ; syr_Adlm_AC_fonipa
+cmk_Adlm_AC_fonipa ; xch_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+cnr_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+cor_Adlm_AC_fonipa ; kw_Adlm_AC_fonipa
+cos_Adlm_AC_fonipa ; co_Adlm_AC_fonipa
+coy_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa
+cqu_Adlm_AC_fonipa ; quh_Adlm_AC_fonipa
+cre_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa
+cwd_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa
+cym_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa
+cze_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa
+dan_Adlm_AC_fonipa ; da_Adlm_AC_fonipa
+deu_Adlm_AC_fonipa ; de_Adlm_AC_fonipa
+dgo_Adlm_AC_fonipa ; doi_Adlm_AC_fonipa
+dhd_Adlm_AC_fonipa ; mwr_Adlm_AC_fonipa
+dik_Adlm_AC_fonipa ; din_Adlm_AC_fonipa
+diq_Adlm_AC_fonipa ; zza_Adlm_AC_fonipa
+dit_Adlm_AC_fonipa ; dif_Adlm_AC_fonipa
+div_Adlm_AC_fonipa ; dv_Adlm_AC_fonipa
+drh_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+drw_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+dut_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa
+dzo_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa
+ekk_Adlm_AC_fonipa ; et_Adlm_AC_fonipa
+ell_Adlm_AC_fonipa ; el_Adlm_AC_fonipa
+emk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa
+en_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa
+en_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+en_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+en_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa
+en_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa
+en_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+en_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+eng_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa
+eng_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+eng_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+eng_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa
+eng_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa
+eng_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+eng_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+epo_Adlm_AC_fonipa ; eo_Adlm_AC_fonipa
+esk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa
+est_Adlm_AC_fonipa ; et_Adlm_AC_fonipa
+eus_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa
+ewe_Adlm_AC_fonipa ; ee_Adlm_AC_fonipa
+fao_Adlm_AC_fonipa ; fo_Adlm_AC_fonipa
+fas_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+fat_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+fij_Adlm_AC_fonipa ; fj_Adlm_AC_fonipa
+fin_Adlm_AC_fonipa ; fi_Adlm_AC_fonipa
+fra_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa
+fre_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa
+fry_Adlm_AC_fonipa ; fy_Adlm_AC_fonipa
+fuc_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa
+ful_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa
+gav_Adlm_AC_fonipa ; dev_Adlm_AC_fonipa
+gaz_Adlm_AC_fonipa ; om_Adlm_AC_fonipa
+gbo_Adlm_AC_fonipa ; grb_Adlm_AC_fonipa
+geo_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa
+ger_Adlm_AC_fonipa ; de_Adlm_AC_fonipa
+gfx_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+ggn_Adlm_AC_fonipa ; gvr_Adlm_AC_fonipa
+gla_Adlm_AC_fonipa ; gd_Adlm_AC_fonipa
+gle_Adlm_AC_fonipa ; ga_Adlm_AC_fonipa
+glg_Adlm_AC_fonipa ; gl_Adlm_AC_fonipa
+glv_Adlm_AC_fonipa ; gv_Adlm_AC_fonipa
+gno_Adlm_AC_fonipa ; gon_Adlm_AC_fonipa
+gre_Adlm_AC_fonipa ; el_Adlm_AC_fonipa
+grn_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa
+gti_Adlm_AC_fonipa ; nyc_Adlm_AC_fonipa
+gug_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa
+guj_Adlm_AC_fonipa ; gu_Adlm_AC_fonipa
+guv_Adlm_AC_fonipa ; duz_Adlm_AC_fonipa
+gya_Adlm_AC_fonipa ; gba_Adlm_AC_fonipa
+hat_Adlm_AC_fonipa ; ht_Adlm_AC_fonipa
+hau_Adlm_AC_fonipa ; ha_Adlm_AC_fonipa
+hbs_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+hdn_Adlm_AC_fonipa ; hai_Adlm_AC_fonipa
+hea_Adlm_AC_fonipa ; hmn_Adlm_AC_fonipa
+heb_Adlm_AC_fonipa ; he_Adlm_AC_fonipa
+her_Adlm_AC_fonipa ; hz_Adlm_AC_fonipa
+him_Adlm_AC_fonipa ; srx_Adlm_AC_fonipa
+hin_Adlm_AC_fonipa ; hi_Adlm_AC_fonipa
+hmo_Adlm_AC_fonipa ; ho_Adlm_AC_fonipa
+hrr_Adlm_AC_fonipa ; jal_Adlm_AC_fonipa
+hrv_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa
+hun_Adlm_AC_fonipa ; hu_Adlm_AC_fonipa
+hy_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hy_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+hy_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+hye_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa
+ibi_Adlm_AC_fonipa ; opa_Adlm_AC_fonipa
+ibo_Adlm_AC_fonipa ; ig_Adlm_AC_fonipa
+ice_Adlm_AC_fonipa ; is_Adlm_AC_fonipa
+ido_Adlm_AC_fonipa ; io_Adlm_AC_fonipa
+iii_Adlm_AC_fonipa ; ii_Adlm_AC_fonipa
+ike_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa
+iku_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa
+ile_Adlm_AC_fonipa ; ie_Adlm_AC_fonipa
+ilw_Adlm_AC_fonipa ; gal_Adlm_AC_fonipa
+in_Adlm_AC_fonipa ; id_Adlm_AC_fonipa
+ina_Adlm_AC_fonipa ; ia_Adlm_AC_fonipa
+ind_Adlm_AC_fonipa ; id_Adlm_AC_fonipa
+ipk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa
+isl_Adlm_AC_fonipa ; is_Adlm_AC_fonipa
+ita_Adlm_AC_fonipa ; it_Adlm_AC_fonipa
+iw_Adlm_AC_fonipa ; he_Adlm_AC_fonipa
+jav_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa
+jeg_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+ji_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+jpn_Adlm_AC_fonipa ; ja_Adlm_AC_fonipa
+jw_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa
+kal_Adlm_AC_fonipa ; kl_Adlm_AC_fonipa
+kan_Adlm_AC_fonipa ; kn_Adlm_AC_fonipa
+kas_Adlm_AC_fonipa ; ks_Adlm_AC_fonipa
+kat_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa
+kau_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa
+kaz_Adlm_AC_fonipa ; kk_Adlm_AC_fonipa
+kgc_Adlm_AC_fonipa ; tdf_Adlm_AC_fonipa
+kgh_Adlm_AC_fonipa ; kml_Adlm_AC_fonipa
+khk_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+khm_Adlm_AC_fonipa ; km_Adlm_AC_fonipa
+kik_Adlm_AC_fonipa ; ki_Adlm_AC_fonipa
+kin_Adlm_AC_fonipa ; rw_Adlm_AC_fonipa
+kir_Adlm_AC_fonipa ; ky_Adlm_AC_fonipa
+kmr_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa
+knc_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa
+kng_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa
+knn_Adlm_AC_fonipa ; kok_Adlm_AC_fonipa
+koj_Adlm_AC_fonipa ; kwv_Adlm_AC_fonipa
+kom_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa
+kon_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa
+kor_Adlm_AC_fonipa ; ko_Adlm_AC_fonipa
+kpv_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa
+krm_Adlm_AC_fonipa ; bmf_Adlm_AC_fonipa
+ktr_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+kua_Adlm_AC_fonipa ; kj_Adlm_AC_fonipa
+kur_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa
+kvs_Adlm_AC_fonipa ; gdj_Adlm_AC_fonipa
+kwq_Adlm_AC_fonipa ; yam_Adlm_AC_fonipa
+kxe_Adlm_AC_fonipa ; tvd_Adlm_AC_fonipa
+kzj_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+kzt_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+lao_Adlm_AC_fonipa ; lo_Adlm_AC_fonipa
+lat_Adlm_AC_fonipa ; la_Adlm_AC_fonipa
+lav_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa
+lbk_Adlm_AC_fonipa ; bnc_Adlm_AC_fonipa
+lii_Adlm_AC_fonipa ; raq_Adlm_AC_fonipa
+lim_Adlm_AC_fonipa ; li_Adlm_AC_fonipa
+lin_Adlm_AC_fonipa ; ln_Adlm_AC_fonipa
+lit_Adlm_AC_fonipa ; lt_Adlm_AC_fonipa
+llo_Adlm_AC_fonipa ; ngt_Adlm_AC_fonipa
+lmm_Adlm_AC_fonipa ; rmx_Adlm_AC_fonipa
+ltz_Adlm_AC_fonipa ; lb_Adlm_AC_fonipa
+lub_Adlm_AC_fonipa ; lu_Adlm_AC_fonipa
+lug_Adlm_AC_fonipa ; lg_Adlm_AC_fonipa
+lvs_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa
+mac_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa
+mah_Adlm_AC_fonipa ; mh_Adlm_AC_fonipa
+mal_Adlm_AC_fonipa ; ml_Adlm_AC_fonipa
+mao_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa
+mar_Adlm_AC_fonipa ; mr_Adlm_AC_fonipa
+may_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+meg_Adlm_AC_fonipa ; cir_Adlm_AC_fonipa
+mhr_Adlm_AC_fonipa ; chm_Adlm_AC_fonipa
+mkd_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa
+mlg_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa
+mlt_Adlm_AC_fonipa ; mt_Adlm_AC_fonipa
+mnk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa
+mo_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+mol_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+mon_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+mri_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa
+msa_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+mst_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa
+mup_Adlm_AC_fonipa ; raj_Adlm_AC_fonipa
+mwj_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+mya_Adlm_AC_fonipa ; my_Adlm_AC_fonipa
+myd_Adlm_AC_fonipa ; aog_Adlm_AC_fonipa
+myt_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa
+nad_Adlm_AC_fonipa ; xny_Adlm_AC_fonipa
+nau_Adlm_AC_fonipa ; na_Adlm_AC_fonipa
+nav_Adlm_AC_fonipa ; nv_Adlm_AC_fonipa
+nbl_Adlm_AC_fonipa ; nr_Adlm_AC_fonipa
+ncp_Adlm_AC_fonipa ; kdz_Adlm_AC_fonipa
+nde_Adlm_AC_fonipa ; nd_Adlm_AC_fonipa
+ndo_Adlm_AC_fonipa ; ng_Adlm_AC_fonipa
+nep_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa
+nld_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa
+nno_Adlm_AC_fonipa ; nn_Adlm_AC_fonipa
+nns_Adlm_AC_fonipa ; nbr_Adlm_AC_fonipa
+nnx_Adlm_AC_fonipa ; ngv_Adlm_AC_fonipa
+no_Adlm_AC_bokmal_fonipa ; nb_Adlm_AC_fonipa
+no_Adlm_AC_bokmal_fonipa_nynorsk ; nb_Adlm_AC_fonipa
+no_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+no_Adlm_AC_fonipa_nynorsk ; nn_Adlm_AC_fonipa
+nob_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+nor_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+npi_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa
+nts_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa
+nya_Adlm_AC_fonipa ; ny_Adlm_AC_fonipa
+oci_Adlm_AC_fonipa ; oc_Adlm_AC_fonipa
+ojg_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa
+oji_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa
+ori_Adlm_AC_fonipa ; or_Adlm_AC_fonipa
+orm_Adlm_AC_fonipa ; om_Adlm_AC_fonipa
+ory_Adlm_AC_fonipa ; or_Adlm_AC_fonipa
+oss_Adlm_AC_fonipa ; os_Adlm_AC_fonipa
+oun_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+pan_Adlm_AC_fonipa ; pa_Adlm_AC_fonipa
+pbu_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa
+pcr_Adlm_AC_fonipa ; adx_Adlm_AC_fonipa
+per_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pes_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pli_Adlm_AC_fonipa ; pi_Adlm_AC_fonipa
+plt_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa
+pmc_Adlm_AC_fonipa ; huw_Adlm_AC_fonipa
+pmu_Adlm_AC_fonipa ; phr_Adlm_AC_fonipa
+pnb_Adlm_AC_fonipa ; lah_Adlm_AC_fonipa
+pol_Adlm_AC_fonipa ; pl_Adlm_AC_fonipa
+por_Adlm_AC_fonipa ; pt_Adlm_AC_fonipa
+ppa_Adlm_AC_fonipa ; bfy_Adlm_AC_fonipa
+ppr_Adlm_AC_fonipa ; lcq_Adlm_AC_fonipa
+prs_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pry_Adlm_AC_fonipa ; prt_Adlm_AC_fonipa
+pus_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa
+puz_Adlm_AC_fonipa ; pub_Adlm_AC_fonipa
+que_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa
+quz_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa
+rmy_Adlm_AC_fonipa ; rom_Adlm_AC_fonipa
+roh_Adlm_AC_fonipa ; rm_Adlm_AC_fonipa
+ron_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+rum_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+run_Adlm_AC_fonipa ; rn_Adlm_AC_fonipa
+rus_Adlm_AC_fonipa ; ru_Adlm_AC_fonipa
+sag_Adlm_AC_fonipa ; sg_Adlm_AC_fonipa
+san_Adlm_AC_fonipa ; sa_Adlm_AC_fonipa
+sca_Adlm_AC_fonipa ; hle_Adlm_AC_fonipa
+scc_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+scr_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa
+sgn_Adlm_076_fonipa ; bzs_Adlm_fonipa
+sgn_Adlm_170_fonipa ; csn_Adlm_fonipa
+sgn_Adlm_208_fonipa ; dsl_Adlm_fonipa
+sgn_Adlm_249_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_250_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_276_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_278_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_280_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_300_fonipa ; gss_Adlm_fonipa
+sgn_Adlm_372_fonipa ; isg_Adlm_fonipa
+sgn_Adlm_380_fonipa ; ise_Adlm_fonipa
+sgn_Adlm_392_fonipa ; jsl_Adlm_fonipa
+sgn_Adlm_484_fonipa ; mfs_Adlm_fonipa
+sgn_Adlm_528_fonipa ; dse_Adlm_fonipa
+sgn_Adlm_558_fonipa ; ncs_Adlm_fonipa
+sgn_Adlm_578_fonipa ; nsi_Adlm_fonipa
+sgn_Adlm_620_fonipa ; psr_Adlm_fonipa
+sgn_Adlm_710_fonipa ; sfs_Adlm_fonipa
+sgn_Adlm_752_fonipa ; swl_Adlm_fonipa
+sgn_Adlm_826_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_840_fonipa ; ase_Adlm_fonipa
+sgn_Adlm_BR_fonipa ; bzs_Adlm_fonipa
+sgn_Adlm_CO_fonipa ; csn_Adlm_fonipa
+sgn_Adlm_DD_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_DE_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_DK_fonipa ; dsl_Adlm_fonipa
+sgn_Adlm_FR_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_FX_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_GB_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_GR_fonipa ; gss_Adlm_fonipa
+sgn_Adlm_IE_fonipa ; isg_Adlm_fonipa
+sgn_Adlm_IT_fonipa ; ise_Adlm_fonipa
+sgn_Adlm_JP_fonipa ; jsl_Adlm_fonipa
+sgn_Adlm_MX_fonipa ; mfs_Adlm_fonipa
+sgn_Adlm_NI_fonipa ; ncs_Adlm_fonipa
+sgn_Adlm_NL_fonipa ; dse_Adlm_fonipa
+sgn_Adlm_NO_fonipa ; nsi_Adlm_fonipa
+sgn_Adlm_PT_fonipa ; psr_Adlm_fonipa
+sgn_Adlm_SE_fonipa ; swl_Adlm_fonipa
+sgn_Adlm_UK_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_US_fonipa ; ase_Adlm_fonipa
+sgn_Adlm_ZA_fonipa ; sfs_Adlm_fonipa
+sh_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+sin_Adlm_AC_fonipa ; si_Adlm_AC_fonipa
+skk_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+slk_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa
+slo_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa
+slv_Adlm_AC_fonipa ; sl_Adlm_AC_fonipa
+sme_Adlm_AC_fonipa ; se_Adlm_AC_fonipa
+smo_Adlm_AC_fonipa ; sm_Adlm_AC_fonipa
+sna_Adlm_AC_fonipa ; sn_Adlm_AC_fonipa
+snd_Adlm_AC_fonipa ; sd_Adlm_AC_fonipa
+som_Adlm_AC_fonipa ; so_Adlm_AC_fonipa
+sot_Adlm_AC_fonipa ; st_Adlm_AC_fonipa
+spa_Adlm_AC_fonipa ; es_Adlm_AC_fonipa
+spy_Adlm_AC_fonipa ; kln_Adlm_AC_fonipa
+sqi_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+src_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa
+srd_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa
+srp_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+ssw_Adlm_AC_fonipa ; ss_Adlm_AC_fonipa
+sun_Adlm_AC_fonipa ; su_Adlm_AC_fonipa
+swa_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+swc_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+swe_Adlm_AC_fonipa ; sv_Adlm_AC_fonipa
+swh_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+tah_Adlm_AC_fonipa ; ty_Adlm_AC_fonipa
+tam_Adlm_AC_fonipa ; ta_Adlm_AC_fonipa
+tat_Adlm_AC_fonipa ; tt_Adlm_AC_fonipa
+tdu_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+tel_Adlm_AC_fonipa ; te_Adlm_AC_fonipa
+tgk_Adlm_AC_fonipa ; tg_Adlm_AC_fonipa
+tgl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa
+tha_Adlm_AC_fonipa ; th_Adlm_AC_fonipa
+thc_Adlm_AC_fonipa ; tpo_Adlm_AC_fonipa
+thx_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+tib_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa
+tie_Adlm_AC_fonipa ; ras_Adlm_AC_fonipa
+tir_Adlm_AC_fonipa ; ti_Adlm_AC_fonipa
+tkk_Adlm_AC_fonipa ; twm_Adlm_AC_fonipa
+tl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa
+tlw_Adlm_AC_fonipa ; weo_Adlm_AC_fonipa
+tmp_Adlm_AC_fonipa ; tyj_Adlm_AC_fonipa
+tne_Adlm_AC_fonipa ; kak_Adlm_AC_fonipa
+tnf_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+ton_Adlm_AC_fonipa ; to_Adlm_AC_fonipa
+tsf_Adlm_AC_fonipa ; taj_Adlm_AC_fonipa
+tsn_Adlm_AC_fonipa ; tn_Adlm_AC_fonipa
+tso_Adlm_AC_fonipa ; ts_Adlm_AC_fonipa
+ttq_Adlm_AC_fonipa ; tmh_Adlm_AC_fonipa
+tuk_Adlm_AC_fonipa ; tk_Adlm_AC_fonipa
+tur_Adlm_AC_fonipa ; tr_Adlm_AC_fonipa
+tw_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+twi_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+uig_Adlm_AC_fonipa ; ug_Adlm_AC_fonipa
+ukr_Adlm_AC_fonipa ; uk_Adlm_AC_fonipa
+umu_Adlm_AC_fonipa ; del_Adlm_AC_fonipa
+uok_Adlm_AC_fonipa ; ema_Adlm_AC_fonipa
+urd_Adlm_AC_fonipa ; ur_Adlm_AC_fonipa
+uzb_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa
+uzn_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa
+ven_Adlm_AC_fonipa ; ve_Adlm_AC_fonipa
+vie_Adlm_AC_fonipa ; vi_Adlm_AC_fonipa
+vol_Adlm_AC_fonipa ; vo_Adlm_AC_fonipa
+wel_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa
+wln_Adlm_AC_fonipa ; wa_Adlm_AC_fonipa
+wol_Adlm_AC_fonipa ; wo_Adlm_AC_fonipa
+xba_Adlm_AC_fonipa ; cax_Adlm_AC_fonipa
+xho_Adlm_AC_fonipa ; xh_Adlm_AC_fonipa
+xia_Adlm_AC_fonipa ; acn_Adlm_AC_fonipa
+xkh_Adlm_AC_fonipa ; waw_Adlm_AC_fonipa
+xpe_Adlm_AC_fonipa ; kpe_Adlm_AC_fonipa
+xsj_Adlm_AC_fonipa ; suj_Adlm_AC_fonipa
+xsl_Adlm_AC_fonipa ; den_Adlm_AC_fonipa
+ybd_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa
+ydd_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+yid_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+yma_Adlm_AC_fonipa ; lrr_Adlm_AC_fonipa
+ymt_Adlm_AC_fonipa ; mtm_Adlm_AC_fonipa
+yor_Adlm_AC_fonipa ; yo_Adlm_AC_fonipa
+yos_Adlm_AC_fonipa ; zom_Adlm_AC_fonipa
+yuu_Adlm_AC_fonipa ; yug_Adlm_AC_fonipa
+zai_Adlm_AC_fonipa ; zap_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+zha_Adlm_AC_fonipa ; za_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+zsm_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+zul_Adlm_AC_fonipa ; zu_Adlm_AC_fonipa
+zyb_Adlm_AC_fonipa ; za_Adlm_AC_fonipa
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
index 428a68f..ef2d6e6 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
@@ -78,7 +78,7 @@
final String[][] entries = {
//{"tag", "preferred"},
{"art-lojban", "jbo"},
- {"cel-gaulish", "xtg-x-cel-gaulish"}, // fallback
+ {"cel-gaulish", "xtg"}, // fallback
{"en-GB-oed", "en-GB-x-oed"}, // fallback
{"i-ami", "ami"},
{"i-bnn", "bnn"},
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index 109c495..2a3fce3 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -16,12 +16,15 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.MissingResourceException;
+import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -1203,6 +1206,396 @@
return new LocaleIDParser(localeID).getKeywordValue(keywordName);
}
+    /**
+     * Helper that rewrites the language, script, region and variant subtags of
+     * one locale ID using the alias tables read from the "alias" table of the
+     * ICU "metadata" resource bundle. Extensions are not rewritten; they are
+     * appended unchanged to the result.
+     */
+    private static class AliasReplacer {
+        /**
+         * @param language language subtag to be replaced. Cannot be null but could be empty.
+         * @param script script subtag to be replaced. Cannot be null but could be empty.
+         * @param region region subtag to be replaced. Cannot be null but could be empty.
+         * @param variants variant subtags to be replaced, separated by '_'.
+         *                 Cannot be null but could be empty.
+         * @param extensions extensions in string to be replaced. Cannot be null but could be empty.
+         */
+        public AliasReplacer(String language, String script, String region,
+                String variants, String extensions) {
+
+            assert language != null;
+            assert script != null;
+            assert region != null;
+            assert variants != null;
+            assert extensions != null;
+            this.language = language;
+            this.script = script;
+            this.region = region;
+            if (!variants.isEmpty()) {
+                // Mutable copy: replaceLanguage()/replaceVariant() edit it in place.
+                this.variants =
+                    new ArrayList<String>(Arrays.asList(variants.split("_")));
+            }
+            this.extensions = extensions;
+        }
+
+        private String language;
+        private String script;
+        // May become null when a language alias deletes the region.
+        private String region;
+        // null (never an empty list) when there are no variants.
+        private List<String> variants;
+        private String extensions;
+
+        /**
+         * Repeatedly applies the language, region, script and variant alias
+         * rules, restarting from the first rule after every successful
+         * replacement, until no rule matches.
+         *
+         * @return the rewritten locale ID (base name followed by the original
+         *         extensions) if anything changed, or null if no alias applied.
+         * @throws IllegalArgumentException if the rules are still producing
+         *         changes after 11 passes.
+         */
+        public String replace() {
+            boolean changed = false;
+            loadAliasData();
+            int count = 0;
+            while (true) {
+                if (count++ > 10) {
+                    // Throw an exception when we loop too many times, to
+                    // avoid an infinite loop caused by incorrect data in the
+                    // resource.
+                    throw new IllegalArgumentException(
+                        "Have problem to resolve locale alias of " +
+                        lscvToID(language, script, region,
+                            ((variants == null) ? "" : String.join("_", variants))) +
+                        extensions);
+                }
+                // Anytime we replace something, we need to start over again.
+                //                      lang   REGION variant
+                if (    replaceLanguage(true,  true,  true) ||
+                        replaceLanguage(true,  true,  false) ||
+                        replaceLanguage(true,  false, true) ||
+                        replaceLanguage(true,  false, false) ||
+                        replaceLanguage(false, false, true) ||
+                        replaceRegion() ||
+                        replaceScript() ||
+                        replaceVariant()) {
+                    // Some values in the data were changed; try to match from
+                    // the beginning again.
+                    changed = true;
+                    continue;
+                }
+                // Nothing changed in this iteration, break out of the loop.
+                break;
+            }  // while(1)
+            if (changed) {
+                String result = lscvToID(language, script, region,
+                    ((variants == null) ? "" : String.join("_", variants)));
+                if (extensions != null) {
+                    result += extensions;
+                }
+                return result;
+            }
+            // Nothing changed in any iteration of the loop.
+            return null;
+        }
+
+        private static boolean aliasDataIsLoaded = false;
+        // Key: "language[_region][_variant]" as built by generateKey().
+        private static Map<String, String> languageAliasMap = null;
+        private static Map<String, String> scriptAliasMap = null;
+        // Value: the space-separated replacement split into a list.
+        private static Map<String, List<String>> territoryAliasMap = null;
+        private static Map<String, String> variantAliasMap = null;
+
+        /**
+         * Initializes the alias data from the ICU resource bundles. The alias
+         * data contains alias of language, country, script and variants.
+         *
+         * If the alias data has already loaded, then this method simply
+         * returns without doing anything meaningful.
+         *
+         * @throws IllegalArgumentException if a key (or, for variants, a
+         *         replacement) in the resource does not have the expected
+         *         shape.
+         */
+        private static synchronized void loadAliasData() {
+            if (aliasDataIsLoaded) {
+                return;
+            }
+            languageAliasMap = new HashMap<String, String>();
+            scriptAliasMap = new HashMap<String, String>();
+            territoryAliasMap = new HashMap<String, List<String>>();
+            variantAliasMap = new HashMap<String, String>();
+
+            UResourceBundle metadata = UResourceBundle.getBundleInstance(
+                ICUData.ICU_BASE_NAME, "metadata",
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+            UResourceBundle metadataAlias = metadata.get("alias");
+            UResourceBundle languageAlias = metadataAlias.get("language");
+            UResourceBundle scriptAlias = metadataAlias.get("script");
+            UResourceBundle territoryAlias = metadataAlias.get("territory");
+            UResourceBundle variantAlias = metadataAlias.get("variant");
+
+            for (int i = 0 ; i < languageAlias.getSize(); i++) {
+                UResourceBundle res = languageAlias.get(i);
+                String aliasFrom = res.getKey();
+                String aliasTo = res.get("replacement").getString();
+                // NOTE(review): the one-argument java.util.Locale constructor
+                // treats its whole argument as the language code, so
+                // getScript() and getCountry() look like they are always
+                // empty here and this check may never fire -- confirm, and
+                // parse the key properly if the validation is wanted.
+                Locale testLocale = new Locale(aliasFrom);
+                // if there are script in the aliasFrom
+                // or we have both a und as language and a region code.
+                if (    ! testLocale.getScript().isEmpty() ||
+                        (aliasFrom.startsWith("und") && ! testLocale.getCountry().isEmpty())) {
+                    throw new IllegalArgumentException(
+                        "key [" + aliasFrom +
+                        "] in alias:language contains unsupported fields combination.");
+                }
+                languageAliasMap.put(aliasFrom, aliasTo);
+            }
+            for (int i = 0 ; i < scriptAlias.getSize(); i++) {
+                UResourceBundle res = scriptAlias.get(i);
+                String aliasFrom = res.getKey();
+                String aliasTo = res.get("replacement").getString();
+                // Script keys must be exactly four characters long.
+                if (aliasFrom.length() != 4) {
+                    throw new IllegalArgumentException(
+                        "Incorrect key [" + aliasFrom + "] in alias:script.");
+                }
+                scriptAliasMap.put(aliasFrom, aliasTo);
+            }
+            for (int i = 0 ; i < territoryAlias.getSize(); i++) {
+                UResourceBundle res = territoryAlias.get(i);
+                String aliasFrom = res.getKey();
+                String aliasTo = res.get("replacement").getString();
+                // Territory keys must be two or three characters long.
+                if (aliasFrom.length() < 2 || aliasFrom.length() > 3) {
+                    throw new IllegalArgumentException(
+                        "Incorrect key [" + aliasFrom + "] in alias:territory.");
+                }
+                // A replacement may list several territories separated by spaces.
+                territoryAliasMap.put(aliasFrom,
+                    new ArrayList<String>(Arrays.asList(aliasTo.split(" "))));
+            }
+            for (int i = 0 ; i < variantAlias.getSize(); i++) {
+                UResourceBundle res = variantAlias.get(i);
+                String aliasFrom = res.getKey();
+                String aliasTo = res.get("replacement").getString();
+                // Variants are 4 to 8 characters long; a 4-character variant
+                // must start with a digit.
+                if (    aliasFrom.length() < 4 ||
+                        aliasFrom.length() > 8 ||
+                        (aliasFrom.length() == 4 &&
+                         (aliasFrom.charAt(0) < '0' || aliasFrom.charAt(0) > '9'))) {
+                    throw new IllegalArgumentException(
+                        "Incorrect key [" + aliasFrom + "] in alias:variant.");
+                }
+                if (    aliasTo.length() < 4 ||
+                        aliasTo.length() > 8 ||
+                        (aliasTo.length() == 4 &&
+                         (aliasTo.charAt(0) < '0' || aliasTo.charAt(0) > '9'))) {
+                    throw new IllegalArgumentException(
+                        "Incorrect variant [" + aliasTo + "] for the key [" + aliasFrom +
+                        "] in alias:variant.");
+                }
+                variantAliasMap.put(aliasFrom, aliasTo);
+            }
+
+            aliasDataIsLoaded = true;
+        }
+
+        /**
+         * Builds a lookup key for languageAliasMap: the language, followed by
+         * UNDERSCORE and the region if the region is non-empty, followed by
+         * UNDERSCORE and the variant if the variant is non-empty.
+         */
+        private static String generateKey(
+                String language, String region, String variant) {
+            assert variant == null || variant.length() >= 4;
+            StringBuilder buf = new StringBuilder();
+            buf.append(language);
+            if (region != null && !region.isEmpty()) {
+                buf.append(UNDERSCORE);
+                buf.append(region);
+            }
+            if (variant != null && !variant.isEmpty()) {
+                buf.append(UNDERSCORE);
+                buf.append(variant);
+            }
+            return buf.toString();
+        }
+
+        /**
+         * Decides the new value of one field after an alias was matched.
+         *
+         * If replacement is neither null nor empty and input is either null or empty,
+         * return replacement.
+         * If replacement is neither null nor empty but input is not empty, return input.
+         * If replacement is either null or empty and type is either null or empty,
+         * return input.
+         * Otherwise return null.
+         * ("*" below means any value.)
+         *   replacement   input   type   return
+         *   AAA           ""      *      AAA
+         *   AAA           BBB     *      BBB
+         *   ""            CCC     ""     CCC
+         *   ""            *       DDD    null
+         *
+         * @param input the field's current value.
+         * @param type the value of this field that was part of the matched key,
+         *             or null/empty if the field was not part of the key.
+         * @param replacement the value of this field in the alias replacement.
+         */
+        private static String deleteOrReplace(
+                String input, String type, String replacement) {
+            return (replacement != null && !replacement.isEmpty()) ?
+                    ((input == null || input.isEmpty()) ? replacement : input) :
+                    ((type == null || type.isEmpty()) ? input : null);
+        }
+
+        /**
+         * Tries to apply one entry of the language alias table.
+         *
+         * The lookup key is built by generateKey() from the language (or
+         * UNDEFINED_LANGUAGE when checkLanguage is false), the region (only
+         * when checkRegion is true) and, when checkVariants is true, each
+         * variant in turn. Entries whose replacement would leave every field
+         * unchanged are skipped.
+         *
+         * @return true if an entry was applied, false if nothing matched.
+         */
+        private boolean replaceLanguage(boolean checkLanguage,
+                boolean checkRegion, boolean checkVariants) {
+            if (    (checkRegion && (region == null || region.isEmpty())) ||
+                    (checkVariants && (variants == null))) {
+                // Nothing to search
+                return false;
+            }
+            int variantSize = checkVariants ? variants.size() : 1;
+            // Since we may have more than one variant, we need to loop through
+            // them.
+            String searchLanguage = checkLanguage ? language : UNDEFINED_LANGUAGE;
+            String searchRegion = checkRegion ? region : null;
+            String searchVariant = null;
+            for (int variantIndex = 0; variantIndex < variantSize; ++variantIndex) {
+                if (checkVariants) {
+                    searchVariant = variants.get(variantIndex);
+                }
+                if (searchVariant != null && searchVariant.length() < 4) {
+                    // Do not consider ill-formed variant subtag.
+                    searchVariant = null;
+                }
+                String typeKey = generateKey(
+                    searchLanguage, searchRegion, searchVariant);
+                String replacement = languageAliasMap.get(typeKey);
+                if (replacement == null) {
+                    // Found no replacement data.
+                    continue;
+                }
+                String replacedScript = null;
+                String replacedRegion = null;
+                String replacedVariant = null;
+                String replacedExtensions = null;
+                String replacedLanguage = null;
+
+                if (replacement.indexOf('_') < 0) {
+                    // The replacement is a bare language; UNDEFINED_LANGUAGE
+                    // means "keep the current language".
+                    replacedLanguage = replacement.equals(UNDEFINED_LANGUAGE) ?
+                        language : replacement;
+                } else {
+                    String[] replacementFields = replacement.split("_");
+                    replacedLanguage = replacementFields[0];
+                    int index = 1;
+
+                    if (replacedLanguage.equals(UNDEFINED_LANGUAGE)) {
+                        replacedLanguage = language;
+                    }
+                    // Offset into replacement of the field currently examined.
+                    int consumed = replacementFields[0].length() + 1;
+                    // Classify the remaining fields by their length.
+                    while (replacementFields.length > index) {
+                        String field = replacementFields[index];
+                        int len = field.length();
+                        if (1 == len) {
+                            // A one-character field starts the extensions;
+                            // everything from here on belongs to them.
+                            replacedExtensions = replacement.substring(consumed);
+                            break;
+                        } else if (len >= 2 && len <= 3) {
+                            assert replacedRegion == null;
+                            replacedRegion = field;
+                        } else if (len >= 5 && len <= 8) {
+                            assert replacedVariant == null;
+                            replacedVariant = field;
+                        } else if (len == 4) {
+                            // Four characters: a variant if it starts with a
+                            // digit, otherwise a script.
+                            if (field.charAt(0) >= '0' && field.charAt(0) <= '9') {
+                                assert replacedVariant == null;
+                                replacedVariant = field;
+                            } else {
+                                assert replacedScript == null;
+                                replacedScript = field;
+                            }
+                        }
+                        index++;
+                        consumed += len + 1;
+                    }
+                }
+
+                replacedScript = deleteOrReplace(script, null, replacedScript);
+                replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
+                replacedVariant = deleteOrReplace(searchVariant, searchVariant, replacedVariant);
+
+                // Null-safe comparison: this.region can already be null here
+                // if an earlier pass deleted the region.
+                if (    this.language.equals(replacedLanguage) &&
+                        Objects.equals(this.script, replacedScript) &&
+                        Objects.equals(this.region, replacedRegion) &&
+                        Objects.equals(searchVariant, replacedVariant) &&
+                        replacedExtensions == null) {
+                    // Replacement produce no changes on search.
+                    // For example, apply pa_IN=> pa_Guru_IN on pa_Guru_IN.
+                    continue;
+                }
+                this.language = replacedLanguage;
+                this.script = replacedScript;
+                this.region = replacedRegion;
+                if (searchVariant != null && !searchVariant.isEmpty()) {
+                    if (replacedVariant != null && !replacedVariant.isEmpty()) {
+                        this.variants.set(variantIndex, replacedVariant);
+                    } else {
+                        this.variants.remove(variantIndex);
+                        if (this.variants.isEmpty()) {
+                            this.variants = null;
+                        }
+                    }
+                }
+                if (replacedExtensions != null && !replacedExtensions.isEmpty()) {
+                    // TODO(ICU-21292)
+                    // DO NOTHING
+                    // UTS35 does not specify what we should do if we have extensions in the
+                    // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
+                    // extensions in them languageAlias:
+                    //  i_default => en_x_i_default
+                    //  i_enochian => und_x_i_enochian
+                    //  i_mingo => see_x_i_mingo
+                    //  zh_min => nan_x_zh_min
+                    // But all of them are already changed by code inside LanguageTag before
+                    // hitting this code.
+                }
+                // Something in search changed by language alias data.
+                return true;
+            }
+            // Nothing changed in search by language alias data.
+            return false;
+        }
+
+        /**
+         * Replaces the region using the territory alias table. When the alias
+         * lists several replacement regions, the region that
+         * ULocale.addLikelySubtags() gives for the current language and script
+         * is used if it is in the list; otherwise the first listed region is
+         * used.
+         *
+         * @return true if the region was replaced.
+         */
+        private boolean replaceRegion() {
+            if (region == null || region.isEmpty()) return false;
+            List<String> replacement = territoryAliasMap.get(region);
+            if (replacement == null) {
+                // Found no replacement data for this region.
+                return false;
+            }
+            String replacedRegion;
+            if (replacement.size() > 1) {
+                String regionOfLanguageAndScript =
+                    ULocale.addLikelySubtags(
+                        new ULocale(this.language, this.script, null))
+                    .getCountry();
+                replacedRegion = replacement.contains(regionOfLanguageAndScript) ?
+                    regionOfLanguageAndScript : replacement.get(0);
+            } else {
+                replacedRegion = replacement.get(0);
+            }
+            // Compare values, not references: an alias mapping a region to
+            // itself would make replace() loop until it gives up.
+            assert !this.region.equals(replacedRegion);
+            this.region = replacedRegion;
+            // The region is changed by data in territory alias.
+            return true;
+        }
+
+        /**
+         * Replaces the script using the script alias table.
+         *
+         * @return true if the script was replaced.
+         */
+        private boolean replaceScript() {
+            if (script == null || script.isEmpty()) return false;
+            String replacement = scriptAliasMap.get(script);
+            if (replacement == null) {
+                // Found no replacement data for this script.
+                return false;
+            }
+            // Compare values, not references (see replaceRegion()).
+            assert !this.script.equals(replacement);
+            this.script = replacement;
+            // The script is changed by data in script alias.
+            return true;
+        }
+
+        /**
+         * Replaces the first variant that has a different replacement in the
+         * variant alias table.
+         *
+         * @return true if a variant was replaced.
+         */
+        private boolean replaceVariant() {
+            if (variants == null) return false;
+            for (int i = 0; i < variants.size(); i++) {
+                String variant = variants.get(i);
+                String replacement = variantAliasMap.get(variant);
+                if (replacement == null) {
+                    // Found no replacement data for this variant.
+                    continue;
+                }
+                assert replacement.length() >= 4;
+                assert replacement.length() <= 8;
+                assert replacement.length() != 4 ||
+                    ( replacement.charAt(0) >= '0' && replacement.charAt(0) <= '9');
+                if (!variant.equals(replacement)) {
+                    variants.set(i, replacement);
+                    // Special hack to handle hepburn-heploc => alalc97
+                    if (variant.equals("heploc")) {
+                        variants.remove("hepburn");
+                        if (variants.isEmpty()) {
+                            variants = null;
+                        }
+                    }
+                    return true;
+                }
+            }
+            return false;
+        }
+    }
+
/**
* {@icu} Returns the canonical name according to CLDR for the specified locale ID.
* This is used to convert POSIX and other legacy IDs to standard ICU form.
@@ -1239,147 +1632,55 @@
}
}
- // If the BCP 47 primary language subtag matches the type attribute of a languageAlias
- // element in Supplemental Data, replace the language subtag with the replacement value.
- // If there are additional subtags in the replacement value, add them to the result, but
- // only if there is no corresponding subtag already in the tag.
- // Five special deprecated codes (such as i-default) are in type attributes, and are also replaced.
- try {
- UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
- "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
- .get("alias")
- .get("language");
- // language _ variant
- if (!parser.getVariant().isEmpty()) {
- String [] variants = parser.getVariant().split("_");
- for (String variant : variants) {
- try {
- // Note the key in the metadata.txt is formatted as language_variant
- // instead of language__variant but lscvToID will generate
- // language__variant so we have to build the string ourselves.
- ULocale replaceLocale = new ULocale(languageAlias.get(
- (new StringBuilder(parser.getLanguage().length() + 1 + parser.getVariant().length()))
- .append(parser.getLanguage())
- .append("_")
- .append(variant)
- .toString())
- .get("replacement")
- .getString());
- StringBuilder replacedVariant = new StringBuilder(parser.getVariant().length());
- for (String current : variants) {
- if (current.equals(variant)) continue;
- if (replacedVariant.length() > 0) replacedVariant.append("_");
- replacedVariant.append(current);
- }
- parser = new LocaleIDParser(
- (new StringBuilder(localeID.length()))
- .append(lscvToID(replaceLocale.getLanguage(),
- !parser.getScript().isEmpty() ? parser.getScript() : replaceLocale.getScript(),
- !parser.getCountry().isEmpty() ? parser.getCountry() : replaceLocale.getCountry(),
- replacedVariant.toString()))
- .append(parser.getName().substring(parser.getBaseName().length()))
- .toString());
- } catch (MissingResourceException e) {
- }
- }
- }
-
- // language _ script _ country
- // ug_Arab_CN -> ug_CN
- if (!parser.getScript().isEmpty() && !parser.getCountry().isEmpty()) {
- try {
- ULocale replaceLocale = new ULocale(languageAlias.get(
- lscvToID(parser.getLanguage(), parser.getScript(), parser.getCountry(), null))
- .get("replacement")
- .getString());
- parser = new LocaleIDParser((new StringBuilder(localeID.length()))
- .append(lscvToID(replaceLocale.getLanguage(),
- replaceLocale.getScript(),
- replaceLocale.getCountry(),
- parser.getVariant()))
- .append(parser.getName().substring(parser.getBaseName().length()))
- .toString());
- } catch (MissingResourceException e) {
- }
- }
- // language _ country
- // eg. az_AZ -> az_Latn_AZ
- if (!parser.getCountry().isEmpty()) {
- try {
- ULocale replaceLocale = new ULocale(languageAlias.get(
- lscvToID(parser.getLanguage(), null, parser.getCountry(), null))
- .get("replacement")
- .getString());
- parser = new LocaleIDParser((new StringBuilder(localeID.length()))
- .append(lscvToID(replaceLocale.getLanguage(),
- parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript(),
- replaceLocale.getCountry(),
- parser.getVariant()))
- .append(parser.getName().substring(parser.getBaseName().length()))
- .toString());
- } catch (MissingResourceException e) {
- }
- }
- // only language
- // e.g. twi -> ak
- try {
- ULocale replaceLocale = new ULocale(languageAlias.get(parser.getLanguage())
- .get("replacement")
- .getString());
- parser = new LocaleIDParser((new StringBuilder(localeID.length()))
- .append(lscvToID(replaceLocale.getLanguage(),
- parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript() ,
- parser.getCountry().isEmpty() ? replaceLocale.getCountry() : parser.getCountry() ,
- parser.getVariant()))
- .append(parser.getName().substring(parser.getBaseName().length()))
- .toString());
- } catch (MissingResourceException e) {
- }
- } catch (MissingResourceException e) {
- }
-
- // If the BCP 47 region subtag matches the type attribute of a
- // territoryAlias element in Supplemental Data, replace the language
- // subtag with the replacement value, as follows:
- if (!parser.getCountry().isEmpty()) {
- try {
- String replacements[] = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
- "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
- .get("alias")
- .get("territory")
- .get(parser.getCountry())
- .get("replacement")
- .getString()
- .split(" ");
- String replacement = replacements[0];
- // If there is a single territory in the replacement, use it.
- // If there are multiple territories:
- // Look up the most likely territory for the base language code (and script, if there is one).
- // If that likely territory is in the list, use it.
- // Otherwise, use the first territory in the list.
- if (replacements.length > 1) {
- String likelyCountry = ULocale.addLikelySubtags(
- new ULocale(lscvToID(parser.getLanguage(), parser.getScript(), null, parser.getVariant())))
- .getCountry();
- for (String country : replacements) {
- if (country.equals(likelyCountry)) {
- replacement = likelyCountry;
- break;
- }
- }
- }
- parser = new LocaleIDParser(
- (new StringBuilder(localeID.length()))
- .append(lscvToID(parser.getLanguage(), parser.getScript(), replacement, parser.getVariant()))
- .append(parser.getName().substring(parser.getBaseName().length()))
- .toString());
- } catch (MissingResourceException e) {
+ boolean knownCanonicalized = false;
+ String name = parser.getName();
+ if (!isKnownCanonicalizedLocale(name)) {
+ AliasReplacer replacer = new AliasReplacer(
+ parser.getLanguage(), parser.getScript(), parser.getCountry(),
+ AsciiUtil.toLowerString(parser.getVariant()),
+ parser.getName().substring(parser.getBaseName().length()));
+ String replaced = replacer.replace();
+ if (replaced != null) {
+ parser = new LocaleIDParser(replaced);
}
}
return parser.getName();
}
+    /**
+     * Tells whether a locale ID is one of a fixed list of commonly used IDs
+     * that are already in canonical form, so that the caller can skip alias
+     * replacement for it.
+     *
+     * @param name the locale ID to test; must not be null.
+     * @return true if the ID is in the list.
+     */
+    private static synchronized boolean isKnownCanonicalizedLocale(String name) {
+        // The most frequent IDs are answered without touching the set.
+        switch (name) {
+            case "c":
+            case "en":
+            case "en_US":
+                return true;
+            default:
+                break;
+        }
+        Set<String> known = gKnownCanonicalizedCases;
+        if (known == null) {
+            // First call that got this far: build the set once and cache it.
+            known = new HashSet<String>(Arrays.asList(
+                "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
+                "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
+                "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
+                "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
+                "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
+                "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
+                "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
+                "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
+                "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
+                "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
+                "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
+                "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+                "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
+                "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
+                "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
+                "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
+                "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
+                "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
+                "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"));
+            gKnownCanonicalizedCases = known;
+        }
+        return known.contains(name);
+    }
+
+    // Cache for isKnownCanonicalizedLocale(); null until that method first builds it.
+    private static Set<String> gKnownCanonicalizedCases = null;
+
/**
* {@icu} Given a keyword and a value, return a new locale with an updated
* keyword and value. If the keyword is null, this removes all keywords from the locale id.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
new file mode 100644
index 0000000..e41eaac
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
@@ -0,0 +1,1648 @@
+# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
+# Test data for locale identifier canonicalization
+# Copyright © 1991-2020 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/copyright.html
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+#
+# Format:
+# <source locale identifier> ; <expected canonicalized locale identifier>
+#
+# The data lines are divided into 4 sets:
+# explicit: a short list of explicit test cases.
+# fromAliases: test cases generated from the alias data.
+# decanonicalized: test cases generated by reversing the normalization process.
+# withIrrelevants: test cases generated from the others by adding irrelevant fields where possible,
+# to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include:
+# Language: aaa
+# Script: Adlm
+# Region: AC
+# Variant: fonipa
+######
+
+
+
+# explicit
+
+art_lojban ; jbo
+en_US_aaland ; en_US
+en_US_heploc ; en_US_alalc97
+en_US_polytoni ; en_US_polyton
+en_aaland ; en_AX
+en_arevela ; en
+en_arevmda_arevela ; en
+en_lojban ; en
+hy_arevela ; hy
+hy_arevmda ; hyw
+hy_arevmda_arevela ; hyw
+hye_arevmda ; hyw
+no_bokmal_nynorsk ; nb
+no_nynorsk_bokmal ; nb
+zh_guoyu_hakka_xiang ; hak
+zh_hakka_xiang ; hak
+
+# fromAliases
+
+aa_saaho ; ssy
+aam ; aas
+aar ; aa
+abk ; ab
+adp ; dz
+afr ; af
+aju ; jrb
+aka ; ak
+alb ; sq
+als ; sq
+amh ; am
+ara ; ar
+arb ; ar
+arg ; an
+arm ; hy
+asd ; snz
+asm ; as
+aue ; ktz
+ava ; av
+ave ; ae
+aym ; ay
+ayr ; ay
+ayx ; nun
+aze ; az
+azj ; az
+bak ; ba
+bam ; bm
+baq ; eu
+bcc ; bal
+bcl ; bik
+bel ; be
+ben ; bn
+bgm ; bcg
+bh ; bho
+bih ; bho
+bis ; bi
+bjd ; drl
+bod ; bo
+bos ; bs
+bre ; br
+bul ; bg
+bur ; my
+bxk ; luy
+bxr ; bua
+cat ; ca
+ccq ; rki
+cel_gaulish ; xtg
+ces ; cs
+cha ; ch
+che ; ce
+chi ; zh
+chu ; cu
+chv ; cv
+cjr ; mom
+cka ; cmr
+cld ; syr
+cmk ; xch
+cmn ; zh
+cnr ; sr_ME
+cor ; kw
+cos ; co
+coy ; pij
+cqu ; quh
+cre ; cr
+cwd ; cr
+cym ; cy
+cze ; cs
+dan ; da
+deu ; de
+dgo ; doi
+dhd ; mwr
+dik ; din
+diq ; zza
+dit ; dif
+div ; dv
+drh ; mn
+drw ; fa_AF
+dut ; nl
+dzo ; dz
+ekk ; et
+ell ; el
+emk ; man
+eng ; en
+epo ; eo
+esk ; ik
+est ; et
+eus ; eu
+ewe ; ee
+fao ; fo
+fas ; fa
+fat ; ak
+fij ; fj
+fin ; fi
+fra ; fr
+fre ; fr
+fry ; fy
+fuc ; ff
+ful ; ff
+gav ; dev
+gaz ; om
+gbo ; grb
+geo ; ka
+ger ; de
+gfx ; vaj
+ggn ; gvr
+gla ; gd
+gle ; ga
+glg ; gl
+glv ; gv
+gno ; gon
+gre ; el
+grn ; gn
+gti ; nyc
+gug ; gn
+guj ; gu
+guv ; duz
+gya ; gba
+hat ; ht
+hau ; ha
+hbs ; sr_Latn
+hdn ; hai
+hea ; hmn
+heb ; he
+her ; hz
+him ; srx
+hin ; hi
+hmo ; ho
+hrr ; jal
+hrv ; hr
+hun ; hu
+hye ; hy
+ibi ; opa
+ibo ; ig
+ice ; is
+ido ; io
+iii ; ii
+ike ; iu
+iku ; iu
+ile ; ie
+ilw ; gal
+in ; id
+ina ; ia
+ind ; id
+ipk ; ik
+isl ; is
+ita ; it
+iw ; he
+jav ; jv
+jeg ; oyb
+ji ; yi
+jpn ; ja
+jw ; jv
+kal ; kl
+kan ; kn
+kas ; ks
+kat ; ka
+kau ; kr
+kaz ; kk
+kgc ; tdf
+kgh ; kml
+khk ; mn
+khm ; km
+kik ; ki
+kin ; rw
+kir ; ky
+kmr ; ku
+knc ; kr
+kng ; kg
+knn ; kok
+koj ; kwv
+kom ; kv
+kon ; kg
+kor ; ko
+kpv ; kv
+krm ; bmf
+ktr ; dtp
+kua ; kj
+kur ; ku
+kvs ; gdj
+kwq ; yam
+kxe ; tvd
+kzj ; dtp
+kzt ; dtp
+lao ; lo
+lat ; la
+lav ; lv
+lbk ; bnc
+lii ; raq
+lim ; li
+lin ; ln
+lit ; lt
+llo ; ngt
+lmm ; rmx
+ltz ; lb
+lub ; lu
+lug ; lg
+lvs ; lv
+mac ; mk
+mah ; mh
+mal ; ml
+mao ; mi
+mar ; mr
+may ; ms
+meg ; cir
+mhr ; chm
+mkd ; mk
+mlg ; mg
+mlt ; mt
+mnk ; man
+mo ; ro
+mol ; ro
+mon ; mn
+mri ; mi
+msa ; ms
+mst ; mry
+mup ; raj
+mwj ; vaj
+mya ; my
+myd ; aog
+myt ; mry
+nad ; xny
+nau ; na
+nav ; nv
+nbl ; nr
+ncp ; kdz
+nde ; nd
+ndo ; ng
+nep ; ne
+nld ; nl
+nno ; nn
+nns ; nbr
+nnx ; ngv
+no ; nb
+no_bokmal ; nb
+no_nynorsk ; nn
+nob ; nb
+nor ; nb
+npi ; ne
+nts ; pij
+nya ; ny
+oci ; oc
+ojg ; oj
+oji ; oj
+ori ; or
+orm ; om
+ory ; or
+oss ; os
+oun ; vaj
+pan ; pa
+pbu ; ps
+pcr ; adx
+per ; fa
+pes ; fa
+pli ; pi
+plt ; mg
+pmc ; huw
+pmu ; phr
+pnb ; lah
+pol ; pl
+por ; pt
+ppa ; bfy
+ppr ; lcq
+prs ; fa_AF
+pry ; prt
+pus ; ps
+puz ; pub
+que ; qu
+quz ; qu
+rmy ; rom
+roh ; rm
+ron ; ro
+rum ; ro
+run ; rn
+rus ; ru
+sag ; sg
+san ; sa
+sca ; hle
+scc ; sr
+scr ; hr
+sgn_BR ; bzs
+sgn_CO ; csn
+sgn_DE ; gsg
+sgn_DK ; dsl
+sgn_FR ; fsl
+sgn_GB ; bfi
+sgn_GR ; gss
+sgn_IE ; isg
+sgn_IT ; ise
+sgn_JP ; jsl
+sgn_MX ; mfs
+sgn_NI ; ncs
+sgn_NL ; dse
+sgn_NO ; nsi
+sgn_PT ; psr
+sgn_SE ; swl
+sgn_US ; ase
+sgn_ZA ; sfs
+sh ; sr_Latn
+sin ; si
+skk ; oyb
+slk ; sk
+slo ; sk
+slv ; sl
+sme ; se
+smo ; sm
+sna ; sn
+snd ; sd
+som ; so
+sot ; st
+spa ; es
+spy ; kln
+sqi ; sq
+src ; sc
+srd ; sc
+srp ; sr
+ssw ; ss
+sun ; su
+swa ; sw
+swc ; sw_CD
+swe ; sv
+swh ; sw
+tah ; ty
+tam ; ta
+tat ; tt
+tdu ; dtp
+tel ; te
+tgk ; tg
+tgl ; fil
+tha ; th
+thc ; tpo
+thx ; oyb
+tib ; bo
+tie ; ras
+tir ; ti
+tkk ; twm
+tl ; fil
+tlw ; weo
+tmp ; tyj
+tne ; kak
+tnf ; fa_AF
+ton ; to
+tsf ; taj
+tsn ; tn
+tso ; ts
+ttq ; tmh
+tuk ; tk
+tur ; tr
+tw ; ak
+twi ; ak
+uig ; ug
+ukr ; uk
+umu ; del
+und_004 ; und_AF
+und_008 ; und_AL
+und_010 ; und_AQ
+und_012 ; und_DZ
+und_016 ; und_AS
+und_020 ; und_AD
+und_024 ; und_AO
+und_028 ; und_AG
+und_031 ; und_AZ
+und_032 ; und_AR
+und_036 ; und_AU
+und_040 ; und_AT
+und_044 ; und_BS
+und_048 ; und_BH
+und_050 ; und_BD
+und_051 ; und_AM
+und_052 ; und_BB
+und_056 ; und_BE
+und_060 ; und_BM
+und_062 ; und_034
+und_064 ; und_BT
+und_068 ; und_BO
+und_070 ; und_BA
+und_072 ; und_BW
+und_074 ; und_BV
+und_076 ; und_BR
+und_084 ; und_BZ
+und_086 ; und_IO
+und_090 ; und_SB
+und_092 ; und_VG
+und_096 ; und_BN
+und_100 ; und_BG
+und_104 ; und_MM
+und_108 ; und_BI
+und_112 ; und_BY
+und_116 ; und_KH
+und_120 ; und_CM
+und_124 ; und_CA
+und_132 ; und_CV
+und_136 ; und_KY
+und_140 ; und_CF
+und_144 ; und_LK
+und_148 ; und_TD
+und_152 ; und_CL
+und_156 ; und_CN
+und_158 ; und_TW
+und_162 ; und_CX
+und_166 ; und_CC
+und_170 ; und_CO
+und_172 ; und_RU
+und_174 ; und_KM
+und_175 ; und_YT
+und_178 ; und_CG
+und_180 ; und_CD
+und_184 ; und_CK
+und_188 ; und_CR
+und_191 ; und_HR
+und_192 ; und_CU
+und_196 ; und_CY
+und_200 ; und_CZ
+und_203 ; und_CZ
+und_204 ; und_BJ
+und_208 ; und_DK
+und_212 ; und_DM
+und_214 ; und_DO
+und_218 ; und_EC
+und_222 ; und_SV
+und_226 ; und_GQ
+und_230 ; und_ET
+und_231 ; und_ET
+und_232 ; und_ER
+und_233 ; und_EE
+und_234 ; und_FO
+und_238 ; und_FK
+und_239 ; und_GS
+und_242 ; und_FJ
+und_246 ; und_FI
+und_248 ; und_AX
+und_249 ; und_FR
+und_250 ; und_FR
+und_254 ; und_GF
+und_258 ; und_PF
+und_260 ; und_TF
+und_262 ; und_DJ
+und_266 ; und_GA
+und_268 ; und_GE
+und_270 ; und_GM
+und_275 ; und_PS
+und_276 ; und_DE
+und_278 ; und_DE
+und_280 ; und_DE
+und_288 ; und_GH
+und_292 ; und_GI
+und_296 ; und_KI
+und_300 ; und_GR
+und_304 ; und_GL
+und_308 ; und_GD
+und_312 ; und_GP
+und_316 ; und_GU
+und_320 ; und_GT
+und_324 ; und_GN
+und_328 ; und_GY
+und_332 ; und_HT
+und_334 ; und_HM
+und_336 ; und_VA
+und_340 ; und_HN
+und_344 ; und_HK
+und_348 ; und_HU
+und_352 ; und_IS
+und_356 ; und_IN
+und_360 ; und_ID
+und_364 ; und_IR
+und_368 ; und_IQ
+und_372 ; und_IE
+und_376 ; und_IL
+und_380 ; und_IT
+und_384 ; und_CI
+und_388 ; und_JM
+und_392 ; und_JP
+und_398 ; und_KZ
+und_400 ; und_JO
+und_404 ; und_KE
+und_408 ; und_KP
+und_410 ; und_KR
+und_414 ; und_KW
+und_417 ; und_KG
+und_418 ; und_LA
+und_422 ; und_LB
+und_426 ; und_LS
+und_428 ; und_LV
+und_430 ; und_LR
+und_434 ; und_LY
+und_438 ; und_LI
+und_440 ; und_LT
+und_442 ; und_LU
+und_446 ; und_MO
+und_450 ; und_MG
+und_454 ; und_MW
+und_458 ; und_MY
+und_462 ; und_MV
+und_466 ; und_ML
+und_470 ; und_MT
+und_474 ; und_MQ
+und_478 ; und_MR
+und_480 ; und_MU
+und_484 ; und_MX
+und_492 ; und_MC
+und_496 ; und_MN
+und_498 ; und_MD
+und_499 ; und_ME
+und_500 ; und_MS
+und_504 ; und_MA
+und_508 ; und_MZ
+und_512 ; und_OM
+und_516 ; und_NA
+und_520 ; und_NR
+und_524 ; und_NP
+und_528 ; und_NL
+und_530 ; und_CW
+und_531 ; und_CW
+und_532 ; und_CW
+und_533 ; und_AW
+und_534 ; und_SX
+und_535 ; und_BQ
+und_536 ; und_SA
+und_540 ; und_NC
+und_548 ; und_VU
+und_554 ; und_NZ
+und_558 ; und_NI
+und_562 ; und_NE
+und_566 ; und_NG
+und_570 ; und_NU
+und_574 ; und_NF
+und_578 ; und_NO
+und_580 ; und_MP
+und_581 ; und_UM
+und_582 ; und_FM
+und_583 ; und_FM
+und_584 ; und_MH
+und_585 ; und_PW
+und_586 ; und_PK
+und_591 ; und_PA
+und_598 ; und_PG
+und_600 ; und_PY
+und_604 ; und_PE
+und_608 ; und_PH
+und_612 ; und_PN
+und_616 ; und_PL
+und_620 ; und_PT
+und_624 ; und_GW
+und_626 ; und_TL
+und_630 ; und_PR
+und_634 ; und_QA
+und_638 ; und_RE
+und_642 ; und_RO
+und_643 ; und_RU
+und_646 ; und_RW
+und_652 ; und_BL
+und_654 ; und_SH
+und_659 ; und_KN
+und_660 ; und_AI
+und_662 ; und_LC
+und_663 ; und_MF
+und_666 ; und_PM
+und_670 ; und_VC
+und_674 ; und_SM
+und_678 ; und_ST
+und_682 ; und_SA
+und_686 ; und_SN
+und_688 ; und_RS
+und_690 ; und_SC
+und_694 ; und_SL
+und_702 ; und_SG
+und_703 ; und_SK
+und_704 ; und_VN
+und_705 ; und_SI
+und_706 ; und_SO
+und_710 ; und_ZA
+und_716 ; und_ZW
+und_720 ; und_YE
+und_724 ; und_ES
+und_728 ; und_SS
+und_729 ; und_SD
+und_732 ; und_EH
+und_736 ; und_SD
+und_740 ; und_SR
+und_744 ; und_SJ
+und_748 ; und_SZ
+und_752 ; und_SE
+und_756 ; und_CH
+und_760 ; und_SY
+und_762 ; und_TJ
+und_764 ; und_TH
+und_768 ; und_TG
+und_772 ; und_TK
+und_776 ; und_TO
+und_780 ; und_TT
+und_784 ; und_AE
+und_788 ; und_TN
+und_792 ; und_TR
+und_795 ; und_TM
+und_796 ; und_TC
+und_798 ; und_TV
+und_800 ; und_UG
+und_804 ; und_UA
+und_807 ; und_MK
+und_810 ; und_RU
+und_818 ; und_EG
+und_826 ; und_GB
+und_830 ; und_JE
+und_831 ; und_GG
+und_832 ; und_JE
+und_833 ; und_IM
+und_834 ; und_TZ
+und_840 ; und_US
+und_850 ; und_VI
+und_854 ; und_BF
+und_858 ; und_UY
+und_860 ; und_UZ
+und_862 ; und_VE
+und_876 ; und_WF
+und_882 ; und_WS
+und_886 ; und_YE
+und_887 ; und_YE
+und_890 ; und_RS
+und_891 ; und_RS
+und_894 ; und_ZM
+und_958 ; und_AA
+und_959 ; und_QM
+und_960 ; und_QN
+und_962 ; und_QP
+und_963 ; und_QQ
+und_964 ; und_QR
+und_965 ; und_QS
+und_966 ; und_QT
+und_967 ; und_EU
+und_968 ; und_QV
+und_969 ; und_QW
+und_970 ; und_QX
+und_971 ; und_QY
+und_972 ; und_QZ
+und_973 ; und_XA
+und_974 ; und_XB
+und_975 ; und_XC
+und_976 ; und_XD
+und_977 ; und_XE
+und_978 ; und_XF
+und_979 ; und_XG
+und_980 ; und_XH
+und_981 ; und_XI
+und_982 ; und_XJ
+und_983 ; und_XK
+und_984 ; und_XL
+und_985 ; und_XM
+und_986 ; und_XN
+und_987 ; und_XO
+und_988 ; und_XP
+und_989 ; und_XQ
+und_990 ; und_XR
+und_991 ; und_XS
+und_992 ; und_XT
+und_993 ; und_XU
+und_994 ; und_XV
+und_995 ; und_XW
+und_996 ; und_XX
+und_997 ; und_XY
+und_998 ; und_XZ
+und_999 ; und_ZZ
+und_AN ; und_CW
+und_BU ; und_MM
+und_CS ; und_RS
+und_CT ; und_KI
+und_DD ; und_DE
+und_DY ; und_BJ
+und_FQ ; und_AQ
+und_FX ; und_FR
+und_HV ; und_BF
+und_JT ; und_UM
+und_MI ; und_UM
+und_NH ; und_VU
+und_NQ ; und_AQ
+und_NT ; und_SA
+und_PC ; und_FM
+und_PU ; und_UM
+und_PZ ; und_PA
+und_QU ; und_EU
+und_Qaai ; und_Zinh
+und_RH ; und_ZW
+und_SU ; und_RU
+und_TP ; und_TL
+und_UK ; und_GB
+und_VD ; und_VN
+und_WK ; und_UM
+und_YD ; und_YE
+und_YU ; und_RS
+und_ZR ; und_CD
+und_aaland ; und_AX
+und_arevela ; und
+und_arevmda ; und
+und_bokmal ; und
+und_hakka ; und
+und_heploc ; und_alalc97
+und_lojban ; und
+und_nynorsk ; und
+und_polytoni ; und_polyton
+und_saaho ; und
+und_xiang ; und
+uok ; ema
+urd ; ur
+uzb ; uz
+uzn ; uz
+ven ; ve
+vie ; vi
+vol ; vo
+wel ; cy
+wln ; wa
+wol ; wo
+xba ; cax
+xho ; xh
+xia ; acn
+xkh ; waw
+xpe ; kpe
+xsj ; suj
+xsl ; den
+ybd ; rki
+ydd ; yi
+yid ; yi
+yma ; lrr
+ymt ; mtm
+yor ; yo
+yos ; zom
+yuu ; yug
+zai ; zap
+zh_guoyu ; zh
+zh_hakka ; hak
+zh_xiang ; hsn
+zha ; za
+zho ; zh
+zsm ; ms
+zul ; zu
+zyb ; za
+
+# decanonicalized
+
+aar_saaho ; ssy
+arm_arevela ; hy
+arm_arevela_arevmda ; hyw
+arm_arevmda ; hyw
+chi_guoyu ; zh
+chi_guoyu_hakka_xiang ; hak
+chi_hakka ; hak
+chi_hakka_xiang ; hak
+chi_xiang ; hsn
+cmn_guoyu ; zh
+cmn_guoyu_hakka_xiang ; hak
+cmn_hakka ; hak
+cmn_hakka_xiang ; hak
+cmn_xiang ; hsn
+en_840_aaland ; en_US
+en_840_heploc ; en_US_alalc97
+en_840_polytoni ; en_US_polyton
+eng_840_aaland ; en_US
+eng_840_heploc ; en_US_alalc97
+eng_840_polytoni ; en_US_polyton
+eng_US_aaland ; en_US
+eng_US_heploc ; en_US_alalc97
+eng_US_polytoni ; en_US_polyton
+eng_aaland ; en_AX
+eng_arevela ; en
+eng_arevela_arevmda ; en
+eng_lojban ; en
+hye_arevela ; hy
+hye_arevela_arevmda ; hyw
+sgn_076 ; bzs
+sgn_170 ; csn
+sgn_208 ; dsl
+sgn_249 ; fsl
+sgn_250 ; fsl
+sgn_276 ; gsg
+sgn_278 ; gsg
+sgn_280 ; gsg
+sgn_300 ; gss
+sgn_372 ; isg
+sgn_380 ; ise
+sgn_392 ; jsl
+sgn_484 ; mfs
+sgn_528 ; dse
+sgn_558 ; ncs
+sgn_578 ; nsi
+sgn_620 ; psr
+sgn_710 ; sfs
+sgn_752 ; swl
+sgn_826 ; bfi
+sgn_840 ; ase
+sgn_DD ; gsg
+sgn_FX ; fsl
+sgn_UK ; bfi
+zho_guoyu ; zh
+zho_guoyu_hakka_xiang ; hak
+zho_hakka ; hak
+zho_hakka_xiang ; hak
+zho_xiang ; hsn
+
+# withIrrelevants
+
+aa_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa
+aaa_Adlm_004_fonipa ; aaa_Adlm_AF_fonipa
+aaa_Adlm_008_fonipa ; aaa_Adlm_AL_fonipa
+aaa_Adlm_010_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_012_fonipa ; aaa_Adlm_DZ_fonipa
+aaa_Adlm_016_fonipa ; aaa_Adlm_AS_fonipa
+aaa_Adlm_020_fonipa ; aaa_Adlm_AD_fonipa
+aaa_Adlm_024_fonipa ; aaa_Adlm_AO_fonipa
+aaa_Adlm_028_fonipa ; aaa_Adlm_AG_fonipa
+aaa_Adlm_031_fonipa ; aaa_Adlm_AZ_fonipa
+aaa_Adlm_032_fonipa ; aaa_Adlm_AR_fonipa
+aaa_Adlm_036_fonipa ; aaa_Adlm_AU_fonipa
+aaa_Adlm_040_fonipa ; aaa_Adlm_AT_fonipa
+aaa_Adlm_044_fonipa ; aaa_Adlm_BS_fonipa
+aaa_Adlm_048_fonipa ; aaa_Adlm_BH_fonipa
+aaa_Adlm_050_fonipa ; aaa_Adlm_BD_fonipa
+aaa_Adlm_051_fonipa ; aaa_Adlm_AM_fonipa
+aaa_Adlm_052_fonipa ; aaa_Adlm_BB_fonipa
+aaa_Adlm_056_fonipa ; aaa_Adlm_BE_fonipa
+aaa_Adlm_060_fonipa ; aaa_Adlm_BM_fonipa
+aaa_Adlm_062_fonipa ; aaa_Adlm_034_fonipa
+aaa_Adlm_064_fonipa ; aaa_Adlm_BT_fonipa
+aaa_Adlm_068_fonipa ; aaa_Adlm_BO_fonipa
+aaa_Adlm_070_fonipa ; aaa_Adlm_BA_fonipa
+aaa_Adlm_072_fonipa ; aaa_Adlm_BW_fonipa
+aaa_Adlm_074_fonipa ; aaa_Adlm_BV_fonipa
+aaa_Adlm_076_fonipa ; aaa_Adlm_BR_fonipa
+aaa_Adlm_084_fonipa ; aaa_Adlm_BZ_fonipa
+aaa_Adlm_086_fonipa ; aaa_Adlm_IO_fonipa
+aaa_Adlm_090_fonipa ; aaa_Adlm_SB_fonipa
+aaa_Adlm_092_fonipa ; aaa_Adlm_VG_fonipa
+aaa_Adlm_096_fonipa ; aaa_Adlm_BN_fonipa
+aaa_Adlm_100_fonipa ; aaa_Adlm_BG_fonipa
+aaa_Adlm_104_fonipa ; aaa_Adlm_MM_fonipa
+aaa_Adlm_108_fonipa ; aaa_Adlm_BI_fonipa
+aaa_Adlm_112_fonipa ; aaa_Adlm_BY_fonipa
+aaa_Adlm_116_fonipa ; aaa_Adlm_KH_fonipa
+aaa_Adlm_120_fonipa ; aaa_Adlm_CM_fonipa
+aaa_Adlm_124_fonipa ; aaa_Adlm_CA_fonipa
+aaa_Adlm_132_fonipa ; aaa_Adlm_CV_fonipa
+aaa_Adlm_136_fonipa ; aaa_Adlm_KY_fonipa
+aaa_Adlm_140_fonipa ; aaa_Adlm_CF_fonipa
+aaa_Adlm_144_fonipa ; aaa_Adlm_LK_fonipa
+aaa_Adlm_148_fonipa ; aaa_Adlm_TD_fonipa
+aaa_Adlm_152_fonipa ; aaa_Adlm_CL_fonipa
+aaa_Adlm_156_fonipa ; aaa_Adlm_CN_fonipa
+aaa_Adlm_158_fonipa ; aaa_Adlm_TW_fonipa
+aaa_Adlm_162_fonipa ; aaa_Adlm_CX_fonipa
+aaa_Adlm_166_fonipa ; aaa_Adlm_CC_fonipa
+aaa_Adlm_170_fonipa ; aaa_Adlm_CO_fonipa
+aaa_Adlm_172_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_174_fonipa ; aaa_Adlm_KM_fonipa
+aaa_Adlm_175_fonipa ; aaa_Adlm_YT_fonipa
+aaa_Adlm_178_fonipa ; aaa_Adlm_CG_fonipa
+aaa_Adlm_180_fonipa ; aaa_Adlm_CD_fonipa
+aaa_Adlm_184_fonipa ; aaa_Adlm_CK_fonipa
+aaa_Adlm_188_fonipa ; aaa_Adlm_CR_fonipa
+aaa_Adlm_191_fonipa ; aaa_Adlm_HR_fonipa
+aaa_Adlm_192_fonipa ; aaa_Adlm_CU_fonipa
+aaa_Adlm_196_fonipa ; aaa_Adlm_CY_fonipa
+aaa_Adlm_200_fonipa ; aaa_Adlm_CZ_fonipa
+aaa_Adlm_203_fonipa ; aaa_Adlm_CZ_fonipa
+aaa_Adlm_204_fonipa ; aaa_Adlm_BJ_fonipa
+aaa_Adlm_208_fonipa ; aaa_Adlm_DK_fonipa
+aaa_Adlm_212_fonipa ; aaa_Adlm_DM_fonipa
+aaa_Adlm_214_fonipa ; aaa_Adlm_DO_fonipa
+aaa_Adlm_218_fonipa ; aaa_Adlm_EC_fonipa
+aaa_Adlm_222_fonipa ; aaa_Adlm_SV_fonipa
+aaa_Adlm_226_fonipa ; aaa_Adlm_GQ_fonipa
+aaa_Adlm_230_fonipa ; aaa_Adlm_ET_fonipa
+aaa_Adlm_231_fonipa ; aaa_Adlm_ET_fonipa
+aaa_Adlm_232_fonipa ; aaa_Adlm_ER_fonipa
+aaa_Adlm_233_fonipa ; aaa_Adlm_EE_fonipa
+aaa_Adlm_234_fonipa ; aaa_Adlm_FO_fonipa
+aaa_Adlm_238_fonipa ; aaa_Adlm_FK_fonipa
+aaa_Adlm_239_fonipa ; aaa_Adlm_GS_fonipa
+aaa_Adlm_242_fonipa ; aaa_Adlm_FJ_fonipa
+aaa_Adlm_246_fonipa ; aaa_Adlm_FI_fonipa
+aaa_Adlm_248_fonipa ; aaa_Adlm_AX_fonipa
+aaa_Adlm_249_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_250_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_254_fonipa ; aaa_Adlm_GF_fonipa
+aaa_Adlm_258_fonipa ; aaa_Adlm_PF_fonipa
+aaa_Adlm_260_fonipa ; aaa_Adlm_TF_fonipa
+aaa_Adlm_262_fonipa ; aaa_Adlm_DJ_fonipa
+aaa_Adlm_266_fonipa ; aaa_Adlm_GA_fonipa
+aaa_Adlm_268_fonipa ; aaa_Adlm_GE_fonipa
+aaa_Adlm_270_fonipa ; aaa_Adlm_GM_fonipa
+aaa_Adlm_275_fonipa ; aaa_Adlm_PS_fonipa
+aaa_Adlm_276_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_278_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_280_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_288_fonipa ; aaa_Adlm_GH_fonipa
+aaa_Adlm_292_fonipa ; aaa_Adlm_GI_fonipa
+aaa_Adlm_296_fonipa ; aaa_Adlm_KI_fonipa
+aaa_Adlm_300_fonipa ; aaa_Adlm_GR_fonipa
+aaa_Adlm_304_fonipa ; aaa_Adlm_GL_fonipa
+aaa_Adlm_308_fonipa ; aaa_Adlm_GD_fonipa
+aaa_Adlm_312_fonipa ; aaa_Adlm_GP_fonipa
+aaa_Adlm_316_fonipa ; aaa_Adlm_GU_fonipa
+aaa_Adlm_320_fonipa ; aaa_Adlm_GT_fonipa
+aaa_Adlm_324_fonipa ; aaa_Adlm_GN_fonipa
+aaa_Adlm_328_fonipa ; aaa_Adlm_GY_fonipa
+aaa_Adlm_332_fonipa ; aaa_Adlm_HT_fonipa
+aaa_Adlm_334_fonipa ; aaa_Adlm_HM_fonipa
+aaa_Adlm_336_fonipa ; aaa_Adlm_VA_fonipa
+aaa_Adlm_340_fonipa ; aaa_Adlm_HN_fonipa
+aaa_Adlm_344_fonipa ; aaa_Adlm_HK_fonipa
+aaa_Adlm_348_fonipa ; aaa_Adlm_HU_fonipa
+aaa_Adlm_352_fonipa ; aaa_Adlm_IS_fonipa
+aaa_Adlm_356_fonipa ; aaa_Adlm_IN_fonipa
+aaa_Adlm_360_fonipa ; aaa_Adlm_ID_fonipa
+aaa_Adlm_364_fonipa ; aaa_Adlm_IR_fonipa
+aaa_Adlm_368_fonipa ; aaa_Adlm_IQ_fonipa
+aaa_Adlm_372_fonipa ; aaa_Adlm_IE_fonipa
+aaa_Adlm_376_fonipa ; aaa_Adlm_IL_fonipa
+aaa_Adlm_380_fonipa ; aaa_Adlm_IT_fonipa
+aaa_Adlm_384_fonipa ; aaa_Adlm_CI_fonipa
+aaa_Adlm_388_fonipa ; aaa_Adlm_JM_fonipa
+aaa_Adlm_392_fonipa ; aaa_Adlm_JP_fonipa
+aaa_Adlm_398_fonipa ; aaa_Adlm_KZ_fonipa
+aaa_Adlm_400_fonipa ; aaa_Adlm_JO_fonipa
+aaa_Adlm_404_fonipa ; aaa_Adlm_KE_fonipa
+aaa_Adlm_408_fonipa ; aaa_Adlm_KP_fonipa
+aaa_Adlm_410_fonipa ; aaa_Adlm_KR_fonipa
+aaa_Adlm_414_fonipa ; aaa_Adlm_KW_fonipa
+aaa_Adlm_417_fonipa ; aaa_Adlm_KG_fonipa
+aaa_Adlm_418_fonipa ; aaa_Adlm_LA_fonipa
+aaa_Adlm_422_fonipa ; aaa_Adlm_LB_fonipa
+aaa_Adlm_426_fonipa ; aaa_Adlm_LS_fonipa
+aaa_Adlm_428_fonipa ; aaa_Adlm_LV_fonipa
+aaa_Adlm_430_fonipa ; aaa_Adlm_LR_fonipa
+aaa_Adlm_434_fonipa ; aaa_Adlm_LY_fonipa
+aaa_Adlm_438_fonipa ; aaa_Adlm_LI_fonipa
+aaa_Adlm_440_fonipa ; aaa_Adlm_LT_fonipa
+aaa_Adlm_442_fonipa ; aaa_Adlm_LU_fonipa
+aaa_Adlm_446_fonipa ; aaa_Adlm_MO_fonipa
+aaa_Adlm_450_fonipa ; aaa_Adlm_MG_fonipa
+aaa_Adlm_454_fonipa ; aaa_Adlm_MW_fonipa
+aaa_Adlm_458_fonipa ; aaa_Adlm_MY_fonipa
+aaa_Adlm_462_fonipa ; aaa_Adlm_MV_fonipa
+aaa_Adlm_466_fonipa ; aaa_Adlm_ML_fonipa
+aaa_Adlm_470_fonipa ; aaa_Adlm_MT_fonipa
+aaa_Adlm_474_fonipa ; aaa_Adlm_MQ_fonipa
+aaa_Adlm_478_fonipa ; aaa_Adlm_MR_fonipa
+aaa_Adlm_480_fonipa ; aaa_Adlm_MU_fonipa
+aaa_Adlm_484_fonipa ; aaa_Adlm_MX_fonipa
+aaa_Adlm_492_fonipa ; aaa_Adlm_MC_fonipa
+aaa_Adlm_496_fonipa ; aaa_Adlm_MN_fonipa
+aaa_Adlm_498_fonipa ; aaa_Adlm_MD_fonipa
+aaa_Adlm_499_fonipa ; aaa_Adlm_ME_fonipa
+aaa_Adlm_500_fonipa ; aaa_Adlm_MS_fonipa
+aaa_Adlm_504_fonipa ; aaa_Adlm_MA_fonipa
+aaa_Adlm_508_fonipa ; aaa_Adlm_MZ_fonipa
+aaa_Adlm_512_fonipa ; aaa_Adlm_OM_fonipa
+aaa_Adlm_516_fonipa ; aaa_Adlm_NA_fonipa
+aaa_Adlm_520_fonipa ; aaa_Adlm_NR_fonipa
+aaa_Adlm_524_fonipa ; aaa_Adlm_NP_fonipa
+aaa_Adlm_528_fonipa ; aaa_Adlm_NL_fonipa
+aaa_Adlm_530_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_531_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_532_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_533_fonipa ; aaa_Adlm_AW_fonipa
+aaa_Adlm_534_fonipa ; aaa_Adlm_SX_fonipa
+aaa_Adlm_535_fonipa ; aaa_Adlm_BQ_fonipa
+aaa_Adlm_536_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_540_fonipa ; aaa_Adlm_NC_fonipa
+aaa_Adlm_548_fonipa ; aaa_Adlm_VU_fonipa
+aaa_Adlm_554_fonipa ; aaa_Adlm_NZ_fonipa
+aaa_Adlm_558_fonipa ; aaa_Adlm_NI_fonipa
+aaa_Adlm_562_fonipa ; aaa_Adlm_NE_fonipa
+aaa_Adlm_566_fonipa ; aaa_Adlm_NG_fonipa
+aaa_Adlm_570_fonipa ; aaa_Adlm_NU_fonipa
+aaa_Adlm_574_fonipa ; aaa_Adlm_NF_fonipa
+aaa_Adlm_578_fonipa ; aaa_Adlm_NO_fonipa
+aaa_Adlm_580_fonipa ; aaa_Adlm_MP_fonipa
+aaa_Adlm_581_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_582_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_583_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_584_fonipa ; aaa_Adlm_MH_fonipa
+aaa_Adlm_585_fonipa ; aaa_Adlm_PW_fonipa
+aaa_Adlm_586_fonipa ; aaa_Adlm_PK_fonipa
+aaa_Adlm_591_fonipa ; aaa_Adlm_PA_fonipa
+aaa_Adlm_598_fonipa ; aaa_Adlm_PG_fonipa
+aaa_Adlm_600_fonipa ; aaa_Adlm_PY_fonipa
+aaa_Adlm_604_fonipa ; aaa_Adlm_PE_fonipa
+aaa_Adlm_608_fonipa ; aaa_Adlm_PH_fonipa
+aaa_Adlm_612_fonipa ; aaa_Adlm_PN_fonipa
+aaa_Adlm_616_fonipa ; aaa_Adlm_PL_fonipa
+aaa_Adlm_620_fonipa ; aaa_Adlm_PT_fonipa
+aaa_Adlm_624_fonipa ; aaa_Adlm_GW_fonipa
+aaa_Adlm_626_fonipa ; aaa_Adlm_TL_fonipa
+aaa_Adlm_630_fonipa ; aaa_Adlm_PR_fonipa
+aaa_Adlm_634_fonipa ; aaa_Adlm_QA_fonipa
+aaa_Adlm_638_fonipa ; aaa_Adlm_RE_fonipa
+aaa_Adlm_642_fonipa ; aaa_Adlm_RO_fonipa
+aaa_Adlm_643_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_646_fonipa ; aaa_Adlm_RW_fonipa
+aaa_Adlm_652_fonipa ; aaa_Adlm_BL_fonipa
+aaa_Adlm_654_fonipa ; aaa_Adlm_SH_fonipa
+aaa_Adlm_659_fonipa ; aaa_Adlm_KN_fonipa
+aaa_Adlm_660_fonipa ; aaa_Adlm_AI_fonipa
+aaa_Adlm_662_fonipa ; aaa_Adlm_LC_fonipa
+aaa_Adlm_663_fonipa ; aaa_Adlm_MF_fonipa
+aaa_Adlm_666_fonipa ; aaa_Adlm_PM_fonipa
+aaa_Adlm_670_fonipa ; aaa_Adlm_VC_fonipa
+aaa_Adlm_674_fonipa ; aaa_Adlm_SM_fonipa
+aaa_Adlm_678_fonipa ; aaa_Adlm_ST_fonipa
+aaa_Adlm_682_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_686_fonipa ; aaa_Adlm_SN_fonipa
+aaa_Adlm_688_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_690_fonipa ; aaa_Adlm_SC_fonipa
+aaa_Adlm_694_fonipa ; aaa_Adlm_SL_fonipa
+aaa_Adlm_702_fonipa ; aaa_Adlm_SG_fonipa
+aaa_Adlm_703_fonipa ; aaa_Adlm_SK_fonipa
+aaa_Adlm_704_fonipa ; aaa_Adlm_VN_fonipa
+aaa_Adlm_705_fonipa ; aaa_Adlm_SI_fonipa
+aaa_Adlm_706_fonipa ; aaa_Adlm_SO_fonipa
+aaa_Adlm_710_fonipa ; aaa_Adlm_ZA_fonipa
+aaa_Adlm_716_fonipa ; aaa_Adlm_ZW_fonipa
+aaa_Adlm_720_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_724_fonipa ; aaa_Adlm_ES_fonipa
+aaa_Adlm_728_fonipa ; aaa_Adlm_SS_fonipa
+aaa_Adlm_729_fonipa ; aaa_Adlm_SD_fonipa
+aaa_Adlm_732_fonipa ; aaa_Adlm_EH_fonipa
+aaa_Adlm_736_fonipa ; aaa_Adlm_SD_fonipa
+aaa_Adlm_740_fonipa ; aaa_Adlm_SR_fonipa
+aaa_Adlm_744_fonipa ; aaa_Adlm_SJ_fonipa
+aaa_Adlm_748_fonipa ; aaa_Adlm_SZ_fonipa
+aaa_Adlm_752_fonipa ; aaa_Adlm_SE_fonipa
+aaa_Adlm_756_fonipa ; aaa_Adlm_CH_fonipa
+aaa_Adlm_760_fonipa ; aaa_Adlm_SY_fonipa
+aaa_Adlm_762_fonipa ; aaa_Adlm_TJ_fonipa
+aaa_Adlm_764_fonipa ; aaa_Adlm_TH_fonipa
+aaa_Adlm_768_fonipa ; aaa_Adlm_TG_fonipa
+aaa_Adlm_772_fonipa ; aaa_Adlm_TK_fonipa
+aaa_Adlm_776_fonipa ; aaa_Adlm_TO_fonipa
+aaa_Adlm_780_fonipa ; aaa_Adlm_TT_fonipa
+aaa_Adlm_784_fonipa ; aaa_Adlm_AE_fonipa
+aaa_Adlm_788_fonipa ; aaa_Adlm_TN_fonipa
+aaa_Adlm_792_fonipa ; aaa_Adlm_TR_fonipa
+aaa_Adlm_795_fonipa ; aaa_Adlm_TM_fonipa
+aaa_Adlm_796_fonipa ; aaa_Adlm_TC_fonipa
+aaa_Adlm_798_fonipa ; aaa_Adlm_TV_fonipa
+aaa_Adlm_800_fonipa ; aaa_Adlm_UG_fonipa
+aaa_Adlm_804_fonipa ; aaa_Adlm_UA_fonipa
+aaa_Adlm_807_fonipa ; aaa_Adlm_MK_fonipa
+aaa_Adlm_810_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_818_fonipa ; aaa_Adlm_EG_fonipa
+aaa_Adlm_826_fonipa ; aaa_Adlm_GB_fonipa
+aaa_Adlm_830_fonipa ; aaa_Adlm_JE_fonipa
+aaa_Adlm_831_fonipa ; aaa_Adlm_GG_fonipa
+aaa_Adlm_832_fonipa ; aaa_Adlm_JE_fonipa
+aaa_Adlm_833_fonipa ; aaa_Adlm_IM_fonipa
+aaa_Adlm_834_fonipa ; aaa_Adlm_TZ_fonipa
+aaa_Adlm_840_fonipa ; aaa_Adlm_US_fonipa
+aaa_Adlm_850_fonipa ; aaa_Adlm_VI_fonipa
+aaa_Adlm_854_fonipa ; aaa_Adlm_BF_fonipa
+aaa_Adlm_858_fonipa ; aaa_Adlm_UY_fonipa
+aaa_Adlm_860_fonipa ; aaa_Adlm_UZ_fonipa
+aaa_Adlm_862_fonipa ; aaa_Adlm_VE_fonipa
+aaa_Adlm_876_fonipa ; aaa_Adlm_WF_fonipa
+aaa_Adlm_882_fonipa ; aaa_Adlm_WS_fonipa
+aaa_Adlm_886_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_887_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_890_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_891_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_894_fonipa ; aaa_Adlm_ZM_fonipa
+aaa_Adlm_958_fonipa ; aaa_Adlm_AA_fonipa
+aaa_Adlm_959_fonipa ; aaa_Adlm_QM_fonipa
+aaa_Adlm_960_fonipa ; aaa_Adlm_QN_fonipa
+aaa_Adlm_962_fonipa ; aaa_Adlm_QP_fonipa
+aaa_Adlm_963_fonipa ; aaa_Adlm_QQ_fonipa
+aaa_Adlm_964_fonipa ; aaa_Adlm_QR_fonipa
+aaa_Adlm_965_fonipa ; aaa_Adlm_QS_fonipa
+aaa_Adlm_966_fonipa ; aaa_Adlm_QT_fonipa
+aaa_Adlm_967_fonipa ; aaa_Adlm_EU_fonipa
+aaa_Adlm_968_fonipa ; aaa_Adlm_QV_fonipa
+aaa_Adlm_969_fonipa ; aaa_Adlm_QW_fonipa
+aaa_Adlm_970_fonipa ; aaa_Adlm_QX_fonipa
+aaa_Adlm_971_fonipa ; aaa_Adlm_QY_fonipa
+aaa_Adlm_972_fonipa ; aaa_Adlm_QZ_fonipa
+aaa_Adlm_973_fonipa ; aaa_Adlm_XA_fonipa
+aaa_Adlm_974_fonipa ; aaa_Adlm_XB_fonipa
+aaa_Adlm_975_fonipa ; aaa_Adlm_XC_fonipa
+aaa_Adlm_976_fonipa ; aaa_Adlm_XD_fonipa
+aaa_Adlm_977_fonipa ; aaa_Adlm_XE_fonipa
+aaa_Adlm_978_fonipa ; aaa_Adlm_XF_fonipa
+aaa_Adlm_979_fonipa ; aaa_Adlm_XG_fonipa
+aaa_Adlm_980_fonipa ; aaa_Adlm_XH_fonipa
+aaa_Adlm_981_fonipa ; aaa_Adlm_XI_fonipa
+aaa_Adlm_982_fonipa ; aaa_Adlm_XJ_fonipa
+aaa_Adlm_983_fonipa ; aaa_Adlm_XK_fonipa
+aaa_Adlm_984_fonipa ; aaa_Adlm_XL_fonipa
+aaa_Adlm_985_fonipa ; aaa_Adlm_XM_fonipa
+aaa_Adlm_986_fonipa ; aaa_Adlm_XN_fonipa
+aaa_Adlm_987_fonipa ; aaa_Adlm_XO_fonipa
+aaa_Adlm_988_fonipa ; aaa_Adlm_XP_fonipa
+aaa_Adlm_989_fonipa ; aaa_Adlm_XQ_fonipa
+aaa_Adlm_990_fonipa ; aaa_Adlm_XR_fonipa
+aaa_Adlm_991_fonipa ; aaa_Adlm_XS_fonipa
+aaa_Adlm_992_fonipa ; aaa_Adlm_XT_fonipa
+aaa_Adlm_993_fonipa ; aaa_Adlm_XU_fonipa
+aaa_Adlm_994_fonipa ; aaa_Adlm_XV_fonipa
+aaa_Adlm_995_fonipa ; aaa_Adlm_XW_fonipa
+aaa_Adlm_996_fonipa ; aaa_Adlm_XX_fonipa
+aaa_Adlm_997_fonipa ; aaa_Adlm_XY_fonipa
+aaa_Adlm_998_fonipa ; aaa_Adlm_XZ_fonipa
+aaa_Adlm_999_fonipa ; aaa_Adlm_ZZ_fonipa
+aaa_Adlm_AC_aaland_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_arevela_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_arevmda_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_bokmal_fonipa ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_hakka ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_heploc ; aaa_Adlm_AC_alalc97_fonipa
+aaa_Adlm_AC_fonipa_lojban ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_nynorsk ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_polytoni ; aaa_Adlm_AC_fonipa_polyton
+aaa_Adlm_AC_fonipa_saaho ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AC_fonipa_xiang ; aaa_Adlm_AC_fonipa
+aaa_Adlm_AN_fonipa ; aaa_Adlm_CW_fonipa
+aaa_Adlm_BU_fonipa ; aaa_Adlm_MM_fonipa
+aaa_Adlm_CS_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_CT_fonipa ; aaa_Adlm_KI_fonipa
+aaa_Adlm_DD_fonipa ; aaa_Adlm_DE_fonipa
+aaa_Adlm_DY_fonipa ; aaa_Adlm_BJ_fonipa
+aaa_Adlm_FQ_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_FX_fonipa ; aaa_Adlm_FR_fonipa
+aaa_Adlm_HV_fonipa ; aaa_Adlm_BF_fonipa
+aaa_Adlm_JT_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_MI_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_NH_fonipa ; aaa_Adlm_VU_fonipa
+aaa_Adlm_NQ_fonipa ; aaa_Adlm_AQ_fonipa
+aaa_Adlm_NT_fonipa ; aaa_Adlm_SA_fonipa
+aaa_Adlm_PC_fonipa ; aaa_Adlm_FM_fonipa
+aaa_Adlm_PU_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_PZ_fonipa ; aaa_Adlm_PA_fonipa
+aaa_Adlm_QU_fonipa ; aaa_Adlm_EU_fonipa
+aaa_Adlm_RH_fonipa ; aaa_Adlm_ZW_fonipa
+aaa_Adlm_SU_fonipa ; aaa_Adlm_RU_fonipa
+aaa_Adlm_TP_fonipa ; aaa_Adlm_TL_fonipa
+aaa_Adlm_UK_fonipa ; aaa_Adlm_GB_fonipa
+aaa_Adlm_VD_fonipa ; aaa_Adlm_VN_fonipa
+aaa_Adlm_WK_fonipa ; aaa_Adlm_UM_fonipa
+aaa_Adlm_YD_fonipa ; aaa_Adlm_YE_fonipa
+aaa_Adlm_YU_fonipa ; aaa_Adlm_RS_fonipa
+aaa_Adlm_ZR_fonipa ; aaa_Adlm_CD_fonipa
+aaa_Qaai_AC_fonipa ; aaa_Zinh_AC_fonipa
+aam_Adlm_AC_fonipa ; aas_Adlm_AC_fonipa
+aar_Adlm_AC_fonipa ; aa_Adlm_AC_fonipa
+aar_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa
+abk_Adlm_AC_fonipa ; ab_Adlm_AC_fonipa
+adp_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa
+afr_Adlm_AC_fonipa ; af_Adlm_AC_fonipa
+aju_Adlm_AC_fonipa ; jrb_Adlm_AC_fonipa
+aka_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+alb_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+als_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+amh_Adlm_AC_fonipa ; am_Adlm_AC_fonipa
+ara_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa
+arb_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa
+arg_Adlm_AC_fonipa ; an_Adlm_AC_fonipa
+arm_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+arm_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+arm_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+arm_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa
+art_Adlm_AC_fonipa_lojban ; jbo_Adlm_AC_fonipa
+asd_Adlm_AC_fonipa ; snz_Adlm_AC_fonipa
+asm_Adlm_AC_fonipa ; as_Adlm_AC_fonipa
+aue_Adlm_AC_fonipa ; ktz_Adlm_AC_fonipa
+ava_Adlm_AC_fonipa ; av_Adlm_AC_fonipa
+ave_Adlm_AC_fonipa ; ae_Adlm_AC_fonipa
+aym_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa
+ayr_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa
+ayx_Adlm_AC_fonipa ; nun_Adlm_AC_fonipa
+aze_Adlm_AC_fonipa ; az_Adlm_AC_fonipa
+azj_Adlm_AC_fonipa ; az_Adlm_AC_fonipa
+bak_Adlm_AC_fonipa ; ba_Adlm_AC_fonipa
+bam_Adlm_AC_fonipa ; bm_Adlm_AC_fonipa
+baq_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa
+bcc_Adlm_AC_fonipa ; bal_Adlm_AC_fonipa
+bcl_Adlm_AC_fonipa ; bik_Adlm_AC_fonipa
+bel_Adlm_AC_fonipa ; be_Adlm_AC_fonipa
+ben_Adlm_AC_fonipa ; bn_Adlm_AC_fonipa
+bgm_Adlm_AC_fonipa ; bcg_Adlm_AC_fonipa
+bh_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa
+bih_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa
+bis_Adlm_AC_fonipa ; bi_Adlm_AC_fonipa
+bjd_Adlm_AC_fonipa ; drl_Adlm_AC_fonipa
+bod_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa
+bos_Adlm_AC_fonipa ; bs_Adlm_AC_fonipa
+bre_Adlm_AC_fonipa ; br_Adlm_AC_fonipa
+bul_Adlm_AC_fonipa ; bg_Adlm_AC_fonipa
+bur_Adlm_AC_fonipa ; my_Adlm_AC_fonipa
+bxk_Adlm_AC_fonipa ; luy_Adlm_AC_fonipa
+bxr_Adlm_AC_fonipa ; bua_Adlm_AC_fonipa
+cat_Adlm_AC_fonipa ; ca_Adlm_AC_fonipa
+ccq_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa
+cel_Adlm_AC_fonipa_gaulish ; xtg_Adlm_AC_fonipa
+ces_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa
+cha_Adlm_AC_fonipa ; ch_Adlm_AC_fonipa
+che_Adlm_AC_fonipa ; ce_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+chi_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+chu_Adlm_AC_fonipa ; cu_Adlm_AC_fonipa
+chv_Adlm_AC_fonipa ; cv_Adlm_AC_fonipa
+cjr_Adlm_AC_fonipa ; mom_Adlm_AC_fonipa
+cka_Adlm_AC_fonipa ; cmr_Adlm_AC_fonipa
+cld_Adlm_AC_fonipa ; syr_Adlm_AC_fonipa
+cmk_Adlm_AC_fonipa ; xch_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+cmn_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+cnr_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+cor_Adlm_AC_fonipa ; kw_Adlm_AC_fonipa
+cos_Adlm_AC_fonipa ; co_Adlm_AC_fonipa
+coy_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa
+cqu_Adlm_AC_fonipa ; quh_Adlm_AC_fonipa
+cre_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa
+cwd_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa
+cym_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa
+cze_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa
+dan_Adlm_AC_fonipa ; da_Adlm_AC_fonipa
+deu_Adlm_AC_fonipa ; de_Adlm_AC_fonipa
+dgo_Adlm_AC_fonipa ; doi_Adlm_AC_fonipa
+dhd_Adlm_AC_fonipa ; mwr_Adlm_AC_fonipa
+dik_Adlm_AC_fonipa ; din_Adlm_AC_fonipa
+diq_Adlm_AC_fonipa ; zza_Adlm_AC_fonipa
+dit_Adlm_AC_fonipa ; dif_Adlm_AC_fonipa
+div_Adlm_AC_fonipa ; dv_Adlm_AC_fonipa
+drh_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+drw_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+dut_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa
+dzo_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa
+ekk_Adlm_AC_fonipa ; et_Adlm_AC_fonipa
+ell_Adlm_AC_fonipa ; el_Adlm_AC_fonipa
+emk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa
+en_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa
+en_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+en_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+en_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa
+en_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa
+en_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa
+en_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+en_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+eng_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa
+eng_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+eng_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+eng_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_fonipa ; en_Adlm_AC_fonipa
+eng_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa
+eng_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa
+eng_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa
+eng_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton
+epo_Adlm_AC_fonipa ; eo_Adlm_AC_fonipa
+esk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa
+est_Adlm_AC_fonipa ; et_Adlm_AC_fonipa
+eus_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa
+ewe_Adlm_AC_fonipa ; ee_Adlm_AC_fonipa
+fao_Adlm_AC_fonipa ; fo_Adlm_AC_fonipa
+fas_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+fat_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+fij_Adlm_AC_fonipa ; fj_Adlm_AC_fonipa
+fin_Adlm_AC_fonipa ; fi_Adlm_AC_fonipa
+fra_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa
+fre_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa
+fry_Adlm_AC_fonipa ; fy_Adlm_AC_fonipa
+fuc_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa
+ful_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa
+gav_Adlm_AC_fonipa ; dev_Adlm_AC_fonipa
+gaz_Adlm_AC_fonipa ; om_Adlm_AC_fonipa
+gbo_Adlm_AC_fonipa ; grb_Adlm_AC_fonipa
+geo_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa
+ger_Adlm_AC_fonipa ; de_Adlm_AC_fonipa
+gfx_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+ggn_Adlm_AC_fonipa ; gvr_Adlm_AC_fonipa
+gla_Adlm_AC_fonipa ; gd_Adlm_AC_fonipa
+gle_Adlm_AC_fonipa ; ga_Adlm_AC_fonipa
+glg_Adlm_AC_fonipa ; gl_Adlm_AC_fonipa
+glv_Adlm_AC_fonipa ; gv_Adlm_AC_fonipa
+gno_Adlm_AC_fonipa ; gon_Adlm_AC_fonipa
+gre_Adlm_AC_fonipa ; el_Adlm_AC_fonipa
+grn_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa
+gti_Adlm_AC_fonipa ; nyc_Adlm_AC_fonipa
+gug_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa
+guj_Adlm_AC_fonipa ; gu_Adlm_AC_fonipa
+guv_Adlm_AC_fonipa ; duz_Adlm_AC_fonipa
+gya_Adlm_AC_fonipa ; gba_Adlm_AC_fonipa
+hat_Adlm_AC_fonipa ; ht_Adlm_AC_fonipa
+hau_Adlm_AC_fonipa ; ha_Adlm_AC_fonipa
+hbs_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+hdn_Adlm_AC_fonipa ; hai_Adlm_AC_fonipa
+hea_Adlm_AC_fonipa ; hmn_Adlm_AC_fonipa
+heb_Adlm_AC_fonipa ; he_Adlm_AC_fonipa
+her_Adlm_AC_fonipa ; hz_Adlm_AC_fonipa
+him_Adlm_AC_fonipa ; srx_Adlm_AC_fonipa
+hin_Adlm_AC_fonipa ; hi_Adlm_AC_fonipa
+hmo_Adlm_AC_fonipa ; ho_Adlm_AC_fonipa
+hrr_Adlm_AC_fonipa ; jal_Adlm_AC_fonipa
+hrv_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa
+hun_Adlm_AC_fonipa ; hu_Adlm_AC_fonipa
+hy_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hy_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+hy_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa
+hye_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa
+hye_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa
+ibi_Adlm_AC_fonipa ; opa_Adlm_AC_fonipa
+ibo_Adlm_AC_fonipa ; ig_Adlm_AC_fonipa
+ice_Adlm_AC_fonipa ; is_Adlm_AC_fonipa
+ido_Adlm_AC_fonipa ; io_Adlm_AC_fonipa
+iii_Adlm_AC_fonipa ; ii_Adlm_AC_fonipa
+ike_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa
+iku_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa
+ile_Adlm_AC_fonipa ; ie_Adlm_AC_fonipa
+ilw_Adlm_AC_fonipa ; gal_Adlm_AC_fonipa
+in_Adlm_AC_fonipa ; id_Adlm_AC_fonipa
+ina_Adlm_AC_fonipa ; ia_Adlm_AC_fonipa
+ind_Adlm_AC_fonipa ; id_Adlm_AC_fonipa
+ipk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa
+isl_Adlm_AC_fonipa ; is_Adlm_AC_fonipa
+ita_Adlm_AC_fonipa ; it_Adlm_AC_fonipa
+iw_Adlm_AC_fonipa ; he_Adlm_AC_fonipa
+jav_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa
+jeg_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+ji_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+jpn_Adlm_AC_fonipa ; ja_Adlm_AC_fonipa
+jw_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa
+kal_Adlm_AC_fonipa ; kl_Adlm_AC_fonipa
+kan_Adlm_AC_fonipa ; kn_Adlm_AC_fonipa
+kas_Adlm_AC_fonipa ; ks_Adlm_AC_fonipa
+kat_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa
+kau_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa
+kaz_Adlm_AC_fonipa ; kk_Adlm_AC_fonipa
+kgc_Adlm_AC_fonipa ; tdf_Adlm_AC_fonipa
+kgh_Adlm_AC_fonipa ; kml_Adlm_AC_fonipa
+khk_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+khm_Adlm_AC_fonipa ; km_Adlm_AC_fonipa
+kik_Adlm_AC_fonipa ; ki_Adlm_AC_fonipa
+kin_Adlm_AC_fonipa ; rw_Adlm_AC_fonipa
+kir_Adlm_AC_fonipa ; ky_Adlm_AC_fonipa
+kmr_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa
+knc_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa
+kng_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa
+knn_Adlm_AC_fonipa ; kok_Adlm_AC_fonipa
+koj_Adlm_AC_fonipa ; kwv_Adlm_AC_fonipa
+kom_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa
+kon_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa
+kor_Adlm_AC_fonipa ; ko_Adlm_AC_fonipa
+kpv_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa
+krm_Adlm_AC_fonipa ; bmf_Adlm_AC_fonipa
+ktr_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+kua_Adlm_AC_fonipa ; kj_Adlm_AC_fonipa
+kur_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa
+kvs_Adlm_AC_fonipa ; gdj_Adlm_AC_fonipa
+kwq_Adlm_AC_fonipa ; yam_Adlm_AC_fonipa
+kxe_Adlm_AC_fonipa ; tvd_Adlm_AC_fonipa
+kzj_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+kzt_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+lao_Adlm_AC_fonipa ; lo_Adlm_AC_fonipa
+lat_Adlm_AC_fonipa ; la_Adlm_AC_fonipa
+lav_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa
+lbk_Adlm_AC_fonipa ; bnc_Adlm_AC_fonipa
+lii_Adlm_AC_fonipa ; raq_Adlm_AC_fonipa
+lim_Adlm_AC_fonipa ; li_Adlm_AC_fonipa
+lin_Adlm_AC_fonipa ; ln_Adlm_AC_fonipa
+lit_Adlm_AC_fonipa ; lt_Adlm_AC_fonipa
+llo_Adlm_AC_fonipa ; ngt_Adlm_AC_fonipa
+lmm_Adlm_AC_fonipa ; rmx_Adlm_AC_fonipa
+ltz_Adlm_AC_fonipa ; lb_Adlm_AC_fonipa
+lub_Adlm_AC_fonipa ; lu_Adlm_AC_fonipa
+lug_Adlm_AC_fonipa ; lg_Adlm_AC_fonipa
+lvs_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa
+mac_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa
+mah_Adlm_AC_fonipa ; mh_Adlm_AC_fonipa
+mal_Adlm_AC_fonipa ; ml_Adlm_AC_fonipa
+mao_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa
+mar_Adlm_AC_fonipa ; mr_Adlm_AC_fonipa
+may_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+meg_Adlm_AC_fonipa ; cir_Adlm_AC_fonipa
+mhr_Adlm_AC_fonipa ; chm_Adlm_AC_fonipa
+mkd_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa
+mlg_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa
+mlt_Adlm_AC_fonipa ; mt_Adlm_AC_fonipa
+mnk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa
+mo_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+mol_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+mon_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa
+mri_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa
+msa_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+mst_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa
+mup_Adlm_AC_fonipa ; raj_Adlm_AC_fonipa
+mwj_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+mya_Adlm_AC_fonipa ; my_Adlm_AC_fonipa
+myd_Adlm_AC_fonipa ; aog_Adlm_AC_fonipa
+myt_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa
+nad_Adlm_AC_fonipa ; xny_Adlm_AC_fonipa
+nau_Adlm_AC_fonipa ; na_Adlm_AC_fonipa
+nav_Adlm_AC_fonipa ; nv_Adlm_AC_fonipa
+nbl_Adlm_AC_fonipa ; nr_Adlm_AC_fonipa
+ncp_Adlm_AC_fonipa ; kdz_Adlm_AC_fonipa
+nde_Adlm_AC_fonipa ; nd_Adlm_AC_fonipa
+ndo_Adlm_AC_fonipa ; ng_Adlm_AC_fonipa
+nep_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa
+nld_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa
+nno_Adlm_AC_fonipa ; nn_Adlm_AC_fonipa
+nns_Adlm_AC_fonipa ; nbr_Adlm_AC_fonipa
+nnx_Adlm_AC_fonipa ; ngv_Adlm_AC_fonipa
+no_Adlm_AC_bokmal_fonipa ; nb_Adlm_AC_fonipa
+no_Adlm_AC_bokmal_fonipa_nynorsk ; nb_Adlm_AC_fonipa
+no_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+no_Adlm_AC_fonipa_nynorsk ; nn_Adlm_AC_fonipa
+nob_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+nor_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa
+npi_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa
+nts_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa
+nya_Adlm_AC_fonipa ; ny_Adlm_AC_fonipa
+oci_Adlm_AC_fonipa ; oc_Adlm_AC_fonipa
+ojg_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa
+oji_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa
+ori_Adlm_AC_fonipa ; or_Adlm_AC_fonipa
+orm_Adlm_AC_fonipa ; om_Adlm_AC_fonipa
+ory_Adlm_AC_fonipa ; or_Adlm_AC_fonipa
+oss_Adlm_AC_fonipa ; os_Adlm_AC_fonipa
+oun_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa
+pan_Adlm_AC_fonipa ; pa_Adlm_AC_fonipa
+pbu_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa
+pcr_Adlm_AC_fonipa ; adx_Adlm_AC_fonipa
+per_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pes_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pli_Adlm_AC_fonipa ; pi_Adlm_AC_fonipa
+plt_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa
+pmc_Adlm_AC_fonipa ; huw_Adlm_AC_fonipa
+pmu_Adlm_AC_fonipa ; phr_Adlm_AC_fonipa
+pnb_Adlm_AC_fonipa ; lah_Adlm_AC_fonipa
+pol_Adlm_AC_fonipa ; pl_Adlm_AC_fonipa
+por_Adlm_AC_fonipa ; pt_Adlm_AC_fonipa
+ppa_Adlm_AC_fonipa ; bfy_Adlm_AC_fonipa
+ppr_Adlm_AC_fonipa ; lcq_Adlm_AC_fonipa
+prs_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+pry_Adlm_AC_fonipa ; prt_Adlm_AC_fonipa
+pus_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa
+puz_Adlm_AC_fonipa ; pub_Adlm_AC_fonipa
+que_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa
+quz_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa
+rmy_Adlm_AC_fonipa ; rom_Adlm_AC_fonipa
+roh_Adlm_AC_fonipa ; rm_Adlm_AC_fonipa
+ron_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+rum_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa
+run_Adlm_AC_fonipa ; rn_Adlm_AC_fonipa
+rus_Adlm_AC_fonipa ; ru_Adlm_AC_fonipa
+sag_Adlm_AC_fonipa ; sg_Adlm_AC_fonipa
+san_Adlm_AC_fonipa ; sa_Adlm_AC_fonipa
+sca_Adlm_AC_fonipa ; hle_Adlm_AC_fonipa
+scc_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+scr_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa
+sgn_Adlm_076_fonipa ; bzs_Adlm_fonipa
+sgn_Adlm_170_fonipa ; csn_Adlm_fonipa
+sgn_Adlm_208_fonipa ; dsl_Adlm_fonipa
+sgn_Adlm_249_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_250_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_276_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_278_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_280_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_300_fonipa ; gss_Adlm_fonipa
+sgn_Adlm_372_fonipa ; isg_Adlm_fonipa
+sgn_Adlm_380_fonipa ; ise_Adlm_fonipa
+sgn_Adlm_392_fonipa ; jsl_Adlm_fonipa
+sgn_Adlm_484_fonipa ; mfs_Adlm_fonipa
+sgn_Adlm_528_fonipa ; dse_Adlm_fonipa
+sgn_Adlm_558_fonipa ; ncs_Adlm_fonipa
+sgn_Adlm_578_fonipa ; nsi_Adlm_fonipa
+sgn_Adlm_620_fonipa ; psr_Adlm_fonipa
+sgn_Adlm_710_fonipa ; sfs_Adlm_fonipa
+sgn_Adlm_752_fonipa ; swl_Adlm_fonipa
+sgn_Adlm_826_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_840_fonipa ; ase_Adlm_fonipa
+sgn_Adlm_BR_fonipa ; bzs_Adlm_fonipa
+sgn_Adlm_CO_fonipa ; csn_Adlm_fonipa
+sgn_Adlm_DD_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_DE_fonipa ; gsg_Adlm_fonipa
+sgn_Adlm_DK_fonipa ; dsl_Adlm_fonipa
+sgn_Adlm_FR_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_FX_fonipa ; fsl_Adlm_fonipa
+sgn_Adlm_GB_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_GR_fonipa ; gss_Adlm_fonipa
+sgn_Adlm_IE_fonipa ; isg_Adlm_fonipa
+sgn_Adlm_IT_fonipa ; ise_Adlm_fonipa
+sgn_Adlm_JP_fonipa ; jsl_Adlm_fonipa
+sgn_Adlm_MX_fonipa ; mfs_Adlm_fonipa
+sgn_Adlm_NI_fonipa ; ncs_Adlm_fonipa
+sgn_Adlm_NL_fonipa ; dse_Adlm_fonipa
+sgn_Adlm_NO_fonipa ; nsi_Adlm_fonipa
+sgn_Adlm_PT_fonipa ; psr_Adlm_fonipa
+sgn_Adlm_SE_fonipa ; swl_Adlm_fonipa
+sgn_Adlm_UK_fonipa ; bfi_Adlm_fonipa
+sgn_Adlm_US_fonipa ; ase_Adlm_fonipa
+sgn_Adlm_ZA_fonipa ; sfs_Adlm_fonipa
+sh_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+sin_Adlm_AC_fonipa ; si_Adlm_AC_fonipa
+skk_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+slk_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa
+slo_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa
+slv_Adlm_AC_fonipa ; sl_Adlm_AC_fonipa
+sme_Adlm_AC_fonipa ; se_Adlm_AC_fonipa
+smo_Adlm_AC_fonipa ; sm_Adlm_AC_fonipa
+sna_Adlm_AC_fonipa ; sn_Adlm_AC_fonipa
+snd_Adlm_AC_fonipa ; sd_Adlm_AC_fonipa
+som_Adlm_AC_fonipa ; so_Adlm_AC_fonipa
+sot_Adlm_AC_fonipa ; st_Adlm_AC_fonipa
+spa_Adlm_AC_fonipa ; es_Adlm_AC_fonipa
+spy_Adlm_AC_fonipa ; kln_Adlm_AC_fonipa
+sqi_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa
+src_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa
+srd_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa
+srp_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa
+ssw_Adlm_AC_fonipa ; ss_Adlm_AC_fonipa
+sun_Adlm_AC_fonipa ; su_Adlm_AC_fonipa
+swa_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+swc_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+swe_Adlm_AC_fonipa ; sv_Adlm_AC_fonipa
+swh_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa
+tah_Adlm_AC_fonipa ; ty_Adlm_AC_fonipa
+tam_Adlm_AC_fonipa ; ta_Adlm_AC_fonipa
+tat_Adlm_AC_fonipa ; tt_Adlm_AC_fonipa
+tdu_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa
+tel_Adlm_AC_fonipa ; te_Adlm_AC_fonipa
+tgk_Adlm_AC_fonipa ; tg_Adlm_AC_fonipa
+tgl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa
+tha_Adlm_AC_fonipa ; th_Adlm_AC_fonipa
+thc_Adlm_AC_fonipa ; tpo_Adlm_AC_fonipa
+thx_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa
+tib_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa
+tie_Adlm_AC_fonipa ; ras_Adlm_AC_fonipa
+tir_Adlm_AC_fonipa ; ti_Adlm_AC_fonipa
+tkk_Adlm_AC_fonipa ; twm_Adlm_AC_fonipa
+tl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa
+tlw_Adlm_AC_fonipa ; weo_Adlm_AC_fonipa
+tmp_Adlm_AC_fonipa ; tyj_Adlm_AC_fonipa
+tne_Adlm_AC_fonipa ; kak_Adlm_AC_fonipa
+tnf_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa
+ton_Adlm_AC_fonipa ; to_Adlm_AC_fonipa
+tsf_Adlm_AC_fonipa ; taj_Adlm_AC_fonipa
+tsn_Adlm_AC_fonipa ; tn_Adlm_AC_fonipa
+tso_Adlm_AC_fonipa ; ts_Adlm_AC_fonipa
+ttq_Adlm_AC_fonipa ; tmh_Adlm_AC_fonipa
+tuk_Adlm_AC_fonipa ; tk_Adlm_AC_fonipa
+tur_Adlm_AC_fonipa ; tr_Adlm_AC_fonipa
+tw_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+twi_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa
+uig_Adlm_AC_fonipa ; ug_Adlm_AC_fonipa
+ukr_Adlm_AC_fonipa ; uk_Adlm_AC_fonipa
+umu_Adlm_AC_fonipa ; del_Adlm_AC_fonipa
+uok_Adlm_AC_fonipa ; ema_Adlm_AC_fonipa
+urd_Adlm_AC_fonipa ; ur_Adlm_AC_fonipa
+uzb_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa
+uzn_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa
+ven_Adlm_AC_fonipa ; ve_Adlm_AC_fonipa
+vie_Adlm_AC_fonipa ; vi_Adlm_AC_fonipa
+vol_Adlm_AC_fonipa ; vo_Adlm_AC_fonipa
+wel_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa
+wln_Adlm_AC_fonipa ; wa_Adlm_AC_fonipa
+wol_Adlm_AC_fonipa ; wo_Adlm_AC_fonipa
+xba_Adlm_AC_fonipa ; cax_Adlm_AC_fonipa
+xho_Adlm_AC_fonipa ; xh_Adlm_AC_fonipa
+xia_Adlm_AC_fonipa ; acn_Adlm_AC_fonipa
+xkh_Adlm_AC_fonipa ; waw_Adlm_AC_fonipa
+xpe_Adlm_AC_fonipa ; kpe_Adlm_AC_fonipa
+xsj_Adlm_AC_fonipa ; suj_Adlm_AC_fonipa
+xsl_Adlm_AC_fonipa ; den_Adlm_AC_fonipa
+ybd_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa
+ydd_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+yid_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa
+yma_Adlm_AC_fonipa ; lrr_Adlm_AC_fonipa
+ymt_Adlm_AC_fonipa ; mtm_Adlm_AC_fonipa
+yor_Adlm_AC_fonipa ; yo_Adlm_AC_fonipa
+yos_Adlm_AC_fonipa ; zom_Adlm_AC_fonipa
+yuu_Adlm_AC_fonipa ; yug_Adlm_AC_fonipa
+zai_Adlm_AC_fonipa ; zap_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+zh_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+zha_Adlm_AC_fonipa ; za_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa
+zho_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa
+zsm_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa
+zul_Adlm_AC_fonipa ; zu_Adlm_AC_fonipa
+zyb_Adlm_AC_fonipa ; za_Adlm_AC_fonipa
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index 296d92d..42e7716 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -14,6 +14,8 @@
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.io.BufferedReader;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
@@ -892,8 +894,8 @@
public void TestCanonicalization(){
final String[][]testCases = new String[][]{
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
- { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
- { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
+ { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
+ { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
@@ -956,14 +958,14 @@
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
- { "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
+ { "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
{ "qz-qz@Euro", null, "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
- { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
- { "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
+ { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
+ { "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
/* PRE_EURO and EURO conversions don't affect other keywords */
@@ -5175,21 +5177,18 @@
// also test with script, variants and extensions
Assert.assertEquals("fa-Cyrl-AF-1009-u-ca-roc", canonicalTag("prs-Cyrl-1009-u-ca-roc"));
- if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
- // language _ country -> language _ script _ country
- Assert.assertEquals("pa-Guru-IN", canonicalTag("pa-IN"));
- }
+ Assert.assertEquals("pa-IN", canonicalTag("pa-IN"));
// also test with script
Assert.assertEquals("pa-Latn-IN", canonicalTag("pa-Latn-IN"));
- if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
- // also test with variants and extensions
- Assert.assertEquals("pa-Guru-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi"));
+ // also test with variants and extensions
+ Assert.assertEquals("pa-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi"));
- // language _ script _ country -> language _ country
- Assert.assertEquals("ky-KG", canonicalTag("ky-Cyrl-KG"));
- // also test with variants and extensions
- Assert.assertEquals("ky-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc"));
- }
+ Assert.assertEquals("ky-Cyrl-KG", canonicalTag("ky-Cyrl-KG"));
+ // also test with variants and extensions
+ Assert.assertEquals("ky-Cyrl-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc"));
+
+ // Test replacement of scriptAlias
+ Assert.assertEquals("en-Zinh", canonicalTag("en-Qaai"));
// Test replacement of territoryAlias
// 554 has one replacement
@@ -5209,5 +5208,35 @@
Assert.assertEquals("uz-Cyrl-UZ-5678-u-nu-latn", canonicalTag("uz-Cyrl-172-5678-u-nu-latn"));
// a language not used in this region
Assert.assertEquals("fr-RU", canonicalTag("fr-172"));
+
+ Assert.assertEquals("ja-Latn-alalc97", canonicalTag("ja-Latn-hepburn-heploc"));
+
+ Assert.assertEquals("aaa-Fooo-RU", canonicalTag("aaa-Fooo-SU"));
+ }
+
+    /** Checks canonicalTag() on each "from TAB ; TAB to" line of localeCanonicalization.txt. */
+    @Test
+    public void TestLocaleCanonicalizationFromFile() throws IOException {
+        BufferedReader testFile = TestUtil.getDataReader("unicode/localeCanonicalization.txt");
+        int checked = 0;
+        try {
+            for (String line; (line = testFile.readLine()) != null; ) {
+                if (line.startsWith("#")) {
+                    continue; // comment line
+                }
+                String[] fields = line.split("\t;\t");
+                if (fields.length != 2) {
+                    continue; // not a "from TAB ; TAB to" line
+                }
+                String from = fields[0].replace("_", "-");
+                String to = fields[1].replace("_", "-");
+                Assert.assertEquals("canonicalTag(" + from + ")", to, canonicalTag(from));
+                checked++;
+            }
+        } finally {
+            testFile.close();
+        }
+        // Guard against a vacuous pass when no line matched the expected format.
+        Assert.assertTrue("no test cases found in localeCanonicalization.txt", checked > 0);
     }
}