ICU-9695 port LocaleMatcher to C++
diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in
index 79e371b..d21f5d0 100644
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -88,8 +88,9 @@
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \
-localebuilder.o \
+localebuilder.o localeprioritylist.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
+lsr.o loclikelysubtags.o locdistance.o localematcher.o \
bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp
index 852cc53..dda29da 100644
--- a/icu4c/source/common/charstr.cpp
+++ b/icu4c/source/common/charstr.cpp
@@ -35,6 +35,17 @@
return *this;
}
+// Returns a newly uprv_malloc()ed copy of the string's bytes plus the byte that
+// follows them (len + 1 bytes in total). The caller owns the returned memory.
+// Returns nullptr if errorCode is already a failure on entry; if the allocation
+// fails, sets U_MEMORY_ALLOCATION_ERROR and returns nullptr.
+char *CharString::cloneData(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+    char *copy = static_cast<char *>(uprv_malloc(len + 1));
+    if (copy != nullptr) {
+        uprv_memcpy(copy, buffer.getAlias(), len + 1);
+    } else {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return copy;
+}
+
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
len=s.len;
@@ -52,6 +63,18 @@
return -1;
}
+// Reports whether the bytes of s occur contiguously somewhere in this string.
+// An empty s is never reported as contained.
+bool CharString::contains(StringPiece s) const {
+    if (s.empty()) { return false; }
+    const int32_t needleLength = s.length();
+    const char *haystack = buffer.getAlias();
+    // Number of start offsets at which s could still fit;
+    // zero or negative when s is longer than this string.
+    const int32_t candidates = len - needleLength + 1;
+    int32_t offset = 0;
+    while (offset < candidates) {
+        if (uprv_memcmp(haystack + offset, s.data(), needleLength) == 0) {
+            return true;
+        }
+        ++offset;
+    }
+    return false;
+}
+
CharString &CharString::truncate(int32_t newLength) {
if(newLength<0) {
newLength=0;
diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h
index 1a97e01..23b950e 100644
--- a/icu4c/source/common/charstr.h
+++ b/icu4c/source/common/charstr.h
@@ -82,10 +82,24 @@
const char *data() const { return buffer.getAlias(); }
char *data() { return buffer.getAlias(); }
+ /**
+ * Allocates length()+1 chars and copies the NUL-terminated data().
+ * The caller must uprv_free() the result.
+ */
+ char *cloneData(UErrorCode &errorCode) const;
+
+    /** @return true if this string and other have the same length and the same bytes */
+    bool operator==(StringPiece other) const {
+        if (len != other.length()) { return false; }
+        // Two empty strings are equal; skip the byte comparison entirely.
+        if (len == 0) { return true; }
+        return uprv_memcmp(data(), other.data(), len) == 0;
+    }
+    /** @return the negation of operator==(other) */
+    bool operator!=(StringPiece other) const {
+        const bool same = operator==(other);
+        return !same;
+    }
/** @return last index of c, or -1 if c is not in this string */
int32_t lastIndexOf(char c) const;
+ /** @return true if s is not empty and its bytes occur contiguously within this string */
+ bool contains(StringPiece s) const;
+
CharString &clear() { len=0; buffer[0]=0; return *this; }
CharString &truncate(int32_t newLength);
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj
index 3bfc646..f8368bb 100644
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -239,14 +239,20 @@
<ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
+ <ClCompile Include="localebuilder.cpp" />
+ <ClCompile Include="localematcher.cpp" />
+ <ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
<ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" />
+ <ClCompile Include="locdistance.cpp" />
<ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp" />
<ClCompile Include="loclikely.cpp" />
+ <ClCompile Include="loclikelysubtags.cpp" />
<ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp" />
+ <ClCompile Include="lsr.cpp" />
<ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" />
@@ -257,7 +263,6 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
- <ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@@ -408,8 +413,12 @@
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<ClInclude Include="punycode.h" />
+ <ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
+ <ClInclude Include="locdistance.h" />
+ <ClInclude Include="loclikelysubtags.h" />
<ClInclude Include="locutil.h" />
+ <ClInclude Include="lsr.h" />
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<ClInclude Include="ulocimp.h" />
@@ -449,7 +458,6 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
- <ClInclude Include="unicode\localebuilder.h" />
<ClInclude Include="restrace.h" />
</ItemGroup>
<ItemGroup>
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index f1a7825..5fabc6f 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -313,6 +313,15 @@
<ClCompile Include="uts46.cpp">
<Filter>idna</Filter>
</ClCompile>
+ <ClCompile Include="localebuilder.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
+ <ClCompile Include="localematcher.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
+ <ClCompile Include="localeprioritylist.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
<ClCompile Include="locavailable.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
@@ -322,18 +331,27 @@
<ClCompile Include="locdispnames.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
+ <ClCompile Include="locdistance.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
<ClCompile Include="locid.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
<ClCompile Include="loclikely.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
+ <ClCompile Include="loclikelysubtags.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
<ClCompile Include="locresdata.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
<ClCompile Include="locutil.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
+ <ClCompile Include="lsr.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
<ClCompile Include="resbund.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
@@ -361,9 +379,6 @@
<ClCompile Include="resource.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
- <ClCompile Include="localebuilder.cpp">
- <Filter>locales & resources</Filter>
- </ClCompile>
<ClCompile Include="caniter.cpp">
<Filter>normalization</Filter>
</ClCompile>
@@ -816,12 +831,24 @@
<ClInclude Include="punycode.h">
<Filter>idna</Filter>
</ClInclude>
+ <ClInclude Include="localeprioritylist.h">
+ <Filter>locales & resources</Filter>
+ </ClInclude>
<ClInclude Include="locbased.h">
<Filter>locales & resources</Filter>
</ClInclude>
+ <ClInclude Include="locdistance.h">
+ <Filter>locales & resources</Filter>
+ </ClInclude>
+ <ClInclude Include="loclikelysubtags.h">
+ <Filter>locales & resources</Filter>
+ </ClInclude>
<ClInclude Include="locutil.h">
<Filter>locales & resources</Filter>
</ClInclude>
+ <ClInclude Include="lsr.h">
+ <Filter>locales & resources</Filter>
+ </ClInclude>
<ClInclude Include="ulocimp.h">
<Filter>locales & resources</Filter>
</ClInclude>
@@ -1078,6 +1105,12 @@
<CustomBuild Include="unicode\uidna.h">
<Filter>idna</Filter>
</CustomBuild>
+ <CustomBuild Include="unicode\localebuilder.h">
+ <Filter>locales & resources</Filter>
+ </CustomBuild>
+ <CustomBuild Include="unicode\localematcher.h">
+ <Filter>locales & resources</Filter>
+ </CustomBuild>
<CustomBuild Include="unicode\locid.h">
<Filter>locales & resources</Filter>
</CustomBuild>
@@ -1237,8 +1270,5 @@
<CustomBuild Include="unicode\stringoptions.h">
<Filter>strings</Filter>
</CustomBuild>
- <CustomBuild Include="unicode\localebuilder.h">
- <Filter>locales & resources</Filter>
- </CustomBuild>
</ItemGroup>
</Project>
diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj
index bff752c..d85942c 100644
--- a/icu4c/source/common/common_uwp.vcxproj
+++ b/icu4c/source/common/common_uwp.vcxproj
@@ -430,14 +430,20 @@
<ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
+ <ClCompile Include="localebuilder.cpp" />
+ <ClCompile Include="localematcher.cpp" />
+ <ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
<ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" />
+ <ClCompile Include="locdistance.cpp" />
<ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp" />
<ClCompile Include="loclikely.cpp" />
+ <ClCompile Include="loclikelysubtags.cpp" />
<ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp" />
+ <ClCompile Include="lsr.cpp" />
<ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" />
@@ -448,7 +454,6 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
- <ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@@ -600,8 +605,12 @@
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<ClInclude Include="punycode.h" />
+ <ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
+ <ClInclude Include="locdistance.h" />
+ <ClInclude Include="loclikelysubtags.h" />
<ClInclude Include="locutil.h" />
+ <ClInclude Include="lsr.h" />
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<ClInclude Include="ulocimp.h" />
@@ -640,7 +649,6 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
- <ClInclude Include="unicode\localebuilder.h" />
<ClInclude Include="restrace.h" />
</ItemGroup>
<ItemGroup>
diff --git a/icu4c/source/common/localebuilder.cpp b/icu4c/source/common/localebuilder.cpp
index e9caa20..1dd8131 100644
--- a/icu4c/source/common/localebuilder.cpp
+++ b/icu4c/source/common/localebuilder.cpp
@@ -157,13 +157,18 @@
}
static void
-_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
+_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
+ Locale& to, bool validate, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || iter.isNull()) { return; }
+ LocalPointer<icu::StringEnumeration> ownedKeywords;
+ if (keywords == nullptr) {
+ ownedKeywords.adoptInstead(from.createKeywords(errorCode));
+ if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
+ keywords = ownedKeywords.getAlias();
+ }
const char* key;
- while ((key = iter->next(nullptr, errorCode)) != nullptr) {
+ while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
CharString value;
CharStringByteSink sink(&value);
from.getKeywordValue(key, sink, errorCode);
@@ -176,34 +181,34 @@
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- to->setKeywordValue(key, value.data(), errorCode);
+ to.setKeywordValue(key, value.data(), errorCode);
if (U_FAILURE(errorCode)) { return; }
}
}
void static
-_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
+_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
{
// Clear Unicode attributes
- locale->setKeywordValue(kAttributeKey, "", errorCode);
+ locale.setKeywordValue(kAttributeKey, "", errorCode);
// Clear all Unicode keyword values
- LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
+ LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
const char* key;
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
- locale->setUnicodeKeywordValue(key, nullptr, errorCode);
+ locale.setUnicodeKeywordValue(key, nullptr, errorCode);
}
}
static void
-_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
+_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
{
// Add the unicode extensions to extensions_
CharString locale_str("und-u-", errorCode);
locale_str.append(value, errorCode);
_copyExtensions(
- Locale::forLanguageTag(locale_str.data(), errorCode),
+ Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
locale, false, errorCode);
}
@@ -235,10 +240,10 @@
status_);
return *this;
}
- _clearUAttributesAndKeyType(extensions_, status_);
+ _clearUAttributesAndKeyType(*extensions_, status_);
if (U_FAILURE(status_)) { return *this; }
if (!value.empty()) {
- _setUnicodeExtensions(extensions_, value_str, status_);
+ _setUnicodeExtensions(*extensions_, value_str, status_);
}
return *this;
}
@@ -401,6 +406,24 @@
return bogus;
}
+// Copies the keywords of src into this builder's extensions_ locale,
+// allocating extensions_ first if it does not exist yet.
+// Does nothing if errorCode is already a failure or if src has no keywords.
+// Failures are reported through errorCode; an allocation failure is additionally
+// recorded in the builder's own status_.
+void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
+        // Error, or no extensions to copy.
+        return;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            // Every other failure in this function is visible in errorCode, so the
+            // caller must see this one there too, not only in the builder's status_.
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    // Reuse the enumeration created above instead of creating a second one;
+    // the keyword values are copied without validation (validate == false).
+    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
+}
+
Locale LocaleBuilder::build(UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) {
@@ -425,7 +448,7 @@
}
Locale product(locale_str.data());
if (extensions_ != nullptr) {
- _copyExtensions(*extensions_, &product, true, errorCode);
+ _copyExtensions(*extensions_, nullptr, product, true, errorCode);
}
if (U_FAILURE(errorCode)) {
return makeBogusLocale();
diff --git a/icu4c/source/common/localematcher.cpp b/icu4c/source/common/localematcher.cpp
new file mode 100644
index 0000000..d975fe7
--- /dev/null
+++ b/icu4c/source/common/localematcher.cpp
@@ -0,0 +1,720 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematcher.cpp
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCMATCHER_H__
+#define __LOCMATCHER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/localebuilder.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+#include "localeprioritylist.h"
+#include "loclikelysubtags.h"
+#include "locdistance.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "uvector.h"
+
+#define UND_LSR LSR("und", "", "")
+
+/**
+ * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
+ * LocaleLsrIterator uses this value to decide whether it remembers a desired locale
+ * by pointer or by making, and later deleting, its own copy.
+ *
+ * @draft ICU 65
+ */
+enum ULocMatchLifetime {
+ /**
+ * Locale objects are temporary.
+ * The matcher will make a copy of a locale that will be used beyond one function call.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_TEMPORARY_LOCALES,
+ /**
+ * Locale objects are stored at least as long as the matcher is used.
+ * The matcher will keep only a pointer to a locale that will be used beyond one function call,
+ * avoiding a copy.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchLifetime ULocMatchLifetime;
+#endif
+
+U_NAMESPACE_BEGIN
+
+// Move constructor: takes over all fields of src.
+// If src owned its desired locale, that ownership moves to the new object and
+// src is reset so that its destructor will not delete the locale.
+// If src did not own the desired locale, src is left unchanged and both objects
+// refer to the same locale.
+LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
+ desiredLocale(src.desiredLocale),
+ supportedLocale(src.supportedLocale),
+ desiredIndex(src.desiredIndex),
+ supportedIndex(src.supportedIndex),
+ desiredIsOwned(src.desiredIsOwned) {
+ if (desiredIsOwned) {
+ // Detach the owned locale from src; only one object may delete it.
+ src.desiredLocale = nullptr;
+ src.desiredIndex = -1;
+ src.desiredIsOwned = FALSE;
+ }
+}
+
+// Deletes the desired locale, but only if this Result owns it.
+LocaleMatcher::Result::~Result() {
+    if (!desiredIsOwned) {
+        return;
+    }
+    delete desiredLocale;
+}
+
+// Move assignment: releases what this Result currently holds, then takes over
+// all fields of src. If src owned its desired locale, ownership moves here and
+// src is reset so that its destructor will not delete the locale.
+LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
+    // Self-move must be a no-op. Without this check the destructor call below
+    // would delete our own owned locale before it is "moved" back in.
+    if (this == &src) {
+        return *this;
+    }
+    this->~Result();
+
+    desiredLocale = src.desiredLocale;
+    supportedLocale = src.supportedLocale;
+    desiredIndex = src.desiredIndex;
+    supportedIndex = src.supportedIndex;
+    desiredIsOwned = src.desiredIsOwned;
+
+    if (desiredIsOwned) {
+        // Detach the owned locale from src; only one object may delete it.
+        src.desiredLocale = nullptr;
+        src.desiredIndex = -1;
+        src.desiredIsOwned = FALSE;
+    }
+    return *this;
+}
+
+// Builds a locale from the matched supported locale plus the region, variants
+// and keywords of the best desired locale.
+// Returns the root locale if errorCode is a failure or there is no supported locale.
+// Returns the supported locale unchanged if there is no desired locale or if the
+// desired and supported locales are equal.
+Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return Locale::getRoot(); }
+    if (supportedLocale == nullptr) { return Locale::getRoot(); }
+
+    const Locale *desired = getDesiredLocale();
+    if (desired == nullptr) { return *supportedLocale; }
+    if (*supportedLocale == *desired) { return *supportedLocale; }
+
+    LocaleBuilder builder;
+    builder.setLocale(*supportedLocale);
+
+    // Take the region from the desired locale when it has one.
+    const char *desiredRegion = desired->getCountry();
+    if (desiredRegion[0] != 0) {
+        builder.setRegion(desiredRegion);
+    }
+
+    // Take the variants from the desired locale when it has any.
+    // This overrides the variants of the supported locale, e.g.
+    // "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (ulster is replaced).
+    const char *desiredVariants = desired->getVariant();
+    if (desiredVariants[0] != 0) {
+        builder.setVariant(desiredVariants);
+    }
+
+    // Take the extensions from the desired locale when it has any.
+    // C++ note: the remark below was copied from Java and may not hold here,
+    // as long as C++ copies by legacy ICU keyword rather than by extension singleton.
+    //   This overrides the extensions of the supported locale, e.g.
+    //   "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+    //   (the calendar is replaced).
+    builder.copyExtensionsFrom(*desired, errorCode);
+    return builder.build(errorCode);
+}
+
+// Move constructor: copies the settings of src and takes over its two heap objects
+// (the supported-locales vector and the default locale). The pointers in src are
+// cleared so that its destructor does not delete them a second time.
+LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
+ errorCode_(src.errorCode_),
+ supportedLocales_(src.supportedLocales_),
+ thresholdDistance_(src.thresholdDistance_),
+ demotion_(src.demotion_),
+ defaultLocale_(src.defaultLocale_),
+ favor_(src.favor_) {
+ src.supportedLocales_ = nullptr;
+ src.defaultLocale_ = nullptr;
+}
+
+// Deletes the supported-locales vector and the default locale;
+// either pointer may be null.
+LocaleMatcher::Builder::~Builder() {
+ delete supportedLocales_;
+ delete defaultLocale_;
+}
+
+// Move assignment: releases what this Builder currently holds, then copies the
+// settings of src and takes over its supported-locales vector and default locale.
+// The pointers in src are cleared so that its destructor does not delete them again.
+LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
+    // Self-move must be a no-op. Without this check the destructor call below
+    // would delete our own vector and default locale and leave the builder emptied.
+    if (this == &src) {
+        return *this;
+    }
+    this->~Builder();
+
+    errorCode_ = src.errorCode_;
+    supportedLocales_ = src.supportedLocales_;
+    thresholdDistance_ = src.thresholdDistance_;
+    demotion_ = src.demotion_;
+    defaultLocale_ = src.defaultLocale_;
+    favor_ = src.favor_;
+
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    return *this;
+}
+
+// Empties the supported-locales vector if one has been created; keeps the vector itself.
+void LocaleMatcher::Builder::clearSupportedLocales() {
+    if (supportedLocales_ == nullptr) {
+        return;
+    }
+    supportedLocales_->removeAllElements();
+}
+
+// Creates the supported-locales vector on first use.
+// Returns true if the vector exists and the builder is not in an error state.
+// Returns false if errorCode_ was already a failure, or if creating the vector
+// failed (in which case errorCode_ is set).
+bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
+    if (U_FAILURE(errorCode_)) { return false; }
+    if (supportedLocales_ == nullptr) {
+        supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
+        // The UVector constructor reports its own failures through errorCode_;
+        // a null result without such a failure means the allocation itself failed.
+        if (U_SUCCESS(errorCode_) && supportedLocales_ == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    return U_SUCCESS(errorCode_);
+}
+
+// Replaces the supported locales with those parsed from a locale list string.
+// The string is parsed by LocalePriorityList; on a parse error the existing
+// supported locales are left untouched.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
+        StringPiece locales) {
+    LocalePriorityList list(locales, errorCode_);
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    const int32_t length = list.getLengthIncludingRemoved();
+    // Stop at the first failure; errorCode_ is a success when the loop starts.
+    for (int32_t i = 0; i < length && U_SUCCESS(errorCode_); ++i) {
+        Locale *locale = list.orphanLocaleAt(i);
+        // Slots without a locale are skipped.
+        if (locale != nullptr) {
+            supportedLocales_->addElement(locale, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                // The orphaned locale was not stored in the vector, so free it here.
+                delete locale;
+            }
+        }
+    }
+    return *this;
+}
+
+// Replaces the supported locales with clones of the locales produced by the iterator.
+// Stops at the first allocation or vector failure and records it in errorCode_.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    // errorCode_ is a success here; once it fails the iterator is not consulted again.
+    while (U_SUCCESS(errorCode_) && locales.hasNext()) {
+        const Locale &source = locales.next();
+        Locale *copy = source.clone();
+        if (copy == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            supportedLocales_->addElement(copy, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                // The clone was not stored in the vector, so free it here.
+                delete copy;
+            }
+        }
+    }
+    return *this;
+}
+
+// Appends a clone of the given locale to the supported locales,
+// creating the vector if necessary. Failures are recorded in errorCode_.
+LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    Locale *copy = locale.clone();
+    if (copy != nullptr) {
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            // The clone was not stored in the vector, so free it here.
+            delete copy;
+        }
+    } else {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Sets the default locale to a clone of defaultLocale, or clears it when
+// defaultLocale is nullptr. The previous default locale is deleted only after
+// the clone has succeeded; on allocation failure the old value is kept.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    Locale *replacement = (defaultLocale == nullptr) ? nullptr : defaultLocale->clone();
+    if (defaultLocale != nullptr && replacement == nullptr) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    delete defaultLocale_;
+    defaultLocale_ = replacement;
+    return *this;
+}
+
+// Stores the favored subtag; ignored while the builder is in an error state.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
+    if (U_SUCCESS(errorCode_)) {
+        favor_ = subtag;
+    }
+    return *this;
+}
+
+// Stores the demotion setting; ignored while the builder is in an error state.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
+    if (U_SUCCESS(errorCode_)) {
+        demotion_ = demotion;
+    }
+    return *this;
+}
+
+#if 0
+/**
+ * <i>Internal only!</i>
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ if (thresholdDistance > 100) {
+ thresholdDistance = 100;
+ }
+ thresholdDistance_ = thresholdDistance;
+ return *this;
+}
+#endif
+
+// Propagates the builder's error, if any, into outErrorCode without overwriting
+// a failure that outErrorCode already holds.
+// Returns TRUE if outErrorCode is a failure afterwards, FALSE otherwise.
+UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_SUCCESS(outErrorCode)) {
+        if (U_SUCCESS(errorCode_)) { return FALSE; }
+        outErrorCode = errorCode_;
+    }
+    return TRUE;
+}
+
+// Creates a LocaleMatcher from the current builder settings.
+// A failure recorded in the builder is first copied into errorCode,
+// unless errorCode already holds a failure.
+LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
+    // copyErrorTo() performs exactly this propagation; its return value is not needed.
+    (void)copyErrorTo(errorCode);
+    return LocaleMatcher(*this, errorCode);
+}
+
+namespace {
+
+// Returns the maximized LSR for the locale, or the "und" LSR if errorCode is
+// a failure, the locale is bogus, or the locale name is empty.
+LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
+                       UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode) && !locale.isBogus() && *locale.getName() != 0 /* not "und" */) {
+        return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
+    }
+    return UND_LSR;
+}
+
+// UHashtable key hash function: returns the hashCode field of the LSR key.
+int32_t hashLSR(const UHashTok token) {
+    return static_cast<const LSR *>(token.pointer)->hashCode;
+}
+
+// UHashtable key comparator: compares the two LSR keys with LSR::operator==.
+UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
+    const LSR &left = *static_cast<const LSR *>(t1.pointer);
+    const LSR &right = *static_cast<const LSR *>(t2.pointer);
+    return left == right;
+}
+
+// Maps lsr to i unless lsr already has an entry. i must be positive because
+// a lookup result of 0 is treated as "not present".
+// Returns true only if a new entry was added successfully.
+bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    U_ASSERT(i > 0);
+    if (uhash_geti(lsrToIndex, &lsr) != 0) {
+        // Already mapped; keep the existing entry.
+        return false;
+    }
+    uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
+    return U_SUCCESS(errorCode);
+}
+
+}  // namespace
+
+// Builds a matcher from the builder's settings.
+//
+// The constructor clones every supported locale, computes one maximized LSR per
+// supported locale, and then fills three parallel structures:
+// - supportedLsrToIndex: hash table from LSR to (index of first supported locale
+//   with that LSR) + 1
+// - supportedLSRs / supportedIndexes: ordered arrays of distinct LSRs and the
+//   index of the supported locale each one came from
+// It also determines the default locale and its index among the supported locales
+// (-1 if it is not one of them).
+//
+// On any failure errorCode is set and the constructor returns early; the object
+// must remain safe to destroy in that state.
+LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
+        likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
+        localeDistance(*LocaleDistance::getSingleton(errorCode)),
+        thresholdDistance(builder.thresholdDistance_),
+        demotionPerDesiredLocale(0),
+        favorSubtag(builder.favor_),
+        supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
+        supportedLsrToIndex(nullptr),
+        supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
+        ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
+    if (U_FAILURE(errorCode)) { return; }
+    // A negative threshold in the builder means "use the default".
+    if (thresholdDistance < 0) {
+        thresholdDistance = localeDistance.getDefaultScriptDistance();
+    }
+    supportedLocalesLength = builder.supportedLocales_ != nullptr ?
+        builder.supportedLocales_->size() : 0;
+    const Locale *def = builder.defaultLocale_;
+    int32_t idef = -1;
+    if (supportedLocalesLength > 0) {
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., language tag strings)
+        // we can return those by parallel index.
+        supportedLocales = static_cast<const Locale **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
+        // Supported LSRs in input order.
+        // In C++, we store these permanently to simplify ownership management
+        // in the hash tables. Duplicate LSRs (if any) are unused overhead.
+        lsrs = new LSR[supportedLocalesLength];
+        if (supportedLocales == nullptr || lsrs == nullptr) {
+            // At this point supportedLocales is either null or still uninitialized,
+            // while the destructor deletes supportedLocales[0..supportedLocalesLength).
+            // Release both arrays and reset the length so that the object
+            // is safe to destroy.
+            uprv_free(supportedLocales);
+            supportedLocales = nullptr;
+            delete[] lsrs;
+            lsrs = nullptr;
+            supportedLocalesLength = 0;
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // If the constructor fails partway, we need null pointers for destructibility.
+        uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
+        // Also find the first supported locale whose LSR is
+        // the same as that for the default locale.
+        LSR builderDefaultLSR;
+        const LSR *defLSR = nullptr;
+        if (def != nullptr) {
+            builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
+            if (U_FAILURE(errorCode)) { return; }
+            defLSR = &builderDefaultLSR;
+        }
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
+            supportedLocales[i] = locale.clone();
+            if (supportedLocales[i] == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            const Locale &supportedLocale = *supportedLocales[i];
+            LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
+            lsr.setHashCode();
+            if (U_FAILURE(errorCode)) { return; }
+            if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
+                idef = i;
+                defLSR = &lsr;  // owned pointer to put into supportedLsrToIndex
+                if (*def == supportedLocale) {
+                    def = &supportedLocale;  // owned pointer to keep
+                }
+            }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, supported index).
+        // We insert the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales (aka "paradigm locales") in builder order.
+        // 3. Remaining locales in builder order.
+        // In Java, we use a LinkedHashMap for both map & ordered lists.
+        // In C++, we use separate structures.
+        // We over-allocate arrays of LSRs and indexes for simplicity.
+        // We reserve slots at the array starts for the default and paradigm locales,
+        // plus enough for all supported locales.
+        // If there are few paradigm locales and few duplicate supported LSRs,
+        // then the amount of wasted space is small.
+        supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
+                                             supportedLocalesLength, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
+        int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
+        supportedLSRs = static_cast<const LSR **>(
+            uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
+        supportedIndexes = static_cast<int32_t *>(
+            uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
+        if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Slots [0, paradigmLimit) hold the default and paradigm LSRs;
+        // slots from paradigmLimit onward hold all other LSRs.
+        int32_t paradigmIndex = 0;
+        int32_t otherIndex = paradigmLimit;
+        if (idef >= 0) {
+            // Hash table values are index + 1 because 0 means "not found".
+            uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
+            supportedLSRs[0] = defLSR;
+            supportedIndexes[0] = idef;
+            paradigmIndex = 1;
+        }
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            if (i == idef) { continue; }
+            const Locale &locale = *supportedLocales[i];
+            const LSR &lsr = lsrs[i];
+            if (defLSR == nullptr) {
+                // No explicit default locale: the first supported locale becomes the default.
+                U_ASSERT(i == 0);
+                def = &locale;
+                defLSR = &lsr;
+                idef = 0;
+                uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
+                supportedLSRs[0] = &lsr;
+                supportedIndexes[0] = 0;
+                paradigmIndex = 1;
+            } else if (idef >= 0 && lsr == *defLSR) {
+                // lsr == *defLSR means that this supported locale is
+                // a duplicate of the default locale.
+                // Either an explicit default locale is supported, and we added it before the loop,
+                // or there is no explicit default locale, and this is
+                // a duplicate of the first supported locale.
+                // In both cases, idef >= 0 now, so otherwise we can skip the comparison.
+                // For a duplicate, putIfAbsent() is a no-op, so nothing to do.
+            } else {
+                if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
+                    if (localeDistance.isParadigmLSR(lsr)) {
+                        supportedLSRs[paradigmIndex] = &lsr;
+                        supportedIndexes[paradigmIndex++] = i;
+                    } else {
+                        supportedLSRs[otherIndex] = &lsr;
+                        supportedIndexes[otherIndex++] = i;
+                    }
+                }
+            }
+            if (U_FAILURE(errorCode)) { return; }
+        }
+        // Squeeze out unused array slots.
+        if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
+            uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(const LSR *));
+            uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(int32_t));
+        }
+        supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
+    }
+
+    // Keep a private clone of the default locale unless def already points at
+    // one of our own supported locales.
+    if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
+        ownedDefaultLocale = def->clone();
+        if (ownedDefaultLocale == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        def = ownedDefaultLocale;
+    }
+    defaultLocale = def;
+    defaultLocaleIndex = idef;
+
+    if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
+        demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
+    }
+}
+
+// Move constructor: takes over every array, the hash table and the owned default
+// locale of src, and copies its scalar settings.
+// Afterwards src holds only null pointers, zero lengths and a default index of -1,
+// so its destructor has nothing to release.
+LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
+ likelySubtags(src.likelySubtags),
+ localeDistance(src.localeDistance),
+ thresholdDistance(src.thresholdDistance),
+ demotionPerDesiredLocale(src.demotionPerDesiredLocale),
+ favorSubtag(src.favorSubtag),
+ supportedLocales(src.supportedLocales), lsrs(src.lsrs),
+ supportedLocalesLength(src.supportedLocalesLength),
+ supportedLsrToIndex(src.supportedLsrToIndex),
+ supportedLSRs(src.supportedLSRs),
+ supportedIndexes(src.supportedIndexes),
+ supportedLSRsLength(src.supportedLSRsLength),
+ ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale),
+ defaultLocaleIndex(src.defaultLocaleIndex) {
+ // Detach everything from src so that nothing is released twice.
+ src.supportedLocales = nullptr;
+ src.lsrs = nullptr;
+ src.supportedLocalesLength = 0;
+ src.supportedLsrToIndex = nullptr;
+ src.supportedLSRs = nullptr;
+ src.supportedIndexes = nullptr;
+ src.supportedLSRsLength = 0;
+ src.ownedDefaultLocale = nullptr;
+ src.defaultLocale = nullptr;
+ src.defaultLocaleIndex = -1;
+}
+
+// Releases the cloned supported locales, the LSR array, the hash table,
+// the ordered LSR/index arrays and the owned default locale.
+LocaleMatcher::~LocaleMatcher() {
+    // The constructor sets supportedLocalesLength before it allocates the array,
+    // and returns early if that allocation fails. The array pointer can therefore
+    // be null while the length is positive, so it must be checked before indexing.
+    if (supportedLocales != nullptr) {
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            delete supportedLocales[i];
+        }
+    }
+    uprv_free(supportedLocales);
+    delete[] lsrs;
+    uhash_close(supportedLsrToIndex);
+    uprv_free(supportedLSRs);
+    uprv_free(supportedIndexes);
+    delete ownedDefaultLocale;
+}
+
+// Move assignment: releases everything this matcher currently holds, then takes
+// over every array, the hash table and the owned default locale of src, and
+// copies its scalar settings. The likelySubtags and localeDistance reference
+// members are not reassigned here.
+// Afterwards src holds only null pointers, zero lengths and a default index of -1.
+LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
+    // Self-move must be a no-op. Without this check the destructor call below
+    // would free our own data and the matcher would end up emptied.
+    if (this == &src) {
+        return *this;
+    }
+    this->~LocaleMatcher();
+
+    thresholdDistance = src.thresholdDistance;
+    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
+    favorSubtag = src.favorSubtag;
+    supportedLocales = src.supportedLocales;
+    lsrs = src.lsrs;
+    supportedLocalesLength = src.supportedLocalesLength;
+    supportedLsrToIndex = src.supportedLsrToIndex;
+    supportedLSRs = src.supportedLSRs;
+    supportedIndexes = src.supportedIndexes;
+    supportedLSRsLength = src.supportedLSRsLength;
+    ownedDefaultLocale = src.ownedDefaultLocale;
+    defaultLocale = src.defaultLocale;
+    defaultLocaleIndex = src.defaultLocaleIndex;
+
+    // Detach everything from src so that nothing is released twice.
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLocalesLength = 0;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.supportedLSRsLength = 0;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+    src.defaultLocaleIndex = -1;
+    return *this;
+}
+
+// Adapts a Locale::Iterator to yield one LSR per desired locale (via getMaximalLsrOrUnd)
+// and remembers which desired locale gave the best match so far.
+class LocaleLsrIterator {
+public:
+    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
+                      ULocMatchLifetime lifetime) :
+            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
+    // Not copyable: with ULOCMATCH_TEMPORARY_LOCALES this object owns `remembered`,
+    // so a copy would delete the same Locale a second time.
+    LocaleLsrIterator(const LocaleLsrIterator &) = delete;
+    LocaleLsrIterator &operator=(const LocaleLsrIterator &) = delete;
+    ~LocaleLsrIterator() {
+        if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) { delete remembered; }
+    }
+    bool hasNext() const { return locales.hasNext(); }
+
+    // Advances to the next desired locale and returns getMaximalLsrOrUnd() for it.
+    LSR next(UErrorCode &errorCode) {
+        current = &locales.next();
+        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
+    }
+
+    // Marks the current locale as the best one: STORED keeps the pointer, TEMPORARY keeps a copy.
+    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
+        bestDesiredIndex = desiredIndex;
+        if (lifetime == ULOCMATCH_STORED_LOCALES) {
+            remembered = current;
+        } else {
+            // ULOCMATCH_TEMPORARY_LOCALES
+            delete remembered;
+            remembered = new Locale(*current);
+            if (remembered == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    // Passes the remembered locale to the caller; the destructor will no longer delete it.
+    const Locale *orphanRemembered() {
+        const Locale *rem = remembered;
+        remembered = nullptr;
+        return rem;
+    }
+    int32_t getBestDesiredIndex() const { return bestDesiredIndex; }
+
+private:
+    const XLikelySubtags &likelySubtags;
+    Locale::Iterator &locales;
+    ULocMatchLifetime lifetime;
+    const Locale *current = nullptr, *remembered = nullptr;
+    int32_t bestDesiredIndex = -1;
+};
+
+// Returns the best supported locale for desiredLocale, or defaultLocale if there is no match.
+const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    const int32_t bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), nullptr, errorCode);
+    return (U_FAILURE(errorCode) || bestIndex < 0) ? defaultLocale : supportedLocales[bestIndex];
+}
+
+// Returns the best supported locale for a list of desired locales, or defaultLocale if none fits.
+const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
+                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    if (!desiredLocales.hasNext()) { return defaultLocale; }
+    LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+    const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+    if (U_FAILURE(errorCode) || bestIndex < 0) { return defaultLocale; }
+    return supportedLocales[bestIndex];
+}
+
+// Parses an accept-language style list and returns the best match for its locales.
+const Locale *LocaleMatcher::getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    LocalePriorityList priorityList(desiredLocaleList, errorCode);
+    auto localeIter = priorityList.iterator();
+    return getBestMatch(localeIter, errorCode);
+}
+
+// Single-locale variant: on a match, the Result carries a pointer to desiredLocale,
+// the matching supported locale and that locale's index.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode)) {
+        const int32_t bestIndex = getBestSuppIndex(
+            getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), nullptr, errorCode);
+        if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+            return Result(&desiredLocale, supportedLocales[bestIndex], 0, bestIndex, FALSE);
+        }
+    }
+    // Failure or no match: default locale and its index, no desired locale.
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+// Iterator variant: on a match, the Result receives the remembered desired locale
+// (orphaned from the LocaleLsrIterator) together with its index in the list.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode) && desiredLocales.hasNext()) {
+        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+        const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+        if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+            return Result(lsrIter.orphanRemembered(), supportedLocales[bestIndex],
+                          lsrIter.getBestDesiredIndex(), bestIndex, TRUE);
+        }
+    }
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
+ UErrorCode &errorCode) const { // Returns the index into supportedLocales of the best match, or -1 if there is none or on failure.
+ if (U_FAILURE(errorCode)) { return -1; }
+ int32_t desiredIndex = 0; // position of desiredLSR in the sequence of desired locales
+ int32_t bestSupportedLsrIndex = -1; // index into supportedLSRs/supportedIndexes; -1 = nothing found yet
+ for (int32_t bestDistance = thresholdDistance;;) { // bestDistance: a candidate must be strictly closer than this
+ // Quick check for exact maximized LSR.
+ // Returns suppIndex+1 where 0 means not found.
+ if (supportedLsrToIndex != nullptr) {
+ desiredLSR.setHashCode();
+ int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
+ if (index != 0) {
+ int32_t suppIndex = index - 1;
+ if (remainingIter != nullptr) {
+ remainingIter->rememberCurrent(desiredIndex, errorCode); // may set errorCode; the callers in this file check it
+ }
+ return suppIndex;
+ }
+ }
+ int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+ desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
+ if (bestIndexAndDistance >= 0) { // non-negative: some supported LSR is closer than bestDistance
+ bestDistance = bestIndexAndDistance & 0xff; // the distance is in bits 7..0
+ if (remainingIter != nullptr) {
+ remainingIter->rememberCurrent(desiredIndex, errorCode);
+ if (U_FAILURE(errorCode)) { return -1; }
+ }
+ bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? bestIndexAndDistance >> 8 : -1; // index is in bits 31..8; the -1 branch cannot be taken inside this if
+ }
+ if ((bestDistance -= demotionPerDesiredLocale) <= 0) { // each further desired locale has to beat a lowered bar
+ break;
+ }
+ if (remainingIter == nullptr || !remainingIter->hasNext()) { // single desired locale, or list exhausted
+ break;
+ }
+ desiredLSR = remainingIter->next(errorCode);
+ if (U_FAILURE(errorCode)) { return -1; }
+ ++desiredIndex;
+ }
+ if (bestSupportedLsrIndex < 0) {
+ // no good match
+ return -1;
+ }
+ return supportedIndexes[bestSupportedLsrIndex]; // map the LSR-array position back to an index into supportedLocales
+}
+
+// Returns 1 - distance/100 for the maximized desired/supported locales; 0 if maximizing `supported` fails.
+double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    LSR supportedLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    LSR desiredLSR = getMaximalLsrOrUnd(likelySubtags, desired, errorCode);
+    const LSR *supportedList = &supportedLSR;
+    const int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+            desiredLSR, &supportedList, 1, thresholdDistance, favorSubtag);
+    const int32_t distance = indexAndDistance & 0xff;  // the distance is in bits 7..0
+    return (100 - distance) / 100.0;
+}
+
+U_NAMESPACE_END
+
+#endif // __LOCMATCHER_H__
diff --git a/icu4c/source/common/localeprioritylist.cpp b/icu4c/source/common/localeprioritylist.cpp
new file mode 100644
index 0000000..06442fb
--- /dev/null
+++ b/icu4c/source/common/localeprioritylist.cpp
@@ -0,0 +1,239 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.cpp
+// created: 2019jul11 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "localeprioritylist.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Hash function for the Locale map: the key pointer is a Locale, hashed with Locale::hashCode().
+int32_t hashLocale(const UHashTok token) {
+    return static_cast<const Locale *>(token.pointer)->hashCode();
+}
+
+// Key comparator for the Locale map: both key pointers are Locales, compared by value.
+UBool compareLocales(const UHashTok t1, const UHashTok t2) {
+    const Locale &left = *static_cast<const Locale *>(t1.pointer);
+    return left == *static_cast<const Locale *>(t2.pointer);
+}
+
+constexpr int32_t WEIGHT_ONE = 1000; // weight for qvalue 1.0 (weights are 0..1000 = 0.0..1.0)
+
+// One list entry: the locale, its weight and its position in the list.
+struct LocaleAndWeight {
+    Locale *locale;
+    int32_t weight;  // 0..1000 = 0.0..1.0
+    int32_t index;  // force stable sort
+    // Sort order: higher weight first; equal weights keep their index order.
+    int32_t compare(const LocaleAndWeight &other) const {
+        if (other.weight != weight) { return other.weight - weight; }
+        return index - other.index;
+    }
+};
+
+// Comparator passed to uprv_sortArray(); the context argument is unused.
+int32_t U_CALLCONV compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
+    const auto &lhs = *static_cast<const LocaleAndWeight *>(left);
+    return lhs.compare(*static_cast<const LocaleAndWeight *>(right));
+}
+
+const char *skipSpaces(const char *p, const char *limit) {  // first non-space position at or after p, or limit
+    for (; p < limit && *p == ' '; ++p) {}
+    return p;
+}
+
+// Length of the language tag starting at p: it runs up to the next
+// accept-language delimiter (space, comma or semicolon) or to limit.
+// Other validation is left to the Locale constructor.
+int32_t findTagLength(const char *p, const char *limit) {
+    const char *end = p;
+    while (end < limit && *end != ' ' && *end != ',' && *end != ';') {
+        ++end;
+    }
+    return static_cast<int32_t>(end - p);
+}
+
+/**
+ * Parses a qvalue ("0" or "1", optionally followed by '.' and fraction digits)
+ * after skipping leading spaces, and returns it in millis.
+ * Advances p to after the parsed substring.
+ * Returns a negative value if parsing fails or the value is above 1.0.
+ */
+int32_t parseWeight(const char *&p, const char *limit) {
+    p = skipSpaces(p, limit);
+    if (p == limit || (*p != '0' && *p != '1')) { return -1; }
+    int32_t millis = (*p - '0') * 1000;
+    if (++p == limit || *p != '.') { return millis; }
+    // scale: value of the next fraction digit; 0 = rounding digit; -1 = ignore the rest
+    int32_t scale = 100;
+    for (++p; p != limit && '0' <= *p && *p <= '9'; ++p) {
+        const int32_t digit = *p - '0';
+        if (scale > 0) {
+            millis += digit * scale;
+            scale /= 10;
+        } else if (scale == 0) {
+            if (digit >= 5) { ++millis; }  // fourth fraction digit: round up
+            scale = -1;
+        }
+    }
+    return millis > WEIGHT_ONE ? -1 : millis;  // bad if > 1.0
+}
+
+} // namespace
+
+/**
+ * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
+ *
+ * This wrapper exists (and is not in an anonymous namespace)
+ * so that we can forward-declare it in the header file and
+ * don't have to expose the MaybeStackArray specialization and
+ * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
+ * Also, otherwise we would have to do a platform-specific
+ * template export declaration of some kind for the MaybeStackArray specialization
+ * to be properly exported from the common DLL.
+ */
+struct LocaleAndWeightArray : public UMemory {
+ MaybeStackArray<LocaleAndWeight, 20> array; // LocalePriorityList::add() resizes it when listLength reaches its capacity
+};
+
+LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) { // Parses an accept-language style list; problems are reported via errorCode.
+ if (U_FAILURE(errorCode)) { return; }
+ list = new LocaleAndWeightArray();
+ if (list == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ const char *p = s.data();
+ const char *limit = p + s.length();
+ while ((p = skipSpaces(p, limit)) != limit) { // one iteration per comma-separated range field
+ if (*p == ',') { // empty range field
+ ++p;
+ continue;
+ }
+ int32_t tagLength = findTagLength(p, limit);
+ if (tagLength == 0) { // here p is at a ';': a qvalue without a language tag
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ CharString tag(p, tagLength, errorCode); // copy of just the tag, for the Locale constructor
+ if (U_FAILURE(errorCode)) { return; }
+ Locale locale = Locale(tag.data());
+ if (locale.isBogus()) { // NOTE(review): every bogus Locale is reported as a memory allocation error; confirm that is intended
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ int32_t weight = WEIGHT_ONE; // used when there is no ";q=" part
+ if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') { // optional ";q=<qvalue>"; spaces are accepted between the tokens
+ if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
+ (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
+ (++p, (weight = parseWeight(p, limit)) < 0)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ p = skipSpaces(p, limit);
+ }
+ if (p != limit && *p != ',') { // trailing junk
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ add(locale, weight, errorCode); // return value unused; a failure is visible in errorCode
+ if (p == limit) { break; }
+ ++p; // skip the ','
+ }
+ sort(errorCode);
+}
+
+// Deletes the locales still held in the list, then the list itself and the lookup map.
+LocalePriorityList::~LocalePriorityList() {
+    if (list != nullptr) {
+        int32_t i = 0;
+        while (i < listLength) { delete list->array[i++].locale; }
+        delete list;
+    }
+    uhash_close(map);
+}
+
+const Locale *LocalePriorityList::localeAt(int32_t i) const { // returns nullptr for a slot whose locale was removed or orphaned
+ return list->array[i].locale; // no null check on list, unlike orphanLocaleAt(); list is null if the constructor returned before allocating it
+}
+
+// Hands the locale in slot i to the caller and leaves nullptr in the slot.
+Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
+    if (list == nullptr) { return nullptr; }
+    Locale *orphan = list->array[i].locale;
+    list->array[i].locale = nullptr;
+    return orphan;
+}
+
+bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) { // Appends locale with weight; a duplicate replaces the earlier entry. Returns false on failure.
+ if (U_FAILURE(errorCode)) { return false; }
+ if (map == nullptr) { // the map is created lazily, by the first entry that is actually stored
+ if (weight <= 0) { return true; } // do not add q=0
+ map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ LocalPointer<Locale> clone;
+ int32_t index = uhash_geti(map, &locale); // slot index + 1 (see uhash_puti below), or 0 if not in the map
+ if (index != 0) {
+ // Duplicate: Remove the old item and append it anew.
+ LocaleAndWeight &lw = list->array[index - 1];
+ clone.adoptInstead(lw.locale); // reuse the existing Locale object, which is also the map key
+ lw.locale = nullptr;
+ lw.weight = 0;
+ ++numRemoved;
+ }
+ if (weight <= 0) { // do not add q=0
+ if (index != 0) {
+ // Not strictly necessary but cleaner.
+ uhash_removei(map, &locale);
+ }
+ return true; // a reused duplicate held by clone is deleted here by LocalPointer
+ }
+ if (clone.isNull()) { // not a duplicate: store our own copy of the caller's locale
+ clone.adoptInstead(locale.clone());
+ if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ if (listLength == list->array.getCapacity()) {
+ int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
+ if (list->array.resize(newCapacity, listLength) == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode); // value = new slot index + 1, so that 0 can mean "not found"
+ if (U_FAILURE(errorCode)) { return false; }
+ LocaleAndWeight &lw = list->array[listLength];
+ lw.locale = clone.orphan(); // the list slot now owns the Locale
+ lw.weight = weight;
+ lw.index = listLength++;
+ if (weight < WEIGHT_ONE) { hasWeights = true; } // sort() only sorts when this is set
+ U_ASSERT(uhash_count(map) == getLength());
+ return true;
+}
+
+// Sorts by descending weight if any weight is below 1.0; the comparator uses the item index for stability.
+void LocalePriorityList::sort(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode) && hasWeights && getLength() > 1) {
+        uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
+                       compareLocaleAndWeight, nullptr, FALSE, &errorCode);
+    }
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/localeprioritylist.h b/icu4c/source/common/localeprioritylist.h
new file mode 100644
index 0000000..80ca38a
--- /dev/null
+++ b/icu4c/source/common/localeprioritylist.h
@@ -0,0 +1,115 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.h
+// created: 2019jul11 Markus W. Scherer
+
+#ifndef __LOCALEPRIORITYLIST_H__
+#define __LOCALEPRIORITYLIST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LocaleAndWeightArray;
+
+/**
+ * Parses a list of locales from an accept-language string.
+ * We are a bit more lenient than the spec:
+ * We accept extra whitespace in more places, empty range fields,
+ * and any number of qvalue fraction digits (the fourth digit rounds the value, later ones are ignored).
+ *
+ * https://tools.ietf.org/html/rfc2616#section-14.4
+ * 14.4 Accept-Language
+ *
+ * Accept-Language = "Accept-Language" ":"
+ * 1#( language-range [ ";" "q" "=" qvalue ] )
+ * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
+ *
+ * Each language-range MAY be given an associated quality value which
+ * represents an estimate of the user's preference for the languages
+ * specified by that range. The quality value defaults to "q=1". For
+ * example,
+ *
+ * Accept-Language: da, en-gb;q=0.8, en;q=0.7
+ *
+ * https://tools.ietf.org/html/rfc2616#section-3.9
+ * 3.9 Quality Values
+ *
+ * HTTP content negotiation (section 12) uses short "floating point"
+ * numbers to indicate the relative importance ("weight") of various
+ * negotiable parameters. A weight is normalized to a real number in
+ * the range 0 through 1, where 0 is the minimum and 1 the maximum
+ * value. If a parameter has a quality value of 0, then content with
+ * this parameter is `not acceptable' for the client. HTTP/1.1
+ * applications MUST NOT generate more than three digits after the
+ * decimal point. User configuration of these values SHOULD also be
+ * limited in this fashion.
+ *
+ * qvalue = ( "0" [ "." 0*3DIGIT ] )
+ * | ( "1" [ "." 0*3("0") ] )
+ */
+class U_COMMON_API LocalePriorityList : public UMemory {
+public:
+ class Iterator : public Locale::Iterator { // visits the locales in list order, skipping empty (nullptr) slots
+ public:
+ UBool hasNext() const override { return count < length; }
+ // next() does no bounds check of its own: call it only while hasNext() is true.
+ const Locale &next() override {
+ for(;;) {
+ const Locale *locale = list.localeAt(index++);
+ if (locale != nullptr) {
+ ++count;
+ return *locale;
+ }
+ }
+ }
+
+ private:
+ friend class LocalePriorityList;
+
+ Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
+
+ const LocalePriorityList &list;
+ int32_t index = 0; // next slot to look at, counting empty slots
+ int32_t count = 0; // number of locales returned so far
+ const int32_t length; // list.getLength() at construction time
+ };
+
+ LocalePriorityList(StringPiece s, UErrorCode &errorCode);
+
+ ~LocalePriorityList();
+
+ int32_t getLength() const { return listLength - numRemoved; } // number of locales, not counting removed slots
+
+ int32_t getLengthIncludingRemoved() const { return listLength; } // number of slots, including removed ones
+
+ Iterator iterator() const { return Iterator(*this); }
+
+ const Locale *localeAt(int32_t i) const; // i is a slot index; may return nullptr
+
+ Locale *orphanLocaleAt(int32_t i); // the caller takes ownership; the slot becomes nullptr
+
+private:
+ LocalePriorityList(const LocalePriorityList &) = delete;
+ LocalePriorityList &operator=(const LocalePriorityList &) = delete;
+
+ bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
+
+ void sort(UErrorCode &errorCode);
+
+ LocaleAndWeightArray *list = nullptr;
+ int32_t listLength = 0; // number of slots in use, including removed ones
+ int32_t numRemoved = 0; // slots whose entry was replaced by a later duplicate or dropped with q=0
+ bool hasWeights = false; // other than 1.0
+ UHashtable *map = nullptr; // Locale -> slot index + 1; used to find duplicates
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCALEPRIORITYLIST_H__
diff --git a/icu4c/source/common/locdistance.cpp b/icu4c/source/common/locdistance.cpp
new file mode 100644
index 0000000..800d0ea
--- /dev/null
+++ b/icu4c/source/common/locdistance.cpp
@@ -0,0 +1,364 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "locdistance.h"
+#include "loclikelysubtags.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uinvchar.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Bit flag used on the last character of a subtag in the trie.
+ * Must be set consistently by the builder and the lookup code.
+ */
+constexpr int32_t END_OF_SUBTAG = 0x80;
+/** Distance value bit flag, set by the builder. */
+constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
+/** Distance value bit flag, set by trieNext(). */
+constexpr int32_t DISTANCE_IS_FINAL = 0x100;
+constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
+
+constexpr int32_t ABOVE_THRESHOLD = 100; // distance part of the getBestIndexAndDistance() result when nothing is below the threshold
+
+// Indexes into array of distances.
+enum {
+ IX_DEF_LANG_DISTANCE, // -> defaultLanguageDistance
+ IX_DEF_SCRIPT_DISTANCE, // -> defaultScriptDistance
+ IX_DEF_REGION_DISTANCE, // -> defaultRegionDistance
+ IX_MIN_REGION_DISTANCE, // -> minRegionDistance
+ IX_LIMIT
+};
+
+LocaleDistance *gLocaleDistance = nullptr; // the singleton; created by initLocaleDistance(), deleted by cleanup()
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER; // guards the one-time creation in getSingleton()
+
+UBool U_CALLCONV cleanup() { // registered via ucln_common_registerCleanup() in initLocaleDistance()
+ delete gLocaleDistance;
+ gLocaleDistance = nullptr;
+ gInitOnce.reset(); // allow getSingleton() to build a new instance afterwards
+ return TRUE;
+}
+
+} // namespace
+
+// One-time initializer for the LocaleDistance singleton; failures are reported via errorCode.
+void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLocaleDistance == nullptr);
+    // Check errorCode before dereferencing: on failure the returned pointer may be null.
+    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    const LocaleDistanceData &data = likely->getDistanceData();
+    if (data.distanceTrieBytes == nullptr ||
+            data.regionToPartitions == nullptr || data.partitions == nullptr ||
+            // ok if no paradigms
+            data.distances == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    gLocaleDistance = new LocaleDistance(data);
+    if (gLocaleDistance == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; }
+    // Have the library cleanup delete the singleton and reset the init-once flag.
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
+}
+
+const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) { // returns the shared instance, creating it on first use
+ if (U_FAILURE(errorCode)) { return nullptr; }
+ umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
+ return gLocaleDistance; // still nullptr if initLocaleDistance() failed
+}
+
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data)
+        : trie(data.distanceTrieBytes),
+          regionToPartitionsIndex(data.regionToPartitions),
+          partitionArrays(data.partitions),
+          paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
+          defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
+          defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
+          defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
+          minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
+    // Default demotion per desired locale = the default region distance between
+    // unrelated Englishes, measured here as the distance from en-Latn-US to en-Latn-GB.
+    // So, unless demotion is turned off, a mere region difference for one desired
+    // locale is as good as a perfect match for the next following desired locale.
+    // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
+    const LSR desired("en", "Latn", "US");
+    const LSR british("en", "Latn", "GB");
+    const LSR *supported[] = { &british };
+    defaultDemotionPerDesiredLocale =
+        getBestIndexAndDistance(desired, supported, 1, 50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff;
+}
+
+int32_t LocaleDistance::getBestIndexAndDistance(
+ const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t threshold, ULocMatchFavorSubtag favorSubtag) const { // Returns (bestIndex << 8) | distance, or a negative value if no supported LSR is closer than threshold.
+ BytesTrie iter(trie); // work on a copy of the member trie; this method is const
+ // Look up the desired language only once for all supported LSRs.
+ // Its "distance" is either a match point value of 0, or a non-match negative value.
+ // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+ int32_t desLangDistance = trieNext(iter, desired.language, false);
+ uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0; // saved only if it will be needed for a second supported LSR
+ // Index of the supported LSR with the lowest distance.
+ int32_t bestIndex = -1;
+ for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
+ const LSR &supported = *supportedLSRs[slIndex];
+ bool star = false; // true once we use the <*, *> language fallback for this supported LSR
+ int32_t distance = desLangDistance;
+ if (distance >= 0) {
+ U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
+ if (slIndex != 0) {
+ iter.resetToState64(desLangState); // back to just after the desired language
+ }
+ distance = trieNext(iter, supported.language, true);
+ }
+ // Note: The data builder verifies that there are no rules with "any" (*) language and
+ // real (non *) script or region subtags.
+ // This means that if the lookup for either language fails we can use
+ // the default distances without further lookups.
+ int32_t flags;
+ if (distance >= 0) {
+ flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; // split the trie value into flag bits and the distance proper
+ distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ } else { // <*, *>
+ if (uprv_strcmp(desired.language, supported.language) == 0) {
+ distance = 0;
+ } else {
+ distance = defaultLanguageDistance;
+ }
+ flags = 0;
+ star = true;
+ }
+ U_ASSERT(0 <= distance && distance <= 100);
+ // We implement "favor subtag" by reducing the language subtag distance
+ // (unscientifically reducing it to a quarter of the normal value),
+ // so that the script distance is relatively more important.
+ // For example, given a default language distance of 80, we reduce it to 20,
+ // which is below the default threshold of 50, which is the default script distance.
+ if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
+ distance >>= 2;
+ }
+ if (distance >= threshold) { // the language distance alone is already too large
+ continue;
+ }
+
+ int32_t scriptDistance;
+ if (star || flags != 0) { // no script rules to look up: 0 for equal scripts, else the default
+ if (uprv_strcmp(desired.script, supported.script) == 0) {
+ scriptDistance = 0;
+ } else {
+ scriptDistance = defaultScriptDistance;
+ }
+ } else {
+ scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+ desired.script, supported.script);
+ flags = scriptDistance & DISTANCE_IS_FINAL;
+ scriptDistance &= ~DISTANCE_IS_FINAL;
+ }
+ distance += scriptDistance;
+ if (distance >= threshold) {
+ continue;
+ }
+
+ if (uprv_strcmp(desired.region, supported.region) == 0) {
+ // regionDistance = 0
+ } else if (star || (flags & DISTANCE_IS_FINAL) != 0) { // nothing further in the trie for regions
+ distance += defaultRegionDistance;
+ } else {
+ int32_t remainingThreshold = threshold - distance;
+ if (minRegionDistance >= remainingThreshold) { // even the smallest region distance would reach the threshold
+ continue;
+ }
+
+ // From here on we know the regions are not equal.
+ // Map each region to zero or more partitions. (zero = one non-matching string)
+ // (Each array of single-character partition strings is encoded as one string.)
+ // If either side has more than one, then we find the maximum distance.
+ // This could be optimized by adding some more structure, but probably not worth it.
+ distance += getRegionPartitionsDistance(
+ iter, iter.getState64(),
+ partitionsForRegion(desired),
+ partitionsForRegion(supported),
+ remainingThreshold);
+ }
+ if (distance < threshold) {
+ if (distance == 0) {
+ return slIndex << 8; // perfect match: index in bits 31..8, distance 0
+ }
+ bestIndex = slIndex;
+ threshold = distance; // later supported LSRs must be strictly closer
+ }
+ }
+ return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD; // none found: all-ones index bits plus ABOVE_THRESHOLD
+}
+
+// Distance between the desired and supported scripts, continuing in the trie at its current state.
+// The result has DISTANCE_IS_FINAL set if the trie value that was used is a final value.
+int32_t LocaleDistance::getDesSuppScriptDistance(
+        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t distance = trieNext(iter, desired, false);
+    if (distance >= 0) { distance = trieNext(iter, supported, true); }
+    if (distance >= 0) { return distance; }  // found a specific <desired, supported> rule
+    // No specific rule: restart from startState and use the <*, *> fallback.
+    UStringTrieResult result = iter.resetToState64(startState).next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    if (uprv_strcmp(desired, supported) == 0) {
+        distance = 0;  // same script
+    } else {
+        distance = iter.getValue();
+        U_ASSERT(distance >= 0);
+    }
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        distance |= DISTANCE_IS_FINAL;
+    }
+    return distance;
+}
+
+int32_t LocaleDistance::getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) { // Returns the largest distance over the partition pairs, or early the first one that is >= threshold.
+ char desired = *desiredPartitions++;
+ char supported = *supportedPartitions++;
+ U_ASSERT(desired != 0 && supported != 0);
+ // See if we have single desired/supported partitions, from NUL-terminated
+ // partition strings without explicit length.
+ bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character
+ // equivalent to: if (desLength == 1 && suppLength == 1)
+ if (*desiredPartitions == 0 && !suppLengthGt1) {
+ // Fastpath for single desired/supported partitions.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ return iter.getValue();
+ }
+ }
+ return getFallbackRegionDistance(iter, startState); // no rule for this pair: use the <*, *> value
+ }
+
+ const char *supportedStart = supportedPartitions - 1; // for restart of inner loop
+ int32_t regionDistance = 0; // maximum distance seen so far
+ // Fall back to * only once, not for each pair of partition strings.
+ bool star = false;
+ for (;;) { // outer loop: one iteration per desired partition character
+ // Look up each desired-partition string only once,
+ // not for each (desired, supported) pair.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ uint64_t desState = suppLengthGt1 ? iter.getState64() : 0; // saved only if the inner loop has to restart from here
+ for (;;) { // inner loop: one iteration per supported partition character
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ int32_t d;
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ d = iter.getValue();
+ } else if (star) {
+ d = 0; // the fallback distance was already taken into account
+ } else {
+ d = getFallbackRegionDistance(iter, startState);
+ star = true;
+ }
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ if ((supported = *supportedPartitions++) != 0) {
+ iter.resetToState64(desState);
+ } else {
+ break;
+ }
+ }
+ } else if (!star) { // the desired partition is not in the trie: use the fallback, once
+ int32_t d = getFallbackRegionDistance(iter, startState);
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ star = true;
+ }
+ if ((desired = *desiredPartitions++) != 0) {
+ iter.resetToState64(startState);
+ supportedPartitions = supportedStart; // rewind the supported partitions for the next desired one
+ supported = *supportedPartitions++;
+ } else {
+ break;
+ }
+ }
+ return regionDistance;
+}
+
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) { // Returns the trie value for '*' as read from startState.
+#if U_DEBUG // the result variable is only needed for the assertion below
+ UStringTrieResult result =
+#endif
+ iter.resetToState64(startState).next(u'*'); // <*, *>
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ int32_t distance = iter.getValue();
+ U_ASSERT(distance >= 0);
+ return distance;
+}
+
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) { // Feeds subtag s into the trie; returns -1 if it does not match.
+ uint8_t c;
+ if ((c = *s) == 0) {
+ return -1; // no empty subtags in the distance data
+ }
+ for (;;) {
+ c = uprv_invCharToAscii(c);
+ // EBCDIC: If *s is not an invariant character,
+ // then c is now 0 and will simply not match anything, which is harmless.
+ uint8_t next = *++s; // look ahead to detect the last character of the subtag
+ if (next != 0) {
+ if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+ return -1;
+ }
+ } else {
+ // last character of this subtag
+ UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
+ if (wantValue) { // return the trie value, with DISTANCE_IS_FINAL added for a final value
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ int32_t value = iter.getValue();
+ if (result == USTRINGTRIE_FINAL_VALUE) {
+ value |= DISTANCE_IS_FINAL;
+ }
+ return value;
+ }
+ } else { // only check that the trie can continue after this subtag
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ return 0;
+ }
+ }
+ return -1;
+ }
+ c = next;
+ }
+}
+
+// Tells whether lsr equals one of the paradigm LSRs.
+// Linear search for a very short list (length 6 as of 2019).
+// If there are many paradigm LSRs we should use a hash set.
+UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
+    U_ASSERT(paradigmLSRsLength <= 15);
+    int32_t i = 0;
+    while (i < paradigmLSRsLength && !(lsr == paradigmLSRs[i])) { ++i; }
+    return i < paradigmLSRsLength;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/locdistance.h b/icu4c/source/common/locdistance.h
new file mode 100644
index 0000000..7439f51
--- /dev/null
+++ b/icu4c/source/common/locdistance.h
@@ -0,0 +1,109 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Computes distances between maximized locales (LSRs) for LocaleMatcher,
+ * using offline-built data: a distance trie, region partitions, paradigm LSRs and default distances.
+ */
+class LocaleDistance final : public UMemory {
+public:
+ static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+ /**
+ * Finds the supported LSR with the smallest distance from the desired one.
+ * Equivalent LSR subtags must be normalized into a canonical form.
+ *
+ * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+ * (negative if none has a distance below the threshold),
+ * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+ */
+ int32_t getBestIndexAndDistance(const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
+
+ int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
+
+ UBool isParadigmLSR(const LSR &lsr) const;
+
+ int32_t getDefaultScriptDistance() const {
+ return defaultScriptDistance;
+ }
+
+ int32_t getDefaultDemotionPerDesiredLocale() const { // computed in the constructor from an en-US vs. en-GB lookup
+ return defaultDemotionPerDesiredLocale;
+ }
+
+private:
+ LocaleDistance(const LocaleDistanceData &data);
+ LocaleDistance(const LocaleDistance &other) = delete;
+ LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+ static void initLocaleDistance(UErrorCode &errorCode); // one-time initializer used by getSingleton()
+
+ static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+ const char *desired, const char *supported);
+
+ static int32_t getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions,
+ int32_t threshold);
+
+ static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+ static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+ const char *partitionsForRegion(const LSR &lsr) const { // returns the partitions string for lsr.regionIndex
+ // ill-formed region -> one non-matching string
+ int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+ return partitionArrays[pIndex];
+ }
+
+ int32_t getDefaultRegionDistance() const {
+ return defaultRegionDistance;
+ }
+
+ // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+ // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+ // There is also a trie value for each subsequence of whole subtags.
+ // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+ BytesTrie trie;
+
+ /**
+ * Maps each region to zero or more single-character partitions.
+ */
+ const uint8_t *regionToPartitionsIndex; // indexed by LSR::regionIndex; the value is an index into partitionArrays
+ const char **partitionArrays;
+
+ /**
+ * Used to get the paradigm region for a cluster, if there is one.
+ */
+ const LSR *paradigmLSRs;
+ int32_t paradigmLSRsLength;
+
+ int32_t defaultLanguageDistance;
+ int32_t defaultScriptDistance;
+ int32_t defaultRegionDistance;
+ int32_t minRegionDistance;
+ int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCDISTANCE_H__
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index 5868b5e..c6d3f88 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -1396,5 +1396,7 @@
return baseName;
}
+// Defaulted destructor of Locale::Iterator, defined out of line in this file.
+Locale::Iterator::~Iterator() = default;
+
//eof
U_NAMESPACE_END
diff --git a/icu4c/source/common/loclikelysubtags.cpp b/icu4c/source/common/loclikelysubtags.cpp
new file mode 100644
index 0000000..d7f5e12
--- /dev/null
+++ b/icu4c/source/common/loclikelysubtags.cpp
@@ -0,0 +1,638 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uresdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Prefix characters that makeMaximizedLsr() passes to the prefixing LSR constructor
+// for pseudolocales. The trailing comments name the region/variant that selects each one.
+constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
+constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
+constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
+
+/**
+ * Stores NUL-terminated strings with duplicate elimination.
+ * Each distinct UTF-16 input string is converted to invariant characters once and
+ * appended to one shared CharString; add() returns the string's offset in that CharString.
+ * The hash table stores the callers' char16_t pointers as keys without copying them,
+ * so those buffers must stay valid for as long as add() may be called.
+ */
+class UniqueCharStrings {
+public:
+    explicit UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
+        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        strings = new CharString();
+        if (strings == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    // This object owns the table contents and the CharString; a copy would release both twice.
+    UniqueCharStrings(const UniqueCharStrings &other) = delete;
+    UniqueCharStrings &operator=(const UniqueCharStrings &other) = delete;
+    ~UniqueCharStrings() {
+        uhash_close(&map);
+        delete strings;
+    }
+
+    /**
+     * Returns/orphans the CharString that contains all strings.
+     * The caller becomes the owner. Afterwards add() fails and get() returns nullptr.
+     */
+    CharString *orphanCharStrings() {
+        CharString *result = strings;
+        strings = nullptr;
+        return result;
+    }
+
+    /**
+     * Adds a string and returns a unique number for it.
+     * The number is the offset of the converted string in the shared CharString (always > 0).
+     * A string that is already in the table yields the number it received the first time.
+     * Returns 0 if errorCode is already a failure, or sets U_NO_WRITE_PERMISSION and returns 0
+     * if this object is frozen or has no string storage.
+     */
+    int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return 0; }
+        if (isFrozen || strings == nullptr) {
+            // Frozen, or the storage was never allocated or has been orphaned:
+            // there is nothing we may append to.
+            errorCode = U_NO_WRITE_PERMISSION;
+            return 0;
+        }
+        // The string points into the resource bundle.
+        const char16_t *p = s.getBuffer();
+        int32_t oldIndex = uhash_geti(&map, p);
+        if (oldIndex != 0) {  // found duplicate
+            return oldIndex;
+        }
+        // Explicit NUL terminator for the previous string.
+        // The strings object is also terminated with one implicit NUL.
+        // Appending the NUL first also guarantees that no string starts at offset 0,
+        // so 0 stays available as the "not found" value of uhash_geti().
+        strings->append(0, errorCode);
+        int32_t newIndex = strings->length();
+        strings->appendInvariantChars(s, errorCode);
+        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
+        return newIndex;
+    }
+
+    /** Ends the collection phase: add() is rejected from now on and get() becomes usable. */
+    void freeze() { isFrozen = true; }
+
+    /**
+     * Returns a string pointer for its unique number, if this object is frozen.
+     * Otherwise nullptr. Also returns nullptr for i <= 0 and when there is no string storage.
+     */
+    const char *get(int32_t i) const {
+        U_ASSERT(isFrozen);
+        return isFrozen && i > 0 && strings != nullptr ? strings->data() + i : nullptr;
+    }
+
+private:
+    // Zero-initialized so that the destructor's uhash_close() operates on well-defined members
+    // even when uhash_init() returned early because errorCode was already a failure.
+    UHashtable map = {};
+    CharString *strings;
+    bool isFrozen = false;
+};
+
+} // namespace
+
+// Move constructor: copies all pointers and the paradigms length from data,
+// then resets data.partitions and data.paradigms to nullptr.
+// Those are the two members that ~LocaleDistanceData() releases.
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
+ distanceTrieBytes(data.distanceTrieBytes),
+ regionToPartitions(data.regionToPartitions),
+ partitions(data.partitions),
+ paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
+ distances(data.distances) {
+ data.partitions = nullptr;
+ data.paradigms = nullptr;
+}
+
+// Frees the partitions array (allocated with uprv_malloc() in XLikelySubtagsData::load())
+// and the paradigms array (allocated there with new[]).
+LocaleDistanceData::~LocaleDistanceData() {
+ uprv_free(partitions);
+ delete[] paradigms;
+}
+
+// TODO(ICU-20777): Rename to just LikelySubtagsData.
+// Loads the "likely" table and, if present, the "match" table of the "langInfo" resource bundle.
+// Holds the loaded pieces until the XLikelySubtags constructor takes them over.
+struct XLikelySubtagsData {
+ UResourceBundle *langInfoBundle = nullptr;
+ UniqueCharStrings strings; // collects every string read by readStrings()
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+ const uint8_t *trieBytes = nullptr;
+ LSR *lsrs = nullptr;
+ int32_t lsrsLength = 0;
+
+ LocaleDistanceData distanceData;
+
+ XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
+
+ // Closes the bundle and deletes lsrs, unless the XLikelySubtags constructor
+ // has taken them over and reset these members to nullptr.
+ ~XLikelySubtagsData() {
+ ures_close(langInfoBundle);
+ delete[] lsrs;
+ }
+
+ // Reads the language/region aliases, the LSR list and the likely-subtags trie from "likely",
+ // plus the distance data from "match" when that table exists.
+ // Sets errorCode and returns early on missing or malformed data and on allocation failure.
+ void load(UErrorCode &errorCode) {
+ langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ StackUResourceBundle stackTempBundle;
+ ResourceDataValue value;
+ ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
+ value, errorCode);
+ ResourceTable likelyTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Read all strings in the resource bundle and convert them to invariant char *.
+ LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
+ int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+ if (!readStrings(likelyTable, "languageAliases", value,
+ languageIndexes, languagesLength, errorCode) ||
+ !readStrings(likelyTable, "regionAliases", value,
+ regionIndexes, regionsLength, errorCode) ||
+ !readStrings(likelyTable, "lsrs", value,
+ lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+ return;
+ }
+ // The alias arrays are consumed below as (alias, canonical) pairs,
+ // and the lsrs array as (language, script, region) triples.
+ if ((languagesLength & 1) != 0 ||
+ (regionsLength & 1) != 0 ||
+ (lsrSubtagsLength % 3) != 0) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ if (lsrSubtagsLength == 0) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+
+ if (!likelyTable.findValue("trie", value)) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+ int32_t length;
+ trieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Also read distance/matcher data if available,
+ // to open & keep only one resource bundle pointer
+ // and to use one single UniqueCharStrings.
+ // A separate error code is used so that a missing "match" table can be tolerated.
+ UErrorCode matchErrorCode = U_ZERO_ERROR;
+ ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
+ value, matchErrorCode);
+ LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+ int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+ if (U_SUCCESS(matchErrorCode)) {
+ ResourceTable matchTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ if (matchTable.findValue("trie", value)) {
+ distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ }
+
+ if (matchTable.findValue("regionToPartitions", value)) {
+ distanceData.regionToPartitions = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < LSR::REGION_INDEX_LIMIT) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+
+ if (!readStrings(matchTable, "partitions", value,
+ partitionIndexes, partitionsLength, errorCode) ||
+ !readStrings(matchTable, "paradigms", value,
+ paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+ return;
+ }
+ if ((paradigmSubtagsLength % 3) != 0) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ if (matchTable.findValue("distances", value)) {
+ distanceData.distances = value.getIntVector(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < 4) { // LocaleDistance IX_LIMIT
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+ } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+ // ok for likely subtags
+ } else { // error other than missing resource
+ errorCode = matchErrorCode;
+ return;
+ }
+
+ // Fetch & store invariant-character versions of strings
+ // only after we have collected and de-duplicated all of them.
+ // strings.get() returns pointers only once the collection is frozen.
+ strings.freeze();
+
+ languageAliases = CharStringMap(languagesLength / 2, errorCode);
+ for (int32_t i = 0; i < languagesLength; i += 2) {
+ languageAliases.put(strings.get(languageIndexes[i]),
+ strings.get(languageIndexes[i + 1]), errorCode);
+ }
+
+ regionAliases = CharStringMap(regionsLength / 2, errorCode);
+ for (int32_t i = 0; i < regionsLength; i += 2) {
+ regionAliases.put(strings.get(regionIndexes[i]),
+ strings.get(regionIndexes[i + 1]), errorCode);
+ }
+ if (U_FAILURE(errorCode)) { return; }
+
+ lsrsLength = lsrSubtagsLength / 3;
+ lsrs = new LSR[lsrsLength];
+ if (lsrs == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
+ lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+ strings.get(lsrSubtagIndexes[i + 1]),
+ strings.get(lsrSubtagIndexes[i + 2]));
+ }
+
+ if (partitionsLength > 0) {
+ distanceData.partitions = static_cast<const char **>(
+ uprv_malloc(partitionsLength * sizeof(const char *)));
+ if (distanceData.partitions == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0; i < partitionsLength; ++i) {
+ distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+ }
+ }
+
+ if (paradigmSubtagsLength > 0) {
+ distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+ LSR *paradigms = new LSR[distanceData.paradigmsLength];
+ if (paradigms == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+ paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+ strings.get(paradigmSubtagIndexes[i + 1]),
+ strings.get(paradigmSubtagIndexes[i + 2]));
+ }
+ distanceData.paradigms = paradigms;
+ }
+ }
+
+private:
+ // Looks up key in table. If found, the value must be an array: every element is added
+ // to strings, the returned numbers are stored in indexes, and length receives the
+ // element count. value is used as scratch space.
+ // Returns true on success and also when the key is absent (length is then left unchanged).
+ // Returns false with errorCode set on failure.
+ bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
+ LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
+ if (table.findValue(key, value)) {
+ ResourceArray stringArray = value.getArray(errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ length = stringArray.getSize();
+ if (length == 0) { return true; }
+ int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
+ if (rawIndexes == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ for (int i = 0; i < length; ++i) {
+ stringArray.getValue(i, value); // returns TRUE because i < length
+ rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ }
+ return true;
+ }
+};
+
+namespace {
+
+XLikelySubtags *gLikelySubtags = nullptr;
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
+
+// Cleanup function that XLikelySubtags::initLikelySubtags() registers with
+// ucln_common_registerCleanup(): deletes the singleton and resets the init-once flag.
+UBool U_CALLCONV cleanup() {
+ delete gLikelySubtags;
+ gLikelySubtags = nullptr;
+ gInitOnce.reset();
+ return TRUE;
+}
+
+} // namespace
+
+/**
+ * Loads the langInfo data and creates the singleton.
+ * On success the library cleanup function is registered; on failure errorCode is set
+ * and gLikelySubtags stays nullptr.
+ */
+void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+    // Invoked only via umtx_initOnce(), so the singleton cannot exist yet.
+    U_ASSERT(gLikelySubtags == nullptr);
+    XLikelySubtagsData loaded(errorCode);
+    loaded.load(errorCode);
+    if (U_SUCCESS(errorCode)) {
+        gLikelySubtags = new XLikelySubtags(loaded);
+        if (gLikelySubtags != nullptr) {
+            ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
+        } else {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+/**
+ * Returns the shared instance, creating it on first use.
+ * Returns nullptr if errorCode is already a failure on entry;
+ * otherwise returns gLikelySubtags, which is nullptr if initialization failed.
+ */
+const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+        return gLikelySubtags;
+    }
+    return nullptr;
+}
+
+/**
+ * Takes over the resource bundle, string storage, alias maps, LSR array and distance data
+ * from data, then caches the trie states that maximize() uses as shortcuts.
+ */
+XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
+        langInfoBundle(data.langInfoBundle),
+        strings(data.strings.orphanCharStrings()),
+        languageAliases(std::move(data.languageAliases)),
+        regionAliases(std::move(data.regionAliases)),
+        trie(data.trieBytes),
+        lsrs(data.lsrs),
+#if U_DEBUG
+        lsrsLength(data.lsrsLength),
+#endif
+        distanceData(std::move(data.distanceData)) {
+    // Ownership moved to this object: keep ~XLikelySubtagsData() from releasing these.
+    data.langInfoBundle = nullptr;
+    data.lsrs = nullptr;
+
+    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+    UStringTrieResult result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndZzzzState = trie.getState64();
+    // "***" (und-Zzzz-ZZ) must carry the index of the default LSR.
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    defaultLsrIndex = trie.getValue();
+    trie.reset();
+
+    // Cache the trie state after each possible first language letter.
+    for (char16_t c = u'a'; c <= u'z'; ++c) {
+        result = trie.next(c);
+        // Give every slot a value. 0 means "no cached state", which sends maximize()
+        // down its generic lookup path. Without the explicit 0 the slot would be
+        // indeterminate: this object is created with new and the array has no initializer.
+        trieFirstLetterStates[c - u'a'] =
+            result == USTRINGTRIE_NO_VALUE ? trie.getState64() : 0;
+        trie.reset();
+    }
+}
+
+// Releases what the constructor took over from XLikelySubtagsData:
+// the resource bundle, the string storage and the LSR array.
+XLikelySubtags::~XLikelySubtags() {
+ ures_close(langInfoBundle);
+ delete strings;
+ delete[] lsrs;
+}
+
+/**
+ * Builds the maximized LSR for a Locale.
+ * A locale whose name starts with "@x=" is returned as is: its whole name becomes the
+ * language, with empty script and region. Every other locale goes through makeMaximizedLsr().
+ */
+LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
+    const char *localeName = locale.getName();
+    // name.startsWith("@x="): private use language tag x-subtag-subtag...
+    const bool isPrivateUse =
+        uprv_isAtSign(localeName[0]) && localeName[1] == 'x' && localeName[2] == '=';
+    if (!isPrivateUse) {
+        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                                locale.getVariant(), errorCode);
+    }
+    return LSR(localeName, "", "");
+}
+
+namespace {
+
+// Returns the canonical form that aliases maps alias to,
+// or alias itself when the map has no entry for it.
+const char *getCanonical(const CharStringMap &aliases, const char *alias) {
+    if (const char *mapped = aliases.get(alias)) {
+        return mapped;
+    }
+    return alias;
+}
+
+} // namespace
+
+/**
+ * Builds the maximized LSR from separate subtags.
+ * Pseudolocales (regions XA/XB/XC, variants PSACCENT/PSBIDI/PSCRACK) get a prefixed LSR
+ * and are not looked up. Everything else has its language and region aliases resolved
+ * and is then passed to maximize().
+ */
+LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+                                     const char *variant, UErrorCode &errorCode) const {
+    // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
+    // They should match only themselves,
+    // not other locales with what looks like the same language and script subtags.
+    if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
+        char pseudoPrefix = 0;  // stays 0 for a normal locale
+        switch (region[1]) {
+        case 'A': pseudoPrefix = PSEUDO_ACCENTS_PREFIX; break;
+        case 'B': pseudoPrefix = PSEUDO_BIDI_PREFIX; break;
+        case 'C': pseudoPrefix = PSEUDO_CRACKED_PREFIX; break;
+        default: break;
+        }
+        if (pseudoPrefix != 0) {
+            return LSR(pseudoPrefix, language, script, region, errorCode);
+        }
+    }
+
+    if (variant[0] == 'P' && variant[1] == 'S') {
+        char pseudoPrefix = 0;  // stays 0 for a normal locale
+        const char *impliedRegion = "";  // used only when the locale has no region of its own
+        if (uprv_strcmp(variant, "PSACCENT") == 0) {
+            pseudoPrefix = PSEUDO_ACCENTS_PREFIX;
+            impliedRegion = "XA";
+        } else if (uprv_strcmp(variant, "PSBIDI") == 0) {
+            pseudoPrefix = PSEUDO_BIDI_PREFIX;
+            impliedRegion = "XB";
+        } else if (uprv_strcmp(variant, "PSCRACK") == 0) {
+            pseudoPrefix = PSEUDO_CRACKED_PREFIX;
+            impliedRegion = "XC";
+        }
+        if (pseudoPrefix != 0) {
+            return LSR(pseudoPrefix, language, script,
+                       *region == 0 ? impliedRegion : region, errorCode);
+        }
+    }
+
+    // (We have no script mappings.)
+    const char *canonicalLanguage = getCanonical(languageAliases, language);
+    const char *canonicalRegion = getCanonical(regionAliases, region);
+    return maximize(canonicalLanguage, script, canonicalRegion);
+}
+
+// Looks up language, script and region in the trie and returns an LSR in which
+// missing subtags ("" or "und"/"Zzzz"/"ZZ") are filled in from the matching lsrs entry.
+// retainOldMask records which input subtags are returned as passed in:
+// 4 = language, 2 = script, 1 = region.
+// The result aliases its subtag strings (input pointers, literals, or strings from lsrs).
+LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
+ if (uprv_strcmp(language, "und") == 0) {
+ language = "";
+ }
+ if (uprv_strcmp(script, "Zzzz") == 0) {
+ script = "";
+ }
+ if (uprv_strcmp(region, "ZZ") == 0) {
+ region = "";
+ }
+ if (*script != 0 && *region != 0 && *language != 0) {
+ return LSR(language, script, region); // already maximized
+ }
+
+ uint32_t retainOldMask = 0;
+ BytesTrie iter(trie);
+ uint64_t state; // 0 = language did not match, continuing from the "und" states
+ int32_t value; // trieNext() result: <0 no match, 0 no value yet, >0 value
+ // Small optimization: Array lookup for first language letter.
+ int32_t c0;
+ if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+ language[1] != 0 && // language.length() >= 2
+ (state = trieFirstLetterStates[c0]) != 0) {
+ value = trieNext(iter.resetToState64(state), language, 1);
+ } else {
+ value = trieNext(iter, language, 0);
+ }
+ if (value >= 0) {
+ if (*language != 0) {
+ retainOldMask |= 4;
+ }
+ state = iter.getState64();
+ } else {
+ retainOldMask |= 4;
+ iter.resetToState64(trieUndState); // "und" ("*")
+ state = 0;
+ }
+
+ if (value > 0) {
+ // Intermediate or final value from just language.
+ if (value == SKIP_SCRIPT) {
+ value = 0;
+ }
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ } else {
+ value = trieNext(iter, script, 0);
+ if (value >= 0) {
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ state = iter.getState64();
+ } else {
+ retainOldMask |= 2;
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ // Script not found for this language: continue with the "*" script instead.
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ state = iter.getState64();
+ }
+ }
+ }
+
+ if (value > 0) {
+ // Final value from just language or language+script.
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {
+ value = trieNext(iter, region, 0);
+ if (value >= 0) {
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {
+ retainOldMask |= 1;
+ if (state == 0) {
+ value = defaultLsrIndex;
+ } else {
+ // Region not found: continue with the "*" region instead.
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value > 0);
+ }
+ }
+ }
+ U_ASSERT(value < lsrsLength);
+ const LSR &result = lsrs[value];
+
+ if (*language == 0) {
+ language = "und";
+ }
+
+ if (retainOldMask == 0) {
+ // Quickly return a copy of the lookup-result LSR
+ // without new allocation of the subtags.
+ return LSR(result.language, result.script, result.region);
+ }
+ // Replace only the subtags that are not marked as retained.
+ if ((retainOldMask & 4) == 0) {
+ language = result.language;
+ }
+ if ((retainOldMask & 2) == 0) {
+ script = result.script;
+ }
+ if ((retainOldMask & 1) == 0) {
+ region = result.region;
+ }
+ return LSR(language, script, region);
+}
+
+/**
+ * Feeds one subtag, starting at s[i], into the trie iterator.
+ * An empty subtag is looked up as '*'. Otherwise each character is converted to ASCII
+ * and the last character is sent with bit 7 set.
+ * Returns -1 for no match, 0 for a match without a value, SKIP_SCRIPT for an
+ * intermediate value, or the final trie value.
+ */
+int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
+    UStringTrieResult result;
+    if (s[i] == 0) {
+        result = iter.next(u'*');
+    } else {
+        // Every character before the last one must lead further into the trie.
+        while (s[i + 1] != 0) {
+            // EBCDIC: If s[i] is not an invariant character,
+            // then the converted byte is 0 and will simply not match anything, which is harmless.
+            uint8_t inner = uprv_invCharToAscii(s[i]);
+            if (!USTRINGTRIE_HAS_NEXT(iter.next(inner))) {
+                return -1;
+            }
+            ++i;
+        }
+        // last character of this subtag
+        uint8_t last = uprv_invCharToAscii(s[i]);
+        result = iter.next(last | 0x80);
+    }
+    if (result == USTRINGTRIE_NO_VALUE) {
+        return 0;
+    }
+    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
+        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+        return SKIP_SCRIPT;
+    }
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        return iter.getValue();
+    }
+    return -1;  // USTRINGTRIE_NO_MATCH, or any unexpected result
+}
+
+// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+// in loclikely.cpp to this new code, including activating this
+// minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
+ const char *regionIn, ULocale.Minimize fieldToFavor,
+ UErrorCode &errorCode) const {
+ LSR result = maximize(languageIn, scriptIn, regionIn);
+
+ // We could try just a series of checks, like:
+ // LSR result2 = addLikelySubtags(languageIn, "", "");
+ // if result.equals(result2) return result2;
+ // However, we can optimize 2 of the cases:
+ // (languageIn, "", "")
+ // (languageIn, "", regionIn)
+
+ // value00 = lookup(result.language, "", "")
+ BytesTrie iter = new BytesTrie(trie);
+ int value = trieNext(iter, result.language, 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ }
+ }
+ U_ASSERT(value > 0);
+ LSR value00 = lsrs[value];
+ boolean favorRegionOk = false;
+ if (result.script.equals(value00.script)) { //script is default
+ if (result.region.equals(value00.region)) {
+ return new LSR(result.language, "", "");
+ } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+ return new LSR(result.language, "", result.region);
+ } else {
+ favorRegionOk = true;
+ }
+ }
+
+ // The last case is not as easy to optimize.
+ // Maybe do later, but for now use the straightforward code.
+ LSR result2 = maximize(languageIn, scriptIn, "");
+ if (result2.equals(result)) {
+ return new LSR(result.language, result.script, "");
+ } else if (favorRegionOk) {
+ return new LSR(result.language, "", result.region);
+ }
+ return result;
+}
+#endif
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/loclikelysubtags.h b/icu4c/source/common/loclikelysubtags.h
new file mode 100644
index 0000000..8c8a08a
--- /dev/null
+++ b/icu4c/source/common/loclikelysubtags.h
@@ -0,0 +1,143 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCLIKELYSUBTAGS_H__
+#define __LOCLIKELYSUBTAGS_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "lsr.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+struct XLikelySubtagsData;
+
+/**
+ * Map of const char * keys & values.
+ * Stores pointers as is: Does not own/copy/adopt/release strings.
+ * Owns only the underlying hash table, which is closed on destruction.
+ * Movable but not copyable.
+ */
+class CharStringMap final : public UMemory {
+public:
+    /** Constructs an unusable non-map. */
+    CharStringMap() : map(nullptr) {}
+    /** Opens a map with the given size hint; errorCode reports a failure to open it. */
+    CharStringMap(int32_t size, UErrorCode &errorCode) {
+        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+                             size, &errorCode);
+    }
+    /** Takes over other's table; other becomes an unusable non-map. */
+    CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
+        other.map = nullptr;
+    }
+    CharStringMap(const CharStringMap &other) = delete;
+    ~CharStringMap() {
+        uhash_close(map);
+    }
+
+    /**
+     * Takes over other's table; other becomes an unusable non-map.
+     * The table owned so far is closed first. Self-assignment is a no-op.
+     */
+    CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
+        if (this != &other) {
+            // Close our current table, exactly as the destructor would,
+            // so that it is not leaked when its pointer is overwritten.
+            uhash_close(map);
+            map = other.map;
+            other.map = nullptr;
+        }
+        return *this;
+    }
+    CharStringMap &operator=(const CharStringMap &other) = delete;
+
+    /** Returns the value stored for key, cast from the table's void * value. */
+    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+    /** Stores the key and value pointers as is; errorCode is passed on to uhash_put(). */
+    void put(const char *key, const char *value, UErrorCode &errorCode) {
+        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+    }
+
+private:
+    UHashtable *map;
+};
+
+// Distance/matcher data that XLikelySubtagsData::load() reads from the "match" table.
+// Movable but not copy-assignable. The destructor releases partitions and paradigms only.
+struct LocaleDistanceData {
+ LocaleDistanceData() = default;
+ LocaleDistanceData(LocaleDistanceData &&data);
+ ~LocaleDistanceData();
+
+ const uint8_t *distanceTrieBytes = nullptr;
+ const uint8_t *regionToPartitions = nullptr;
+ const char **partitions = nullptr; // released with uprv_free() in the destructor
+ const LSR *paradigms = nullptr; // released with delete[] in the destructor
+ int32_t paradigmsLength = 0;
+ const int32_t *distances = nullptr;
+
+private:
+ LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
+};
+
+// TODO(ICU-20777): Rename to just LikelySubtags.
+// Likely-subtags lookup over the langInfo trie, available as a singleton via getSingleton().
+// Also carries the LocaleDistanceData that is loaded from the same resource bundle.
+class XLikelySubtags final : public UMemory {
+public:
+ ~XLikelySubtags();
+
+ // Trie value that trieNext() reports for an intermediate (non-final) match.
+ static constexpr int32_t SKIP_SCRIPT = 1;
+
+ // VisibleForTesting
+ static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
+
+ // VisibleForTesting
+ LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
+
+ // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+ // in loclikely.cpp to this new code, including activating this
+ // minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+ LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
+ ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
+#endif
+
+ // visible for LocaleDistance
+ const LocaleDistanceData &getDistanceData() const { return distanceData; }
+
+private:
+ // Takes over the loaded pieces from data; see the constructor definition.
+ XLikelySubtags(XLikelySubtagsData &data);
+ XLikelySubtags(const XLikelySubtags &other) = delete;
+ XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
+
+ // Creates the singleton; passed to umtx_initOnce() by getSingleton().
+ static void initLikelySubtags(UErrorCode &errorCode);
+
+ LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
+ const char *variant, UErrorCode &errorCode) const;
+
+ /**
+ * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+ */
+ LSR maximize(const char *language, const char *script, const char *region) const;
+
+ // Feeds the subtag s, starting at index i, into iter.
+ // Returns -1 for no match, 0 for no value, else the trie value.
+ static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
+
+ UResourceBundle *langInfoBundle;
+ // We could store the strings by value, except that if there were few enough strings,
+ // moving the contents could copy it to a different array,
+ // invalidating the pointers stored in the maps.
+ CharString *strings;
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+
+ // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+ // There is also a trie value for each intermediate lang and lang+script.
+ // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+ BytesTrie trie;
+ uint64_t trieUndState; // trie state after "*" (und)
+ uint64_t trieUndZzzzState; // trie state after "**" (und-Zzzz)
+ int32_t defaultLsrIndex; // trie value for "***"
+ // Trie states after the first language letter a..z; maximize() treats 0 as "no cached state".
+ uint64_t trieFirstLetterStates[26];
+ const LSR *lsrs;
+#if U_DEBUG
+ int32_t lsrsLength;
+#endif
+
+ // distance/matcher data: see comment in XLikelySubtagsData::load()
+ LocaleDistanceData distanceData;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCLIKELYSUBTAGS_H__
diff --git a/icu4c/source/common/lsr.cpp b/icu4c/source/common/lsr.cpp
new file mode 100644
index 0000000..0c28eed
--- /dev/null
+++ b/icu4c/source/common/lsr.cpp
@@ -0,0 +1,101 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// lsr.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "lsr.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+// Builds one owned buffer laid out as: prefix + lang, NUL, prefix + scr, NUL.
+// language points at its start and script at scriptOffset; region is aliased, not copied.
+// If errorCode is or becomes a failure, language and script stay nullptr.
+LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
+ language(nullptr), script(nullptr), region(r),
+ regionIndex(indexForRegion(region)) {
+ if (U_SUCCESS(errorCode)) {
+ CharString langScript;
+ langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode);
+ int32_t scriptOffset = langScript.length(); // the script part starts right after the NUL
+ langScript.append(prefix, errorCode).append(scr, errorCode);
+ owned = langScript.cloneData(errorCode);
+ if (U_SUCCESS(errorCode)) {
+ language = owned;
+ script = owned + scriptOffset;
+ }
+ }
+}
+
+// Move constructor: copies all fields. If other owned a string buffer, ownership moves here
+// and other is left with empty language/script strings, no owned buffer and hashCode 0.
+LSR::LSR(LSR &&other) U_NOEXCEPT :
+ language(other.language), script(other.script), region(other.region), owned(other.owned),
+ regionIndex(other.regionIndex), hashCode(other.hashCode) {
+ if (owned != nullptr) {
+ other.language = other.script = "";
+ other.owned = nullptr;
+ other.hashCode = 0;
+ }
+}
+
+// Frees the owned string buffer with uprv_free().
+void LSR::deleteOwned() {
+ uprv_free(owned);
+}
+
+/**
+ * Move assignment: releases this object's owned buffer, then takes over all of other's fields.
+ * If other owned a string buffer, other is left with empty language/script strings,
+ * no owned buffer and hashCode 0. Self-assignment is a no-op.
+ */
+LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
+    if (this == &other) {
+        // Destroying *this first would free the very buffer we are about to "take over",
+        // leaving language/script/owned dangling.
+        return *this;
+    }
+    this->~LSR();
+    language = other.language;
+    script = other.script;
+    region = other.region;
+    regionIndex = other.regionIndex;
+    owned = other.owned;
+    hashCode = other.hashCode;
+    if (owned != nullptr) {
+        // The buffer now belongs to *this; keep other valid and destructible without it.
+        other.language = other.script = "";
+        other.owned = nullptr;
+        other.hashCode = 0;
+    }
+    return *this;
+}
+
+/**
+ * Two LSRs are equal if their language and script strings are equal and their regions match.
+ * Regions are compared by index; the region strings are compared only when
+ * both indexes are 0, that is, when both regions are ill-formed.
+ */
+UBool LSR::operator==(const LSR &other) const {
+    if (regionIndex != other.regionIndex) {
+        return FALSE;
+    }
+    if (uprv_strcmp(language, other.language) != 0 ||
+            uprv_strcmp(script, other.script) != 0) {
+        return FALSE;
+    }
+    // Compare regions if both are ill-formed (and their indexes are 0).
+    return regionIndex > 0 || uprv_strcmp(region, other.region) == 0;
+}
+
+/**
+ * Maps a region code to a positive index, or to 0 if it is ill-formed.
+ * Three digits ("419") map to 1..1000; two uppercase letters ("DE") map to 1001 and up.
+ */
+int32_t LSR::indexForRegion(const char *region) {
+    int32_t digit0 = region[0] - '0';
+    if (digit0 < 0 || 9 < digit0) {
+        // letters: "DE"
+        int32_t letter0 = uprv_upperOrdinal(region[0]);
+        if (letter0 < 0 || 25 < letter0) { return 0; }
+        int32_t letter1 = uprv_upperOrdinal(region[1]);
+        // region[2] is read only after region[1] proved to be a letter, never past the NUL.
+        if (letter1 < 0 || 25 < letter1 || region[2] != 0) { return 0; }
+        return 26 * letter0 + letter1 + 1001;
+    }
+    // digits: "419"
+    int32_t digit1 = region[1] - '0';
+    if (digit1 < 0 || 9 < digit1) { return 0; }
+    int32_t digit2 = region[2] - '0';
+    if (digit2 < 0 || 9 < digit2 || region[3] != 0) { return 0; }
+    return (10 * digit0 + digit1) * 10 + digit2 + 1;
+}
+
+/**
+ * Computes and stores the hash code from language, script and regionIndex,
+ * unless a non-zero hash code is already stored. Returns *this.
+ */
+LSR &LSR::setHashCode() {
+    if (hashCode != 0) {
+        return *this;  // already computed
+    }
+    const auto languageHash =
+        ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
+    const auto scriptHash =
+        ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
+    hashCode = (languageHash * 37 + scriptHash) * 37 + regionIndex;
+    return *this;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/lsr.h b/icu4c/source/common/lsr.h
new file mode 100644
index 0000000..db6cf93
--- /dev/null
+++ b/icu4c/source/common/lsr.h
@@ -0,0 +1,72 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// lsr.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LSR_H__
+#define __LSR_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+// Language, script and region subtags as C strings, plus a numeric region index.
+// The strings are aliased unless the prefixing constructor copied language and script
+// into the owned buffer. Movable but not copyable.
+struct LSR final : public UMemory {
+ // Exclusive upper bound of the values that indexForRegion() returns.
+ static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
+
+ const char *language;
+ const char *script;
+ const char *region;
+ // Buffer holding language and script when they were copied; nullptr when they are aliased.
+ char *owned = nullptr;
+ /** Index for region, 0 if ill-formed. @see indexForRegion */
+ int32_t regionIndex = 0;
+ /** Only set for LSRs that will be used in a hash table. */
+ int32_t hashCode = 0;
+
+ LSR() : language("und"), script(""), region("") {}
+
+ /** Constructor which aliases all subtag pointers. */
+ LSR(const char *lang, const char *scr, const char *r) :
+ language(lang), script(scr), region(r),
+ regionIndex(indexForRegion(region)) {}
+ /**
+ * Constructor which prepends the prefix to the language and script,
+ * copies those into owned memory, and aliases the region.
+ */
+ LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
+ LSR(LSR &&other) U_NOEXCEPT;
+ LSR(const LSR &other) = delete;
+ inline ~LSR() {
+ // Pure inline code for almost all instances.
+ if (owned != nullptr) {
+ deleteOwned();
+ }
+ }
+
+ LSR &operator=(LSR &&other) U_NOEXCEPT;
+ LSR &operator=(const LSR &other) = delete;
+
+ /**
+ * Returns a positive index (>0) for a well-formed region code.
+ * Do not rely on a particular region->index mapping; it may change.
+ * Returns 0 for ill-formed strings.
+ */
+ static int32_t indexForRegion(const char *region);
+
+ /** Compares language and script by string, and region by index (by string if both are ill-formed). */
+ UBool operator==(const LSR &other) const;
+
+ inline UBool operator!=(const LSR &other) const {
+ return !operator==(other);
+ }
+
+ /** Computes hashCode if it is still 0, and returns *this. */
+ LSR &setHashCode();
+
+private:
+ /** Frees the owned buffer; called from the inline destructor. */
+ void deleteOwned();
+};
+
+U_NAMESPACE_END
+
+#endif // __LSR_H__
diff --git a/icu4c/source/common/resource.h b/icu4c/source/common/resource.h
index ee93d41..5199b85 100644
--- a/icu4c/source/common/resource.h
+++ b/icu4c/source/common/resource.h
@@ -94,13 +94,20 @@
*/
int32_t getSize() const { return length; }
/**
- * @param i Array item index.
+ * @param i Table item index.
* @param key Output-only, receives the key of the i'th item.
* @param value Output-only, receives the value of the i'th item.
* @return TRUE if i is non-negative and less than getSize().
*/
UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
+ /**
+ * @param key Key string to find in the table.
+ * @param value Output-only, receives the value of the item with that key.
+ * @return TRUE if the table contains the key.
+ */
+ UBool findValue(const char *key, ResourceValue &value) const;
+
private:
const uint16_t *keys16;
const int32_t *keys32;
diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h
index d281810..b837fb9 100644
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@@ -39,6 +39,8 @@
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
+ UCLN_COMMON_LIKELY_SUBTAGS,
+ UCLN_COMMON_LOCALE_DISTANCE,
UCLN_COMMON_ULOC,
UCLN_COMMON_CURRENCY,
UCLN_COMMON_LOADED_NORMALIZER2,
diff --git a/icu4c/source/common/uinvchar.cpp b/icu4c/source/common/uinvchar.cpp
index 8ce2350..6e5fb48 100644
--- a/icu4c/source/common/uinvchar.cpp
+++ b/icu4c/source/common/uinvchar.cpp
@@ -445,6 +445,13 @@
return length;
}
+/* Returns true if c is not NUL and equals one of the byte values listed in ebcdicAtSigns. */
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c) {
+ static const uint8_t ebcdicAtSigns[] = {
+ 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; /* 0x00 terminates the list for strchr */
+ return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
+}
+
/* compare invariant strings; variant characters compare less than others and unlike each other */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
@@ -562,6 +569,11 @@
}
U_CAPI char U_EXPORT2
+uprv_ebcdicToAscii(char c) {
+ return (char)asciiFromEbcdic[(uint8_t)c]; /* table lookup, indexed by the unsigned byte value of c */
+}
+
+U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
}
diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h
index 56dddfa..a43cfcd 100644
--- a/icu4c/source/common/uinvchar.h
+++ b/icu4c/source/common/uinvchar.h
@@ -68,6 +68,75 @@
# error Unknown charset family!
#endif
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Like U_UPPER_ORDINAL(x) but with validation.
+ * Returns 0..25 for A..Z else a value outside 0..25.
+ */
+inline int32_t uprv_upperOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ return c - 'A';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
+ // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+ if (c <= 'I') { return c - 'A'; } // A-I --> 0-8
+ if (c < 'J') { return -1; }
+ if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17
+ if (c < 'S') { return -1; }
+ return c - 'S' + 18; // S-Z --> 18..25
+#else
+# error Unknown charset family!
+#endif
+}
+
+// Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
+// Returns 0..25 for a..z else a value outside 0..25.
+inline int32_t uprv_lowerOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ return c - 'a';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
+ // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+ if (c <= 'i') { return c - 'a'; } // a-i --> 0-8
+ if (c < 'j') { return -1; }
+ if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17
+ if (c < 's') { return -1; }
+ return c - 's' + 18; // s-z --> 18..25
+#else
+# error Unknown charset family!
+#endif
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns true if c == '@' is possible.
+ * The @ sign is variant, and the @ sign used on one
+ * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
+ * @internal
+ */
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c);
+
+/**
+ * \def uprv_isAtSign
+ * Returns true if c == '@' is possible.
+ * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_isAtSign(c) ((c)=='@')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
+#else
+# error Unknown charset family!
+#endif
+
/**
* Compare two EBCDIC invariant-character strings in ASCII order.
* @internal
@@ -89,6 +158,26 @@
#endif
/**
+ * Converts an EBCDIC invariant character to ASCII.
+ * @internal
+ */
+U_INTERNAL char U_EXPORT2
+uprv_ebcdicToAscii(char c);
+
+/**
+ * \def uprv_invCharToAscii
+ * Converts an invariant character to ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_invCharToAscii(c) (c)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
+#else
+# error Unknown charset family!
+#endif
+
+/**
* Converts an EBCDIC invariant character to lowercase ASCII.
* @internal
*/
diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
index d3f587c..6805ab1 100644
--- a/icu4c/source/common/unicode/localebuilder.h
+++ b/icu4c/source/common/unicode/localebuilder.h
@@ -8,10 +8,10 @@
#if U_SHOW_CPLUSPLUS_API
#include "unicode/locid.h"
+#include "unicode/localematcher.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
-
#ifndef U_HIDE_DRAFT_API
/**
* \file
@@ -291,6 +291,10 @@
UBool copyErrorTo(UErrorCode &outErrorCode) const;
private:
+ friend class LocaleMatcher::Result;
+
+ void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
+
UErrorCode status_;
char language_[9];
char script_[5];
diff --git a/icu4c/source/common/unicode/localematcher.h b/icu4c/source/common/unicode/localematcher.h
new file mode 100644
index 0000000..701123f
--- /dev/null
+++ b/icu4c/source/common/unicode/localematcher.h
@@ -0,0 +1,605 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematcher.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCALEMATCHER_H__
+#define __LOCALEMATCHER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Builder option for whether the language subtag or the script subtag is most important.
+ *
+ * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
+ * @draft ICU 65
+ */
+enum ULocMatchFavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_FAVOR_LANGUAGE,
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_FAVOR_SCRIPT
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
+#endif
+
+/**
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
+ * @draft ICU 65
+ */
+enum ULocMatchDemotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_DEMOTION_NONE,
+ /**
+ * Earlier desired locales are preferred.
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_DEMOTION_REGION
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDemotion ULocMatchDemotion;
+#endif
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LSR;
+
+class LocaleDistance;
+class LocaleLsrIterator;
+class UVector;
+class XLikelySubtags;
+
+/**
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
+ * Movable but not copyable.
+ *
+ * <p>Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
+ * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
+ * </pre>
+ *
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
+ * <p>Often a product will just need one matcher instance, built with the languages
+ * that it supports. However, it may want multiple instances with different
+ * default languages based on additional information, such as the domain.
+ *
+ * <p>This class is not intended for public subclassing.
+ *
+ * @draft ICU 65
+ */
+class U_COMMON_API LocaleMatcher : public UMemory {
+public:
+ /**
+ * Data for the best-matching pair of a desired and a supported locale.
+ * Movable but not copyable.
+ *
+ * @draft ICU 65
+ */
+ class U_COMMON_API Result : public UMemory {
+ public:
+ /**
+ * Move constructor; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @draft ICU 65
+ */
+ Result(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @draft ICU 65
+ */
+ ~Result();
+
+ /**
+ * Move assignment; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @draft ICU 65
+ */
+ Result &operator=(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the best-matching desired locale.
+ * nullptr if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or nullptr.
+ * @draft ICU 65
+ */
+ inline const Locale *getDesiredLocale() const { return desiredLocale; }
+
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is nullptr if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or nullptr.
+ * @draft ICU 65
+ */
+ inline const Locale *getSupportedLocale() const { return supportedLocale; }
+
+ /**
+ * Returns the index of the best-matching desired locale in the input iteration order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @draft ICU 65
+ */
+ inline int32_t getDesiredIndex() const { return desiredIndex; }
+
+ /**
+ * Returns the index of the best-matching supported locale in the
+ * builder’s input order (“set” locales plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @draft ICU 65
+ */
+ inline int32_t getSupportedIndex() const { return supportedIndex; }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ * Returns the root locale if getSupportedLocale() returns nullptr.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
+ *
+ * @return a locale combining the best-matching desired and supported locales.
+ * @draft ICU 65
+ */
+ Locale makeResolvedLocale(UErrorCode &errorCode) const;
+
+ private:
+ Result(const Locale *desired, const Locale *supported,
+ int32_t desIndex, int32_t suppIndex, UBool owned) :
+ desiredLocale(desired), supportedLocale(supported),
+ desiredIndex(desIndex), supportedIndex(suppIndex),
+ desiredIsOwned(owned) {}
+
+ Result(const Result &other) = delete;
+ Result &operator=(const Result &other) = delete;
+
+ const Locale *desiredLocale;
+ const Locale *supportedLocale;
+ int32_t desiredIndex;
+ int32_t supportedIndex;
+ UBool desiredIsOwned;
+
+ friend class LocaleMatcher;
+ };
+
+ /**
+ * LocaleMatcher builder.
+ * Movable but not copyable.
+ *
+ * @see LocaleMatcher#builder()
+ * @draft ICU 65
+ */
+ class U_COMMON_API Builder : public UMemory {
+ public:
+ /**
+ * Constructs a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @draft ICU 65
+ */
+ Builder() {}
+
+ /**
+ * Move constructor; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @draft ICU 65
+ */
+ Builder(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @draft ICU 65
+ */
+ ~Builder();
+
+ /**
+ * Move assignment; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @draft ICU 65
+ */
+ Builder &operator=(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
+ * Allows whitespace in more places but does not allow "*".
+ * Clears any previously set/added supported locales first.
+ *
+ * @param locales the Accept-Language string of locales to set
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setSupportedLocalesFromListString(StringPiece locales);
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setSupportedLocales(Locale::Iterator &locales);
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ template<typename Iter>
+ Builder &setSupportedLocales(Iter begin, Iter end) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(*begin++);
+ }
+ return *this;
+ }
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Calls the converter to convert each *begin to a Locale or const Locale &.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ template<typename Iter, typename Conv>
+ Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(converter(*begin++));
+ }
+ return *this;
+ }
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale another locale
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &addSupportedLocale(const Locale &locale);
+
+ /**
+ * Sets the default locale; if nullptr, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale (will be copied)
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setDefaultLocale(const Locale *defaultLocale);
+
+ /**
+ * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
+ * differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
+
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
+
+ /**
+ * Sets the UErrorCode if an error occurred while setting parameters.
+ * Preserves older error codes in the outErrorCode.
+ *
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while setting parameters.
+ * Otherwise unchanged.
+ * @return TRUE if U_FAILURE(outErrorCode)
+ * @draft ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return new LocaleMatcher.
+ * @draft ICU 65
+ */
+ LocaleMatcher build(UErrorCode &errorCode) const;
+
+ private:
+ friend class LocaleMatcher;
+
+ Builder(const Builder &other) = delete;
+ Builder &operator=(const Builder &other) = delete;
+
+ void clearSupportedLocales();
+ bool ensureSupportedLocaleVector();
+
+ UErrorCode errorCode_ = U_ZERO_ERROR;
+ UVector *supportedLocales_ = nullptr;
+ int32_t thresholdDistance_ = -1;
+ ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
+ Locale *defaultLocale_ = nullptr;
+ ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
+ };
+
+ // FYI No public LocaleMatcher constructors in C++; use the Builder.
+
+ /**
+ * Move constructor; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * @param src source matcher
+ * @draft ICU 65
+ */
+ LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ * @draft ICU 65
+ */
+ ~LocaleMatcher();
+
+ /**
+ * Move assignment operator; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source matcher
+ * @return *this
+ * @draft ICU 65
+ */
+ LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the supported locale which best matches the desired locale.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the supported locale which best matches one of the desired locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9",
+ * and returns the supported locale which best matches one of the desired locales.
+ * Allows whitespace in more places but does not allow "*".
+ *
+ * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired locale and the supported locales.
+ * If the result's desired locale is not nullptr, then it is the address of the input locale.
+ * It has not been cloned.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of the desired and a supported locale.
+ * @draft ICU 65
+ */
+ Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ * If the result's desired locale is not nullptr, then it is a clone of
+ * the best-matching desired locale. The Result object owns the clone.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of a desired and a supported locale.
+ * @draft ICU 65
+ */
+ Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different.
+ *
+ * <p>This is mostly an implementation detail, and the precise values may change over time.
+ * The implementation may use either the maximized forms or the other ones, or both.
+ * The implementation may or may not rely on the forms to be consistent with each other.
+ *
+ * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+ *
+ * @param desired Desired locale.
+ * @param supported Supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return value between 0 and 1, inclusive.
+ * @internal (has a known user)
+ */
+ double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
+ LocaleMatcher(const LocaleMatcher &other) = delete;
+ LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
+
+ int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
+
+ const XLikelySubtags &likelySubtags;
+ const LocaleDistance &localeDistance;
+ int32_t thresholdDistance;
+ int32_t demotionPerDesiredLocale;
+ ULocMatchFavorSubtag favorSubtag;
+
+ // These are in input order.
+ const Locale ** supportedLocales;
+ LSR *lsrs;
+ int32_t supportedLocalesLength;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
+ // Array versions of the supportedLsrToIndex keys and values.
+ // The distance lookup loops over the supportedLSRs and returns the index of the best match.
+ const LSR **supportedLSRs;
+ int32_t *supportedIndexes;
+ int32_t supportedLSRsLength;
+ Locale *ownedDefaultLocale;
+ const Locale *defaultLocale;
+ int32_t defaultLocaleIndex;
+};
+
+U_NAMESPACE_END
+
+#endif // U_HIDE_DRAFT_API
+#endif // U_SHOW_CPLUSPLUS_API
+#endif // __LOCALEMATCHER_H__
diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h
index 8048c1e..7ed070b 100644
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@@ -1011,6 +1011,104 @@
*/
virtual UClassID getDynamicClassID() const;
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * A Locale iterator interface similar to a Java Iterator<Locale>.
+ * @draft ICU 65
+ */
+ class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
+ public:
+ /** @draft ICU 65 */
+ virtual ~Iterator();
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ virtual UBool hasNext() const = 0;
+
+ /**
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ virtual const Locale &next() = 0;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * @draft ICU 65
+ */
+ template<typename Iter>
+ class RangeIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @draft ICU 65
+ */
+ RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ const Locale &next() override { return *it_++; }
+
+ private:
+ Iter it_;
+ const Iter end_;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * Calls the converter to convert each *begin to a const Locale &.
+ * @draft ICU 65
+ */
+ template<typename Iter, typename Conv>
+ class ConvertingIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @draft ICU 65
+ */
+ ConvertingIterator(Iter begin, Iter end, Conv converter) :
+ it_(begin), end_(end), converter_(converter) {}
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ const Locale &next() override { return converter_(*it_++); }
+
+ private:
+ Iter it_;
+ const Iter end_;
+ Conv converter_;
+ };
+#endif // U_HIDE_DRAFT_API
+
protected: /* only protected for testing purposes. DO NOT USE. */
#ifndef U_HIDE_INTERNAL_API
/**
diff --git a/icu4c/source/common/uresbund.cpp b/icu4c/source/common/uresbund.cpp
index 124ed5b..6c0e760 100644
--- a/icu4c/source/common/uresbund.cpp
+++ b/icu4c/source/common/uresbund.cpp
@@ -39,6 +39,7 @@
#include "umutex.h"
#include "putilimp.h"
#include "uassert.h"
+#include "uresdata.h"
using namespace icu;
@@ -1952,7 +1953,7 @@
// When the sink sees the no-fallback/no-inheritance marker,
// then it would remove the parent's item.
// We would deserialize parent values even though they are overridden in a child bundle.
- value.pResData = &bundle->fResData;
+ value.setData(&bundle->fResData);
UResourceDataEntry *parentEntry = bundle->fData->fParent;
UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
value.setResource(bundle->fRes, ResourceTracer(bundle));
@@ -2000,31 +2001,60 @@
} // namespace
+// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue.
+// Unfortunately, the caller must know which subclass to make and pass in.
+// Alternatively, we could make it as polymorphic as in Java by
+// returning a ResourceValue pointer (possibly wrapped into a LocalPointer)
+// that the caller then owns.
+//
+// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer
+// can point to a non-local bundle.
+// Without tracing, the child bundle could be a function-local object.
U_CAPI void U_EXPORT2
-ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
- icu::ResourceSink &sink, UErrorCode &errorCode) {
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+ UResourceBundle *tempFillIn,
+ ResourceDataValue &value, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
- if (path == NULL) {
+ if (path == nullptr) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- UResourceBundle stackBundle;
- ures_initStackObject(&stackBundle);
const UResourceBundle *rb;
if (*path == 0) {
// empty path
rb = bundle;
} else {
- rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode);
+ rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
if (U_FAILURE(errorCode)) {
- ures_close(&stackBundle);
+ return;
+ }
+ }
+ value.setData(&rb->fResData);
+ value.setResource(rb->fRes, ResourceTracer(rb));
+}
+
+U_CAPI void U_EXPORT2
+ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
+ icu::ResourceSink &sink, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return; }
+ if (path == nullptr) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ StackUResourceBundle stackBundle;
+ const UResourceBundle *rb;
+ if (*path == 0) {
+ // empty path
+ rb = bundle;
+ } else {
+ rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
+ if (U_FAILURE(errorCode)) {
return;
}
}
// Get all table items with fallback.
ResourceDataValue value;
getAllItemsWithFallback(rb, value, sink, errorCode);
- ures_close(&stackBundle);
}
U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {
diff --git a/icu4c/source/common/uresdata.cpp b/icu4c/source/common/uresdata.cpp
index ce04142..b3c2e2e 100644
--- a/icu4c/source/common/uresdata.cpp
+++ b/icu4c/source/common/uresdata.cpp
@@ -509,7 +509,7 @@
if(U_FAILURE(errorCode)) {
return NULL;
}
- const UChar *s = res_getString(fTraceInfo, pResData, res, &length);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -520,7 +520,7 @@
if(U_FAILURE(errorCode)) {
return NULL;
}
- const UChar *s = res_getAlias(pResData, res, &length);
+ const UChar *s = res_getAlias(&getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -551,7 +551,7 @@
if(U_FAILURE(errorCode)) {
return NULL;
}
- const int32_t *iv = res_getIntVector(fTraceInfo, pResData, res, &length);
+ const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length);
if(iv == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -562,7 +562,7 @@
if(U_FAILURE(errorCode)) {
return NULL;
}
- const uint8_t *b = res_getBinary(fTraceInfo, pResData, res, &length);
+ const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length);
if(b == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -580,12 +580,12 @@
switch(RES_GET_TYPE(res)) {
case URES_ARRAY:
if (offset!=0) { // empty if offset==0
- items32 = (const Resource *)pResData->pRoot+offset;
+ items32 = (const Resource *)getData().pRoot+offset;
length = *items32++;
}
break;
case URES_ARRAY16:
- items16 = pResData->p16BitUnits+offset;
+ items16 = getData().p16BitUnits+offset;
length = *items16++;
break;
default:
@@ -608,19 +608,19 @@
switch(RES_GET_TYPE(res)) {
case URES_TABLE:
if (offset != 0) { // empty if offset==0
- keys16 = (const uint16_t *)(pResData->pRoot+offset);
+ keys16 = (const uint16_t *)(getData().pRoot+offset);
length = *keys16++;
items32 = (const Resource *)(keys16+length+(~length&1));
}
break;
case URES_TABLE16:
- keys16 = pResData->p16BitUnits+offset;
+ keys16 = getData().p16BitUnits+offset;
length = *keys16++;
items16 = keys16 + length;
break;
case URES_TABLE32:
if (offset != 0) { // empty if offset==0
- keys32 = pResData->pRoot+offset;
+ keys32 = getData().pRoot+offset;
length = *keys32++;
items32 = (const Resource *)keys32 + length;
}
@@ -633,18 +633,18 @@
}
UBool ResourceDataValue::isNoInheritanceMarker() const {
- return ::isNoInheritanceMarker(pResData, res);
+ return ::isNoInheritanceMarker(&getData(), res);
}
int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
- return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
+ return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
}
int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
if(URES_IS_ARRAY(res)) {
- return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
+ return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
}
if(U_FAILURE(errorCode)) {
return 0;
@@ -658,7 +658,7 @@
return 1;
}
int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
dest[0].setTo(TRUE, s, sLength);
return 1;
@@ -673,7 +673,7 @@
return us;
}
int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@@ -684,7 +684,7 @@
}
if(array.getSize() > 0) {
// Tracing is already performed above (unimportant for trace that this is an array)
- s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, 0), &sLength);
+ s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@@ -821,14 +821,14 @@
const char *&key, icu::ResourceValue &value) const {
if(0 <= i && i < length) {
icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
- if (keys16 != NULL) {
- key = RES_GET_KEY16(rdValue.pResData, keys16[i]);
+ if (keys16 != nullptr) {
+ key = RES_GET_KEY16(&rdValue.getData(), keys16[i]);
} else {
- key = RES_GET_KEY32(rdValue.pResData, keys32[i]);
+ key = RES_GET_KEY32(&rdValue.getData(), keys32[i]);
}
Resource res;
- if (items16 != NULL) {
- res = makeResourceFrom16(rdValue.pResData, items16[i]);
+ if (items16 != nullptr) {
+ res = makeResourceFrom16(&rdValue.getData(), items16[i]);
} else {
res = items32[i];
}
@@ -842,6 +842,29 @@
return FALSE;
}
+// Looks up `key` in this table. On a hit, points `value` at the matching item
+// and returns TRUE; on a miss, returns FALSE without touching `value`.
+// `value` is downcast to ResourceDataValue without a check; its ResourceData is
+// what the key lookup and the 16-bit item conversion are resolved against.
+UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {
+    icu::ResourceDataValue &dataValue = static_cast<icu::ResourceDataValue &>(value);
+    const ResourceData *data = &dataValue.getData();
+    // Out-parameter of the find helpers; its value is not used here.
+    const char *foundKey = nullptr;
+    const int32_t index = (keys16 != nullptr) ?
+        _res_findTableItem(data, keys16, length, key, &foundKey) :
+        _res_findTable32Item(data, keys32, length, key, &foundKey);
+    if (index < 0) {
+        return FALSE;
+    }
+    const Resource item = (items16 != nullptr) ?
+        makeResourceFrom16(data, items16[index]) : items32[index];
+    // Same note about lifetime as in getKeyAndValue().
+    dataValue.setResource(item, ResourceTracer(fTraceInfo, key));
+    return TRUE;
+}
+
U_CAPI Resource U_EXPORT2
res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
uint32_t offset=RES_GET_OFFSET(array);
@@ -887,7 +910,7 @@
// alive for the duration that fields are being read from it
// (including nested fields).
rdValue.setResource(
- internalGetResource(rdValue.pResData, i),
+ internalGetResource(&rdValue.getData(), i),
ResourceTracer(fTraceInfo, i));
return TRUE;
}
diff --git a/icu4c/source/common/uresdata.h b/icu4c/source/common/uresdata.h
index 5164740..d1b67ba 100644
--- a/icu4c/source/common/uresdata.h
+++ b/icu4c/source/common/uresdata.h
@@ -511,13 +511,12 @@
class ResourceDataValue : public ResourceValue {
public:
ResourceDataValue() :
- pResData(NULL),
res(static_cast<Resource>(URES_NONE)),
fTraceInfo() {}
virtual ~ResourceDataValue();
void setData(const ResourceData *data) {
- pResData = data;
+ resData = *data;
}
void setResource(Resource r, ResourceTracer&& traceInfo) {
@@ -525,6 +524,7 @@
fTraceInfo = traceInfo;
}
+ const ResourceData &getData() const { return resData; }
virtual UResType getType() const;
virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
@@ -541,9 +541,10 @@
UErrorCode &errorCode) const;
virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
- const ResourceData *pResData;
-
private:
+ // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
+ // then remove this value field again and just store a pResData pointer.
+ ResourceData resData;
Resource res;
ResourceTracer fTraceInfo;
};
diff --git a/icu4c/source/common/uresimp.h b/icu4c/source/common/uresimp.h
index 51db6c5..f453ddc 100644
--- a/icu4c/source/common/uresimp.h
+++ b/icu4c/source/common/uresimp.h
@@ -67,6 +67,9 @@
char *fVersion;
UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
+ // TODO(ICU-20769): Try to change the by-value fResData into a pointer,
+ // with the struct in only one place for each bundle.
+ // Also replace class ResourceDataValue.resData with a pResData pointer again.
ResourceData fResData;
char fResBuf[RES_BUFSIZE];
int32_t fResPathLen;
@@ -282,6 +285,11 @@
#ifdef __cplusplus
U_CAPI void U_EXPORT2
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+ UResourceBundle *tempFillIn,
+ icu::ResourceDataValue &value, UErrorCode &errorCode);
+
+U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode);
diff --git a/icu4c/source/test/cintltst/cstrtest.c b/icu4c/source/test/cintltst/cstrtest.c
index d9bea20..cfc3a3d 100644
--- a/icu4c/source/test/cintltst/cstrtest.c
+++ b/icu4c/source/test/cintltst/cstrtest.c
@@ -27,11 +27,17 @@
static void TestInvariant(void);
static void TestCompareInvEbcdicAsAscii(void);
+static void TestLocaleAtSign(void);
+static void TestNoInvariantAtSign(void);
+static void TestInvCharToAscii(void);
void addCStringTest(TestNode** root) {
- addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
- addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
+ addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
+ addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
addTest(root, &TestCompareInvEbcdicAsAscii, "tsutil/cstrtest/TestCompareInvEbcdicAsAscii");
+ addTest(root, &TestLocaleAtSign, "tsutil/cstrtest/TestLocaleAtSign");
+ addTest(root, &TestNoInvariantAtSign, "tsutil/cstrtest/TestNoInvariantAtSign");
+ addTest(root, &TestInvCharToAscii, "tsutil/cstrtest/TestInvCharToAscii");
}
static void TestAPI(void)
@@ -339,3 +345,53 @@
}
}
}
+
+// The same sequence of characters twice, index for index:
+// once as a plain (execution-charset) char string literal,
+// and once as a u"" literal of UChars.
+// TestInvCharToAscii() compares the two element by element.
+// See U_CHARSET_FAMILY in unicode/platform.h.
+static const char *nativeInvChars =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789 \"%&'()*+,-./:;<=>?_";
+static const UChar *asciiInvChars =
+ u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ u"abcdefghijklmnopqrstuvwxyz"
+ u"0123456789 \"%&'()*+,-./:;<=>?_";
+
+// Checks that uprv_isAtSign() reports TRUE for exactly one character of a
+// sample locale ID, the '@' at index 10, and FALSE for every other character.
+static void
+TestLocaleAtSign() {
+    static const char *localeID = "de-Latn_DE@PHONEBOOK";
+    const int32_t atSignIndex = 10;  /* position of '@' in localeID */
+    int32_t pos;
+    for (pos = 0; localeID[pos] != 0; ++pos) {
+        char c = localeID[pos];
+        UBool shouldBeAt = pos == atSignIndex;
+        UBool isAt = uprv_isAtSign(c);
+        if (isAt != shouldBeAt) {
+            log_err("uprv_isAtSign('%c')=%d is wrong\n", c, (int)isAt);
+        }
+    }
+}
+
+// The at sign is not an invariant character:
+// uprv_isAtSign() must return FALSE for every character of nativeInvChars,
+// including the terminating NUL, which is checked last.
+static void
+TestNoInvariantAtSign() {
+    const char *p = nativeInvChars;
+    char c;
+    do {
+        c = *p++;
+        if (uprv_isAtSign(c)) {
+            log_err("uprv_isAtSign(invariant '%c')=TRUE is wrong\n", c);
+        }
+    } while (c != 0);
+}
+
+// Walks nativeInvChars and asciiInvChars in parallel, terminating NUL included,
+// and checks that uprv_invCharToAscii() maps each native char to the low byte
+// of the UChar at the same index.
+static void
+TestInvCharToAscii() {
+    int32_t idx = 0;
+    char nativeChar;
+    do {
+        nativeChar = nativeInvChars[idx];
+        uint8_t expectedAscii = asciiInvChars[idx];
+        uint8_t converted = uprv_invCharToAscii(nativeChar);
+        if (converted != expectedAscii) {
+            log_err("uprv_invCharToAscii('%c') did not convert to ASCII 0x%02x\n",
+                    nativeChar, (int)expectedAscii);
+        }
+        ++idx;
+    } while (nativeChar != 0);
+}
diff --git a/icu4c/source/test/cintltst/tracetst.c b/icu4c/source/test/cintltst/tracetst.c
index ed99742..4ea7f0e 100644
--- a/icu4c/source/test/cintltst/tracetst.c
+++ b/icu4c/source/test/cintltst/tracetst.c
@@ -187,7 +187,8 @@
/* printf(" %s() %s\n", fnName, buf); */
}
-static UConverter * psuedo_ucnv_open(const char *name, UErrorCode * err)
+#if !ENABLE_TRACING_ORIG_VAL
+static UConverter * pseudo_ucnv_open(const char *name, UErrorCode * err)
{
UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
@@ -196,13 +197,13 @@
UTRACE_EXIT_PTR_STATUS(NULL, *err);
return NULL;
}
-static void psuedo_ucnv_close(UConverter * cnv)
+static void pseudo_ucnv_close(UConverter * cnv)
{
UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
UTRACE_DATA1(UTRACE_OPEN_CLOSE, "unload converter %p", cnv);
UTRACE_EXIT_VALUE((int32_t)TRUE);
}
-
+#endif
/*
* TestTraceAPI
@@ -282,9 +283,9 @@
TEST_ASSERT(U_SUCCESS(status));
ucnv_close(cnv);
#else
- cnv = psuedo_ucnv_open(NULL, &status);
+ cnv = pseudo_ucnv_open(NULL, &status);
TEST_ASSERT(U_SUCCESS(status));
- psuedo_ucnv_close(cnv);
+ pseudo_ucnv_close(cnv);
#endif
TEST_ASSERT(gTraceEntryCount > 0);
TEST_ASSERT(gTraceExitCount > 0);
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt
index 33c9d1a..750db6a 100644
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -184,7 +184,7 @@
uinit utypes errorcode
icuplug
platform
- localebuilder
+ localebuilder localematcher
group: pluralmap
# TODO: Move to i18n library, ticket #11926.
@@ -631,7 +631,7 @@
# We can probably only disentangle basic locale ID handling from resource bundle code
# by hardcoding all of the locale ID data.
locid.o locmap.o wintz.o
- # Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
+ # Do we need class LocaleBased? https://unicode-org.atlassian.net/browse/ICU-8608
locbased.o
loclikely.o
deps
@@ -646,6 +646,31 @@
deps
resourcebundle
+group: localematcher
+ localematcher.o
+ deps
+ localebuilder localeprioritylist loclikelysubtags locdistance lsr
+
+group: localeprioritylist
+ localeprioritylist.o
+ deps
+ resourcebundle
+
+group: locdistance
+ locdistance.o
+ deps
+ loclikelysubtags
+
+group: loclikelysubtags
+ loclikelysubtags.o
+ deps
+ lsr resourcebundle
+
+group: lsr
+ lsr.o
+ deps
+ platform
+
group: udata
udata.o ucmndata.o udatamem.o restrace.o
umapfile.o
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 625e4e4..bcab2c9 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -44,7 +44,8 @@
fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
-loctest.o localebuildertest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
+loctest.o localebuildertest.o localematchertest.o \
+miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index 80a75a0..5cf154d 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -366,6 +366,7 @@
<ClCompile Include="listformattertest.cpp" />
<ClCompile Include="formattedvaluetest.cpp" />
<ClCompile Include="localebuildertest.cpp" />
+ <ClCompile Include="localematchertest.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="colldata.h" />
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index 88b558c..0bf336b 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -547,6 +547,9 @@
<ClCompile Include="localebuildertest.cpp">
<Filter>locales & resources</Filter>
</ClCompile>
+ <ClCompile Include="localematchertest.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="itrbbi.h">
diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp
index 3cda39d..228dbf2 100644
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@@ -35,6 +35,7 @@
#include "usettest.h"
extern IntlTest *createBytesTrieTest();
+extern IntlTest *createLocaleMatcherTest();
static IntlTest *createLocalPointerTest();
extern IntlTest *createUCharsTrieTest();
static IntlTest *createEnumSetTest();
@@ -46,113 +47,40 @@
extern IntlTest *createStaticUnicodeSetsTest();
#endif
-
-#define CASE(id, test) case id: \
- name = #test; \
- if (exec) { \
- logln(#test "---"); logln(); \
- test t; \
- callTest(t, par); \
- } \
- break
-
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
{
if (exec) logln("TestSuite Utilities: ");
- switch (index) {
- CASE(0, MultithreadTest);
- CASE(1, StringTest);
- CASE(2, UnicodeStringTest);
- CASE(3, LocaleTest);
- CASE(4, CharIterTest);
- CASE(5, UObjectTest);
- CASE(6, UnicodeTest);
- CASE(7, ResourceBundleTest);
- CASE(8, NewResourceBundleTest);
- CASE(9, PUtilTest);
- CASE(10, UVector32Test);
- CASE(11, UVectorTest);
- CASE(12, UTextTest);
- CASE(13, LocaleAliasTest);
- CASE(14, UnicodeSetTest);
- CASE(15, ErrorCodeTest);
- case 16:
- name = "LocalPointerTest";
- if (exec) {
- logln("TestSuite LocalPointerTest---"); logln();
- LocalPointer<IntlTest> test(createLocalPointerTest());
- callTest(*test, par);
- }
- break;
- case 17:
- name = "BytesTrieTest";
- if (exec) {
- logln("TestSuite BytesTrieTest---"); logln();
- LocalPointer<IntlTest> test(createBytesTrieTest());
- callTest(*test, par);
- }
- break;
- case 18:
- name = "UCharsTrieTest";
- if (exec) {
- logln("TestSuite UCharsTrieTest---"); logln();
- LocalPointer<IntlTest> test(createUCharsTrieTest());
- callTest(*test, par);
- }
- break;
- case 19:
- name = "EnumSetTest";
- if (exec) {
- logln("TestSuite EnumSetTest---"); logln();
- LocalPointer<IntlTest> test(createEnumSetTest());
- callTest(*test, par);
- }
- break;
- case 20:
- name = "SimpleFormatterTest";
- if (exec) {
- logln("TestSuite SimpleFormatterTest---"); logln();
- LocalPointer<IntlTest> test(createSimpleFormatterTest());
- callTest(*test, par);
- }
- break;
- case 21:
- name = "UnifiedCacheTest";
- if (exec) {
- logln("TestSuite UnifiedCacheTest---"); logln();
- LocalPointer<IntlTest> test(createUnifiedCacheTest());
- callTest(*test, par);
- }
- break;
- case 22:
- name = "QuantityFormatterTest";
- if (exec) {
- logln("TestSuite QuantityFormatterTest---"); logln();
- LocalPointer<IntlTest> test(createQuantityFormatterTest());
- callTest(*test, par);
- }
- break;
- case 23:
- name = "PluralMapTest";
- if (exec) {
- logln("TestSuite PluralMapTest---"); logln();
- LocalPointer<IntlTest> test(createPluralMapTest());
- callTest(*test, par);
- }
- break;
- case 24:
- name = "StaticUnicodeSetsTest";
+ TESTCASE_AUTO_BEGIN;
+ TESTCASE_AUTO_CLASS(MultithreadTest);
+ TESTCASE_AUTO_CLASS(StringTest);
+ TESTCASE_AUTO_CLASS(UnicodeStringTest);
+ TESTCASE_AUTO_CLASS(LocaleTest);
+ TESTCASE_AUTO_CLASS(CharIterTest);
+ TESTCASE_AUTO_CLASS(UObjectTest);
+ TESTCASE_AUTO_CLASS(UnicodeTest);
+ TESTCASE_AUTO_CLASS(ResourceBundleTest);
+ TESTCASE_AUTO_CLASS(NewResourceBundleTest);
+ TESTCASE_AUTO_CLASS(PUtilTest);
+ TESTCASE_AUTO_CLASS(UVector32Test);
+ TESTCASE_AUTO_CLASS(UVectorTest);
+ TESTCASE_AUTO_CLASS(UTextTest);
+ TESTCASE_AUTO_CLASS(LocaleAliasTest);
+ TESTCASE_AUTO_CLASS(UnicodeSetTest);
+ TESTCASE_AUTO_CLASS(ErrorCodeTest);
+ TESTCASE_AUTO_CREATE_CLASS(LocalPointerTest);
+ TESTCASE_AUTO_CREATE_CLASS(BytesTrieTest);
+ TESTCASE_AUTO_CREATE_CLASS(UCharsTrieTest);
+ TESTCASE_AUTO_CREATE_CLASS(EnumSetTest);
+ TESTCASE_AUTO_CREATE_CLASS(SimpleFormatterTest);
+ TESTCASE_AUTO_CREATE_CLASS(UnifiedCacheTest);
+ TESTCASE_AUTO_CREATE_CLASS(QuantityFormatterTest);
+ TESTCASE_AUTO_CREATE_CLASS(PluralMapTest);
#if !UCONFIG_NO_FORMATTING
- if (exec) {
- logln("TestSuite StaticUnicodeSetsTest---"); logln();
- LocalPointer<IntlTest> test(createStaticUnicodeSetsTest());
- callTest(*test, par);
- }
+ TESTCASE_AUTO_CREATE_CLASS(StaticUnicodeSetsTest);
#endif
- break;
- CASE(25, LocaleBuilderTest);
- default: name = ""; break; //needed to end loop
- }
+ TESTCASE_AUTO_CLASS(LocaleBuilderTest);
+ TESTCASE_AUTO_CREATE_CLASS(LocaleMatcherTest);
+ TESTCASE_AUTO_END;
}
void ErrorCodeTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
diff --git a/icu4c/source/test/intltest/localematchertest.cpp b/icu4c/source/test/intltest/localematchertest.cpp
new file mode 100644
index 0000000..f8cb7a3
--- /dev/null
+++ b/icu4c/source/test/intltest/localematchertest.cpp
@@ -0,0 +1,589 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematchertest.cpp
+// created: 2019jul04 Markus W. Scherer
+
+#include <string>
+#include <vector>
+
+#include "unicode/utypes.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "intltest.h"
+#include "localeprioritylist.h"
+#include "ucbuf.h"
+
+#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))
+
+namespace {
+
+// Returns the locale's name, or the placeholder "(null)" for a null pointer,
+// so that a possibly-missing match can be compared as a C string.
+const char *locString(const Locale *loc) {
+    if (loc == nullptr) {
+        return "(null)";
+    }
+    return loc->getName();
+}
+
+// One test case for the data-driven test (see LocaleMatcherTest::dataDriven()).
+// NOTE(review): presumably filled in by readTestCase() from the test data file — confirm.
+struct TestCase {
+ int32_t lineNr = 0;
+
+ CharString supported;
+ CharString def;
+ UnicodeString favor;
+ UnicodeString threshold;
+ CharString desired;
+ CharString expMatch;
+ CharString expDesired;
+ CharString expCombined;
+
+ // Clears only supported, def, favor and threshold.
+ // lineNr, desired and the exp* fields are left as they are.
+ void reset() {
+ supported.clear();
+ def.clear();
+ favor.remove();
+ threshold.remove();
+ }
+};
+
+} // namespace
+
+// Test suite for LocaleMatcher and LocaleMatcher::Builder.
+class LocaleMatcherTest : public IntlTest {
+public:
+    LocaleMatcherTest() {}
+
+    // Test dispatcher; the definition lists every test method below.
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=nullptr);
+
+    void testEmpty();
+    void testCopyErrorTo();
+    void testBasics();
+    void testSupportedDefault();
+    void testUnsupportedDefault();
+    void testDemotion();
+    void testMatch();
+    void testResolvedLocale();
+    void testDataDriven();
+
+private:
+    // Runs one data-driven test case.
+    // NOTE(review): the meaning of the UBool result is not visible here — confirm at the definition.
+    UBool dataDriven(const TestCase &test, IcuTestErrorCode &errorCode);
+};
+
+// Factory for this test suite; declared extern in itutil.cpp.
+// Returns a heap-allocated LocaleMatcherTest.
+// NOTE(review): presumably the caller takes ownership and deletes it — confirm.
+extern IntlTest *createLocaleMatcherTest() {
+ return new LocaleMatcherTest();
+}
+
+// Test dispatcher: logs a suite banner when exec is set, then lists each test
+// method in a TESTCASE_AUTO entry between TESTCASE_AUTO_BEGIN and TESTCASE_AUTO_END.
+// The par argument is unused.
+void LocaleMatcherTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+ if(exec) {
+ logln("TestSuite LocaleMatcherTest: ");
+ }
+ TESTCASE_AUTO_BEGIN;
+ TESTCASE_AUTO(testEmpty);
+ TESTCASE_AUTO(testCopyErrorTo);
+ TESTCASE_AUTO(testBasics);
+ TESTCASE_AUTO(testSupportedDefault);
+ TESTCASE_AUTO(testUnsupportedDefault);
+ TESTCASE_AUTO(testDemotion);
+ TESTCASE_AUTO(testMatch);
+ TESTCASE_AUTO(testResolvedLocale);
+ TESTCASE_AUTO(testDataDriven);
+ TESTCASE_AUTO_END;
+}
+
+// A matcher built with no supported locales and no default locale:
+// expects no match at all, i.e. null desired/supported locales and index -1 for both.
+void LocaleMatcherTest::testEmpty() {
+ IcuTestErrorCode errorCode(*this, "testEmpty");
+ LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
+ const Locale *best = matcher.getBestMatch(Locale::getFrench(), errorCode);
+ assertEquals("getBestMatch(fr)", "(null)", locString(best));
+ LocaleMatcher::Result result = matcher.getBestMatchResult("fr", errorCode);
+ assertEquals("getBestMatchResult(fr).des", "(null)", locString(result.getDesiredLocale()));
+ assertEquals("getBestMatchResult(fr).desIndex", -1, result.getDesiredIndex());
+ assertEquals("getBestMatchResult(fr).supp",
+ "(null)", locString(result.getSupportedLocale()));
+ assertEquals("getBestMatchResult(fr).suppIndex",
+ -1, result.getSupportedIndex());
+}
+
+// Builder::copyErrorTo() on a fresh builder: expected to return false and leave
+// a success code alone, and to return true and leave an incoming failure code unchanged.
+void LocaleMatcherTest::testCopyErrorTo() {
+ IcuTestErrorCode errorCode(*this, "testCopyErrorTo");
+ // The builder does not set any errors except out-of-memory.
+ // Test what we can.
+ LocaleMatcher::Builder builder;
+ UErrorCode success = U_ZERO_ERROR;
+ assertFalse("no error", builder.copyErrorTo(success));
+ assertTrue("still success", U_SUCCESS(success));
+ UErrorCode failure = U_INVALID_FORMAT_ERROR;
+ assertTrue("failure passed in", builder.copyErrorTo(failure));
+ assertEquals("same failure", U_INVALID_FORMAT_ERROR, failure);
+}
+
+// Exercises the different ways of handing the supported locales to
+// LocaleMatcher::Builder: array range, std::vector iterators, Locale::RangeIterator,
+// a converter over pointers, repeated addSupportedLocale(), a priority-list string,
+// and a LocalePriorityList iterator combined with a default locale.
+// Every variant expects en_GB -> en_GB, en_US -> en and fr_FR -> fr;
+// ja_JP is expected to fall back to fr, or to the default (de) where one is set.
+// Each variant uses its own message prefix so that a failure identifies the variant.
+void LocaleMatcherTest::testBasics() {
+    IcuTestErrorCode errorCode(*this, "testBasics");
+    Locale locales[] = { "fr", "en_GB", "en" };
+    {
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocales(ARRAY_RANGE(locales)).build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("fromRange.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("fromRange.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("fromRange.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("fromRange.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    // Code coverage: Variations of setting supported locales.
+    {
+        // Named differently from the outer array so that it does not shadow it.
+        std::vector<Locale> localeVector{ "fr", "en_GB", "en" };
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocales(localeVector.begin(), localeVector.end()).build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("fromVector.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("fromVector.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("fromVector.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("fromVector.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    {
+        Locale::RangeIterator<Locale *> iter(ARRAY_RANGE(locales));
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocales(iter).build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("fromIter.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("fromIter.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("fromIter.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("fromIter.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    {
+        Locale *pointers[] = { locales, locales + 1, locales + 2 };
+        // Lambda with explicit reference return type to prevent copy-constructing a temporary
+        // which would be destructed right away.
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocalesViaConverter(
+                ARRAY_RANGE(pointers), [](const Locale *p) -> const Locale & { return *p; }).
+            build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("viaConverter.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("viaConverter.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("viaConverter.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("viaConverter.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    {
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            addSupportedLocale(locales[0]).
+            addSupportedLocale(locales[1]).
+            addSupportedLocale(locales[2]).
+            build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("added.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("added.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("added.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("added.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    {
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocalesFromListString(
+                " el, fr;q=0.555555, en-GB ; q = 0.88 , el; q =0, en;q=0.88 , fr ").
+            build(errorCode);
+        const Locale *best = matcher.getBestMatchForListString("el, fr, fr;q=0, en-GB", errorCode);
+        assertEquals("fromList.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("fromList.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("fromList.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("fromList.getBestMatch(ja_JP)", "fr", locString(best));
+    }
+    // more API coverage
+    {
+        LocalePriorityList list("fr, en-GB", errorCode);
+        LocalePriorityList::Iterator iter(list.iterator());
+        LocaleMatcher matcher = LocaleMatcher::Builder().
+            setSupportedLocales(iter).
+            addSupportedLocale(Locale::getEnglish()).
+            setDefaultLocale(&Locale::getGerman()).
+            build(errorCode);
+        const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+        assertEquals("withDefault.getBestMatch(en_GB)", "en_GB", locString(best));
+        best = matcher.getBestMatch("en_US", errorCode);
+        assertEquals("withDefault.getBestMatch(en_US)", "en", locString(best));
+        best = matcher.getBestMatch("fr_FR", errorCode);
+        assertEquals("withDefault.getBestMatch(fr_FR)", "fr", locString(best));
+        best = matcher.getBestMatch("ja_JP", errorCode);
+        assertEquals("withDefault.getBestMatch(ja_JP)", "de", locString(best));
+
+        Locale desired("en_GB");  // distinct object from Locale.UK
+        LocaleMatcher::Result result = matcher.getBestMatchResult(desired, errorCode);
+        assertTrue("withDefault: exactly desired en-GB object",
+                   &desired == result.getDesiredLocale());
+        assertEquals("withDefault: en-GB desired index", 0, result.getDesiredIndex());
+        assertEquals("withDefault: en-GB supported",
+                     "en_GB", locString(result.getSupportedLocale()));
+        assertEquals("withDefault: en-GB supported index", 1, result.getSupportedIndex());
+
+        LocalePriorityList list2("ja-JP, en-US", errorCode);
+        LocalePriorityList::Iterator iter2(list2.iterator());
+        result = matcher.getBestMatchResult(iter2, errorCode);
+        assertEquals("withDefault: ja-JP, en-US desired index", 1, result.getDesiredIndex());
+        assertEquals("withDefault: ja-JP, en-US desired",
+                     "en_US", locString(result.getDesiredLocale()));
+
+        desired = Locale("en", "US");  // distinct object from Locale.US
+        result = matcher.getBestMatchResult(desired, errorCode);
+        assertTrue("withDefault: exactly desired en-US object",
+                   &desired == result.getDesiredLocale());
+        assertEquals("withDefault: en-US desired index", 0, result.getDesiredIndex());
+        assertEquals("withDefault: en-US supported", "en", locString(result.getSupportedLocale()));
+        assertEquals("withDefault: en-US supported index", 2, result.getSupportedIndex());
+
+        // No desired locale matched: the default is returned, with both indexes -1.
+        result = matcher.getBestMatchResult("ja_JP", errorCode);
+        assertEquals("withDefault: ja-JP desired", "(null)", locString(result.getDesiredLocale()));
+        assertEquals("withDefault: ja-JP desired index", -1, result.getDesiredIndex());
+        assertEquals("withDefault: ja-JP supported", "de", locString(result.getSupportedLocale()));
+        assertEquals("withDefault: ja-JP supported index", -1, result.getSupportedIndex());
+    }
+}
+
+// Default locale en_GB is also the supported locale at index 1.
+// An unmatched desired locale (ja_JP) is expected to yield en_GB with supported index 1.
+void LocaleMatcherTest::testSupportedDefault() {
+ // The default locale is one of the supported locales.
+ IcuTestErrorCode errorCode(*this, "testSupportedDefault");
+ Locale locales[] = { "fr", "en_GB", "en" };
+ LocaleMatcher matcher = LocaleMatcher::Builder().
+ setSupportedLocales(ARRAY_RANGE(locales)).
+ setDefaultLocale(&locales[1]).
+ build(errorCode);
+ const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+ assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
+ best = matcher.getBestMatch("en_US", errorCode);
+ assertEquals("getBestMatch(en_US)", "en", locString(best));
+ best = matcher.getBestMatch("fr_FR", errorCode);
+ assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
+ best = matcher.getBestMatch("ja_JP", errorCode);
+ assertEquals("getBestMatch(ja_JP)", "en_GB", locString(best));
+ LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
+ assertEquals("getBestMatchResult(ja_JP).supp",
+ "en_GB", locString(result.getSupportedLocale()));
+ assertEquals("getBestMatchResult(ja_JP).suppIndex",
+ 1, result.getSupportedIndex());
+}
+
+// Default locale de is not among the supported locales.
+// An unmatched desired locale (ja_JP) is expected to yield de with supported index -1.
+void LocaleMatcherTest::testUnsupportedDefault() {
+ // The default locale does not match any of the supported locales.
+ IcuTestErrorCode errorCode(*this, "testUnsupportedDefault");
+ Locale locales[] = { "fr", "en_GB", "en" };
+ Locale def("de");
+ LocaleMatcher matcher = LocaleMatcher::Builder().
+ setSupportedLocales(ARRAY_RANGE(locales)).
+ setDefaultLocale(&def).
+ build(errorCode);
+ const Locale *best = matcher.getBestMatch("en_GB", errorCode);
+ assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
+ best = matcher.getBestMatch("en_US", errorCode);
+ assertEquals("getBestMatch(en_US)", "en", locString(best));
+ best = matcher.getBestMatch("fr_FR", errorCode);
+ assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
+ best = matcher.getBestMatch("ja_JP", errorCode);
+ assertEquals("getBestMatch(ja_JP)", "de", locString(best));
+ LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
+ assertEquals("getBestMatchResult(ja_JP).supp",
+ "de", locString(result.getSupportedLocale()));
+ assertEquals("getBestMatchResult(ja_JP).suppIndex",
+ -1, result.getSupportedIndex());
+}
+
+// Same supported and desired lists under both demotion settings:
+// ULOCMATCH_DEMOTION_NONE is expected to pick de_CH,
+// ULOCMATCH_DEMOTION_REGION is expected to pick fr.
+void LocaleMatcherTest::testDemotion() {
+ IcuTestErrorCode errorCode(*this, "testDemotion");
+ Locale supported[] = { "fr", "de-CH", "it" };
+ Locale desired[] = { "fr-CH", "de-CH", "it" };
+ {
+ LocaleMatcher noDemotion = LocaleMatcher::Builder().
+ setSupportedLocales(ARRAY_RANGE(supported)).
+ setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_NONE).build(errorCode);
+ Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
+ assertEquals("no demotion",
+ "de_CH", locString(noDemotion.getBestMatch(desiredIter, errorCode)));
+ }
+
+ {
+ LocaleMatcher regionDemotion = LocaleMatcher::Builder().
+ setSupportedLocales(ARRAY_RANGE(supported)).
+ setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_REGION).build(errorCode);
+ Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
+ assertEquals("region demotion",
+ "fr", locString(regionDemotion.getBestMatch(desiredIter, errorCode)));
+ }
+}
+
+// Exercises internalMatch() on a matcher built without supported locales:
+// an exact match is expected to score 1.0, an unrelated pair in [0, 0.2),
+// and the remaining checks compare scores against each other.
+void LocaleMatcherTest::testMatch() {
+ IcuTestErrorCode errorCode(*this, "testMatch");
+ LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
+
+ // Java test function testMatch_exact()
+ Locale en_CA("en_CA");
+ assertEquals("exact match", 1.0, matcher.internalMatch(en_CA, en_CA, errorCode));
+
+ // testMatch_none
+ Locale ar_MK("ar_MK");
+ double match = matcher.internalMatch(ar_MK, en_CA, errorCode);
+ assertTrue("mismatch: 0<=match<0.2", 0 <= match && match < 0.2);
+
+ // testMatch_matchOnMaximized
+ Locale und_TW("und_TW");
+ Locale zh("zh");
+ Locale zh_Hant("zh_Hant");
+ double matchZh = matcher.internalMatch(und_TW, zh, errorCode);
+ double matchZhHant = matcher.internalMatch(und_TW, zh_Hant, errorCode);
+ assertTrue("und_TW should be closer to zh_Hant than to zh",
+ matchZh < matchZhHant);
+ Locale en_Hant_TW("en_Hant_TW");
+ double matchEnHantTw = matcher.internalMatch(en_Hant_TW, zh_Hant, errorCode);
+ assertTrue("zh_Hant should be closer to und_TW than to en_Hant_TW",
+ matchEnHantTw < matchZhHant);
+ assertTrue("zh should be closer to und_TW than to en_Hant_TW",
+ matchEnHantTw < matchZh);
+}
+
+// Checks makeResolvedLocale(): supported ar-EG matched by desired ar-SA-u-nu-latn.
+void LocaleMatcherTest::testResolvedLocale() {
+    IcuTestErrorCode status(*this, "testResolvedLocale");
+    LocaleMatcher matcher = LocaleMatcher::Builder().addSupportedLocale("ar-EG").build(status);
+    Locale desired("ar-SA-u-nu-latn");
+    LocaleMatcher::Result best = matcher.getBestMatchResult(desired, status);
+    assertEquals("best", "ar_EG", locString(best.getSupportedLocale()));
+    // The resolved locale is compared in its language tag form.
+    Locale resolved = best.makeResolvedLocale(status);
+    std::string resolvedTag = resolved.toLanguageTag<std::string>(status);
+    assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
+                 "ar-SA-u-nu-latn", resolvedTag.data());
+}
+
+namespace {
+
+// Replaces the contents of inv with the invariant-character conversion of s.
+// Returns true only if errorCode is a success afterwards; on a prior failure inv is untouched.
+bool toInvariant(const UnicodeString &s, CharString &inv, ErrorCode &errorCode) {
+    if (errorCode.isFailure()) { return false; }
+    inv.clear().appendInvariantChars(s, errorCode);
+    return errorCode.isSuccess();
+}
+
+// If s starts with prefix and the prefix is no longer than limit, sets suffix to the
+// text of s between the end of the prefix and limit, and returns true; else returns false.
+bool getSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
+                          const UnicodeString &prefix, UnicodeString &suffix) {
+    const int32_t prefixLength = prefix.length();
+    if (limit < prefixLength || !s.startsWith(prefix)) { return false; }
+    suffix.setTo(s, prefixLength, limit - prefixLength);
+    return true;
+}
+
+// Like getSuffixAfterPrefix(), but also converts the suffix to invariant chars via toInvariant().
+bool getInvariantSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
+        const UnicodeString &prefix, CharString &suffix, ErrorCode &errorCode) {
+    UnicodeString unicodeSuffix;
+    if (!getSuffixAfterPrefix(s, limit, prefix, unicodeSuffix)) { return false; }
+    return toInvariant(unicodeSuffix, suffix, errorCode);
+}
+
+// Parses one line of the test data into test. Returns true only for a successfully parsed
+// "desired >> match [| desired [| combined]]" line. Empty/comment lines, "** test: " lines and
+// the "@supported=" etc. parameter lines return false; other lines set U_INVALID_FORMAT_ERROR.
+bool readTestCase(const UnicodeString &line, TestCase &test, IcuTestErrorCode &errorCode) {
+    if (errorCode.isFailure()) { return false; }
+    ++test.lineNr;
+    // Start of comment, or end of line, minus trailing spaces.
+    int32_t limit = line.indexOf(u'#');
+    if (limit < 0) {
+        limit = line.length();
+        // Remove trailing CR LF.
+        char16_t c;
+        while (limit > 0 && ((c = line.charAt(limit - 1)) == u'\n' || c == u'\r')) {
+            --limit;
+        }
+    }
+    // Remove spaces before comment or at the end of the line.
+    char16_t c;
+    while (limit > 0 && ((c = line.charAt(limit - 1)) == u' ' || c == u'\t')) {
+        --limit;
+    }
+    if (limit == 0) { return false; }  // empty line
+    if (line.startsWith(u"** test: ")) {
+        test.reset();
+    } else if (getInvariantSuffixAfterPrefix(line, limit, u"@supported=",
+                                             test.supported, errorCode)) {
+    } else if (getInvariantSuffixAfterPrefix(line, limit, u"@default=",
+                                             test.def, errorCode)) {
+    } else if (getSuffixAfterPrefix(line, limit, u"@favor=", test.favor)) {
+    } else if (getSuffixAfterPrefix(line, limit, u"@threshold=", test.threshold)) {
+    } else {
+        int32_t matchSep = line.indexOf(u">>");
+        // >> before an inline comment, and followed by more than white space.
+        if (0 <= matchSep && (matchSep + 2) < limit) {
+            auto readField = [&](int32_t from, int32_t to, CharString &dest) {
+                toInvariant(line.tempSubStringBetween(from, to).trim(), dest, errorCode);
+            };
+            readField(0, matchSep, test.desired);
+            test.expDesired.clear();
+            test.expCombined.clear();
+            // Look for '|' only before limit, so that a '|' in an inline comment is not a separator.
+            int32_t start = matchSep + 2;
+            int32_t expLimit = line.indexOf(u'|', start, limit - start);
+            if (expLimit < 0) {
+                readField(start, limit, test.expMatch);
+            } else {
+                readField(start, expLimit, test.expMatch);
+                start = expLimit + 1;
+                expLimit = line.indexOf(u'|', start, limit - start);
+                if (expLimit < 0) {
+                    readField(start, limit, test.expDesired);
+                } else {
+                    readField(start, expLimit, test.expDesired);
+                    readField(expLimit + 1, limit, test.expCombined);
+                }
+            }
+            return errorCode.isSuccess();
+        } else {
+            errorCode.set(U_INVALID_FORMAT_ERROR);
+        }
+    }
+    return false;
+}
+
+// Returns nullptr if s is the string "null"; otherwise assigns Locale(s.data()) to the
+// caller-provided locale object and returns its address.
+Locale *getLocaleOrNull(const CharString &s, Locale &locale) {
+    if (s == "null") { return nullptr; }
+    locale = Locale(s.data());
+    return &locale;
+}
+
+} // namespace
+
+// Runs one parsed test case: builds a LocaleMatcher from the parameters in test, then compares
+// its best match (and, if expected values are given, the desired and combined locales).
+// Returns FALSE on a setup error or failed check; TRUE if all checks pass or the case is skipped.
+UBool LocaleMatcherTest::dataDriven(const TestCase &test, IcuTestErrorCode &errorCode) {
+    LocaleMatcher::Builder matcherBuilder;
+    matcherBuilder.setSupportedLocalesFromListString(test.supported.toStringPiece());
+    if (!test.def.isEmpty()) {
+        Locale explicitDefault(test.def.data());
+        matcherBuilder.setDefaultLocale(&explicitDefault);
+    }
+    if (!test.favor.isEmpty()) {
+        if (test.favor == u"normal") {
+            matcherBuilder.setFavorSubtag(ULOCMATCH_FAVOR_LANGUAGE);
+        } else if (test.favor == u"script") {
+            matcherBuilder.setFavorSubtag(ULOCMATCH_FAVOR_SCRIPT);
+        } else {
+            errln(UnicodeString(u"unsupported FavorSubtag value ") + test.favor);
+            return FALSE;
+        }
+    }
+    if (!test.threshold.isEmpty()) {
+        // For reference, the equivalent Java code:
+        //   builder.internalSetThresholdDistance(Integer.valueOf(test.threshold));
+        infoln("skipping test case on line %d with non-default threshold: not exposed via API",
+               (int)test.lineNr);
+        return TRUE;
+    }
+    LocaleMatcher matcher = matcherBuilder.build(errorCode);
+    if (errorCode.errIfFailureAndReset("LocaleMatcher::Builder::build()")) { return FALSE; }
+
+    Locale expMatchStorage("");
+    Locale *expMatch = getLocaleOrNull(test.expMatch, expMatchStorage);
+    if (!test.expDesired.isEmpty() || !test.expCombined.isEmpty()) {
+        // An expected desired and/or combined locale is given: check the full Result.
+        LocalePriorityList desiredList(test.desired.toStringPiece(), errorCode);
+        LocalePriorityList::Iterator desiredIter = desiredList.iterator();
+        LocaleMatcher::Result result = matcher.getBestMatchResult(desiredIter, errorCode);
+        UBool ok = assertEquals("result.getSupportedLocale from Locales",
+                                locString(expMatch), locString(result.getSupportedLocale()));
+        if (!test.expDesired.isEmpty()) {
+            Locale expDesiredStorage("");
+            Locale *expDesired = getLocaleOrNull(test.expDesired, expDesiredStorage);
+            ok &= assertEquals("result.getDesiredLocale from Locales",
+                               locString(expDesired), locString(result.getDesiredLocale()));
+        }
+        if (test.expCombined.isEmpty()) { return ok; }
+        if (test.expMatch.contains("-u-")) {
+            logKnownIssue("20727",
+                          UnicodeString(u"ignoring makeResolvedLocale() line ") + test.lineNr);
+            return ok;
+        }
+        Locale expCombinedStorage("");
+        Locale *expCombined = getLocaleOrNull(test.expCombined, expCombinedStorage);
+        Locale combined = result.makeResolvedLocale(errorCode);
+        ok &= assertEquals("combined Locale from Locales",
+                           locString(expCombined), locString(&combined));
+        return ok;
+    }
+
+    // Only the best supported locale is expected: check the string, Locale and iterator overloads.
+    const Locale *bestSupported =
+        matcher.getBestMatchForListString(test.desired.toStringPiece(), errorCode);
+    if (!assertEquals("bestSupported from string",
+                      locString(expMatch), locString(bestSupported))) {
+        return FALSE;
+    }
+    LocalePriorityList desiredList(test.desired.toStringPiece(), errorCode);
+    LocalePriorityList::Iterator desiredIter = desiredList.iterator();
+    if (desiredList.getLength() != 1) {
+        bestSupported = matcher.getBestMatch(desiredIter, errorCode);
+        return assertEquals("bestSupported from Locale iterator",
+                            locString(expMatch), locString(bestSupported));
+    }
+    // Exactly one desired locale: use the single-Locale overloads.
+    const Locale &desiredLocale = desiredIter.next();
+    bestSupported = matcher.getBestMatch(desiredLocale, errorCode);
+    UBool ok = assertEquals("bestSupported from Locale",
+                            locString(expMatch), locString(bestSupported));
+    LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocale, errorCode);
+    ok &= assertEquals("result.getSupportedLocale from Locale",
+                       locString(expMatch), locString(result.getSupportedLocale()));
+    return ok;
+}
+
+// Reads localeMatcherTest.txt (located via getSourceTestData()) line by line and runs every
+// parsed test case through dataDriven(), logging the offending line on errors and failures.
+void LocaleMatcherTest::testDataDriven() {
+    IcuTestErrorCode status(*this, "testDataDriven");
+    CharString dataPath(getSourceTestData(status), status);
+    dataPath.appendPathPart("localeMatcherTest.txt", status);
+    const char *codePage = "UTF-8";
+    LocalUCHARBUFPointer file(ucbuf_open(dataPath.data(), &codePage, TRUE, FALSE, status));
+    if (status.errIfFailureAndReset("ucbuf_open(localeMatcherTest.txt)")) { return; }
+
+    UnicodeString line;
+    // Declared outside the loop: parameter lines update it for the test-case lines that follow.
+    TestCase test;
+    int32_t passCount = 0;
+    for (;;) {
+        int32_t lineLength;
+        const UChar *lineChars = ucbuf_readline(file.getAlias(), &lineLength, status);
+        if (lineChars == nullptr || status.isFailure()) { break; }
+        line.setTo(FALSE, lineChars, lineLength);
+        if (readTestCase(line, test, status)) {
+            UBool ok = dataDriven(test, status);
+            if (status.errIfFailureAndReset("test error on line %d", (int)test.lineNr)) {
+                infoln(line);
+            } else if (ok) {
+                ++passCount;
+            } else {
+                infoln("test failure on line %d", (int)test.lineNr);
+                infoln(line);
+            }
+        } else if (status.errIfFailureAndReset(
+                "test data syntax error on line %d", (int)test.lineNr)) {
+            infoln(line);
+        }
+    }
+    infoln("number of passing test cases: %d", (int)passCount);
+}
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index d4eec23..b7a28dd 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -6,6 +6,7 @@
* others. All Rights Reserved.
********************************************************************/
+#include <functional>
#include <iterator>
#include <set>
#include <utility>
@@ -266,6 +267,10 @@
TESTCASE_AUTO(TestUndScript);
TESTCASE_AUTO(TestUndRegion);
TESTCASE_AUTO(TestUndCAPI);
+ TESTCASE_AUTO(TestRangeIterator);
+ TESTCASE_AUTO(TestPointerConvertingIterator);
+ TESTCASE_AUTO(TestTagConvertingIterator);
+ TESTCASE_AUTO(TestCapturingTagConvertingIterator);
TESTCASE_AUTO_END;
}
@@ -3832,3 +3837,118 @@
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
}
+
+#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))  // expands to TWO arguments: start pointer, limit pointer
+
+// Locale::RangeIterator over a Locale array: next() must return the array elements themselves.
+void LocaleTest::TestRangeIterator() {
+    IcuTestErrorCode errorCode(*this, "TestRangeIterator");
+    Locale locales[] = { "fr", "en_GB", "en" };
+    Locale::RangeIterator<Locale *> range(ARRAY_RANGE(locales));
+
+    assertTrue("0.hasNext()", range.hasNext());
+    const Locale &first = range.next();
+    assertEquals("0.next()", "fr", first.getName());
+    assertTrue("&0.next()", locales + 0 == &first);
+
+    assertTrue("1.hasNext()", range.hasNext());
+    const Locale &second = range.next();
+    assertEquals("1.next()", "en_GB", second.getName());
+    assertTrue("&1.next()", locales + 1 == &second);
+
+    assertTrue("2.hasNext()", range.hasNext());
+    const Locale &third = range.next();
+    assertEquals("2.next()", "en", third.getName());
+    assertTrue("&2.next()", locales + 2 == &third);
+    assertFalse("3.hasNext()", range.hasNext());
+}
+
+// Locale::ConvertingIterator with a converter that dereferences Locale pointers.
+void LocaleTest::TestPointerConvertingIterator() {
+    IcuTestErrorCode errorCode(*this, "TestPointerConvertingIterator");
+    Locale locales[] = { "fr", "en_GB", "en" };
+    Locale *pointers[] = { locales, locales + 1, locales + 2 };
+    // The explicit reference return type keeps the lambda from returning a temporary copy.
+    using Deref = std::function<const Locale &(const Locale *)>;
+    Deref deref = [](const Locale *p) -> const Locale & { return *p; };
+    Locale::ConvertingIterator<Locale **, Deref> iter(ARRAY_RANGE(pointers), deref);
+
+    assertTrue("0.hasNext()", iter.hasNext());
+    const Locale &first = iter.next();
+    assertEquals("0.next()", "fr", first.getName());
+    assertTrue("&0.next()", pointers[0] == &first);
+
+    assertTrue("1.hasNext()", iter.hasNext());
+    const Locale &second = iter.next();
+    assertEquals("1.next()", "en_GB", second.getName());
+    assertTrue("&1.next()", pointers[1] == &second);
+
+    assertTrue("2.hasNext()", iter.hasNext());
+    const Locale &third = iter.next();
+    assertEquals("2.next()", "en", third.getName());
+    assertTrue("&2.next()", pointers[2] == &third);
+    assertFalse("3.hasNext()", iter.hasNext());
+}
+
+namespace {
+
+// Converter functor for Locale::ConvertingIterator: builds a Locale from a tag string.
+// The Locale is kept in the functor so that operator() does not return a reference to a temporary.
+class LocaleFromTag {
+public:
+    LocaleFromTag() : converted(Locale::getRoot()) {}
+    const Locale &operator()(const char *tag) { converted = Locale(tag); return converted; }
+
+private:
+    Locale converted;
+};
+
+} // namespace
+
+// Locale::ConvertingIterator with the LocaleFromTag functor as its converter.
+void LocaleTest::TestTagConvertingIterator() {
+    IcuTestErrorCode errorCode(*this, "TestTagConvertingIterator");
+    const char *tags[] = { "fr", "en_GB", "en" };
+    Locale::ConvertingIterator<const char **, LocaleFromTag> iter(ARRAY_RANGE(tags), LocaleFromTag{});
+
+    assertTrue("0.hasNext()", iter.hasNext());
+    const Locale &first = iter.next();
+    assertEquals("0.next()", "fr", first.getName());
+
+    assertTrue("1.hasNext()", iter.hasNext());
+    const Locale &second = iter.next();
+    assertEquals("1.next()", "en_GB", second.getName());
+
+    assertTrue("2.hasNext()", iter.hasNext());
+    const Locale &third = iter.next();
+    assertEquals("2.next()", "en", third.getName());
+
+    assertFalse("3.hasNext()", iter.hasNext());
+}
+
+// Locale::ConvertingIterator with a capturing lambda that converts tag strings to Locales.
+void LocaleTest::TestCapturingTagConvertingIterator() {
+    IcuTestErrorCode errorCode(*this, "TestCapturingTagConvertingIterator");
+    const char *tags[] = { "fr", "en_GB", "en" };
+    // The lambda stores each converted locale in this variable and returns a reference to it
+    // (explicit reference return type), rather than a reference to a temporary.
+    Locale converted;
+    using FromTag = std::function<const Locale &(const char *)>;
+    FromTag fromTag = [&converted](const char *tag) -> const Locale & {
+        return converted = Locale(tag);
+    };
+    Locale::ConvertingIterator<const char **, FromTag> iter(ARRAY_RANGE(tags), fromTag);
+
+    assertTrue("0.hasNext()", iter.hasNext());
+    const Locale &first = iter.next();
+    assertEquals("0.next()", "fr", first.getName());
+
+    assertTrue("1.hasNext()", iter.hasNext());
+    const Locale &second = iter.next();
+    assertEquals("1.next()", "en_GB", second.getName());
+
+    assertTrue("2.hasNext()", iter.hasNext());
+    const Locale &third = iter.next();
+    assertEquals("2.next()", "en", third.getName());
+    assertFalse("3.hasNext()", iter.hasNext());
+}
diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index 72e3a82..065213e 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -141,6 +141,10 @@
void TestUndScript();
void TestUndRegion();
void TestUndCAPI();
+ void TestRangeIterator();
+ void TestPointerConvertingIterator();
+ void TestTagConvertingIterator();
+ void TestCapturingTagConvertingIterator();
private:
void _checklocs(const char* label,
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp
index 6381a8b..79a3d01 100644
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -33,6 +33,7 @@
#include "cstr.h"
#include "intltest.h"
#include "strtest.h"
+#include "uinvchar.h"
StringTest::~StringTest() {}
@@ -147,6 +148,64 @@
}
}
+namespace {
+
+// The same characters as a native-charset string and as a UTF-16 string, index-aligned; see U_CHARSET_FAMILY in unicode/platform.h.
+const char *nativeInvChars =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789 \"%&'()*+,-./:;<=>?_";
+const char16_t *asciiInvChars =
+ u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ u"abcdefghijklmnopqrstuvwxyz"
+ u"0123456789 \"%&'()*+,-./:;<=>?_";
+
+} // namespace
+
+// Checks uprv_upperOrdinal() for each character of nativeInvChars, including the final NUL.
+void
+StringTest::TestUpperOrdinal() {
+    for (int32_t i = 0;; ++i) {
+        char ic = nativeInvChars[i];
+        uint8_t ac = asciiInvChars[i];
+        // ac is an ASCII code: subtract ASCII u'A', not the native 'A' (which differs on EBCDIC).
+        int32_t expected = ac - u'A';
+        int32_t actual = uprv_upperOrdinal(ic);
+        if (0 <= expected && expected <= 25) {
+            if (actual != expected) {
+                errln("uprv_upperOrdinal('%c')=%d != expected %d",
+                      ic, (int)actual, (int)expected);
+            }
+        } else if (0 <= actual && actual <= 25) {
+            errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
+                  ic, (int)actual);
+        }
+        if (ic == 0) { break; }
+    }
+}
+
+// Checks uprv_lowerOrdinal() for each character of nativeInvChars, including the final NUL.
+void
+StringTest::TestLowerOrdinal() {
+    for (int32_t i = 0;; ++i) {
+        char ic = nativeInvChars[i];
+        uint8_t ac = asciiInvChars[i];
+        // ac is an ASCII code: subtract ASCII u'a', not the native 'a' (which differs on EBCDIC).
+        int32_t expected = ac - u'a';
+        int32_t actual = uprv_lowerOrdinal(ic);
+        if (0 <= expected && expected <= 25) {
+            if (actual != expected) {
+                errln("uprv_lowerOrdinal('%c')=%d != expected %d",
+                      ic, (int)actual, (int)expected);
+            }
+        } else if (0 <= actual && actual <= 25) {
+            errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
+                  ic, (int)actual);
+        }
+        if (ic == 0) { break; }
+    }
+}
+
void
StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
#if !U_HIDE_OBSOLETE_UTF_OLD_H
@@ -178,6 +237,8 @@
TESTCASE_AUTO(Test_U_STRING);
TESTCASE_AUTO(Test_UNICODE_STRING);
TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
+ TESTCASE_AUTO(TestUpperOrdinal);
+ TESTCASE_AUTO(TestLowerOrdinal);
TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
TESTCASE_AUTO(TestSTLCompatibility);
TESTCASE_AUTO(TestStringPiece);
diff --git a/icu4c/source/test/intltest/strtest.h b/icu4c/source/test/intltest/strtest.h
index 4d8a5e7..e620c8a 100644
--- a/icu4c/source/test/intltest/strtest.h
+++ b/icu4c/source/test/intltest/strtest.h
@@ -39,6 +39,8 @@
void Test_U_STRING();
void Test_UNICODE_STRING();
void Test_UNICODE_STRING_SIMPLE();
+ void TestUpperOrdinal();
+ void TestLowerOrdinal();
void Test_UTF8_COUNT_TRAIL_BYTES();
void TestStringPiece();
void TestStringPieceComparisons();
diff --git a/icu4c/source/test/testdata/localeMatcherTest.txt b/icu4c/source/test/testdata/localeMatcherTest.txt
new file mode 100644
index 0000000..21c9b60
--- /dev/null
+++ b/icu4c/source/test/testdata/localeMatcherTest.txt
@@ -0,0 +1,1959 @@
+# © 2017 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# Data-driven test for the language/locale matcher.
+# Format:
+#
+# Everything after "#" is a comment.
+# ** test: This line starts a group of test cases.
+#
+# Lines starting with an '@' sign provide matcher parameters.
+# @supported=<comma-separated supported languages>
+# @default=<default language> # no value = no explicit default
+# @favor=[normal|script] # no value = no explicit setting
+# @threshold=<number 0..100> # no value = no explicit setting
+#
+# A line with ">>" is a getBestMatch() test case:
+# <comma-separated desired languages> >> match | desired | combined
+# - match is the expected best supported language
+# - desired is the expected best desired language
+# - combined is the expected result of combine(match, desired)
+# An expected language can be "null" to check for the matcher returning null.
+# An empty or omitted value is not tested. (Omitted = not even the '|' separator.)
+#
+# ** test: A new test group resets all matcher parameters.
+
+## X
+
+** test: testParentLocales
+
+# es-419, es-AR, and es-MX are in a cluster; es is in a different one
+
+@supported=es-419, es-ES
+es-AR >> es-419
+@supported=es-ES, es-419
+es-AR >> es-419
+
+@supported=es-419, es
+es-AR >> es-419
+@supported=es, es-419
+es-AR >> es-419
+
+@supported=es-MX, es
+es-AR >> es-MX
+@supported=es, es-MX
+es-AR >> es-MX
+
+# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
+
+@supported=en-GB, en-US
+en-AU >> en-GB
+@supported=en-US, en-GB
+en-AU >> en-GB
+
+@supported=en-GB, en
+en-AU >> en-GB
+@supported=en, en-GB
+en-AU >> en-GB
+
+@supported=en-NZ, en-US
+en-AU >> en-NZ
+@supported=en-US, en-NZ
+en-AU >> en-NZ
+
+@supported=en-NZ, en
+en-AU >> en-NZ
+@supported=en, en-NZ
+en-AU >> en-NZ
+
+# pt-AO and pt-PT in one cluster; pt-BR in another
+
+@supported=pt-PT, pt-BR
+pt-AO >> pt-PT
+@supported=pt-BR, pt-PT
+pt-AO >> pt-PT
+
+@supported=pt-PT, pt
+pt-AO >> pt-PT
+@supported=pt, pt-PT
+pt-AO >> pt-PT
+
+@supported=zh-MO, zh-TW
+zh-HK >> zh-MO
+@supported=zh-TW, zh-MO
+zh-HK >> zh-MO
+
+@supported=zh-MO, zh-CN
+zh-HK >> zh-MO
+@supported=zh-CN, zh-MO
+zh-HK >> zh-MO
+
+@supported=zh-MO, zh
+zh-HK >> zh-MO
+@supported=zh, zh-MO
+zh-HK >> zh-MO
+
+@favor=script
+@supported=es-419, es-ES
+es-AR >> es-419
+@supported=es-ES, es-419
+es-AR >> es-419
+@supported=es-419, es
+es-AR >> es-419
+@supported=es, es-419
+es-AR >> es-419
+@supported=es-MX, es
+es-AR >> es-MX
+@supported=es, es-MX
+es-AR >> es-MX
+@supported=en-GB, en-US
+en-AU >> en-GB
+@supported=en-US, en-GB
+en-AU >> en-GB
+@supported=en-GB, en
+en-AU >> en-GB
+@supported=en, en-GB
+en-AU >> en-GB
+@supported=en-NZ, en-US
+en-AU >> en-NZ
+@supported=en-US, en-NZ
+en-AU >> en-NZ
+@supported=en-NZ, en
+en-AU >> en-NZ
+@supported=en, en-NZ
+en-AU >> en-NZ
+@supported=pt-PT, pt-BR
+pt-AO >> pt-PT
+@supported=pt-BR, pt-PT
+pt-AO >> pt-PT
+@supported=pt-PT, pt
+pt-AO >> pt-PT
+@supported=pt, pt-PT
+pt-AO >> pt-PT
+@supported=zh-MO, zh-TW
+zh-HK >> zh-MO
+@supported=zh-TW, zh-MO
+zh-HK >> zh-MO
+@supported=zh-MO, zh-CN
+zh-HK >> zh-MO
+@supported=zh-CN, zh-MO
+zh-HK >> zh-MO
+@supported=zh-MO, zh
+zh-HK >> zh-MO
+@supported=zh, zh-MO
+zh-HK >> zh-MO
+
+** test: testChinese
+
+@supported=zh-CN, zh-TW, iw
+zh-Hant-TW >> zh-TW
+zh-Hant >> zh-TW
+zh-TW >> zh-TW
+zh-Hans-CN >> zh-CN
+zh-CN >> zh-CN
+zh >> zh-CN
+
+@favor=script
+zh-Hant-TW >> zh-TW
+zh-Hant >> zh-TW
+zh-TW >> zh-TW
+zh-Hans-CN >> zh-CN
+zh-CN >> zh-CN
+zh >> zh-CN
+
+** test: testenGB
+
+@supported=fr, en, en-GB, es-419, es-MX, es
+en-NZ >> en-GB
+es-ES >> es
+es-AR >> es-419
+es-MX >> es-MX
+
+@favor=script
+en-NZ >> en-GB
+es-ES >> es
+es-AR >> es-419
+es-MX >> es-MX
+
+** test: testFallbacks
+
+@supported=91, en, hi
+sa >> hi
+
+@favor=script
+sa >> hi
+
+** test: testBasics
+
+@supported=fr, en-GB, en
+en-GB >> en-GB
+en >> en
+fr >> fr
+ja >> fr # return first if no match
+
+@favor=script
+en-GB >> en-GB
+en >> en
+fr >> fr
+ja >> fr
+
+** test: testFallback
+
+# check that script fallbacks are handled right
+
+@supported=zh-CN, zh-TW, iw
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+he-IT >> iw
+
+@favor=script
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+he-IT >> iw
+
+** test: testSpecials
+
+# check that nearby languages are handled
+
+@supported=en, fil, ro, nn
+tl >> fil
+mo >> ro
+nb >> nn
+
+# make sure default works
+
+ja >> en
+
+@favor=script
+tl >> fil
+mo >> ro
+nb >> nn
+ja >> en
+
+** test: testRegionalSpecials
+
+# verify that en-AU is closer to en-GB than to en (which is en-US)
+
+@supported=en, en-GB, es, es-419
+es-MX >> es-419
+en-AU >> en-GB
+es-ES >> es
+
+@favor=script
+es-MX >> es-419
+en-AU >> en-GB
+es-ES >> es
+
+** test: testHK
+
+# HK and MO are closer to each other for Hant than to TW
+
+@supported=zh, zh-TW, zh-MO
+zh-HK >> zh-MO
+@supported=zh, zh-TW, zh-HK
+zh-MO >> zh-HK
+
+@favor=script
+@supported=zh, zh-TW, zh-MO
+zh-HK >> zh-MO
+@supported=zh, zh-TW, zh-HK
+zh-MO >> zh-HK
+
+** test: testMatch-matchOnMaximized
+
+@supported=zh, zh-Hant
+und-TW >> zh-Hant # und-TW should be closer to zh-Hant than to zh
+
+@supported=en-Hant-TW, und-TW
+zh-Hant >> und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
+zh >> und-TW # zh should be closer to und-TW than to en-Hant-TW
+
+@favor=script
+@supported=zh, zh-Hant
+und-TW >> zh-Hant
+@supported=en-Hant-TW, und-TW
+zh-Hant >> und-TW
+zh >> und-TW
+
+** test: testMatchGrandfatheredCode
+
+@supported=fr, i-klingon, en-Latn-US
+en-GB-oed >> en-Latn-US
+
+@favor=script
+en-GB-oed >> en-Latn-US
+
+** test: testGetBestMatchForList-exactMatch
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja, de >> ja
+
+@favor=script
+ja, de >> ja
+
+** test: testGetBestMatchForList-simpleVariantMatch
+@supported=fr, en-GB, ja, es-ES, es-MX
+de, en-US >> en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
+
+# Fallback.
+
+de, zh >> fr
+
+@favor=script
+de, en-US >> en-GB
+de, zh >> fr
+
+** test: testGetBestMatchForList-matchOnMaximized
+# Check that if the preference is maximized already, it works as well.
+
+@supported=en, ja
+ja-Jpan-JP, en-AU >> ja # Match for ja-Jpan-JP (maximized already)
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
+
+ja-JP, en-US >> ja # Match for ja-JP, with likely region subtag
+
+# Check that if the preference is maximized already, it works as well.
+
+ja-Jpan-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
+
+@favor=script
+ja-Jpan-JP, en-AU >> ja
+ja-JP, en-US >> ja
+ja-Jpan-JP, en-US >> ja
+
+** test: testGetBestMatchForList-noMatchOnMaximized
+# Regression test for http://b/5714572 .
+# de maximizes to de-DE. Pick the exact match for the secondary language instead.
+@supported=en, de, fr, ja
+de-CH, fr >> de
+
+@favor=script
+de-CH, fr >> de
+
+** test: testBestMatchForTraditionalChinese
+
+# Scenario: An application that only supports Simplified Chinese (and some other languages),
+# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
+# zh-Hans, it wouldn't make much of a difference.
+
+# The script distance (simplified vs. traditional Han) is considered small enough
+# to be an acceptable match. The regional difference is considered almost insignificant.
+
+@supported=fr, zh-Hans-CN, en-US
+zh-TW >> zh-Hans-CN
+zh-Hant >> zh-Hans-CN
+
+# For geopolitical reasons, you might want to avoid a zh-Hant -> zh-Hans match.
+# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
+# change your call to getBestMatch to include a 2nd language preference.
+# "en" is a better match since its distance to "en-US" is smaller than the distance
+# from "zh-TW" to "zh-CN" (script distance).
+
+zh-TW, en >> en-US
+zh-Hant-CN, en >> en-US
+zh-Hans, en >> zh-Hans-CN
+
+@favor=script
+zh-TW >> zh-Hans-CN
+zh-Hant >> zh-Hans-CN
+zh-TW, en >> en-US
+zh-Hant-CN, en >> en-US
+zh-Hans, en >> zh-Hans-CN
+
+** test: testUndefined
+# When the undefined language doesn't match anything in the list,
+# getBestMatch returns the default, as usual.
+
+@supported=it, fr
+und >> it
+
+# When it *does* occur in the list, bestMatch returns it, as expected.
+@supported=it, und
+und >> und
+
+# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
+# tags, the undefined language would normally match English. But that would produce the
+# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
+# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
+
+# To avoid that, we change the matcher's definitions of max
+# so that max("und")="und". That produces the following, more desirable
+# results:
+
+@supported=it, en
+und >> it
+@supported=it, und
+en >> it
+
+@favor=script
+@supported=it, fr
+und >> it
+@supported=it, und
+und >> und
+@supported=it, en
+und >> it
+@supported=it, und
+en >> it
+
+** test: testGetBestMatch-regionDistance
+
+@supported=es-AR, es
+es-MX >> es-AR
+@supported=fr, en, en-GB
+en-CA >> en-GB
+@supported=de-AT, de-DE, de-CH
+de >> de-DE
+
+@favor=script
+@supported=es-AR, es
+es-MX >> es-AR
+@supported=fr, en, en-GB
+en-CA >> en-GB
+@supported=de-AT, de-DE, de-CH
+de >> de-DE
+
+** test: testAsymmetry
+
+@supported=mul, nl
+af >> nl # af => nl
+@supported=mul, af
+nl >> mul # but nl !=> af
+
+@favor=script
+@supported=mul, nl
+af >> nl
+@supported=mul, af
+nl >> mul
+
+** test: testGetBestMatchForList-matchOnMaximized2
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
+
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja-JP, en-GB >> ja # Match for ja-JP, with likely region subtag
+
+# Check that if the preference is maximized already, it works as well.
+
+ja-Jpan-JP, en-GB >> ja # Match for ja-Jpan-JP (maximized already)
+
+@favor=script
+ja-JP, en-GB >> ja
+ja-Jpan-JP, en-GB >> ja
+
+** test: testGetBestMatchForList-closeEnoughMatchOnMaximized
+
+@supported=en-GB, en, de, fr, ja
+de-CH, fr >> de
+en-US, ar, nl, de, ja >> en
+
+@favor=script
+de-CH, fr >> de
+en-US, ar, nl, de, ja >> en
+
+** test: testGetBestMatchForPortuguese
+
+# pt might be supported and not pt-PT
+
+# European user who prefers Spanish over Brazilian Portuguese as a fallback.
+
+@supported=pt-PT, pt-BR, es, es-419
+pt-PT, es, pt >> pt-PT
+@supported=pt-PT, pt, es, es-419
+pt-PT, es, pt >> pt-PT # pt implicit
+
+# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
+# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
+# matchers as "pt-BR" is a much more common language.
+
+@supported=pt-PT, pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+pt-PT, es, pt >> pt-PT
+@supported=pt-PT, pt, es, es-419
+pt-PT, es, pt >> pt-PT
+pt, es-419, pt-PT >> pt
+
+@supported=pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+
+# Code that adds the user's country can get "pt-US" for a user's language.
+# That should fall back to "pt-BR".
+
+@supported=pt-PT, pt-BR, es, es-419
+pt-US, pt-PT >> pt-BR
+@supported=pt-PT, pt, es, es-419
+pt-US, pt-PT, pt >> pt # pt-BR implicit
+
+@favor=script
+@supported=pt-PT, pt-BR, es, es-419
+pt-PT, es, pt >> pt-PT
+@supported=pt-PT, pt, es, es-419
+pt-PT, es, pt >> pt-PT
+
+@supported=pt-PT, pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+pt-PT, es, pt >> pt-PT
+@supported=pt-PT, pt, es, es-419
+pt-PT, es, pt >> pt-PT
+pt, es-419, pt-PT >> pt
+
+@supported=pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+
+@supported=pt-PT, pt-BR, es, es-419
+pt-US, pt-PT >> pt-BR
+@supported=pt-PT, pt, es, es-419
+pt-US, pt-PT, pt >> pt
+
+** test: testVariantWithScriptMatch 1 and 2
+
+@supported=fr, en, sv
+en-GB >> en
+@supported=en, sv
+en-GB, sv >> en
+
+@favor=script
+@supported=fr, en, sv
+en-GB >> en
+@supported=en, sv
+en-GB, sv >> en
+
+** test: testLongLists
+
+@supported=en, sv
+sv >> sv
+
+@supported=af, am, ar, az, be, bg, bn, bs, ca, cs, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu
+sv >> sv
+
+@supported=af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, 
fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, 
sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA
+sv >> sv
+
+@favor=script
+@supported=en, sv
+sv >> sv
+
+@supported=af, am, ar, az, be, bg, bn, bs, ca, cs, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu
+sv >> sv
+
+@supported=af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, 
fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, 
sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA
+sv >> sv
+
+** test: test8288
+
+@supported=it, en
+und >> it
+und, en >> en
+
+# examples from
+# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
+# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+@favor=script
+und >> it
+und, en >> en
+
+** test: testUnHack
+
+@supported=en-NZ, en-IT
+en-US >> en-NZ
+
+@favor=script
+en-US >> en-NZ
+
+** test: testEmptySupported => null
+en >> null
+
+# testVariantsAndExtensions
+
+** test: tests the .combine() method
+
+@supported=und, fr
+fr-BE-fonipa >> fr | | fr-BE-fonipa
+@supported=und, fr-CA
+fr-BE-fonipa >> fr-CA | | fr-BE-fonipa
+@supported=und, fr-fonupa
+fr-BE-fonipa >> fr-fonupa | | fr-BE-fonipa
+@supported=und, no
+nn-BE-fonipa >> no | | no-BE-fonipa
+@supported=und, en-GB-u-sd-gbsct
+en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin >> en-GB-u-sd-gbsct | | en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
+
+@supported=en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK
+fr-PSCRACK >> fr-PSCRACK
+fr >> en-PSCRACK
+de-CH >> en-PSCRACK
+
+@favor=script
+@supported=und, fr
+fr-BE-fonipa >> fr
+@supported=und, fr-CA
+fr-BE-fonipa >> fr-CA
+@supported=und, fr-fonupa
+fr-BE-fonipa >> fr-fonupa
+@supported=und, no
+nn-BE-fonipa >> no | | no-BE-fonipa
+@supported=und, en-GB-u-sd-gbsct
+en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin >> en-GB-u-sd-gbsct | | en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
+
+@supported=en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK
+fr-PSCRACK >> fr-PSCRACK
+fr >> en-PSCRACK
+de-CH >> en-PSCRACK
+
+** test: testClusters
+# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
+
+@supported=und, es, es-MA, es-MX, es-419
+es-AR >> es-419
+@supported=und, es-MA, es, es-419, es-MX
+es-AR >> es-419
+@supported=und, es, es-MA, es-MX, es-419
+es-EA >> es
+@supported=und, es-MA, es, es-419, es-MX
+es-EA >> es
+
+# of course, fall back to within cluster
+
+@supported=und, es, es-MA, es-MX
+es-AR >> es-MX
+@supported=und, es-MA, es, es-MX
+es-AR >> es-MX
+@supported=und, es-MA, es-MX, es-419
+es-EA >> es-MA
+@supported=und, es-MA, es-419, es-MX
+es-EA >> es-MA
+
+# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
+
+@supported=und, en, en-GU, en-IN, en-GB
+en-ZA >> en-GB
+@supported=und, en-GU, en, en-GB, en-IN
+en-ZA >> en-GB
+@supported=und, en, en-GU, en-IN, en-GB
+en-VI >> en
+@supported=und, en-GU, en, en-GB, en-IN
+en-VI >> en
+
+# of course, fall back to within cluster
+
+@supported=und, en, en-GU, en-IN
+en-ZA >> en-IN
+@supported=und, en-GU, en, en-IN
+en-ZA >> en-IN
+@supported=und, en-GU, en-IN, en-GB
+en-VI >> en-GU
+@supported=und, en-GU, en-GB, en-IN
+en-VI >> en-GU
+
+@favor=script
+@supported=und, es, es-MA, es-MX, es-419
+es-AR >> es-419
+@supported=und, es-MA, es, es-419, es-MX
+es-AR >> es-419
+@supported=und, es, es-MA, es-MX, es-419
+es-EA >> es
+@supported=und, es-MA, es, es-419, es-MX
+es-EA >> es
+
+@supported=und, es, es-MA, es-MX
+es-AR >> es-MX
+@supported=und, es-MA, es, es-MX
+es-AR >> es-MX
+@supported=und, es-MA, es-MX, es-419
+es-EA >> es-MA
+@supported=und, es-MA, es-419, es-MX
+es-EA >> es-MA
+
+@supported=und, en, en-GU, en-IN, en-GB
+en-ZA >> en-GB
+@supported=und, en-GU, en, en-GB, en-IN
+en-ZA >> en-GB
+@supported=und, en, en-GU, en-IN, en-GB
+en-VI >> en
+@supported=und, en-GU, en, en-GB, en-IN
+en-VI >> en
+
+@supported=und, en, en-GU, en-IN
+en-ZA >> en-IN
+@supported=und, en-GU, en, en-IN
+en-ZA >> en-IN
+@supported=und, en-GU, en-IN, en-GB
+en-VI >> en-GU
+@supported=und, en-GU, en-GB, en-IN
+en-VI >> en-GU
+
+** test: testThreshold
+@supported=50, und, fr-CA-fonupa
+@threshold=60
+fr-BE-fonipa >> fr-CA-fonupa | | fr-BE-fonipa
+@supported=und, fr-Cyrl-CA-fonupa
+fr-BE-fonipa >> fr-Cyrl-CA-fonupa | | fr-Cyrl-BE-fonipa
+@threshold=50
+fr-BE-fonipa >> und
+
+@favor=script
+@supported=50, und, fr-CA-fonupa
+@threshold=
+fr-BE-fonipa >> fr-CA-fonupa | | fr-BE-fonipa
+@supported=und, fr-Cyrl-CA-fonupa
+fr-BE-fonipa >> und
+
+** test: testScriptFirst
+@supported=ru, fr
+zh, pl >> ru
+zh-Cyrl, pl >> ru
+@supported=hr, en-Cyrl
+sr >> hr
+@supported=da, ru, hr
+sr >> da
+
+@favor=script
+@supported=ru, fr
+zh, pl >> fr
+zh-Cyrl, pl >> ru
+@supported=hr, en-Cyrl
+sr >> en-Cyrl
+@supported=da, ru, hr
+sr >> ru
+
+## III
+
+** test: testBasicsWithDefault
+@supported=en-GB, en
+@default=fr
+en-GB >> en-GB
+en-US >> en
+fr >> fr
+ja >> fr
+
+@favor=script
+en-GB >> en-GB
+en-US >> en
+fr >> en-GB
+ja >> fr
+
+** test: testEmptyWithDefault
+@default=en
+fr >> en
+
+** test: testGetBestMatchForList_exactMatch
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja, de >> ja
+
+** test: testGetBestMatchForList_simpleVariantMatch
+# Intentionally avoiding a perfect-match or two candidates for variant matches.
+@supported=fr, en-GB, ja, es-ES, es-MX
+de, en-US >> en-GB
+# Fall back.
+de, zh >> fr
+
+** test: TestEuHack
+@supported=en-NZ, en-IT
+en-US >> en-NZ
+
+** test: TestBasics
+@supported=fr, en-GB, en
+en-GB >> en-GB
+en-US >> en
+fr-FR >> fr
+ja-JP >> fr
+# For a language that doesn't match anything, return the default.
+zu >> en-GB
+zxx >> fr
+
+@favor=script
+en-GB >> en-GB
+en-US >> en
+fr-FR >> fr
+ja-JP >> fr
+zu >> en-GB
+zxx >> en
+
+** test: TestExactMatch
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja, de >> ja
+
+** test: TestSimpleVariantMatch
+@supported=fr, en-GB, ja, es-ES, es-MX
+de, en-US >> en-GB
+de, zh >> fr
+
+** test: TestMatchOnMaximized
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins
+# over the secondary preference en-GB.
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja-JP, en-GB >> ja
+# Check that if the preference is maximized already, it works as well.
+ja-Jpan-JP, en-GB >> ja
+@supported=fr, zh-Hant, en
+zh, en >> en
+
+@favor=script
+zh, en >> en
+
+** test: TestCloseEnoughMatchOnMaximized
+@supported=en-GB, en, de, fr, ja
+de-CH, fr >> de
+en-US, ar, nl, de, ja >> en
+
+** test: TestGetBestMatchForPortuguese
+# 1. a supported set containing an explicit pt: {pt-PT, pt-BR, es, es-419}
+# 2. a supported set containing an implicit pt: {pt-PT, pt, es, es-419}
+# 3. a supported set containing no pt: {pt-BR, es, es-419}
+# European user who prefers Spanish over Brazilian Portuguese as a fallback.
+@supported=pt-PT, pt-BR, es, es-419
+pt-PT, es, pt >> pt-PT
+@supported=pt-PT, pt, es, es-419
+pt-PT, es, pt >> pt-PT
+@supported=pt-BR, es, es-419
+pt-PT, es, pt >> pt-BR
+
+# Brazilian user who prefers South American Spanish over European Portuguese
+# as a fallback. The asymmetry between this case and above is because it's
+# "pt-PT" that's missing between the matchers.
+@supported=pt-PT, pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+@supported=pt-PT, pt, es, es-419
+pt, es-419, pt-PT >> pt
+@supported=pt-BR, es, es-419
+pt, es-419, pt-PT >> pt-BR
+
+# Sometimes we get "pt-US" for a user's language (which CLDR doesn't
+# recognize) but we deal with that as a synonym for "pt-BR".
+@supported=pt-PT, pt-BR, es, es-419
+pt-US, pt-PT >> pt-BR
+@supported=pt-PT, pt, es, es-419
+pt-US, pt-PT >> pt
+
+@favor=script
+@supported=pt-BR, es, es-419
+pt-PT, es, pt >> pt-BR
+@supported=pt-PT, pt, es, es-419
+pt-US, pt-PT >> pt
+
+** test: TestScriptAndRegion
+@supported=en-GB, en
+en-CA >> en-GB
+# fr-CA is a "close enough" match to "fr" to be returned in favor of "en-GB"
+@supported=fr, en-GB, en
+fr-CA, en-CA >> fr
+@supported=zh-Hant, zh-TW
+zh-HK >> zh-Hant
+
+@favor=script
+@supported=en-GB, en
+en-CA >> en-GB
+@supported=fr, en-GB, en
+fr-CA, en-CA >> fr
+@supported=zh-Hant, zh-TW
+zh-HK >> zh-Hant
+
+** test: TestFallback
+@supported=zh-CN, zh-TW, iw
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+he-IT >> iw
+
+** test: TestFallbackWithDefault
+# Check that script fallbacks are handled right and that we don't have to
+# fall back to the default.
+@supported=zh-CN, zh-TW, iw
+@default=fr
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+he-IT >> iw
+
+@favor=script
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+he-IT >> iw
+
+** test: TestSpecials
+# Check that nearby languages are handled.
+@supported=en, fil, ro, nn
+tl >> fil
+mo >> ro
+nb >> nn
+ja >> en # Make sure default works.
+
+** test: TestRegionalSpecials
+# Verify that en-AU is closer to en-GB than to en (which is en-US).
+@supported=en, en-GB, es-ES, es-419
+en-AU >> en-GB
+# Following 2 cases test closer/smaller region difference.
+es-MX >> es-419
+es-PT >> es-ES
+
+@favor=script
+en-AU >> en-GB
+es-MX >> es-419
+es-PT >> es-ES
+
+** test: TestEmpty
+fr >> null
+
+** test: TestUndefined
+# When the undefined language doesn't match anything in the list,
+# return the default.
+@supported=it, fr
+und >> it
+# When it *does* occur in the list, return it.
+@supported=it, und
+und >> und
+# The unusual part:
+# max("und") = "en-Latn-US", and since matching is based on
+# maximized tags, the undefined language would normally match
+# English. But that would produce the counterintuitive results
+# that BestMatchFor("und", LanguageMatcher("it,en")) would be "en",
+# and BestMatchFor("en", LanguageMatcher("it,und")) would be "und".
+
+# To avoid that, we change the matcher's definitions of max
+# (AddLikelySubtagsWithDefaults) so that max("und")="und". That
+# produces the following, more desirable results:
+@supported=it, en
+und >> it
+@supported=it, und
+en >> it
+
+** test: TestVariantWithScriptMatch
+@supported=fr, en, sv
+en-GB >> en
+en-GB, sv >> en
+
+@favor=script
+en-GB, sv >> en
+
+** test: Serbian
+@supported=und, sr
+sr-ME >> sr
+@supported=und, sr-ME
+sr >> sr-ME
+@supported=und, sr-Latn
+bs >> und
+@supported=und, bs
+sr-Latn >> und
+@supported=und, sr
+bs >> und
+@supported=und, bs
+sr >> und
+@supported=und, sr-Latn
+sr >> sr-Latn
+@supported=und, sr
+sr-Latn >> sr
+
+@favor=script
+sr-ME >> sr
+@supported=und, sr-ME
+sr >> sr-ME
+@supported=und, sr-Latn
+bs >> sr-Latn
+@supported=und, bs
+sr-Latn >> bs
+@supported=und, sr
+bs >> und
+@supported=und, bs
+sr >> und
+@supported=und, sr-Latn
+sr >> sr-Latn
+@supported=und, sr
+sr-Latn >> sr
+
+** test: MatchGooglePrivateUseSubtag
+@supported=fr, x-bork, en-Latn-US
+x-piglatin >> fr
+x-bork >> x-bork
+@supported=fr, en-GB, x-bork, es-ES, es-419
+x-piglatin >> fr
+x-bork >> x-bork
+
+@favor=script
+@supported=fr, x-bork, en-Latn-US
+x-piglatin >> x-bork
+x-bork >> x-bork
+@supported=fr, en-GB, x-bork, es-ES, es-419
+x-piglatin >> x-bork
+x-bork >> x-bork
+
+** test: MatchGrandfatheredCode
+@supported=fr, i-klingon, en-Latn-US
+en-GB-oed >> en-Latn-US
+i-klingon >> tlh
+
+@favor=script
+en-GB-oed >> en-Latn-US
+i-klingon >> tlh
+
+** test: MatchGooglePseudoLocale
+# Google pseudo locales using variant subtags.
+# (See below for the region code based pseudo locales.)
+@supported=fr, en-PSACCENT, ar-PSBIDI, en-PSCRACK, zh-Hans-PSCRACK, pt-PT-PSCRACK, pt
+de >> fr
+en-US >> fr
+en >> fr
+ar-PSBIDI >> ar-PSBIDI
+en-PSACCENT >> en-PSACCENT
+en-PSCRACK >> en-PSCRACK
+pt-BR >> pt
+pt-PT-PSCRACK >> pt-PT-PSCRACK
+zh-Hans-PSCRACK >> zh-Hans-PSCRACK
+
+@favor=script
+de >> fr
+en-US >> fr
+en >> fr
+ar-PSBIDI >> ar-PSBIDI
+en-PSACCENT >> en-PSACCENT
+en-PSCRACK >> en-PSCRACK
+pt-BR >> pt
+pt-PT-PSCRACK >> pt-PT-PSCRACK
+zh-Hans-PSCRACK >> zh-Hans-PSCRACK
+
+** test: MatchGooglePseudoLocaleWithFallbacks
+# Pseudo locales based on the fall back option (XA..XC region codes).
+@supported=fr, en-XA, ar-XB, en-XC, zh-Hans-XC, pt
+de >> fr
+en-US >> fr
+en >> fr
+ar-XB >> ar-XB
+en-XA >> en-XA
+en-XC >> en-XC
+pt-BR >> pt
+zh-Hans-XC >> zh-Hans-XC
+
+@favor=script
+de >> fr
+en-US >> fr
+en >> fr
+ar-XB >> ar-XB
+en-XA >> en-XA
+en-XC >> en-XC
+pt-BR >> pt
+zh-Hans-XC >> zh-Hans-XC
+
+** test: DoNotMatchGooglePseudoLocale
+@supported=fr, en-XA, ar-XB, en-PSACCENT, ar-PSBIDI, en-DE, pt, ar-SY, ar-PSCRACK
+de >> fr
+# We wouldn't want to return pseudo locales when there's a good match for an
+# ordinary locale.
+# Note: If LanguageMatcher was not aware of PSACCENT, it would consider the
+# distance from "en" to "en-PSACCENT" smaller than to "en-DE" (the standard
+# variant distance is smaller than a region distance).
+en >> en-DE
+ar-EG >> ar-SY
+pt-BR >> pt
+ar-XB >> ar-XB
+ar-PSBIDI >> ar-XB # These are equivalent.
+en-XA >> en-XA
+en-PSACCENT >> en-XA # These are equivalent.
+ar-PSCRACK >> ar-PSCRACK
+
+@favor=script
+de >> en-DE
+en >> en-DE
+ar-EG >> ar-SY
+pt-BR >> pt
+ar-XB >> ar-XB
+ar-PSBIDI >> ar-XB # These are equivalent.
+en-XA >> en-XA
+en-PSACCENT >> en-XA # These are equivalent.
+ar-PSCRACK >> ar-PSCRACK
+
+** test: BestMatchForTraditionalChinese
+# Scenario: An application that only supports Simplified Chinese (and some
+# other languages), but does not support Traditional Chinese. zh-Hans-CN
+# could be replaced with zh-CN, zh, or zh-Hans, it wouldn't make much of a
+# difference.
+# The script distance (simplified vs. traditional Han) is considered small
+# enough to be an acceptable match. The regional difference is considered
+# almost insignificant.
+@supported=fr, zh-Hans-CN, en-US
+zh-TW >> zh-Hans-CN
+zh-Hant >> zh-Hans-CN
+
+# For geopolitical reasons, you might want to avoid a zh-Hant -> zh-Hans
+# match. In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is
+# requested, you can change your call to getBestMatch to include a 2nd
+# language preference. "en" is a better match since its distance to "en-US"
+# is closer than the distance from "zh-TW" to "zh-CN" (script distance).
+zh-TW, en >> en-US
+zh-Hant-CN, en >> en-US
+zh-Hans, en >> zh-Hans-CN
+
+** test: MaxBeforeEquals
+# Compare maximized forms of earlier items before testing equality
+# of later items.
+@supported=en, fr-CA
+en-US, fr-CA >> en
+
+@favor=script
+en-US, fr-CA >> en
+
+** test: SiblingDefaultRegion
+@supported=de-AT, de-DE, de-CH
+de >> de-DE
+
+** test: ReturnDefaultInsteadOfNullForEmptyPriorityList
+@default=und
+de >> und
+
+** test: ReturnSpecifiedDefaultForNoMatch
+@supported=de, en, fr
+@default=und
+hi >> und
+
+@favor=script
+hi >> und
+
+** test: MatchedLanguageIgnoresDefault
+@supported=de, en, fr
+@default=und
+fr >> fr
+
+@favor=script
+fr >> fr
+
+## GenX
+
+** test: TwoSpanishes
+@supported=es, es-MX
+@default=und
+es-001 >> es
+und >> und
+ca >> und
+gl-ES >> es
+es >> es
+es-MX >> es-MX
+es-002 >> es
+es-003 >> es-MX
+es-005 >> es-MX
+es-019 >> es-MX
+es-029 >> es-MX
+es-419 >> es-MX
+es-142 >> es
+es-150 >> es
+es-AD >> es
+es-AR >> es-MX
+es-BO >> es-MX
+es-BZ >> es-MX
+es-CA >> es-MX
+es-CL >> es-MX
+es-CO >> es-MX
+es-CR >> es-MX
+es-CU >> es-MX
+es-DO >> es-MX
+es-EC >> es-MX
+es-ES >> es
+es-GI >> es
+es-GQ >> es
+es-GT >> es-MX
+es-HN >> es-MX
+es-NI >> es-MX
+es-PA >> es-MX
+es-PE >> es-MX
+es-PH >> es
+es-PR >> es-MX
+es-PY >> es-MX
+es-SV >> es-MX
+es-US >> es-MX
+es-UY >> es-MX
+es-VE >> es-MX
+
+@favor=script
+es-001 >> es
+und >> und
+ca >> es
+gl-ES >> es
+es >> es
+es-MX >> es-MX
+es-002 >> es
+es-003 >> es-MX
+es-005 >> es-MX
+es-019 >> es-MX
+es-029 >> es-MX
+es-419 >> es-MX
+es-142 >> es
+es-150 >> es
+es-AD >> es
+es-AR >> es-MX
+es-BO >> es-MX
+es-BZ >> es-MX
+es-CA >> es-MX
+es-CL >> es-MX
+es-CO >> es-MX
+es-CR >> es-MX
+es-CU >> es-MX
+es-DO >> es-MX
+es-EC >> es-MX
+es-ES >> es
+es-GI >> es
+es-GQ >> es
+es-GT >> es-MX
+es-HN >> es-MX
+es-NI >> es-MX
+es-PA >> es-MX
+es-PE >> es-MX
+es-PH >> es
+es-PR >> es-MX
+es-PY >> es-MX
+es-SV >> es-MX
+es-US >> es-MX
+es-UY >> es-MX
+es-VE >> es-MX
+
+** test: Three Spanishes
+@supported=es, es-419, es-MX
+@default=und
+es-001 >> es
+und >> und
+ca >> und
+gl-ES >> es
+es >> es
+es-419 >> es-419
+es-002 >> es
+es-003 >> es-419
+es-005 >> es-419
+es-019 >> es-419
+es-029 >> es-419
+es-142 >> es
+es-150 >> es
+es-AD >> es
+es-AR >> es-419
+es-BO >> es-419
+es-BZ >> es-419
+es-CA >> es-419
+es-CL >> es-419
+es-CO >> es-419
+es-CR >> es-419
+es-CU >> es-419
+es-DO >> es-419
+es-EC >> es-419
+es-ES >> es
+es-GI >> es
+es-GQ >> es
+es-GT >> es-419
+es-HN >> es-419
+es-MX >> es-MX
+es-NI >> es-419
+es-PA >> es-419
+es-PE >> es-419
+es-PH >> es
+es-PR >> es-419
+es-PY >> es-419
+es-SV >> es-419
+es-US >> es-419
+es-UY >> es-419
+es-VE >> es-419
+
+@favor=script
+es-001 >> es
+und >> und
+ca >> es
+gl-ES >> es
+es >> es
+es-419 >> es-419
+es-002 >> es
+es-003 >> es-419
+es-005 >> es-419
+es-019 >> es-419
+es-029 >> es-419
+es-142 >> es
+es-150 >> es
+es-AD >> es
+es-AR >> es-419
+es-BO >> es-419
+es-BZ >> es-419
+es-CA >> es-419
+es-CL >> es-419
+es-CO >> es-419
+es-CR >> es-419
+es-CU >> es-419
+es-DO >> es-419
+es-EC >> es-419
+es-ES >> es
+es-GI >> es
+es-GQ >> es
+es-GT >> es-419
+es-HN >> es-419
+es-MX >> es-MX
+es-NI >> es-419
+es-PA >> es-419
+es-PE >> es-419
+es-PH >> es
+es-PR >> es-419
+es-PY >> es-419
+es-SV >> es-419
+es-US >> es-419
+es-UY >> es-419
+es-VE >> es-419
+
+** test: Englishes
+@supported=en-GB, en-US
+@default=und
+und >> und
+ja >> und
+fr-CA >> und
+
+# Great Britain fallback
+en-AU >> en-GB
+en-BZ >> en-GB
+en-CA >> en-GB
+en-IN >> en-GB
+en-IE >> en-GB
+en-JM >> en-GB
+en-NZ >> en-GB
+en-PK >> en-GB
+en-TT >> en-GB
+en-ZA >> en-GB
+
+# United States fallback
+en-US >> en-US
+en >> en-US
+
+@favor=script
+und >> und
+ja >> und
+fr-CA >> en-GB
+en-AU >> en-GB
+en-BZ >> en-GB
+en-CA >> en-GB
+en-IN >> en-GB
+en-IE >> en-GB
+en-JM >> en-GB
+en-NZ >> en-GB
+en-PK >> en-GB
+en-TT >> en-GB
+en-ZA >> en-GB
+en-US >> en-US
+en >> en-US
+
+** test: TestFallback
+# manyEnMatcher
+@supported=en-GB, en-US, en, en-AU
+@default=und
+und >> und
+ja >> und
+fr-CA >> und
+
+# nonUsMatcher
+fr >> und
+
+# onlyAuMatcher
+@supported=en-AU, ja, ca
+fr >> und
+
+# noEnMatcher
+@supported=pl, ja, ca
+fr >> und
+
+@favor=script
+@supported=en-GB, en-US, en, en-AU
+und >> und
+ja >> und
+fr-CA >> en-GB
+fr >> en-GB
+@supported=en-AU, ja, ca
+fr >> en-AU
+@supported=pl, ja, ca
+fr >> pl
+
+## Go
+
+** test: basics
+@supported=fr, en-GB, en
+en-GB >> en-GB
+en-US >> en
+fr-FR >> fr
+ja-JP >> fr
+
+** test: script fallbacks
+@supported=zh-CN, zh-TW, iw
+zh-Hant >> zh-TW
+zh >> zh-CN
+zh-Hans-CN >> zh-CN
+zh-Hant-HK >> zh-TW
+@default=iw
+he-IT >> iw
+
+@favor=script
+he-IT >> iw
+
+** test: language-specific script fallbacks 1
+@supported=en, sr, nl
+sr-Latn >> sr
+sh >> en
+hr >> en
+bs >> en
+nl-Cyrl >> en # Mark: Expected value should be en not sr. Script difference exceeds threshold, so can't be nl
+
+@favor=script
+sr-Latn >> sr
+hr >> en
+bs >> en
+nl-Cyrl >> sr
+
+** test: language-specific script fallbacks 2
+@supported=en, sr-Latn
+sr >> sr-Latn
+sr-Cyrl >> sr-Latn
+@default=und
+hr >> und
+
+@favor=script
+@default=
+sr >> sr-Latn
+sr-Cyrl >> sr-Latn
+@default=und
+hr >> en
+
+** test: don't match hr to sr-Latn
+@supported=en, sr-Latn
+hr >> en
+
+@favor=script
+hr >> en
+
+** test: both deprecated and not
+@supported=fil, tl, iw, he
+he-IT >> iw
+he >> iw
+iw >> iw
+fil-IT >> fil
+fil >> fil
+tl >> fil
+
+@favor=script
+he-IT >> iw
+he >> iw
+iw >> iw
+fil-IT >> fil
+fil >> fil
+tl >> fil
+
+** test: nearby languages: Nynorsk to Bokmål
+@supported=en, nb
+nn >> nb
+
+@favor=script
+nn >> nb
+
+** test: nearby languages: Danish does not match nn
+@supported=en, nn
+da >> en
+
+@favor=script
+da >> en
+
+** test: nearby languages: Danish matches no
+@supported=en, no
+da >> no
+
+@favor=script
+da >> no
+
+** test: nearby languages: Danish matches nb
+@supported=en, nb
+da >> nb
+
+** test: prefer matching languages over language variants.
+@supported=nn, en-GB
+no, en-US >> nn
+nb, en-US >> nn
+
+@favor=script
+no, en-US >> nn
+nb, en-US >> nn
+
+** test: deprecated version is closer than same language with other differences
+@supported=nl, he, en-GB
+iw, en-US >> he
+
+@favor=script
+iw, en-US >> he
+
+** test: macro equivalent is closer than same language with other differences
+@supported=nl, zh, en-GB, no
+cmn, en-US >> zh
+nb, en-US >> no
+
+@favor=script
+cmn, en-US >> zh
+nb, en-US >> no
+
+** test: legacy equivalent is closer than same language with other differences
+@supported=nl, fil, en-GB
+tl, en-US >> fil
+
+@favor=script
+tl, en-US >> fil
+
+** test: distinguish near equivalents
+@supported=en, ro, mo, ro-MD
+ro >> ro
+mo >> ro # ro=mo for the locale matcher
+ro-MD >> ro-MD
+
+@favor=script
+ro >> ro
+mo >> ro # ro=mo for the locale matcher
+ro-MD >> ro-MD
+
+** test: maximization of legacy
+@supported=sr-Cyrl, sr-Latn, ro, ro-MD
+sh >> sr-Latn
+mo >> ro
+
+@favor=script
+sh >> sr-Latn
+mo >> ro
+
+** test: empty
+fr >> null
+en >> null
+
+** test: private use subtags
+@supported=fr, en-GB, x-bork, es-ES, es-419
+x-piglatin >> fr
+x-bork >> x-bork
+
+** test: grandfathered codes
+@supported=fr, i-klingon, en-Latn-US
+en-GB-oed >> en-Latn-US
+i-klingon >> tlh
+
+
+** test: simple variant match
+@supported=fr, en-GB, ja, es-ES, es-MX
+de, en-US >> en-GB
+de, zh >> fr
+
+** test: best match for traditional Chinese
+@supported=fr, zh-Hans-CN, en-US
+zh-TW >> zh-Hans-CN
+zh-Hant >> zh-Hans-CN
+zh-TW, en >> en-US
+zh-Hant-CN, en >> en-US
+zh-Hans, en >> zh-Hans-CN
+
+** test: return first among likely-subtags equivalent locales
+# Was: more specific script should win in case regions are identical
+# with some different results.
+@supported=af, af-Latn, af-Arab
+af >> af
+af-ZA >> af
+af-Latn-ZA >> af
+af-Latn >> af
+
+@favor=script
+af >> af
+af-ZA >> af
+af-Latn-ZA >> af
+af-Latn >> af
+
+# Was: more specific region should win
+# with some different results.
+@supported=nl, nl-NL, nl-BE
+@favor=
+nl >> nl
+nl-Latn >> nl
+nl-Latn-NL >> nl
+nl-NL >> nl
+
+@favor=script
+nl >> nl
+nl-Latn >> nl
+nl-Latn-NL >> nl
+nl-NL >> nl
+
+# Was: more specific region wins over more specific script
+# with some different results.
+@supported=nl, nl-Latn, nl-NL, nl-BE
+@favor=
+nl >> nl
+nl-Latn >> nl
+nl-NL >> nl
+nl-Latn-NL >> nl
+
+@favor=script
+nl >> nl
+nl-Latn >> nl
+nl-NL >> nl
+nl-Latn-NL >> nl
+
+** test: region may replace matched if matched is enclosing
+@supported=es-419, es
+@default=es-MX
+es-MX >> es-419
+@default=
+es-SG >> es
+
+@favor=script
+@default=es-MX
+es-MX >> es-419
+@default=
+es-SG >> es
+
+** test: region distance Portuguese
+@supported=pt, pt-PT
+pt-ES >> pt-PT
+
+@favor=script
+pt-ES >> pt-PT
+
+** test: if no preferred locale specified, pick top language, not regional
+@supported=en, fr, fr-CA, fr-CH
+fr-US >> fr
+
+@favor=script
+fr-US >> fr
+
+** test: region distance German
+@supported=de-AT, de-DE, de-CH
+de >> de-DE
+
+** test: en-AU is closer to en-GB than to en (which is en-US)
+@supported=en, en-GB, es-ES, es-419
+en-AU >> en-GB
+@default=es-MX
+es-MX >> es-419
+@default=
+es-PT >> es-ES
+
+@favor=script
+en-AU >> en-GB
+es-MX >> es-419
+@default=
+es-PT >> es-ES
+
+** test: undefined
+@supported=it, fr
+und >> it
+
+** test: und does not match en
+@supported=it, en
+und >> it
+
+** test: undefined in priority list
+@supported=it, und
+und >> und
+en >> it
+
+** test: undefined
+@supported=it, fr, zh
+und-FR >> fr
+und-CN >> zh
+und-Hans >> zh
+und-Hant >> zh
+und-Latn >> it
+
+@favor=script
+und-FR >> fr
+und-CN >> zh
+und-Hans >> zh
+und-Hant >> zh
+und-Latn >> it
+
+** test: match on maximized tag
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja-JP, en-GB >> ja
+ja-Jpan-JP, en-GB >> ja
+
+** test: pick best maximized tag
+@supported=ja, ja-Jpan-US, ja-JP, en, ru
+ja-Jpan, ru >> ja
+ja-JP, ru >> ja
+ja-US, ru >> ja-Jpan-US
+
+@favor=script
+ja-Jpan, ru >> ja
+ja-JP, ru >> ja
+ja-US, ru >> ja-Jpan-US
+
+** test: termination: pick best maximized match
+@supported=ja, ja-Jpan, ja-JP, en, ru
+ja-Jpan-JP, ru >> ja
+ja-Jpan, ru >> ja
+
+@favor=script
+ja-Jpan-JP, ru >> ja
+ja-Jpan, ru >> ja
+
+** test: same language over exact, but distinguish when user is explicit
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja, de >> ja
+@supported=en, de, fr, ja
+de-CH, fr >> de
+@supported=en-GB, nl
+en, nl >> en-GB
+en, nl, en-GB >> en-GB
+
+@favor=script
+@supported=fr, en-GB, ja, es-ES, es-MX
+ja, de >> ja
+@supported=en, de, fr, ja
+de-CH, fr >> de
+@supported=en-GB, nl
+en, nl >> en-GB
+en, nl, en-GB >> en-GB
+
+** test: parent relation preserved
+@supported=en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK
+en-150 >> en-GB
+en-AU >> en-GB
+en-BE >> en-GB
+en-GG >> en-GB
+en-GI >> en-GB
+en-HK >> en-GB
+en-IE >> en-GB
+en-IM >> en-GB
+en-IN >> en-GB
+en-JE >> en-GB
+en-MT >> en-GB
+en-NZ >> en-GB
+en-PK >> en-GB
+en-SG >> en-GB
+en-DE >> en-GB
+@default=es-AR
+es-AR >> es-419
+@default=es-BO
+es-BO >> es-419
+@default=es-CL
+es-CL >> es-419
+@default=es-CO
+es-CO >> es-419
+@default=es-CR
+es-CR >> es-419
+@default=es-CU
+es-CU >> es-419
+@default=es-DO
+es-DO >> es-419
+@default=es-EC
+es-EC >> es-419
+@default=es-GT
+es-GT >> es-419
+@default=es-HN
+es-HN >> es-419
+@default=es-MX
+es-MX >> es-419
+@default=es-NI
+es-NI >> es-419
+@default=es-PA
+es-PA >> es-419
+@default=es-PE
+es-PE >> es-419
+@default=es-PR
+es-PR >> es-419
+@default=
+es-PT >> es
+@default=es-PY
+es-PY >> es-419
+@default=es-SV
+es-SV >> es-419
+@default=
+es-US >> es-419
+@default=es-UY
+es-UY >> es-419
+@default=es-VE
+es-VE >> es-419
+@default=
+pt-AO >> pt-PT
+pt-CV >> pt-PT
+pt-GW >> pt-PT
+pt-MO >> pt-PT
+pt-MZ >> pt-PT
+pt-ST >> pt-PT
+pt-TL >> pt-PT
+
+@favor=script
+en-150 >> en-GB
+en-AU >> en-GB
+en-BE >> en-GB
+en-GG >> en-GB
+en-GI >> en-GB
+en-HK >> en-GB
+en-IE >> en-GB
+en-IM >> en-GB
+en-IN >> en-GB
+en-JE >> en-GB
+en-MT >> en-GB
+en-NZ >> en-GB
+en-PK >> en-GB
+en-SG >> en-GB
+en-DE >> en-GB
+@default=es-AR
+es-AR >> es-419
+@default=es-BO
+es-BO >> es-419
+@default=es-CL
+es-CL >> es-419
+@default=es-CO
+es-CO >> es-419
+@default=es-CR
+es-CR >> es-419
+@default=es-CU
+es-CU >> es-419
+@default=es-DO
+es-DO >> es-419
+@default=es-EC
+es-EC >> es-419
+@default=es-GT
+es-GT >> es-419
+@default=es-HN
+es-HN >> es-419
+@default=es-MX
+es-MX >> es-419
+@default=es-NI
+es-NI >> es-419
+@default=es-PA
+es-PA >> es-419
+@default=es-PE
+es-PE >> es-419
+@default=es-PR
+es-PR >> es-419
+@default=
+es-PT >> es
+@default=es-PY
+es-PY >> es-419
+@default=es-SV
+es-SV >> es-419
+@default=
+es-US >> es-419
+@default=es-UY
+es-UY >> es-419
+@default=es-VE
+es-VE >> es-419
+@default=
+pt-AO >> pt-PT
+pt-CV >> pt-PT
+pt-GW >> pt-PT
+pt-MO >> pt-PT
+pt-MZ >> pt-PT
+pt-ST >> pt-PT
+pt-TL >> pt-PT
+
+** test: preserve extensions
+@supported=en, de, sl-NEDIS
+@default=de-u-co-phonebk
+de-FR-u-co-phonebk >> de
+@default=sl-NEDIS-u-cu-eur
+sl-NEDIS-u-cu-eur >> sl-NEDIS
+sl-u-cu-eur >> sl-NEDIS
+sl-HR-NEDIS-u-cu-eur >> sl-NEDIS
+@default=de-t-m0-iso-i0-pinyin
+de-t-m0-iso-i0-pinyin >> de
+
+@favor=script
+@default=de-u-co-phonebk
+de-FR-u-co-phonebk >> de
+@default=sl-NEDIS-u-cu-eur
+sl-NEDIS-u-cu-eur >> sl-NEDIS
+sl-u-cu-eur >> sl-NEDIS
+sl-HR-NEDIS-u-cu-eur >> sl-NEDIS
+@default=de-t-m0-iso-i0-pinyin
+de-t-m0-iso-i0-pinyin >> de
+
+## ULS
+
+** test: testEmptyUserLanguagesGetsEmpty_getBestMatches
+@supported=de
+ >> de
+
+** test: testNoStrongMatchGetsEmpty_getBestMatches
+@supported=de
+fr >> de
+
+@favor=script
+fr >> de
+
+** test: testLooseMatchForGeneral_getBestMatches
+@supported=es-419
+es-MX >> es-419
+
+@favor=script
+es-MX >> es-419
+
+** test: testLooseMatchForEnglish_getBestMatches
+@supported=en, en-GB
+en-CA >> en-GB
+
+@favor=script
+en-CA >> en-GB
+
+** test: testLooseMatchForChinese_getBestMatches
+@supported=zh
+zh-TW >> zh
+
+@favor=script
+zh-TW >> zh
+
+## Geo
+
+** test: testGetBestMatchWithMinMatchScore
+@supported=fr-FR, fr, fr-CA, en
+@default=und
+fr >> fr-FR # First likely-subtags equivalent match is chosen.
+@supported=en, fr, fr-CA
+fr-FR >> fr # Parent match is chosen.
+@supported=en, fr-CA
+fr-FR >> fr-CA # Sibling match is chosen.
+@supported=fr-CA, fr-FR
+fr >> fr-FR # Inferred region match is chosen.
+fr-SN >> fr-CA
+@supported=en, fr-FR
+fr >> fr-FR # Child match is chosen.
+@supported=de, en, it
+fr >> und
+@supported=iw, en
+iw-Latn >> und
+@supported=iw, no
+ru >> und
+@supported=iw-Latn, iw-Cyrl, iw
+ru >> und
+@supported=iw, iw-Latn
+ru >> und
+en >> und
+@supported=en, uk
+ru >> und
+@supported=zh-TW, en
+zh-CN >> zh-TW
+@supported=ja
+ru >> und
+
+@favor=script
+@supported=fr-FR, fr, fr-CA, en
+fr >> fr-FR
+@supported=en, fr, fr-CA
+fr-FR >> fr
+@supported=en, fr-CA
+fr-FR >> fr-CA
+@supported=fr-CA, fr-FR
+fr >> fr-FR
+fr-SN >> fr-CA
+@supported=en, fr-FR
+fr >> fr-FR
+@supported=de, en, it
+fr >> en
+@supported=iw, en
+iw-Latn >> en
+@supported=iw, no
+ru >> und
+@supported=iw-Latn, iw-Cyrl, iw
+ru >> iw-Cyrl
+@supported=iw, iw-Latn
+ru >> und
+en >> iw-Latn
+@supported=en, uk
+ru >> uk
+@supported=zh-TW, en
+zh-CN >> zh-TW
+@supported=ja
+ru >> und
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
index 8fe0fe8..79fe285 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
@@ -19,10 +19,15 @@
import com.ibm.icu.util.ULocale;
/**
- * Off-line-built data for LocaleMatcher.
+ * Offline-built data for LocaleMatcher.
* Mostly but not only the data for mapping locales to their maximized forms.
*/
public class LocaleDistance {
+ /**
+ * Bit flag used on the last character of a subtag in the trie.
+ * Must be set consistently by the builder and the lookup code.
+ */
+ public static final int END_OF_SUBTAG = 0x80;
/** Distance value bit flag, set by the builder. */
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
@@ -148,15 +153,21 @@
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
private LocaleDistance(Data data) {
- this.trie = new BytesTrie(data.trie, 0);
- this.regionToPartitionsIndex = data.regionToPartitionsIndex;
- this.partitionArrays = data.partitionArrays;
- this.paradigmLSRs = data.paradigmLSRs;
+ trie = new BytesTrie(data.trie, 0);
+ regionToPartitionsIndex = data.regionToPartitionsIndex;
+ partitionArrays = data.partitionArrays;
+ paradigmLSRs = data.paradigmLSRs;
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
- this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
+ minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
+ // For the default demotion value, use the
+ // default region distance between unrelated Englishes.
+ // Thus, unless demotion is turned off,
+ // a mere region difference for one desired locale
+ // is as good as a perfect match for the next desired locale.
+ // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
LSR en = new LSR("en", "Latn", "US");
LSR enGB = new LSR("en", "Latn", "GB");
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
@@ -188,18 +199,18 @@
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
- public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
+ public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
int desLangDistance = trieNext(iter, desired.language, false);
- long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
+ long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
- for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
- LSR supported = supportedLsrs[slIndex];
+ for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) {
+ LSR supported = supportedLSRs[slIndex];
boolean star = false;
int distance = desLangDistance;
if (distance >= 0) {
@@ -227,6 +238,11 @@
star = true;
}
assert 0 <= distance && distance <= 100;
+ // We implement "favor subtag" by reducing the language subtag distance
+ // (unscientifically reducing it to a quarter of the normal value),
+ // so that the script distance is relatively more important.
+ // For example, given a default language distance of 80, we reduce it to 20,
+ // which is below the default threshold of 50, which is the default script distance.
if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
@@ -312,9 +328,10 @@
int desLength = desiredPartitions.length();
int suppLength = supportedPartitions.length();
if (desLength == 1 && suppLength == 1) {
- BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
+ // Fastpath for single desired/supported partitions.
+ BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | END_OF_SUBTAG);
if (result.hasNext()) {
- result = iter.next(supportedPartitions.charAt(0) | 0x80);
+ result = iter.next(supportedPartitions.charAt(0) | END_OF_SUBTAG);
if (result.hasValue()) {
return iter.getValue();
}
@@ -328,11 +345,11 @@
for (int di = 0;;) {
// Look up each desired-partition string only once,
// not for each (desired, supported) pair.
- BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
+ BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | END_OF_SUBTAG);
if (result.hasNext()) {
long desState = suppLength > 1 ? iter.getState64() : 0;
for (int si = 0;;) {
- result = iter.next(supportedPartitions.charAt(si++) | 0x80);
+ result = iter.next(supportedPartitions.charAt(si++) | END_OF_SUBTAG);
int d;
if (result.hasValue()) {
d = iter.getValue();
@@ -391,7 +408,7 @@
}
} else {
// last character of this subtag
- BytesTrie.Result result = iter.next(c | 0x80);
+ BytesTrie.Result result = iter.next(c | END_OF_SUBTAG);
if (wantValue) {
if (result.hasValue()) {
int value = iter.getValue();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
index 3b63705..de42587 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
@@ -180,10 +180,12 @@
// VisibleForTesting
public LSR makeMaximizedLsrFrom(ULocale locale) {
- String name = locale.getName();
+ String name = locale.getName(); // Faster than .toLanguageTag().
if (name.startsWith("@x=")) {
+ String tag = locale.toLanguageTag();
+ assert tag.startsWith("x-");
// Private use language tag x-subtag-subtag...
- return new LSR(name, "", "");
+ return new LSR(tag, "", "");
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
@@ -238,7 +240,7 @@
language = getCanonical(languageAliases, language);
// (We have no script mappings.)
region = getCanonical(regionAliases, region);
- return INSTANCE.maximize(language, script, region);
+ return maximize(language, script, region);
}
/**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
index 1f3bf81..f15a44b 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
@@ -23,7 +23,7 @@
/**
* Immutable class that picks the best match between a user's desired locales and
- * and application's supported locales.
+ * an application's supported locales.
*
* <p>Example:
* <pre>
@@ -54,18 +54,22 @@
* 3. other supported locales.
* This may change in future versions.
*
- * <p>All classes implementing this interface should be immutable. Often a
- * product will just need one static instance, built with the languages
+ * <p>Often a product will just need one matcher instance, built with the languages
* that it supports. However, it may want multiple instances with different
* default languages based on additional information, such as the domain.
*
+ * <p>This class is not intended for public subclassing.
+ *
* @author markdavis@google.com
* @stable ICU 4.4
*/
-public class LocaleMatcher {
+public final class LocaleMatcher {
private static final LSR UND_LSR = new LSR("und","","");
+ // In ULocale, "und" and "" make the same object.
private static final ULocale UND_ULOCALE = new ULocale("und");
+ // In Locale, "und" and "" make different objects.
private static final Locale UND_LOCALE = new Locale("und");
+ private static final Locale EMPTY_LOCALE = new Locale("");
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
@@ -253,43 +257,44 @@
* best-matching desired locale, such as the -t- and -u- extensions.
* May replace some fields of the supported locale.
* The result is the locale that should be used for date and number formatting, collation, etc.
+ * Returns null if getSupportedULocale() returns null.
*
- * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
*
- * @return the service locale, combining the best-matching desired and supported locales.
+ * @return a locale combining the best-matching desired and supported locales.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
- public ULocale makeServiceULocale() {
+ public ULocale makeResolvedULocale() {
ULocale bestDesired = getDesiredULocale();
- ULocale serviceLocale = supportedULocale;
- if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
- ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
-
- // Copy the region from bestDesired, if there is one.
- String region = bestDesired.getCountry();
- if (!region.isEmpty()) {
- b.setRegion(region);
- }
-
- // Copy the variants from bestDesired, if there are any.
- // Note that this will override any serviceLocale variants.
- // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
- String variants = bestDesired.getVariant();
- if (!variants.isEmpty()) {
- b.setVariant(variants);
- }
-
- // Copy the extensions from bestDesired, if there are any.
- // Note that this will override any serviceLocale extensions.
- // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
- // (replacing calendar).
- for (char extensionKey : bestDesired.getExtensionKeys()) {
- b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
- }
- serviceLocale = b.build();
+ if (supportedULocale == null || bestDesired == null ||
+ supportedULocale.equals(bestDesired)) {
+ return supportedULocale;
}
- return serviceLocale;
+ ULocale.Builder b = new ULocale.Builder().setLocale(supportedULocale);
+
+ // Copy the region from bestDesired, if there is one.
+ String region = bestDesired.getCountry();
+ if (!region.isEmpty()) {
+ b.setRegion(region);
+ }
+
+ // Copy the variants from bestDesired, if there are any.
+ // Note that this will override any supportedULocale variants.
+ // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+ String variants = bestDesired.getVariant();
+ if (!variants.isEmpty()) {
+ b.setVariant(variants);
+ }
+
+ // Copy the extensions from bestDesired, if there are any.
+ // Note that this will override any supportedULocale extensions.
+ // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+ // (replacing calendar).
+ for (char extensionKey : bestDesired.getExtensionKeys()) {
+ b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
+ }
+ return b.build();
}
/**
@@ -298,15 +303,17 @@
* May replace some fields of the supported locale.
* The result is the locale that should be used for
* date and number formatting, collation, etc.
+ * Returns null if getSupportedLocale() returns null.
*
- * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
*
- * @return the service locale, combining the best-matching desired and supported locales.
+ * @return a locale combining the best-matching desired and supported locales.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
- public Locale makeServiceLocale() {
- return makeServiceULocale().toLocale();
+ public Locale makeResolvedLocale() {
+ ULocale resolved = makeResolvedULocale();
+ return resolved != null ? resolved.toLocale() : null;
}
}
@@ -320,8 +327,8 @@
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
private final Map<LSR, Integer> supportedLsrToIndex;
// Array versions of the supportedLsrToIndex keys and values.
- // The distance lookup loops over the supportedLsrs and returns the index of the best match.
- private final LSR[] supportedLsrs;
+ // The distance lookup loops over the supportedLSRs and returns the index of the best match.
+ private final LSR[] supportedLSRs;
private final int[] supportedIndexes;
private final ULocale defaultULocale;
private final Locale defaultLocale;
@@ -334,7 +341,7 @@
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
- public static class Builder {
+ public static final class Builder {
private List<ULocale> supportedLocales;
private int thresholdDistance = -1;
private Demotion demotion;
@@ -394,7 +401,7 @@
* Adds another supported locale.
* Duplicates are allowed, and are not removed.
*
- * @param locale the list of locale
+ * @param locale another locale
* @return this Builder object
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
@@ -411,7 +418,7 @@
* Adds another supported locale.
* Duplicates are allowed, and are not removed.
*
- * @param locale the list of locale
+ * @param locale another locale
* @return this Builder object
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
@@ -514,7 +521,7 @@
@Override
public String toString() {
StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder");
- if (!supportedLocales.isEmpty()) {
+ if (supportedLocales != null && !supportedLocales.isEmpty()) {
s.append(" supported={").append(supportedLocales.toString()).append('}');
}
if (defaultLocale != null) {
@@ -572,50 +579,62 @@
private LocaleMatcher(Builder builder) {
thresholdDistance = builder.thresholdDistance < 0 ?
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
+ int supportedLocalesLength = builder.supportedLocales != null ?
+ builder.supportedLocales.size() : 0;
+ ULocale udef = builder.defaultLocale;
+ Locale def = null;
+ int idef = -1;
// Store the supported locales in input order,
// so that when different types are used (e.g., java.util.Locale)
// we can return those by parallel index.
- int supportedLocalesLength = builder.supportedLocales.size();
supportedULocales = new ULocale[supportedLocalesLength];
supportedLocales = new Locale[supportedLocalesLength];
// Supported LRSs in input order.
LSR lsrs[] = new LSR[supportedLocalesLength];
// Also find the first supported locale whose LSR is
// the same as that for the default locale.
- ULocale udef = builder.defaultLocale;
- Locale def = null;
LSR defLSR = null;
- int idef = -1;
if (udef != null) {
def = udef.toLocale();
defLSR = getMaximalLsrOrUnd(udef);
}
int i = 0;
- for (ULocale locale : builder.supportedLocales) {
- supportedULocales[i] = locale;
- supportedLocales[i] = locale.toLocale();
- LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
- if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
- idef = i;
+ if (supportedLocalesLength > 0) {
+ for (ULocale locale : builder.supportedLocales) {
+ supportedULocales[i] = locale;
+ supportedLocales[i] = locale.toLocale();
+ LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
+ if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
+ idef = i;
+ }
+ ++i;
}
- ++i;
}
// We need an unordered map from LSR to first supported locale with that LSR,
- // and an ordered list of (LSR, Indexes).
+ // and an ordered list of (LSR, supported index).
// We use a LinkedHashMap for both,
// and insert the supported locales in the following order:
// 1. Default locale, if it is supported.
- // 2. Priority locales in builder order.
+ // 2. Priority locales (aka "paradigm locales") in builder order.
// 3. Remaining locales in builder order.
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+ // Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index)
+ // for the default and paradigm locales, counting the number of those locales,
+ // and keeping two indexes to fill the LSR and index arrays with
+ // priority vs. normal locales. In that loop we would need to entry.setValue(~i)
+ // to restore non-negative indexes in the map.
+ // That would probably save little and be less readable.
Map<LSR, Integer> otherLsrToIndex = null;
if (idef >= 0) {
supportedLsrToIndex.put(defLSR, idef);
}
i = 0;
for (ULocale locale : supportedULocales) {
- if (i == idef) { continue; }
+ if (i == idef) {
+ ++i;
+ continue;
+ }
LSR lsr = lsrs[i];
if (defLSR == null) {
assert i == 0;
@@ -624,7 +643,15 @@
defLSR = lsr;
idef = 0;
supportedLsrToIndex.put(lsr, 0);
- } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+ } else if (idef >= 0 && lsr.equals(defLSR)) {
+ // lsr.equals(defLSR) means that this supported locale is
+ // a duplicate of the default locale.
+ // Either an explicit default locale is supported, and we added it before the loop,
+ // or there is no explicit default locale, and this is
+ // a duplicate of the first supported locale.
+ // In both cases, idef >= 0 now; when idef < 0 we can skip the comparison.
+ // For a duplicate, putIfAbsent() is a no-op, so nothing to do.
+ } else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
putIfAbsent(supportedLsrToIndex, lsr, i);
} else {
if (otherLsrToIndex == null) {
@@ -637,12 +664,12 @@
if (otherLsrToIndex != null) {
supportedLsrToIndex.putAll(otherLsrToIndex);
}
- int numSuppLsrs = supportedLsrToIndex.size();
- supportedLsrs = new LSR[numSuppLsrs];
- supportedIndexes = new int[numSuppLsrs];
+ int supportedLSRsLength = supportedLsrToIndex.size();
+ supportedLSRs = new LSR[supportedLSRsLength];
+ supportedIndexes = new int[supportedLSRsLength];
i = 0;
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
- supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
+ supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()]
supportedIndexes[i++] = entry.getValue();
}
@@ -671,7 +698,7 @@
}
private static final LSR getMaximalLsrOrUnd(Locale locale) {
- if (locale.equals(UND_LOCALE)) {
+ if (locale.equals(UND_LOCALE) || locale.equals(EMPTY_LOCALE)) {
return UND_LSR;
} else {
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
@@ -766,7 +793,7 @@
* Parses the string like {@link LocalePriorityList} does and
* returns the supported locale which best matches one of the desired locales.
*
- * @param desiredLocaleList Typically a user's languages, in order of preference (descending),
+ * @param desiredLocaleList Typically a user's languages,
* as a string which is to be parsed like LocalePriorityList does.
* @return the best-matching supported locale.
* @stable ICU 4.4
@@ -808,9 +835,13 @@
return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
}
+ private Result defaultResult() {
+ return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ }
+
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
if (suppIndex < 0) {
- return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ return defaultResult();
} else if (desiredLocale != null) {
return new Result(desiredLocale, supportedULocales[suppIndex],
null, supportedLocales[suppIndex], 0, suppIndex);
@@ -822,7 +853,7 @@
private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
if (suppIndex < 0) {
- return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ return defaultResult();
} else if (desiredLocale != null) {
return new Result(null, supportedULocales[suppIndex],
desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
@@ -858,7 +889,7 @@
public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
Iterator<ULocale> desiredIter = desiredLocales.iterator();
if (!desiredIter.hasNext()) {
- return makeResult(UND_ULOCALE, null, -1);
+ return defaultResult();
}
ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
LSR desiredLSR = lsrIter.next();
@@ -891,7 +922,7 @@
public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
Iterator<Locale> desiredIter = desiredLocales.iterator();
if (!desiredIter.hasNext()) {
- return makeResult(UND_LOCALE, null, -1);
+ return defaultResult();
}
LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
LSR desiredLSR = lsrIter.next();
@@ -920,7 +951,7 @@
return suppIndex;
}
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- desiredLSR, supportedLsrs, bestDistance, favorSubtag);
+ desiredLSR, supportedLSRs, bestDistance, favorSubtag);
if (bestIndexAndDistance >= 0) {
bestDistance = bestIndexAndDistance & 0xff;
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
@@ -933,6 +964,7 @@
break;
}
desiredLSR = remainingIter.next();
+ ++desiredIndex;
}
if (bestSupportedLsrIndex < 0) {
if (TRACE_MATCHER) {
@@ -969,8 +1001,8 @@
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
- new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
+ getMaximalLsrOrUnd(desired),
+ new LSR[] { getMaximalLsrOrUnd(supported) },
thresholdDistance, favorSubtag) & 0xff;
return (100 - distance) / 100.0;
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
index 0726b1d..bce7a14 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
@@ -13,7 +13,8 @@
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -24,10 +25,10 @@
/**
* Provides an immutable list of languages/locales in priority order.
* The string format is based on the Accept-Language format
- * <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), such as
* "af, en, fr;q=0.9". Syntactically it is slightly
* more lenient, in allowing extra whitespace between elements, extra commas,
- * and more than 3 decimals (on input), and pins between 0 and 1.
+ * and more than 3 decimals (on input). The qvalues must be between 0 and 1.
*
* <p>In theory, Accept-Language indicates the relative 'quality' of each item,
* but in practice, all of the browsers just take an ordered list, like
@@ -70,7 +71,6 @@
* @stable ICU 4.4
*/
public class LocalePriorityList implements Iterable<ULocale> {
- private static final double D0 = 0.0d;
private static final Double D1 = 1.0d;
private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
@@ -91,6 +91,8 @@
/**
* Creates a Builder and adds a locale with a specified weight.
+ * A zero or negative weight leads to removing the locale.
+ * A weight greater than 1 is pinned to 1.
*
* @param locale locale/language to be added
* @param weight value from 0.0 to 1.0
@@ -109,7 +111,7 @@
* @stable ICU 4.4
*/
public static Builder add(LocalePriorityList list) {
- return new Builder().add(list);
+ return new Builder(list);
}
/**
@@ -154,13 +156,14 @@
@Override
public String toString() {
final StringBuilder result = new StringBuilder();
- for (final ULocale language : languagesAndWeights.keySet()) {
+ for (Entry<ULocale, Double> entry : languagesAndWeights.entrySet()) {
+ ULocale language = entry.getKey();
+ double weight = entry.getValue();
if (result.length() != 0) {
result.append(", ");
}
result.append(language);
- double weight = languagesAndWeights.get(language);
- if (weight != D1) {
+ if (weight != 1.0) {
result.append(";q=").append(weight);
}
}
@@ -221,13 +224,31 @@
* These store the input languages and weights, in chronological order,
* where later additions override previous ones.
*/
- private final Map<ULocale, Double> languageToWeight
- = new LinkedHashMap<>();
+ private Map<ULocale, Double> languageToWeight;
+ /**
+ * The builder is reusable but rarely reused. Avoid cloning the map when not needed.
+ * Exactly one of languageToWeight and built is null.
+ */
+ private LocalePriorityList built;
+ private boolean hasWeights = false; // other than 1.0
- /*
+ /**
* Private constructor, only used by LocalePriorityList
*/
private Builder() {
+ languageToWeight = new LinkedHashMap<>();
+ }
+
+ private Builder(LocalePriorityList list) {
+ built = list;
+ for (Double value : list.languagesAndWeights.values()) {
+ double weight = value;
+ assert 0.0 < weight && weight <= 1.0;
+ if (weight != 1.0) {
+ hasWeights = true;
+ break;
+ }
+ }
}
/**
@@ -249,27 +270,48 @@
* @stable ICU 4.4
*/
public LocalePriorityList build(boolean preserveWeights) {
- // Walk through the input list, collecting the items with the same weights.
- final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<>(
- myDescendingDouble);
- for (final ULocale lang : languageToWeight.keySet()) {
- Double weight = languageToWeight.get(lang);
- Set<ULocale> s = doubleCheck.get(weight);
- if (s == null) {
- doubleCheck.put(weight, s = new LinkedHashSet<>());
- }
- s.add(lang);
+ if (built != null) {
+ // Calling build() again without changing anything in between.
+ // Just return the same immutable list.
+ return built;
}
- // We now have a bunch of items sorted by weight, then chronologically.
- // We can now create a list in the right order
- final Map<ULocale, Double> temp = new LinkedHashMap<>();
- for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) {
- final Double weight = langEntry.getKey();
- for (final ULocale lang : langEntry.getValue()) {
- temp.put(lang, preserveWeights ? weight : D1);
+ Map<ULocale, Double> temp;
+ if (hasWeights) {
+ // Walk through the input list, collecting the items with the same weights.
+ final TreeMap<Double, List<ULocale>> weightToLanguages =
+ new TreeMap<>(myDescendingDouble);
+ for (Entry<ULocale, Double> entry : languageToWeight.entrySet()) {
+ ULocale lang = entry.getKey();
+ Double weight = entry.getValue();
+ List<ULocale> s = weightToLanguages.get(weight);
+ if (s == null) {
+ weightToLanguages.put(weight, s = new LinkedList<>());
+ }
+ s.add(lang);
}
+ // We now have a bunch of items sorted by weight, then chronologically.
+ // We can now create a list in the right order.
+ if (weightToLanguages.size() <= 1) {
+ // There is at most one weight.
+ temp = languageToWeight;
+ if (weightToLanguages.isEmpty() || weightToLanguages.firstKey() == 1.0) {
+ hasWeights = false;
+ }
+ } else {
+ temp = new LinkedHashMap<>();
+ for (Entry<Double, List<ULocale>> langEntry : weightToLanguages.entrySet()) {
+ final Double weight = preserveWeights ? langEntry.getKey() : D1;
+ for (final ULocale lang : langEntry.getValue()) {
+ temp.put(lang, weight);
+ }
+ }
+ }
+ } else {
+ // Nothing to sort.
+ temp = languageToWeight;
}
- return new LocalePriorityList(Collections.unmodifiableMap(temp));
+ languageToWeight = null;
+ return built = new LocalePriorityList(Collections.unmodifiableMap(temp));
}
/**
@@ -280,9 +322,8 @@
* @stable ICU 4.4
*/
public Builder add(final LocalePriorityList list) {
- for (final ULocale language : list.languagesAndWeights
- .keySet()) {
- add(language, list.languagesAndWeights.get(language));
+ for (Entry<ULocale, Double> entry : list.languagesAndWeights.entrySet()) {
+ add(entry.getKey(), entry.getValue());
}
return this;
}
@@ -295,7 +336,7 @@
* @stable ICU 4.4
*/
public Builder add(final ULocale locale) {
- return add(locale, D1);
+ return add(locale, 1.0);
}
/**
@@ -307,7 +348,7 @@
*/
public Builder add(ULocale... locales) {
for (final ULocale languageCode : locales) {
- add(languageCode, D1);
+ add(languageCode, 1.0);
}
return this;
}
@@ -315,7 +356,8 @@
/**
* Adds a locale with a specified weight.
* Overrides any previous weight for the locale.
- * Removes a locale if the weight is zero.
+ * A zero or negative weight leads to removing the locale.
+ * A weight greater than 1 is pinned to 1.
*
* @param locale language/locale to add
* @param weight value between 0.0 and 1.1
@@ -323,15 +365,24 @@
* @stable ICU 4.4
*/
public Builder add(final ULocale locale, double weight) {
+ if (languageToWeight == null) {
+ // Builder reuse after build().
+ languageToWeight = new LinkedHashMap<>(built.languagesAndWeights);
+ built = null;
+ }
if (languageToWeight.containsKey(locale)) {
languageToWeight.remove(locale);
}
- if (weight <= D0) {
+ Double value;
+ if (weight <= 0.0) {
return this; // skip zeros
- } else if (weight > D1) {
- weight = D1;
+ } else if (weight >= 1.0) {
+ value = D1;
+ } else {
+ value = weight;
+ hasWeights = true;
}
- languageToWeight.put(locale, weight);
+ languageToWeight.put(locale, value);
return this;
}
@@ -349,9 +400,9 @@
if (itemMatcher.reset(item).matches()) {
final ULocale language = new ULocale(itemMatcher.group(1));
final double weight = Double.parseDouble(itemMatcher.group(2));
- if (!(weight >= D0 && weight <= D1)) { // do ! for NaN
- throw new IllegalArgumentException("Illegal weight, must be 0..1: "
- + weight);
+ if (!(0.0 <= weight && weight <= 1.0)) { // do ! for NaN
+ throw new IllegalArgumentException(
+ "Illegal weight, must be 0..1: " + weight);
}
add(language, weight);
} else if (item.length() != 0) {
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index 32c361f..f20cc68 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -11,8 +11,11 @@
import java.io.BufferedReader;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@@ -111,14 +114,111 @@
@Test
public void testBasics() {
- final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
- .add(ULocale.ENGLISH).build());
+ LocaleMatcher matcher = newLocaleMatcher(
+ LocalePriorityList.
+ add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH).
+ build());
logln(matcher.toString());
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.JAPAN));
+
+ // API coverage
+ List<Locale> locales = new ArrayList<>();
+ locales.add(Locale.FRENCH);
+ locales.add(Locale.UK);
+ matcher = LocaleMatcher.builder().
+ setSupportedLocales(locales).addSupportedLocale(Locale.ENGLISH).
+ setDefaultLocale(Locale.GERMAN).build();
+ assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
+ assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
+ assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
+ assertEquals(ULocale.GERMAN, matcher.getBestMatch(ULocale.JAPAN));
+
+ ULocale udesired = new ULocale("en_GB"); // distinct object from ULocale.UK
+ LocaleMatcher.Result result = matcher.getBestMatchResult(udesired);
+ assertTrue("exactly desired en-GB object", udesired == result.getDesiredULocale());
+ assertEquals(Locale.UK, result.getDesiredLocale());
+ assertEquals(0, result.getDesiredIndex());
+ assertEquals(ULocale.UK, result.getSupportedULocale());
+ assertEquals(Locale.UK, result.getSupportedLocale());
+ assertEquals(1, result.getSupportedIndex());
+
+ LocalePriorityList list = LocalePriorityList.add(ULocale.JAPAN, ULocale.US).build();
+ result = matcher.getBestMatchResult(list);
+ assertEquals(1, result.getDesiredIndex());
+ assertEquals(Locale.US, result.getDesiredLocale());
+
+ Locale desired = new Locale("en", "US"); // distinct object from Locale.US
+ result = matcher.getBestLocaleResult(desired);
+ assertEquals(ULocale.US, result.getDesiredULocale());
+ assertTrue("exactly desired en-US object", desired == result.getDesiredLocale());
+ assertEquals(0, result.getDesiredIndex());
+ assertEquals(ULocale.ENGLISH, result.getSupportedULocale());
+ assertEquals(Locale.ENGLISH, result.getSupportedLocale());
+ assertEquals(2, result.getSupportedIndex());
+
+ result = matcher.getBestMatchResult(ULocale.JAPAN);
+ assertNull(result.getDesiredLocale());
+ assertNull(result.getDesiredULocale());
+ assertEquals(-1, result.getDesiredIndex());
+ assertEquals(ULocale.GERMAN, result.getSupportedULocale());
+ assertEquals(Locale.GERMAN, result.getSupportedLocale());
+ assertEquals(-1, result.getSupportedIndex());
+ }
+
+ private static final String locString(ULocale loc) {
+ return loc != null ? loc.getName() : "(null)";
+ }
+
+ @Test
+ public void testSupportedDefault() {
+ // The default locale is one of the supported locales.
+ List<ULocale> locales = Arrays.asList(
+ new ULocale("fr"), new ULocale("en_GB"), new ULocale("en"));
+ LocaleMatcher matcher = LocaleMatcher.builder().
+ setSupportedULocales(locales).
+ setDefaultULocale(locales.get(1)).
+ build();
+ ULocale best = matcher.getBestMatch("en_GB");
+ assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
+ best = matcher.getBestMatch("en_US");
+ assertEquals("getBestMatch(en_US)", "en", locString(best));
+ best = matcher.getBestMatch("fr_FR");
+ assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
+ best = matcher.getBestMatch("ja_JP");
+ assertEquals("getBestMatch(ja_JP)", "en_GB", locString(best));
+ LocaleMatcher.Result result = matcher.getBestMatchResult(new ULocale("ja_JP"));
+ assertEquals("getBestMatchResult(ja_JP).supp",
+ "en_GB", locString(result.getSupportedULocale()));
+ assertEquals("getBestMatchResult(ja_JP).suppIndex",
+ 1, result.getSupportedIndex());
+ }
+
+ @Test
+ public void testUnsupportedDefault() {
+ // The default locale does not match any of the supported locales.
+ List<ULocale> locales = Arrays.asList(
+ new ULocale("fr"), new ULocale("en_GB"), new ULocale("en"));
+ LocaleMatcher matcher = LocaleMatcher.builder().
+ setSupportedULocales(locales).
+ setDefaultULocale(new ULocale("de")).
+ build();
+ ULocale best = matcher.getBestMatch("en_GB");
+ assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
+ best = matcher.getBestMatch("en_US");
+ assertEquals("getBestMatch(en_US)", "en", locString(best));
+ best = matcher.getBestMatch("fr_FR");
+ assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
+ best = matcher.getBestMatch("ja_JP");
+ assertEquals("getBestMatch(ja_JP)", "de", locString(best));
+ LocaleMatcher.Result result = matcher.getBestMatchResult(new ULocale("ja_JP"));
+ assertEquals("getBestMatchResult(ja_JP).supp",
+ "de", locString(result.getSupportedULocale()));
+ assertEquals("getBestMatchResult(ja_JP).suppIndex",
+ -1, result.getSupportedIndex());
}
@Test
@@ -178,8 +278,15 @@
@Test
public void testEmpty() {
- final LocaleMatcher matcher = newLocaleMatcher("");
+ final LocaleMatcher matcher = LocaleMatcher.builder().build();
assertNull(matcher.getBestMatch(ULocale.FRENCH));
+ LocaleMatcher.Result result = matcher.getBestMatchResult(ULocale.FRENCH);
+ assertNull(result.getDesiredULocale());
+ assertNull(result.getDesiredLocale());
+ assertEquals(-1, result.getDesiredIndex());
+ assertNull(result.getSupportedULocale());
+ assertNull(result.getSupportedLocale());
+ assertEquals(-1, result.getSupportedIndex());
}
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
@@ -197,12 +304,12 @@
@Test
public void testMatch_none() {
double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
- assertTrue("Actual < 0: " + match, 0 <= match);
- assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
+ assertTrue("Actual >= 0: " + match, 0 <= match);
+ assertTrue("Actual < 0.2 (~ language + script distance): " + match, 0.2 > match);
}
@Test
- public void testMatch_matchOnMazimized() {
+ public void testMatch_matchOnMaximized() {
ULocale undTw = new ULocale("und_TW");
ULocale zhHant = new ULocale("zh_Hant");
double matchZh = match(undTw, new ULocale("zh"));
@@ -220,6 +327,20 @@
}
@Test
+ public void testResolvedLocale() {
+ LocaleMatcher matcher = LocaleMatcher.builder().
+ addSupportedULocale(new ULocale("ar-EG")).
+ build();
+ ULocale desired = new ULocale("ar-SA-u-nu-latn");
+ LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
+ assertEquals("best", "ar_EG", result.getSupportedLocale().toString());
+ ULocale resolved = result.makeResolvedULocale();
+ assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
+ "ar-SA-u-nu-latn",
+ resolved.toLanguageTag());
+ }
+
+ @Test
public void testMatchGrandfatheredCode() {
final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
@@ -517,6 +638,14 @@
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
}
+ @Test
+ public void testCanonicalize() {
+ LocaleMatcher matcher = LocaleMatcher.builder().build();
+ assertEquals("bh --> bho", new ULocale("bho"), matcher.canonicalize(new ULocale("bh")));
+ assertEquals("mo-200 --> ro-CZ", new ULocale("ro_CZ"),
+ matcher.canonicalize(new ULocale("mo_200")));
+ }
+
private static final class PerfCase {
ULocale desired;
ULocale expectedShort;
@@ -850,6 +979,18 @@
}
}
+ private static Locale toLocale(ULocale ulocale) {
+ return ulocale != null ? ulocale.toLocale() : null;
+ }
+
+ private static Iterable<Locale> localesFromULocales(Collection<ULocale> ulocales) {
+ List<Locale> locales = new ArrayList<>(ulocales.size());
+ for (ULocale ulocale : ulocales) {
+ locales.add(ulocale.toLocale());
+ }
+ return locales;
+ }
+
@Test
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
@@ -886,19 +1027,73 @@
ULocale expMatch = getULocaleOrNull(test.expMatch);
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
ULocale bestSupported = matcher.getBestMatch(test.desired);
- assertEquals("bestSupported", expMatch, bestSupported);
+ assertEquals("bestSupported ULocale from string", expMatch, bestSupported);
+ LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
+ if (desired.getULocales().size() == 1) {
+ ULocale desiredULocale = desired.iterator().next();
+ bestSupported = matcher.getBestMatch(desiredULocale);
+ assertEquals("bestSupported ULocale from ULocale", expMatch, bestSupported);
+ Locale desiredLocale = desiredULocale.toLocale();
+ Locale bestSupportedLocale = matcher.getBestLocale(desiredLocale);
+ assertEquals("bestSupported Locale from Locale",
+ toLocale(expMatch), bestSupportedLocale);
+
+ LocaleMatcher.Result result = matcher.getBestMatchResult(desiredULocale);
+ assertEquals("result.getSupportedULocale from ULocale",
+ expMatch, result.getSupportedULocale());
+ assertEquals("result.getSupportedLocale from ULocale",
+ toLocale(expMatch), result.getSupportedLocale());
+
+ result = matcher.getBestLocaleResult(desiredLocale);
+ assertEquals("result.getSupportedULocale from Locale",
+ expMatch, result.getSupportedULocale());
+ assertEquals("result.getSupportedLocale from Locale",
+ toLocale(expMatch), result.getSupportedLocale());
+ } else {
+ bestSupported = matcher.getBestMatch(desired);
+ assertEquals("bestSupported ULocale from ULocale iterator",
+ expMatch, bestSupported);
+ Locale bestSupportedLocale = matcher.getBestLocale(
+ localesFromULocales(desired.getULocales()));
+ assertEquals("bestSupported Locale from Locale iterator",
+ toLocale(expMatch), bestSupportedLocale);
+ }
} else {
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
- assertEquals("bestSupported", expMatch, result.getSupportedULocale());
+ assertEquals("result.getSupportedULocale from ULocales",
+ expMatch, result.getSupportedULocale());
+ assertEquals("result.getSupportedLocale from ULocales",
+ toLocale(expMatch), result.getSupportedLocale());
if (!test.expDesired.isEmpty()) {
ULocale expDesired = getULocaleOrNull(test.expDesired);
- assertEquals("bestDesired", expDesired, result.getDesiredULocale());
+ assertEquals("result.getDesiredULocale from ULocales",
+ expDesired, result.getDesiredULocale());
+ assertEquals("result.getDesiredLocale from ULocales",
+ toLocale(expDesired), result.getDesiredLocale());
}
if (!test.expCombined.isEmpty()) {
ULocale expCombined = getULocaleOrNull(test.expCombined);
- ULocale combined = result.makeServiceULocale();
- assertEquals("combined", expCombined, combined);
+ assertEquals("combined ULocale from ULocales", expCombined, result.makeResolvedULocale());
+ assertEquals("combined Locale from ULocales", toLocale(expCombined), result.makeResolvedLocale());
+ }
+
+ result = matcher.getBestLocaleResult(localesFromULocales(desired.getULocales()));
+ assertEquals("result.getSupportedULocale from Locales",
+ expMatch, result.getSupportedULocale());
+ assertEquals("result.getSupportedLocale from Locales",
+ toLocale(expMatch), result.getSupportedLocale());
+ if (!test.expDesired.isEmpty()) {
+ ULocale expDesired = getULocaleOrNull(test.expDesired);
+ assertEquals("result.getDesiredULocale from Locales",
+ expDesired, result.getDesiredULocale());
+ assertEquals("result.getDesiredLocale from Locales",
+ toLocale(expDesired), result.getDesiredLocale());
+ }
+ if (!test.expCombined.isEmpty()) {
+ ULocale expCombined = getULocaleOrNull(test.expCombined);
+ assertEquals("combined ULocale from Locales", expCombined, result.makeResolvedULocale());
+ assertEquals("combined Locale from Locales", toLocale(expCombined), result.makeResolvedLocale());
}
}
}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocalePriorityListTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocalePriorityListTest.java
index 98185fd..bc1e3ab 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocalePriorityListTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocalePriorityListTest.java
@@ -9,6 +9,8 @@
package com.ibm.icu.dev.test.util;
+import java.util.Set;
+
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -23,37 +25,106 @@
*/
@RunWith(JUnit4.class)
public class LocalePriorityListTest extends TestFmwk {
- @Test
- public void testLanguagePriorityList() {
- final String expected = "af, en, fr";
+ @Test
+ public void testLanguagePriorityList() {
+ final String expected = "af, en, fr";
- LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
- assertEquals(expected, list.toString());
+ LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
+ assertEquals(expected, list.toString());
- // check looseness, and that later values win
- LocalePriorityList list2 = LocalePriorityList.add(
- ", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
- assertEquals(expected, list2.toString());
- assertEquals(list, list2);
+ // check looseness, and that later values win
+ LocalePriorityList list2 = LocalePriorityList.add(
+ ", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
+ assertEquals(expected, list2.toString());
+ assertEquals(list, list2);
- LocalePriorityList list3 = LocalePriorityList
- .add(new ULocale("af"))
- .add(ULocale.FRENCH, 0.9d)
- .add(ULocale.ENGLISH)
- .build();
- assertEquals(expected, list3.toString());
- assertEquals(list, list3);
+ LocalePriorityList list3 = LocalePriorityList
+ .add(new ULocale("af"))
+ .add(ULocale.FRENCH, 0.9d)
+ .add(ULocale.ENGLISH)
+ .build();
+ assertEquals(expected, list3.toString());
+ assertEquals(list, list3);
- LocalePriorityList list4 = LocalePriorityList
- .add(list).build();
- assertEquals(expected, list4.toString());
- assertEquals(list, list4);
+ LocalePriorityList list4 = LocalePriorityList.add(list).build();
+ assertEquals(expected, list4.toString());
+ assertEquals(list, list4);
- LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
- assertEquals("af, en, fr;q=0.9", list5.toString());
- }
+ LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
+ assertEquals("af, en, fr;q=0.9", list5.toString());
+ }
-private void assertEquals(Object expected, Object string) {
- assertEquals("", expected, string);
-}
+ @Test
+ public void testGetULocales() {
+ LocalePriorityList list = LocalePriorityList.add("af, en, fr").build();
+ Set<ULocale> locales = list.getULocales();
+ assertEquals("number of locales", 3, locales.size());
+ assertTrue("fr", locales.contains(ULocale.FRENCH));
+ }
+
+ @Test
+ public void testIterator() {
+ LocalePriorityList list = LocalePriorityList.add("af, en, fr").build();
+ ULocale af = new ULocale("af");
+ int count = 0;
+ for (ULocale locale : list) {
+ assertTrue("expected locale",
+ locale.equals(af) || locale.equals(ULocale.ENGLISH) ||
+ locale.equals(ULocale.FRENCH));
+ ++count;
+ }
+ assertEquals("number of locales", 3, count);
+ }
+
+ @Test
+ public void testQValue() {
+ try {
+ LocalePriorityList.add("de;q=-0.1");
+ errln("negative accept-language qvalue should fail");
+ } catch(IllegalArgumentException expected) {
+ // good
+ }
+ try {
+ LocalePriorityList.add("de;q=1.001");
+ errln("accept-language qvalue > 1 should fail");
+ } catch(IllegalArgumentException expected) {
+ // good
+ }
+ LocalePriorityList list = LocalePriorityList.add("de;q=0.555555555").build(true);
+ double weight = list.getWeight(ULocale.GERMAN);
+ assertTrue("many decimals", 0.555 <= weight && weight <= 0.556);
+ }
+
+ @Test
+ public void testReuse() {
+ // Test reusing a Builder after build(), and some other code coverage.
+ LocalePriorityList.Builder builder =
+ LocalePriorityList.add("el;q=0.5, de, fr;q=0.2, el;q=0");
+ LocalePriorityList list = builder.build(true);
+ assertEquals("initial list", "de, fr;q=0.2", list.toString());
+ list = builder.add(ULocale.FRENCH, 1.0).build(true);
+ assertEquals("upgrade French", "de, fr", list.toString());
+ list = builder.add(ULocale.ITALIAN, 0.1).build(true);
+ assertEquals("add Italian", "de, fr, it;q=0.1", list.toString());
+ builder = LocalePriorityList.add(list);
+ list = builder.build(true);
+ assertEquals("cloned Builder", "de, fr, it;q=0.1", list.toString());
+ list = builder.add(ULocale.ITALIAN).build(true);
+ assertEquals("upgrade Italian", "de, fr, it", list.toString());
+ // Start over with all 1.0 weights.
+ builder = LocalePriorityList.add("de, fr");
+ list = builder.build(true);
+ assertEquals("simple", "de, fr", list.toString());
+ // Add another list.
+ LocalePriorityList list2 = LocalePriorityList.add(ULocale.ITALIAN, 0.2).build(true);
+ assertEquals("list2", "it;q=0.2", list2.toString());
+ list = builder.add(list2).build(true);
+ assertEquals("list+list2", "de, fr, it;q=0.2", list.toString());
+ list = builder.add(ULocale.JAPANESE).build(true);
+ assertEquals("list+list2+ja", "de, fr, ja, it;q=0.2", list.toString());
+ }
+
+ private void assertEquals(Object expected, Object string) {
+ assertEquals("", expected, string);
+ }
}
diff --git a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
index 327f714..a104c35 100644
--- a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
+++ b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
@@ -153,7 +153,7 @@
bytes[length++] = (byte) c;
} else {
// Mark the last character as a terminator to avoid overlap matches.
- bytes[length++] = (byte) (c | 0x80);
+ bytes[length++] = (byte) (c | LocaleDistance.END_OF_SUBTAG);
break;
}
}