ICU-21144 LocaleMatcher setMaxDistance(), isMatch()
diff --git a/icu4c/source/common/localematcher.cpp b/icu4c/source/common/localematcher.cpp
index a7a1137..bb18e23 100644
--- a/icu4c/source/common/localematcher.cpp
+++ b/icu4c/source/common/localematcher.cpp
@@ -141,6 +141,8 @@
LocaleMatcher::Builder::~Builder() {
delete supportedLocales_;
delete defaultLocale_;
+ delete maxDistanceDesired_;  // heap clone stored by setMaxDistance(); owned by this Builder
+ delete maxDistanceSupported_;  // second half of the same sample pair
}
LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
@@ -267,6 +269,24 @@
return *this;
}
+// Remembers a sample (desired, supported) pair for the matcher's distance threshold.
+// Both arguments are cloned; the Builder owns the clones and releases them when they
+// are replaced here or when the Builder is destroyed.
+// Does nothing if the Builder already carries an error. Sets U_MEMORY_ALLOCATION_ERROR
+// (and keeps any previously stored pair) if either clone cannot be allocated.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
+                                                               const Locale &supported) {
+    if (U_SUCCESS(errorCode_)) {
+        Locale *newDesired = desired.clone();
+        Locale *newSupported = supported.clone();
+        if (newDesired != nullptr && newSupported != nullptr) {
+            // Both copies exist: drop the previous pair and adopt the new one.
+            delete maxDistanceDesired_;
+            delete maxDistanceSupported_;
+            maxDistanceDesired_ = newDesired;
+            maxDistanceSupported_ = newSupported;
+        } else {
+            // At most one clone succeeded; deleting a nullptr is harmless.
+            delete newDesired;
+            delete newSupported;
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    return *this;
+}
+
#if 0
/**
* <i>Internal only!</i>
@@ -351,9 +371,6 @@
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
if (U_FAILURE(errorCode)) { return; }
- if (thresholdDistance < 0) {
- thresholdDistance = localeDistance.getDefaultScriptDistance();
- }
const Locale *def = builder.defaultLocale_;
LSR builderDefaultLSR;
const LSR *defLSR = nullptr;
@@ -470,6 +487,25 @@
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
}
+
+ if (thresholdDistance >= 0) {
+ // already copied
+ } else if (builder.maxDistanceDesired_ != nullptr) {
+ LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
+ const LSR *pSuppLSR = &suppLSR;
+ int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+ getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
+ &pSuppLSR, 1,
+ LocaleDistance::shiftDistance(100), favorSubtag, direction);
+ if (U_SUCCESS(errorCode)) {
+ // +1 for an exclusive threshold from an inclusive max.
+ thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
+ } else {
+ thresholdDistance = 0;
+ }
+ } else {
+ thresholdDistance = localeDistance.getDefaultScriptDistance();
+ }
}
LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
@@ -695,6 +731,18 @@
return supportedIndexes[bestSupportedLsrIndex];
}
+// Returns true if the single (desired, supported) pair is within this matcher's
+// threshold distance, honoring the matcher's favorSubtag and direction settings.
+// The supported-locale list configured on the Builder is not consulted.
+// Returns false whenever errorCode indicates a failure, so that an error while
+// resolving either locale is never reported as a match.
+UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
+                             UErrorCode &errorCode) const {
+    LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return false; }
+    const LSR *pSuppLSR = &suppLSR;
+    int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+            &pSuppLSR, 1,
+            LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
+    // Resolving `desired` may have failed as well; check before trusting the result
+    // (same pattern as the threshold computation in the LocaleMatcher constructor).
+    return U_SUCCESS(errorCode) && indexAndDistance >= 0;
+}
+
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
diff --git a/icu4c/source/common/locdistance.h b/icu4c/source/common/locdistance.h
index ad84151..5682eec 100644
--- a/icu4c/source/common/locdistance.h
+++ b/icu4c/source/common/locdistance.h
@@ -39,6 +39,10 @@
return shiftedDistance / (1 << DISTANCE_SHIFT);
}
+ static int32_t getDistanceFloor(int32_t indexAndDistance) {  // input: packed value as returned by getBestIndexAndDistance()
+ return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;  // keep the DISTANCE_MASK bits, then drop the low DISTANCE_SHIFT bits
+ }
+
static int32_t getIndex(int32_t indexAndDistance) {
// assert indexAndDistance >= 0;
return indexAndDistance >> INDEX_SHIFT;
@@ -79,10 +83,6 @@
// tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
- static int32_t getDistanceFloor(int32_t indexAndDistance) {
- return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
- }
-
LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
LocaleDistance(const LocaleDistance &other) = delete;
LocaleDistance &operator=(const LocaleDistance &other) = delete;
diff --git a/icu4c/source/common/unicode/localematcher.h b/icu4c/source/common/unicode/localematcher.h
index 3ec71df..42741bf 100644
--- a/icu4c/source/common/unicode/localematcher.h
+++ b/icu4c/source/common/unicode/localematcher.h
@@ -480,6 +480,31 @@
return *this;
}
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Sets the maximum distance for an acceptable match.
+ * The matcher will return a match for a pair of locales only if
+ * they match at least as well as the pair given here.
+ *
+ * For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
+ * (desired, supported) locales have a distance no greater than a region subtag difference.
+ * This is much stricter than the CLDR default.
+ *
+ * The details of locale matching are subject to changes in
+ * CLDR data and in the algorithm.
+ * Specifying a maximum distance in relative terms via a sample pair of locales
+ * insulates from changes that affect all distance metrics similarly,
+ * but some changes will necessarily affect relative distances between
+ * different pairs of locales.
+ *
+ * @param desired the desired locale for distance comparison; it is copied.
+ * @param supported the supported locale for distance comparison; it is copied.
+ * @return this Builder object
+ * @draft ICU 68
+ */
+ Builder &setMaxDistance(const Locale &desired, const Locale &supported);
+#endif // U_HIDE_DRAFT_API
+
/**
* Sets the UErrorCode if an error occurred while setting parameters.
* Preserves older error codes in the outErrorCode.
@@ -522,6 +547,8 @@
bool withDefault_ = true;
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
+ Locale *maxDistanceDesired_ = nullptr;
+ Locale *maxDistanceSupported_ = nullptr;
};
// FYI No public LocaleMatcher constructors in C++; use the Builder.
@@ -620,6 +647,23 @@
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
#endif // U_HIDE_DRAFT_API
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Returns true if the pair of locales matches acceptably.
+ * Only the given pair is compared. The result is influenced by Builder options
+ * such as setDirection(), setFavorSubtag(), and setMaxDistance().
+ *
+ * @param desired The desired locale.
+ * @param supported The supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the pair of locales matches acceptably.
+ * @draft ICU 68
+ */
+ UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_DRAFT_API
+
#ifndef U_HIDE_INTERNAL_API
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
diff --git a/icu4c/source/test/intltest/localematchertest.cpp b/icu4c/source/test/intltest/localematchertest.cpp
index 62364ae..bbef5ca 100644
--- a/icu4c/source/test/intltest/localematchertest.cpp
+++ b/icu4c/source/test/intltest/localematchertest.cpp
@@ -61,6 +61,7 @@
void testNoDefault();
void testDemotion();
void testDirection();
+ void testMaxDistanceAndIsMatch();
void testMatch();
void testResolvedLocale();
void testDataDriven();
@@ -86,6 +87,7 @@
TESTCASE_AUTO(testNoDefault);
TESTCASE_AUTO(testDemotion);
TESTCASE_AUTO(testDirection);
+ TESTCASE_AUTO(testMaxDistanceAndIsMatch);
TESTCASE_AUTO(testMatch);
TESTCASE_AUTO(testResolvedLocale);
TESTCASE_AUTO(testDataDriven);
@@ -380,6 +382,36 @@
}
}
+// Exercises Builder::setMaxDistance() together with LocaleMatcher::isMatch() on three
+// matchers built from the same Builder: the default one, one loosened to tolerate a
+// script difference, and one tightened to a regional difference.
+// Assertion labels read "desired / supported".
+void LocaleMatcherTest::testMaxDistanceAndIsMatch() {
+    IcuTestErrorCode errorCode(*this, "testMaxDistanceAndIsMatch");
+    LocaleMatcher::Builder builder;
+    LocaleMatcher standard = builder.build(errorCode);
+    Locale germanLux("de-LU");
+    Locale germanPhoenician("de-Phnx-AT");
+    Locale greek("el");
+    assertTrue("standard de-LU / de", standard.isMatch(germanLux, Locale::getGerman(), errorCode));
+    assertFalse("standard de-Phnx-AT / de",
+                standard.isMatch(germanPhoenician, Locale::getGerman(), errorCode));
+
+    // Allow a script difference to still match.
+    LocaleMatcher loose =
+        builder.setMaxDistance(germanPhoenician, Locale::getGerman()).build(errorCode);
+    assertTrue("loose de-LU / de", loose.isMatch(germanLux, Locale::getGerman(), errorCode));
+    assertTrue("loose de-Phnx-AT / de",
+               loose.isMatch(germanPhoenician, Locale::getGerman(), errorCode));
+    assertFalse("loose el / de", loose.isMatch(greek, Locale::getGerman(), errorCode));
+
+    // Allow at most a regional difference.
+    LocaleMatcher regional =
+        builder.setMaxDistance(Locale("de-AT"), Locale::getGerman()).build(errorCode);
+    assertTrue("regional de-LU / de",
+               regional.isMatch(Locale("de-LU"), Locale::getGerman(), errorCode));
+    assertFalse("regional da / no", regional.isMatch(Locale("da"), Locale("no"), errorCode));
+    // The label names the pair actually passed: desired=zh, supported=zh-Hant.
+    assertFalse("regional zh / zh-Hant",
+                regional.isMatch(Locale::getChinese(), Locale::getTraditionalChinese(), errorCode));
+}
+
+
void LocaleMatcherTest::testMatch() {
IcuTestErrorCode errorCode(*this, "testMatch");
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
index fb16814..e08c118 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
@@ -92,7 +92,7 @@
return shiftedDistance / (1 << DISTANCE_SHIFT);
}
- private static final int getDistanceFloor(int indexAndDistance) {
+ public static final int getDistanceFloor(int indexAndDistance) { // public: LocaleMatcher calls this to derive its threshold
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT; // keep the DISTANCE_MASK bits, then drop the low DISTANCE_SHIFT bits
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
index 400090a..6a22bb8 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
@@ -385,6 +385,8 @@
private boolean withDefault = true;
private FavorSubtag favor;
private Direction direction;
+ private ULocale maxDistanceDesired;
+ private ULocale maxDistanceSupported;
private Builder() {}
@@ -558,6 +560,66 @@
}
/**
+     * Sets the maximum distance for an acceptable match, expressed as a sample pair.
+     * The matcher will report a match for a pair of locales only if
+     * that pair matches at least as well as the pair given here.
+     *
+     * <p>For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
+     * (desired, supported) locales are no further apart than a region subtag difference.
+     * This is much stricter than the CLDR default.
+     *
+     * <p>The details of locale matching are subject to changes in
+     * CLDR data and in the algorithm.
+     * Giving the maximum distance in relative terms, via a sample pair of locales,
+     * insulates from changes that affect all distance metrics similarly,
+     * but some changes will necessarily affect relative distances between
+     * different pairs of locales.
+     *
+     * <p>This overload converts both arguments with ULocale.forLocale() and delegates
+     * to the ULocale variant.
+     *
+     * @param desired the desired locale for distance comparison.
+     * @param supported the supported locale for distance comparison.
+     * @return this Builder object
+     * @throws IllegalArgumentException if either locale is null
+     * @draft ICU 68
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Builder setMaxDistance(Locale desired, Locale supported) {
+        if (desired != null && supported != null) {
+            return setMaxDistance(ULocale.forLocale(desired), ULocale.forLocale(supported));
+        }
+        throw new IllegalArgumentException("desired/supported locales must not be null");
+    }
+
+    /**
+     * Sets the maximum distance for an acceptable match, expressed as a sample pair.
+     * The matcher will report a match for a pair of locales only if
+     * that pair matches at least as well as the pair given here.
+     *
+     * <p>For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
+     * (desired, supported) locales are no further apart than a region subtag difference.
+     * This is much stricter than the CLDR default.
+     *
+     * <p>The details of locale matching are subject to changes in
+     * CLDR data and in the algorithm.
+     * Giving the maximum distance in relative terms, via a sample pair of locales,
+     * insulates from changes that affect all distance metrics similarly,
+     * but some changes will necessarily affect relative distances between
+     * different pairs of locales.
+     *
+     * @param desired the desired locale for distance comparison.
+     * @param supported the supported locale for distance comparison.
+     * @return this Builder object
+     * @throws IllegalArgumentException if either locale is null
+     * @draft ICU 68
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Builder setMaxDistance(ULocale desired, ULocale supported) {
+        final boolean missingLocale = (desired == null) || (supported == null);
+        if (missingLocale) {
+            throw new IllegalArgumentException("desired/supported locales must not be null");
+        }
+        // The pair is only recorded here; the LocaleMatcher constructor turns it into a threshold.
+        this.maxDistanceDesired = desired;
+        this.maxDistanceSupported = supported;
+        return this;
+    }
+
+ /**
* <i>Internal only!</i>
*
* @param thresholdDistance the thresholdDistance to set, with -1 = default
@@ -650,8 +712,6 @@
}
private LocaleMatcher(Builder builder) {
- thresholdDistance = builder.thresholdDistance < 0 ?
- LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
ULocale udef = builder.defaultLocale;
Locale def = null;
LSR defLSR = null;
@@ -737,6 +797,22 @@
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
favorSubtag = builder.favor;
direction = builder.direction;
+
+ int threshold;
+ if (builder.thresholdDistance >= 0) {
+ threshold = builder.thresholdDistance;
+ } else if (builder.maxDistanceDesired != null) {
+ int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ getMaximalLsrOrUnd(builder.maxDistanceDesired),
+ new LSR[] { getMaximalLsrOrUnd(builder.maxDistanceSupported) }, 1,
+ LocaleDistance.shiftDistance(100), favorSubtag, direction);
+ // +1 for an exclusive threshold from an inclusive max.
+ threshold = LocaleDistance.getDistanceFloor(indexAndDistance) + 1;
+ } else {
+ threshold = LocaleDistance.INSTANCE.getDefaultScriptDistance();
+ }
+ thresholdDistance = threshold;
+
if (TRACE_MATCHER) {
System.err.printf("new LocaleMatcher: %s\n", toString());
}
@@ -1052,6 +1128,44 @@
}
/**
+ * Returns true if the pair of locales matches acceptably.
+ * This is influenced by Builder options such as setDirection(), setFavorSubtag(),
+ * and setMaxDistance().
+ *
+ * @param desired The desired locale.
+ * @param supported The supported locale.
+ * @return true if the pair of locales matches acceptably.
+ * @draft ICU 68
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean isMatch(Locale desired, Locale supported) {
+ int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ getMaximalLsrOrUnd(desired),
+ new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
+ LocaleDistance.shiftDistance(thresholdDistance), favorSubtag, direction);
+ return indexAndDistance >= 0;
+ }
+
+ /**
+ * Returns true if the pair of locales matches acceptably.
+ * This is influenced by Builder options such as setDirection(), setFavorSubtag(),
+ * and setMaxDistance().
+ *
+ * @param desired The desired locale.
+ * @param supported The supported locale.
+ * @return true if the pair of locales matches acceptably.
+ * @draft ICU 68
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean isMatch(ULocale desired, ULocale supported) {
+ int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ getMaximalLsrOrUnd(desired),
+ new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
+ LocaleDistance.shiftDistance(thresholdDistance), favorSubtag, direction);
+ return indexAndDistance >= 0;
+ }
+
+ /**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
* perfect match, and 0 means that they are completely different.
*
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index d4df833..97e2dab 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -678,6 +678,29 @@
}
@Test
+    public void testMaxDistanceAndIsMatch() {
+        // Exercises Builder.setMaxDistance() together with LocaleMatcher.isMatch() on three
+        // matchers built from the same Builder: the default one, one loosened to tolerate a
+        // script difference, and one tightened to a regional difference.
+        // Assertion labels read "desired / supported".
+        LocaleMatcher.Builder builder = LocaleMatcher.builder();
+        LocaleMatcher standard = builder.build();
+        ULocale germanLux = new ULocale("de-LU");
+        ULocale germanPhoenician = new ULocale("de-Phnx-AT");
+        ULocale greek = new ULocale("el");
+        assertTrue("standard de-LU / de", standard.isMatch(germanLux, ULocale.GERMAN));
+        assertFalse("standard de-Phnx-AT / de", standard.isMatch(germanPhoenician, ULocale.GERMAN));
+
+        // Allow a script difference to still match.
+        LocaleMatcher loose = builder.setMaxDistance(germanPhoenician, ULocale.GERMAN).build();
+        assertTrue("loose de-LU / de", loose.isMatch(germanLux, ULocale.GERMAN));
+        assertTrue("loose de-Phnx-AT / de", loose.isMatch(germanPhoenician, ULocale.GERMAN));
+        assertFalse("loose el / de", loose.isMatch(greek, ULocale.GERMAN));
+
+        // Allow at most a regional difference.
+        LocaleMatcher regional = builder.setMaxDistance(new Locale("de", "AT"), Locale.GERMAN).build();
+        assertTrue("regional de-LU / de", regional.isMatch(new Locale("de", "LU"), Locale.GERMAN));
+        assertFalse("regional da / no", regional.isMatch(new Locale("da"), new Locale("no")));
+        // The label names the pair actually passed: desired=zh, supported=zh-Hant.
+        assertFalse("regional zh / zh-Hant", regional.isMatch(Locale.CHINESE, Locale.TRADITIONAL_CHINESE));
+    }
+
+ @Test
public void testCanonicalize() {
LocaleMatcher matcher = LocaleMatcher.builder().build();
assertEquals("bh --> bho", new ULocale("bho"), matcher.canonicalize(new ULocale("bh")));