ICU-20467 get XLocaleMatcher ready for drop-in
Get XLocaleMatcher ready for replacing the LocaleMatcher code.
More simplifications beyond ICU-20330 PR #409, smaller data, some more optimizations.
New API ready to be moved over.
- less work for region partitions distance lookup:
  - encode each array of single-character partition strings as one string
  - look up each desired partition only once, not for each (desired, supported) pair
  - look up the * fallback region distance only for the first mismatch, not for each non-matching pair
  - skip region distance lookup if minRegionDistance>=remainingThreshold
- locale distance table: remove subtables that contain only *-* with default script/region distance
- mark intermediate subtag matches via last-character bit 7, not also with a match value
- likely subtags data: prune trailing *-only levels, and skip *-only script levels; likely subtags perf test
- likely subtags: skip_script=1; LSR.indexForRegion(ill-formed)=0 not negative
- likely subtags small optimization: array lookup for first letter of language subtag
- defaultDemotionPerDesiredLocale=distance(en, en-GB)
- favor=script: still reject a script mismatch
- if an explicit default locale is given, prefer that (by LSR), not the first supported locale
- XLocaleMatcher.Builder: copy supported locales into a List not a Set to preserve input indexes; duplicates are harmless
- match by LSR only, without an exact-locale-match fast path; results are consistent with the no-fast-path behavior; simpler, but sometimes a little slower
- internal getBestMatch() returns just the suppIndex
- store the best desired locale & index in an LSR iterator
- make an LSR from Locale without ULocale detour
- adjust the XLocaleMatcher API as proposed; remove unused internal methods; clean up LocalePriorityList docs
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
index dd32de0..317f544 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
@@ -5,7 +5,9 @@
import java.util.Objects;
final class LSR {
- static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
+ static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
+
+ static final boolean DEBUG_OUTPUT = false;
final String language;
final String script;
@@ -21,27 +23,27 @@
}
/**
- * Returns a non-negative index for a well-formed region code.
+ * Returns a positive index (>0) for a well-formed region code.
* Do not rely on a particular region->index mapping; it may change.
- * Returns -1 for ill-formed strings.
+ * Returns 0 for ill-formed strings.
*/
static final int indexForRegion(String region) {
if (region.length() == 2) {
int a = region.charAt(0) - 'A';
- if (a < 0 || 25 < a) { return -1; }
+ if (a < 0 || 25 < a) { return 0; }
int b = region.charAt(1) - 'A';
- if (b < 0 || 25 < b) { return -1; }
- return 26 * a + b + 1000;
+ if (b < 0 || 25 < b) { return 0; }
+ return 26 * a + b + 1001;
} else if (region.length() == 3) {
int a = region.charAt(0) - '0';
- if (a < 0 || 9 < a) { return -1; }
+ if (a < 0 || 9 < a) { return 0; }
int b = region.charAt(1) - '0';
- if (b < 0 || 9 < b) { return -1; }
+ if (b < 0 || 9 < b) { return 0; }
int c = region.charAt(2) - '0';
- if (c < 0 || 9 < c) { return -1; }
- return (10 * a + b) * 10 + c;
+ if (c < 0 || 9 < c) { return 0; }
+ return (10 * a + b) * 10 + c + 1;
}
- return -1;
+ return 0;
}
@Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java
index b6fad04..a6bdbf6 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java
@@ -26,7 +26,7 @@
* Reads source data from ICU resource bundles.
*/
class LikelySubtagsBuilder {
- private static final boolean DEBUG_OUTPUT = false;
+ private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static ICUResourceBundle getSupplementalDataBundle(String name) {
return ICUResourceBundle.getBundleInstance(
@@ -84,12 +84,33 @@
private static final class TrieBuilder {
byte[] bytes = new byte[24];
+ int length = 0;
BytesTrieBuilder tb = new BytesTrieBuilder();
- void addMapping(String s, int value) {
- // s contains only ASCII characters.
- s.getBytes(0, s.length(), bytes, 0);
- tb.add(bytes, s.length(), value);
+ void addValue(int value) {
+ assert value >= 0;
+ tb.add(bytes, length, value);
+ }
+
+ void addStar() {
+ bytes[length++] = '*';
+ }
+
+ void addSubtag(String s) {
+ assert !s.isEmpty();
+ assert !s.equals("*");
+ int end = s.length() - 1;
+ for (int i = 0;; ++i) {
+ char c = s.charAt(i);
+ assert c <= 0x7f;
+ if (i < end) {
+ bytes[length++] = (byte) c;
+ } else {
+ // Mark the last character as a terminator to avoid overlap matches.
+ bytes[length++] = (byte) (c | 0x80);
+ break;
+ }
+ }
}
BytesTrie build() {
@@ -114,44 +135,70 @@
TrieBuilder trieBuilder = new TrieBuilder();
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
- // Bogus LSR at index 0 for some code to easily distinguish between
- // intermediate match points and real result values.
- LSR bogus = new LSR("", "", "");
- lsrIndexes.put(bogus, 0);
+ // Reserve index 0 as "no value":
+ // The runtime lookup returns 0 for an intermediate match with no value.
+ lsrIndexes.put(new LSR("", "", ""), 0); // arbitrary LSR
+ // Reserve index 1 for SKIP_SCRIPT:
+ // The runtime lookup returns 1 for an intermediate match with a value.
+ lsrIndexes.put(new LSR("skip", "script", ""), 1); // looks good when printing the data
// We could prefill the lsrList with common locales to give them small indexes,
// and see if that improves performance a little.
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
+ trieBuilder.length = 0;
String lang = ls.getKey();
if (lang.equals("und")) {
- lang = "*";
+ trieBuilder.addStar();
+ } else {
+ trieBuilder.addSubtag(lang);
}
- // Create a match point for the language.
- trieBuilder.addMapping(lang, 0);
Map<String, Map<String, LSR>> scriptTable = ls.getValue();
- for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
- String script = sr.getKey();
- if (script.isEmpty()) {
- script = "*";
+ boolean skipScript = false;
+ if (scriptTable.size() == 1) {
+ Map<String, LSR> regionTable = scriptTable.get("");
+ if (regionTable.size() == 1) {
+ // Prune the script and region levels from language with
+ // only * for scripts and regions.
+ int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
+ trieBuilder.addValue(i);
+ continue;
+ } else {
+ // Prune the script level from language with only * for scripts
+ // but with real regions.
+ // Set an intermediate value as a signal to the lookup code.
+ trieBuilder.addValue(XLikelySubtags.SKIP_SCRIPT);
+ skipScript = true;
}
- // Match point for lang+script.
- trieBuilder.addMapping(lang + script, 0);
- Map<String, LSR> regionTable = sr.getValue();
- for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
- String region = r2lsr.getKey();
- if (region.isEmpty()) {
- region = "*";
- }
- // Map the whole lang+script+region to a unique, dense index of the LSR.
- LSR lsr = r2lsr.getValue();
- Integer index = lsrIndexes.get(lsr);
- int i;
- if (index != null) {
- i = index.intValue();
+ }
+ int scriptStartLength = trieBuilder.length;
+ for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
+ trieBuilder.length = scriptStartLength;
+ if (!skipScript) {
+ String script = sr.getKey();
+ if (script.isEmpty()) {
+ trieBuilder.addStar();
} else {
- i = lsrIndexes.size();
- lsrIndexes.put(lsr, i);
+ trieBuilder.addSubtag(script);
}
- trieBuilder.addMapping(lang + script + region, i);
+ }
+ Map<String, LSR> regionTable = sr.getValue();
+ if (regionTable.size() == 1) {
+ // Prune the region level from language+script with only * for regions.
+ int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
+ trieBuilder.addValue(i);
+ continue;
+ }
+ int regionStartLength = trieBuilder.length;
+ for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
+ trieBuilder.length = regionStartLength;
+ String region = r2lsr.getKey();
+ // Map the whole lang+script+region to a unique, dense index of the LSR.
+ if (region.isEmpty()) {
+ trieBuilder.addStar();
+ } else {
+ trieBuilder.addSubtag(region);
+ }
+ int i = uniqueIdForLsr(lsrIndexes, r2lsr.getValue());
+ trieBuilder.addValue(i);
}
}
}
@@ -161,6 +208,17 @@
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
}
+ private static int uniqueIdForLsr(Map<LSR, Integer> lsrIndexes, LSR lsr) {
+ Integer index = lsrIndexes.get(lsr);
+ if (index != null) {
+ return index.intValue();
+ } else {
+ int i = lsrIndexes.size();
+ lsrIndexes.put(lsr, i);
+ return i;
+ }
+ }
+
private static Map<String, Map<String, Map<String, LSR>>> makeTable(
AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
@@ -176,11 +234,8 @@
final String region = ltp.region;
ltp = lsrFromLocaleID(value.getString()); // target
- String languageTarget = ltp.language;
- final String scriptTarget = ltp.script;
- final String regionTarget = ltp.region;
+ set(result, language, script, region, ltp);
- set(result, language, script, region, languageTarget, scriptTarget, regionTarget);
// now add aliases
Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
@@ -189,13 +244,12 @@
if (languageAlias.equals(language) && regionAlias.equals(region)) {
continue;
}
- set(result, languageAlias, script, regionAlias,
- languageTarget, scriptTarget, regionTarget);
+ set(result, languageAlias, script, regionAlias, ltp);
}
}
}
// hack
- set(result, "und", "Latn", "", "en", "Latn", "US");
+ set(result, "und", "Latn", "", new LSR("en", "Latn", "US"));
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
@@ -242,23 +296,16 @@
}
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
- final String language, final String script, final String region,
- final String languageTarget, final String scriptTarget, final String regionTarget) {
- LSR target = new LSR(languageTarget, scriptTarget, regionTarget);
- set(langTable, language, script, region, target);
- }
-
- private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
final String language, final String script, final String region, LSR newValue) {
Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
Map<String, LSR> regionTable = getSubtable(scriptTable, script);
regionTable.put(region, newValue);
}
- private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
- Map<V, T> subTable = table.get(language);
+ private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K subtag) {
+ Map<V, T> subTable = table.get(subtag);
if (subTable == null) {
- table.put(language, subTable = new TreeMap<>());
+ table.put(subtag, subTable = new TreeMap<>());
}
return subTable;
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
index 44c7169..56735a8 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
@@ -2,10 +2,11 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
-import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@@ -14,9 +15,21 @@
* Mostly but not only the data for mapping locales to their maximized forms.
*/
public class LocaleDistance {
+ /** Distance value bit flag, set by the builder. */
+ static final int DISTANCE_SKIP_SCRIPT = 0x80;
+ /** Distance value bit flag, set by trieNext(). */
+ private static final int DISTANCE_IS_FINAL = 0x100;
+ private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
+ DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
+ // Indexes into array of distances.
+ static final int IX_DEF_LANG_DISTANCE = 0;
+ static final int IX_DEF_SCRIPT_DISTANCE = 1;
+ static final int IX_DEF_REGION_DISTANCE = 2;
+ static final int IX_MIN_REGION_DISTANCE = 3;
+ static final int IX_LIMIT = 4;
private static final int ABOVE_THRESHOLD = 100;
- private static final boolean DEBUG_OUTPUT = false;
+ private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
@@ -28,7 +41,7 @@
* Maps each region to zero or more single-character partitions.
*/
private final byte[] regionToPartitionsIndex;
- private final String[][] partitionArrays;
+ private final String[] partitionArrays;
/**
* Used to get the paradigm region for a cluster, if there is one.
@@ -38,6 +51,8 @@
private final int defaultLanguageDistance;
private final int defaultScriptDistance;
private final int defaultRegionDistance;
+ private final int minRegionDistance;
+ private final int defaultDemotionPerDesiredLocale;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
@@ -45,42 +60,40 @@
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
LocaleDistance(BytesTrie trie,
- byte[] regionToPartitionsIndex, String[][] partitionArrays,
- Set<LSR> paradigmLSRs) {
+ byte[] regionToPartitionsIndex, String[] partitionArrays,
+ Set<LSR> paradigmLSRs, int[] distances) {
this.trie = trie;
- if (DEBUG_OUTPUT) {
- System.out.println("*** locale distance");
- testOnlyPrintDistanceTable();
- }
this.regionToPartitionsIndex = regionToPartitionsIndex;
this.partitionArrays = partitionArrays;
this.paradigmLSRs = paradigmLSRs;
+ defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
+ defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
+ defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
+ this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
- BytesTrie iter = new BytesTrie(trie);
- BytesTrie.Result result = iter.next('*');
- assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
- defaultLanguageDistance = iter.getValue();
- result = iter.next('*');
- assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
- defaultScriptDistance = iter.getValue();
- result = iter.next('*');
- assert result.hasValue();
- defaultRegionDistance = iter.getValue();
+ LSR en = new LSR("en", "Latn", "US");
+ LSR enGB = new LSR("en", "Latn", "GB");
+ defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
+ 50, FavorSubtag.LANGUAGE) & 0xff;
+
+ if (DEBUG_OUTPUT) {
+ System.out.println("*** locale distance");
+ System.out.println("defaultLanguageDistance=" + defaultLanguageDistance);
+ System.out.println("defaultScriptDistance=" + defaultScriptDistance);
+ System.out.println("defaultRegionDistance=" + defaultRegionDistance);
+ testOnlyPrintDistanceTable();
+ }
}
// VisibleForTesting
public int testOnlyDistance(ULocale desired, ULocale supported,
- int threshold, DistanceOption distanceOption) {
+ int threshold, FavorSubtag favorSubtag) {
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
- threshold, distanceOption) & 0xff;
+ threshold, favorSubtag) & 0xff;
}
- public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
- // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
- // than regions, so they might be considered the "normal" case.
-
/**
* Finds the supported LSR with the smallest distance from the desired one.
* Equivalent LSR subtags must be normalized into a canonical form.
@@ -90,13 +103,12 @@
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
- int threshold, DistanceOption distanceOption) {
+ int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
- // Set wantValue=true so that iter reads & skips the match point value.
- int desLangDistance = trieNext(iter, desired.language, true, true);
+ int desLangDistance = trieNext(iter, desired.language, false);
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
@@ -105,26 +117,31 @@
boolean star = false;
int distance = desLangDistance;
if (distance >= 0) {
+ assert (distance & DISTANCE_IS_FINAL) == 0;
if (slIndex != 0) {
iter.resetToState64(desLangState);
}
- distance = trieNext(iter, supported.language, true, true);
+ distance = trieNext(iter, supported.language, true);
}
// Note: The data builder verifies that there are no rules with "any" (*) language and
// real (non *) script or region subtags.
// This means that if the lookup for either language fails we can use
// the default distances without further lookups.
- if (distance < 0) { // <*, *>
+ int flags;
+ if (distance >= 0) {
+ flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ } else { // <*, *>
if (desired.language.equals(supported.language)) {
distance = 0;
} else {
distance = defaultLanguageDistance;
}
+ flags = 0;
star = true;
}
assert 0 <= distance && distance <= 100;
- boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
- if (scriptFirst) {
+ if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
if (distance >= threshold) {
@@ -132,18 +149,17 @@
}
int scriptDistance;
- if (star) {
+ if (star || flags != 0) {
if (desired.script.equals(supported.script)) {
scriptDistance = 0;
} else {
scriptDistance = defaultScriptDistance;
}
} else {
- scriptDistance = getDesSuppDistance(iter, iter.getState64(),
- desired.script, supported.script, false);
- }
- if (scriptFirst) {
- scriptDistance >>= 1;
+ scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+ desired.script, supported.script);
+ flags = scriptDistance & DISTANCE_IS_FINAL;
+ scriptDistance &= ~DISTANCE_IS_FINAL;
}
distance += scriptDistance;
if (distance >= threshold) {
@@ -152,27 +168,24 @@
if (desired.region.equals(supported.region)) {
// regionDistance = 0
- } else if (star) {
+ } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
distance += defaultRegionDistance;
} else {
- long startState = iter.getState64();
+ int remainingThreshold = threshold - distance;
+ if (minRegionDistance >= remainingThreshold) {
+ continue;
+ }
// From here on we know the regions are not equal.
- // Map each region to zero or more partitions. (zero = one empty string)
+ // Map each region to zero or more partitions. (zero = one non-matching string)
+ // (Each array of single-character partition strings is encoded as one string.)
// If either side has more than one, then we find the maximum distance.
// This could be optimized by adding some more structure, but probably not worth it.
- final String[] desiredPartitions = partitionsForRegion(desired);
- final String[] supportedPartitions = partitionsForRegion(supported);
- int regionDistance;
-
- if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
- regionDistance = getRegionPartitionsDistance(iter, startState,
- desiredPartitions, supportedPartitions, threshold - distance);
- } else {
- regionDistance = getDesSuppDistance(iter, startState,
- desiredPartitions[0], supportedPartitions[0], true);
- }
- distance += regionDistance;
+ distance += getRegionPartitionsDistance(
+ iter, iter.getState64(),
+ partitionsForRegion(desired),
+ partitionsForRegion(supported),
+ remainingThreshold);
}
if (distance < threshold) {
if (distance == 0) {
@@ -185,101 +198,140 @@
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
}
- private int getRegionPartitionsDistance(BytesTrie iter, long startState,
- String[] desiredPartitions, String[] supportedPartitions, int threshold) {
- int regionDistance = -1;
- for (String dp : desiredPartitions) {
- for (String sp : supportedPartitions) {
- if (regionDistance >= 0) { // no need to reset in first iteration
- iter.resetToState64(startState);
- }
- int d = getDesSuppDistance(iter, startState, dp, sp, true);
- if (regionDistance < d) {
- if (d >= threshold) {
- return d;
- }
- regionDistance = d;
- }
- }
- }
- assert regionDistance >= 0;
- return regionDistance;
- }
-
- // Modified from
- // DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
- private static final int getDesSuppDistance(BytesTrie iter, long startState,
- String desired, String supported, boolean finalSubtag) {
+ private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
+ String desired, String supported) {
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
- int distance = trieNext(iter, desired, false, true);
+ int distance = trieNext(iter, desired, false);
if (distance >= 0) {
- distance = trieNext(iter, supported, true, !finalSubtag);
+ distance = trieNext(iter, supported, true);
}
if (distance < 0) {
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
- assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
- if (!finalSubtag && desired.equals(supported)) {
- distance = 0; // same language or script
+ assert result.hasValue();
+ if (desired.equals(supported)) {
+ distance = 0; // same script
} else {
distance = iter.getValue();
assert distance >= 0;
}
+ if (result == BytesTrie.Result.FINAL_VALUE) {
+ distance |= DISTANCE_IS_FINAL;
+ }
}
return distance;
}
- private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
+ private static final int getRegionPartitionsDistance(BytesTrie iter, long startState,
+ String desiredPartitions, String supportedPartitions, int threshold) {
+ int desLength = desiredPartitions.length();
+ int suppLength = supportedPartitions.length();
+ if (desLength == 1 && suppLength == 1) {
+ BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
+ if (result.hasNext()) {
+ result = iter.next(supportedPartitions.charAt(0) | 0x80);
+ if (result.hasValue()) {
+ return iter.getValue();
+ }
+ }
+ return getFallbackRegionDistance(iter, startState);
+ }
+
+ int regionDistance = 0;
+ // Fall back to * only once, not for each pair of partition strings.
+ boolean star = false;
+ for (int di = 0;;) {
+ // Look up each desired-partition string only once,
+ // not for each (desired, supported) pair.
+ BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
+ if (result.hasNext()) {
+ long desState = suppLength > 1 ? iter.getState64() : 0;
+ for (int si = 0;;) {
+ result = iter.next(supportedPartitions.charAt(si++) | 0x80);
+ int d;
+ if (result.hasValue()) {
+ d = iter.getValue();
+ } else if (star) {
+ d = 0;
+ } else {
+ d = getFallbackRegionDistance(iter, startState);
+ star = true;
+ }
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ if (si < suppLength) {
+ iter.resetToState64(desState);
+ } else {
+ break;
+ }
+ }
+ } else if (!star) {
+ int d = getFallbackRegionDistance(iter, startState);
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ star = true;
+ }
+ if (di < desLength) {
+ iter.resetToState64(startState);
+ } else {
+ break;
+ }
+ }
+ return regionDistance;
+ }
+
+ private static final int getFallbackRegionDistance(BytesTrie iter, long startState) {
+ BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
+ assert result.hasValue();
+ int distance = iter.getValue();
+ assert distance >= 0;
+ return distance;
+ }
+
+ private static final int trieNext(BytesTrie iter, String s, boolean wantValue) {
if (s.isEmpty()) {
return -1; // no empty subtags in the distance data
}
- BytesTrie.Result result;
- int end = s.length() - 1;
- for (int i = 0;; ++i) {
+ for (int i = 0, end = s.length() - 1;; ++i) {
int c = s.charAt(i);
- assert c <= 0x7f;
if (i < end) {
- result = iter.next(c);
- if (!result.hasNext()) {
+ if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
- result = iter.next(c | 0x80);
- break;
+ BytesTrie.Result result = iter.next(c | 0x80);
+ if (wantValue) {
+ if (result.hasValue()) {
+ int value = iter.getValue();
+ if (result == BytesTrie.Result.FINAL_VALUE) {
+ value |= DISTANCE_IS_FINAL;
+ }
+ return value;
+ }
+ } else {
+ if (result.hasNext()) {
+ return 0;
+ }
+ }
+ return -1;
}
}
- if (wantValue) {
- if (wantNext) {
- if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
- return iter.getValue();
- }
- } else {
- if (result.hasValue()) {
- return iter.getValue();
- }
- }
- } else {
- if (wantNext) {
- if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
- return 0;
- }
- } else {
- if (result.hasValue()) {
- return 0;
- }
- }
- }
- return -1;
}
@Override
public String toString() {
- return testOnlyGetDistanceTable(true).toString();
+ return testOnlyGetDistanceTable().toString();
}
- private String[] partitionsForRegion(LSR lsr) {
- // ill-formed region -> one empty string
- int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0;
+ private String partitionsForRegion(LSR lsr) {
+ // ill-formed region -> one non-matching string
+ int pIndex = regionToPartitionsIndex[lsr.regionIndex];
return partitionArrays[pIndex];
}
@@ -296,48 +348,50 @@
return defaultRegionDistance;
}
+ int getDefaultDemotionPerDesiredLocale() {
+ return defaultDemotionPerDesiredLocale;
+ }
+
+ // TODO: When we build data offline,
+ // write test code to compare the loaded table with the builder output.
+ // Fail if different, with instructions for how to update the data file.
// VisibleForTesting
- public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
- Map<String, Integer> map = new LinkedHashMap<>();
+ public Map<String, Integer> testOnlyGetDistanceTable() {
+ Map<String, Integer> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
- int numSubtags = 0;
int length = entry.bytesLength();
for (int i = 0; i < length; ++i) {
byte b = entry.byteAt(i);
if (b == '*') {
// One * represents a (desired, supported) = (ANY, ANY) pair.
sb.append("*-*-");
- numSubtags += 2;
} else {
if (b >= 0) {
sb.append((char) b);
} else { // end of subtag
sb.append((char) (b & 0x7f)).append('-');
- ++numSubtags;
}
}
}
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
- if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
- sb.setLength(sb.length() - 1);
- String s = sb.toString();
- if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
- // Re-insert single-ANY match points to show consistent structure
- // for the test code.
- map.put(s.substring(0, s.length() - 2), 0);
- }
- map.put(s, entry.value);
- }
+ sb.setLength(sb.length() - 1);
+ map.put(sb.toString(), entry.value);
}
return map;
}
// VisibleForTesting
public void testOnlyPrintDistanceTable() {
- for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
- System.out.println(mapping);
+ for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable().entrySet()) {
+ String suffix = "";
+ int value = mapping.getValue();
+ if ((value & DISTANCE_SKIP_SCRIPT) != 0) {
+ value &= ~DISTANCE_SKIP_SCRIPT;
+ suffix = " skip script";
+ }
+ System.out.println(mapping.getKey() + '=' + value + suffix);
}
}
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java
index aa5bc53..83cbe4a 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java
@@ -29,7 +29,7 @@
public final class LocaleDistanceBuilder {
private static final String ANY = "�"; // matches any character. Uses value above any subtag.
- private static final boolean DEBUG_OUTPUT = false;
+ private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static String fixAny(String string) {
return "*".equals(string) ? ANY : string;
@@ -135,7 +135,6 @@
void addSubtag(String s, int value) {
assert !s.isEmpty();
- assert value >= 0;
assert !s.equals(ANY);
int end = s.length() - 1;
for (int i = 0;; ++i) {
@@ -149,7 +148,9 @@
break;
}
}
- tb.add(bytes, length, value);
+ if (value >= 0) {
+ tb.add(bytes, length, value);
+ }
}
BytesTrie build() {
@@ -166,7 +167,7 @@
}
private static final class DistanceTable {
- final int nodeDistance; // distance for the lookup so far
+ int nodeDistance; // distance for the lookup so far
final Map<String, Map<String, DistanceTable>> subtables;
DistanceTable(int distance) {
@@ -188,7 +189,8 @@
return nodeDistance ^ subtables.hashCode();
}
- public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
+ private int getDistance(String desired, String supported,
+ Output<DistanceTable> distanceTable, boolean starEquals) {
boolean star = false;
Map<String, DistanceTable> sub2 = subtables.get(desired);
if (sub2 == null) {
@@ -214,6 +216,10 @@
return result;
}
+ private DistanceTable getAnyAnyNode() {
+ return subtables.get(ANY).get(ANY);
+ }
+
void copy(DistanceTable other) {
for (Map.Entry<String, Map<String, DistanceTable>> e1 : other.subtables.entrySet()) {
for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
@@ -330,6 +336,34 @@
addSubtables(desiredLang, supportedLang, r);
}
+ void prune(int level, int[] distances) {
+ for (Map<String, DistanceTable> suppNodeMap : subtables.values()) {
+ for (DistanceTable node : suppNodeMap.values()) {
+ node.prune(level + 1, distances);
+ }
+ }
+ if (subtables.size() == 1) {
+ DistanceTable next = getAnyAnyNode();
+ if (level == 1) {
+ // Remove script table -*-*-50 where there are no other script rules
+ // and no following region rules.
+ // If there are region rules, then mark this table for skipping.
+ if (next.nodeDistance == distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE]) {
+ if (next.subtables.isEmpty()) {
+ subtables.clear();
+ } else {
+ nodeDistance |= LocaleDistance.DISTANCE_SKIP_SCRIPT;
+ }
+ }
+ } else if (level == 2) {
+ // Remove region table -*-*-4 where there are no other region rules.
+ if (next.nodeDistance == distances[LocaleDistance.IX_DEF_REGION_DISTANCE]) {
+ subtables.clear();
+ }
+ }
+ }
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n');
@@ -356,6 +390,10 @@
}
void toTrie(TrieBuilder builder) {
+ if (nodeDistance >= 0 && (nodeDistance & LocaleDistance.DISTANCE_SKIP_SCRIPT) != 0) {
+ getAnyAnyNode().toTrie(builder);
+ return;
+ }
int startLength = builder.length;
for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
String desired = desSuppNode.getKey();
@@ -367,7 +405,7 @@
builder.addStar(node.nodeDistance);
node.toTrie(builder);
} else {
- builder.addSubtag(desired, 0);
+ builder.addSubtag(desired, -1);
int desiredLength = builder.length;
for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
String supported = suppNode.getKey();
@@ -508,6 +546,7 @@
final Multimap<String, String> variableToPartition = rmb.variableToPartitions;
final DistanceTable defaultDistanceTable = new DistanceTable(-1);
+ int minRegionDistance = 100;
for (Rule rule : rules) {
List<String> desired = rule.desired;
List<String> supported = rule.supported;
@@ -519,6 +558,9 @@
}
} else {
// language-script-region
+ if (rule.distance < minRegionDistance) {
+ minRegionDistance = rule.distance;
+ }
Collection<String> desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2));
Collection<String> supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2));
for (String desiredRegion2 : desiredRegions) {
@@ -534,11 +576,25 @@
}
}
+ int[] distances = new int[LocaleDistance.IX_LIMIT];
+ DistanceTable node = defaultDistanceTable.getAnyAnyNode();
+ distances[LocaleDistance.IX_DEF_LANG_DISTANCE] = node.nodeDistance;
+ node = node.getAnyAnyNode();
+ distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE] = node.nodeDistance;
+ node = node.getAnyAnyNode();
+ distances[LocaleDistance.IX_DEF_REGION_DISTANCE] = node.nodeDistance;
+ distances[LocaleDistance.IX_MIN_REGION_DISTANCE] = minRegionDistance;
+
+ defaultDistanceTable.prune(0, distances);
+ assert defaultDistanceTable.getAnyAnyNode().subtables.isEmpty();
+ defaultDistanceTable.subtables.remove(ANY);
+
TrieBuilder trieBuilder = new TrieBuilder();
defaultDistanceTable.toTrie(trieBuilder);
BytesTrie trie = trieBuilder.build();
return new LocaleDistance(
- trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs);
+ trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
+ paradigmLSRs, distances);
}
private static int checkStars(String desired, String supported, boolean allStars) {
@@ -587,7 +643,7 @@
// build() output
Multimap<String, String> variableToPartitions;
private byte[] regionToPartitionsIndex;
- private String[][] partitionArrays;
+ private String[] partitionArrays;
RegionMapperBuilder(TerritoryContainment tc) {
regionSet = new RegionSet(tc);
@@ -623,7 +679,7 @@
void ensureRegionIsVariable(List<String> lsrList) {
String region = lsrList.get(2);
if (!isKnownVariable(region)) {
- assert LSR.indexForRegion(region) >= 0; // well-formed region subtag
+ assert LSR.indexForRegion(region) > 0; // well-formed region subtag
String variable = "$" + region;
add(variable, region);
lsrList.set(2, variable);
@@ -639,7 +695,7 @@
// Example: {"1", "5"}
Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
// pIndex 0: default value in regionToPartitionsIndex
- Collection<String> noPartitions = Collections.singleton("");
+ Collection<String> noPartitions = Collections.singleton(".");
makeUniqueIndex(partitionStrings, noPartitions);
// Example: "$americas" -> {"1", "5"}
@@ -697,13 +753,24 @@
regionToPartitionsIndex[regionIndex] = (byte) pIndex;
}
}
+ // LSR.indexForRegion(ill-formed region) returns 0.
+ // Its regionToPartitionsIndex must also be 0 for the noPartitions value.
+ assert regionToPartitionsIndex[0] == 0;
- // Turn the Collection of Collections into an array of arrays.
+ // Turn the Collection of Collections of single-character strings
+ // into an array of strings.
Collection<Collection<String>> list = partitionStrings.keySet();
- partitionArrays = new String[list.size()][];
+ partitionArrays = new String[list.size()];
+ StringBuilder sb = new StringBuilder();
int i = 0;
for (Collection<String> partitions : list) {
- partitionArrays[i++] = partitions.toArray(new String[partitions.size()]);
+ assert !partitions.isEmpty();
+ sb.setLength(0);
+ for (String p : partitions) {
+ assert p.length() == 1;
+ sb.append(p);
+ }
+ partitionArrays[i++] = sb.toString();
}
}
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
index 26b540f..0873b6d 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
@@ -2,10 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
+import java.util.Locale;
import java.util.Map;
-import java.util.Set;
+import java.util.TreeMap;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@@ -15,11 +14,14 @@
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
- private static final boolean DEBUG_OUTPUT = false;
+ static final int SKIP_SCRIPT = 1;
+
+ private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
- static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
+ // VisibleForTesting
+ public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
static final class Data {
private final Map<String, String> languageAliases;
@@ -46,6 +48,7 @@
private final long trieUndState;
private final long trieUndZzzzState;
private final int defaultLsrIndex;
+ private final long[] trieFirstLetterStates = new long[26];
private final LSR[] lsrs;
private XLikelySubtags(XLikelySubtags.Data data) {
@@ -56,20 +59,24 @@
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
BytesTrie.Result result = trie.next('*');
- assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
- int value = trie.getValue();
- assert value == 0;
+ assert result.hasNext();
trieUndState = trie.getState64();
result = trie.next('*');
- assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
- value = trie.getValue();
- assert value == 0;
+ assert result.hasNext();
trieUndZzzzState = trie.getState64();
result = trie.next('*');
assert result.hasValue();
defaultLsrIndex = trie.getValue();
trie.reset();
+ for (char c = 'a'; c <= 'z'; ++c) {
+ result = trie.next(c);
+ if (result == BytesTrie.Result.NO_VALUE) {
+ trieFirstLetterStates[c - 'a'] = trie.getState64();
+ }
+ trie.reset();
+ }
+
if (DEBUG_OUTPUT) {
System.out.println("*** likely subtags");
for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
@@ -83,19 +90,31 @@
return canonical == null ? alias : canonical;
}
- LSR makeMaximizedLsrFrom(ULocale locale) {
+ // VisibleForTesting
+ public LSR makeMaximizedLsrFrom(ULocale locale) {
String name = locale.getName();
if (name.startsWith("@x=")) {
// Private use language tag x-subtag-subtag...
return new LSR(name, "", "");
}
+ return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+ locale.getVariant());
+ }
+ LSR makeMaximizedLsrFrom(Locale locale) {
+ String tag = locale.toLanguageTag();
+ if (tag.startsWith("x-")) {
+ // Private use language tag x-subtag-subtag...
+ return new LSR(tag, "", "");
+ }
+ return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+ locale.getVariant());
+ }
+
+ private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
// They should match only themselves,
// not other locales with what looks like the same language and script subtags.
- String language = locale.getLanguage();
- String script = locale.getScript();
- String region = locale.getCountry();
if (region.length() == 2 && region.charAt(0) == 'X') {
switch (region.charAt(1)) {
case 'A':
@@ -112,7 +131,6 @@
}
}
- String variant = locale.getVariant();
if (variant.startsWith("PS")) {
switch (variant) {
case "PSACCENT":
@@ -130,7 +148,7 @@
}
language = getCanonical(languageAliases, language);
- // script is ok
+ // (We have no script mappings.)
region = getCanonical(regionAliases, region);
return INSTANCE.maximize(language, script, region);
}
@@ -139,14 +157,31 @@
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
*/
private LSR maximize(String language, String script, String region) {
- int retainOldMask = 0;
- BytesTrie iter = new BytesTrie(trie);
- // language lookup
if (language.equals("und")) {
language = "";
}
+ if (script.equals("Zzzz")) {
+ script = "";
+ }
+ if (region.equals("ZZ")) {
+ region = "";
+ }
+ if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
+ return new LSR(language, script, region); // already maximized
+ }
+
+ int retainOldMask = 0;
+ BytesTrie iter = new BytesTrie(trie);
long state;
- int value = trieNext(iter, language, false);
+ int value;
+ // Small optimization: Array lookup for first language letter.
+ int c0;
+ if (language.length() >= 2 && 0 <= (c0 = language.charAt(0) - 'a') && c0 <= 25 &&
+ (state = trieFirstLetterStates[c0]) != 0) {
+ value = trieNext(iter.resetToState64(state), language, 1);
+ } else {
+ value = trieNext(iter, language, 0);
+ }
if (value >= 0) {
if (!language.isEmpty()) {
retainOldMask |= 4;
@@ -157,45 +192,54 @@
iter.resetToState64(trieUndState); // "und" ("*")
state = 0;
}
- // script lookup
- if (script.equals("Zzzz")) {
- script = "";
- }
- value = trieNext(iter, script, false);
- if (value >= 0) {
+
+ if (value > 0) {
+ // Intermediate or final value from just language.
+ if (value == SKIP_SCRIPT) {
+ value = 0;
+ }
if (!script.isEmpty()) {
retainOldMask |= 2;
}
- state = iter.getState64();
} else {
- retainOldMask |= 2;
- if (state == 0) {
- iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
- } else {
- iter.resetToState64(state);
- value = trieNext(iter, "", false);
- assert value == 0;
+ value = trieNext(iter, script, 0);
+ if (value >= 0) {
+ if (!script.isEmpty()) {
+ retainOldMask |= 2;
+ }
state = iter.getState64();
+ } else {
+ retainOldMask |= 2;
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ assert value >= 0;
+ state = iter.getState64();
+ }
}
}
- // region lookup
- if (region.equals("ZZ")) {
- region = "";
- }
- value = trieNext(iter, region, true);
- if (value >= 0) {
+
+ if (value > 0) {
+ // Final value from just language or language+script.
if (!region.isEmpty()) {
retainOldMask |= 1;
}
} else {
- retainOldMask |= 1;
- if (state == 0) {
- value = defaultLsrIndex;
+ value = trieNext(iter, region, 0);
+ if (value >= 0) {
+ if (!region.isEmpty()) {
+ retainOldMask |= 1;
+ }
} else {
- iter.resetToState64(state);
- value = trieNext(iter, "", true);
- if (value < 0) { // TODO: should never happen?! just assert value >= 0?
- return null;
+ retainOldMask |= 1;
+ if (state == 0) {
+ value = defaultLsrIndex;
+ } else {
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ assert value > 0;
}
}
}
@@ -220,34 +264,34 @@
return new LSR(language, script, region);
}
- private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
+ private static final int trieNext(BytesTrie iter, String s, int i) {
BytesTrie.Result result;
if (s.isEmpty()) {
result = iter.next('*');
} else {
int end = s.length() - 1;
- for (int i = 0;; ++i) {
- result = iter.next(s.charAt(i));
+ for (;; ++i) {
+ int c = s.charAt(i);
if (i < end) {
- if (!result.hasNext()) {
+ if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
+ result = iter.next(c | 0x80);
break;
}
}
}
- if (!finalSubtag) {
- if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
- return 0; // value should be 0, don't care
- }
- } else {
- if (result.hasValue()) {
- return iter.getValue();
- }
+ switch (result) {
+ case NO_MATCH: return -1;
+ case NO_VALUE: return 0;
+ case INTERMEDIATE_VALUE:
+ assert iter.getValue() == SKIP_SCRIPT;
+ return SKIP_SCRIPT;
+ case FINAL_VALUE: return iter.getValue();
+ default: return -1;
}
- return -1;
}
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
@@ -263,11 +307,16 @@
// value00 = lookup(result.language, "", "")
BytesTrie iter = new BytesTrie(trie);
- int value = trieNext(iter, result.language, false);
+ int value = trieNext(iter, result.language, 0);
assert value >= 0;
- value = trieNext(iter, "", false);
- assert value >= 0;
- value = trieNext(iter, "", true);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ assert value >= 0;
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ }
+ }
+ assert value > 0;
LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
@@ -292,26 +341,24 @@
}
private Map<String, LSR> getTable() {
- Map<String, LSR> map = new LinkedHashMap<>();
- Set<String> prefixes = new HashSet<>();
+ Map<String, LSR> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int length = entry.bytesLength();
for (int i = 0; i < length;) {
byte b = entry.byteAt(i++);
- sb.append((char) b);
- if (i < length && prefixes.contains(sb.toString())) {
- sb.append('-');
+ if (b == '*') {
+ sb.append("*-");
+ } else if (b >= 0) {
+ sb.append((char) b);
+ } else { // end of subtag
+ sb.append((char) (b & 0x7f)).append('-');
}
}
- String s = sb.toString();
- if (entry.value == 0) {
- // intermediate match point
- prefixes.add(s);
- } else {
- map.put(s, lsrs[entry.value]);
- }
+ assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
+ sb.setLength(sb.length() - 1);
+ map.put(sb.toString(), lsrs[entry.value]);
}
return map;
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
index a527c61..f7ffeb2 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
@@ -3,198 +3,485 @@
package com.ibm.icu.impl.locale;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
-import java.util.Set;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
/**
- * Immutable class that picks best match between user's desired locales and application's supported locales.
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
+ * <p>TODO: Migration notes.
+ *
* @author markdavis
*/
public final class XLocaleMatcher {
private static final LSR UND_LSR = new LSR("und","","");
- private static final ULocale UND_LOCALE = new ULocale("und");
- private static final Iterator<ULocale> NULL_ITERATOR = null;
+ private static final ULocale UND_ULOCALE = new ULocale("und");
+ private static final Locale UND_LOCALE = new Locale("und");
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
- // List of indexes, optimized for one or two.
- private static final class Indexes {
- // Some indexes without further object creation and auto-boxing.
- int first, second = -1;
- // We could turn the List into an int array + length and manage its growth.
- List<Integer> remaining;
+ private static abstract class LsrIterator implements Iterator<LSR> {
+ int bestDesiredIndex = -1;
- Indexes(int firstIndex) {
- first = firstIndex;
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
}
- void add(int i) {
- if (second < 0) {
- second = i;
- } else {
- if (remaining == null) {
- remaining = new ArrayList<>();
- }
- remaining.add(i);
- }
- }
- int getFirst() { return first; }
- int get(int i) { // returns -1 when i >= length
- if (i == 0) {
- return first;
- } else if (i == 1) {
- return second;
- } else if (remaining != null && (i -= 2) < remaining.size()) {
- return remaining.get(i);
- } else {
- return -1;
- }
- }
+
+ public abstract void rememberCurrent(int desiredIndex);
}
- // TODO: Make public, and add public methods that return it.
- private static final class Result {
- private Result(ULocale desired, ULocale supported,
- /* Locale jdesired, */ Locale jsupported,
+ /**
+ * Builder option for whether the language subtag or the script subtag is most important.
+ *
+ * @see Builder#setFavorSubtag(FavorSubtag)
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public enum FavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ LANGUAGE,
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ SCRIPT
+ }
+
+ /**
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see Builder#setDemotionPerDesiredLocale(Demotion)
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public enum Demotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ NONE,
+ /**
+ * Earlier desired locales are preferred.
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ REGION
+ }
+
+ /**
+ * Data for the best-matching pair of a desired and a supported locale.
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Result {
+ private final ULocale desiredULocale;
+ private final ULocale supportedULocale;
+ private final Locale desiredLocale;
+ private final Locale supportedLocale;
+ private final int desiredIndex;
+ private final int supportedIndex;
+
+ private Result(ULocale udesired, ULocale usupported,
+ Locale desired, Locale supported,
int desIndex, int suppIndex) {
+ desiredULocale = udesired;
+ supportedULocale = usupported;
desiredLocale = desired;
supportedLocale = supported;
- // desiredJavaLocale = jdesired;
- supportedJavaLocale = jsupported;
desiredIndex = desIndex;
supportedIndex = suppIndex;
}
- ULocale desiredLocale;
- ULocale supportedLocale;
- // Locale desiredJavaLocale;
- Locale supportedJavaLocale;
- int desiredIndex;
- @SuppressWarnings("unused") // unused until public, for other wrappers
- int supportedIndex;
+ /**
+ * Returns the best-matching desired locale.
+ * null if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getDesiredULocale() {
+ return desiredULocale == null && desiredLocale != null ?
+ ULocale.forLocale(desiredLocale) : desiredULocale;
+ }
+ /**
+ * Returns the best-matching desired locale.
+ * null if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getDesiredLocale() {
+ return desiredLocale == null && desiredULocale != null ?
+ desiredULocale.toLocale() : desiredLocale;
+ }
+
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is null if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getSupportedULocale() { return supportedULocale; }
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is null if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getSupportedLocale() { return supportedLocale; }
+
+ /**
+ * Returns the index of the best-matching desired locale in the input Iterable order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getDesiredIndex() { return desiredIndex; }
+
+ /**
+ * Returns the index of the best-matching supported locale in the constructor’s or builder’s input order
+ * (“set” Collection plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getSupportedIndex() { return supportedIndex; }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
+ *
+ * @return the service locale, combining the best-matching desired and supported locales; null if there is no supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale makeServiceULocale() {
+ ULocale bestDesired = getDesiredULocale();
+ ULocale serviceLocale = supportedULocale; // may be null: no supported locales and no default locale
+ if (serviceLocale != null && bestDesired != null && !serviceLocale.equals(bestDesired)) {
+ ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
+
+ // Copy the region from bestDesired, if there is one.
+ // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty.
+ String region = bestDesired.getCountry();
+ if (!region.isEmpty()) {
+ b.setRegion(region);
+ }
+
+ // Copy the variants from bestDesired, if there are any.
+ // Note that this will override any serviceLocale variants.
+ // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+ // TODO: Why replace? Why not append?
+ String variants = bestDesired.getVariant();
+ if (!variants.isEmpty()) {
+ b.setVariant(variants);
+ }
+
+ // Copy the extensions from bestDesired, if there are any.
+ // Note that this will override any serviceLocale extensions.
+ // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+ // (replacing calendar).
+ // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale??
+ // (Unsure about this one.)
+ for (char extensionKey : bestDesired.getExtensionKeys()) {
+ b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
+ }
+ serviceLocale = b.build();
+ }
+ return serviceLocale;
+ }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
+ *
+ * @return the service locale, combining the best-matching desired and supported locales; null if there is no supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale makeServiceLocale() {
+ return supportedULocale == null ? null : makeServiceULocale().toLocale();
+ }
}
- // normally the default values, but can be set via constructor
-
private final int thresholdDistance;
- private final int demotionPerAdditionalDesiredLocale;
- private final DistanceOption distanceOption;
+ private final int demotionPerDesiredLocale;
+ private final FavorSubtag favorSubtag;
- // built based on application's supported languages in constructor
-
- private final ULocale[] supportedLocales;
- private final Locale[] supportedJavaLocales;
- private final Map<ULocale, Integer> supportedToIndex;
- private final Map<LSR, Indexes> supportedLsrToIndexes;
- // Array versions of the supportedLsrToIndexes keys and values.
+ // These are in input order.
+ private final ULocale[] supportedULocales;
+ private final Locale[] supportedLocales;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ private final Map<LSR, Integer> supportedLsrToIndex;
+ // Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
private final LSR[] supportedLsrs;
- private final Indexes[] supportedIndexes;
- private final ULocale defaultLocale;
- private final Locale defaultJavaLocale;
+ private final int[] supportedIndexes;
+ private final ULocale defaultULocale;
+ private final Locale defaultLocale;
private final int defaultLocaleIndex;
+ /**
+ * LocaleMatcher Builder.
+ *
+ * @see XLocaleMatcher#builder()
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
public static class Builder {
- /**
- * Supported locales. A Set, to avoid duplicates.
- * Maintains iteration order for consistent matching behavior (first best match wins).
- */
- private Set<ULocale> supportedLocales;
+ private List<ULocale> supportedLocales;
private int thresholdDistance = -1;
- private int demotionPerAdditionalDesiredLocale = -1;;
+ private Demotion demotion;
private ULocale defaultLocale;
- private DistanceOption distanceOption;
+ private FavorSubtag favor;
+
/**
+ * Parses the string like {@link LocalePriorityList} does and
+ * sets the supported locales accordingly.
+ * Clears any previously set/added supported locales first.
+ *
* @param locales the languagePriorityList to set
* @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
public Builder setSupportedLocales(String locales) {
- return setSupportedLocales(LocalePriorityList.add(locales).build());
+ return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
}
- public Builder setSupportedLocales(Iterable<ULocale> locales) {
- supportedLocales = new LinkedHashSet<>(); // maintain order
- for (ULocale locale : locales) {
- supportedLocales.add(locale);
- }
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setSupportedULocales(Collection<ULocale> locales) {
+ supportedLocales = new ArrayList<>(locales);
return this;
}
- public Builder setSupportedLocales(Collection<ULocale> locales) {
- supportedLocales = new LinkedHashSet<>(locales); // maintain order
- return this;
- }
- public Builder setSupportedJavaLocales(Collection<Locale> locales) {
- supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locale
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setSupportedLocales(Collection<Locale> locales) {
+ supportedLocales = new ArrayList<>(locales.size());
for (Locale locale : locales) {
supportedLocales.add(ULocale.forLocale(locale));
}
return this;
}
- public Builder addSupportedLocale(ULocale locale) {
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale the locale to add
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder addSupportedULocale(ULocale locale) {
if (supportedLocales == null) {
- supportedLocales = new LinkedHashSet<>();
+ supportedLocales = new ArrayList<>();
}
supportedLocales.add(locale);
return this;
}
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale the locale to add
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
public Builder addSupportedLocale(Locale locale) {
- return addSupportedLocale(ULocale.forLocale(locale));
+ return addSupportedULocale(ULocale.forLocale(locale));
}
/**
+ * Sets the default locale; if null, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setDefaultULocale(ULocale defaultLocale) {
+ this.defaultLocale = defaultLocale;
+ return this;
+ }
+
+ /**
+ * Sets the default locale; if null, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setDefaultLocale(Locale defaultLocale) {
+ this.defaultLocale = ULocale.forLocale(defaultLocale);
+ return this;
+ }
+
+ /**
+ * If SCRIPT, then the language differences are smaller than script differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setFavorSubtag(FavorSubtag subtag) {
+ this.favor = subtag;
+ return this;
+ }
+
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setDemotionPerDesiredLocale(Demotion demotion) {
+ this.demotion = demotion;
+ return this;
+ }
+
+ /**
+ * <i>Internal only!</i>
+ *
* @param thresholdDistance the thresholdDistance to set, with -1 = default
* @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
*/
- public Builder setThresholdDistance(int thresholdDistance) {
+ @Deprecated
+ public Builder internalSetThresholdDistance(int thresholdDistance) {
if (thresholdDistance > 100) {
thresholdDistance = 100;
}
this.thresholdDistance = thresholdDistance;
return this;
}
- /**
- * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
- * @return this Builder object
- */
- public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
- this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
- return this;
- }
/**
- * Set the default language, with null = default = first supported language
- * @param defaultLocale the default language
- * @return this Builder object
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @return new XLocaleMatcher.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- public Builder setDefaultLanguage(ULocale defaultLocale) {
- this.defaultLocale = defaultLocale;
- return this;
- }
-
- /**
- * If true, then the language differences are smaller than than script differences.
- * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
- * @param distanceOption the distance option
- * @return this Builder object
- */
- public Builder setDistanceOption(DistanceOption distanceOption) {
- this.distanceOption = distanceOption;
- return this;
- }
-
public XLocaleMatcher build() {
return new XLocaleMatcher(this);
}
@@ -208,22 +495,25 @@
if (defaultLocale != null) {
s.append(" default=").append(defaultLocale.toString());
}
- if (distanceOption != null) {
- s.append(" distance=").append(distanceOption.toString());
+ if (favor != null) {
+ s.append(" distance=").append(favor.toString());
}
if (thresholdDistance >= 0) {
s.append(String.format(" threshold=%d", thresholdDistance));
}
- if (demotionPerAdditionalDesiredLocale >= 0) {
- s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+ if (demotion != null) {
+ s.append(" demotion=").append(demotion.toString());
}
return s.append('}').toString();
}
}
/**
- * Returns a builder used in chaining parameters for building a Locale Matcher.
- * @return this Builder object
+ * Returns a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
public static Builder builder() {
return new Builder();
@@ -235,16 +525,9 @@
}
/** Convenience method */
public XLocaleMatcher(LocalePriorityList supportedLocales) {
- this(builder().setSupportedLocales(supportedLocales));
- }
- /** Convenience method */
- public XLocaleMatcher(Set<ULocale> supportedLocales) {
- this(builder().setSupportedLocales(supportedLocales));
+ this(builder().setSupportedULocales(supportedLocales.getULocales()));
}
- /**
- * Creates a locale matcher with the given Builder parameters.
- */
private XLocaleMatcher(Builder builder) {
thresholdDistance = builder.thresholdDistance < 0 ?
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
@@ -252,82 +535,101 @@
// so that when different types are used (e.g., java.util.Locale)
// we can return those by parallel index.
int supportedLocalesLength = builder.supportedLocales.size();
- supportedLocales = new ULocale[supportedLocalesLength];
- supportedJavaLocales = new Locale[supportedLocalesLength];
- supportedToIndex = new HashMap<>(supportedLocalesLength);
+ supportedULocales = new ULocale[supportedLocalesLength];
+ supportedLocales = new Locale[supportedLocalesLength];
+ // Supported LSRs in input order.
+ LSR lsrs[] = new LSR[supportedLocalesLength];
+ // Also find the first supported locale whose LSR is
+ // the same as that for the default locale.
+ ULocale udef = builder.defaultLocale;
+ Locale def = null;
+ LSR defLSR = null;
+ int idef = -1;
+ if (udef != null) {
+ def = udef.toLocale();
+ defLSR = getMaximalLsrOrUnd(udef);
+ }
+ int i = 0;
+ for (ULocale locale : builder.supportedLocales) {
+ supportedULocales[i] = locale;
+ supportedLocales[i] = locale.toLocale();
+ LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
+ if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
+ idef = i;
+ }
+ ++i;
+ }
+
// We need an unordered map from LSR to first supported locale with that LSR,
// and an ordered list of (LSR, Indexes).
// We use a LinkedHashMap for both,
// and insert the supported locales in the following order:
- // 1. First supported locale.
+ // 1. Default locale, if it is supported.
// 2. Priority locales in builder order.
// 3. Remaining locales in builder order.
- supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
- Map<LSR, Indexes> otherLsrToIndexes = null;
- LSR firstLSR = null;
- int i = 0;
- for (ULocale locale : builder.supportedLocales) {
- supportedLocales[i] = locale;
- supportedJavaLocales[i] = locale.toLocale();
- // supportedToIndex.putIfAbsent(locale, i)
- Integer oldIndex = supportedToIndex.get(locale);
- if (oldIndex == null) {
- supportedToIndex.put(locale, i);
- }
- LSR lsr = getMaximalLsrOrUnd(locale);
- if (i == 0) {
- firstLSR = lsr;
- supportedLsrToIndexes.put(lsr, new Indexes(0));
- } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
- addIndex(supportedLsrToIndexes, lsr, i);
+ supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+ Map<LSR, Integer> otherLsrToIndex = null;
+ if (idef >= 0) {
+ supportedLsrToIndex.put(defLSR, idef);
+ }
+ i = 0;
+ for (ULocale locale : supportedULocales) {
+ if (i == idef) { ++i; continue; }  // advance i before skipping; otherwise i stays == idef and all later locales are skipped too
+ LSR lsr = lsrs[i];
+ if (defLSR == null) {
+ assert i == 0;
+ udef = locale;
+ def = supportedLocales[0];
+ defLSR = lsr;
+ idef = 0;
+ supportedLsrToIndex.put(lsr, 0);
+ } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+ putIfAbsent(supportedLsrToIndex, lsr, i);
} else {
- if (otherLsrToIndexes == null) {
- otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
+ if (otherLsrToIndex == null) {
+ otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
}
- addIndex(otherLsrToIndexes, lsr, i);
+ putIfAbsent(otherLsrToIndex, lsr, i);
}
++i;
}
- if (otherLsrToIndexes != null) {
- supportedLsrToIndexes.putAll(otherLsrToIndexes);
+ if (otherLsrToIndex != null) {
+ supportedLsrToIndex.putAll(otherLsrToIndex);
}
- int numSuppLsrs = supportedLsrToIndexes.size();
- supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
- supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
- ULocale def;
- Locale jdef = null;
- int idef = -1;
- if (builder.defaultLocale != null) {
- def = builder.defaultLocale;
- } else if (supportedLocalesLength > 0) {
- def = supportedLocales[0]; // first language
- jdef = supportedJavaLocales[0];
- idef = 0;
- } else {
- def = null;
+ int numSuppLsrs = supportedLsrToIndex.size();
+ supportedLsrs = new LSR[numSuppLsrs];
+ supportedIndexes = new int[numSuppLsrs];
+ i = 0;
+ for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
+ supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
+ supportedIndexes[i++] = entry.getValue();
}
- if (jdef == null && def != null) {
- jdef = def.toLocale();
- }
+
+ defaultULocale = udef;
defaultLocale = def;
- defaultJavaLocale = jdef;
defaultLocaleIndex = idef;
- demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
- LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
- builder.demotionPerAdditionalDesiredLocale;
- distanceOption = builder.distanceOption;
+ demotionPerDesiredLocale =
+ builder.demotion == Demotion.NONE ? 0 :
+ LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
+ favorSubtag = builder.favor;
}
- private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
- Indexes indexes = lsrToIndexes.get(lsr);
- if (indexes == null) {
- lsrToIndexes.put(lsr, new Indexes(i));
- } else {
- indexes.add(i);
+ private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
+ Integer index = lsrToIndex.get(lsr);
+ if (index == null) {
+ lsrToIndex.put(lsr, i);
}
}
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
+ if (locale.equals(UND_ULOCALE)) {
+ return UND_LSR;
+ } else {
+ return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+ }
+ }
+
+ private static final LSR getMaximalLsrOrUnd(Locale locale) {
if (locale.equals(UND_LOCALE)) {
return UND_LSR;
} else {
@@ -335,161 +637,11 @@
}
}
- /** Convenience method */
- public ULocale getBestMatch(ULocale ulocale) {
- return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
- }
- /** Convenience method */
- public ULocale getBestMatch(String languageList) {
- return getBestMatch(LocalePriorityList.add(languageList).build(), null);
- }
- /** Convenience method */
- public ULocale getBestMatch(ULocale... locales) {
- return getBestMatch(Arrays.asList(locales), null);
- }
- /** Convenience method */
- public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
- return getBestMatch(desiredLocales, null);
- }
+ private static final class ULocaleLsrIterator extends LsrIterator {
+ private Iterator<ULocale> locales;
+ private ULocale current, remembered;
- /**
- * Get the best match between the desired languages and supported languages
- * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
- * @param outputBestDesired The one of the desired languages that matched best (can be null).
- * Set to null if the best match was not below the threshold distance.
- * @return the best match.
- */
- public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
- Iterator<ULocale> desiredIter = desiredLocales.iterator();
- if (!desiredIter.hasNext()) {
- if (outputBestDesired != null) {
- outputBestDesired.value = null;
- }
- if (TRACE_MATCHER) {
- System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
- }
- return defaultLocale;
- }
- ULocale desiredLocale = desiredIter.next();
- return getBestMatch(desiredLocale, desiredIter, outputBestDesired);
- }
-
- /**
- * @param desiredLocale First desired locale.
- * @param remainingIter Remaining desired locales, null or empty if none.
- * @param outputBestDesired If not null,
- * will be set to the desired locale that matches the best supported one.
- * @return the best supported locale.
- */
- private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
- Output<ULocale> outputBestDesired) {
- Result result = getBestMatch(desiredLocale, remainingIter);
- if (outputBestDesired != null) {
- outputBestDesired.value = result.desiredLocale;
- }
- return result.supportedLocale;
- }
-
- private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
- int desiredIndex = 0;
- int bestDesiredIndex = -1;
- ULocale bestDesiredLocale = null;
- int bestSupportedLsrIndex = 0;
- for (int bestDistance = thresholdDistance; bestDistance > 0;
- bestDistance -= demotionPerAdditionalDesiredLocale) {
- // Quick check for exact locale match.
- Integer supportedIndex = supportedToIndex.get(desiredLocale);
- if (supportedIndex != null) {
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: desired=supported\n", desiredLocale);
- }
- int suppIndex = supportedIndex;
- return new Result(desiredLocale, supportedLocales[suppIndex],
- supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
- }
- // Quick check for exact maximized LSR.
- LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
- Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
- if (indexes != null) {
- // If this is a supported LSR, return the first locale.
- // We already know the exact locale isn't there.
- int suppIndex = indexes.getFirst();
- ULocale result = supportedLocales[suppIndex];
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
- }
- return new Result(desiredLocale, result,
- supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
- }
- int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- desiredLSR, supportedLsrs, bestDistance, distanceOption);
- if (bestIndexAndDistance >= 0) {
- bestDistance = bestIndexAndDistance & 0xff;
- bestDesiredIndex = desiredIndex;
- bestDesiredLocale = desiredLocale;
- bestSupportedLsrIndex = bestIndexAndDistance >> 8;
- if (bestDistance == 0) {
- break;
- }
- }
- if (remainingIter == null || !remainingIter.hasNext()) {
- break;
- }
- desiredLocale = remainingIter.next();
- ++desiredIndex;
- }
- if (bestDesiredIndex < 0) {
- if (TRACE_MATCHER) {
- System.err.printf("Returning default %s: no good match\n", defaultLocale);
- }
- return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
- }
- // Pick exact match if there is one.
- // The length of the list is normally 1.
- Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
- int suppIndex;
- for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
- ULocale locale = supportedLocales[suppIndex];
- if (bestDesiredLocale.equals(locale)) {
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: desired=best matching supported language\n",
- bestDesiredLocale);
- }
- return new Result(bestDesiredLocale, locale,
- supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
- }
- }
- // Otherwise return the first of the supported languages that share the best-matching LSR.
- suppIndex = bestSupportedIndexes.getFirst();
- ULocale result = supportedLocales[suppIndex];
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: first best matching supported language\n", result);
- }
- return new Result(bestDesiredLocale, result,
- supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
- }
-
- /**
- * Get the best match between the desired languages and supported languages
- * @param desiredLocale the supplied user's language.
- * @param outputBestDesired The one of the desired languages that matched best.
- * Set to null if the best match was not below the threshold distance.
- * @return the best match.
- */
- public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
- return getBestMatch(desiredLocale, null, outputBestDesired);
- }
-
- /**
- * Converts Locales to ULocales on the fly.
- */
- private static final class LocalesWrapper implements Iterator<ULocale> {
- private Iterator<Locale> locales;
- // Cache locales to avoid conversion of the result.
- private Locale first, second;
- private List<Locale> remaining;
-
- LocalesWrapper(Iterator<Locale> locales) {
+ ULocaleLsrIterator(Iterator<ULocale> locales) {
this.locales = locales;
}
@@ -499,148 +651,217 @@
}
@Override
- public ULocale next() {
- Locale locale = locales.next();
- if (first == null) {
- first = locale;
- } else if (second == null) {
- second = locale;
- } else {
- if (remaining == null) {
- remaining = new ArrayList<>();
- }
- remaining.add(locale);
- }
- return ULocale.forLocale(locale);
- }
-
- Locale getJavaLocale(int i) {
- if (i == 0) {
- return first;
- } else if (i == 1) {
- return second;
- } else {
- // TODO: test code coverage
- return remaining.get(i - 2);
- }
+ public LSR next() {
+ current = locales.next();
+ return getMaximalLsrOrUnd(current);
}
@Override
- public void remove() {
- throw new UnsupportedOperationException();
+ public void rememberCurrent(int desiredIndex) {
+ bestDesiredIndex = desiredIndex;
+ remembered = current;
}
}
- public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
+ private static final class LocaleLsrIterator extends LsrIterator {
+ private Iterator<Locale> locales;
+ private Locale current, remembered;
+
+ LocaleLsrIterator(Iterator<Locale> locales) {
+ this.locales = locales;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return locales.hasNext();
+ }
+
+ @Override
+ public LSR next() {
+ current = locales.next();
+ return getMaximalLsrOrUnd(current);
+ }
+
+ @Override
+ public void rememberCurrent(int desiredIndex) {
+ bestDesiredIndex = desiredIndex;
+ remembered = current;
+ }
+ }
+
+ public ULocale getBestMatch(ULocale desiredLocale) {
+ LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+ int suppIndex = getBestSuppIndex(desiredLSR, null);
+ return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
+ }
+
+ public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
+ Iterator<ULocale> desiredIter = desiredLocales.iterator();
+ if (!desiredIter.hasNext()) {
+ return defaultULocale;
+ }
+ ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
+ LSR desiredLSR = lsrIter.next();
+ int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+ return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
+ }
+
+ public ULocale getBestMatch(String desiredLocaleList) {
+ return getBestMatch(LocalePriorityList.add(desiredLocaleList).build());
+ }
+
+ public Locale getBestLocale(Locale desiredLocale) {
+ LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+ int suppIndex = getBestSuppIndex(desiredLSR, null);
+ return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
+ }
+
+ public Locale getBestLocale(Iterable<Locale> desiredLocales) {
Iterator<Locale> desiredIter = desiredLocales.iterator();
if (!desiredIter.hasNext()) {
- if (outputBestDesired != null) {
- outputBestDesired.value = null;
- }
- if (TRACE_MATCHER) {
- System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
- }
- return defaultJavaLocale;
+ return defaultLocale;
}
- LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
- ULocale desiredLocale = wrapper.next();
- Result result = getBestMatch(desiredLocale, NULL_ITERATOR);
- if (outputBestDesired != null) {
- outputBestDesired.value = result.desiredIndex >= 0 ?
- wrapper.getJavaLocale(result.desiredIndex) : null;
- }
- return result.supportedJavaLocale;
+ LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
+ LSR desiredLSR = lsrIter.next();
+ int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+ return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
}
- public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
- ULocale desiredULocale = ULocale.forLocale(desiredLocale);
- Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
- if (outputBestDesired != null) {
- outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
+ private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
+ if (suppIndex < 0) {
+ return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ } else if (desiredLocale != null) {
+ return new Result(desiredLocale, supportedULocales[suppIndex],
+ null, supportedLocales[suppIndex], 0, suppIndex);
+ } else {
+ return new Result(lsrIter.remembered, supportedULocales[suppIndex],
+ null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
}
- return result.supportedJavaLocale;
}
- /** Combine features of the desired locale into those of the supported, and return result. */
- public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
- // for examples of extensions, variants, see
- // http://unicode.org/repos/cldr/tags/latest/common/bcp47/
- // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
-
- if (!bestSupported.equals(bestDesired) && bestDesired != null) {
- // add region, variants, extensions
- ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
-
- // copy the region from the desired, if there is one
- String region = bestDesired.getCountry();
- if (!region.isEmpty()) {
- b.setRegion(region);
- }
-
- // copy the variants from desired, if there is one
- // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
- String variants = bestDesired.getVariant();
- if (!variants.isEmpty()) {
- b.setVariant(variants);
- }
-
- // copy the extensions from desired, if there are any
- // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
- for (char extensionKey : bestDesired.getExtensionKeys()) {
- b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
- }
- bestSupported = b.build();
+ private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
+ if (suppIndex < 0) {
+ return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ } else if (desiredLocale != null) {
+ return new Result(null, supportedULocales[suppIndex],
+ desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
+ } else {
+ return new Result(null, supportedULocales[suppIndex],
+ lsrIter.remembered, supportedLocales[suppIndex],
+ lsrIter.bestDesiredIndex, suppIndex);
}
- return bestSupported;
}
- /** Returns the distance between the two languages. The values are not necessarily symmetric.
- * @param desired A locale desired by the user
- * @param supported A locale supported by a program.
- * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
- * A language is first maximized with add likely subtags, then compared.
+ public Result getBestMatchResult(ULocale desiredLocale) {
+ LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+ int suppIndex = getBestSuppIndex(desiredLSR, null);
+ return makeResult(desiredLocale, null, suppIndex);
+ }
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @return the best-matching pair of a desired and a supported locale.
*/
- public int distance(ULocale desired, ULocale supported) {
- return LocaleDistance.INSTANCE.getBestIndexAndDistance(
- XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
- new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
- thresholdDistance, distanceOption) & 0xff;
+ public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
+ Iterator<ULocale> desiredIter = desiredLocales.iterator();
+ if (!desiredIter.hasNext()) {
+ return makeResult(UND_ULOCALE, null, -1);
+ }
+ ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
+ LSR desiredLSR = lsrIter.next();
+ int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+ return makeResult(null, lsrIter, suppIndex);
}
- /** Convenience method */
- public int distance(String desiredLanguage, String supportedLanguage) {
- return LocaleDistance.INSTANCE.getBestIndexAndDistance(
- XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
- new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) },
- thresholdDistance, distanceOption) & 0xff;
+ public Result getBestLocaleResult(Locale desiredLocale) {
+ LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+ int suppIndex = getBestSuppIndex(desiredLSR, null);
+ return makeResult(desiredLocale, null, suppIndex);
+ }
+
+ public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
+ Iterator<Locale> desiredIter = desiredLocales.iterator();
+ if (!desiredIter.hasNext()) {
+ return makeResult(UND_LOCALE, null, -1);
+ }
+ LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
+ LSR desiredLSR = lsrIter.next();
+ int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+ return makeResult(null, lsrIter, suppIndex);
+ }
+
+ /**
+ * @param desiredLSR The first desired locale's LSR.
+ * @param remainingIter Remaining desired LSRs, null or empty if none.
+ * @return the index of the best-matching supported locale, or -1 if there is no good match.
+ */
+ private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
+ int desiredIndex = 0;
+ int bestSupportedLsrIndex = -1;
+ for (int bestDistance = thresholdDistance;;) {
+ // Quick check for exact maximized LSR.
+ Integer index = supportedLsrToIndex.get(desiredLSR);
+ if (index != null) {
+ int suppIndex = index;
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
+ supportedULocales[suppIndex]);
+ }
+ if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+ return suppIndex;
+ }
+ int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ desiredLSR, supportedLsrs, bestDistance, favorSubtag);
+ if (bestIndexAndDistance >= 0) {
+ bestDistance = bestIndexAndDistance & 0xff;
+ if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+ bestSupportedLsrIndex = bestIndexAndDistance >> 8;
+ }
+ if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+ break;
+ }
+ if (remainingIter == null || !remainingIter.hasNext()) { break; }
+ desiredLSR = remainingIter.next();
+ // Keep desiredIndex in step with the iterator so that rememberCurrent() records the real position.
+ ++desiredIndex;
+ }
+ if (bestSupportedLsrIndex < 0) {
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning default %s: no good match\n", defaultULocale);
+ }
+ return -1;
+ }
+ int suppIndex = supportedIndexes[bestSupportedLsrIndex];
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning %s: best matching supported locale\n",
+ supportedULocales[suppIndex]);
+ }
+ return suppIndex;
}
@Override
public String toString() {
StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
- if (supportedLocales.length > 0) {
- s.append(" supported={").append(supportedLocales[0].toString());
- for (int i = 1; i < supportedLocales.length; ++i) {
- s.append(", ").append(supportedLocales[1].toString());
+ if (supportedULocales.length > 0) {
+ s.append(" supported={").append(supportedULocales[0].toString());
+ for (int i = 1; i < supportedULocales.length; ++i) {
+ s.append(", ").append(supportedULocales[i].toString());
}
s.append('}');
}
- s.append(" default=").append(Objects.toString(defaultLocale));
- if (distanceOption != null) {
- s.append(" distance=").append(distanceOption.toString());
+ s.append(" default=").append(Objects.toString(defaultULocale));
+ if (favorSubtag != null) {
+ s.append(" distance=").append(favorSubtag.toString());
}
if (thresholdDistance >= 0) {
s.append(String.format(" threshold=%d", thresholdDistance));
}
- s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+ s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
return s.append('}').toString();
}
- /** Return the inverse of the distance: that is, 1-distance(desired, supported) */
- public double match(ULocale desired, ULocale supported) {
- return (100-distance(desired, supported))/100.0;
- }
-
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
* perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
@@ -652,11 +873,16 @@
* @param supported Supported locale
* @param supportedMax Maximized locale (using likely subtags)
* @return value between 0 and 1, inclusive.
- * @deprecated Use the form with 2 parameters instead.
+ * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
*/
@Deprecated
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
- return match(desired, supported);
+ // Returns the inverse of the distance: That is, 1-distance(desired, supported).
+ int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
+ new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
+ thresholdDistance, favorSubtag) & 0xff;
+ return (100 - distance) / 100.0;
}
/**
@@ -671,11 +897,4 @@
// TODO
return null;
}
-
- /**
- * @return the thresholdDistance. Any distance above this value is treated as a match failure.
- */
- public int getThresholdDistance() {
- return thresholdDistance;
- }
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
index 7df1a85..c1bf6af 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
@@ -25,9 +25,9 @@
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
/**
* Provides a way to match the languages (locales) supported by a product to the
@@ -864,30 +864,15 @@
transient ULocale xDefaultLanguage = null;
transient boolean xFavorScript = false;
- /**
- * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch).
- * The values are not necessarily symmetric.
- * @param desired A locale desired by the user
- * @param supported A locale supported by a program.
- * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance).
- * A language is first maximized with add likely subtags, then compared.
- * @internal
- * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
- */
- @Deprecated
- public int distance(ULocale desired, ULocale supported) {
- return getLocaleMatcher().distance(desired, supported);
- }
-
private synchronized XLocaleMatcher getLocaleMatcher() {
if (xLocaleMatcher == null) {
Builder builder = XLocaleMatcher.builder();
- builder.setSupportedLocales(languagePriorityList);
+ builder.setSupportedULocales(languagePriorityList.getULocales());
if (xDefaultLanguage != null) {
- builder.setDefaultLanguage(xDefaultLanguage);
+ builder.setDefaultULocale(xDefaultLanguage);
}
if (xFavorScript) {
- builder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
+ builder.setFavorSubtag(FavorSubtag.SCRIPT);
}
xLocaleMatcher = builder.build();
}
@@ -908,7 +893,13 @@
*/
@Deprecated
public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
- return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired);
+ if (outputBestDesired == null) {
+ return getLocaleMatcher().getBestMatch(desiredLanguages);
+ } else {
+ XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages);
+ outputBestDesired.value = result.getDesiredULocale();
+ return result.getSupportedULocale();
+ }
}
/**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
index b8a1a74..0726b1d 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java
@@ -22,43 +22,45 @@
import java.util.regex.Pattern;
/**
- * Provides an immutable list of languages (locales) in priority order.
- * The string format is based on the Accept-Language format
+ * Provides an immutable list of languages/locales in priority order.
+ * The string format is based on the Accept-Language format
* <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as
* "af, en, fr;q=0.9". Syntactically it is slightly
* more lenient, in allowing extra whitespace between elements, extra commas,
* and more than 3 decimals (on input), and pins between 0 and 1.
+ *
* <p>In theory, Accept-Language indicates the relative 'quality' of each item,
- * but in practice, all of the browsers just take an ordered list, like
+ * but in practice, all of the browsers just take an ordered list, like
* "en, fr, de", and synthesize arbitrary quality values that put these in the
* right order, like: "en, fr;q=0.7, de;q=0.3". The quality values in these de facto
* semantics thus have <b>nothing</b> to do with the relative qualities of the
* original. Accept-Language also doesn't
* specify the interpretation of multiple instances, eg what "en, fr, en;q=.5"
* means.
- * <p>There are various ways to build a LanguagePriorityList, such
+ * <p>There are various ways to build a LocalePriorityList, such
* as using the following equivalent patterns:
- *
+ *
* <pre>
- * list = LanguagePriorityList.add("af, en, fr;q=0.9").build();
- *
- * list2 = LanguagePriorityList
+ * list = LocalePriorityList.add("af, en, fr;q=0.9").build();
+ *
+ * list2 = LocalePriorityList
* .add(ULocale.forString("af"))
* .add(ULocale.ENGLISH)
* .add(ULocale.FRENCH, 0.9d)
* .build();
* </pre>
- * When the list is built, the internal values are sorted in descending order by
- * weight, and then by input order. That is, if two languages have the same weight, the first one in the original order
- * comes first. If exactly the same language tag appears multiple times,
- * the last one wins.
- *
- * There are two options when building. If preserveWeights are on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following:
+ * When the list is built, the internal values are sorted in descending order by weight,
+ * and then by input order.
+ * That is, if two languages/locales have the same weight, the first one in the original order comes first.
+ * If exactly the same language tag appears multiple times, the last one wins.
+ *
+ * <p>There are two options when building.
+ * If preserveWeights is on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following:
* <pre> en;q=1.0
* de;q=1.0
* fr;q=0.7
* ja;q=0.3</pre>
- * If it is off (the default), then all weights are reset to 1.0 after reordering.
+ * If it is off (the default), then all weights are reset to 1.0 after reordering.
* This is to match the effect of the Accept-Language semantics as used in browsers, and results in the following:
* * <pre> en;q=1.0
* de;q=1.0
@@ -73,49 +75,48 @@
private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
private static final Pattern weightSplitter = Pattern
- .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
+ .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
private final Map<ULocale, Double> languagesAndWeights;
/**
- * Add a language code to the list being built, with weight 1.0.
- *
- * @param languageCode locale/language to be added
- * @return internal builder, for chaining
+ * Creates a Builder and adds locales, each with weight 1.0.
+ *
+ * @param locales locales/languages to be added
+ * @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
- public static Builder add(ULocale... languageCode) {
- return new Builder().add(languageCode);
+ public static Builder add(ULocale... locales) {
+ return new Builder().add(locales);
}
/**
- * Add a language code to the list being built, with specified weight.
- *
- * @param languageCode locale/language to be added
+ * Creates a Builder and adds a locale with a specified weight.
+ *
+ * @param locale locale/language to be added
* @param weight value from 0.0 to 1.0
- * @return internal builder, for chaining
+ * @return a new builder with this locale, for chaining
* @stable ICU 4.4
*/
- public static Builder add(ULocale languageCode, final double weight) {
- return new Builder().add(languageCode, weight);
+ public static Builder add(ULocale locale, final double weight) {
+ return new Builder().add(locale, weight);
}
/**
- * Add a language priority list.
- *
- * @param languagePriorityList list to add all the members of
- * @return internal builder, for chaining
+ * Creates a Builder and adds locales with weights.
+ *
+ * @param list list of locales with weights
+ * @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
- public static Builder add(LocalePriorityList languagePriorityList) {
- return new Builder().add(languagePriorityList);
+ public static Builder add(LocalePriorityList list) {
+ return new Builder().add(list);
}
/**
- * Add language codes to the list being built, using a string in rfc2616
- * (lenient) format, where each language is a valid {@link ULocale}.
- *
- * @param acceptLanguageString String in rfc2616 format (but leniently parsed)
- * @return internal builder, for chaining
+ * Creates a Builder, parses the RFC 2616 string, and adds locales with weights accordingly.
+ *
+ * @param acceptLanguageString String in RFC 2616 format (leniently parsed)
+ * @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
public static Builder add(String acceptLanguageString) {
@@ -123,15 +124,27 @@
}
/**
- * Return the weight for a given language, or null if there is none. Note that
- * the weights may be adjusted from those used to build the list.
- *
- * @param language to get weight of
+ * Returns the weight for a given language/locale, or null if there is none.
+ * Note that the weights may be adjusted from those used to build the list.
+ *
+ * @param locale to get weight of
* @return weight
* @stable ICU 4.4
*/
- public Double getWeight(ULocale language) {
- return languagesAndWeights.get(language);
+ public Double getWeight(ULocale locale) {
+ return languagesAndWeights.get(locale);
+ }
+
+ /**
+ * Returns the locales as an immutable Set view.
+ * The set has the same iteration order as this object itself.
+ *
+ * @return the locales
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Set<ULocale> getULocales() {
+ return languagesAndWeights.keySet();
}
/**
@@ -158,6 +171,7 @@
* {@inheritDoc}
* @stable ICU 4.4
*/
+ @Override
public Iterator<ULocale> iterator() {
return languagesAndWeights.keySet().iterator();
}
@@ -199,7 +213,7 @@
}
/**
- * Class used for building LanguagePriorityLists
+ * Class used for building LocalePriorityLists.
* @stable ICU 4.4
*/
public static class Builder {
@@ -207,8 +221,8 @@
* These store the input languages and weights, in chronological order,
* where later additions override previous ones.
*/
- private final Map<ULocale, Double> languageToWeight
- = new LinkedHashMap<ULocale, Double>();
+ private final Map<ULocale, Double> languageToWeight
+ = new LinkedHashMap<>();
/*
* Private constructor, only used by LocalePriorityList
@@ -219,7 +233,7 @@
/**
* Creates a LocalePriorityList. This is equivalent to
* {@link Builder#build(boolean) Builder.build(false)}.
- *
+ *
* @return A LocalePriorityList
* @stable ICU 4.4
*/
@@ -229,27 +243,26 @@
/**
* Creates a LocalePriorityList.
- *
- * @param preserveWeights when true, the weights originally came
- * from a language priority list specified by add() are preserved.
+ *
+ * @param preserveWeights when true, each locale's given weight is preserved.
* @return A LocalePriorityList
* @stable ICU 4.4
*/
public LocalePriorityList build(boolean preserveWeights) {
// Walk through the input list, collecting the items with the same weights.
- final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<Double, Set<ULocale>>(
+ final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<>(
myDescendingDouble);
for (final ULocale lang : languageToWeight.keySet()) {
Double weight = languageToWeight.get(lang);
Set<ULocale> s = doubleCheck.get(weight);
if (s == null) {
- doubleCheck.put(weight, s = new LinkedHashSet<ULocale>());
+ doubleCheck.put(weight, s = new LinkedHashSet<>());
}
s.add(lang);
}
// We now have a bunch of items sorted by weight, then chronologically.
// We can now create a list in the right order
- final Map<ULocale, Double> temp = new LinkedHashMap<ULocale, Double>();
+ final Map<ULocale, Double> temp = new LinkedHashMap<>();
for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) {
final Double weight = langEntry.getKey();
for (final ULocale lang : langEntry.getValue()) {
@@ -260,73 +273,72 @@
}
/**
- * Adds a LocalePriorityList
- *
- * @param languagePriorityList a LocalePriorityList
+ * Adds locales with weights.
+ *
+ * @param list list of locales with weights
* @return this, for chaining
* @stable ICU 4.4
*/
- public Builder add(
- final LocalePriorityList languagePriorityList) {
- for (final ULocale language : languagePriorityList.languagesAndWeights
+ public Builder add(final LocalePriorityList list) {
+ for (final ULocale language : list.languagesAndWeights
.keySet()) {
- add(language, languagePriorityList.languagesAndWeights.get(language));
+ add(language, list.languagesAndWeights.get(language));
}
return this;
}
/**
- * Adds a new language code, with weight = 1.0.
- *
- * @param languageCode to add with weight 1.0
+ * Adds a locale with weight 1.0.
+ *
+ * @param locale to add with weight 1.0
* @return this, for chaining
* @stable ICU 4.4
*/
- public Builder add(final ULocale languageCode) {
- return add(languageCode, D1);
+ public Builder add(final ULocale locale) {
+ return add(locale, D1);
}
/**
- * Adds language codes, with each having weight = 1.0.
- *
- * @param languageCodes List of language codes.
+ * Adds locales, each with weight 1.0.
+ *
+ * @param locales locales/languages to be added
* @return this, for chaining.
* @stable ICU 4.4
*/
- public Builder add(ULocale... languageCodes) {
- for (final ULocale languageCode : languageCodes) {
+ public Builder add(ULocale... locales) {
+ for (final ULocale languageCode : locales) {
add(languageCode, D1);
}
return this;
}
/**
- * Adds a new supported languageCode, with specified weight. Overrides any
- * previous weight for the language.
- *
- * @param languageCode language/locale to add
+ * Adds a locale with a specified weight.
+ * Overrides any previous weight for the locale.
+ * Removes a locale if the weight is zero or negative.
+ *
+ * @param locale language/locale to add
* @param weight value between 0.0 and 1.1
* @return this, for chaining.
* @stable ICU 4.4
*/
- public Builder add(final ULocale languageCode,
- double weight) {
- if (languageToWeight.containsKey(languageCode)) {
- languageToWeight.remove(languageCode);
+ public Builder add(final ULocale locale, double weight) {
+ if (languageToWeight.containsKey(locale)) {
+ languageToWeight.remove(locale);
}
if (weight <= D0) {
return this; // skip zeros
} else if (weight > D1) {
weight = D1;
}
- languageToWeight.put(languageCode, weight);
+ languageToWeight.put(locale, weight);
return this;
}
/**
- * Adds rfc2616 list.
- *
- * @param acceptLanguageList in rfc2616 format
+ * Parses the RFC 2616 string, and adds locales with weights accordingly.
+ *
+ * @param acceptLanguageList in RFC 2616 format (leniently parsed)
* @return this, for chaining.
* @stable ICU 4.4
*/
@@ -351,6 +363,7 @@
}
private static Comparator<Double> myDescendingDouble = new Comparator<Double>() {
+ @Override
public int compare(Double o1, Double o2) {
int result = o1.compareTo(o2);
return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index 0b1f7cd..e80c7f5 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -451,7 +451,7 @@
@Test
public void testExactMatches() {
String lastBase = "";
- TreeSet<ULocale> sorted = new TreeSet<ULocale>();
+ TreeSet<ULocale> sorted = new TreeSet<>();
for (ULocale loc : ULocale.getAvailableLocales()) {
String language = loc.getLanguage();
if (!lastBase.equals(language)) {
@@ -650,10 +650,7 @@
ULocale bulgarian = new ULocale("bg");
ULocale russian = new ULocale("ru");
- assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
- assertEquals("es-ES/DE", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
-
- Output<ULocale> outputBestDesired = new Output<ULocale>();
+ Output<ULocale> outputBestDesired = new Output<>();
ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
assertEquals(ULocale.ITALIAN, best);
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
index c5d57ca..cb32b1f 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
@@ -4,9 +4,7 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import org.junit.Ignore;
import org.junit.Test;
@@ -15,7 +13,7 @@
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
@@ -94,8 +92,8 @@
newLikelyTime += System.nanoTime()-temp;
temp = System.nanoTime();
- int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
- int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
+ int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, FavorSubtag.LANGUAGE);
+ int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, FavorSubtag.LANGUAGE);
newTimeMinusLikely += System.nanoTime()-temp;
}
}
@@ -113,50 +111,6 @@
}
@Test
- public void testInternalTable() {
- Set<String> strings = localeDistance.testOnlyGetDistanceTable(false).keySet();
- // Check that the table has a depth of exactly 3 (desired, supported) pairs everyplace
- // by removing every prefix of a 6-subtag string from a copy of the set of strings.
- // Any remaining string is not a prefix of a full-depth string.
- Set<String> remaining = new HashSet<>(strings);
- // Check that ANY, ANY is always present.
- assertTrue("*-*", strings.contains("*-*"));
- for (String s : strings) {
- int num = countSubtags(s);
- assertTrue(s, 1 <= num && num <= 6);
- if (num > 1) {
- String oneShorter = removeLastSubtag(s);
- assertTrue(oneShorter, strings.contains(oneShorter));
- }
- if (num == 2 || num == 4) {
- String sPlusAnyAny = s + "-*-*";
- assertTrue(sPlusAnyAny, strings.contains(sPlusAnyAny));
- } else if (num == 6) {
- for (;; --num) {
- remaining.remove(s);
- if (num == 1) { break; }
- s = removeLastSubtag(s);
- }
- }
- }
- assertTrue("strings that do not lead to 6-subtag matches", remaining.isEmpty());
- }
-
- private static final int countSubtags(String s) {
- if (s.isEmpty()) { return 0; }
- int num = 1;
- for (int pos = 0; (pos = s.indexOf('-', pos)) >= 0; ++pos) {
- ++num;
- }
- return num;
- }
-
- private static final String removeLastSubtag(String s) {
- int last = s.lastIndexOf('-');
- return s.substring(0, last);
- }
-
- @Test
public void testShowDistanceTable() {
if (isVerbose()) {
localeDistance.testOnlyPrintDistanceTable();
@@ -173,7 +127,7 @@
class MyTestFileHandler extends DataDrivenTestHelper {
Output<ULocale> bestDesired = new Output<>();
- private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
+ private FavorSubtag favorSubtag = FavorSubtag.LANGUAGE;
private Integer threshold = localeDistance.getDefaultScriptDistance();
@Override
@@ -182,20 +136,21 @@
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
}
Arguments args = new Arguments(arguments);
- int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
- int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
String desiredTag = args.desired.toLanguageTag();
String supportedTag = args.supported.toLanguageTag();
final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
- if (assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment, args.desiredToSupported, desiredToSupportedActual)) {
- assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment, args.supportedToDesired, supportedToDesiredActual);
- }
+ int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, favorSubtag);
+ assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment,
+ args.supportedToDesired, supportedToDesiredActual);
+ int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, favorSubtag);
+ assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment,
+ args.desiredToSupported, desiredToSupportedActual);
}
@Override
public void handleParams(String comment, List<String> arguments) {
String switchArg = arguments.get(0);
- if (switchArg.equals("@DistanceOption")) {
- distanceOption = DistanceOption.valueOf(arguments.get(1));
+ if (switchArg.equals("@FavorSubtag")) {
+ favorSubtag = FavorSubtag.valueOf(arguments.get(1));
} else if (switchArg.equals("@Threshold")) {
threshold = Integer.valueOf(arguments.get(1));
} else {
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
index 7a4df3b..f06e8be 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
@@ -7,7 +7,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
@@ -16,12 +15,12 @@
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import junitparams.JUnitParamsRunner;
@@ -51,8 +50,9 @@
}
@SuppressWarnings("unused")
- private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
- return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
+ private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
+ return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
+ internalSetThresholdDistance(d).build();
}
// public void testParentLocales() {
@@ -104,10 +104,6 @@
// }
- private void assertEquals(Object expected, Object string) {
- assertEquals("", expected, string);
- }
-
/**
* If all the base languages are the same, then each sublocale matches
* itself most closely
@@ -139,40 +135,41 @@
check2(sorted);
}
+ private static final ULocale posix = new ULocale("en_US_POSIX");
+
/**
* @param sorted
*/
private void check2(Set<ULocale> sorted) {
- // TODO Auto-generated method stub
logln("Checking: " + sorted);
XLocaleMatcher matcher = newXLocaleMatcher(
LocalePriorityList.add(
sorted.toArray(new ULocale[sorted.size()]))
.build());
for (ULocale loc : sorted) {
- String stringLoc = loc.toString();
- assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+ // The result may not be the exact same locale, but it must be equivalent.
+ // Variants and extensions are ignored.
+ if (loc.equals(posix)) { continue; }
+ ULocale max = ULocale.addLikelySubtags(loc);
+ ULocale best = matcher.getBestMatch(loc);
+ ULocale maxBest = ULocale.addLikelySubtags(best);
+ assertEquals(loc.toString(), max, maxBest);
}
}
@Test
- public void testComputeDistance_monkeyTest() {
- String[] codes = ULocale.getISOCountries();
- Random random = new Random();
- XLocaleMatcher lm = newXLocaleMatcher();
- for (int i = 0; i < 1000; ++i) {
- String x = codes[random.nextInt(codes.length)];
- String y = codes[random.nextInt(codes.length)];
- double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
- if (x.equals("ZZ") || y.equals("ZZ")) {
- assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
- } else if (x.equals(y)) {
- assertEquals("dist(x,x) = 0", 0.0, d);
- } else {
- assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
- assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
- }
- }
+ public void testDemotion() {
+ LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
+ LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
+ XLocaleMatcher noDemotion = XLocaleMatcher.builder().
+ setSupportedULocales(supported.getULocales()).
+ setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build();
+ assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
+
+ XLocaleMatcher regionDemotion = XLocaleMatcher.builder().
+ setSupportedULocales(supported.getULocales()).
+ setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build();
+ assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
}
private static final class PerfCase {
@@ -304,9 +301,9 @@
for (PerfCase pc : pcs) {
final ULocale desired = pc.desired;
- assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
- assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
- assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+ assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
+ assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
+ assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
@@ -350,9 +347,11 @@
String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
+
+ maximizePerf();
}
- private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
+ private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
@@ -361,7 +360,7 @@
return (delta / iterations);
}
- private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
+ private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
@@ -370,6 +369,37 @@
return (delta / iterations);
}
+ private void maximizePerf() {
+ final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+ "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+ "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+ "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+ "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+ "zh-CN, zh-TW, zu";
+ LocalePriorityList list = LocalePriorityList.add(tags).build();
+ int few = 1000;
+ long t = timeMaximize(list, few); // warm up
+ t = timeMaximize(list, few); // measure for scale
+ long targetTime = 100000000L; // 10^8 ns = 0.1s
+ int iterations = (int)((targetTime * few) / t);
+ t = timeMaximize(list, iterations);
+ int length = 0;
+ for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
+ System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
+ t + " ns / " + iterations + " iterations / " + length + " locales");
+ }
+
+ // Returns the total elapsed time in nanoseconds, not the time per iteration.
+ private static long timeMaximize(Iterable<ULocale> list, int iterations) {
+ long start = System.nanoTime();
+ for (int i = iterations; i > 0; --i) {
+ for (ULocale locale : list) {
+ XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+ }
+ }
+ return System.nanoTime() - start;
+ }
+
private static final class TestCase implements Cloneable {
private static final String ENDL = System.getProperties().getProperty("line.separator");
@@ -384,7 +414,7 @@
String supported = "";
String def = "";
- String distance = "";
+ String favor = "";
String threshold = "";
String desired = "";
String expMatch = "";
@@ -405,12 +435,12 @@
supported = "";
def = "";
- distance = "";
+ favor = "";
threshold = "";
}
String toInputsKey() {
- return supported + '+' + def + '+' + distance + '+' + threshold + '+' + desired;
+ return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
}
private static void appendLine(StringBuilder sb, String line) {
@@ -471,9 +501,9 @@
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
test.defaultLine = line;
test.def = suffix;
- } else if ((suffix = getSuffixAfterPrefix(line, limit, "@distance=")) != null) {
+ } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
test.distanceLine = line;
- test.distance = suffix;
+ test.favor = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
test.thresholdLine = line;
test.threshold = suffix;
@@ -531,31 +561,31 @@
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
XLocaleMatcher matcher;
- if (test.def.isEmpty() && test.distance.isEmpty() && test.threshold.isEmpty()) {
+ if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
matcher = new XLocaleMatcher(test.supported);
} else {
XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
builder.setSupportedLocales(test.supported);
if (!test.def.isEmpty()) {
- builder.setDefaultLanguage(new ULocale(test.def));
+ builder.setDefaultULocale(new ULocale(test.def));
}
- if (!test.distance.isEmpty()) {
- DistanceOption distance;
- switch (test.distance) {
+ if (!test.favor.isEmpty()) {
+ FavorSubtag favor;
+ switch (test.favor) {
case "normal":
- distance = DistanceOption.REGION_FIRST;
+ favor = FavorSubtag.LANGUAGE;
break;
case "script":
- distance = DistanceOption.SCRIPT_FIRST;
+ favor = FavorSubtag.SCRIPT;
break;
default:
- throw new IllegalArgumentException("unsupported distance value " + test.distance);
+ throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
}
- builder.setDistanceOption(distance);
+ builder.setFavorSubtag(favor);
}
if (!test.threshold.isEmpty()) {
int threshold = Integer.valueOf(test.threshold);
- builder.setThresholdDistance(threshold);
+ builder.internalSetThresholdDistance(threshold);
}
matcher = builder.build();
}
@@ -566,16 +596,15 @@
assertEquals("bestSupported", expMatch, bestSupported);
} else {
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
- Output<ULocale> bestDesired = new Output<>();
- ULocale bestSupported = matcher.getBestMatch(desired, bestDesired);
- assertEquals("bestSupported", expMatch, bestSupported);
+ XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
+ assertEquals("bestSupported", expMatch, result.getSupportedULocale());
if (!test.expDesired.isEmpty()) {
ULocale expDesired = getULocaleOrNull(test.expDesired);
- assertEquals("bestDesired", expDesired, bestDesired.value);
+ assertEquals("bestDesired", expDesired, result.getDesiredULocale());
}
if (!test.expCombined.isEmpty()) {
ULocale expCombined = getULocaleOrNull(test.expCombined);
- ULocale combined = XLocaleMatcher.combine(bestSupported, bestDesired.value);
+ ULocale combined = result.makeServiceULocale();
assertEquals("combined", expCombined, combined);
}
}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
index bd653a7..21c9b60 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
@@ -10,7 +10,7 @@
# Lines starting with an '@' sign provide matcher parameters.
# @supported=<comma-separated supported languages>
# @default=<default language> # no value = no explicit default
-# @distance=[normal|script] # no value = no explicit setting
+# @favor=[normal|script] # no value = no explicit setting
# @threshold=<number 0..100> # no value = no explicit setting
#
# A line with ">>" is a getBestMatch() test case:
@@ -93,7 +93,7 @@
@supported=zh, zh-MO
zh-HK >> zh-MO
-@distance=script
+@favor=script
@supported=es-419, es-ES
es-AR >> es-419
@supported=es-ES, es-419
@@ -153,7 +153,7 @@
zh-CN >> zh-CN
zh >> zh-CN
-@distance=script
+@favor=script
zh-Hant-TW >> zh-TW
zh-Hant >> zh-TW
zh-TW >> zh-TW
@@ -169,7 +169,7 @@
es-AR >> es-419
es-MX >> es-MX
-@distance=script
+@favor=script
en-NZ >> en-GB
es-ES >> es
es-AR >> es-419
@@ -180,7 +180,7 @@
@supported=91, en, hi
sa >> hi
-@distance=script
+@favor=script
sa >> hi
** test: testBasics
@@ -191,7 +191,7 @@
fr >> fr
ja >> fr # return first if no match
-@distance=script
+@favor=script
en-GB >> en-GB
en >> en
fr >> fr
@@ -208,7 +208,7 @@
zh-Hant-HK >> zh-TW
he-IT >> iw
-@distance=script
+@favor=script
zh-Hant >> zh-TW
zh >> zh-CN
zh-Hans-CN >> zh-CN
@@ -228,7 +228,7 @@
ja >> en
-@distance=script
+@favor=script
tl >> fil
mo >> ro
nb >> nn
@@ -243,7 +243,7 @@
en-AU >> en-GB
es-ES >> es
-@distance=script
+@favor=script
es-MX >> es-419
en-AU >> en-GB
es-ES >> es
@@ -257,7 +257,7 @@
@supported=zh, zh-TW, zh-HK
zh-MO >> zh-HK
-@distance=script
+@favor=script
@supported=zh, zh-TW, zh-MO
zh-HK >> zh-MO
@supported=zh, zh-TW, zh-HK
@@ -272,7 +272,7 @@
zh-Hant >> und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
zh >> und-TW # zh should be closer to und-TW than to en-Hant-TW
-@distance=script
+@favor=script
@supported=zh, zh-Hant
und-TW >> zh-Hant
@supported=en-Hant-TW, und-TW
@@ -284,14 +284,14 @@
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
-@distance=script
+@favor=script
en-GB-oed >> en-Latn-US
** test: testGetBestMatchForList-exactMatch
@supported=fr, en-GB, ja, es-ES, es-MX
ja, de >> ja
-@distance=script
+@favor=script
ja, de >> ja
** test: testGetBestMatchForList-simpleVariantMatch
@@ -302,7 +302,7 @@
de, zh >> fr
-@distance=script
+@favor=script
de, en-US >> en-GB
de, zh >> fr
@@ -320,7 +320,7 @@
ja-Jpan-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
-@distance=script
+@favor=script
ja-Jpan-JP, en-AU >> ja
ja-JP, en-US >> ja
ja-Jpan-JP, en-US >> ja
@@ -331,7 +331,7 @@
@supported=en, de, fr, ja
de-CH, fr >> de
-@distance=script
+@favor=script
de-CH, fr >> de
** test: testBestMatchForTraditionalChinese
@@ -357,7 +357,7 @@
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
-@distance=script
+@favor=script
zh-TW >> zh-Hans-CN
zh-Hant >> zh-Hans-CN
zh-TW, en >> en-US
@@ -389,7 +389,7 @@
@supported=it, und
en >> it
-@distance=script
+@favor=script
@supported=it, fr
und >> it
@supported=it, und
@@ -408,7 +408,7 @@
@supported=de-AT, de-DE, de-CH
de >> de-DE
-@distance=script
+@favor=script
@supported=es-AR, es
es-MX >> es-AR
@supported=fr, en, en-GB
@@ -423,7 +423,7 @@
@supported=mul, af
nl >> mul # but nl !=> af
-@distance=script
+@favor=script
@supported=mul, nl
af >> nl
@supported=mul, af
@@ -440,7 +440,7 @@
ja-Jpan-JP, en-GB >> ja # Match for ja-Jpan-JP (maximized already)
-@distance=script
+@favor=script
ja-JP, en-GB >> ja
ja-Jpan-JP, en-GB >> ja
@@ -450,7 +450,7 @@
de-CH, fr >> de
en-US, ar, nl, de, ja >> en
-@distance=script
+@favor=script
de-CH, fr >> de
en-US, ar, nl, de, ja >> en
@@ -487,7 +487,7 @@
@supported=pt-PT, pt, es, es-419
pt-US, pt-PT, pt >> pt # pt-BR implicit
-@distance=script
+@favor=script
@supported=pt-PT, pt-BR, es, es-419
pt-PT, es, pt >> pt-PT
@supported=pt-PT, pt, es, es-419
@@ -515,7 +515,7 @@
@supported=en, sv
en-GB, sv >> en
-@distance=script
+@favor=script
@supported=fr, en, sv
en-GB >> en
@supported=en, sv
@@ -532,7 +532,7 @@
@supported=af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, 
fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, 
sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA
sv >> sv
-@distance=script
+@favor=script
@supported=en, sv
sv >> sv
@@ -552,7 +552,7 @@
# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
-@distance=script
+@favor=script
und >> it
und, en >> en
@@ -561,7 +561,7 @@
@supported=en-NZ, en-IT
en-US >> en-NZ
-@distance=script
+@favor=script
en-US >> en-NZ
** test: testEmptySupported => null
@@ -587,7 +587,7 @@
fr >> en-PSCRACK
de-CH >> en-PSCRACK
-@distance=script
+@favor=script
@supported=und, fr
fr-BE-fonipa >> fr
@supported=und, fr-CA
@@ -649,7 +649,7 @@
@supported=und, en-GU, en-GB, en-IN
en-VI >> en-GU
-@distance=script
+@favor=script
@supported=und, es, es-MA, es-MX, es-419
es-AR >> es-419
@supported=und, es-MA, es, es-419, es-MX
@@ -695,12 +695,12 @@
@threshold=50
fr-BE-fonipa >> und
-@distance=script
+@favor=script
@supported=50, und, fr-CA-fonupa
@threshold=
fr-BE-fonipa >> fr-CA-fonupa | | fr-BE-fonipa
@supported=und, fr-Cyrl-CA-fonupa
-fr-BE-fonipa >> fr-Cyrl-CA-fonupa | fr-BE-fonipa
+fr-BE-fonipa >> und
** test: testScriptFirst
@supported=ru, fr
@@ -711,7 +711,7 @@
@supported=da, ru, hr
sr >> da
-@distance=script
+@favor=script
@supported=ru, fr
zh, pl >> fr
zh-Cyrl, pl >> ru
@@ -730,11 +730,11 @@
fr >> fr
ja >> fr
-@distance=script
+@favor=script
en-GB >> en-GB
en-US >> en
fr >> en-GB
-ja >> en-GB
+ja >> fr
** test: testEmptyWithDefault
@default=en
@@ -765,7 +765,7 @@
zu >> en-GB
zxx >> fr
-@distance=script
+@favor=script
en-GB >> en-GB
en-US >> en
fr-FR >> fr
@@ -792,7 +792,7 @@
@supported=fr, zh-Hant, en
zh, en >> en
-@distance=script
+@favor=script
zh, en >> en
** test: TestCloseEnoughMatchOnMaximized
@@ -829,7 +829,7 @@
@supported=pt-PT, pt, es, es-419
pt-US, pt-PT >> pt
-@distance=script
+@favor=script
@supported=pt-BR, es, es-419
pt-PT, es, pt >> pt-BR
@supported=pt-PT, pt, es, es-419
@@ -844,7 +844,7 @@
@supported=zh-Hant, zh-TW
zh-HK >> zh-Hant
-@distance=script
+@favor=script
@supported=en-GB, en
en-CA >> en-GB
@supported=fr, en-GB, en
@@ -871,7 +871,7 @@
zh-Hant-HK >> zh-TW
he-IT >> iw
-@distance=script
+@favor=script
zh-Hant >> zh-TW
zh >> zh-CN
zh-Hans-CN >> zh-CN
@@ -894,7 +894,7 @@
es-MX >> es-419
es-PT >> es-ES
-@distance=script
+@favor=script
en-AU >> en-GB
es-MX >> es-419
es-PT >> es-ES
@@ -930,7 +930,7 @@
en-GB >> en
en-GB, sv >> en
-@distance=script
+@favor=script
en-GB, sv >> en
** test: Serbian
@@ -951,7 +951,7 @@
@supported=und, sr
sr-Latn >> sr
-@distance=script
+@favor=script
sr-ME >> sr
@supported=und, sr-ME
sr >> sr-ME
@@ -976,7 +976,7 @@
x-piglatin >> fr
x-bork >> x-bork
-@distance=script
+@favor=script
@supported=fr, x-bork, en-Latn-US
x-piglatin >> x-bork
x-bork >> x-bork
@@ -989,7 +989,7 @@
en-GB-oed >> en-Latn-US
i-klingon >> tlh
-@distance=script
+@favor=script
en-GB-oed >> en-Latn-US
i-klingon >> tlh
@@ -1007,7 +1007,7 @@
pt-PT-PSCRACK >> pt-PT-PSCRACK
zh-Hans-PSCRACK >> zh-Hans-PSCRACK
-@distance=script
+@favor=script
de >> fr
en-US >> fr
en >> fr
@@ -1030,7 +1030,7 @@
pt-BR >> pt
zh-Hans-XC >> zh-Hans-XC
-@distance=script
+@favor=script
de >> fr
en-US >> fr
en >> fr
@@ -1052,20 +1052,20 @@
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-PSBIDI
+ar-PSBIDI >> ar-XB # These are equivalent.
en-XA >> en-XA
-en-PSACCENT >> en-PSACCENT
+en-PSACCENT >> en-XA # These are equivalent.
ar-PSCRACK >> ar-PSCRACK
-@distance=script
+@favor=script
de >> en-DE
en >> en-DE
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-PSBIDI
+ar-PSBIDI >> ar-XB # These are equivalent.
en-XA >> en-XA
-en-PSACCENT >> en-PSACCENT
+en-PSACCENT >> en-XA # These are equivalent.
ar-PSCRACK >> ar-PSCRACK
** test: BestMatchForTraditionalChinese
@@ -1095,7 +1095,7 @@
@supported=en, fr-CA
en-US, fr-CA >> en
-@distance=script
+@favor=script
en-US, fr-CA >> en
** test: SiblingDefaultRegion
@@ -1111,15 +1111,15 @@
@default=und
hi >> und
-@distance=script
-hi >> de
+@favor=script
+hi >> und
** test: MatchedLanguageIgnoresDefault
@supported=de, en, fr
@default=und
fr >> fr
-@distance=script
+@favor=script
fr >> fr
## GenX
@@ -1168,9 +1168,9 @@
es-UY >> es-MX
es-VE >> es-MX
-@distance=script
+@favor=script
es-001 >> es
-und >> es
+und >> und
ca >> es
gl-ES >> es
es >> es
@@ -1254,9 +1254,9 @@
es-UY >> es-419
es-VE >> es-419
-@distance=script
+@favor=script
es-001 >> es
-und >> es
+und >> und
ca >> es
gl-ES >> es
es >> es
@@ -1319,9 +1319,9 @@
en-US >> en-US
en >> en-US
-@distance=script
-und >> en-GB
-ja >> en-GB
+@favor=script
+und >> und
+ja >> und
fr-CA >> en-GB
en-AU >> en-GB
en-BZ >> en-GB
@@ -1355,10 +1355,10 @@
@supported=pl, ja, ca
fr >> und
-@distance=script
+@favor=script
@supported=en-GB, en-US, en, en-AU
-und >> en-GB
-ja >> en-GB
+und >> und
+ja >> und
fr-CA >> en-GB
fr >> en-GB
@supported=en-AU, ja, ca
@@ -1384,7 +1384,7 @@
@default=iw
he-IT >> iw
-@distance=script
+@favor=script
he-IT >> iw
** test: language-specific script fallbacks 1
@@ -1395,7 +1395,7 @@
bs >> en
nl-Cyrl >> en # Mark: Expected value should be en not sr. Script difference exceeds threshold, so can't be nl
-@distance=script
+@favor=script
sr-Latn >> sr
hr >> en
bs >> en
@@ -1408,7 +1408,7 @@
@default=und
hr >> und
-@distance=script
+@favor=script
@default=
sr >> sr-Latn
sr-Cyrl >> sr-Latn
@@ -1419,45 +1419,45 @@
@supported=en, sr-Latn
hr >> en
-@distance=script
+@favor=script
hr >> en
** test: both deprecated and not
@supported=fil, tl, iw, he
he-IT >> iw
-he >> he
+he >> iw
iw >> iw
fil-IT >> fil
fil >> fil
-tl >> tl
+tl >> fil
-@distance=script
+@favor=script
he-IT >> iw
-he >> he
+he >> iw
iw >> iw
fil-IT >> fil
fil >> fil
-tl >> tl
+tl >> fil
** test: nearby languages: Nynorsk to Bokmål
@supported=en, nb
nn >> nb
-@distance=script
+@favor=script
nn >> nb
** test: nearby languages: Danish does not match nn
@supported=en, nn
da >> en
-@distance=script
+@favor=script
da >> en
** test: nearby languages: Danish matches no
@supported=en, no
da >> no
-@distance=script
+@favor=script
da >> no
** test: nearby languages: Danish matches nb
@@ -1469,7 +1469,7 @@
no, en-US >> nn
nb, en-US >> nn
-@distance=script
+@favor=script
no, en-US >> nn
nb, en-US >> nn
@@ -1477,7 +1477,7 @@
@supported=nl, he, en-GB
iw, en-US >> he
-@distance=script
+@favor=script
iw, en-US >> he
** test: macro equivalent is closer than same language with other differences
@@ -1485,7 +1485,7 @@
cmn, en-US >> zh
nb, en-US >> no
-@distance=script
+@favor=script
cmn, en-US >> zh
nb, en-US >> no
@@ -1493,18 +1493,18 @@
@supported=nl, fil, en-GB
tl, en-US >> fil
-@distance=script
+@favor=script
tl, en-US >> fil
** test: distinguish near equivalents
@supported=en, ro, mo, ro-MD
ro >> ro
-mo >> mo
+mo >> ro # ro=mo for the locale matcher
ro-MD >> ro-MD
-@distance=script
+@favor=script
ro >> ro
-mo >> mo
+mo >> ro # ro=mo for the locale matcher
ro-MD >> ro-MD
** test: maximization of legacy
@@ -1512,7 +1512,7 @@
sh >> sr-Latn
mo >> ro
-@distance=script
+@favor=script
sh >> sr-Latn
mo >> ro
@@ -1544,31 +1544,50 @@
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
-** test: more specific script should win in case regions are identical
+** test: return first among likely-subtags equivalent locales
+# Formerly the test "more specific script should win in case regions are identical",
+# which expected some different results.
@supported=af, af-Latn, af-Arab
af >> af
af-ZA >> af
af-Latn-ZA >> af
-af-Latn >> af-Latn
+af-Latn >> af
-@distance=script
+@favor=script
af >> af
af-ZA >> af
af-Latn-ZA >> af
-af-Latn >> af-Latn
+af-Latn >> af
-** test: more specific region should win
+# Formerly the test "more specific region should win",
+# which expected some different results.
@supported=nl, nl-NL, nl-BE
+@favor=
nl >> nl
nl-Latn >> nl
nl-Latn-NL >> nl
-nl-NL >> nl-NL
+nl-NL >> nl
-@distance=script
+@favor=script
nl >> nl
nl-Latn >> nl
nl-Latn-NL >> nl
-nl-NL >> nl-NL
+nl-NL >> nl
+
+# Formerly the test "more specific region wins over more specific script",
+# which expected some different results.
+@supported=nl, nl-Latn, nl-NL, nl-BE
+@favor=
+nl >> nl
+nl-Latn >> nl
+nl-NL >> nl
+nl-Latn-NL >> nl
+
+@favor=script
+nl >> nl
+nl-Latn >> nl
+nl-NL >> nl
+nl-Latn-NL >> nl
** test: region may replace matched if matched is enclosing
@supported=es-419, es
@@ -1577,37 +1596,24 @@
@default=
es-SG >> es
-@distance=script
+@favor=script
@default=es-MX
es-MX >> es-419
@default=
es-SG >> es
-** test: more specific region wins over more specific script
-@supported=nl, nl-Latn, nl-NL, nl-BE
-nl >> nl
-nl-Latn >> nl-Latn
-nl-NL >> nl-NL
-nl-Latn-NL >> nl
-
-@distance=script
-nl >> nl
-nl-Latn >> nl-Latn
-nl-NL >> nl-NL
-nl-Latn-NL >> nl
-
** test: region distance Portuguese
@supported=pt, pt-PT
pt-ES >> pt-PT
-@distance=script
+@favor=script
pt-ES >> pt-PT
** test: if no preferred locale specified, pick top language, not regional
@supported=en, fr, fr-CA, fr-CH
fr-US >> fr
-@distance=script
+@favor=script
fr-US >> fr
** test: region distance German
@@ -1622,7 +1628,7 @@
@default=
es-PT >> es-ES
-@distance=script
+@favor=script
en-AU >> en-GB
es-MX >> es-419
@default=
@@ -1649,7 +1655,7 @@
und-Hant >> zh
und-Latn >> it
-@distance=script
+@favor=script
und-FR >> fr
und-CN >> zh
und-Hans >> zh
@@ -1664,22 +1670,22 @@
** test: pick best maximized tag
@supported=ja, ja-Jpan-US, ja-JP, en, ru
ja-Jpan, ru >> ja
-ja-JP, ru >> ja-JP
+ja-JP, ru >> ja
ja-US, ru >> ja-Jpan-US
-@distance=script
+@favor=script
ja-Jpan, ru >> ja
-ja-JP, ru >> ja-JP
+ja-JP, ru >> ja
ja-US, ru >> ja-Jpan-US
** test: termination: pick best maximized match
@supported=ja, ja-Jpan, ja-JP, en, ru
ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja-Jpan
+ja-Jpan, ru >> ja
-@distance=script
+@favor=script
ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja-Jpan
+ja-Jpan, ru >> ja
** test: same language over exact, but distinguish when user is explicit
@supported=fr, en-GB, ja, es-ES, es-MX
@@ -1690,7 +1696,7 @@
en, nl >> en-GB
en, nl, en-GB >> en-GB
-@distance=script
+@favor=script
@supported=fr, en-GB, ja, es-ES, es-MX
ja, de >> ja
@supported=en, de, fr, ja
@@ -1767,7 +1773,7 @@
pt-ST >> pt-PT
pt-TL >> pt-PT
-@distance=script
+@favor=script
en-150 >> en-GB
en-AU >> en-GB
en-BE >> en-GB
@@ -1845,7 +1851,7 @@
@default=de-t-m0-iso-i0-pinyin
de-t-m0-iso-i0-pinyin >> de
-@distance=script
+@favor=script
@default=de-u-co-phonebk
de-FR-u-co-phonebk >> de
@default=sl-NEDIS-u-cu-eur
@@ -1865,28 +1871,28 @@
@supported=de
fr >> de
-@distance=script
+@favor=script
fr >> de
** test: testLooseMatchForGeneral_getBestMatches
@supported=es-419
es-MX >> es-419
-@distance=script
+@favor=script
es-MX >> es-419
** test: testLooseMatchForEnglish_getBestMatches
@supported=en, en-GB
en-CA >> en-GB
-@distance=script
+@favor=script
en-CA >> en-GB
** test: testLooseMatchForChinese_getBestMatches
@supported=zh
zh-TW >> zh
-@distance=script
+@favor=script
zh-TW >> zh
## Geo
@@ -1894,7 +1900,7 @@
** test: testGetBestMatchWithMinMatchScore
@supported=fr-FR, fr, fr-CA, en
@default=und
-fr >> fr # Exact match is chosen.
+fr >> fr-FR # First likely-subtags equivalent match is chosen.
@supported=en, fr, fr-CA
fr-FR >> fr # Parent match is chosen.
@supported=en, fr-CA
@@ -1922,9 +1928,9 @@
@supported=ja
ru >> und
-@distance=script
+@favor=script
@supported=fr-FR, fr, fr-CA, en
-fr >> fr
+fr >> fr-FR
@supported=en, fr, fr-CA
fr-FR >> fr
@supported=en, fr-CA
@@ -1935,19 +1941,19 @@
@supported=en, fr-FR
fr >> fr-FR
@supported=de, en, it
-fr >> de
+fr >> en
@supported=iw, en
iw-Latn >> en
@supported=iw, no
-ru >> iw
+ru >> und
@supported=iw-Latn, iw-Cyrl, iw
ru >> iw-Cyrl
@supported=iw, iw-Latn
-ru >> iw
+ru >> und
en >> iw-Latn
@supported=en, uk
ru >> uk
@supported=zh-TW, en
zh-CN >> zh-TW
@supported=ja
-ru >> ja
+ru >> und