blob: 0a70709ee987388a90f700cc34a996d24b32066e [file] [log] [blame]
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.dev.tool.locale;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XCldrStub.Predicate;
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.BytesTrieBuilder;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
public final class LocaleDistanceBuilder {
/**
 * Internal key that stands in for a rule's "*" subtag inside the distance tables;
 * see {@link #fixAny(String)}.
 */
private static final String ANY = "�"; // matches any character. Uses value above any subtag.
/** Copy of {@link LSR#DEBUG_OUTPUT}; when true, building the trie prints its size to stdout. */
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
/**
 * Translates the rule wildcard into the internal wildcard key.
 *
 * @param string a subtag as written in a rule
 * @return {@link #ANY} if the subtag is "*", otherwise the subtag itself
 */
private static String fixAny(String string) {
    if ("*".equals(string)) {
        return ANY;
    }
    return string;
}
/**
 * Opens the named bundle from the ICU base data directly (OpenType.DIRECT),
 * using the ICU data class loader.
 *
 * @param name the bundle name, e.g. "supplementalData"
 * @return the opened resource bundle
 */
private static ICUResourceBundle getSupplementalDataBundle(String name) {
    final ClassLoader loader = ICUResourceBundle.ICU_DATA_CLASS_LOADER;
    final ICUResourceBundle.OpenType openType = ICUResourceBundle.OpenType.DIRECT;
    return ICUResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, name, loader, openType);
}
/**
 * The "territoryContainment" table of the supplemental data,
 * together with views derived from it: the transitive closure below "001"
 * and the leaf-only contents of each container.
 */
private static final class TerritoryContainment {
    /** Directed, acyclic containment graph. Maps each container to its direct contents. */
    final Multimap<String, String> graph = TreeMultimap.create();
    /** Maps each container to all of its contents, direct and indirect. */
    final Multimap<String, String> resolved = TreeMultimap.create();
    /** Maps each container only to its leaf contents. */
    final Multimap<String, String> toLeavesOnly = TreeMultimap.create();
    /** The leaves of the graph. */
    final Set<String> leaves;

    /**
     * Reads the containment graph, resolves it starting at "001",
     * and collects the leaves of every resolved container.
     */
    TerritoryContainment(ICUResourceBundle supplementalData) {
        UResource.Value containment = supplementalData.getValueWithFallback("territoryContainment");
        addContainments(new UResource.Key(), containment);
        resolve("001");
        for (Map.Entry<String, Set<String>> containerAndContents : resolved.asMap().entrySet()) {
            final String container = containerAndContents.getKey();
            for (String member : containerAndContents.getValue()) {
                if (resolved.get(member) != null) {
                    continue; // has contents of its own, so it is not a leaf
                }
                // a leaf node (usually a country)
                toLeavesOnly.put(container, member);
            }
        }
        leaves = toLeavesOnly.get("001");
    }

    /**
     * Adds every container-to-contents entry of the table in {@code value} to the graph.
     * Entries whose key is longer than three characters are treated as nested tables
     * and are read recursively.
     */
    private void addContainments(UResource.Key key, UResource.Value value) {
        UResource.Table containers = value.getTable();
        int index = 0;
        while (containers.getKeyAndValue(index, key, value)) {
            if (key.length() > 3) {
                addContainments(key, value); // containedGroupings etc.
            } else {
                String container = key.toString();
                for (String member : value.getStringArrayOrStringAsArray()) {
                    graph.put(container, member);
                }
            }
            ++index;
        }
    }

    /**
     * Records all direct and indirect contents of the region in {@link #resolved}.
     *
     * @return the resolved contents, or an empty set if the region contains nothing
     */
    private Set<String> resolve(String region) {
        Set<String> direct = graph.get(region);
        if (direct == null) {
            return Collections.emptySet();
        }
        // The direct contents first ...
        resolved.putAll(region, direct);
        // ... then whatever each of them contains in turn.
        for (String subregion : direct) {
            Set<String> indirect = resolve(subregion);
            resolved.putAll(region, indirect);
        }
        return resolved.get(region);
    }
}
/**
 * One parsed languageMatch rule: the desired and supported subtag lists,
 * the distance between them, and whether the rule applies in one direction only.
 */
private static final class Rule {
    /** Subtags of the desired side; may be modified while the rule is processed. */
    final List<String> desired;
    /** Subtags of the supported side; same length as {@link #desired}. */
    final List<String> supported;
    /** Distance value of the rule. */
    final int distance;
    /** If true, the rule is not also added with desired and supported swapped. */
    final boolean oneway;

    Rule(List<String> desiredSubtags, List<String> supportedSubtags,
            int ruleDistance, boolean isOneway) {
        desired = desiredSubtags;
        supported = supportedSubtags;
        distance = ruleDistance;
        oneway = isOneway;
    }
}
/**
 * Returns the index assigned to {@code source}, assigning the next free index
 * (the current map size) if it has none yet.
 *
 * @param objectToInt map from object to its index; updated when a new index is assigned
 * @param source the object to look up
 * @return the existing or newly assigned index
 */
private static final <T> int makeUniqueIndex(Map<T, Integer> objectToInt, T source) {
    Integer existing = objectToInt.get(source);
    if (existing != null) {
        return existing;
    }
    int nextIndex = objectToInt.size();
    objectToInt.put(source, nextIndex);
    return nextIndex;
}
/**
 * Accumulates the byte sequence for the current path through the distance tables
 * and feeds complete sequences to a {@link BytesTrieBuilder}.
 * Callers rewind {@link #length} to back out of a path.
 */
private static final class TrieBuilder {
    /** Key bytes of the current path; only the first {@link #length} bytes are in use. */
    byte[] bytes = new byte[24];
    int length = 0;
    BytesTrieBuilder tb = new BytesTrieBuilder();

    /** Appends a '*' to the current path and stores the value for the resulting key. */
    void addStar(int value) {
        assert value >= 0;
        bytes[length++] = '*';
        tb.add(bytes, length, value);
    }

    /**
     * Appends an ASCII subtag to the current path.
     * The key is stored in the trie only if {@code value} is not negative.
     */
    void addSubtag(String s, int value) {
        assert !s.isEmpty();
        assert !s.equals(ANY);
        final int lastIndex = s.length() - 1;
        for (int i = 0; i < lastIndex; ++i) {
            char c = s.charAt(i);
            assert c <= 0x7f;
            bytes[length++] = (byte) c;
        }
        // Mark the last character as a terminator to avoid overlap matches.
        char lastChar = s.charAt(lastIndex);
        assert lastChar <= 0x7f;
        bytes[length++] = (byte) (lastChar | LocaleDistance.END_OF_SUBTAG);
        if (value < 0) {
            return;
        }
        tb.add(bytes, length, value);
    }

    /** Builds the trie (Option.SMALL) and returns it as a byte array of exactly its size. */
    byte[] build() {
        ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
        // Allocate an array with just the necessary capacity,
        // so that we do not hold on to a larger array for a long time.
        byte[] trieBytes = new byte[buffer.remaining()];
        buffer.get(trieBytes);
        if (DEBUG_OUTPUT) {
            System.out.println("distance trie size: " + trieBytes.length + " bytes");
        }
        return trieBytes;
    }
}
private static final class DistanceTable {
/** Distance for the lookup so far. */
int nodeDistance;
/** Child nodes, keyed first by desired subtag and then by supported subtag. */
final Map<String, Map<String, DistanceTable>> subtables = new TreeMap<>();

/** Creates a node with the given distance and no children. */
DistanceTable(int distance) {
    nodeDistance = distance;
}
/** Two tables are equal if they have the same class, the same distance and equal children. */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || obj.getClass() != this.getClass()) {
        return false;
    }
    DistanceTable that = (DistanceTable) obj;
    return nodeDistance == that.nodeDistance && subtables.equals(that.subtables);
}
/** Combines the distance with the hash code of the children, consistent with equals(). */
@Override
public int hashCode() {
    final int childrenHash = subtables.hashCode();
    return childrenHash ^ nodeDistance;
}
/**
 * Looks up the child node for a desired/supported subtag pair, falling back to
 * wildcard (ANY) entries when there is no exact entry.
 *
 * <p>Lookup order: {@code <desired, supported>}, {@code <desired, *>},
 * {@code <*, supported>}, {@code <*, *>}. If there is no row for {@code desired},
 * only the ANY row is consulted.
 *
 * <p>NOTE(review): the code dereferences the ANY row and the final match without
 * null checks, so it appears to rely on every table it is called on having
 * an ANY row with an ANY entry -- confirm against the callers.
 *
 * @param desired the desired subtag
 * @param supported the supported subtag
 * @param distanceTable if not null, its value is set to the node that was found
 * @param starEquals if true, the result is 0 whenever a wildcard entry was used
 *     and {@code desired.equals(supported)}
 * @return the found node's nodeDistance, or 0 in the starEquals case described above
 */
private int getDistance(String desired, String supported,
Output<DistanceTable> distanceTable, boolean starEquals) {
// star: true once any wildcard (ANY) entry has been used instead of an exact one
boolean star = false;
Map<String, DistanceTable> sub2 = subtables.get(desired);
if (sub2 == null) {
sub2 = subtables.get(ANY); // <*, supported>
star = true;
}
DistanceTable value = sub2.get(supported); // <*/desired, supported>
if (value == null) {
value = sub2.get(ANY); // <*/desired, *>
// Only retry in the ANY row if we were not already looking at it.
if (value == null && !star) {
sub2 = subtables.get(ANY); // <*, supported>
value = sub2.get(supported);
if (value == null) {
value = sub2.get(ANY); // <*, *>
}
}
star = true;
}
if (distanceTable != null) {
distanceTable.value = value;
}
// Identical subtags that only matched via a wildcard count as distance 0 if requested.
int result = starEquals && star && desired.equals(supported) ? 0 : value.nodeDistance;
return result;
}
/** Returns the child node stored under the wildcard pair (ANY, ANY). */
private DistanceTable getAnyAnyNode() {
    Map<String, DistanceTable> anyRow = subtables.get(ANY);
    return anyRow.get(ANY);
}
/**
 * Adds a child node to this table for every direct child of {@code other}
 * that this table does not have yet, using the child's distance.
 * Only one level is copied; the children of the children are not.
 */
void copy(DistanceTable other) {
    for (Map.Entry<String, Map<String, DistanceTable>> row : other.subtables.entrySet()) {
        final String desired = row.getKey();
        for (Map.Entry<String, DistanceTable> cell : row.getValue().entrySet()) {
            addSubtable(desired, cell.getKey(), cell.getValue().nodeDistance);
        }
    }
}
/**
 * Returns the child node for the desired/supported pair, creating it with the
 * given distance if it does not exist. An existing node keeps its distance.
 */
DistanceTable addSubtable(String desired, String supported, int distance) {
    Map<String, DistanceTable> row = subtables.get(desired);
    if (row == null) {
        row = new TreeMap<>();
        subtables.put(desired, row);
    }
    DistanceTable node = row.get(supported);
    if (node == null) {
        node = new DistanceTable(distance);
        row.put(supported, node);
    }
    return node;
}
/**
 * Returns the child node stored for exactly this desired/supported pair,
 * or null if there is none. No wildcard fallback is applied.
 */
private DistanceTable getNode(String desired, String supported) {
    Map<String, DistanceTable> row = subtables.get(desired);
    return row == null ? null : row.get(supported);
}
/**
 * Ensures that a child node exists for the desired/supported pair and then
 * applies the action to it.
 * A missing node is created with the distance that a lookup with wildcard fallback
 * yields for the pair, and starts with a copy of the children of the node
 * that the fallback lookup found.
 */
void addSubtables(
        String desired, String supported,
        Predicate<DistanceTable> action) {
    DistanceTable node = getNode(desired, supported);
    if (node == null) {
        // Find out which distance the pair would get via fallback ...
        Output<DistanceTable> fallback = new Output<>();
        int distance = getDistance(desired, supported, fallback, true);
        // ... then create the explicit node with that distance.
        node = addSubtable(desired, supported, distance);
        DistanceTable template = fallback.value;
        if (template != null) {
            node.copy(template);
        }
    }
    action.test(node);
}
/**
 * Adds a script-level distance below every language pair that matches.
 *
 * <p>First, each existing child node whose keys match {@code desiredLang} and
 * {@code supportedLang} (where ANY matches every key) gets a script child for the
 * given script pair. Then the node for the explicit language pair is created if
 * necessary and, if it has no children, receives the script child as well.
 *
 * @param desiredLang desired language subtag, or ANY
 * @param supportedLang supported language subtag, or ANY
 * @param desiredScript desired script subtag, or ANY
 * @param supportedScript supported script subtag, or ANY
 * @param percentage the distance for the script pair
 */
void addSubtables(String desiredLang, String supportedLang,
        String desiredScript, String supportedScript,
        int percentage) {
    // The wildcard checks do not depend on the table entries, so do them once.
    final boolean anyDesiredLang = desiredLang.equals(ANY);
    final boolean anySupportedLang = supportedLang.equals(ANY);
    // add to all the values that have the matching desiredLang and supportedLang
    for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
        if (!anyDesiredLang && !desiredLang.equals(e1.getKey())) {
            continue;
        }
        for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
            if (anySupportedLang || supportedLang.equals(e2.getKey())) {
                e2.getValue().addSubtable(desiredScript, supportedScript, percentage);
            }
        }
    }
    // now add the sequence explicitly
    DistanceTable dt = new DistanceTable(-1);
    dt.addSubtable(desiredScript, supportedScript, percentage);
    CopyIfEmpty r = new CopyIfEmpty(dt);
    addSubtables(desiredLang, supportedLang, r);
}
/**
 * Adds a region-level distance below every language and script pair that matches.
 *
 * <p>First, each existing child node whose keys match {@code desiredLang} and
 * {@code supportedLang} (where ANY matches every key) has the script/region rule
 * added via its own script-level addSubtables(). Then the node for the explicit
 * language pair is created if necessary, and the script pair below it is created
 * if necessary and receives the region child if it has no children.
 *
 * @param desiredLang desired language subtag, or ANY
 * @param supportedLang supported language subtag, or ANY
 * @param desiredScript desired script subtag, or ANY
 * @param supportedScript supported script subtag, or ANY
 * @param desiredRegion desired region key (as passed by the caller), or ANY
 * @param supportedRegion supported region key (as passed by the caller), or ANY
 * @param percentage the distance for the region pair
 */
void addSubtables(String desiredLang, String supportedLang,
        String desiredScript, String supportedScript,
        String desiredRegion, String supportedRegion,
        int percentage) {
    // The wildcard checks do not depend on the table entries, so do them once.
    final boolean anyDesiredLang = desiredLang.equals(ANY);
    final boolean anySupportedLang = supportedLang.equals(ANY);
    // add to all the values that have the matching desiredLang and supportedLang
    for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
        if (!anyDesiredLang && !desiredLang.equals(e1.getKey())) {
            continue;
        }
        for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
            if (anySupportedLang || supportedLang.equals(e2.getKey())) {
                e2.getValue().addSubtables(
                        desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
            }
        }
    }
    // now add the sequence explicitly
    DistanceTable dt = new DistanceTable(-1);
    dt.addSubtable(desiredRegion, supportedRegion, percentage);
    AddSub r = new AddSub(desiredScript, supportedScript, dt);
    addSubtables(desiredLang, supportedLang, r);
}
/**
 * Removes or marks tables that only repeat the default distances.
 * Children are pruned first; then, if this table has a single row,
 * its (ANY, ANY) node is compared with the default distance for this level.
 *
 * @param level 0 for the language table, 1 for a script table, 2 for a region table
 * @param distances the default distances, indexed by the LocaleDistance.IX_ constants
 */
void prune(int level, int[] distances) {
    for (Map<String, DistanceTable> row : subtables.values()) {
        for (DistanceTable child : row.values()) {
            child.prune(level + 1, distances);
        }
    }
    if (subtables.size() != 1) {
        return;
    }
    DistanceTable next = getAnyAnyNode();
    if (level == 1) {
        // Remove script table -*-*-50 where there are no other script rules
        // and no following region rules.
        // If there are region rules, then mark this table for skipping.
        if (next.nodeDistance != distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE]) {
            return;
        }
        if (next.subtables.isEmpty()) {
            subtables.clear();
        } else {
            nodeDistance |= LocaleDistance.DISTANCE_SKIP_SCRIPT;
        }
    } else if (level == 2
            && next.nodeDistance == distances[LocaleDistance.IX_DEF_REGION_DISTANCE]) {
        // Remove region table -*-*-4 where there are no other region rules.
        subtables.clear();
    }
}
/** Returns a multi-line dump: a "distance: N" header followed by the nested tables. */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder("distance: ");
    sb.append(nodeDistance).append('\n');
    toString("", sb);
    return sb.toString();
}
/**
 * Appends a tab-separated dump of the children to the buffer, one line per
 * desired/supported pair, with each nesting level indented by three more tabs.
 *
 * @param indent the indentation of this table; empty for the top-level table
 * @param buffer the buffer to append to
 * @return the buffer
 */
private StringBuilder toString(String indent, StringBuilder buffer) {
    String rowPrefix = indent.isEmpty() ? "" : "\t";
    final String childIndent = indent + "\t\t\t";
    for (Map.Entry<String, Map<String, DistanceTable>> row : subtables.entrySet()) {
        buffer.append(rowPrefix).append(row.getKey());
        String cellPrefix = "\t";
        for (Map.Entry<String, DistanceTable> cell : row.getValue().entrySet()) {
            DistanceTable child = cell.getValue();
            buffer.append(cellPrefix).append(cell.getKey())
                    .append('\t').append(child.nodeDistance);
            child.toString(childIndent, buffer);
            buffer.append('\n');
            // Later cells of the same row start on their own line.
            cellPrefix = indent + '\t';
        }
        rowPrefix = indent;
    }
    return buffer;
}
/**
 * Adds the keys for this table and all of its descendants to the trie builder.
 * Each level contributes either a single '*' (for the ANY row) or the desired
 * subtag followed by the supported subtag; the distance of a node is stored as
 * the value of the key that ends with its level.
 * On return, builder.length is back at the value it had on entry.
 *
 * @param builder the builder holding the key bytes of the path so far
 */
void toTrie(TrieBuilder builder) {
// A negative nodeDistance (tables are created with -1 elsewhere in this file) has every
// bit set, so the flag is only tested on non-negative values.
if (nodeDistance >= 0 && (nodeDistance & LocaleDistance.DISTANCE_SKIP_SCRIPT) != 0) {
// This table was marked for skipping: emit only what is below its ANY-ANY node.
getAnyAnyNode().toTrie(builder);
return;
}
// Remember where this level starts so that each row can rewind to it.
int startLength = builder.length;
for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
String desired = desSuppNode.getKey();
Map<String, DistanceTable> suppNodeMap = desSuppNode.getValue();
// Collapse ANY-ANY into one single *.
if (desired.equals(ANY)) {
assert suppNodeMap.size() == 1;
DistanceTable node = suppNodeMap.get(ANY);
builder.addStar(node.nodeDistance);
node.toTrie(builder);
} else {
// -1: the desired subtag alone is only a prefix and gets no trie value.
builder.addSubtag(desired, -1);
int desiredLength = builder.length;
for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
String supported = suppNode.getKey();
assert !supported.equals(ANY);
DistanceTable node = suppNode.getValue();
builder.addSubtag(supported, node.nodeDistance);
node.toTrie(builder);
// Drop the supported subtag again; keep the desired prefix for the next one.
builder.length = desiredLength;
}
}
// Drop everything this row appended.
builder.length = startLength;
}
}
}
/**
 * Action that copies the children of a template table into a node,
 * but only if the node has no children yet.
 */
private static final class CopyIfEmpty implements Predicate<DistanceTable> {
    private final DistanceTable template;

    CopyIfEmpty(DistanceTable resetIfNotNull) {
        template = resetIfNotNull;
    }

    /** Always returns true. */
    @Override
    public boolean test(DistanceTable node) {
        boolean hasChildren = !node.subtables.isEmpty();
        if (!hasChildren) {
            node.copy(template);
        }
        return true;
    }
}
/**
 * Action that descends one level: it makes sure that the node has a child for the
 * given desired/supported pair and applies a {@link CopyIfEmpty} for the
 * table to copy to that child.
 */
private static final class AddSub implements Predicate<DistanceTable> {
    private final String desiredSub;
    private final String supportedSub;
    private final CopyIfEmpty copier;

    AddSub(String desiredSub, String supportedSub, DistanceTable distanceTableToCopy) {
        this.desiredSub = desiredSub;
        this.supportedSub = supportedSub;
        this.copier = new CopyIfEmpty(distanceTableToCopy);
    }

    /**
     * Always returns true.
     *
     * @throws IllegalArgumentException if the node is null
     */
    @Override
    public boolean test(DistanceTable node) {
        if (node == null) {
            throw new IllegalArgumentException("bad structure");
        }
        node.addSubtables(desiredSub, supportedSub, copier);
        return true;
    }
}
/**
 * Returns the partition IDs for a region variable.
 *
 * @param variableToPartition maps each variable to its partition IDs
 * @param variable a variable name, or "*"
 * @return a singleton "*" for the wildcard, otherwise the partitions of the variable
 * @throws IllegalArgumentException if the variable has no partitions
 */
private static Collection<String> getIdsFromVariable(
        Multimap<String, String> variableToPartition, String variable) {
    if (variable.equals("*")) {
        return Collections.singleton("*");
    }
    Collection<String> partitions = variableToPartition.get(variable);
    boolean defined = partitions != null && !partitions.isEmpty();
    if (!defined) {
        throw new IllegalArgumentException("Variable not defined: " + variable);
    }
    return partitions;
}
// VisibleForTesting
/**
 * Builds the locale distance data from the language matching data in the
 * "supplementalData" resource bundle.
 *
 * <p>Steps: read the paradigm locales, the territory containment and the match
 * variables; parse the match rules; partition the regions; enter all rules into a
 * tree of distance tables; record the default distances; prune the tree;
 * and serialize it into a trie.
 *
 * @return the trie, the region partition data, the paradigm LSRs and the default distances
 * @throws IllegalArgumentException if the rules are malformed or out of order,
 *     or if a rule refers to an undefined variable
 */
public static LocaleDistance.Data build() {
    // From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
    // and then paradigmLocales, matchVariable, and the last languageMatch items.
    ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
    Set<LSR> paradigmLSRs = readParadigmLSRs(supplementalData);

    TerritoryContainment tc = new TerritoryContainment(supplementalData);
    RegionMapperBuilder rmb = new RegionMapperBuilder(tc);
    readMatchVariables(supplementalData, rmb);

    // Parse the rules.
    // We could almost process them while reading them from the source data,
    // but a rule may contain a region code rather than a variable.
    // We need to create a variable for each such region code
    // before rmb.build() and before processing the rules.
    List<Rule> rules = readRules(supplementalData, rmb);
    rmb.build();

    final DistanceTable defaultDistanceTable = new DistanceTable(-1);
    int minRegionDistance = addRules(defaultDistanceTable, rules, rmb.variableToPartitions);

    // The default distances are those of the *-* nodes on each of the three levels.
    int[] distances = new int[LocaleDistance.IX_LIMIT];
    DistanceTable node = defaultDistanceTable.getAnyAnyNode();
    distances[LocaleDistance.IX_DEF_LANG_DISTANCE] = node.nodeDistance;
    node = node.getAnyAnyNode();
    distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE] = node.nodeDistance;
    node = node.getAnyAnyNode();
    distances[LocaleDistance.IX_DEF_REGION_DISTANCE] = node.nodeDistance;
    distances[LocaleDistance.IX_MIN_REGION_DISTANCE] = minRegionDistance;

    defaultDistanceTable.prune(0, distances);
    assert defaultDistanceTable.getAnyAnyNode().subtables.isEmpty();
    defaultDistanceTable.subtables.remove(ANY);

    TrieBuilder trieBuilder = new TrieBuilder();
    defaultDistanceTable.toTrie(trieBuilder);
    byte[] trie = trieBuilder.build();
    return new LocaleDistance.Data(
            trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
            paradigmLSRs, distances);
}

/** Reads the paradigm locales and returns their maximized LSRs in source order. */
private static Set<LSR> readParadigmLSRs(ICUResourceBundle supplementalData) {
    String[] paradigms = supplementalData.getValueWithFallback(
            "languageMatchingInfo/written/paradigmLocales").getStringArray();
    // LinkedHashSet for stable order; otherwise a unit test is flaky.
    Set<LSR> paradigmLSRs = new LinkedHashSet<>(); // could be TreeSet if LSR were Comparable
    for (String paradigm : paradigms) {
        ULocale pl = new ULocale(paradigm);
        LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
        // Clear the LSR flags to make the data equality test in
        // LocaleDistanceTest happy.
        paradigmLSRs.add(new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS));
    }
    return paradigmLSRs;
}

/** Registers every matchVariable of the source data, as "$" + its key, with the region mapper. */
private static void readMatchVariables(
        ICUResourceBundle supplementalData, RegionMapperBuilder rmb) {
    UResource.Value value = supplementalData.getValueWithFallback(
            "languageMatchingInfo/written/matchVariable");
    UResource.Table variables = value.getTable();
    UResource.Key key = new UResource.Key();
    for (int i = 0; variables.getKeyAndValue(i, key, value); ++i) {
        String variable = "$" + key.toString();
        String regions = value.getString();
        rmb.add(variable, regions);
    }
}

/**
 * Parses and validates the match rules, and makes sure that the region subtag of
 * every three-level rule is a variable known to the region mapper.
 *
 * @throws IllegalArgumentException if a rule has sides of different lengths,
 *     if a rule has fewer subtags than the rule before it,
 *     or if its wildcards violate the constraints checked by checkStars()
 */
private static List<Rule> readRules(
        ICUResourceBundle supplementalData, RegionMapperBuilder rmb) {
    Splitter underscore = Splitter.on('_');
    int prevSize = 0;
    UResource.Value value = supplementalData.getValueWithFallback("languageMatchingNew/written");
    UResource.Array matches = value.getArray();
    List<Rule> rules = new ArrayList<>(matches.getSize());
    for (int i = 0; matches.getValue(i, value); ++i) {
        // Tuple: desired, supported, distance, and optionally "1" for a one-way rule.
        String[] tuple = value.getStringArray();
        int distance = Integer.parseInt(tuple[2]);
        boolean oneway = tuple.length >= 4 && tuple[3].equals("1");
        List<String> desired = new ArrayList<>(underscore.splitToList(tuple[0]));
        List<String> supported = new ArrayList<>(underscore.splitToList(tuple[1]));
        int size = desired.size();
        if (size != supported.size()) {
            throw new IllegalArgumentException("uneven languageMatches pair");
        }
        if (size < prevSize) {
            throw new IllegalArgumentException("languageMatches out of order");
        }
        prevSize = size;
        // Implementation shortcuts assume:
        // - At any level, either both or neither rule subtags are *.
        // - If the rule language subtags are *, the other-level subtags must also be *.
        // If there are rules that do not fit these constraints,
        // then we need to revise the implementation.
        int langStars = checkStars(desired.get(0), supported.get(0), false);
        if (size >= 2) {
            checkStars(desired.get(1), supported.get(1), langStars == 2);
        }
        if (size == 3) {
            checkStars(desired.get(2), supported.get(2), langStars == 2);
            rmb.ensureRegionIsVariable(desired);
            rmb.ensureRegionIsVariable(supported);
        }
        rules.add(new Rule(desired, supported, distance, oneway));
    }
    return rules;
}

/**
 * Enters all rules into the distance table.
 *
 * <p>At the start we have a variable setting like $A1=US+CA+MX, and a mapping from
 * $A1 to a set of partitions {P1, P2}. A rule that contains a variable is replaced
 * by multiple rules, one for each combination of partitions.
 * Rules that are not one-way are also entered with desired and supported swapped.
 *
 * @param table the top-level (language) distance table to fill
 * @param rules the parsed rules; the region elements of their lists are overwritten
 * @param variableToPartition maps each region variable to its partition IDs
 * @return the smallest distance of any three-level rule, or 100 if there is none
 */
private static int addRules(DistanceTable table, List<Rule> rules,
        Multimap<String, String> variableToPartition) {
    int minRegionDistance = 100;
    for (Rule rule : rules) {
        List<String> desired = rule.desired;
        List<String> supported = rule.supported;
        if (desired.size() <= 2) {
            // language-only or language-script
            add(table, desired, supported, rule.distance);
            if (!rule.oneway && !desired.equals(supported)) {
                add(table, supported, desired, rule.distance);
            }
            continue;
        }
        // language-script-region
        minRegionDistance = Math.min(minRegionDistance, rule.distance);
        Collection<String> desiredRegions =
                getIdsFromVariable(variableToPartition, desired.get(2));
        Collection<String> supportedRegions =
                getIdsFromVariable(variableToPartition, supported.get(2));
        for (String desiredRegion : desiredRegions) {
            // The rule's own lists are reused as scratch space for each combination.
            desired.set(2, desiredRegion);
            for (String supportedRegion : supportedRegions) {
                supported.set(2, supportedRegion);
                add(table, desired, supported, rule.distance);
                if (!rule.oneway) {
                    add(table, supported, desired, rule.distance);
                }
            }
        }
    }
    return minRegionDistance;
}
/**
 * Validates the wildcard use of one level of a rule.
 *
 * @param desired the desired subtag of the level
 * @param supported the supported subtag of the level
 * @param allStars true if both subtags are required to be "*"
 * @return the number of "*" subtags: 0 or 2
 * @throws IllegalArgumentException if exactly one subtag is "*",
 *     or if {@code allStars} is true and the subtags are not both "*"
 */
private static int checkStars(String desired, String supported, boolean allStars) {
    int stars = 0;
    if (desired.equals("*")) {
        ++stars;
    }
    if (supported.equals("*")) {
        ++stars;
    }
    if (stars == 1) {
        throw new IllegalArgumentException("either both or neither rule subtags must be *: " +
                desired + ", " + supported);
    }
    if (allStars && stars != 2) {
        throw new IllegalArgumentException("both language subtags are * --> " +
                "both rule subtags on all levels must be *: " +
                desired + ", " + supported);
    }
    return stars;
}
/**
 * Enters one rule with one, two or three levels (language, script, region)
 * into the top-level distance table. A "*" subtag is entered as the ANY key.
 *
 * @param languageDesired2Supported the top-level (language) table
 * @param desired the desired subtags
 * @param supported the supported subtags; must have the same length as {@code desired}
 * @param percentage the distance of the rule
 * @throws IllegalArgumentException if the lists differ in length or do not have 1..3 elements
 */
private static void add(DistanceTable languageDesired2Supported,
        List<String> desired, List<String> supported, int percentage) {
    final int levels = desired.size();
    if (levels != supported.size() || levels < 1 || levels > 3) {
        throw new IllegalArgumentException();
    }
    final String desiredLang = fixAny(desired.get(0));
    final String supportedLang = fixAny(supported.get(0));
    if (levels == 1) {
        languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
        return;
    }
    final String desiredScript = fixAny(desired.get(1));
    final String supportedScript = fixAny(supported.get(1));
    if (levels == 2) {
        languageDesired2Supported.addSubtables(
                desiredLang, supportedLang, desiredScript, supportedScript, percentage);
        return;
    }
    final String desiredRegion = fixAny(desired.get(2));
    final String supportedRegion = fixAny(supported.get(2));
    languageDesired2Supported.addSubtables(
            desiredLang, supportedLang, desiredScript, supportedScript,
            desiredRegion, supportedRegion, percentage);
}
private static final class RegionMapperBuilder {
/** The names of all variables added so far, including the generated inverse ("$!") ones. */
private final Set<String> variables = new HashSet<>();
/** Maps each region to the variables whose region sets contain it. */
private final Multimap<String, String> regionToRawPartition = TreeMultimap.create();
/** Parser for region set expressions; it also computes the inverse of the last parsed set. */
private final RegionSet regionSet;
private final TerritoryContainment tc;

// build() output
Multimap<String, String> variableToPartitions;
private byte[] regionToPartitionsIndex;
private String[] partitionArrays;

RegionMapperBuilder(TerritoryContainment tc) {
    this.tc = tc;
    this.regionSet = new RegionSet(tc);
}
/** Returns true for the wildcard "*" and for every variable that has been added. */
private boolean isKnownVariable(String variable) {
    if (variables.contains(variable)) {
        return true;
    }
    return variable.equals("*");
}
/**
 * Defines a variable as a set of regions, and also defines its inverse variable
 * (same name with the prefix "$!" instead of "$") as all leaf regions that are
 * not in that set.
 *
 * @param variable the new variable name; starts with "$" but not with "$!"
 * @param barString the region set expression, as parsed by RegionSet.parseSet()
 */
void add(String variable, String barString) {
    assert !isKnownVariable(variable);
    assert variable.startsWith("$");
    assert !variable.startsWith("$!");
    variables.add(variable);
    Set<String> members = regionSet.parseSet(barString);
    for (String member : members) {
        regionToRawPartition.put(member, variable);
    }
    // now add the inverse variable
    // inverse() is relative to the set that parseSet() has just produced.
    Set<String> nonMembers = regionSet.inverse();
    final String inverseVariable = "$!" + variable.substring(1);
    assert !isKnownVariable(inverseVariable);
    variables.add(inverseVariable);
    for (String nonMember : nonMembers) {
        regionToRawPartition.put(nonMember, inverseVariable);
    }
}
/**
 * If the region element (index 2) of the list is not a known variable or "*",
 * defines the variable "$" + region for just that region
 * and replaces the list element with the variable.
 */
void ensureRegionIsVariable(List<String> lsrList) {
    final String region = lsrList.get(2);
    if (isKnownVariable(region)) {
        return;
    }
    assert LSR.indexForRegion(region) > 0; // well-formed region subtag
    final String variable = "$" + region;
    add(variable, region);
    lsrList.set(2, variable);
}
/**
 * Computes the region partitions from the variables added so far and sets the
 * build() output fields: variableToPartitions, regionToPartitionsIndex and
 * partitionArrays.
 *
 * <p>Regions that belong to exactly the same set of variables form one partition,
 * identified by a single ASCII character. Each region index maps to a pIndex into
 * partitionArrays, whose strings consist of the partition characters for that
 * region; a macro region maps to the partitions that its contents intersect with.
 * pIndex 0 is the default and yields ".".
 */
void build() {
    // Partitions as sets of variables.
    // LinkedHashMap to store & number unique sets.
    // Example: {"$!cnsar", "$!enUS", "$!maghreb", "$americas"}
    Map<Collection<String>, Integer> partitionVariables = new LinkedHashMap<>();
    // Partitions as sets of lookup ID strings.
    // Example: {"1", "5"}
    Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
    // pIndex 0: default value in regionToPartitionsIndex
    Collection<String> noPartitions = Collections.singleton(".");
    makeUniqueIndex(partitionStrings, noPartitions);
    // Example: "$americas" -> {"1", "5"}
    variableToPartitions = TreeMultimap.create();
    // Maps the index of each region code to a pIndex into partitionStrings.
    regionToPartitionsIndex = new byte[LSR.REGION_INDEX_LIMIT];
    // Maps a partition string to the set of region codes in that partition.
    // Example: "5" -> {"PR", "US", "VI"}
    Multimap<String, String> partitionToRegions = TreeMultimap.create();
    for (Map.Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
        final String region = e.getKey();
        final Collection<String> rawPartition = e.getValue();
        // Single-character string.
        // Must be an ASCII character and must not be '*'.
        // Used to start with α.
        char partitionChar = (char) ('0' + makeUniqueIndex(partitionVariables, rawPartition));
        assert partitionChar <= 0x7f;
        String partition = String.valueOf(partitionChar);
        int pIndex = makeUniqueIndex(partitionStrings, Collections.singleton(partition));
        // The pIndex must fit into a byte.
        // For Java code simplicity, we want it to also be non-negative.
        assert pIndex <= 0x7f;
        regionToPartitionsIndex[LSR.indexForRegion(region)] = (byte) pIndex;
        partitionToRegions.put(partition, region);
        for (String variable : rawPartition) {
            variableToPartitions.put(variable, partition);
        }
    }
    // We get a mapping of each macro to the partitions it intersects with.
    // Example: "419" -> {"1", "5"}
    Multimap<String, String> macroToPartitions = TreeMultimap.create();
    for (Map.Entry<String, Set<String>> e : tc.resolved.asMap().entrySet()) {
        String macro = e.getKey();
        for (Map.Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
            String partition = e2.getKey();
            if (!Collections.disjoint(e.getValue(), e2.getValue())) {
                macroToPartitions.put(macro, partition);
            }
        }
    }
    // Create a combined mapping from a region code, which can be a macro region,
    // via the getRegionIndex() of that region code,
    // to a set of single-character partition strings.
    for (Map.Entry<String, Set<String>> m2p : macroToPartitions.asMap().entrySet()) {
        String macro = m2p.getKey();
        int regionIndex = LSR.indexForRegion(macro);
        // Do not overwrite an index that was already set in the per-region loop above.
        if (regionToPartitionsIndex[regionIndex] == 0) {
            Set<String> partitions = m2p.getValue();
            int pIndex = makeUniqueIndex(partitionStrings, partitions);
            // Same constraint as for single regions: the pIndex must fit into
            // a non-negative byte, otherwise the cast below would silently wrap.
            assert pIndex <= 0x7f;
            regionToPartitionsIndex[regionIndex] = (byte) pIndex;
        }
    }
    // LSR.indexForRegion(ill-formed region) returns 0.
    // Its regionToPartitionsIndex must also be 0 for the noPartitions value.
    assert regionToPartitionsIndex[0] == 0;
    // Turn the Collection of Collections of single-character strings
    // into an array of strings.
    Collection<Collection<String>> list = partitionStrings.keySet();
    partitionArrays = new String[list.size()];
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (Collection<String> partitions : list) {
        assert !partitions.isEmpty();
        sb.setLength(0);
        for (String p : partitions) {
            assert p.length() == 1;
            sb.append(p);
        }
        partitionArrays[i++] = sb.toString();
    }
}
}
/**
 * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
 * All macroregions are fully resolved to sets of non-macro regions.
 * <br>Syntax is simple for now:
 * <pre>regionSet := region ([-+] region)*</pre>
 * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
 */
private static final class RegionSet {
    private enum Operation {add, remove}

    private final TerritoryContainment tc;
    // temporaries used in processing
    private final Set<String> tempRegions = new TreeSet<>();
    private Operation operation = null;

    RegionSet(TerritoryContainment tc) {
        this.tc = tc;
    }

    /**
     * Parses the expression from left to right.
     *
     * @return the internal working set, which the next parseSet() call clears and reuses
     */
    private Set<String> parseSet(String barString) {
        operation = Operation.add;
        tempRegions.clear();
        final int end = barString.length();
        int start = 0;
        for (int pos = 0; pos < end; ++pos) {
            char c = barString.charAt(pos); // UTF16 is ok, since syntax is only ascii
            if (c != '+' && c != '-') {
                continue;
            }
            // Apply the pending operation to the region that ends here,
            // then remember the operator for the region that follows.
            add(barString, start, pos);
            start = pos + 1;
            operation = (c == '+') ? Operation.add : Operation.remove;
        }
        add(barString, start, end);
        return tempRegions;
    }

    /** Returns a new set with all leaf regions that are not in the last parsed set. */
    private Set<String> inverse() {
        TreeSet<String> complement = new TreeSet<>(tc.leaves);
        complement.removeAll(tempRegions);
        return complement;
    }

    /** Applies the pending operation to barString[last, i) unless that range is empty. */
    private void add(String barString, int last, int i) {
        if (i <= last) {
            return;
        }
        changeSet(operation, barString.substring(last, i));
    }

    /**
     * Adds the region to the working set or removes it from the set.
     * A region with leaf contents is replaced by those leaves.
     */
    private void changeSet(Operation operation, String region) {
        Collection<String> contained = tc.toLeavesOnly.get(region);
        final boolean hasLeaves = contained != null && !contained.isEmpty();
        if (Operation.add == operation) {
            if (hasLeaves) {
                tempRegions.addAll(contained);
            } else {
                tempRegions.add(region);
            }
        } else {
            if (hasLeaves) {
                tempRegions.removeAll(contained);
            } else {
                tempRegions.remove(region);
            }
        }
    }
}
/** Directory into which main() writes the generated text file. */
private static final String TXT_PATH = "/tmp";
/** Base name of the generated file; also used as the name of the top-level table in it. */
private static final String TXT_FILE_BASE_NAME = "langInfo";
/** Name of the generated file. */
private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt";
/**
 * Opens the output file TXT_PATH/TXT_FILE_NAME for writing in UTF-8,
 * with a 4096-character buffer.
 *
 * @throws IOException if the file cannot be opened for writing
 */
private static PrintWriter openWriter() throws IOException {
    File file = new File(TXT_PATH, TXT_FILE_NAME);
    FileOutputStream stream = new FileOutputStream(file);
    OutputStreamWriter utf8Writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8);
    return new PrintWriter(new BufferedWriter(utf8Writer, 4096));
}
/**
 * Prints the bytes as two-digit lowercase hex numbers without separators,
 * 16 bytes per line, and ends the last line with a line separator.
 * An empty array yields a single empty line.
 */
private static void printManyHexBytes(PrintWriter out, byte[] bytes) {
    for (int i = 0; i < bytes.length; ++i) {
        boolean startsNewRow = i != 0 && (i & 0xf) == 0;
        if (startsNewRow) {
            out.println();
        }
        out.format("%02x", bytes[i] & 0xff);
    }
    out.println();
}
/**
 * Builds the likely-subtags data and the locale distance data,
 * and writes both as a resource bundle text file to TXT_PATH/TXT_FILE_NAME.
 *
 * @param args ignored
 * @throws IOException if the output file cannot be opened, or if writing to it failed
 */
public static final void main(String[] args) throws IOException {
    XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
    LocaleDistance.Data distanceData = build();
    System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
    try (PrintWriter out = openWriter()) {
        out.println("// © 2019 and later: Unicode, Inc. and others.\n" +
                "// License & terms of use: http://www.unicode.org/copyright.html\n" +
                "// Generated by ICU4J LocaleDistanceBuilder.\n" +
                TXT_FILE_BASE_NAME + ":table(nofallback){");
        out.println(" likely{");
        printAliases(out, "languageAliases", likelyData.languageAliases);
        printAliases(out, "regionAliases", likelyData.regionAliases);
        out.println(" trie:bin{ // BytesTrie: " + likelyData.trie.length + " bytes");
        printManyHexBytes(out, likelyData.trie);
        out.println(" } // trie");
        out.println(" lsrs{ // " + likelyData.lsrs.length);
        for (LSR lsr : likelyData.lsrs) {
            printLsrRow(out, lsr);
        }
        out.println(" } // lsrs");
        out.println(" } // likely");
        out.println(" match{");
        out.println(" trie:bin{ // BytesTrie: " + distanceData.trie.length + " bytes");
        printManyHexBytes(out, distanceData.trie);
        out.println(" } // trie");
        out.println(" regionToPartitions:bin{ // " +
                distanceData.regionToPartitionsIndex.length + " bytes");
        printManyHexBytes(out, distanceData.regionToPartitionsIndex);
        out.println(" } // regionToPartitions");
        out.print(" partitions{");
        boolean first = true;
        for (String p : distanceData.partitionArrays) {
            if (first) {
                first = false;
            } else {
                out.append(',');
            }
            out.append('"').print(p);
            out.append('"');
        }
        out.println("}");
        out.println(" paradigms{");
        for (LSR lsr : distanceData.paradigmLSRs) {
            printLsrRow(out, lsr);
        }
        out.println(" }");
        out.print(" distances:intvector{");
        first = true;
        for (int d : distanceData.distances) {
            if (first) {
                first = false;
            } else {
                out.append(',');
            }
            out.print(d);
        }
        out.println("}");
        out.println(" } // match");
        out.println("}");
        // PrintWriter never throws on write errors; it only records them.
        // Ask explicitly, so that a failed write does not look like success.
        if (out.checkError()) {
            throw new IOException("Error writing " + TXT_PATH + '/' + TXT_FILE_NAME);
        }
    }
}

/** Prints one alias table, sorted by key, as pairs of quoted strings. */
private static void printAliases(PrintWriter out, String tableName, Map<String, String> aliases) {
    out.println(" " + tableName + "{ // " + aliases.size());
    for (Map.Entry<String, String> entry : new TreeMap<>(aliases).entrySet()) {
        out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
    }
    out.println(" } // " + tableName);
}

/** Prints the language, script and region of an LSR as one line of three quoted strings. */
private static void printLsrRow(PrintWriter out, LSR lsr) {
    out.println(" \"" + lsr.language + "\",\"" +
            lsr.script + "\",\"" + lsr.region + "\",");
}
}