blob: b022ba658fff8137f2be12ad63f00bb63aa7bc48 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.ant;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
/** Helper class to reslove ID configuration. */
final class LocaleIdResolver {
/** Returns the expanded set of target locale IDs based on the given ID specifications. */
public static ImmutableSet<String> expandTargetIds(
Set<String> idSpecs, SupplementalData supplementalData) {
return new LocaleIdResolver(supplementalData).resolve(idSpecs);
}
private final SupplementalData supplementalData;
private LocaleIdResolver(SupplementalData supplementalData) {
this.supplementalData = checkNotNull(supplementalData);
}
// ---- Code below here is to expand the incoming set of locale IDs ----
private static final Pattern WILDCARD_LOCALE = Pattern.compile("[a-z]{2,3}(?:_[A-Z][a-z]{3})?");
private ImmutableSet<String> resolve(Set<String> idSpecs) {
ImmutableSet<String> allAvailableIds = supplementalData.getAvailableLocaleIds();
// Get the minimized wildcard set, converting things like "en_Latn" --> "en".
ImmutableSet<String> wildcardIds = idSpecs.stream()
.filter(supplementalData.getAvailableLocaleIds()::contains)
.filter(id -> WILDCARD_LOCALE.matcher(id).matches())
.map(this::removeDefaultScript)
.collect(toImmutableSet());
// Get the set of IDs which are implied by the wildcard IDs.
Set<String> targetIds = new TreeSet<>();
allAvailableIds.forEach(id -> addWildcardMatches(id, wildcardIds::contains, targetIds));
// Get the IDs which don't need to be in the config (because they are implied).
Set<String> redundant = Sets.intersection(idSpecs, targetIds);
if (!redundant.isEmpty()) {
System.err.println("Configuration lists redundant locale IDs");
System.err.println("The following IDs should be removed from the configuration:");
Iterables.partition(redundant, 16)
.forEach(ids -> System.err.println(String.join(", ", ids)));
// Note that the minimal configuration includes aliases.
Set<String> minimalConfigIds = new TreeSet<>(Sets.difference(idSpecs, targetIds));
minimalConfigIds.remove("root");
ImmutableListMultimap<Character, String> idsByFirstChar =
Multimaps.index(minimalConfigIds, s -> s.charAt(0));
System.err.println("Canonical ID list is:");
for (char c: idsByFirstChar.keySet()) {
System.err.println(" // " + Ascii.toUpperCase(c));
Iterables.partition(idsByFirstChar.get(c), 16)
.forEach(ids -> System.err.println(" " + String.join(", ", ids)));
System.err.println();
}
System.err.flush();
throw new IllegalStateException("Non-canonical configuration");
}
// We return the set of IDs made up of:
// 1: The original IDs specified by the configuration (and any parent IDs).
// 2: IDs expanded from wildcard IDs (e.g. "en_Latn_GB" & "en_Latn" from "en").
// (this is what's already in targetIds).
// 3: The "root" ID.
idSpecs.forEach(id -> addRecursively(id, targetIds));
return ImmutableSet.<String>builder().add("root").addAll(targetIds).build();
}
// E.g. "xx_Fooo" --> "xx" --> "xx_Baar_YY" ==> "xx_Fooo"
// E.g. "xx_Fooo" --> "xx" --> "xx_Fooo_YY" ==> "xx"
private String removeDefaultScript(String id) {
if (id.contains("_")) {
String lang = id.substring(0, id.indexOf("_"));
String maxId = supplementalData.maximize(lang)
.orElseThrow(
() -> new IllegalStateException("cannot maximize language subtag: " + lang));
if (maxId.startsWith(id)) {
return lang;
}
}
return id;
}
private void addRecursively(String id, Set<String> dst) {
while (!id.equals("root") && dst.add(id)) {
id = supplementalData.getParent(id);
}
}
private boolean addWildcardMatches(
String id, Predicate<String> isWildcard, Set<String> dst) {
if (id.equals("root")) {
return false;
}
String parentId = supplementalData.getParent(id);
int index = parentId.indexOf("_");
String parentIdLang = (index < 0)? parentId: parentId.substring(0, index);
index = id.indexOf("_");
String idLang = (index < 0)? id: id.substring(0, index);
if (parentIdLang.equals(idLang) && (isWildcard.test(parentId) || addWildcardMatches(parentId, isWildcard, dst))) {
// Only add child locales here if their language matches their parent; need this to handle nn (child of no)
dst.add(id);
return true;
}
return false;
}
}