ICU-21135 Fix pseudo locales to filter only non-root paths and avoid aliases.
See #1157
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java
index 8747b1f..36c14e2 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java
@@ -9,7 +9,9 @@
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
import static java.util.function.Function.identity;
import static java.util.regex.Pattern.CASE_INSENSITIVE;
+import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Arrays;
import java.util.Set;
@@ -113,25 +115,42 @@
private final CldrDataSupplier src;
private final Set<String> srcIds;
private final CldrData enData;
+ private final ImmutableSet<CldrPath> pathsToProcess;
PseudoSupplier(CldrDataSupplier src) {
this.src = checkNotNull(src);
this.srcIds = src.getAvailableLocaleIds();
- // Use resolved data to ensure we get all the values (e.g. values in "en_001").
+ // Start with resolved data so we can merge values from "en" and "en_001" for coverage
+ // and supply the unfiltered values if someone wants the resolved version of the pseudo
+ // locale data.
this.enData = src.getDataForLocale("en", RESOLVED);
+ // But since we don't want to filter paths which come from the "root" locale (such as
+ // aliases) then we need to find the union of "English" paths we expect to filter.
+ this.pathsToProcess = getUnresolvedPaths(src, "en", "en_001");
// Just check that we aren't wrapping an already wrapped supplier.
PseudoType.getLocaleIds()
.forEach(id -> checkArgument(!srcIds.contains(id),
"pseudo locale %s already supported by given data supplier", id));
}
+ private static ImmutableSet<CldrPath> getUnresolvedPaths(
+ CldrDataSupplier src, String... ids) {
+
+ ImmutableSet.Builder<CldrPath> paths = ImmutableSet.builder();
+ for (String id : ids) {
+ src.getDataForLocale(id, UNRESOLVED).accept(ARBITRARY, v -> paths.add(v.getPath()));
+ }
+ return paths.build();
+ }
+
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus));
}
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
if (PseudoType.getLocaleIds().contains(localeId)) {
- return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId));
+ return new PseudoLocaleData(
+ enData, pathsToProcess, resolution, PseudoType.fromId(localeId));
} else {
return src.getDataForLocale(localeId, resolution);
}
@@ -210,11 +229,18 @@
private final PseudoType type;
private final boolean isResolved;
+ private final ImmutableSet<CldrPath> pathsToProcess;
- private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
+ private PseudoLocaleData(
+ CldrData srcData,
+ ImmutableSet<CldrPath> pathsToProcess,
+ CldrResolution resolution,
+ PseudoType type) {
+
super(srcData);
this.isResolved = checkNotNull(resolution) == RESOLVED;
this.type = checkNotNull(type);
+ this.pathsToProcess = pathsToProcess;
}
@Override
@@ -236,7 +262,7 @@
CldrValue defaultReturnValue = isResolved ? value : null;
// This makes it look like we have explicit values only for the included paths.
- if (!IS_PSEUDO_PATH.test(path)) {
+ if (!pathsToProcess.contains(path) || !IS_PSEUDO_PATH.test(path)) {
return defaultReturnValue;
}
String fullPath = value.getFullPath();
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java
index 494a819..54b758a 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java
@@ -45,7 +45,7 @@
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("xx", target, source, other)
- .addInheritedData("xx", inherited);
+ .addLocaleData("root", inherited);
CldrDataSupplier transformed =
AlternateLocaleData.transform(
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
@@ -97,10 +97,10 @@
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
- // Even though the missing target is not matched (so no change there) the source is always
- // removed from the transformed data.
- assertValuesUnordered(unresolved, other);
- assertValuesUnordered(resolved, other);
+ // If there's no target the alt-path mapping is incomplete and we do nothing (this matches
+ // the old CLDR tool behaviour and reasonable but can hide inconsistencies in CLDR data).
+ assertValuesUnordered(unresolved, source, other);
+ assertValuesUnordered(resolved, source, other);
}
@Test
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java
index 8c2322d..4ffd2f3 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java
@@ -40,8 +40,12 @@
value(excluded, "Skipped"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"),
value(narrow, "Skipped"))
- .addInheritedData("en",
- value(inherited, "UTC"));
+ .addLocaleData("en_001", value(inherited, "UTC"))
+ .setLocaleParent("en", "en_001")
+ // Root is the eventual parent of everything but its value should not appear, even
+ // though the expansion would apply if the paths were overridden.
+ .addLocaleData("root", value(ldmlPath("delimiters/quotationStart"), "“"))
+ .addLocaleData("root", value(ldmlPath("delimiters/quotationEnd"), "”"));
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB");
@@ -75,12 +79,12 @@
.addLocaleData("en",
value(included, "{Hello} {0} {World} 100x"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"))
- .addInheritedData("en",
- value(inherited, "UTC"));
+ .addLocaleData("en_001", value(inherited, "UTC"))
+ .setLocaleParent("en", "en_001");
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
- // The pseudo locale should combine both explicit and inherited data from 'en'.
+ // The pseudo locale should combine both explicit and inherited data from 'en' and 'en_001'.
CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
// These are a kind of golden data test because it's super hard to really reason about
@@ -101,7 +105,7 @@
@Test
public void testLatinNumbering() {
CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn");
- FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn);
+ FakeDataSupplier src = new FakeDataSupplier().addLocaleData("root", latn);
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
index f218b4b..4445fac 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
@@ -65,7 +65,7 @@
ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
simpleResult("/durationUnits/foo", "Bar"));
//ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
- addInheritedMapping("xx",
+ addRootMapping(
ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
simpleResult("/Keys/sometype", "Value"));
@@ -86,7 +86,7 @@
// This is included because the resource bundle path is the same as above. Note that we
// have to use the index to distinguish results here (this corresponds to the line number
// or the real when the real regex based config is used and determines result ordering).
- addInheritedMapping("xx",
+ addRootMapping(
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
simpleResult("/Currencies/USD", 2, "US Dollar"));
@@ -110,7 +110,7 @@
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
@@ -125,7 +125,7 @@
@Test
public void testParentPathsNotIncludedByDefault() {
// Same as above but swapping inherited vs explicit mappings.
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
@@ -154,11 +154,11 @@
public void testHiddenLabelsIncludeParentPaths() {
// Testing that the existence of a child element using a hidden label *does* trigger the
// parent element to be included.
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
@@ -226,13 +226,13 @@
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
// This is the only explicit mapping and it triggers the sibling _and_ the parent.
- addInheritedMapping("xx",
+ addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
@@ -375,8 +375,8 @@
transformer.addResults(value, results);
}
- private void addInheritedMapping(String locale, CldrValue value, Result... results) {
- src.addInheritedData(locale, value);
+ private void addRootMapping(CldrValue value, Result... results) {
+ src.addLocaleData("root", value);
transformer.addResults(value, results);
}
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java
index 287ff35..cf6bb83 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java
@@ -2,10 +2,10 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
-import static com.google.common.base.Preconditions.checkArgument;
-
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
@@ -17,7 +17,9 @@
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
@@ -27,20 +29,17 @@
public final class FakeDataSupplier extends CldrDataSupplier {
private final Map<CldrPath, CldrValue> nonLocaleData = new LinkedHashMap<>();
private final Table<String, CldrPath, CldrValue> unresolvedData = TreeBasedTable.create();
- private final Table<String, CldrPath, CldrValue> resolvedData = TreeBasedTable.create();
+ private final Map<String, String> parentLocales = new HashMap<>();
public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) {
Arrays.stream(values).forEach(v -> {
unresolvedData.put(localeId, v.getPath(), v);
- resolvedData.put(localeId, v.getPath(), v);
});
return this;
}
- public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) {
- Arrays.stream(values)
- .forEach(v -> checkArgument(resolvedData.put(localeId, v.getPath(), v) == null,
- "path already present in unresolved CLDR data: %s", v.getPath()));
+ public FakeDataSupplier setLocaleParent(String localeId, String parentId) {
+ parentLocales.put(localeId, parentId);
return this;
}
@@ -50,9 +49,27 @@
}
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
- Table<String, CldrPath, CldrValue> data =
- resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData;
- return CldrDataSupplier.forValues(data.row(localeId).values());
+ Collection<CldrValue> values;
+ if (resolution == CldrResolution.UNRESOLVED) {
+ values = unresolvedData.row(localeId).values();
+ } else {
+ // This is not "real" resolving since it doesn't handle aliases etc. but it's good
+ // enough for tests.
+ Map<CldrPath, CldrValue> resolved = new HashMap<>();
+ while (true) {
+ unresolvedData.row(localeId).forEach((p, v) -> {
+ if (!resolved.containsKey(p)) {
+ resolved.put(p, v);
+ }
+ });
+ if (localeId.equals("root")) {
+ break;
+ }
+ localeId = parentLocales.getOrDefault(localeId, "root");
+ }
+ values = resolved.values();
+ }
+ return CldrDataSupplier.forValues(values);
}
@Override public CldrData getDataForType(CldrDataType type) {
@@ -61,7 +78,8 @@
}
@Override public Set<String> getAvailableLocaleIds() {
- return Collections.unmodifiableSet(resolvedData.rowKeySet());
+ return Collections.unmodifiableSet(
+ Sets.union(unresolvedData.rowKeySet(), ImmutableSet.of("root")));
}
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {