blob: 1a46ce40eb4d8b315e27abedf7edd2a7fd4f4560 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.ant;
import static com.google.common.base.CharMatcher.inRange;
import static com.google.common.base.CharMatcher.is;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableTable.toImmutableTable;
import static com.google.common.collect.Tables.immutableCell;
import static java.util.stream.Collectors.joining;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Table.Cell;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
public final class ConvertIcuDataTask extends Task {
// Splits list-valued attributes and element text on commas or newlines, trimming whitespace
// from each element and dropping empty elements.
private static final Splitter LIST_SPLITTER =
Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings();
// Character classes used by resolve() to validate an enum name and to detect whether it is
// written in UPPER_UNDERSCORE or lower_underscore form.
private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_'));
private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE);
private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE);
// Any character accepted in an enum name: ASCII letters, digits and '_'.
private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE);
// CLDR directory, set by setCldrDir() and passed to the CLDR data supplier in execute().
private Path cldrPath;
// Minimum draft status, set by setMinimalDraftStatus() and used in execute().
private CldrDraftStatus minimumDraftStatus;
// Set of default locale ID specifiers (wildcard IDs which are expanded).
private LocaleIds localeIds = null;
// Per directory overrides (fully specified locale IDs).
private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
// Per directory locale IDs for which execute() registers a forced parent made of the first two
// characters of the ID.
private final SetMultimap<IcuLocaleDir, String> inheritLanguageSubtag = HashMultimap.create();
// Accumulates the converter configuration from the setters; built in execute().
private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
// Don't try and resolve actual paths until inside the execute method.
private final List<AltPath> altPaths = new ArrayList<>();
// TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
private boolean includePseudoLocales = false;
// Filter applied to locale IDs in execute(); accepts every ID unless setLocaleIdFilter() is called.
private Predicate<String> idFilter = id -> true;
/**
 * Sets the output directory on the converter configuration.
 *
 * @param path the output directory, given as a string
 */
@SuppressWarnings("unused")
public void setOutputDir(String path) {
    // A String is accepted because Ant cannot convert attribute values to Path on every system.
    Path outputDir = Paths.get(path);
    config.setOutputDir(outputDir);
}
/**
 * Sets the CLDR directory used by {@code execute()} to create the CLDR data supplier.
 *
 * @param path the CLDR directory, given as a string
 * @throws NullPointerException if {@code path} is null
 */
@SuppressWarnings("unused")
public void setCldrDir(String path) {
    // Use String here since on some systems Ant doesn't support automatically converting Path
    // instances. Paths.get() never returns null, so the null check belongs on the argument.
    this.cldrPath = Paths.get(checkNotNull(path, "CLDR directory path must not be null"));
}
/**
 * Passes the given CLDR version string to the converter configuration.
 *
 * @param cldrVersion the CLDR version string
 */
@SuppressWarnings("unused")
public void setCldrVersion(String cldrVersion) {
config.setCldrVersion(cldrVersion);
}
/**
 * Sets the minimum draft status from its enum name.
 *
 * @param status name of a {@code CldrDraftStatus} constant, in any form accepted by
 *     {@code resolve()}
 * @throws IllegalArgumentException if the name does not resolve to a constant
 */
@SuppressWarnings("unused")
public void setMinimalDraftStatus(String status) {
    CldrDraftStatus resolvedStatus = resolve(CldrDraftStatus.class, status);
    this.minimumDraftStatus = resolvedStatus;
}
/**
 * Sets the output types from a comma or newline separated list of enum names.
 *
 * <p>If the list contains no names, the converter configuration is left untouched.
 *
 * @param types list of {@code OutputType} names
 * @throws IllegalArgumentException if any name does not resolve to a constant
 */
@SuppressWarnings("unused")
public void setOutputTypes(String types) {
    List<String> typeNames = LIST_SPLITTER.splitToList(types);
    ImmutableList<OutputType> resolvedTypes =
        typeNames.stream()
            .map(typeName -> resolve(OutputType.class, typeName))
            .collect(toImmutableList());
    if (resolvedTypes.isEmpty()) {
        // Nothing was listed, so do not override what the configuration builder already holds.
        return;
    }
    config.setOutputTypes(resolvedTypes);
}
/**
 * Sets the specials directory on the converter configuration.
 *
 * @param path the specials directory, given as a string
 */
@SuppressWarnings("unused")
public void setSpecialsDir(String path) {
    // A String is accepted because Ant cannot convert attribute values to Path on every system.
    Path specialsDir = Paths.get(path);
    config.setSpecialsDir(specialsDir);
}
/**
 * Sets whether {@code execute()} wraps the CLDR data supplier with pseudo locales.
 *
 * @param includePseudoLocales true to add pseudo locales to the data supplier
 */
@SuppressWarnings("unused")
public void setIncludePseudoLocales(boolean includePseudoLocales) {
this.includePseudoLocales = includePseudoLocales;
}
/**
 * Sets the regular expression used to filter the locale IDs handed to the converter.
 *
 * <p>The filter is the pattern's {@code asPredicate()}, so an ID is accepted when the
 * expression is found anywhere in it.
 *
 * @param idFilterRegex the regular expression to compile
 */
@SuppressWarnings("unused")
public void setLocaleIdFilter(String idFilterRegex) {
    Pattern idPattern = Pattern.compile(idFilterRegex);
    this.idFilter = idPattern.asPredicate();
}
/**
 * Passes the "emit report" flag to the converter configuration.
 *
 * @param emit the flag value to set
 */
@SuppressWarnings("unused")
public void setEmitReport(boolean emit) {
config.setEmitReport(emit);
}
/** Nested {@code <localeIds>} element whose text content is parsed into a set of locale IDs. */
public static final class LocaleIds extends Task {
    // Stays null until addText() has been called.
    private ImmutableSet<String> ids;

    /**
     * Parses the element text into locale IDs, replacing any previously parsed set.
     *
     * @param localeIds comma or newline separated locale IDs, optionally with '//' comments
     */
    @SuppressWarnings("unused")
    public void addText(String localeIds) {
        this.ids = parseLocaleIds(localeIds);
    }

    /**
     * Verifies that at least one locale ID was given.
     *
     * @throws BuildException if no text was added or the text contained no IDs
     */
    @Override
    public void init() throws BuildException {
        // 'ids' is null when no text was ever added; report that as a build error rather than
        // failing with a NullPointerException.
        checkBuild(ids != null && !ids.isEmpty(), "Locale IDs must be specified");
    }
}
/**
 * Nested {@code <directory>} element: an output directory with its own locale IDs, the IDs that
 * inherit from their language subtag, and forced aliases.
 */
public static final class Directory extends Task {
    private final List<ForcedAlias> forcedAliases = new ArrayList<>();
    private ImmutableSet<String> inheritLanguageSubtag = ImmutableSet.of();
    private IcuLocaleDir dir;
    private LocaleIds localeIds = null;

    /** Sets the directory from the name of an {@code IcuLocaleDir} constant. */
    @SuppressWarnings("unused")
    public void setDir(String directory) {
        IcuLocaleDir resolvedDir = resolve(IcuLocaleDir.class, directory);
        this.dir = resolvedDir;
    }

    /** Sets the IDs that inherit from their language subtag, parsed as a locale ID list. */
    @SuppressWarnings("unused")
    public void setInheritLanguageSubtag(String localeIds) {
        ImmutableSet<String> parsedIds = parseLocaleIds(localeIds);
        this.inheritLanguageSubtag = parsedIds;
    }

    /** Adds a forced alias for this directory. */
    @SuppressWarnings("unused")
    public void addConfiguredForcedAlias(ForcedAlias alias) {
        this.forcedAliases.add(alias);
    }

    /**
     * Sets the locale IDs for this directory.
     *
     * @throws BuildException if locale IDs were already set
     */
    @SuppressWarnings("unused")
    public void addConfiguredLocaleIds(LocaleIds localeIds) {
        boolean isFirstLocaleIds = (this.localeIds == null);
        checkBuild(isFirstLocaleIds,
            "Cannot add more that one <localeIds> element for <directory>: %s", dir);
        this.localeIds = localeIds;
    }

    /**
     * Verifies that both the directory and the locale IDs have been set.
     *
     * @throws BuildException if either is missing
     */
    @Override
    public void init() throws BuildException {
        boolean hasDir = (dir != null);
        checkBuild(hasDir, "Directory attribute 'dir' must be specified");
        boolean hasLocaleIds = (localeIds != null);
        checkBuild(hasLocaleIds, "<localeIds> must be specified for <directory>: %s", dir);
    }
}
/** Nested {@code <forcedAlias>} element: a source and a target, both stored whitespace-trimmed. */
public static final class ForcedAlias extends Task {
    private String source = "";
    private String target = "";

    /** Sets the alias source, with surrounding whitespace removed. */
    @SuppressWarnings("unused")
    public void setSource(String source) {
        String trimmedSource = whitespace().trimFrom(source);
        this.source = trimmedSource;
    }

    /** Sets the alias target, with surrounding whitespace removed. */
    @SuppressWarnings("unused")
    public void setTarget(String target) {
        String trimmedTarget = whitespace().trimFrom(target);
        this.target = trimmedTarget;
    }

    /**
     * Verifies that both source and target are non-empty.
     *
     * @throws BuildException if either is empty
     */
    @Override
    public void init() throws BuildException {
        boolean hasSource = !source.isEmpty();
        checkBuild(hasSource, "Alias source must not be empty");
        boolean hasTarget = !target.isEmpty();
        checkBuild(hasTarget, "Alias target must not be empty");
    }
}
/**
 * Nested {@code <altPath>} element: a source path, a target path and an optional set of locale
 * IDs. The paths are kept as strings here and only parsed later.
 */
public static final class AltPath extends Task {
    private String source = "";
    private String target = "";
    private ImmutableSet<String> localeIds = ImmutableSet.of();

    /** Sets the source path, with every single quote replaced by a double quote. */
    @SuppressWarnings("unused")
    public void setSource(String source) {
        this.source = withDoubleQuotes(source);
    }

    /** Sets the target path, with every single quote replaced by a double quote. */
    @SuppressWarnings("unused")
    public void setTarget(String target) {
        this.target = withDoubleQuotes(target);
    }

    /** Sets the locale IDs for this element, parsed as a locale ID list. */
    @SuppressWarnings("unused")
    public void setLocales(String localeIds) {
        ImmutableSet<String> parsedIds = parseLocaleIds(localeIds);
        this.localeIds = parsedIds;
    }

    /**
     * Verifies that both source and target paths are non-empty.
     *
     * @throws BuildException if either is empty
     */
    @Override
    public void init() throws BuildException {
        boolean hasSource = !source.isEmpty();
        checkBuild(hasSource, "Source path not be empty");
        boolean hasTarget = !target.isEmpty();
        checkBuild(hasTarget, "Target path not be empty");
    }

    // Replaces each ' with " in the given path string.
    private static String withDoubleQuotes(String path) {
        return path.replace('\'', '"');
    }
}
/**
 * Sets the default locale IDs from the nested {@code <localeIds>} element.
 *
 * @param localeIds the configured element
 * @throws BuildException if default locale IDs were already set
 */
@SuppressWarnings("unused")
public void addConfiguredLocaleIds(LocaleIds localeIds) {
    boolean isFirstLocaleIds = (this.localeIds == null);
    checkBuild(isFirstLocaleIds, "Cannot add more that one <localeIds> element");
    this.localeIds = localeIds;
}
/**
 * Registers a nested {@code <directory>} element: records its locale IDs, its
 * {@code inheritLanguageSubtag} IDs and its forced aliases against the element's directory.
 *
 * <p>IDs named by {@code inheritLanguageSubtag}, or used as the source or target of a forced
 * alias, but missing from the element's {@code <localeIds>} are reported with a warning and
 * added to the directory's ID set.
 *
 * @param filter the configured directory element
 * @throws BuildException if the element has no directory or no locale IDs
 * @throws IllegalStateException if IDs were already registered for the same directory
 */
@SuppressWarnings("unused")
public void addConfiguredDirectory(Directory filter) {
    // The element's fields are dereferenced below, so validate them here instead of failing
    // with a NullPointerException (or silently storing IDs under a null directory key).
    checkBuild(filter.dir != null, "Directory attribute 'dir' must be specified");
    checkBuild(filter.localeIds != null,
        "<localeIds> must be specified for <directory>: %s", filter.dir);
    checkBuild(filter.localeIds.ids != null,
        "Locale IDs must be specified for <directory>: %s", filter.dir);
    checkState(!perDirectoryIds.containsKey(filter.dir),
        "directory %s specified twice", filter.dir);
    ImmutableSet<String> ids = filter.localeIds.ids;
    perDirectoryIds.putAll(filter.dir, ids);

    // Check that any locale IDs marked to inherit the base language (instead of root) are
    // listed in the set of generated locales.
    inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
    warnAndAddMissingIds(
        filter.dir, ids, filter.inheritLanguageSubtag, "in 'inheritLanguageSubtag'");

    // Check that locales specified for forced aliases in this directory are also listed in
    // the set of generated locales.
    filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
    Set<String> sourceIds =
        filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
    warnAndAddMissingIds(filter.dir, ids, sourceIds, "as sources of a <forcedAlias>");
    Set<String> targetIds =
        filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
    warnAndAddMissingIds(filter.dir, ids, targetIds, "as targets of a <forcedAlias>");
}

// Logs a warning for, and adds to the directory's ID set, any required IDs that are not among
// the IDs listed for that directory. Does nothing when all required IDs are listed.
private void warnAndAddMissingIds(
    IcuLocaleDir dir, Set<String> listedIds, Set<String> requiredIds, String origin) {
    Set<String> missingIds = Sets.difference(requiredIds, listedIds);
    if (missingIds.isEmpty()) {
        return;
    }
    log(String.format(
        "WARNING: Locale IDs listed %s should also be listed "
            + "in <localeIds> for that directory (%s): %s",
        origin, dir, String.join(", ", missingIds)));
    perDirectoryIds.putAll(dir, missingIds);
}
/**
 * Registers a forced alias declared directly on the task (outside any directory element); it is
 * applied to every {@code IcuLocaleDir}.
 *
 * @param alias the configured alias element
 */
@SuppressWarnings("unused")
public void addConfiguredForcedAlias(ForcedAlias alias) {
    Arrays.stream(IcuLocaleDir.values())
        .forEach(dir -> config.addForcedAlias(dir, alias.source, alias.target));
}
/**
 * Records a nested alt-path element; its path strings are parsed later, when
 * {@code execute()} runs.
 *
 * @param altPath the configured alt-path element
 */
@SuppressWarnings("unused")
public void addConfiguredAltPath(AltPath altPath) {
// Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs).
// Wait until the "execute()" method since in future we expect to use the configured CLDR
// directory explicitly there.
altPaths.add(altPath);
}
/**
 * Runs the conversion with the accumulated configuration.
 *
 * <p>Creates the CLDR data supplier for the configured directory and draft status, wraps it
 * with alternate-path data and pseudo locales when requested, expands the default locale IDs,
 * registers the (filtered) locale IDs and forced parents for every {@code IcuLocaleDir}, and
 * finally invokes {@code LdmlConverter.convert()}.
 *
 * @throws BuildException if no default locale IDs were specified
 * @throws IllegalArgumentException if an {@code inheritLanguageSubtag} ID is not of the form
 *     {@code <lang>_<Script>} or its parent is not "root"
 */
@Override
@SuppressWarnings("unused")
public void execute() throws BuildException {
    checkBuild(localeIds != null, "<localeIds> must be specified");
    // An element without any text never had its IDs parsed; fail with a build error instead of
    // passing null on to the ID resolver.
    checkBuild(localeIds.ids != null, "Locale IDs must be specified");

    CldrDataSupplier src = CldrDataSupplier
        .forCldrFilesIn(cldrPath)
        .withDraftStatusAtLeast(minimumDraftStatus);

    // We must do this wrapping of the data supplier _before_ creating the supplemental data
    // instance since adding pseudo locales affects the set of available locales.
    // TODO: Move some/all of this into the base converter and control it via the config.
    if (!altPaths.isEmpty()) {
        src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths());
    }
    if (includePseudoLocales) {
        src = PseudoLocales.addPseudoLocalesTo(src);
    }

    SupplementalData supplementalData = SupplementalData.create(src);
    ImmutableSet<String> defaultTargetIds =
        LocaleIdResolver.expandTargetIds(this.localeIds.ids, supplementalData);

    // Compiled once here rather than once per ID inside the loop.
    Pattern languageAndScript = Pattern.compile("[a-z]{2}_[A-Z][a-z]{3}");
    for (IcuLocaleDir dir : IcuLocaleDir.values()) {
        // Directories without their own ID list fall back to the expanded default IDs.
        Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
        config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));

        // We should only have locale IDs like "zh_Hant" here (language + script) and only
        // those which would naturally inherit to "root"
        inheritLanguageSubtag.get(dir).forEach(id -> {
            checkArgument(languageAndScript.matcher(id).matches(),
                "Invalid locale ID for inheritLanguageSubtag (expect '<lang>_<Script>'): %s",
                id);
            checkArgument(supplementalData.getParent(id).equals("root"),
                "Invalid locale ID for inheritLanguageSubtag (parent must be 'root'): %s", id);
            // The forced parent is the two-letter language subtag at the start of the ID.
            config.addForcedParent(dir, id, id.substring(0, 2));
        });
    }
    config.setMinimumDraftStatus(minimumDraftStatus);
    LdmlConverter.convert(src, supplementalData, config.build());
}
/**
 * Builds the target-to-source path map for alt paths that list no locale IDs.
 *
 * @return map from parsed target path to parsed source path
 * @throws IllegalArgumentException if the same target path appears more than once
 */
private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() {
    ImmutableMap.Builder<CldrPath, CldrPath> globalPaths = ImmutableMap.builder();
    for (AltPath alt : altPaths) {
        if (!alt.localeIds.isEmpty()) {
            // Alt paths restricted to specific locales are not part of this map.
            continue;
        }
        globalPaths.put(parseDistinguishingPath(alt.target), parseDistinguishingPath(alt.source));
    }
    // Building fails if the same key was added more than once.
    return globalPaths.build();
}
/**
 * Builds the per-locale alt path table: one cell per (locale ID, target path) pair, holding the
 * source path, for every alt path that lists locale IDs.
 *
 * @return table keyed by locale ID (row) and parsed target path (column)
 */
private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() {
    ImmutableTable.Builder<String, CldrPath, CldrPath> perLocalePaths = ImmutableTable.builder();
    for (AltPath alt : altPaths) {
        for (String localeId : alt.localeIds) {
            // Paths are parsed once per locale ID, so alt paths without IDs are never parsed here.
            perLocalePaths.put(
                localeId,
                parseDistinguishingPath(alt.target),
                parseDistinguishingPath(alt.source));
        }
    }
    return perLocalePaths.build();
}
/**
 * Throws a {@code BuildException} with a formatted message unless the condition holds.
 *
 * @param condition the condition that must be true
 * @param message a {@code String.format()} template
 * @param args arguments for the template
 * @throws BuildException if {@code condition} is false
 */
private static void checkBuild(boolean condition, String message, Object... args) {
    if (condition) {
        return;
    }
    String formattedMessage = String.format(message, args);
    throw new BuildException(formattedMessage);
}
/**
 * Parses a comma or newline separated list of locale IDs, ignoring '//' end-of-line comments.
 *
 * @param localeIds the raw list text
 * @return the distinct, whitespace-trimmed, non-empty IDs in order of first appearance
 */
private static ImmutableSet<String> parseLocaleIds(String localeIds) {
    // Strip '//' style end-of-line comments first. Only the comment text is removed; the line
    // terminator is left in place so two elements are never inadvertently joined. Not requiring
    // a trailing newline means a comment on the final line is stripped too.
    String withoutComments = localeIds.replaceAll("//[^\n]*", "");
    return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(withoutComments));
}
/**
 * Resolves an enum constant from a name written in UPPER_UNDERSCORE, lower_underscore,
 * lowerCamel or UpperCamel form.
 *
 * @param enumClass the enum type to resolve against
 * @param name the name to resolve; only ASCII letters, digits and '_' are accepted
 * @return the matching enum constant
 * @throws IllegalArgumentException if the name is empty, contains other characters, mixes case
 *     with underscores, or does not match any constant of {@code enumClass}
 */
private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
    checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
    checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),
        "invalid enumeration name '%s'; expected only ASCII letters, digits or '_'", name);
    CaseFormat format;
    if (UPPER_UNDERSCORE.matchesAllOf(name)) {
        format = CaseFormat.UPPER_UNDERSCORE;
    } else if (LOWER_UNDERSCORE.matchesAllOf(name)) {
        format = CaseFormat.LOWER_UNDERSCORE;
    } else {
        // Mixed case with '_' is not permitted.
        checkArgument(!name.contains("_"),
            "invalid enumeration name '%s'; mixed case with underscore not allowed", name);
        // Mixed case without '_' is camel case; the first character selects the variant.
        format =
            Ascii.isLowerCase(name.charAt(0)) ? CaseFormat.LOWER_CAMEL : CaseFormat.UPPER_CAMEL;
    }
    try {
        // Enum constants are looked up by their UPPER_UNDERSCORE name.
        return Enum.valueOf(enumClass, format.to(CaseFormat.UPPER_UNDERSCORE, name));
    } catch (IllegalArgumentException e) {
        String validNames =
            Arrays.stream(enumClass.getEnumConstants())
                .map(Object::toString)
                .collect(joining(", "));
        // Keep the original exception as the cause so the failed lookup stays visible.
        throw new IllegalArgumentException(
            "invalid enumeration name '" + name + "'; expected one of: " + validNames, e);
    }
}
}