// blob: b896d2126a7c25d80d0779ef7ee31423c4c40e94 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Ordering.natural;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.SetMultimap;
/**
* An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures
* that transformation results are correctly processed when being added to IcuData instances.
*/
abstract class AbstractPathValueMapper {
// Matches "/foo/bar" or "/foo/bar[N]" as a resource bundle path, capturing the path and
// optional index separately. Group 1 is the path (a leading '/' followed by one or more
// characters other than '[') and group 2 is the decimal index N, which is null when no
// "[N]" suffix is present. Note that this is very sloppy matching and the path string will
// also be parsed via RbPath.parse().
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
// The CLDR data from which variables are resolved (see getVarsFn()).
private final CldrData cldrData;
// The transformer used to turn CLDR values into results and to supply fallback results.
private final PathValueTransformer transformer;
// Results collected via addResult(), keyed by resource bundle path. This is expected to be
// empty whenever generateIcuData() is entered, and is cleared again once the IcuData has
// been built.
//
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
/**
 * Creates a mapper which transforms values using the given transformer, resolving variables
 * from the given CLDR data.
 *
 * @param cldrData the CLDR data from which variables are resolved (must not be null).
 * @param transformer the transformer used to generate results (must not be null).
 * @throws NullPointerException if either argument is null.
 */
AbstractPathValueMapper(CldrData cldrData, PathValueTransformer transformer) {
    // Validate both arguments (in declaration order) before touching any state.
    checkNotNull(cldrData);
    checkNotNull(transformer);
    this.cldrData = cldrData;
    this.transformer = transformer;
}
/**
 * Returns a new {@code IcuData} instance produced by post-processing a set of results
 * generated by calling sub-class method {@link #addResults()}. This is the only method which
 * need be directly invoked by the sub-class implementation (other methods are optionally used
 * from within the {@link #addResults()} callback).
 *
 * <p>Any results collected during this call are discarded before the method exits, whether
 * it returns normally or throws, so a failed call does not leave stale results behind.
 *
 * @param icuName the name passed to the constructor of the returned {@code IcuData}.
 * @param hasFallback the fallback flag passed to the constructor of the returned
 *     {@code IcuData}.
 * @return a new {@code IcuData} populated from the results added by {@link #addResults()}.
 * @throws IllegalStateException if any results were already present when this method was
 *     called (i.e. they were added outside the {@link #addResults()} callback).
 */
final IcuData generateIcuData(String icuName, boolean hasFallback) {
    // This class mostly exists to control the fact that results need to be added in one go
    // to the IcuData because of how referenced paths are handled. If results could be added in
    // multiple passes, you could have confusing situations in which values have path
    // references in them but the referenced paths have not been transformed yet. Forcing the
    // subclass to implement a single method to generate all results at once ensures that we
    // control the lifecycle of the data and how results are processed as they are added to the
    // IcuData.
    checkState(resultsByRbPath.isEmpty(),
        "results must not be added outside the call to addResults(): %s", resultsByRbPath);
    try {
        addResults();
        return addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback));
    } finally {
        // Always reset the collected results, even if a callback or post-processing step
        // threw. Otherwise partially collected results would linger and the next call would
        // fail the precondition above with a misleading message.
        resultsByRbPath.clear();
    }
}
/**
* Implemented by sub-classes to add all results which should end up in the IcuData instance.
* The primary job of this callback is to generate transformed results (typically by calling
* {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the
* results to this mapper using {@link #addResult(RbPath, Result)}. Nothing is returned; the
* results are communicated solely via calls to {@link #addResult(RbPath, Result)}.
*
* <p>This method is called once for each call to {@link #generateIcuData(String, boolean)} and
* is responsible for adding all necessary results for the returned {@link IcuData}.
*/
abstract void addResults();
/**
 * Returns the CLDR data this mapper was constructed with. A sub-class mapper may hold other
 * data for other purposes, but this is the instance from which variables are resolved during
 * transformation, and sub-classes may use it for additional processing.
 *
 * @return the CLDR data used for variable resolution (never null).
 */
final CldrData getCldrData() {
    return this.cldrData;
}
/**
 * Transforms a single CLDR value into a stream of results using this mapper's {@link
 * PathValueTransformer}. Variables needed by the transformer are resolved against this
 * mapper's CLDR data. The results are not added to the mapper by this method; the caller can
 * post-process them and add them via {@link #addResult(RbPath, Result)}.
 *
 * @param value the CLDR value to transform.
 * @return a stream over the results produced by the transformer for the given value.
 */
final Stream<Result> transformValue(CldrValue value) {
    Stream<Result> transformed = transformer.transform(value, this::getVarsFn).stream();
    return transformed;
}
/**
 * Records a transformed result against the given resource bundle path. Sub-class mappers are
 * expected to call this from within their implementation of {@link #addResults()}.
 *
 * <p>Note that the given path will often (but not always) be just the path of the result.
 *
 * @param path the resource bundle path under which the result is stored.
 * @param result the result to add.
 */
final void addResult(RbPath path, Result result) {
    this.resultsByRbPath.put(path, result);
}
// Variable resolution callback handed to the transformer: looks up the given path in the CLDR
// data and returns its value, or null if the data has no value for that path.
private String getVarsFn(CldrPath p) {
    CldrValue found = cldrData.get(p);
    if (found == null) {
        return null;
    }
    return found.getValue();
}
// Builds the final multimap of results: for each resource bundle path (in the order paths were
// first added) this copies the added results, then appends any fallback result supplied by the
// transformer which is not already covered by one of the added results. The values for each
// path are ordered by their natural ordering when the multimap is built.
private ImmutableListMultimap<RbPath, Result> finalizeResults() {
    ImmutableListMultimap.Builder<RbPath, Result> finalized = ImmutableListMultimap.builder();
    finalized.orderValuesBy(natural());
    for (RbPath path : resultsByRbPath.keySet()) {
        Set<Result> added = resultsByRbPath.get(path);
        finalized.putAll(path, added);
        for (Result candidate : transformer.getFallbackResultsFor(path, this::getVarsFn)) {
            // Skip any fallback for which an explicitly added result already exists.
            boolean alreadyCovered = added.stream().anyMatch(candidate::isFallbackFor);
            if (alreadyCovered) {
                continue;
            }
            finalized.put(path, candidate);
        }
    }
    return finalized.build();
}
/**
 * Adds the transformation results in the given multimap to the given {@code IcuData}. Results
 * are processed in list order and handled differently according to whether they are grouped,
 * or represent an alias value.
 *
 * <p>If the value of an ungrouped result is itself a resource bundle path (including possibly
 * having an array index) then the referenced value is assumed to be an existing path whose
 * value is then substituted.
 *
 * @param results the finalized results, keyed by resource bundle path.
 * @param icuData the data instance to which resolved values are added.
 * @return the given {@code icuData} instance.
 */
private static IcuData addResultsToIcuData(
    ImmutableListMultimap<RbPath, Result> results, IcuData icuData) {
    // The order of paths is not important here (IcuData will re-sort them), while the order
    // of the values for any given path is retained by the list multimap.
    ListMultimap<RbPath, ValueOrAlias> pending = ArrayListMultimap.create();

    // IMPORTANT: The paths used here MUST be the keys of the results map (and not the paths
    // held by the results themselves). Paths can be post-processed after a result is
    // obtained, and that can affect both output ordering and the path mappings.
    results.forEach((path, result) -> {
        if (result.isGrouped()) {
            // A grouped result carries all its values in one entry and is never an alias.
            pending.put(path, ValueOrAlias.value(RbValue.of(result.getValues())));
            return;
        }
        if (!path.isAlias()) {
            // An ungrouped result contributes one entry per value, although any value which
            // is a path referencing a grouped entry may later be expanded to that entry.
            for (String v : result.getValues()) {
                pending.put(path, ValueOrAlias.parse(v));
            }
            return;
        }
        // Explicit aliases (which should be single values) are never expanded to the values
        // they reference (whereas non-aliases might be). This is really just a hack to cope
        // with RbPath/RbValue not being properly typed, which forces the use of heuristics
        // to decide whether a resource bundle path should be replaced by its referenced
        // value.
        checkArgument(result.getValues().size() == 1,
            "explicit aliases must be singleton values: %s", result);
        pending.put(path, ValueOrAlias.value(Iterables.getOnlyElement(result.getValues())));
    });

    // Second pass: everything is now in place, so aliases can be resolved. This relies on
    // the insertion order of the values for each path having been maintained.
    for (RbPath path : pending.keySet()) {
        for (ValueOrAlias entry : pending.get(path)) {
            icuData.add(path, entry.resolve(pending));
        }
    }
    return icuData;
}
/*
 * An unfortunately messy little interface to handle the way that aliases are defined in the
 * path value mappers. A mapper Result is permitted to contain values which are actually
 * aliases to other resource bundle elements. This is typically used in fallback values, where
 * the fallback is a functional value. For example:
 *   fallback=/weekData/001:intvector[0]
 *
 * The mess comes from the fact that, while the Results from the mapper are being processed
 * for insertion into the IcuData instance, there is no guarantee that such an "alias" can be
 * resolved at the point it is encountered (its target value might not be present yet). So
 * resolution is deferred until all values are in place and then done in a second pass.
 *
 * So far path replacement is strictly limited to fallback results, so perhaps it could be
 * handled more directly in the Result class, though it is possible for a single result to
 * contain multiple path references:
 *   fallback=/weekData/001:intvector[2] /weekData/001:intvector[3]
 */
private interface ValueOrAlias {
    // Produces the final value, looking up any referenced path in the given multimap. Simple
    // values ignore the argument, whereas aliases require it to be non-null (*).
    RbValue resolve(ListMultimap<RbPath, ValueOrAlias> src);

    // Wraps a simple value, which needs no resolving and does not care whether the map it is
    // given is null (*).
    static ValueOrAlias value(RbValue v) {
        return unused -> v;
    }

    // Convenience overload for (common) singleton values.
    static ValueOrAlias value(String v) {
        return value(RbValue.of(v));
    }

    // Interprets the given string either as a reference to another resource bundle path
    // (optionally with an array index) or, if it does not look like one, as a simple value.
    static ValueOrAlias parse(String valueOrAlias) {
        Matcher matcher = ARRAY_INDEX.matcher(valueOrAlias);
        if (matcher.matches()) {
            // The only constraint is that the "path" value starts with a leading '/', but
            // parsing into the RbPath ignores this. "parse()" must be used here, rather than
            // RbPath.of(), since the captured value contains '/' characters to represent path
            // delimiters.
            RbPath target = RbPath.parse(matcher.group(1));
            // A missing index (e.g. "/foo/bar") is treated as index 0 (i.e. "/foo/bar[0]").
            String indexDigits = matcher.group(2);
            int targetIndex = (indexDigits == null) ? 0 : Integer.parseUnsignedInt(indexDigits);
            return source -> {
                checkState(source != null, "recursive alias resolution is not supported");
                List<ValueOrAlias> candidates = source.get(target);
                checkArgument(!candidates.isEmpty(), "no such alias value: /%s", target);
                checkArgument(targetIndex < candidates.size(),
                    "index for alias /%s[%s] is out of bounds", target, targetIndex);
                // Passing 'null' to the nested resolve() call is what stops resolution from
                // being recursive (*). Passing 'source' instead would permit arbitrary
                // recursive resolving if needed, but that's currently unnecessary (and should
                // probably be guarded against unbounded recursion if it is ever enabled).
                return candidates.get(targetIndex).resolve(null);
            };
        }
        return value(valueOrAlias);
    }
}
}