| // © 2019 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| package org.unicode.icu.tool.cldrtoicu.mapper; |
| |
| import static com.google.common.base.Preconditions.checkArgument; |
| import static com.google.common.base.Preconditions.checkNotNull; |
| import static com.google.common.base.Preconditions.checkState; |
| import static com.google.common.collect.Ordering.natural; |
| |
| import java.util.List; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.stream.Stream; |
| |
| import org.unicode.cldr.api.CldrData; |
| import org.unicode.cldr.api.CldrPath; |
| import org.unicode.cldr.api.CldrValue; |
| import org.unicode.icu.tool.cldrtoicu.IcuData; |
| import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; |
| import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; |
| import org.unicode.icu.tool.cldrtoicu.RbPath; |
| import org.unicode.icu.tool.cldrtoicu.RbValue; |
| |
| import com.google.common.collect.ArrayListMultimap; |
| import com.google.common.collect.ImmutableListMultimap; |
| import com.google.common.collect.Iterables; |
| import com.google.common.collect.LinkedHashMultimap; |
| import com.google.common.collect.ListMultimap; |
| import com.google.common.collect.SetMultimap; |
| |
| /** |
| * An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures |
| * that transformation results are correctly processed when being added to IcuData instances. |
| */ |
| abstract class AbstractPathValueMapper { |
| // Matches "/foo/bar" or "/foo/bar[N]" as a resource bundle path, capturing the path and |
| // optional index separately. Note that this is very sloppy matching and the path string will |
| // also be parsed via RbPath.parse(). |
| private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$"); |
| |
| private final CldrData cldrData; |
| private final PathValueTransformer transformer; |
| |
| // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for |
| // each key. The reason is that result comparison is not "consistent with equals", and |
| // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() |
| // method), and it does this even if using the add() method of the sorted set (this is in |
| // fact in violation of the stated behaviour of Set#add). |
| private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create(); |
| |
| AbstractPathValueMapper(CldrData cldrData, PathValueTransformer transformer) { |
| this.cldrData = checkNotNull(cldrData); |
| this.transformer = checkNotNull(transformer); |
| } |
| |
| /** |
| * Returns a new {@code IcuData} instance produced by post-processing a set of results |
| * generated by calling sub-class method {@link #addResults()}. This is the only method which |
| * need be directly invoked by the sub-class implementation (other methods are optionally used |
| * from within the {@link #addResults()} callback). |
| */ |
| final IcuData generateIcuData(String icuName, boolean hasFallback) { |
| // This subclass mostly exists to control the fact that results need to be added in one go |
| // to the IcuData because of how referenced paths are handled. If results could be added in |
| // multiple passes, you could have confusing situations in which values has path references |
| // in them but the referenced paths have not been transformed yet. Forcing the subclass to |
| // implement a single method to generate all results at once ensures that we control the |
| // lifecycle of the data and how results are processed as they are added to the IcuData. |
| checkState(resultsByRbPath.isEmpty(), |
| "results must not be added outside the call to addResults(): %s", resultsByRbPath); |
| addResults(); |
| IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback)); |
| resultsByRbPath.clear(); |
| return icuData; |
| } |
| |
| /** |
| * Implemented by sub-classes to return all results to be added to the IcuData instance. The |
| * primary job of this callback is to generate transformed results (typically by calling |
| * {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the |
| * results to this mapper using {@link #addResult(RbPath, Result)}. |
| * |
| * <p>This method is called once for each call to {@link #generateIcuData(String, boolean)} and |
| * is responsible for adding all necessary results for the returned {@link IcuData}. |
| */ |
| abstract void addResults(); |
| |
| /** |
| * Returns the CLDR data used for this transformation. Note that a subclass mapper might have |
| * other data for different purposes, but this data instance is the one from which variables |
| * are resolved. A sub-class mapper might access this for additional processing. |
| */ |
| final CldrData getCldrData() { |
| return cldrData; |
| } |
| |
| /** |
| * Transforms a single value into a sequence of results using this mapper's {@link |
| * PathValueTransformer}, which can be added to the mapper (possibly after optional |
| * post-processing). |
| */ |
| final Stream<Result> transformValue(CldrValue value) { |
| return transformer.transform(value, this::getVarsFn).stream(); |
| } |
| |
| /** |
| * Adds a transformed result to the mapper. This should be called by the sub-class mapper in |
| * its implementation of the {@link #addResults()} method. |
| * |
| * <p>Note that the given path will often (but not always) be just the path of the result. |
| */ |
| final void addResult(RbPath path, Result result) { |
| resultsByRbPath.put(path, result); |
| } |
| |
| // Callback function used by the transform() method to resolve variables from CLDR data. |
| private String getVarsFn(CldrPath p) { |
| CldrValue cldrValue = cldrData.get(p); |
| return cldrValue != null ? cldrValue.getValue() : null; |
| } |
| |
| // Fills in any fallback results and orders the results by the resource bundle path. |
| private ImmutableListMultimap<RbPath, Result> finalizeResults() { |
| ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder(); |
| out.orderValuesBy(natural()); |
| for (RbPath rbPath : resultsByRbPath.keySet()) { |
| Set<Result> existingResults = resultsByRbPath.get(rbPath); |
| out.putAll(rbPath, existingResults); |
| for (Result fallback : transformer.getFallbackResultsFor(rbPath, this::getVarsFn)) { |
| if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { |
| out.put(rbPath, fallback); |
| } |
| } |
| } |
| return out.build(); |
| } |
| |
| /** |
| * Adds transformation results on the specified multi-map to this data instance. Results are |
| * processed in list order and handled differently according to whether they are grouped, or |
| * represent an alias value. |
| * |
| * If the value of an ungrouped result is itself a resource bundle path (including possibly |
| * having an array index) then the referenced value is assumed to be an existing path whose |
| * value is then substituted. |
| */ |
| private static IcuData addResultsToIcuData( |
| ImmutableListMultimap<RbPath, Result> results, IcuData icuData) { |
| |
| // Ordering of paths should not matter here (IcuData will re-sort them) and ordering of |
| // values for a given key is preserved by list multimaps. |
| ListMultimap<RbPath, ValueOrAlias> map = ArrayListMultimap.create(); |
| |
| // IMPORTANT: This code MUST use the keys of the results map (rather than extracting the |
| // paths from the results). This is because paths can be post-processed after the result |
| // is obtained, which can affect output ordering as well as the path mappings. |
| for (RbPath rbPath : results.keySet()) { |
| for (Result r : results.get(rbPath)) { |
| if (r.isGrouped()) { |
| // Grouped results have all values in a single entry and cannot be aliases. |
| map.put(rbPath, ValueOrAlias.value(RbValue.of(r.getValues()))); |
| } else if (rbPath.isAlias()) { |
| // Aliases (which should be single values) are not expanded to their referenced |
| // values (whereas non-aliases might be). This is really just a hack to work |
| // around the fact that RbPath/RbValue is not properly typed and we have to use |
| // heuristics to determine whether to replace a resource bundle path with its |
| // referenced value. |
| checkArgument(r.getValues().size() == 1, |
| "explicit aliases must be singleton values: %s", r); |
| map.put(rbPath, ValueOrAlias.value(Iterables.getOnlyElement(r.getValues()))); |
| } else { |
| // Ungrouped results are one value per entry, but might later be expanded into |
| // grouped results if they are a path referencing a grouped entry. |
| r.getValues().forEach(v -> map.put(rbPath, ValueOrAlias.parse(v))); |
| } |
| } |
| } |
| // This works because insertion order is maintained for values of each path. |
| map.forEach((p, v) -> icuData.add(p, v.resolve(map))); |
| return icuData; |
| } |
| |
| /* |
| * An unfortunately messy little interface to handle to way that aliases are defined in the |
| * path value mappers. A mapper Result is permitted to contain values which are actually |
| * aliases to other resource bundle elements. This is typically used in fallback values, where |
| * the fallback is a functional value. For example: |
| * fallback=/weekData/001:intvector[0] |
| * |
| * This is messy because when we process the Results from the mapper to put them into the |
| * IcuData instance, we cannot be sure we can resolve these "aliases" at the time that they |
| * are encountered (the target value might not be present yet). So we need to wait until |
| * all the values are in place and then do a 2nd pass to resolve things. |
| * |
| * So far path replacement is strictly limited to fallback results, so perhaps it could be |
| * handled more directly in the Result class, though it is possible for a single result to |
| * contain multiple path references: |
| * fallback=/weekData/001:intvector[2] /weekData/001:intvector[3] |
| */ |
| private interface ValueOrAlias { |
| // A simple value doesn't need resolving, and doesn't care if the given map is null (*). |
| static ValueOrAlias value(RbValue v) { |
| return src -> v; |
| } |
| |
| // Helper for (common) singleton values. |
| static ValueOrAlias value(String v) { |
| return value(RbValue.of(v)); |
| } |
| |
| static ValueOrAlias parse(String valueOrAlias) { |
| Matcher m = ARRAY_INDEX.matcher(valueOrAlias); |
| if (!m.matches()) { |
| return value(valueOrAlias); |
| } |
| // The only constraint is that the "path" value starts with a leading '/', but parsing into |
| // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the |
| // captured value contains '/' characters to represent path delimiters. |
| RbPath path = RbPath.parse(m.group(1)); |
| // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]"). |
| int index = m.group(2) != null ? Integer.parseUnsignedInt(m.group(2)) : 0; |
| return src -> { |
| checkState(src != null, "recursive alias resolution is not supported"); |
| List<ValueOrAlias> values = src.get(path); |
| checkArgument(!values.isEmpty(), "no such alias value: /%s", path); |
| checkArgument(index < values.size(), |
| "index for alias /%s[%s] is out of bounds", path, index); |
| // By passing 'null' to the recursive call to resolve, we prevent the resolution |
| // from being recursive (*). This could be changed to pass 'src' and achieve |
| // arbitrary recursive resolving if needed, put that's currently unnecessary (and |
| // should probably be guarded against unbounded recursion if it is ever enabled). |
| return values.get(index).resolve(null); |
| }; |
| } |
| |
| RbValue resolve(ListMultimap<RbPath, ValueOrAlias> src); |
| } |
| } |