blob: 27ee75f1a2545170acab3f6c298b536e48c80358 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PathOrder;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrData.PrefixVisitor.Context;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.cldr.api.PathMatcher;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
* An immutable processor which can be configured to process CLDR data according to a series of
* mappings from CLDR paths to "actions".
*
* <p>In typical use a processor would be statically created to bind paths and handler functions
* (actions) together, and calling {@link CldrDataProcessor#process(CldrData, Object, PathOrder)}
* once for each {@link CldrData} instance.
*
* <p>A processor is built by adding a mixture of "actions" to a builder. An action either defines
* how to handle a single value (see {@link SubProcessor#addValueAction addValueAction()}) or how
* to start a new sub-processor at a specific point in the data hierarchy (see {@link
* SubProcessor#addAction addAction()} or {@link SubProcessor#addSubprocessor addSubprocessor()}).
*
* @param <T> the main "state" type used by the processor for the top-level processing.
*/
public class CldrDataProcessor<T> {
/** Returns a processor builder which operates on a "state" of type {@code <T>}. */
public static <T> Builder<T> builder() {
return new Builder<>();
}
/**
* A builder for processing a CLDR data sub-hierarchy.
*
* @param <T> the "state" type used by the processor.
*/
public static abstract class SubProcessor<T> {
final List<PrefixBuilder<?, T>> prefixActions = new ArrayList<>();
final List<ValueAction<T>> valueActions = new ArrayList<>();
private SubProcessor() { }
/**
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
* for the sub-hierarchy.
*
* <p>This method is intended for cases where the subtype state does not depend on the
* parent state or the path prefix, but needs some post-processing. For example, the
* subtype state might just be a {@code List} and the elements added to it must be
* combined with the parent state after sub-hierarchy is processing is complete.
*
* <pre>{@code
* processor
* .addAction("//parent/path", ArrayList::new, ParentState::addValues)
* .addValueAction("value/suffix", List::add);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
* @param doneFn called after each sub-processing step.
*/
public <S> SubProcessor<S> addAction(
String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn) {
return addAction(pattern, (t, p) -> newStateFn.get(), doneFn);
}
/**
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
* for the sub-hierarchy.
*
* <p>This method is similar to {@link #addAction(String, Supplier, BiConsumer)} but is
* intended for cases where the subtype state depends on the parent path prefix.
*
* <pre>{@code
* processor
* .addAction("//parent/path[@type=*]", SubState::fromType, ParentState::addSubState)
* .addValueAction("value/suffix", SubState::collectValue);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
* @param doneFn called after each sub-processing step.
*/
public <S> SubProcessor<S> addAction(
String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn) {
return addAction(pattern, (t, p) -> newStateFn.apply(p), doneFn);
}
/**
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
* for the sub-hierarchy.
*
* <p>This method is intended for the case where the subtype state is derived from the
* parent state (e.g. an inner class) but does not depend on the path prefix at which the
* sub-hierarchy is rooted.
*
* <pre>{@code
* processor
* .addAction("//parent/path", ParentState::newValueCollector)
* .addValueAction("value/suffix", ValueCollector::addValue);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
*/
public <S> SubProcessor<S> addAction(String pattern, Function<T, S> newStateFn) {
return addAction(pattern, (t, p) -> newStateFn.apply(t));
}
/**
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
* for the sub-hierarchy.
*
* <p>This method is intended for the case where the subtype state is derived from the
* parent state (e.g. an inner class) and the path prefix at which the sub-hierarchy is
* rooted.
*
* <pre>{@code
* processor
* .addAction("//parent/path[@type=*]", ParentState::newCollectorOfType)
* .addValueAction("value/suffix", ValueCollector::addValue);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
*/
public <S> SubProcessor<S> addAction(
String pattern, BiFunction<T, CldrPath, S> newStateFn) {
return addAction(pattern, newStateFn, (t, y) -> {});
}
/**
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
* for the sub-hierarchy.
*
* <p>This method is the most general purpose way to add a sub-hierarchy action and is
* intended for the most complex cases, where subtype state depends on parent state and
* path prefix, and post processing is required. All other implementations of {@code
* addAction} simply delegate to this one in one way or another.
*
* <pre>{@code
* processor
* .addAction("//parent/path[@type=*]", ParentState::newCollector, ParentState::done)
* .addValueAction("value/suffix", ValueCollector::addValue);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
* @param doneFn called after each sub-processing step.
*/
public <S> SubProcessor<S> addAction(
String pattern,
BiFunction<T, CldrPath, S> newStateFn,
BiConsumer<T, ? super S> doneFn) {
PrefixBuilder<S, T> action =
new PrefixBuilder<>(getMatcher(pattern), newStateFn, doneFn);
prefixActions.add(action);
return action;
}
/**
* Returns a new sub-processor for the specified sub-hierarchy rooted at the given
* {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
* the parent.
*
* <p>This method is intended for the case where multiple sub-processors are needed below
* a certain point in the hierarchy, but they all operate on the same state instance.
*
* <pre>{@code
* SubBuilder<MyCollector> subprocessor = processor.addSubprocessor("//parent/path");
* subprocessor.addValueAction("value/suffix", MyCollector::addValue);
* subprocessor.addValueAction("other/suffix", MyCollector::addOtherValue);
* }</pre>
*
* @param pattern the path pattern for the prefix where sub-processing starts.
*/
public SubProcessor<T> addSubprocessor(String pattern) {
return addAction(pattern, (t, p) -> t);
}
/**
* Returns a new sub-processor for the specified sub-hierarchy rooted at the given
* {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
* the parent.
*
* <p>This method is intended for the case where a some setup is required before a
* sub-hierarchy is processed, but the sub-processor state is the same.
*
* <pre>{@code
* SubBuilder<MyCollector> subprocessor = processor
* .addSubprocessor("//parent/path", MyCollector::startFn)
* .addValueAction("value/suffix", MyCollector::addValue);
* }</pre>
*
* @param startFn a handler called when sub-processing begins
* @param pattern the path pattern for the prefix where sub-processing starts.
*/
public SubProcessor<T> addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn) {
return addAction(pattern, (t, p) -> {
startFn.accept(t, p);
return t;
});
}
/**
* Adds an action to handle {@link CldrValue}s found in the current sub-hierarchy
* visitation which match the given {@link PathMatcher} leaf-path pattern.
*
* <p>This method is expected to be called at least once for each sub-hierarchy processor
* in order to handle the actual CLDR values being processed, and the path pattern should
* match leaf-paths in the CLDR data hierarchy, rather than path prefixes.
*
* <p>Multiple value actions can be added to a sub-hierarchy processor, and paths are
* matched in the order the actions are added. It is also possible to mix sub-hierarchy
* actions and value actions on the same processor, but note that sub-hierarchy processors
* will take precedence, so you cannot try to match the same value in both a sub-hierarchy
* processor and a value action.
*
* For example:
* <pre>{@code
* processor
* .addAction("//parent/path", ...)
* .addValueAction("value/suffix", ...);
* // This will never match any values since the sub-hierarchy processor takes precedence!
* processor.addValueAction("//parent/path/value/suffix", ...);
* }</pre>
*
* @param pattern the CLDR path suffix idenifying the values to be processed.
* @param doFn the action to be carried out for each value.
*/
public void addValueAction(String pattern, BiConsumer<T, CldrValue> doFn) {
valueActions.add(new ValueAction<>(getMatcher(pattern), doFn));
}
abstract PathMatcher getMatcher(String pattern);
}
/**
* A root builder of a CLDR data processor.
*
* @param <T> the processor state type.
*/
public static final class Builder<T> extends SubProcessor<T> {
private Builder() { }
/** Returns the immutable CLDR data processor. */
public CldrDataProcessor<T> build() {
return new CldrDataProcessor<>(
Lists.transform(prefixActions, PrefixBuilder::build), valueActions);
}
@Override
PathMatcher getMatcher(String pattern) {
return PathMatcher.of(pattern);
}
}
/**
* A sub-hierarchy data processor rooted at some specified path prefix.
*
* @param <S> the subtype processor state.
* @param <T> the parent processor state.
*/
private static class PrefixBuilder<S, T> extends SubProcessor<S> {
private final PathMatcher matcher;
private final BiFunction<T, CldrPath, S> newStateFn;
private final BiConsumer<T, ? super S> doneFn;
PrefixBuilder(
PathMatcher matcher,
BiFunction<T, CldrPath, S> newStateFn,
BiConsumer<T, ? super S> doneFn) {
this.matcher = checkNotNull(matcher);
this.newStateFn = checkNotNull(newStateFn);
this.doneFn = checkNotNull(doneFn);
}
PrefixAction<S, T> build() {
List<PrefixAction<?, S>> actions = Lists.transform(prefixActions, PrefixBuilder::build);
return new PrefixAction<>(actions, valueActions, matcher, newStateFn, doneFn);
}
@Override PathMatcher getMatcher(String pattern) {
return matcher.withSuffix(pattern);
}
}
private final ImmutableList<PrefixAction<?, T>> prefixActions;
private final ImmutableList<ValueAction<T>> valueActions;
private CldrDataProcessor(
List<PrefixAction<?, T>> prefixActions,
List<ValueAction<T>> valueActions) {
this.prefixActions = ImmutableList.copyOf(prefixActions);
this.valueActions = ImmutableList.copyOf(valueActions);
}
/**
* Processes a CLDR data instance according to the actions registered for this processor in DTD
* order. This method is preferred over {@link #process(CldrData, Object, PathOrder)} and
* eventually the ability to even specify a path order for processing will be removed.
*
* <p>This is the main method used to drive the processing of some CLDR data and is typically
* used like:
*
* <pre>{@code
* MyResult result = CLDR_PROCESSOR.process(data, new MyResult(), DTD);
* }</pre>
* <p>or:*
* <pre>{@code
* MyResult result = CLDR_PROCESSOR.process(data, MyResult.newBuilder(), DTD).build();
* }</pre>
*
* @param data the CLDR data to be processed.
* @param state an instance of the "primary" state.
* @return the given primary state (after modification).
*/
public T process(CldrData data, T state) {
return process(data, state, PathOrder.DTD);
}
/**
* Processes a CLDR data instance according to the actions registered for this processor.
* Callers should prefer using {@link #process(CldrData, Object)} whenever possible and avoid
* relying on path ordering for processing.
*
* @param data the CLDR data to be processed.
* @param state an instance of the "primary" state.
* @param pathOrder the order in which CLDR paths should be visited.
* @return the given primary state (after modification).
*/
public T process(CldrData data, T state, PathOrder pathOrder) {
data.accept(pathOrder, new DispatchingVisitor<>(this, state, s -> {}));
return state;
}
private void dispatchPrefixActions(T state, CldrPath prefix, Context context) {
for (PrefixAction<?, T> a : prefixActions) {
if (a.matches(state, prefix, context)) {
break;
}
}
}
private void dispatchValueActions(T state, CldrValue value) {
for (ValueAction<T> a : valueActions) {
if (a.matches(state, value)) {
break;
}
}
}
/*
* Implementation notes:
*
* "PrefixAction" is a critical part of the design of the path visitor. It acts as a bridge
* between the parent visitation (with state type 'T') and child visitation (state type 'S').
*
* It is the only class to need to know about both types. Both types are known when the
* CldrDataProcessor is made, but during visitation the caller of the "matches" method doesn't
* need to know about the child type, which is why the parent can just have a list of
* "PrefixAction<?, T>" and don't need any magical recasting.
*
* It might only be a few lines of code, but it can only exist in a class which knows about
* both parent and child types (obtaining a new child state is a function of the parent state).
*/
static final class PrefixAction<S, T> extends CldrDataProcessor<S> {
private final PathMatcher matcher;
private final BiFunction<T, CldrPath, S> newStateFn;
private final BiConsumer<T, ? super S> doneFn;
PrefixAction(
List<PrefixAction<?, S>> prefixActions,
List<ValueAction<S>> valueActions,
PathMatcher matcher,
BiFunction<T, CldrPath, S> newStateFn,
BiConsumer<T, ? super S> doneFn) {
super(prefixActions, valueActions);
this.matcher = checkNotNull(matcher);
this.newStateFn = checkNotNull(newStateFn);
this.doneFn = checkNotNull(doneFn);
}
public boolean matches(T state, CldrPath prefix, Context context) {
if (matcher.locallyMatches(prefix)) {
Consumer<S> doneFn = childState -> this.doneFn.accept(state, childState);
context.install(
new DispatchingVisitor<>(this, newStateFn.apply(state, prefix), doneFn),
DispatchingVisitor::done);
return true;
}
return false;
}
}
private static final class ValueAction<T> {
private final PathMatcher matcher;
private BiConsumer<T, CldrValue> doFn;
ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn) {
this.matcher = checkNotNull(matcher);
this.doFn = checkNotNull(doFn);
}
boolean matches(T state, CldrValue value) {
if (matcher.locallyMatches(value.getPath())) {
doFn.accept(state, value);
return true;
}
return false;
}
}
private static final class DispatchingVisitor<T> implements PrefixVisitor {
CldrDataProcessor<T> processor;
private final T state;
private final Consumer<T> doneFn;
DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn) {
this.processor = checkNotNull(processor);
this.state = checkNotNull(state);
this.doneFn = checkNotNull(doneFn);
}
@Override
public void visitPrefixStart(CldrPath prefix, Context context) {
processor.dispatchPrefixActions(state, prefix, context);
}
@Override
public void visitValue(CldrValue value) {
processor.dispatchValueActions(state, value);
}
// Important: This is NOT visitPrefixEnd() since that happens multiple times and isn't
// going to be called for the prefix at which this visitor was started.
void done() {
doneFn.accept(state);
}
}
}