// blob: d01bf74712ba94b6b2c156563451be22608477d5 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;
/**
 * Tests for {@code BreakIteratorMapper.process()}.
 *
 * <p>The tests feed synthetic CLDR values into the mapper, either as the main data (suppressions)
 * or as the optional "specials" data (dictionaries and boundaries), and then check which resource
 * bundle paths and values appear in the resulting {@code IcuData}.
 */
@RunWith(JUnit4.class)
public class BreakIteratorMapperTest {

    /** Values used for the {@code type} attribute of the {@code segmentation} path element. */
    enum SegmentationType {
        GRAPHEME_CLUSTER_BREAK,
        LINE_BREAK,
        SENTENCE_BREAK,
        WORD_BREAK;

        // E.g. "SentenceBreak"
        @Override
        public String toString() {
            String constantName = name();
            return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, constantName);
        }
    }

    /** Element names used below the {@code icu:boundaries} path element. */
    enum BoundaryType {
        GRAPHEME,
        WORD,
        LINE,
        SENTENCE,
        TITLE;

        // E.g. "icu:grapheme"
        @Override
        public String toString() {
            String lowerCaseName = Ascii.toLowerCase(name());
            return "icu:" + lowerCaseName;
        }
    }

    /** Suppressions of one segmentation type should end up in a single array, in order. */
    @Test
    public void testSingleSuppression() {
        // Sort indices are handed out as 1, 2, 3 in declaration order.
        int sortIndex = 1;
        CldrValue first = suppression(SENTENCE_BREAK, "L.P.", sortIndex++);
        CldrValue second = suppression(SENTENCE_BREAK, "Alt.", sortIndex++);
        CldrValue third = suppression(SENTENCE_BREAK, "Approx.", sortIndex++);
        CldrData source = cldrData(first, second, third);

        IcuData icuData = BreakIteratorMapper.process("en", source, Optional.empty());

        assertThat(icuData).getPaths().hasSize(1);
        assertThat(icuData).hasValuesFor(
            "/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
    }

    // In real data, suppression is only a SentenceBreak thing, but we might as well test it for
    // other types.
    /** Suppressions of different segmentation types should end up in separate arrays. */
    @Test
    public void testMultipleSupressionTypes() {
        // Sort indices are handed out as 1 through 6 in declaration order, across both types.
        int sortIndex = 1;
        CldrValue sentence1 = suppression(SENTENCE_BREAK, "L.P.", sortIndex++);
        CldrValue sentence2 = suppression(SENTENCE_BREAK, "Alt.", sortIndex++);
        CldrValue sentence3 = suppression(SENTENCE_BREAK, "Approx.", sortIndex++);
        CldrValue line1 = suppression(LINE_BREAK, "Foo", sortIndex++);
        CldrValue line2 = suppression(LINE_BREAK, "Bar", sortIndex++);
        CldrValue line3 = suppression(LINE_BREAK, "Baz", sortIndex++);
        CldrData source = cldrData(sentence1, sentence2, sentence3, line1, line2, line3);

        IcuData icuData = BreakIteratorMapper.process("en", source, Optional.empty());

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor(
            "/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
        assertThat(icuData).hasValuesFor(
            "/exceptions/LineBreak:array",
            RbValue.of("Foo"),
            RbValue.of("Bar"),
            RbValue.of("Baz"));
    }

    /** Each dictionary in the specials data should produce one dependency path. */
    @Test
    public void testSpecials_dictionary() {
        CldrValue fooDictionary = dictionary("foo", "<foo deps>");
        CldrValue barDictionary = dictionary("bar", "<bar deps>");
        CldrData specials = cldrData(fooDictionary, barDictionary);
        // The main data is deliberately empty; only the specials contribute here.
        CldrData emptyData = cldrData();

        IcuData icuData = BreakIteratorMapper.process("en", emptyData, Optional.of(specials));

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
        assertThat(icuData).hasValuesFor("/dictionaries/bar:process(dependency)", "<bar deps>");
    }

    /**
     * Each boundary in the specials data should produce one dependency path, with the "alt" value
     * (when given) appended to the boundary name after an underscore.
     */
    @Test
    public void testSpecials_boundaries() {
        CldrValue graphemeBoundary = boundaries(GRAPHEME, "<grapheme deps>", null);
        CldrValue sentenceBoundary = boundaries(SENTENCE, "<sentence deps>", "altName");
        CldrData specials = cldrData(graphemeBoundary, sentenceBoundary);
        // The main data is deliberately empty; only the specials contribute here.
        CldrData emptyData = cldrData();

        IcuData icuData = BreakIteratorMapper.process("en", emptyData, Optional.of(specials));

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor(
            "/boundaries/grapheme:process(dependency)", "<grapheme deps>");
        assertThat(icuData).hasValuesFor(
            "/boundaries/sentence_altName:process(dependency)", "<sentence deps>");
    }

    /** Wraps the given values (possibly none) in a {@code CldrData} instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /**
     * Builds a "standard" suppression value for the given segmentation type.
     *
     * @param type the segmentation type written into the {@code type} attribute.
     * @param value the suppressed text.
     * @param index the sort index appended to the final path element.
     */
    private static CldrValue suppression(SegmentationType type, String value, int index) {
        StringBuilder path = new StringBuilder("//ldml/segmentations").append("/segmentation");
        appendAttribute(path, "type", type);
        // Suppression is an ordered element, so needs a sort index.
        path.append("/suppressions[@type=\"standard\"]")
            .append("/suppression#")
            .append(index);
        return CldrValue.parseValue(path.toString(), value);
    }

    /**
     * Builds an (empty valued) dictionary entry in the ICU break iterator specials data.
     *
     * @param type the value of the {@code type} attribute.
     * @param dependency the value of the {@code icu:dependency} attribute.
     */
    private static CldrValue dictionary(String type, String dependency) {
        StringBuilder path = new StringBuilder("//ldml/special/icu:breakIteratorData")
            .append("/icu:dictionaries/icu:dictionary");
        appendAttribute(path, "type", type);
        appendAttribute(path, "icu:dependency", dependency);
        return CldrValue.parseValue(path.toString(), "");
    }

    /**
     * Builds an (empty valued) boundary entry in the ICU break iterator specials data.
     *
     * @param type the boundary type, whose string form is used as the element name.
     * @param dependency the value of the {@code icu:dependency} attribute.
     * @param alt the value of the {@code alt} attribute, or null to omit the attribute.
     */
    private static CldrValue boundaries(BoundaryType type, String dependency, String alt) {
        StringBuilder path = new StringBuilder("//ldml/special/icu:breakIteratorData")
            .append("/icu:boundaries/")
            .append(type);
        appendAttribute(path, "icu:dependency", dependency);
        if (alt != null) {
            appendAttribute(path, "alt", alt);
        }
        return CldrValue.parseValue(path.toString(), "");
    }

    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }
}