blob: 7af7422f2cf77270f52ad656bb3f9c9c0867fcee [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import com.google.common.base.Joiner;
/**
 * Tests for {@link CollationMapper}. Each test builds a small {@link CldrData} instance by hand,
 * runs {@code CollationMapper.process(...)} on it and checks the paths and values of the
 * resulting {@link IcuData}.
 */
@RunWith(JUnit4.class)
public class CollationMapperTest {
    /** With no CLDR input, a normal locale has no paths while "root" gets two extra ones. */
    @Test
    public void testEmpty() {
        IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty());

        assertThat(icuData).hasName("xx");
        assertThat(icuData).hasFallback(true);
        assertThat(icuData).getPaths().isEmpty();

        // Root gets a couple of special paths added to it due to the need to work around a CLDR
        // data bug.
        IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty());
        assertThat(rootData).hasName("root");
        assertThat(rootData).hasFallback(true);
        assertThat(rootData).getPaths().hasSize(2);
        assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString());
        assertThat(rootData).hasEmptyValue("/collations/standard/Sequence");
    }

    /** The default collation value is copied verbatim to {@code /collations/default}. */
    @Test
    public void testDefault() {
        CldrData cldrData =
            cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));

        IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());

        assertThat(icuData).getPaths().hasSize(1);
        assertThat(icuData).hasValuesFor("/collations/default", "any value");
    }

    // This tests legacy behaviour which mimics the original converter code. There's no promise
    // that it's semantically correct though.
    @Test
    public void testLastAltRuleOverridesExisting() {
        // Note that in DTD order (which is what the paths are processed in) the path with no "alt"
        // attribute comes after everything else, but the first "alt" path is overwritten by the
        // second. It's not even clear there should ever be two alt paths, or what the paths mean
        // (the original code seems to suggest it's looking for the "short" alternate form, but
        // the "alt" attribute can have more than the value "short"...)
        CldrData cldrData = cldrData(
            collationRule("foo", "alt1", "First alt rule"),
            collationRule("foo", "alt2", "Second alt rule"),
            collationRule("foo", null, "First rule"));

        IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
        assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
    }

    /**
     * Comment lines, empty lines and leading/trailing whitespace in a rule are expected to be
     * dropped, leaving one output value per remaining line.
     */
    @Test
    public void testCommentAndWhitespaceStripping() {
        CldrData cldrData = cldrData(
            collationRule("foo", null,
                "# Comments are stripped",
                "",
                "  # As are empty lines and leading/trailing spaces",
                "  Here is a value  ",
                "# And more comments to be stripped",
                "And another value"));

        IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());

        assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
            "Here is a value",
            "And another value");
    }

    // Just in case anything weird happens with non-BMP char sequences:
    // <collation type='emoji'>
    //   <cr><![CDATA[
    //     # START AUTOGENERATED EMOJI ORDER
    //     & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿
    //     & [before 1]\uFDD1€
    //     <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇
    //     <*🥰😍🤩😘😗☺😚😙
    //     <*😋😛😜🤪😝🤑
    //     ...
    @Test
    public void testEmoji() {
        // All non-ASCII characters (including the euro sign and the BMP smiley) are written as
        // Unicode escapes so that this test does not depend on the encoding used to read or
        // compile the source file.
        CldrData cldrData = cldrData(
            collationRule("emoji", null,
                "  # START AUTOGENERATED EMOJI ORDER",
                "  & [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
                    + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
                "  & [before 1]\uFDD1\u20AC",
                "  <*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
                    + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
                    + "\uD83D\uDE07",
                "  <*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17\u263A\uD83D\uDE1A"
                    + "\uD83D\uDE19",
                "  <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));

        IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
        // Same lines as above, minus the comment line and the leading whitespace.
        assertThat(icuData).hasValuesFor("/collations/emoji/Sequence",
            "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
                + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
            "& [before 1]\uFDD1\u20AC",
            "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
                + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
                + "\uD83D\uDE07",
            "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17\u263A\uD83D\uDE1A"
                + "\uD83D\uDE19",
            "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11");
    }

    /**
     * Values supplied via the optional "specials" data are expected to appear under the
     * {@code UCARules:process(uca_rules)} and {@code depends:process(dependency)} paths.
     */
    @Test
    public void testSpecials() {
        CldrData specials = cldrData(
            CldrValue.parseValue(
                "//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""),
            CldrValue.parseValue(
                "//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));

        IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials));

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
        assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");
    }

    /** Wraps the given values (possibly none) in a {@link CldrData} instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /**
     * Builds a collation rule value whose path is
     * {@code //ldml/collations/collation[@type="<type>"]/cr}, with {@code [@alt="<alt>"]}
     * appended to the {@code cr} element when {@code alt} is non-null.
     *
     * @param type value of the {@code type} attribute on the {@code collation} element
     * @param alt value of the {@code alt} attribute on the {@code cr} element, or null for none
     * @param lines rule lines, joined with a newline character to form the value
     * @return the parsed CLDR value
     */
    private static CldrValue collationRule(String type, String alt, String... lines) {
        StringBuilder cldrPath = new StringBuilder("//ldml/collations");
        appendAttribute(cldrPath.append("/collation"), "type", type);
        cldrPath.append("/cr");
        if (alt != null) {
            appendAttribute(cldrPath, "alt", alt);
        }
        return CldrValue.parseValue(cldrPath.toString(), Joiner.on('\n').join(lines));
    }

    /** Appends {@code [@k="v"]} to the given path buffer. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        out.append("[@").append(k).append("=\"").append(v).append("\"]");
    }
}