| // © 2019 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| package org.unicode.icu.tool.cldrtoicu.mapper; |
| |
| import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString; |
| import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; |
| |
| import java.util.Arrays; |
| import java.util.Optional; |
| |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| import org.unicode.cldr.api.CldrData; |
| import org.unicode.cldr.api.CldrDataSupplier; |
| import org.unicode.cldr.api.CldrValue; |
| import org.unicode.icu.tool.cldrtoicu.IcuData; |
| |
| import com.google.common.base.Joiner; |
| |
| /** |
|  * Tests for {@link CollationMapper}, which turns CLDR collation data into {@link IcuData}. |
|  * |
|  * <p>Every test builds a small in-memory {@link CldrData} instance from explicit values, passes |
|  * it to {@code CollationMapper.process()} and checks the paths and values that come back. |
|  */ |
| @RunWith(JUnit4.class) |
| public class CollationMapperTest { |
|     /** Empty input produces no paths for a normal locale, but two special paths for "root". */ |
|     @Test |
|     public void testEmpty() { |
|         IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty()); |
|         assertThat(icuData).hasName("xx"); |
|         assertThat(icuData).hasFallback(true); |
|         assertThat(icuData).getPaths().isEmpty(); |
| |
|         // Root gets a couple of special paths added to it due to the need to work around a CLDR |
|         // data bug. |
|         IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty()); |
|         assertThat(rootData).hasName("root"); |
|         assertThat(rootData).hasFallback(true); |
|         assertThat(rootData).getPaths().hasSize(2); |
|         assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString()); |
|         assertThat(rootData).hasEmptyValue("/collations/standard/Sequence"); |
|     } |
| |
|     /** The defaultCollation value is copied unchanged to "/collations/default". */ |
|     @Test |
|     public void testDefault() { |
|         CldrData cldrData = |
|             cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value")); |
| |
|         IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); |
|         assertThat(icuData).getPaths().hasSize(1); |
|         assertThat(icuData).hasValuesFor("/collations/default", "any value"); |
|     } |
| |
|     // This tests legacy behaviour which mimics the original converter code. There's no promise |
|     // that it's semantically correct though. |
|     @Test |
|     public void testLastAltRuleOverridesExisting() { |
|         // Note that in DTD order (which is what the paths are processed in) the path with no "alt" |
|         // attribute comes after everything else, but the first "alt" path is overwritten by the |
|         // second. It's not even clear there should ever be two alt paths, or what the paths mean |
|         // (the original code seems to suggest it's looking for the "short" alternate form, but |
|         // the "alt" attribute can have more than the value "short"...) |
|         CldrData cldrData = cldrData( |
|             collationRule("foo", "alt1", "First alt rule"), |
|             collationRule("foo", "alt2", "Second alt rule"), |
|             collationRule("foo", null, "First rule")); |
| |
|         IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); |
|         assertThat(icuData).getPaths().hasSize(2); |
|         assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString()); |
|         assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule"); |
|     } |
| |
|     /** |
|      * Comment lines (starting with '#', indented or not), empty lines and leading/trailing |
|      * spaces are expected to be removed from the rule text, leaving one value per remaining line. |
|      */ |
|     @Test |
|     public void testCommentAndWhitespaceStripping() { |
|         CldrData cldrData = cldrData( |
|             collationRule("foo", null, |
|                 "# Comments are stripped", |
|                 "", |
|                 " # As are empty lines and leading/trailing spaces", |
|                 " Here is a value ", |
|                 "# And more comments to be stripped", |
|                 "And another value")); |
| |
|         IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); |
|         assertThat(icuData).hasValuesFor("/collations/foo/Sequence", |
|             "Here is a value", |
|             "And another value"); |
|     } |
| |
|     // Just in case anything weird happens with non-BMP char sequences: |
|     // <collation type='emoji'> |
|     // <cr><![CDATA[ |
|     // # START AUTOGENERATED EMOJI ORDER |
|     // & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿 |
|     // & [before 1]\uFDD1€ |
|     // <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇 |
|     // <*🥰😍🤩😘😗☺😚😙 |
|     // <*😋😛😜🤪😝🤑 |
|     // ... |
|     // |
|     // All non-ASCII characters in the test data below are written as Unicode escapes (including |
|     // U+20AC EURO SIGN and U+263A WHITE SMILING FACE) so that the data does not depend on the |
|     // encoding used to read or compile this source file. |
|     @Test |
|     public void testEmoji() { |
|         CldrData cldrData = cldrData( |
|             collationRule("emoji", null, |
|                 " # START AUTOGENERATED EMOJI ORDER", |
|                 " & [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2" |
|                     + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF", |
|                 " & [before 1]\uFDD1\u20AC", |
|                 " <*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05" |
|                     + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A" |
|                     + "\uD83D\uDE07", |
|                 " <*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17\u263A\uD83D\uDE1A" |
|                     + "\uD83D\uDE19", |
|                 " <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11")); |
| |
|         IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); |
| |
|         assertThat(icuData).getPaths().hasSize(2); |
|         assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString()); |
|         assertThat(icuData).hasValuesFor("/collations/emoji/Sequence", |
|             "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2" |
|                 + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF", |
|             "& [before 1]\uFDD1\u20AC", |
|             "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05" |
|                 + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A" |
|                 + "\uD83D\uDE07", |
|             "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17\u263A\uD83D\uDE1A" |
|                 + "\uD83D\uDE19", |
|             "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"); |
|     } |
| |
|     /** |
|      * Values carried as attributes in the optional ICU "specials" data are mapped even when the |
|      * main CLDR data is empty. |
|      */ |
|     @Test |
|     public void testSpecials() { |
|         CldrData specials = cldrData( |
|             CldrValue.parseValue("//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""), |
|             CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", "")); |
| |
|         IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials)); |
|         assertThat(icuData).getPaths().hasSize(2); |
|         assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule"); |
|         assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps"); |
|     } |
| |
|     /** Wraps the given values (possibly none) in a {@link CldrData} instance. */ |
|     private static CldrData cldrData(CldrValue... values) { |
|         return CldrDataSupplier.forValues(Arrays.asList(values)); |
|     } |
| |
|     /** |
|      * Builds the value for a collation rule element, i.e. a path of the form |
|      * {@code //ldml/collations/collation[@type="..."]/cr[@alt="..."]}. |
|      * |
|      * @param type value of the "type" attribute on the collation element. |
|      * @param alt value of the "alt" attribute on the rule element, or null to omit the attribute. |
|      * @param lines the lines of rule text, which are joined with newlines to form the value. |
|      */ |
|     private static CldrValue collationRule(String type, String alt, String... lines) { |
|         StringBuilder cldrPath = new StringBuilder("//ldml/collations"); |
|         appendAttribute(cldrPath.append("/collation"), "type", type); |
|         cldrPath.append("/cr"); |
|         if (alt != null) { |
|             appendAttribute(cldrPath, "alt", alt); |
|         } |
|         return CldrValue.parseValue(cldrPath.toString(), Joiner.on('\n').join(lines)); |
|     } |
| |
|     /** Appends a path attribute of the form {@code [@k="v"]} to the given buffer. */ |
|     private static void appendAttribute(StringBuilder out, String k, Object v) { |
|         out.append(String.format("[@%s=\"%s\"]", k, v)); |
|     } |
| } |