tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java - external/github.com/unicode-org/icu - Git at Google

 // © 2019 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 package org.unicode.icu.tool.cldrtoicu.mapper;

 import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
 import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;

 import java.util.Arrays;
 import java.util.Optional;

 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 import org.unicode.cldr.api.CldrData;
 import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrValue;
 import org.unicode.icu.tool.cldrtoicu.IcuData;

 import com.google.common.base.Joiner;

 /**
  * Tests for {@link CollationMapper#process}, driven by small hand-built sets of CLDR values.
  */
 @RunWith(JUnit4.class)
 public class CollationMapperTest {
     /** Checks the output produced when no CLDR values at all are supplied. */
     @Test
     public void testEmpty() {
         IcuData nonRoot = mapWithoutSpecials("xx", cldrData());
         assertThat(nonRoot).hasName("xx");
         assertThat(nonRoot).hasFallback(true);
         assertThat(nonRoot).getPaths().isEmpty();

         // Unlike other locales, root is expected to carry two extra paths; they exist to work
         // around a CLDR data bug.
         IcuData root = mapWithoutSpecials("root", cldrData());
         assertThat(root).hasName("root");
         assertThat(root).hasFallback(true);
         assertThat(root).getPaths().hasSize(2);
         assertThat(root).hasValuesFor("/collations/standard/Version", getCldrVersionString());
         assertThat(root).hasEmptyValue("/collations/standard/Sequence");
     }

     /** Checks that the default collation value is emitted under "/collations/default". */
     @Test
     public void testDefault() {
         CldrValue defaultCollation =
             CldrValue.parseValue("//ldml/collations/defaultCollation", "any value");

         IcuData mapped = mapWithoutSpecials("xx", cldrData(defaultCollation));
         assertThat(mapped).getPaths().hasSize(1);
         assertThat(mapped).hasValuesFor("/collations/default", "any value");
     }

     /**
      * Pins down legacy behaviour copied from the original converter code; whether that behaviour
      * is semantically correct is not guaranteed.
      */
     @Test
     public void testLastAltRuleOverridesExisting() {
         // Paths are processed in DTD order, in which the path without an "alt" attribute comes
         // after all the others, yet the value that survives is that of the second "alt" path
         // (which overwrites the first). It is unclear whether two alt paths should ever exist, or
         // what they would mean: the original code appears to look for the "short" alternate
         // form, but the "alt" attribute can hold more than just the value "short".
         CldrValue firstAlt = collationRule("foo", "alt1", "First alt rule");
         CldrValue secondAlt = collationRule("foo", "alt2", "Second alt rule");
         CldrValue noAlt = collationRule("foo", null, "First rule");

         IcuData mapped = mapWithoutSpecials("xx", cldrData(firstAlt, secondAlt, noAlt));
         assertThat(mapped).getPaths().hasSize(2);
         assertThat(mapped).hasValuesFor("/collations/foo/Version", getCldrVersionString());
         assertThat(mapped).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
     }

     /** Checks that comment lines, empty lines and surrounding whitespace are all dropped. */
     @Test
     public void testCommentAndWhitespaceStripping() {
         CldrValue rule = collationRule("foo", null,
             "# Comments are stripped",
             "",
             "  # As are empty lines and leading/trailing spaces",
             "  Here is a value  ",
             "# And more comments to be stripped",
             "And another value");

         IcuData mapped = mapWithoutSpecials("xx", cldrData(rule));
         assertThat(mapped).hasValuesFor("/collations/foo/Sequence",
             "Here is a value",
             "And another value");
     }

     // Guards against anything odd happening to non-BMP character sequences. The input is modelled
     // on rule data of this shape:
     // <collation type='emoji'>
     //    <cr><![CDATA[
     //      # START AUTOGENERATED EMOJI ORDER
     //      & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿
     //      & [before 1]\uFDD1€
     //      <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇
     //      <*🥰😍🤩😘😗☺😚😙
     //      <*😋😛😜🤪😝🤑
     //      ...
     @Test
     public void testEmoji() {
         // The rule lines exactly as they are expected to come out of the mapper.
         String ignorableRule =
             "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
                 + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF";
         String beforeRule = "& [before 1]\uFDD1€";
         String firstGroup =
             "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
                 + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
                 + "\uD83D\uDE07";
         String secondGroup =
             "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
                 + "\uD83D\uDE19";
         String thirdGroup =
             "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11";

         // The input is the same lines, each indented by two spaces, preceded by a comment line.
         String indent = "  ";
         CldrValue emojiRule = collationRule("emoji", null,
             indent + "# START AUTOGENERATED EMOJI ORDER",
             indent + ignorableRule,
             indent + beforeRule,
             indent + firstGroup,
             indent + secondGroup,
             indent + thirdGroup);

         IcuData mapped = mapWithoutSpecials("xx", cldrData(emojiRule));

         assertThat(mapped).getPaths().hasSize(2);
         assertThat(mapped).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
         assertThat(mapped).hasValuesFor("/collations/emoji/Sequence",
             ignorableRule,
             beforeRule,
             firstGroup,
             secondGroup,
             thirdGroup);
     }

     /** Checks the paths produced from the optional "specials" data alone. */
     @Test
     public void testSpecials() {
         CldrValue ucaRules = CldrValue.parseValue(
             "//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", "");
         CldrValue depends = CldrValue.parseValue(
             "//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", "");
         CldrData specials = cldrData(ucaRules, depends);

         IcuData mapped = CollationMapper.process("xx", cldrData(), Optional.of(specials));
         assertThat(mapped).getPaths().hasSize(2);
         assertThat(mapped).hasValuesFor("UCARules:process(uca_rules)", "special rule");
         assertThat(mapped).hasValuesFor("depends:process(dependency)", "special deps");
     }

     // Runs the mapper for the given locale ID without supplying any "specials" data.
     private static IcuData mapWithoutSpecials(String localeId, CldrData cldrValues) {
         return CollationMapper.process(localeId, cldrValues, Optional.empty());
     }

     // Wraps the given values (possibly none) as a CldrData instance.
     private static CldrData cldrData(CldrValue... values) {
         return CldrDataSupplier.forValues(Arrays.asList(values));
     }

     // Builds the value for a collation rule path of the given type. The "alt" attribute is only
     // added to the path when alt is non-null, and the rule text is the lines joined by newlines.
     private static CldrValue collationRule(String type, String alt, String... lines) {
         StringBuilder path = new StringBuilder("//ldml/collations").append("/collation");
         appendAttribute(path, "type", type);
         path.append("/cr");
         if (alt != null) {
             appendAttribute(path, "alt", alt);
         }
         String ruleText = Joiner.on('\n').join(lines);
         return CldrValue.parseValue(path.toString(), ruleText);
     }

     // Appends a single attribute to a path under construction, in the form [@k="v"].
     private static void appendAttribute(StringBuilder out, String k, Object v) {
         String attribute = String.format("[@%s=\"%s\"]", k, v);
         out.append(attribute);
     }
 }
	// © 2019 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html
	package org.unicode.icu.tool.cldrtoicu.mapper;

	import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
	import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;

	import java.util.Arrays;
	import java.util.Optional;

	import org.junit.Test;
	import org.junit.runner.RunWith;
	import org.junit.runners.JUnit4;
	import org.unicode.cldr.api.CldrData;
	import org.unicode.cldr.api.CldrDataSupplier;
	import org.unicode.cldr.api.CldrValue;
	import org.unicode.icu.tool.cldrtoicu.IcuData;

	import com.google.common.base.Joiner;

	/**
	 * Tests for {@link CollationMapper#process}, driven by small hand-built sets of CLDR values.
	 */
	@RunWith(JUnit4.class)
	public class CollationMapperTest {
	    /** Checks the output produced when no CLDR values at all are supplied. */
	    @Test
	    public void testEmpty() {
	        IcuData nonRoot = mapWithoutSpecials("xx", cldrData());
	        assertThat(nonRoot).hasName("xx");
	        assertThat(nonRoot).hasFallback(true);
	        assertThat(nonRoot).getPaths().isEmpty();

	        // Unlike other locales, root is expected to carry two extra paths; they exist to work
	        // around a CLDR data bug.
	        IcuData root = mapWithoutSpecials("root", cldrData());
	        assertThat(root).hasName("root");
	        assertThat(root).hasFallback(true);
	        assertThat(root).getPaths().hasSize(2);
	        assertThat(root).hasValuesFor("/collations/standard/Version", getCldrVersionString());
	        assertThat(root).hasEmptyValue("/collations/standard/Sequence");
	    }

	    /** Checks that the default collation value is emitted under "/collations/default". */
	    @Test
	    public void testDefault() {
	        CldrValue defaultCollation =
	            CldrValue.parseValue("//ldml/collations/defaultCollation", "any value");

	        IcuData mapped = mapWithoutSpecials("xx", cldrData(defaultCollation));
	        assertThat(mapped).getPaths().hasSize(1);
	        assertThat(mapped).hasValuesFor("/collations/default", "any value");
	    }

	    /**
	     * Pins down legacy behaviour copied from the original converter code; whether that behaviour
	     * is semantically correct is not guaranteed.
	     */
	    @Test
	    public void testLastAltRuleOverridesExisting() {
	        // Paths are processed in DTD order, in which the path without an "alt" attribute comes
	        // after all the others, yet the value that survives is that of the second "alt" path
	        // (which overwrites the first). It is unclear whether two alt paths should ever exist, or
	        // what they would mean: the original code appears to look for the "short" alternate
	        // form, but the "alt" attribute can hold more than just the value "short".
	        CldrValue firstAlt = collationRule("foo", "alt1", "First alt rule");
	        CldrValue secondAlt = collationRule("foo", "alt2", "Second alt rule");
	        CldrValue noAlt = collationRule("foo", null, "First rule");

	        IcuData mapped = mapWithoutSpecials("xx", cldrData(firstAlt, secondAlt, noAlt));
	        assertThat(mapped).getPaths().hasSize(2);
	        assertThat(mapped).hasValuesFor("/collations/foo/Version", getCldrVersionString());
	        assertThat(mapped).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
	    }

	    /** Checks that comment lines, empty lines and surrounding whitespace are all dropped. */
	    @Test
	    public void testCommentAndWhitespaceStripping() {
	        CldrValue rule = collationRule("foo", null,
	            "# Comments are stripped",
	            "",
	            " # As are empty lines and leading/trailing spaces",
	            " Here is a value ",
	            "# And more comments to be stripped",
	            "And another value");

	        IcuData mapped = mapWithoutSpecials("xx", cldrData(rule));
	        assertThat(mapped).hasValuesFor("/collations/foo/Sequence",
	            "Here is a value",
	            "And another value");
	    }

	    // Guards against anything odd happening to non-BMP character sequences. The input is modelled
	    // on rule data of this shape:
	    // <collation type='emoji'>
	    //    <cr><![CDATA[
	    //      # START AUTOGENERATED EMOJI ORDER
	    //      & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿
	    //      & [before 1]\uFDD1€
	    //      <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇
	    //      <*🥰😍🤩😘😗☺😚😙
	    //      <*😋😛😜🤪😝🤑
	    //      ...
	    @Test
	    public void testEmoji() {
	        // The rule lines exactly as they are expected to come out of the mapper.
	        String ignorableRule =
	            "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
	                + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF";
	        String beforeRule = "& [before 1]\uFDD1€";
	        String firstGroup =
	            "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
	                + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
	                + "\uD83D\uDE07";
	        String secondGroup =
	            "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
	                + "\uD83D\uDE19";
	        String thirdGroup =
	            "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11";

	        // The input is the same lines, each indented by one space, preceded by a comment line.
	        String indent = " ";
	        CldrValue emojiRule = collationRule("emoji", null,
	            indent + "# START AUTOGENERATED EMOJI ORDER",
	            indent + ignorableRule,
	            indent + beforeRule,
	            indent + firstGroup,
	            indent + secondGroup,
	            indent + thirdGroup);

	        IcuData mapped = mapWithoutSpecials("xx", cldrData(emojiRule));

	        assertThat(mapped).getPaths().hasSize(2);
	        assertThat(mapped).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
	        assertThat(mapped).hasValuesFor("/collations/emoji/Sequence",
	            ignorableRule,
	            beforeRule,
	            firstGroup,
	            secondGroup,
	            thirdGroup);
	    }

	    /** Checks the paths produced from the optional "specials" data alone. */
	    @Test
	    public void testSpecials() {
	        CldrValue ucaRules = CldrValue.parseValue(
	            "//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", "");
	        CldrValue depends = CldrValue.parseValue(
	            "//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", "");
	        CldrData specials = cldrData(ucaRules, depends);

	        IcuData mapped = CollationMapper.process("xx", cldrData(), Optional.of(specials));
	        assertThat(mapped).getPaths().hasSize(2);
	        assertThat(mapped).hasValuesFor("UCARules:process(uca_rules)", "special rule");
	        assertThat(mapped).hasValuesFor("depends:process(dependency)", "special deps");
	    }

	    // Runs the mapper for the given locale ID without supplying any "specials" data.
	    private static IcuData mapWithoutSpecials(String localeId, CldrData cldrValues) {
	        return CollationMapper.process(localeId, cldrValues, Optional.empty());
	    }

	    // Wraps the given values (possibly none) as a CldrData instance.
	    private static CldrData cldrData(CldrValue... values) {
	        return CldrDataSupplier.forValues(Arrays.asList(values));
	    }

	    // Builds the value for a collation rule path of the given type. The "alt" attribute is only
	    // added to the path when alt is non-null, and the rule text is the lines joined by newlines.
	    private static CldrValue collationRule(String type, String alt, String... lines) {
	        StringBuilder path = new StringBuilder("//ldml/collations").append("/collation");
	        appendAttribute(path, "type", type);
	        path.append("/cr");
	        if (alt != null) {
	            appendAttribute(path, "alt", alt);
	        }
	        String ruleText = Joiner.on('\n').join(lines);
	        return CldrValue.parseValue(path.toString(), ruleText);
	    }

	    // Appends a single attribute to a path under construction, in the form [@k="v"].
	    private static void appendAttribute(StringBuilder out, String k, Object v) {
	        String attribute = String.format("[@%s=\"%s\"]", k, v);
	        out.append(attribute);
	    }
	}