blob: e57981a82193272b6a2de8e7a6a1f4bc19e82b93 [file] [log] [blame]
// © 2020 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.localedistance;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import java.util.LinkedHashMap;
import java.util.Map;
import org.junit.Test;
import com.google.common.collect.ImmutableMap;
import com.ibm.icu.util.BytesTrie;
public class TrieTest {
@Test
public void testSimple() {
Trie trie = new Trie();
trie.root().with("answer", t -> t.putPrefixAndValue(42));
assertThat(getRawTrieTable(trie.toByteArray())).containsExactly("answer", 42);
}
@Test
public void testSubSpan() {
Trie trie = new Trie();
trie.root().with("foo", foo -> foo.with("bar", fooBar -> fooBar.putPrefixAndValue(42)));
assertThat(getRawTrieTable(trie.toByteArray())).containsExactly("foo-bar", 42);
}
@Test
public void testHierarchyAndOrdering() {
Trie trie = new Trie();
trie.root().with("foo", foo -> {
foo.with("two", sub -> sub.putPrefixAndValue(3));
foo.with("one", sub -> sub.putPrefixAndValue(2));
foo.with("*", sub -> sub.putPrefixAndValue(1));
});
trie.root().with("bar", bar -> bar.with("baz", baz -> baz.with("quux", quux -> quux.putPrefixAndValue(0))));
// Order is by "subtag" (left-to-right) with lexicographical order of tags (other
// than '*' which is always sorted first).
assertThat(getRawTrieTable(trie.toByteArray()))
.containsExactly(
"bar-baz-quux", 0,
"foo-*", 1,
"foo-one", 2,
"foo-two", 3)
.inOrder();
}
@Test
public void testStarOrdering() {
Trie trie = new Trie();
// Use '$' which has a lower byte value that '*' in ASCII, but when it terminates a prefix,
// it has bit-7 set which makes it sort higher than '*'.
// In other tests it's not clear that '*' is sorted specially since '*' < [a-z] anyway.
trie.root().with("$", foo -> {
// A single '$' sorts after '*' because '$' will have bit-7 set, and '*' will not.
foo.with("$", sub -> sub.putPrefixAndValue(5));
// '$$' sorts below * because the leading '$' won't have bit-7 set.
foo.with("$$", sub -> sub.putPrefixAndValue(3));
foo.with("*", sub -> sub.putPrefixAndValue(4));
});
trie.root().with("*", foo -> {
foo.with("$", sub -> sub.putPrefixAndValue(2));
foo.with("*", sub -> sub.putPrefixAndValue(1));
});
trie.root().with("*", sub -> sub.putPrefixAndValue(0));
// Star is definitely sorted before other entries.
assertThat(getRawTrieTable(trie.toByteArray()))
.containsExactly(
"*", 0,
"*-*", 1,
"*-$", 2,
"$-$$", 3,
"$-*", 4,
"$-$", 5)
.inOrder();
}
@Test
public void testBadTrie_BadValue() {
Trie trie = new Trie();
IllegalArgumentException e =
assertThrows(
IllegalArgumentException.class,
() -> trie.root().with("foo", t -> t.putPrefixAndValue(-1)));
assertThat(e).hasMessageThat().contains("bad trie value");
assertThat(e).hasMessageThat().contains("-1");
}
@Test
public void testBadTrie_NoPrefix() {
Trie trie = new Trie();
IllegalStateException e =
assertThrows(IllegalStateException.class, () -> trie.root().putPrefixAndValue(23));
assertThat(e).hasMessageThat().contains("missing prefix");
assertThat(e).hasMessageThat().contains("23");
}
@Test
public void testBadTrie_BadPrefix() {
Trie trie = new Trie();
IllegalArgumentException e =
assertThrows(
IllegalArgumentException.class,
() -> trie.root().with("ümlaut", t -> t.putPrefixAndValue(0)));
assertThat(e).hasMessageThat().contains("invalid trie character");
assertThat(e).hasMessageThat().contains("ü");
}
@Test
public void testBadTrie_NoStarInPrefix() {
Trie trie = new Trie();
IllegalArgumentException e =
assertThrows(
IllegalArgumentException.class,
() -> trie.root().with("foo*bar", t -> t.putPrefixAndValue(0)));
assertThat(e).hasMessageThat().contains("must not contain '*'");
assertThat(e).hasMessageThat().contains("foo*bar");
}
@Test
public void testBadTrie_TooLong() {
Trie trie = new Trie();
IllegalStateException e =
assertThrows(IllegalStateException.class, () -> infiniteRecursion(trie.root()));
assertThat(e).hasMessageThat().contains("span too long");
}
private static void infiniteRecursion(Trie.Span span) {
span.with("!", TrieTest::infiniteRecursion);
}
private static ImmutableMap<String, Integer> getRawTrieTable(byte[] data) {
// We rebuild the Trie from the byte[] data.
BytesTrie trie = new BytesTrie(data, 0);
// Mostly copied from XLikelySubtags (since the necessary constructor is private).
// Main change is the this no longer uses a TreeMap, since we want to test order.
Map<String, Integer> map = new LinkedHashMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int length = entry.bytesLength();
for (int i = 0; i < length; i++) {
byte b = entry.byteAt(i);
sb.append((char) (b & 0x7f));
if (b < 0 || b == '*') {
// end of subtag (high bit set or special '*' wildcard)
sb.append("-");
}
}
checkState(sb.length() > 0 && sb.charAt(sb.length() - 1) == '-');
sb.setLength(sb.length() - 1);
map.put(sb.toString(), entry.value);
}
return ImmutableMap.copyOf(map);
}
}