// Source-viewer residue (not part of the Java source): blob: 1ad5cf6ecc706decf2a8cacd0ea82f8441356a92 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2002-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
/**
* Port From: ICU4C v2.1 : collate/CollationIteratorTest
* Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
**/
package com.ibm.icu.dev.test.collator;
import java.util.Locale;
import java.util.Arrays;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import com.ibm.icu.dev.test.*;
import com.ibm.icu.text.*;
import com.ibm.icu.lang.UCharacter;
public class CollationIteratorTest extends TestFmwk {
String test1 = "What subset of all possible test cases?";
String test2 = "has the highest probability of detecting";
/**
 * Command-line entry point: creates one instance of this test class and
 * hands the arguments to its inherited run(String[]) method.
 *
 * @param args command-line arguments, passed through unchanged
 * @throws Exception anything run(String[]) lets escape
 */
public static void main(String[] args) throws Exception {
    CollationIteratorTest test = new CollationIteratorTest();
    test.run(args);
    // To debug a single test, call it directly on the instance instead, e.g.
    // test.TestNormalizedUnicodeChar();
}
/**
 * Checks that repositioning an iterator with setOffset() throws away any
 * collation elements left over from a partially consumed expansion.
 * <p>
 * With the tailoring used here 'd' is treated as the expanding character.
 * The test remembers the first element of "abcd", jumps to 'd' and reads
 * only one element from it, rewinds to offset 0, and then expects next()
 * to return the remembered first element again.
 * <p>
 * Every step aborts the test on failure, because the final comparison is
 * meaningless once an earlier step has gone wrong.
 *
 * @bug 4157299
 */
public void TestClearBuffers(/* char* par */) {
    RuleBasedCollator c = null;
    try {
        c = new RuleBasedCollator("&a < b < c & ab = d");
    } catch (Exception e) {
        // Rule/data problems are reported as a warning, not a failure.
        warnln("Couldn't create a RuleBasedCollator.");
        return;
    }
    String source = "abcd";
    CollationElementIterator i = c.getCollationElementIterator(source);

    int e0 = 0;
    try {
        e0 = i.next(); // save the first collation element
    } catch (Exception e) {
        errln("call to i.next() failed.");
        return;
    }
    try {
        i.setOffset(3); // go to the expanding character
    } catch (Exception e) {
        errln("call to i.setOffset(3) failed.");
        return;
    }
    try {
        i.next(); // but only use up half of it
    } catch (Exception e) {
        errln("call to i.next() failed.");
        return;
    }
    try {
        i.setOffset(0); // go back to the beginning
    } catch (Exception e) {
        errln("call to i.setOffset(0) failed. ");
        // The iterator position is unknown now; comparing would only
        // produce a second, misleading failure.
        return;
    }

    int first = 0;
    try {
        first = i.next(); // and get this one again
    } catch (Exception ee) {
        errln("call to i.next() failed. ");
        return;
    }
    if (first != e0) {
        errln("got 0x" + Integer.toHexString(first) + ", expected 0x" + Integer.toHexString(e0));
    }
}
/**
 * Test for getMaxExpansion().
 * <p>
 * Part 1 walks every UTF-16 code unit from 0x0001 to 0xFFFF, iterates the
 * one-character string backwards, counts the collation elements produced
 * and checks that getMaxExpansion() for the last element is never smaller
 * than that count.
 * <p>
 * Part 2 checks exact values for selected characters under the tailoring
 * "&amp;a &lt; ab &lt; c/aba &lt; d &lt; z &lt; ch", and for a Jamo under a
 * second tailoring that gives U+1165 an expansion.
 * <p>
 * Failure messages name the code point being tested followed by the
 * expectation that was violated.
 *
 * @bug 4108762
 */
public void TestMaxExpansion(/* char* par */) {
    int unassigned = 0xEFFFD;
    String rule = "&a < ab < c/aba < d < z < ch";
    RuleBasedCollator coll = null;
    try {
        coll = new RuleBasedCollator(rule);
    } catch (Exception e) {
        warnln("Fail to create RuleBasedCollator");
        return;
    }

    char ch = 0;
    String str = String.valueOf(ch);
    CollationElementIterator iter = coll.getCollationElementIterator(str);

    // Part 1: the reported maximum must never be below the observed count.
    while (ch < 0xFFFF) {
        int count = 1;
        ch++;
        str = String.valueOf(ch);
        iter.setText(str);
        int order = iter.previous();
        // thai management: skip a leading zero element
        if (order == 0) {
            order = iter.previous();
        }
        while (iter.previous() != CollationElementIterator.NULLORDER) {
            count++;
        }
        if (iter.getMaxExpansion(order) < count) {
            // Print the value in hex; appending the char itself would put
            // control characters and lone surrogates into the log.
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + ", maximum expansion count < " + count);
        }
    }

    // Part 2: testing for exact max expansion
    ch = 0;
    while (ch < 0x61) {
        str = String.valueOf(ch);
        iter.setText(str);
        int order = iter.previous();
        if (iter.getMaxExpansion(order) != 1) {
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + " maximum expansion count == 1");
        }
        ch++;
    }

    ch = 0x63; // 'c', tailored above as c/aba
    str = String.valueOf(ch);
    iter.setText(str);
    int temporder = iter.previous();
    if (iter.getMaxExpansion(temporder) != 3) {
        errln("Failure at codepoint 0x" + Integer.toHexString(ch)
              + " maximum expansion count == 3");
    }

    ch = 0x64; // 'd'
    str = String.valueOf(ch);
    iter.setText(str);
    temporder = iter.previous();
    if (iter.getMaxExpansion(temporder) != 1) {
        errln("Failure at codepoint 0x" + Integer.toHexString(ch)
              + " maximum expansion count == 1");
    }

    // Supplementary code point held in 'unassigned'.
    str = UCharacter.toString(unassigned);
    iter.setText(str);
    temporder = iter.previous();
    if (iter.getMaxExpansion(temporder) != 2) {
        // Report the code point actually tested, not the stale 'ch'.
        errln("Failure at codepoint 0x" + Integer.toHexString(unassigned)
              + " maximum expansion count == 2");
    }

    // testing jamo
    ch = 0x1165;
    str = String.valueOf(ch);
    iter.setText(str);
    temporder = iter.previous();
    if (iter.getMaxExpansion(temporder) > 3) {
        // The check tolerates anything up to and including 3.
        errln("Failure at codepoint 0x" + Integer.toHexString(ch)
              + " maximum expansion count <= 3");
    }

    // testing special jamo: the rule below spells  &q < U+1165 / qqqq
    rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
    try {
        coll = new RuleBasedCollator(rule);
    } catch (Exception e) {
        errln("Fail to create RuleBasedCollator");
        return;
    }
    // str still holds U+1165 from the previous check.
    iter = coll.getCollationElementIterator(str);
    temporder = iter.previous();
    if (iter.getMaxExpansion(temporder) != 6) {
        errln("Failure at codepoint 0x" + Integer.toHexString(ch)
              + " maximum expansion count == 6");
    }
}
/**
 * Test for getOffset() and setOffset().
 * <p>
 * Covers, in order:
 * <ul>
 * <li>boundaries: previous() at offset 0 and next() at the end of the text
 *     must both return NULLORDER;</li>
 * <li>after a full forward pass getOffset() must equal the text length;</li>
 * <li>setOffset(0) must make the iterator behave like a freshly created
 *     one;</li>
 * <li>an offset inside a contraction ("ch" under the tailoring
 *     "&amp; a &lt; ch") must behave like an offset at its start;</li>
 * <li>an offset inside a surrogate pair must behave like an offset at its
 *     start.</li>
 * </ul>
 */
public void TestOffset(/* char* par */) {
    RuleBasedCollator en_us;
    try {
        en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    } catch (Exception e) {
        warnln("ERROR: in creation of collator of ENGLISH locale");
        return;
    }
    CollationElementIterator iter = en_us.getCollationElementIterator(test1);

    // testing boundaries
    iter.setOffset(0);
    if (iter.previous() != CollationElementIterator.NULLORDER) {
        errln("Error: After setting offset to 0, we should be at the end "
              + "of the backwards iteration");
    }
    iter.setOffset(test1.length());
    if (iter.next() != CollationElementIterator.NULLORDER) {
        errln("Error: After setting offset to the end of the string, we "
              + "should be at the end of the forwards iteration");
    }

    // Run all the way through the iterator, then get the offset
    int[] orders = CollationTest.getOrders(iter);
    logln("orders.length = " + orders.length);
    int offset = iter.getOffset();
    if (offset != test1.length()) {
        String msg1 = "offset at end != length: ";
        String msg2 = " vs ";
        errln(msg1 + offset + msg2 + test1.length());
    }

    // Now set the offset back to the beginning and see if it works
    CollationElementIterator pristine = en_us.getCollationElementIterator(test1);
    try {
        iter.setOffset(0);
    } catch (Exception e) {
        errln("setOffset failed.");
    }
    assertEqual(iter, pristine);

    // setting offset in the middle of a contraction
    String contraction = "change";
    RuleBasedCollator tailored = null;
    try {
        tailored = new RuleBasedCollator("& a < ch");
    } catch (Exception e) {
        errln("Error: in creation of Spanish collator");
        // Everything below dereferences 'tailored'; without this return the
        // test would die with a NullPointerException instead of the error
        // reported above.
        return;
    }
    iter = tailored.getCollationElementIterator(contraction);
    int[] order = CollationTest.getOrders(iter);
    iter.setOffset(1); // sets offset in the middle of ch
    int[] order2 = CollationTest.getOrders(iter);
    if (!Arrays.equals(order, order2)) {
        errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    }

    contraction = "peache";
    iter = tailored.getCollationElementIterator(contraction);
    iter.setOffset(3);
    order = CollationTest.getOrders(iter);
    iter.setOffset(4); // sets offset in the middle of ch
    order2 = CollationTest.getOrders(iter);
    if (!Arrays.equals(order, order2)) {
        errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    }

    // setting offset in the middle of a surrogate pair
    String surrogate = "\ud800\udc00str";
    iter = tailored.getCollationElementIterator(surrogate);
    order = CollationTest.getOrders(iter);
    iter.setOffset(1); // sets offset in the middle of surrogate
    order2 = CollationTest.getOrders(iter);
    if (!Arrays.equals(order, order2)) {
        errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    }

    surrogate = "simple\ud800\udc00str";
    iter = tailored.getCollationElementIterator(surrogate);
    iter.setOffset(6);
    order = CollationTest.getOrders(iter);
    iter.setOffset(7); // sets offset in the middle of surrogate
    order2 = CollationTest.getOrders(iter);
    if (!Arrays.equals(order, order2)) {
        errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    }
    // TODO: try iterating halfway through a messy string.
}
/**
 * Steps two iterators forward in lock-step and reports the first position
 * at which they return different collation elements; stops silently once
 * both have returned NULLORDER together. Afterwards each iterator is handed
 * to CollationTest.backAndForth().
 *
 * @param i1 first iterator; consumed by this call
 * @param i2 second iterator; consumed by this call
 */
void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
    int index = 0;
    while (true) {
        int left = i1.next();
        int right = i2.next();
        if (left != right) {
            String leftHex = Integer.toHexString(left);
            String rightHex = Integer.toHexString(right);
            errln(" " + index + ": strength(0x" + leftHex
                  + ") != strength(0x" + rightHex + ")");
            break;
        }
        if (left == CollationElementIterator.NULLORDER) {
            // Both iterators ran out at the same time.
            break;
        }
        index++;
    }
    CollationTest.backAndForth(this, i1);
    CollationTest.backAndForth(this, i2);
}
/**
 * Test for CollationElementIterator.previous().
 * <p>
 * Runs CollationTest.backAndForth() over iterators built from: the US
 * English collator, a tailoring with contractions, a tailoring with an
 * expansion, a tailoring with both, the Thai collator and the Japanese
 * collator. If a collator cannot be created the failure is reported and
 * the test stops, since the following step would dereference it.
 *
 * @bug 4108758 - Make sure it works with contracting characters
 */
public void TestPrevious(/* char* par */) {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    CollationElementIterator iter = en_us.getCollationElementIterator(test1);
    // A basic test to see if it's working at all
    CollationTest.backAndForth(this, iter);

    // Test with a contracting character sequence
    String source;
    RuleBasedCollator c1 = null;
    try {
        c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
        return;
    }
    source = "abchdcba";
    iter = c1.getCollationElementIterator(source);
    CollationTest.backAndForth(this, iter);

    // Test with an expanding character sequence
    RuleBasedCollator c2 = null;
    try {
        c2 = new RuleBasedCollator("&a < b < c/abd < d");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
        return;
    }
    source = "abcd";
    iter = c2.getCollationElementIterator(source);
    CollationTest.backAndForth(this, iter);

    // Now try both
    RuleBasedCollator c3 = null;
    try {
        c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
        return;
    }
    source = "abcdbchdc";
    iter = c3.getCollationElementIterator(source);
    CollationTest.backAndForth(this, iter);

    // Thai text followed by Latin letters, using the Thai collator
    source = "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
    Collator c4 = null;
    try {
        c4 = Collator.getInstance(new Locale("th", "TH", ""));
    } catch (Exception e) {
        errln("Couldn't create a collator");
        return;
    }
    iter = ((RuleBasedCollator) c4).getCollationElementIterator(source);
    CollationTest.backAndForth(this, iter);

    // Latin 'a' followed by Katakana text, using the Japanese collator
    source = "\u0061\u30CF\u3099\u30FC";
    Collator c5 = null;
    try {
        c5 = Collator.getInstance(new Locale("ja", "JP", ""));
    } catch (Exception e) {
        errln("Couldn't create Japanese collator\n");
        // c5 is still null here; stop instead of failing below with a
        // NullPointerException.
        return;
    }
    iter = ((RuleBasedCollator) c5).getCollationElementIterator(source);
    CollationTest.backAndForth(this, iter);
}
/**
 * Test for setText().
 * <p>
 * An iterator that has been partly advanced over one string is re-pointed
 * at another string through each of the three setText() overloads used
 * here (String, java.text.CharacterIterator, UCharacterIterator). After
 * each call it must yield the same elements as a reference iterator over
 * that string.
 */
public void TestSetText(/* char* par */) {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    CollationElementIterator reference = en_us.getCollationElementIterator(test1);
    CollationElementIterator retargeted = en_us.getCollationElementIterator(test2);

    // Advance the second iterator a little so that it carries state:
    // one unconditional read, then up to nine more while elements remain.
    int element = retargeted.next();
    for (int step = 1;
         step < 10 && element != CollationElementIterator.NULLORDER;
         step++) {
        try {
            element = retargeted.next();
        } catch (Exception e) {
            errln("iter2.next() returned an error.");
            break;
        }
    }

    // 1) setText(String)
    try {
        retargeted.setText(test1);
    } catch (Exception e) {
        errln("call to iter2->setText(test1) failed.");
        return;
    }
    assertEqual(reference, retargeted);

    // 2) setText(CharacterIterator); the reference is rewound first
    reference.reset();
    CharacterIterator chariter = new StringCharacterIterator(test1);
    try {
        retargeted.setText(chariter);
    } catch (Exception e) {
        errln("call to iter2->setText(chariter(test1)) failed.");
        return;
    }
    assertEqual(reference, retargeted);

    // 3) setText(UCharacterIterator); the reference is rewound first
    reference.reset();
    UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
    try {
        retargeted.setText(uchariter);
    } catch (Exception e) {
        errln("call to iter2->setText(uchariter(test1)) failed.");
        return;
    }
    assertEqual(reference, retargeted);
}
/**
 * Runs CollationTest.backAndForth() with the US English collator over the
 * defined characters from U+0001 to U+FFFF.
 * <p>
 * After one warm-up string of three Thai characters, the characters are
 * fed in batches: each batch collects every defined character from the
 * current position up to and including the next multiple of 0xFF.
 */
public void TestUnicodeChar() {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);

    // A basic test to see if it's working at all
    StringBuffer source = new StringBuffer("\u0e4d\u0e4e\u0e4f");
    CollationElementIterator iter
        = en_us.getCollationElementIterator(source.toString());
    CollationTest.backAndForth(this, iter);

    char codepoint = 1;
    while (codepoint < 0xFFFE) {
        source.setLength(0);

        // Gather everything below the next multiple of 0xFF ...
        for (; codepoint % 0xFF != 0; codepoint++) {
            if (UCharacter.isDefined(codepoint)) {
                source.append(codepoint);
            }
        }
        // ... and then the multiple itself.
        if (UCharacter.isDefined(codepoint)) {
            source.append(codepoint);
        }
        // 0xFFFF is itself a multiple of 0xFF, so the scan above stops on
        // it; it must not be incremented or the char would wrap to 0.
        if (codepoint != 0xFFFF) {
            codepoint++;
        }

        iter = en_us.getCollationElementIterator(source.toString());
        CollationTest.backAndForth(this, iter);
    }
}
/**
 * Same sweep as the plain Unicode-character test, but using the Thai
 * collator, which the original author notes should have normalization on.
 * <p>
 * After one warm-up string holding U+FDFA, the defined characters from
 * U+0001 to U+FFFF are fed to CollationTest.backAndForth() in batches that
 * each end on a multiple of 0xFF.
 */
public void TestNormalizedUnicodeChar()
{
    // thai should have normalization on
    RuleBasedCollator th_th;
    try {
        Locale thai = new Locale("th", "TH");
        th_th = (RuleBasedCollator) Collator.getInstance(thai);
    } catch (Exception e) {
        warnln("Error creating Thai collator");
        return;
    }

    // Warm-up with a single character.
    CollationElementIterator iter = th_th.getCollationElementIterator("\uFDFA");
    CollationTest.backAndForth(this, iter);

    StringBuffer source = new StringBuffer();
    char codepoint = 0x1;
    while (codepoint < 0xfffe) {
        source.setLength(0);

        // Defined characters below the next multiple of 0xFF ...
        for (; codepoint % 0xFF != 0; codepoint++) {
            if (UCharacter.isDefined(codepoint)) {
                source.append(codepoint);
            }
        }
        // ... plus the multiple itself.
        if (UCharacter.isDefined(codepoint)) {
            source.append(codepoint);
        }
        // Do not step past 0xFFFF; a char would wrap around to 0.
        if (codepoint != 0xFFFF) {
            codepoint++;
        }

        // A basic test to see if it's working at all
        iter = th_th.getCollationElementIterator(source.toString());
        CollationTest.backAndForth(this, iter);
    }
}
/**
 * Testing the discontiguous contractions.
 * <p>
 * For each index, src holds the text to iterate and tgt holds the expected
 * segmentation: space-separated pieces whose collation elements, read one
 * piece after another, must equal the elements produced for the src string.
 * The first mismatch is reported with the index of the failing pair and
 * ends the test. After a successful comparison the source iterator is
 * reset and passed to CollationTest.backAndForth().
 * <p>
 * Any exception raised while the test runs is reported as a warning.
 */
public void TestDiscontiguous()
{
    String rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";
    String src[] = {// AB / ABC interrupted by another character
                    "ADB", "ADBC", "A\u0315B", "A\u0315BC",
                    // base character blocked
                    "XD\u0300", "XD\u0300\u0315",
                    // non blocking combining character
                    "X\u0319\u0300", "X\u0319\u0300\u0315",
                    // blocking combining character
                    "X\u0314\u0300", "X\u0314\u0300\u0315",
                    // contraction prefix
                    "ABDC", "AB\u0315C", "X\u0300D\u0315",
                    "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
                    // ends not with a contraction character
                    "X\u0319\u0300D", "X\u0319\u0300\u0315D",
                    "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
                    "X\u0300\u031A\u0315D"
    };
    String tgt[] = {// AB / ABC interrupted by another character
                    "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
                    // base character blocked
                    "X D \u0300", "X D \u0300\u0315",
                    // non blocking combining character
                    "X\u0300 \u0319", "X\u0300\u0315 \u0319",
                    // blocking combining character
                    "X \u0314 \u0300", "X \u0314 \u0300\u0315",
                    // contraction prefix
                    "AB DC", "AB \u0315 C", "X\u0300 D \u0315",
                    "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
                    // ends not with a contraction character
                    "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
                    "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
                    "X\u0300 \u031A\u0315D"
    };
    try {
        RuleBasedCollator coll = new RuleBasedCollator(rulestr);
        CollationElementIterator iter
            = coll.getCollationElementIterator("");
        CollationElementIterator resultiter
            = coll.getCollationElementIterator("");
        for (int count = 0; count < src.length; count++) {
            iter.setText(src[count]);
            String expected = tgt[count];
            int start = 0;
            while (start < expected.length()) {
                // Cut out the next space-delimited piece of the target.
                int end = expected.indexOf(' ', start);
                if (end < 0) {
                    end = expected.length();
                }
                resultiter.setText(expected.substring(start, end));
                // Every element of the piece must be the next element of
                // the source iteration.
                int ce = resultiter.next();
                while (ce != CollationElementIterator.NULLORDER) {
                    if (ce != iter.next()) {
                        // Separator added so the index does not run into
                        // the preceding word.
                        errln("Discontiguous contraction test mismatch at "
                              + count);
                        return;
                    }
                    ce = resultiter.next();
                }
                start = end + 1;
            }
            iter.reset();
            CollationTest.backAndForth(this, iter);
        }
    }
    catch (Exception e) {
        warnln("Error running discontiguous tests " + e.toString());
    }
}
/**
 * Test the incremental normalization.
 * <p>
 * Builds a tailored collator with canonical decomposition switched on,
 * then points a single iterator at each test string in turn and runs
 * CollationTest.backAndForth() on it. The strings combine precomposed
 * characters and combining marks in several orders.
 */
public void TestNormalization()
{
    String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
    String testdata[] = {
        "\u1ED9", "o\u0323\u0302",
        "\u0300\u0315", "\u0315\u0300",
        "A\u0300\u0315B", "A\u0315\u0300B",
        "A\u0316\u0315B", "A\u0315\u0316B",
        "\u0316\u0300\u0315", "\u0315\u0300\u0316",
        "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
        "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"
    };

    RuleBasedCollator coll;
    try {
        coll = new RuleBasedCollator(rules);
        coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    } catch (Exception e) {
        warnln("ERROR: in creation of collator using rules " + rules);
        return;
    }

    // One iterator is reused for every string; its initial text is only a
    // placeholder that gets replaced before the first check.
    CollationElementIterator iter = coll.getCollationElementIterator("testing");
    int index = 0;
    while (index < testdata.length) {
        iter.setText(testdata[index]);
        CollationTest.backAndForth(this, iter);
        index++;
    }
}
}