blob: e78a6c5f472f76a02dcfcfcd5801a155722b57d1 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
#include "unicode/unistr.h"
#include "unicode/sortkey.h"
#include "itercoll.h"
#include "unicode/schriter.h"
#include "unicode/chariter.h"
#include "unicode/uchar.h"
#include "cmemory.h"
static UErrorCode status = U_ZERO_ERROR;
/**
 * Builds the fixture shared by the tests in this file: two fixed sample
 * strings (test1, test2) and an en_US collator (en_us).
 *
 * When the collator cannot be created, en_us is left null and the failure is
 * reported through errcheckln(); runIndexedTest() checks en_us before running
 * any test.
 */
CollationIteratorTest::CollationIteratorTest()
: test1("What subset of all possible test cases?", ""),
  test2("has the highest probability of detecting", "")
{
    // The status variable has file lifetime. ICU functions do nothing when
    // they are handed a failure code, so clear anything an earlier
    // construction may have left behind before using it again.
    status = U_ZERO_ERROR;
    en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
    if (U_FAILURE(status)) {
        delete en_us;
        en_us = 0;
        errcheckln(status, "Collator creation failed with %s", u_errorName(status));
    }
}
/**
 * Releases the en_US collator created by the constructor.
 * en_us may be null here (the constructor zeroes it on failure); deleting a
 * null pointer is a no-op.
 */
CollationIteratorTest::~CollationIteratorTest()
{
delete en_us;
}
/**
* Test for CollationElementIterator previous and next for the whole set of
* unicode characters.
*/
void CollationIteratorTest::TestUnicodeChar()
{
CollationElementIterator *iter;
UChar codepoint;
UnicodeString source;
for (codepoint = 1; codepoint < 0xFFFE;)
{
source.remove();
while (codepoint % 0xFF != 0)
{
if (u_isdefined(codepoint))
source += codepoint;
codepoint ++;
}
if (u_isdefined(codepoint))
source += codepoint;
if (codepoint != 0xFFFF)
codepoint ++;
iter = en_us->createCollationElementIterator(source);
/* A basic test to see if it's working at all */
backAndForth(*iter);
delete iter;
}
}
/**
 * Test for CollationElementIterator.previous()
 *
 * @bug 4108758 - Make sure it works with contracting characters
 *
 * Runs backAndForth() over iterators created from:
 *   - the en_US collator and test1,
 *   - a rule set with a contracting sequence,
 *   - a rule set with an expanding sequence,
 *   - a rule set with both,
 *   - the Thai and the Japanese locale collators.
 * A rule-based collator that cannot be built aborts the test; a locale
 * collator that cannot be created is reported and only its own section is
 * skipped.
 */
void CollationIteratorTest::TestPrevious(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);

    // A basic test to see if it's working at all
    backAndForth(*iter);
    delete iter;

    // Test with a contracting character sequence
    UnicodeString source;
    RuleBasedCollator *c1 = new RuleBasedCollator(
        (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
    if (c1 == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
        delete c1;
        return;
    }
    source = "abchdcba";
    iter = c1->createCollationElementIterator(source);
    backAndForth(*iter);
    delete iter;
    delete c1;

    // Test with an expanding character sequence
    RuleBasedCollator *c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
    if (c2 == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
        delete c2;
        return;
    }
    source = "abcd";
    iter = c2->createCollationElementIterator(source);
    backAndForth(*iter);
    delete iter;
    delete c2;

    // Now try both
    RuleBasedCollator *c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
    if (c3 == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
        delete c3;
        return;
    }
    source = "abcdbchdc";
    iter = c3->createCollationElementIterator(source);
    backAndForth(*iter);
    delete iter;
    delete c3;

    // Thai locale collator
    status = U_ZERO_ERROR;
    source = CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
    Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
    if (c4 == NULL || U_FAILURE(status)) {
        // Do not touch c4 after a failed creation; just report and move on.
        errln("Couldn't create a collator");
    } else {
        iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
        backAndForth(*iter);
        delete iter;
    }
    delete c4;

    // Japanese locale collator. Start from a clean status so that a failure
    // in the Thai section cannot turn this createInstance() into a no-op.
    status = U_ZERO_ERROR;
    source = CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
    Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
    if (c5 == NULL || U_FAILURE(status)) {
        errln("Couldn't create Japanese collator\n");
    } else {
        iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
        backAndForth(*iter);
        delete iter;
    }
    delete c5;
}
/**
* Test for getOffset() and setOffset()
*/
void CollationIteratorTest::TestOffset(/* char* par */)
{
CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
UErrorCode status = U_ZERO_ERROR;
// testing boundaries
iter->setOffset(0, status);
if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) {
errln("Error: After setting offset to 0, we should be at the end "
"of the backwards iteration");
}
iter->setOffset(test1.length(), status);
if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) {
errln("Error: After setting offset to end of the string, we should "
"be at the end of the backwards iteration");
}
// Run all the way through the iterator, then get the offset
int32_t orderLength = 0;
Order *orders = getOrders(*iter, orderLength);
int32_t offset = iter->getOffset();
if (offset != test1.length())
{
UnicodeString msg1("offset at end != length: ");
UnicodeString msg2(" vs ");
errln(msg1 + offset + msg2 + test1.length());
}
// Now set the offset back to the beginning and see if it works
CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
iter->setOffset(0, status);
if (U_FAILURE(status))
{
errln("setOffset failed.");
}
else
{
assertEqual(*iter, *pristine);
}
delete pristine;
delete[] orders;
delete iter;
// setting offset in the middle of a contraction
UnicodeString contraction = "change";
status = U_ZERO_ERROR;
RuleBasedCollator tailored("& a < ch", status);
if (U_FAILURE(status)) {
errln("Error: in creation of Spanish collator - %s", u_errorName(status));
return;
}
iter = tailored.createCollationElementIterator(contraction);
Order *order = getOrders(*iter, orderLength);
iter->setOffset(1, status); // sets offset in the middle of ch
int32_t order2Length = 0;
Order *order2 = getOrders(*iter, order2Length);
if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
}
delete[] order;
delete[] order2;
delete iter;
contraction = "peache";
iter = tailored.createCollationElementIterator(contraction);
iter->setOffset(3, status);
order = getOrders(*iter, orderLength);
iter->setOffset(4, status); // sets offset in the middle of ch
order2 = getOrders(*iter, order2Length);
if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
}
delete[] order;
delete[] order2;
delete iter;
// setting offset in the middle of a surrogate pair
UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
iter = tailored.createCollationElementIterator(surrogate);
order = getOrders(*iter, orderLength);
iter->setOffset(1, status); // sets offset in the middle of surrogate
order2 = getOrders(*iter, order2Length);
if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
}
delete[] order;
delete[] order2;
delete iter;
surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
iter = tailored.createCollationElementIterator(surrogate);
iter->setOffset(6, status);
order = getOrders(*iter, orderLength);
iter->setOffset(7, status); // sets offset in the middle of surrogate
order2 = getOrders(*iter, order2Length);
if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
}
delete[] order;
delete[] order2;
delete iter;
// TODO: try iterating halfway through a messy string.
}
/**
 * Test for setText()
 *
 * An iterator that has already been partly consumed over test2 is re-pointed
 * at test1, first through setText(const UnicodeString&, ...) and then through
 * setText(CharacterIterator&, ...); each time it must yield the same elements
 * as an iterator created over test1. Finally both overloads are checked with
 * an empty text, which must produce no collation elements.
 */
void CollationIteratorTest::TestSetText(/* char* par */)
{
    CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
    CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
    UErrorCode status = U_ZERO_ERROR;

    // Run through the second iterator just to exercise it
    int32_t c = iter2->next(status);
    int32_t i = 0;

    while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
    {
        if (U_FAILURE(status))
        {
            errln("iter2->next() returned an error.");
            delete iter2;
            delete iter1;
            // Both iterators are gone; continuing would use freed memory and
            // delete them a second time at the end of the function.
            return;
        }

        c = iter2->next(status);
    }

    // Now set it to point to the same string as the first iterator
    iter2->setText(test1, status);
    if (U_FAILURE(status))
    {
        errln("call to iter2->setText(test1) failed.");
    }
    else
    {
        assertEqual(*iter1, *iter2);
    }
    iter1->reset();

    // now use the overloaded setText(CharacterIterator&, UErrorCode) function to set the text
    StringCharacterIterator chariter(test1);
    iter2->setText(chariter, status);
    if (U_FAILURE(status))
    {
        errln("call to iter2->setText(chariter(test1)) failed.");
    }
    else
    {
        assertEqual(*iter1, *iter2);
    }

    // test for an empty string
    UnicodeString empty("");
    iter1->setText(empty, status);
    if (U_FAILURE(status)
        || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
        errln("Empty string should have no CEs.");
    }
    chariter.setText(empty);
    iter1->setText(chariter, status);
    if (U_FAILURE(status)
        || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
        errln("Empty string should have no CEs.");
    }

    delete iter2;
    delete iter1;
}
/** @bug 4108762
* Test for getMaxExpansion()
*/
void CollationIteratorTest::TestMaxExpansion(/* char* par */)
{
UErrorCode status = U_ZERO_ERROR;
UnicodeString rule("&a < ab < c/aba < d < z < ch");
RuleBasedCollator *coll = new RuleBasedCollator(rule, status);
UChar ch = 0;
UnicodeString str(ch);
CollationElementIterator *iter = coll->createCollationElementIterator(str);
while (ch < 0xFFFF && U_SUCCESS(status)) {
int count = 1;
uint32_t order;
ch ++;
UnicodeString str(ch);
iter->setText(str, status);
order = iter->previous(status);
/* thai management */
if (CollationElementIterator::isIgnorable(order))
order = iter->previous(status);
while (U_SUCCESS(status)
&& iter->previous(status) != (int32_t)CollationElementIterator::NULLORDER)
{
count ++;
}
if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
errln("Failure at codepoint %d, maximum expansion count < %d\n",
ch, count);
}
}
delete iter;
delete coll;
}
/*
* @bug 4157299
*/
void CollationIteratorTest::TestClearBuffers(/* char* par */)
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
if (c == NULL || U_FAILURE(status))
{
errln("Couldn't create a RuleBasedCollator.");
delete c;
return;
}
UnicodeString source("abcd");
CollationElementIterator *i = c->createCollationElementIterator(source);
int32_t e0 = i->next(status); // save the first collation element
if (U_FAILURE(status))
{
errln("call to i->next() failed. err=%s", u_errorName(status));
}
else
{
i->setOffset(3, status); // go to the expanding character
if (U_FAILURE(status))
{
errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
}
else
{
i->next(status); // but only use up half of it
if (U_FAILURE(status))
{
errln("call to i->next() failed. err=%s", u_errorName(status));
}
else
{
i->setOffset(0, status); // go back to the beginning
if (U_FAILURE(status))
{
errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
}
else
{
int32_t e = i->next(status); // and get this one again
if (U_FAILURE(status))
{
errln("call to i->next() failed. err=%s", u_errorName(status));
}
else if (e != e0)
{
errln("got 0x%X, expected 0x%X", e, e0);
}
}
}
}
}
delete i;
delete c;
}
/**
* Testing the assignment operator
*/
void CollationIteratorTest::TestAssignment()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *coll =
(RuleBasedCollator *)Collator::createInstance(status);
if (coll == NULL || U_FAILURE(status))
{
errln("Couldn't create a default collator.");
return;
}
UnicodeString source("abcd");
CollationElementIterator *iter1 =
coll->createCollationElementIterator(source);
CollationElementIterator iter2 = *iter1;
if (*iter1 != iter2) {
errln("Fail collation iterator assignment does not produce the same elements");
}
CollationElementIterator iter3(*iter1);
if (*iter1 != iter3) {
errln("Fail collation iterator copy constructor does not produce the same elements");
}
source = CharsToUnicodeString("a\\u0300\\u0325");
coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
CollationElementIterator *iter4
= coll->createCollationElementIterator(source);
CollationElementIterator iter5(*iter4);
if (*iter4 != iter5) {
errln("collation iterator assignment does not produce the same elements");
}
iter4->next(status);
if (U_FAILURE(status) || *iter4 == iter5) {
errln("collation iterator not equal");
}
iter5.next(status);
if (U_FAILURE(status) || *iter4 != iter5) {
errln("collation iterator equal");
}
iter4->next(status);
if (U_FAILURE(status) || *iter4 == iter5) {
errln("collation iterator not equal");
}
iter5.next(status);
if (U_FAILURE(status) || *iter4 != iter5) {
errln("collation iterator equal");
}
CollationElementIterator iter6(*iter4);
if (*iter4 != iter6) {
errln("collation iterator equal");
}
iter4->next(status);
if (U_FAILURE(status) || *iter4 == iter5) {
errln("collation iterator not equal");
}
iter5.next(status);
if (U_FAILURE(status) || *iter4 != iter5) {
errln("collation iterator equal");
}
iter4->next(status);
if (U_FAILURE(status) || *iter4 == iter5) {
errln("collation iterator not equal");
}
iter5.next(status);
if (U_FAILURE(status) || *iter4 != iter5) {
errln("collation iterator equal");
}
delete iter1;
delete iter4;
delete coll;
}
/**
* Testing the constructors
*/
void CollationIteratorTest::TestConstructors()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *coll =
(RuleBasedCollator *)Collator::createInstance(status);
if (coll == NULL || U_FAILURE(status))
{
errln("Couldn't create a default collator.");
return;
}
// testing protected constructor with character iterator as argument
StringCharacterIterator chariter(test1);
CollationElementIterator *iter1 =
coll->createCollationElementIterator(chariter);
if (U_FAILURE(status)) {
errln("Couldn't create collation element iterator with character iterator.");
return;
}
CollationElementIterator *iter2 =
coll->createCollationElementIterator(test1);
// initially the 2 collation element iterators should be the same
if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
|| *iter2 != *iter1) {
errln("CollationElementIterators constructed with the same string data should be the same at the start");
}
assertEqual(*iter1, *iter2);
delete iter1;
delete iter2;
// tests empty strings
UnicodeString empty("");
iter1 = coll->createCollationElementIterator(empty);
chariter.setText(empty);
iter2 = coll->createCollationElementIterator(chariter);
if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
|| *iter2 != *iter1) {
errln("CollationElementIterators constructed with the same string data should be the same at the start");
}
if (iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
errln("Empty string should have no CEs.");
}
if (iter2->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
errln("Empty string should have no CEs.");
}
delete iter1;
delete iter2;
delete coll;
}
/**
 * Testing the strength order
 *
 * Feeds the fixed value 0x0123ABCD to strengthOrder() while the owning
 * collator is switched through PRIMARY, SECONDARY and TERTIARY strength and
 * checks the masked result each time (0x01230000, 0x0123AB00, unchanged).
 * The first mismatch is reported and ends the checks; the iterator and the
 * collator are released on every path.
 */
void CollationIteratorTest::TestStrengthOrder()
{
    int order = 0x0123ABCD;

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll =
        (RuleBasedCollator *)Collator::createInstance(status);
    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    coll->setStrength(Collator::PRIMARY);
    CollationElementIterator *iter =
        coll->createCollationElementIterator(test1);
    if (iter == NULL) {
        errln("Couldn't create a collation element iterator from default collator");
        delete coll;
        return;
    }

    // Nested rather than early-returning so that the cleanup at the bottom
    // always runs, while later strengths are still skipped after a failure.
    if (iter->strengthOrder(order) != 0x01230000) {
        errln("Strength order for a primary strength collator should be the first 2 bytes");
    } else {
        coll->setStrength(Collator::SECONDARY);
        if (iter->strengthOrder(order) != 0x0123AB00) {
            errln("Strength order for a secondary strength collator should be the third byte");
        } else {
            coll->setStrength(Collator::TERTIARY);
            if (iter->strengthOrder(order) != order) {
                errln("Strength order for a tertiary strength collator should be the third byte");
            }
        }
    }

    delete iter;
    delete coll;
}
/**
 * Appends every collation order produced by next() on the given iterator to
 * target, each formatted as "0x" followed by 8 hex digits and a space.
 *
 * @param iter   iterator to drain; it is advanced until next() returns NULLORDER
 * @param target string that receives the formatted orders
 * @return       target, to allow chaining
 */
UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
{
    UErrorCode status = U_ZERO_ERROR;

    for (int32_t ce = iter.next(status);
         ce != CollationElementIterator::NULLORDER;
         ce = iter.next(status))
    {
        target += "0x";
        appendHex(ce, 8, target);
        target += " ";
    }

    return target;
}
/**
 * Advances both iterators in lock step and reports the first position at
 * which their next() values differ. Stops at that mismatch, or once both
 * have returned NULLORDER.
 *
 * @param i1 first iterator; consumed by this call
 * @param i2 second iterator; consumed by this call
 */
void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
{
    UErrorCode status = U_ZERO_ERROR;

    for (int32_t position = 0; ; ++position)
    {
        const int32_t left = i1.next(status);
        const int32_t right = i2.next(status);

        if (left != right)
        {
            errln("    %d: strength(0x%X) != strength(0x%X)", position, left, right);
            return;
        }

        // Equal values: NULLORDER here means both iterators are exhausted.
        if (left == CollationElementIterator::NULLORDER)
        {
            return;
        }
    }
}
/**
 * Test dispatcher: maps index to a test name and, when exec is set, runs that
 * test. An index outside 0..8 sets name to "". When the en_US collator was
 * not created, no test is run, a data error is logged and name is set to "".
 *
 * @param index zero-based test number
 * @param exec  run the test when true; otherwise only report its name
 * @param name  receives the test name
 */
void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
{
    if (exec)
    {
        logln("Collation Iteration Tests: ");
    }

    // Every test dereferences en_us, so bail out when it is missing.
    if (!en_us)
    {
        dataerrln("Class iterator not instantiated");
        name = "";
        return;
    }

    switch (index)
    {
    case 0:
        name = "TestPrevious";
        if (exec) { TestPrevious(/* par */); }
        break;
    case 1:
        name = "TestOffset";
        if (exec) { TestOffset(/* par */); }
        break;
    case 2:
        name = "TestSetText";
        if (exec) { TestSetText(/* par */); }
        break;
    case 3:
        name = "TestMaxExpansion";
        if (exec) { TestMaxExpansion(/* par */); }
        break;
    case 4:
        name = "TestClearBuffers";
        if (exec) { TestClearBuffers(/* par */); }
        break;
    case 5:
        name = "TestUnicodeChar";
        if (exec) { TestUnicodeChar(/* par */); }
        break;
    case 6:
        name = "TestAssignment";
        if (exec) { TestAssignment(/* par */); }
        break;
    case 7:
        name = "TestConstructors";
        if (exec) { TestConstructors(/* par */); }
        break;
    case 8:
        name = "TestStrengthOrder";
        if (exec) { TestStrengthOrder(/* par */); }
        break;
    default:
        name = "";
        break;
    }
}
#endif /* #if !UCONFIG_NO_COLLATION */