| /******************************************************************** |
| * COPYRIGHT: |
| * Copyright (c) 1997-1999, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| |
| #include "cmemory.h" |
| |
| #ifndef _COLL |
| #include "unicode/coll.h" |
| #endif |
| |
| #ifndef _TBLCOLL |
| #include "unicode/tblcoll.h" |
| #endif |
| |
| #ifndef _UNISTR |
| #include "unicode/unistr.h" |
| #endif |
| |
| #ifndef _SORTKEY |
| #include "unicode/sortkey.h" |
| #endif |
| |
| #ifndef _ITERCOLL |
| #include "itercoll.h" |
| #endif |
| |
| #include "unicode/schriter.h" |
| #include "unicode/chariter.h" |
| #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0]) |
| |
| static UErrorCode status = U_ZERO_ERROR; |
| |
| const UnicodeString CollationIteratorTest::test1 = "What subset of all possible test cases?"; |
| const UnicodeString CollationIteratorTest::test2 = "has the highest probability of detecting"; |
| |
| CollationIteratorTest::CollationIteratorTest() |
| { |
| en_us = (RuleBasedCollator *)Collator::createInstance(Locale::US, status); |
| } |
| |
| CollationIteratorTest::~CollationIteratorTest() |
| { |
| delete en_us; |
| } |
| |
| /** |
| * Test for CollationElementIterator.previous() |
| * |
| * @bug 4108758 - Make sure it works with contracting characters |
| * |
| */ |
| void CollationIteratorTest::TestPrevious(/* char* par */) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| CollationElementIterator *iter = en_us->createCollationElementIterator(test1); |
| |
| // A basic test to see if it's working at all |
| backAndForth(*iter); |
| delete iter; |
| |
| // Test with a contracting character sequence |
| UnicodeString source; |
| RuleBasedCollator *c1 = NULL; |
| c1 = new RuleBasedCollator( |
| (UnicodeString)"< a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status); |
| |
| if (c1 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with a contracting sequence."); |
| delete c1; |
| return; |
| } |
| |
| source = "abchdcba"; |
| iter = c1->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c1; |
| |
| // Test with an expanding character sequence |
| RuleBasedCollator *c2 = NULL; |
| c2 = new RuleBasedCollator((UnicodeString)"< a < b < c/abd < d", status); |
| |
| if (c2 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with an expanding sequence."); |
| delete c2; |
| return; |
| } |
| |
| source = "abcd"; |
| iter = c2->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c2; |
| |
| // Now try both |
| RuleBasedCollator *c3 = NULL; |
| c3 = new RuleBasedCollator((UnicodeString)"< a < b < c/aba < d < z < ch", status); |
| |
| if (c3 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); |
| delete c3; |
| return; |
| } |
| |
| source = "abcdbchdc"; |
| iter = c3->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c3; |
| |
| status=U_ZERO_ERROR; |
| source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e27abc"); |
| |
| Collator *c4=Collator::createInstance(Locale("th", "TH", ""), status); |
| if(U_FAILURE(status)){ |
| errln("Couldn't create a collator"); |
| } |
| iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c4; |
| |
| status=U_ZERO_ERROR; |
| Collator *c5=Collator::createInstance(status); |
| if(U_FAILURE(status)){ |
| errln("Couldn't create a collator"); |
| } |
| iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c5; |
| |
| |
| } |
| |
| /** |
| * Test for getOffset() and setOffset() |
| */ |
| void CollationIteratorTest::TestOffset(/* char* par */) |
| { |
| CollationElementIterator *iter = en_us->createCollationElementIterator(test1); |
| |
| // Run all the way through the iterator, then get the offset |
| int32_t orderLength = 0; |
| int32_t *orders = getOrders(*iter, orderLength); |
| |
| int32_t offset = iter->getOffset(); |
| |
| if (offset != test1.length()) |
| { |
| UnicodeString msg1("offset at end != length: "); |
| UnicodeString msg2(" vs "); |
| |
| errln(msg1 + offset + msg2 + test1.length()); |
| } |
| |
| // Now set the offset back to the beginning and see if it works |
| CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); |
| UErrorCode status = U_ZERO_ERROR; |
| |
| iter->setOffset(0, status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("setOffset failed."); |
| } |
| else |
| { |
| assertEqual(*iter, *pristine); |
| } |
| |
| // TODO: try iterating halfway through a messy string. |
| |
| delete pristine; |
| delete[] orders; |
| delete iter; |
| } |
| |
| /** |
| * Test for setText() |
| */ |
| void CollationIteratorTest::TestSetText(/* char* par */) |
| { |
| CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1); |
| CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2); |
| UErrorCode status = U_ZERO_ERROR; |
| |
| // Run through the second iterator just to exercise it |
| int32_t c = iter2->next(status); |
| int32_t i = 0; |
| |
| while ( ++i < 10 && c != CollationElementIterator::NULLORDER) |
| { |
| if (U_FAILURE(status)) |
| { |
| errln("iter2->next() returned an error."); |
| delete iter2; |
| delete iter1; |
| } |
| |
| c = iter2->next(status); |
| } |
| |
| // Now set it to point to the same string as the first iterator |
| iter2->setText(test1, status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter2->setText(test1) failed."); |
| } |
| else |
| { |
| assertEqual(*iter1, *iter2); |
| } |
| iter1->reset(); |
| //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text |
| CharacterIterator* chariter = new StringCharacterIterator(test1); |
| iter2->setText(*chariter, status); |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter2->setText(chariter(test1)) failed."); |
| } |
| else |
| { |
| assertEqual(*iter1, *iter2); |
| } |
| |
| delete iter2; |
| delete iter1; |
| } |
| |
| /** @bug 4108762 |
| * Test for getMaxExpansion() |
| */ |
| void CollationIteratorTest::TestMaxExpansion(/* char* par */) |
| { |
| // Try a simple one first: |
| // The only expansion ends with 'e' and has length 2 |
| UnicodeString rule1("< a & ae = "); |
| rule1 += (UChar)0x00e4; |
| rule1 += " < b < e"; |
| ExpansionRecord test1[] = |
| { |
| {0x61, 1}, |
| {0x62, 1}, |
| {0x65, 2} |
| }; |
| verifyExpansion(rule1, test1, ARRAY_LENGTH(test1)); |
| |
| // Now a more complicated one: |
| // "a1" --> "ae" |
| // "z" --> "aeef" |
| // |
| UnicodeString rule2("< a & ae = a1 & aeef = z < b < e < f"); |
| ExpansionRecord test2[] = |
| { |
| {0x61, 1}, |
| {0x62, 1}, |
| {0x65, 2}, |
| {0x66, 4} |
| }; |
| verifyExpansion(rule2, test2, ARRAY_LENGTH(test2)); |
| } |
| |
| /* |
| * @bug 4157299 |
| */ |
| void CollationIteratorTest::TestClearBuffers(/* char* par */) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *c = NULL; |
| c = new RuleBasedCollator((UnicodeString)"< a < b < c & ab = d", status); |
| |
| if (c == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator."); |
| delete c; |
| return; |
| } |
| |
| UnicodeString source("abcd"); |
| CollationElementIterator *i = c->createCollationElementIterator(source); |
| int32_t e0 = i->next(status); // save the first collation element |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed"); |
| goto bail; |
| } |
| |
| i->setOffset(3, status); // go to the expanding character |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->setOffset(3) failed"); |
| goto bail; |
| } |
| |
| i->next(status); // but only use up half of it |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed"); |
| goto bail; |
| } |
| |
| i->setOffset(0, status); // go back to the beginning |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->setOffset(0) failed"); |
| goto bail; |
| } |
| |
| { |
| // This is in it's own block to stop a stupid compiler |
| // error about the goto's skipping the initialization |
| // of e... |
| int32_t e = i->next(status); // and get this one again |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed."); |
| goto bail; |
| } |
| |
| if (e != e0) |
| { |
| UnicodeString msg; |
| |
| msg += "got 0x"; |
| appendHex(e, 8, msg); |
| msg += ", expected 0x"; |
| appendHex(e0, 8, msg); |
| |
| errln(msg); |
| } |
| } |
| |
| bail: |
| delete i; |
| delete c; |
| } |
| |
| void CollationIteratorTest::backAndForth(CollationElementIterator &iter) |
| { |
| // Run through the iterator forwards and stick it into an array |
| int32_t orderLength = 0; |
| int32_t *orders = getOrders(iter, orderLength); |
| UErrorCode status = U_ZERO_ERROR; |
| |
| // Now go through it backwards and make sure we get the same values |
| int32_t index = orderLength; |
| int32_t o; |
| |
| while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) |
| { |
| if (o != orders[--index]) |
| { |
| UnicodeString msg1("Mismatch at index "); |
| UnicodeString msg2(": 0x"); |
| appendHex(orders[index], 8, msg2); |
| msg2 += " vs 0x"; |
| appendHex(o, 8, msg2); |
| |
| errln(msg1 + index + msg2); |
| break; |
| } |
| } |
| |
| if (index != 0) |
| { |
| UnicodeString msg("Didn't get back to beginning - index is "); |
| errln(msg + index); |
| |
| iter.reset(); |
| err("next: "); |
| while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) |
| { |
| UnicodeString hexString("0x"); |
| |
| appendHex(o, 8, hexString); |
| hexString += " "; |
| err(hexString); |
| } |
| errln(""); |
| |
| err("prev: "); |
| while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) |
| { |
| UnicodeString hexString("0x"); |
| |
| appendHex(o, 8, hexString); |
| hexString += " "; |
| err(hexString); |
| } |
| errln(""); |
| } |
| |
| delete[] orders; |
| } |
| |
| /** |
| * Verify that getMaxExpansion works on a given set of collation rules |
| * |
| * The first row of the "tests" array contains the collation rules |
| * at index 0, and the string at index 1 is ignored. |
| * |
| * Subsequent rows of the array contain a character and a number, both |
| * represented as strings. The character's collation order is determined, |
| * and getMaxExpansion is called for that character. If its value is |
| * not equal to the specified number, an error results. |
| */ |
| void CollationIteratorTest::verifyExpansion(UnicodeString rules, ExpansionRecord tests[], int32_t testCount) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *coll = NULL; |
| coll = new RuleBasedCollator(rules, status); |
| |
| if (coll == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator."); |
| delete coll; |
| return; |
| } |
| |
| UnicodeString source(""); |
| CollationElementIterator *iter = coll->createCollationElementIterator(source); |
| |
| int32_t i; |
| for (i = 1; i < testCount; i += 1) |
| { |
| // First get the collation key that the test string expands to |
| UnicodeString test(&tests[i].character, 1); |
| iter->setText(test, status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter->setText() failed."); |
| return; |
| } |
| |
| int32_t order = iter->next(status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter->next() failed."); |
| return; |
| } |
| |
| if (order == CollationElementIterator::NULLORDER || iter->next(status) != CollationElementIterator::NULLORDER) |
| { |
| UnicodeString msg("verifyExpansion: '"); |
| |
| msg += test; |
| msg += "' has multiple orders:"; |
| orderString(*iter, msg); |
| |
| iter->reset(); |
| errln(msg); |
| } |
| |
| int32_t expansion = iter->getMaxExpansion(order); |
| int32_t expect = tests[i].count; |
| |
| if (expansion != expect) |
| { |
| UnicodeString msg1("expansion for '"); |
| |
| msg1 += test; |
| msg1 += "' is wrong: expected "; |
| |
| UnicodeString msg2(", got "); |
| |
| errln(msg1 + expect + msg2 + expansion); |
| } |
| } |
| delete iter; |
| delete coll; |
| } |
| |
| /** |
| * Return an integer array containing all of the collation orders |
| * returned by calls to next on the specified iterator |
| */ |
| int32_t *CollationIteratorTest::getOrders(CollationElementIterator &iter, int32_t &orderLength) |
| { |
| int32_t maxSize = 100; |
| int32_t size = 0; |
| int32_t *orders = new int32_t[maxSize]; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| int32_t order; |
| while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) |
| { |
| if (size == maxSize) |
| { |
| maxSize *= 2; |
| int32_t *temp = new int32_t[maxSize]; |
| |
| uprv_memcpy(temp, orders, size * sizeof(int32_t)); |
| delete[] orders; |
| orders = temp; |
| } |
| |
| orders[size++] = order; |
| } |
| |
| if (maxSize > size) |
| { |
| int32_t *temp = new int32_t[size]; |
| |
| memcpy(temp, orders, size * sizeof(int32_t)); |
| delete[] orders; |
| orders = temp; |
| } |
| |
| orderLength = size; |
| return orders; |
| } |
| |
| /** |
| * Return a string containing all of the collation orders |
| * returned by calls to next on the specified iterator |
| */ |
| UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target) |
| { |
| int32_t order; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) |
| { |
| target += "0x"; |
| appendHex(order, 8, target); |
| target += " "; |
| } |
| |
| return target; |
| } |
| |
| void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) |
| { |
| int32_t c1, c2, count = 0; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| do |
| { |
| c1 = i1.next(status); |
| c2 = i2.next(status); |
| |
| if (c1 != c2) |
| { |
| UnicodeString msg, msg1(" "); |
| |
| msg += msg1 + count; |
| msg += ": strength(0x"; |
| appendHex(c1, 8, msg); |
| msg += ") != strength(0x"; |
| appendHex(c2, 8, msg); |
| msg += ")"; |
| |
| errln(msg); |
| break; |
| } |
| |
| count += 1; |
| } |
| while (c1 != CollationElementIterator::NULLORDER); |
| } |
| |
| void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) |
| { |
| if (exec) |
| { |
| logln("Collation Iteration Tests: "); |
| } |
| |
| switch (index) |
| { |
| case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break; |
| case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break; |
| case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break; |
| case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break; |
| case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break; |
| default: name = ""; break; |
| } |
| } |
| |