// source/test/intltest/regcoll.cpp - external/github.com/unicode-org/icu

 /********************************************************************
  * COPYRIGHT:
  * Copyright (c) 1997-2001, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/

 #include "unicode/coll.h"
 #include "unicode/tblcoll.h"
 #include "unicode/unistr.h"
 #include "unicode/sortkey.h"
 #include "regcoll.h"
 #include "sfwdchit.h"

 // Element count of a statically sized array, cast to int32_t. Only meaningful
 // for real arrays (not pointers); for a two-dimensional array it yields the
 // number of rows.
 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))

 // Creates the shared en_US collator (en_us) that the regression tests below
 // either use directly or clone.
 CollationRegressionTest::CollationRegressionTest()
 {
     UErrorCode ec = U_ZERO_ERROR;
     Collator *usCollator = Collator::createInstance(Locale::US, ec);

     // NOTE(review): the creation status is not inspected here, so en_us is
     // whatever createInstance() handed back.
     en_us = (RuleBasedCollator *) usCollator;
 }

 // Releases the shared collator created by the constructor.
 CollationRegressionTest::~CollationRegressionTest()
 {
     delete en_us;
 }


     // @bug 4048446
 //
 // CollationElementIterator.reset() doesn't work
 //
 // Runs one iterator to the end of the text, resets it, and then checks that
 // it yields the same collation elements as a second, untouched iterator over
 // the same text.
 //
 void CollationRegressionTest::Test4048446(/* char* par */)
 {
     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
     UErrorCode status = U_ZERO_ERROR;

     // Both iterators walk the same text on purpose: i2 is the reference that
     // the reset i1 is compared against.
     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);

     if (i1 == NULL || i2 == NULL)
     {
         errln("Could not create CollationElementIterator's");
         delete i1;
         delete i2;
         return;
     }

     // Exhaust i1. Stop early on an error so it cannot be mistaken for more data.
     while (i1->next(status) != CollationElementIterator::NULLORDER)
     {
         if (U_FAILURE(status))
         {
             break;
         }
     }

     // Checked after the loop as well: a failing next() that returns NULLORDER
     // ends the loop without ever entering its body.
     if (U_FAILURE(status))
     {
         errln("error calling next()");

         delete i1;
         delete i2;
         return;
     }

     i1->reset();

     assertEqual(*i1, *i2);

     delete i1;
     delete i2;
 }

 // @bug 4051866
 //
 // Collator -> rules -> Collator round-trip broken for expanding characters
 //
 void CollationRegressionTest::Test4051866(/* char* par */)
 {
 /*
     RuleBasedCollator c1 = new RuleBasedCollator("< o "
                                                 +"& oe ,o\u3080"
                                                 +"& oe ,\u1530 ,O"
                                                 +"& OE ,O\u3080"
                                                 +"& OE ,\u1520"
                                                 +"< p ,P");
 */

     UnicodeString rules;
     UErrorCode status = U_ZERO_ERROR;

     rules += "< o ";
     rules += "& oe ,o";
     rules += (UChar)0x3080;
     rules += "& oe ,";
     rules += (UChar)0x1530;
     rules += " ,O";
     rules += "& OE ,O";
     rules += (UChar)0x3080;
     rules += "& OE ,";
     rules += (UChar)0x1520;
     rules += "< p ,P";

     // Build a collator containing expanding characters
     RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);

     // Build another using the rules from  the first
     RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);

     // Make sure they're the same
     if (!(c1->getRules() == c2->getRules()))
     {
         errln("Rules are not equal");
     }

     delete c2;
     delete c1;
 }

 // @bug 4053636
 //
 // Collator thinks "black-bird" == "black"
 //
 void CollationRegressionTest::Test4053636(/* char* par */)
 {
     if (en_us->equals("black_bird", "black"))
     {
         errln("black-bird == black");
     }
 }

 // @bug 4054238
 //
 // CollationElementIterator will not work correctly if the associated
 // Collator object's mode is changed
 //
 void CollationRegressionTest::Test4054238(/* char* par */)
 {
     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
     const UnicodeString test3(chars3);
     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

     // NOTE: The Java code uses en_us to create the CollationElementIterators
     // but I'm pretty sure that's wrong, so I've changed this to use c.
     c->setDecomposition(Normalizer::DECOMP);
     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
     delete i1;
     delete c;
 }

 // @bug 4054734
 //
 // Collator::IDENTICAL documented but not implemented
 //
 void CollationRegressionTest::Test4054734(/* char* par */)
 {
     /*
         Here's the original Java:

         String[] decomp = {
             "\u0001",   "<",    "\u0002",
             "\u0001",   "=",    "\u0001",
             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
         };

         String[] nodecomp = {
             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
         };
     */

     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
     };


     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

     c->setStrength(Collator::IDENTICAL);

     c->setDecomposition(Normalizer::DECOMP);
     compareArray(*c, decomp, ARRAY_LENGTH(decomp));

     delete c;
 }

 // @bug 4054736
 //
 // Full Decomposition mode not implemented
 //
 void CollationRegressionTest::Test4054736(/* char* par */)
 {
     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

     c->setStrength(Collator::SECONDARY);
     c->setDecomposition(Normalizer::DECOMP_COMPAT);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4058613
 //
 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
 //
 void CollationRegressionTest::Test4058613(/* char* par */)
 {
     // Creating a default collator doesn't work when Korean is the default
     // locale

     Locale oldDefault = Locale::getDefault();
     UErrorCode status = U_ZERO_ERROR;

     Locale::setDefault(Locale::KOREAN, status);

     if (U_FAILURE(status))
     {
         errln("Could not set default locale to Locale::KOREAN");
         return;
     }

     Collator *c = NULL;

     c = Collator::createInstance("en_US", status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Could not create a Korean collator");
         Locale::setDefault(oldDefault, status);
         delete c;
         return;
     }

     // Since the fix to this bug was to turn off decomposition for Korean collators,
     // ensure that's what we got
     if (c->getDecomposition() != Normalizer::NO_OP)
     {
       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
     }

     delete c;

     Locale::setDefault(oldDefault, status);
 }

 // @bug 4059820
 //
 // RuleBasedCollator.getRules does not return the exact pattern as input
 // for expanding character sequences
 //
 void CollationRegressionTest::Test4059820(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;

     RuleBasedCollator *c = NULL;
     UnicodeString rules = "< a < b , c/a < d < z";

     c = new RuleBasedCollator(rules, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failure building a collator.");
         delete c;
         return;
     }

     if ( c->getRules().indexOf("c/a") == -1)
     {
         errln("returned rules do not contain 'c/a'");
     }

     delete c;
 }

 // @bug 4060154
 //
 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
 //
 // Builds a collator whose rules reset at H and re-insert the dotless and
 // dotted forms of i after it, then checks two expectation tables: one at
 // TERTIARY strength and one at PRIMARY strength.
 //
 void CollationRegressionTest::Test4060154(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     UnicodeString rules;

     rules += "< g, G < h, H < i, I < j, J";
     rules +=  " & H < ";
     rules += (UChar)0x0131;
     rules += ", ";
     rules += (UChar)0x0130;
     rules += ", i, I";

     RuleBasedCollator *c = NULL;

     c = new RuleBasedCollator(rules, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("failure building collator.");
         delete c;
         return;
     }

     c->setDecomposition(Normalizer::DECOMP);

  /*
     String[] tertiary = {
         "A",        "<",    "B",
         "H",        "<",    "\u0131",
         "H",        "<",    "I",
         "\u0131",   "<",    "\u0130",
         "\u0130",   "<",    "i",
         "\u0130",   ">",    "H",
     };
 */

     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x41, 0},    {0x3c, 0}, {0x42, 0},
         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
         {0x48, 0},    {0x3c, 0}, {0x49, 0},
         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
     };

     c->setStrength(Collator::TERTIARY);
     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));

     /*
     String[] secondary = {
         "H",        "<",    "I",
         "\u0131",   "=",    "\u0130",
     };
 */
     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x48, 0},    {0x3c, 0}, {0x49, 0},
         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
     };

     // NOTE(review): the table is named "secondary" but is checked at PRIMARY
     // strength -- confirm this is intentional.
     c->setStrength(Collator::PRIMARY);
     compareArray(*c, secondary, ARRAY_LENGTH(secondary));

     delete c;
 }

 // @bug 4062418
 //
 // Secondary/Tertiary comparison incorrect in French Secondary
 //
 void CollationRegressionTest::Test4062418(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;

     RuleBasedCollator *c = NULL;

     c = (RuleBasedCollator *) Collator::createInstance(Locale::FRANCE, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create collator for Locale::FRANCE");
         delete c;
         return;
     }

     c->setStrength(Collator::SECONDARY);

 /*
     String[] tests = {
             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
     };
 */
     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4065540
 //
 // Collator::compare() method broken if either string contains spaces
 //
 void CollationRegressionTest::Test4065540(/* char* par */)
 {
     if (en_us->compare("abcd e", "abcd f") == 0)
     {
         errln("'abcd e' == 'abcd f'");
     }
 }

 // @bug 4066189
 //
 // Unicode characters need to be recursively decomposed to get the
 // correct result. For example,
 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
 //
 void CollationRegressionTest::Test4066189(/* char* par */)
 {
     static const UChar chars1[] = {0x1EB1, 0};
     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
     const UnicodeString test1(chars1);
     const UnicodeString test2(chars2);

     // NOTE: The java code used en_us to create the
     // CollationElementIterator's. I'm pretty sure that
     // was wrong, so I've change the code to use c1 and c2
     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
     c1->setDecomposition(Normalizer::DECOMP_COMPAT);
     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);

     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
     c2->setDecomposition(Normalizer::NO_OP);
     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);

     assertEqual(*i1, *i2);

     delete i2;
     delete c2;
     delete i1;
     delete c1;
 }

 // @bug 4066696
 //
 // French secondary collation checking at the end of compare iteration fails
 //
 void CollationRegressionTest::Test4066696(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     RuleBasedCollator *c = NULL;

     c = (RuleBasedCollator *)Collator::createInstance(Locale::FRANCE, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failure creating collator for Locale::FRANCE");
         delete c;
         return;
     }

     c->setStrength(Collator::SECONDARY);

 /*
     String[] tests = {
         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
     };

   should be:

     String[] tests = {
         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
     };

 */

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4076676
 //
 // Bad canonicalization of same-class combining characters
 //
 void CollationRegressionTest::Test4076676(/* char* par */)
 {
     // These combining characters are all in the same class, so they should not
     // be reordered, and they should compare as unequal.
     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};

     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     if (c->compare(s1,s2) == 0)
     {
         errln("Same-class combining chars were reordered");
     }

     delete c;
 }

 // @bug 4079231
 //
 // RuleBasedCollator::operator==(NULL) throws NullPointerException
 //
 // Intentionally empty: the body holds only the explanation and the original
 // Java for reference. The function is kept so that the test index table in
 // runIndexedTest stays in step with the Java suite.
 //
 void CollationRegressionTest::Test4079231(/* char* par */)
 {
     // I don't think there's any way to write this test
     // in C++. The following is equivalent to the Java,
     // but doesn't compile 'cause NULL can't be converted
     // to Collator&
     //
     // if (en_us->operator==(NULL))
     // {
     //     errln("en_us->operator==(NULL) returned TRUE");
     // }

  /*
    Original Java, kept for reference:

    try {
         if (en_us->equals(null)) {
             errln("en_us->equals(null) returned true");
         }
     }
     catch (Exception e) {
         errln("en_us->equals(null) threw " + e.toString());
     }
 */
 }

 // @bug 4078588
 //
 // RuleBasedCollator breaks on "< a < bb" rule
 //
 void CollationRegressionTest::Test4078588(/* char *par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);

     if (rbc == NULL || U_FAILURE(status))
     {
         errln("Failed to create RuleBasedCollator.");
         delete rbc;
         return;
     }

     Collator::EComparisonResult result = rbc->compare("a","bb");

     if (result != Collator::LESS)
     {
         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
             + (UnicodeString)"; expected -1");
     }

     delete rbc;
 }

 // @bug 4081866
 //
 // Combining characters in different classes not reordered properly.
 //
 void CollationRegressionTest::Test4081866(/* char* par */)
 {
     // These combining characters are all in different classes,
     // so they should be reordered and the strings should compare as equal.
     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};

     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     // Now that the default collators are set to NO_DECOMPOSITION
     // (as a result of fixing bug 4114077), we must set it explicitly
     // when we're testing reordering behavior.  -- lwerner, 5/5/98
     c->setDecomposition(Normalizer::DECOMP);

     if (c->compare(s1,s2) != 0)
     {
         errln("Combining chars were not reordered");
     }

     delete c;
 }

 // @bug 4087241
 //
 // string comparison errors in Scandinavian collators
 //
 void CollationRegressionTest::Test4087241(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     Locale da_DK("da", "DK");
     RuleBasedCollator *c = NULL;

     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create collator for da_DK locale");
         delete c;
         return;
     }

     c->setStrength(Collator::SECONDARY);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
         {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0},      // a-unlaut < a-ring
         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4087243
 //
 // CollationKey takes ignorable strings into account when it shouldn't
 //
 void CollationRegressionTest::Test4087243(/* char* par */)
 {
     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4092260
 //
 // Mu/micro conflict
 // Micro symbol and greek lowercase letter Mu should sort identically
 //
 void CollationRegressionTest::Test4092260(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     Locale el("el", "");
     Collator *c = NULL;

     c = Collator::createInstance(el, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create collator for el locale.");
         delete c;
         return;
     }

     // These now have tertiary differences in UCA
     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4095316
 //
 void CollationRegressionTest::Test4095316(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     Locale el_GR("el", "GR");
     Collator *c = Collator::createInstance(el_GR, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create collator for el_GR locale");
         delete c;
         return;
     }
     // These now have tertiary differences in UCA
     //c->setStrength(Collator::TERTIARY);
     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4101940
 //
 void CollationRegressionTest::Test4101940(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     RuleBasedCollator *c = NULL;
     UnicodeString rules = "< a < b";
     UnicodeString nothing = "";

     c = new RuleBasedCollator(rules, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create RuleBasedCollator");
         delete c;
         return;
     }

     CollationElementIterator *i = c->createCollationElementIterator(nothing);
     i->reset();

     if (i->next(status) != CollationElementIterator::NULLORDER)
     {
         errln("next did not return NULLORDER");
     }

     delete i;
     delete c;
 }

 // @bug 4103436
 //
 // Collator::compare not handling spaces properly
 //
 void CollationRegressionTest::Test4103436(/* char* par */)
 {
     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
     };

     compareArray(*c, tests, ARRAY_LENGTH(tests));

     delete c;
 }

 // @bug 4114076
 //
 // Collation not Unicode conformant with Hangul syllables
 //
 void CollationRegressionTest::Test4114076(/* char* par */)
 {
     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     //
     // With Canonical decomposition, Hangul syllables should get decomposed
     // into Jamo, but Jamo characters should not be decomposed into
     // conjoining Jamo
     //
     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
     };

     c->setDecomposition(Normalizer::DECOMP);
     compareArray(*c, test1, ARRAY_LENGTH(test1));

     // From UTR #15:
     // *In earlier versions of Unicode, jamo characters like ksf
     //  had compatibility mappings to kf + sf. These mappings were
     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
     // That is, the following test is obsolete as of 2.1.9

 //obsolete-    // With Full decomposition, it should go all the way down to
 //obsolete-    // conjoining Jamo characters.
 //obsolete-    //
 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
 //obsolete-    {
 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
 //obsolete-    };
 //obsolete-
 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));

     delete c;
 }


 // @bug 4124632
 //
 // Collator::getCollationKey was hanging on certain character sequences
 //
 // Requests a collation key for such a sequence from a Locale::JAPAN collator
 // and checks that a usable key comes back.
 //
 void CollationRegressionTest::Test4124632(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;
     Collator *coll = NULL;

     coll = Collator::createInstance(Locale::JAPAN, status);

     if (coll == NULL || U_FAILURE(status))
     {
         errln("Failed to create collator for Locale::JAPAN");
         delete coll;
         // Bail out: continuing would dereference and then delete the
         // pointer a second time.
         return;
     }

     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
     CollationKey key;

     coll->getCollationKey(test, key, status);

     if (key.isBogus() || U_FAILURE(status))
     {
         errln("CollationKey creation failed.");
     }

     delete coll;
 }

 // @bug 4132736
 //
 // sort order of french words with multiple accents has errors
 //
 // Checks two expectations with a Locale::FRANCE collator at TERTIARY strength.
 //
 void CollationRegressionTest::Test4132736(/* char* par */)
 {
     UErrorCode status = U_ZERO_ERROR;

     Collator *c = NULL;

     c = Collator::createInstance(Locale::FRANCE, status);

     if (c == NULL || U_FAILURE(status))
     {
         errln("Failed to create a collator for Locale::FRANCE");
         delete c;
         // Bail out: continuing would dereference and then delete the
         // pointer a second time.
         return;
     }

     // Configure the collator only once it is known to be valid.
     c->setStrength(Collator::TERTIARY);

     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
     };

     compareArray(*c, test1, ARRAY_LENGTH(test1));

     delete c;
 }

 // @bug 4133509
 //
 // The sorting using java.text.CollationKey is not in the exact order
 //
 void CollationRegressionTest::Test4133509(/* char* par */)
 {
     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
     };

     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
 }

 // @bug 4114077
 //
 // Collation with decomposition off doesn't work for Europe
 //
 void CollationRegressionTest::Test4114077(/* char* par */)
 {
     // Ensure that we get the same results with decomposition off
     // as we do with it on....

     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
     c->setStrength(Collator::TERTIARY);

     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
                                                 //   -> a, ring, acute
         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
     };

     c->setDecomposition(Normalizer::NO_OP);
     compareArray(*c, test1, ARRAY_LENGTH(test1));

     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
     {
         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
     };

     c->setDecomposition(Normalizer::DECOMP);
     compareArray(*c, test2, ARRAY_LENGTH(test2));

     delete c;
 }

 // @bug 4141640
 //
 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
 //
 // Tries to create a collator for every locale reported by
 // Locale::getAvailableLocales() and reports each locale for which that fails.
 //
 void CollationRegressionTest::Test4141640(/* char* par */)
 {
     //
     // Rather than just creating a Swedish collator, we might as well
     // try to instantiate one for every locale available on the system
     // in order to prevent this sort of bug from cropping up in the future
     //
     UErrorCode status = U_ZERO_ERROR;
     int32_t i, localeCount;
     const Locale *locales = Locale::getAvailableLocales(localeCount);

     for (i = 0; i < localeCount; i += 1)
     {
         Collator *c = NULL;

         // Start each locale from a clean status so that one failure does
         // not carry over into the following locales.
         status = U_ZERO_ERROR;
         c = Collator::createInstance(locales[i], status);

         if (c == NULL || U_FAILURE(status))
         {
             UnicodeString msg;

             msg += "Could not create collator for locale ";
             msg += locales[i].getName();

             errln(msg);
         }

         delete c;
     }
 }

 // @bug 4139572
 //
 // getCollationKey throws exception for spanish text
 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
 //
 void CollationRegressionTest::Test4139572(/* char* par */)
 {
     //
     // Code pasted straight from the bug report
     // (and then translated to C++ ;-)
     //
     // create spanish locale and collator
     UErrorCode status = U_ZERO_ERROR;
     Locale l("es", "es");
     Collator *col = NULL;

     col = Collator::createInstance(l, status);

     if (col == NULL || U_FAILURE(status))
     {
         errln("Failed to create a collator for es_es locale.");
         delete col;
         return;
     }

     CollationKey key;

     // this spanish phrase kills it!
     col->getCollationKey("Nombre De Objeto", key, status);

     if (key.isBogus() || U_FAILURE(status))
     {
         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
     }

     delete col;
 }
 /* HSYS : RuleBasedCollator::compare() performance enhancements
           compare() does not create CollationElementIterator() anymore.*/

 // RuleBasedCollator subclass used by Test4146160: each call to either
 // createCollationElementIterator overload bumps a shared counter before
 // deferring to the base class.
 class My4146160Collator : public RuleBasedCollator
 {
 public:
     // Builds the collator from the rules of rbc.
     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
     ~My4146160Collator();

     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;

     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;

     // Number of createCollationElementIterator calls seen so far, across all
     // instances.
     static int32_t count;
 };

 int32_t My4146160Collator::count = 0;

 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
   : RuleBasedCollator(rbc.getRules(), status)
 {
 }

 My4146160Collator::~My4146160Collator()
 {
 }

 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
 {
     ++count;

     CollationElementIterator *elements = RuleBasedCollator::createCollationElementIterator(text);
     return elements;
 }

 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
 {
     ++count;

     CollationElementIterator *elements = RuleBasedCollator::createCollationElementIterator(text);
     return elements;
 }

 // @bug 4146160
 //
 // RuleBasedCollator doesn't use createCollationElementIterator internally
 //
 // The whole body is compiled out with #if 0, so this test currently does
 // nothing. The note ahead of My4146160Collator says compare() no longer
 // creates a CollationElementIterator, which the disabled checks below rely on.
 //
 void CollationRegressionTest::Test4146160(/* char* par */)
 {
 #if 0
     //
     // Use a custom collator class whose createCollationElementIterator
     // methods increment a count....
     //
     UErrorCode status = U_ZERO_ERROR;
     CollationKey key;

     My4146160Collator::count = 0;
     My4146160Collator *mc = NULL;

     mc = new My4146160Collator(*en_us, status);

     if (mc == NULL || U_FAILURE(status))
     {
         errln("Failed to create a My4146160Collator.");
         delete mc;
         return;
     }

     // getCollationKey() is expected to go through the counted factory method.
     mc->getCollationKey("1", key, status);

     if (key.isBogus() || U_FAILURE(status))
     {
         errln("Failure to get a CollationKey from a My4146160Collator.");
         delete mc;
         return;
     }

     if (My4146160Collator::count < 1)
     {
         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
     }

     // Same expectation for compare(), counted from zero again.
     My4146160Collator::count = 0;
     mc->compare("1", "2");

     if (My4146160Collator::count < 1)
     {
         errln("My4146160Collator::createtCollationElementIterator not called for compare");
     }

     delete mc;
 #endif
 }
 // Runs a table of comparison expectations against a collator.
 //
 //   c         - collator under test.
 //   tests     - rows grouped in threes: { source, relation, target }, where
 //               relation is "<", ">" or "=".
 //   testCount - number of rows in tests (three rows per expectation).
 //
 // For every triple the strings are compared directly and through their
 // collation keys, and the outcome is handed to reportCResult together with
 // the expected relation. Rows that do not form a complete triple, and triples
 // with an unknown relation, are reported as errors and not evaluated.
 void CollationRegressionTest::compareArray(Collator &c,
                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
                                            int32_t testCount)
 {
     int32_t i;

     // Only visit complete triples so that tests[i + 1] and tests[i + 2] are
     // never read past the end of the table.
     for (i = 0; i + 2 < testCount; i += 3)
     {
         UnicodeString source(tests[i]);
         UnicodeString comparison(tests[i + 1]);
         UnicodeString target(tests[i + 2]);
         Collator::EComparisonResult expectedResult = Collator::EQUAL;

         if (comparison == "<")
         {
             expectedResult = Collator::LESS;
         }
         else if (comparison == ">")
         {
             expectedResult = Collator::GREATER;
         }
         else if (comparison == "=")
         {
             expectedResult = Collator::EQUAL;
         }
         else
         {
             UnicodeString bogus1("Bogus comparison string \"");
             UnicodeString bogus2("\"");
             errln(bogus1 + comparison + bogus2);

             // There is no meaningful expectation to check this triple against.
             continue;
         }

         Collator::EComparisonResult compareResult = c.compare(source, target);

         CollationKey sourceKey, targetKey;
         UErrorCode status = U_ZERO_ERROR;

         c.getCollationKey(source, sourceKey, status);

         if (U_FAILURE(status))
         {
             errln("Couldn't get collationKey for source");
             continue;
         }

         c.getCollationKey(target, targetKey, status);

         if (U_FAILURE(status))
         {
             errln("Couldn't get collationKey for target");
             continue;
         }

         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);

         // compareResult is deliberately passed twice; it also fills the
         // argument slot between keyResult and expectedResult.
         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );

     }

     if (i < testCount)
     {
         errln("Test table does not consist of complete source/comparison/target triples");
     }
 }

 // Steps two element iterators in lock step and reports the first position at
 // which the values returned by next() differ. Stops when the first iterator
 // returns NULLORDER, at the first difference, or when next() reports an error.
 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
 {
     int32_t c1, c2, count = 0;
     UErrorCode status = U_ZERO_ERROR;

     do
     {
         c1 = i1.next(status);
         c2 = i2.next(status);

         // A failed next() must not be mistaken for two iterators that
         // happened to agree.
         if (U_FAILURE(status))
         {
             errln("error calling next()");
             break;
         }

         if (c1 != c2)
         {
             UnicodeString msg, msg1("    ");

             // Message layout: "    <index>: strength(0x........) != strength(0x........)"
             msg += msg1 + count;
             msg += ": strength(0x";
             appendHex(c1, 8, msg);
             msg += ") != strength(0x";
             appendHex(c2, 8, msg);
             msg += ")";

             errln(msg);
             break;
         }

         count += 1;
     }
     while (c1 != CollationElementIterator::NULLORDER);
 }

 // Test dispatcher: maps index to a test, stores that test's name in name
 // and, when exec is set, runs it. An index outside the table yields an
 // empty name.
 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
 {
     if (exec)
     {
         logln("Collation Regression Tests: ");
     }

 // One switch arm per test: publish the test's name, run it on request.
 #define REGCOLL_CASE(id, test) \
     case id: name = #test; if (exec) { test(/* par */); } break

     switch (index)
     {
         REGCOLL_CASE( 0, Test4048446);
         REGCOLL_CASE( 1, Test4051866);
         REGCOLL_CASE( 2, Test4053636);
         REGCOLL_CASE( 3, Test4054238);
         REGCOLL_CASE( 4, Test4054734);
         REGCOLL_CASE( 5, Test4054736);
         REGCOLL_CASE( 6, Test4058613);
         REGCOLL_CASE( 7, Test4059820);
         REGCOLL_CASE( 8, Test4060154);
         REGCOLL_CASE( 9, Test4062418);
         REGCOLL_CASE(10, Test4065540);
         REGCOLL_CASE(11, Test4066189);
         REGCOLL_CASE(12, Test4066696);
         REGCOLL_CASE(13, Test4076676);
         REGCOLL_CASE(14, Test4078588);
         REGCOLL_CASE(15, Test4079231);
         REGCOLL_CASE(16, Test4081866);
         REGCOLL_CASE(17, Test4087241);
         REGCOLL_CASE(18, Test4087243);
         REGCOLL_CASE(19, Test4092260);
         REGCOLL_CASE(20, Test4095316);
         REGCOLL_CASE(21, Test4101940);
         REGCOLL_CASE(22, Test4103436);
         REGCOLL_CASE(23, Test4114076);
         REGCOLL_CASE(24, Test4114077);
         REGCOLL_CASE(25, Test4124632);
         REGCOLL_CASE(26, Test4132736);
         REGCOLL_CASE(27, Test4133509);
         REGCOLL_CASE(28, Test4139572);
         REGCOLL_CASE(29, Test4141640);
         REGCOLL_CASE(30, Test4146160);

         default:
             name = "";
             break;
     }

 #undef REGCOLL_CASE
 }