// source/test/intltest/transrt.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (C) 2001, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   05/23/00    aliu        Creation.
 **********************************************************************
 */
 #include "unicode/utypes.h"
 #include "unicode/translit.h"
 #include "unicode/rbt.h"
 #include "unicode/uniset.h"
 #include "unicode/unicode.h"
 #include "unicode/normlzr.h"
 #include "unicode/uchar.h"
 #include "transrt.h"
 #include "testutil.h"

 // CASE(id, test): expands to one `case id:` arm of a switch.  It always
 // stores the stringized test name in `name`; only when `exec` is true does
 // it log a "<test>---" banner followed by an empty line and then call the
 // member function `test`.  The expansion ends in `break` without a
 // semicolon, so the use site supplies the terminating ';'.
 #define CASE(id,test) case id:                          \
                           name = #test;                 \
                           if (exec) {                   \
                               logln(#test "---");       \
                               logln((UnicodeString)""); \
                               test();                   \
                           }                             \
                           break

 // #define ENABLE_FAILING_TESTS

 /**
  * Test dispatcher: maps a zero-based index to one round-trip test.
  *
  * For a known index, `name` receives the test's name and, when `exec` is
  * true, the test is run (see the CASE macro).  For any other index `name` is
  * set to the empty string -- presumably the harness treats that as the end
  * of the test list; confirm against IntlTest.
  *
  * The Arabic and Hebrew entries are commented out, and the Jamo/Hangul
  * entries are compiled only when ENABLE_FAILING_TESTS is defined.
  *
  * @param index  zero-based test index
  * @param exec   TRUE to run the test, FALSE to only report its name
  * @param name   output: name of the selected test, or "" for an unknown index
  */
 void
 TransliteratorRoundTripTest::runIndexedTest(int32_t index, UBool exec,
                                    const char* &name, char* /*par*/) {
     switch (index) {
         CASE(0,TestHiragana);
         CASE(1,TestKatakana);
         //CASE(2,TestArabic);
         //CASE(3,TestHebrew);
         CASE(2,TestGreek);
         CASE(3,Testel);
         CASE(4,TestCyrillic);
         CASE(5,TestDevanagariLatin);
         CASE(6,TestDevanagariBengali);
         CASE(7,TestDevanagariGurmukhi);
         CASE(8,TestDevanagariGujarati);
         CASE(9,TestDevanagariOriya);
         CASE(10,TestDevanagariTamil);
         CASE(11,TestDevanagariTelugu);
         CASE(12,TestDevanagariKannada);
         CASE(13,TestDevanagariMalayalam);
 #ifdef ENABLE_FAILING_TESTS
         CASE(14,TestJamo);
         CASE(15,TestJamoHangul);
 #endif
         // Unknown index: report an empty name.
         default: name = ""; break;
     }
 }

 //--------------------------------------------------------------------
 // BitSet
 //--------------------------------------------------------------------

 /**
  * Minimal fixed-size bit set covering the indices 0..0xFFFF.
  * The 65536 bits are packed 32 per int32_t word.
  */
 class BitSet {
 public:
     BitSet();
     ~BitSet();

     /** Resets every bit to 0. */
     void clear();

     /** Turns on bit (x & 0xFFFF). */
     void set(int32_t x);

     /** Reports whether bit (x & 0xFFFF) is on. */
     UBool get(int32_t x) const;

 private:
     int32_t bits[65536/32];
 };

 /** Constructs a bit set with every bit cleared. */
 BitSet::BitSet() {
     this->clear();
 }

 /** Nothing to release: the bit storage is an in-object array. */
 BitSet::~BitSet() {}

 void BitSet::clear() {
     int32_t *limit = bits + 65536/32;
     int32_t *p = bits;
     while (p < limit) *p++ = 0;
 }

 void BitSet::set(int32_t x) {
     x &= 0xFFFF;
     int32_t i = x / 32;
     int32_t bit = 1L << (x & 31);
     bits[i] |= bit;
 }

 /**
  * Reports whether bit (x & 0xFFFF) is on.
  *
  * The mask is built in unsigned arithmetic so that testing bit 31 of a word
  * does not left-shift a signed 1 into the sign bit (undefined behaviour with
  * a 32-bit long).
  */
 UBool BitSet::get(int32_t x) const {
     x &= 0xFFFF;
     const int32_t word = x >> 5;                     // x / 32, x is non-negative here
     const uint32_t mask = (uint32_t)1 << (x & 31);
     return ((uint32_t)bits[word] & mask) != 0;
 }

 //--------------------------------------------------------------------
 // Legal
 //--------------------------------------------------------------------

 /**
  * Predicate over source strings.  This base implementation accepts every
  * string; subclasses override is() to reject some.
  */
 class Legal {
 public:
     Legal() {}
     virtual ~Legal() {}

     /** Always TRUE in the base class. */
     virtual UBool is(const UnicodeString& /*sourceString*/) const {
         return TRUE;
     }
 };

 /**
  * Legality check for Greek strings.  The `full` flag chosen at construction
  * selects which of the two rule sets in is() applies.
  */
 class LegalGreek : public Legal {
 public:
     LegalGreek(UBool _full) : full(_full) {}
     virtual ~LegalGreek() {}

     virtual UBool is(const UnicodeString& sourceString) const;

     /** TRUE for the Greek vowel letters listed in the implementation. */
     static UBool isVowel(UChar c);

     /** TRUE for small or capital rho. */
     static UBool isRho(UChar c);

 private:
     UBool full;
 };

 /**
  * Decides whether sourceString is legal Greek under the configured rules.
  *
  * When `full` is FALSE: the string U+039C U+03C0 is rejected, as is any
  * string whose decomposition contains one of the combining marks U+0300,
  * U+0302, U+0313, U+0314, U+0342 or U+0345; everything else is accepted.
  *
  * When `full` is TRUE: a string whose first letter is a vowel or rho must
  * carry exactly one breathing mark (U+0313 / U+0314), and for rho that mark
  * must appear before the second letter; any other string must carry none.
  * Only BMP code units are examined.
  */
 UBool LegalGreek::is(const UnicodeString& sourceString) const {
     UErrorCode ec = U_ZERO_ERROR;
     UnicodeString decomp;
     Normalizer::decompose(sourceString, FALSE, 0, decomp, ec);
     const int32_t len = decomp.length();

     if (!full) {
         if (sourceString == CharsToUnicodeString("\\u039C\\u03C0")) {
             return FALSE;
         }
         for (int32_t k = 0; k < len; ++k) {
             switch (decomp.charAt(k)) {
             case 0x0300:
             case 0x0302:
             case 0x0313:
             case 0x0314:
             case 0x0342:
             case 0x0345:
                 return FALSE;   // one of the excluded combining marks
             default:
                 break;
             }
         }
         return TRUE;
     }

     UBool startsWithVowel = FALSE;
     UBool startsWithRho = FALSE;
     int32_t breathings = 0;
     int32_t letters = 0;
     for (int32_t k = 0; k < len; ++k) {
         const UChar ch = decomp.charAt(k);
         if (u_isalpha(ch)) {
             ++letters;
             if (letters == 1) {
                 // Classify the very first letter only.
                 startsWithVowel = isVowel(ch);
                 startsWithRho = isRho(ch);
             }
             // A leading rho must have its breathing before the next letter.
             if (startsWithRho && letters == 2 && breathings == 0) {
                 return FALSE;
             }
         }
         if (ch == 0x0313 || ch == 0x0314) {
             ++breathings;
         }
     }

     const int32_t expectedBreathings = (startsWithVowel || startsWithRho) ? 1 : 0;
     return breathings == expectedBreathings;
 }

 /**
  * TRUE when c is one of the seven lowercase or seven uppercase Greek vowel
  * letters listed below (unaccented forms only).
  */
 UBool LegalGreek::isVowel(UChar c) {
     static const UChar VOWELS[] = {
         0x03B1, 0x03B5, 0x03B7, 0x03B9, 0x03BF, 0x03C5, 0x03C9,
         0x0391, 0x0395, 0x0397, 0x0399, 0x039F, 0x03A5, 0x03A9
     };
     const int32_t count = (int32_t)(sizeof(VOWELS) / sizeof(VOWELS[0]));
     for (int32_t k = 0; k < count; ++k) {
         if (VOWELS[k] == c) {
             return TRUE;
         }
     }
     return FALSE;
 }

 /** TRUE when c is U+03C1 or U+03A1 (small or capital rho). */
 UBool LegalGreek::isRho(UChar c) {
     if (c == 0x03C1 || c == 0x03A1) {
         return TRUE;
     }
     return FALSE;
 }

 /**
  * Scope guard for an adopted Legal object: on destruction it deletes the
  * object and sets the referenced pointer variable to NULL.
  */
 class LegalDeleter {
 public:
     LegalDeleter(Legal* adopted, Legal*& ptrToClean)
         : obj(adopted), zeroMe(ptrToClean) {}

     ~LegalDeleter() {
         delete obj;
         zeroMe = NULL;
     }

 private:
     Legal* obj;        // object to delete
     Legal*& zeroMe;    // pointer variable reset to NULL afterwards
 };

 //--------------------------------------------------------------------
 // RTTest Interface
 //--------------------------------------------------------------------

 /**
  * Round-trip tester for one transliterator ID.  test() configures the
  * character sets and then runs test2(), which transliterates source
  * characters to the target script and target characters to the source
  * script and back, reporting failures through the supplied IntlTest.
  */
 class RTTest {

     // PrintWriter out;

     UnicodeString transliteratorID;    // ID passed to Transliterator::createInstance
     int8_t sourceScript;               // TestUtility script code of the source side
     int8_t targetScript;               // TestUtility script code of the target side
     int32_t errorLimit;                // test2() stops once errorCount reaches this
     int32_t errorCount;                // failures logged so far
     int32_t pairLimit;                 // max first characters tried in the final pair loop
     UnicodeSet sourceRange;            // source characters to test
     UnicodeSet targetRange;            // target characters to test; empty means no restriction
     UnicodeSet roundtripExclusions;    // target characters exempt from the round-trip check
     IntlTest* log;                     // sink for logln/errln; set by test()
     Legal* legalSource; // NOT owned
     UnicodeSet badCharacters;          // characters that must never appear in output

 public:

     /*
      * create a test for the given script transliterator.
      */
     RTTest(const UnicodeString& transliteratorIDStr,
            int8_t sourceScriptVal, int8_t targetScriptVal);

     virtual ~RTTest();

     // Sets the error count at which test2() gives up.
     void setErrorLimit(int32_t limit);

     // Sets the maximum number of first characters tried in the
     // target-source pair loop.
     void setPairLimit(int32_t limit);

     /*
      * Configures the sets from the given patterns and runs the test.
      * An empty sourceRange pattern selects [a-zA-Z]; an empty targetRange
      * pattern leaves the target set empty; roundtripExclusions may be NULL.
      * adoptedLegal is deleted before this call returns.
      */
     void test(const UnicodeString& sourceRange,
               const UnicodeString& targetRange,
               const char* roundtripExclusions,
               IntlTest* log,
               Legal* adoptedLegal);

 private:

     // Equality used for round trips: compares the strings directly and
     // again after Normalizer::decompose; a case-insensitive match is also
     // accepted when isCamel() holds.

     static UBool isSame(const UnicodeString& a, const UnicodeString& b);

     // TRUE if any code point of a is contained in set.
     UBool includesSome(const UnicodeSet& set, const UnicodeString& a);

     // TRUE if a lowercase or titlecase letter is followed, anywhere later
     // in the string, by an uppercase or titlecase letter.
     static UBool isCamel(const UnicodeString& a);

     // The test proper; expects the members set up by test().
     void test2();

     // Each of the following logs one failure and increments errorCount
     // (logRoundTripFailure does nothing for sources that legalSource rejects).
     void logWrongScript(const UnicodeString& label,
                         const UnicodeString& from,
                         const UnicodeString& to);
     void logRoundTripFailure(const UnicodeString& from,
                              const UnicodeString& to,
                              const UnicodeString& back);
     void logNotCanonical(const UnicodeString& label,
                          const UnicodeString& from,
                          const UnicodeString& to,
                          const UnicodeString& toCan);

 protected:

     /*
      * Characters to filter for source-target mapping completeness
      * Typically is base alphabet, minus extended characters
      * Default is ASCII letters for Latin
      */
     virtual UBool isSource(UChar c);

     /*
      * Characters acceptable in the result of a target-to-source mapping:
      * those whose script is the source script or the common script.
      */
     inline UBool isReceivingSource(UChar c);

     /*
      * Characters to filter for target-source mapping
      * Typically is base alphabet, minus extended characters
      */
     inline UBool isTarget(UChar c);

     /*
      * Characters acceptable in the result of a source-to-target mapping:
      * those whose script is the target script or the common script.
      */
     inline UBool isReceivingTarget(UChar c);

     // String forms: TRUE when every code unit of s passes the matching
     // single-character predicate (so TRUE for an empty string).
     UBool isSource(const UnicodeString& s);
     UBool isTarget(const UnicodeString& s);
     UBool isReceivingSource(const UnicodeString& s);
     UBool isReceivingTarget(const UnicodeString& s);
 };

 //--------------------------------------------------------------------
 // RTTest Implementation
 //--------------------------------------------------------------------

 /*
  * Creates a test for the given script transliterator.
  *
  * transliteratorIDStr  ID later passed to Transliterator::createInstance
  * sourceScriptVal      TestUtility script code of the source side
  * targetScriptVal      TestUtility script code of the target side
  *
  * The error limit starts at INT32 max (effectively unlimited) and the pair
  * limit at 0x10000.  Both pointer members start as NULL; previously `log`
  * was left uninitialized until test() was called.
  */
 RTTest::RTTest(const UnicodeString& transliteratorIDStr,
                int8_t sourceScriptVal, int8_t targetScriptVal)
     : transliteratorID(transliteratorIDStr),
       sourceScript(sourceScriptVal),
       targetScript(targetScriptVal),
       errorLimit((int32_t)0x7FFFFFFFL),
       errorCount(0),
       pairLimit(0x10000),
       log(NULL),
       legalSource(NULL) {
 }

 /** Nothing to free here: legalSource is not owned by this object. */
 RTTest::~RTTest() {}

 void RTTest::setErrorLimit(int32_t limit) {
     errorLimit = limit;
 }

 /** Sets how many first characters the target-source pair loop may try. */
 void RTTest::setPairLimit(int32_t limit) {
     this->pairLimit = limit;
 }

 /**
  * Lenient equality for round-trip results.  Two strings match if they are
  * equal, or equal ignoring case while the first one is "camel" (see
  * isCamel); the same two checks are then repeated on the
  * Normalizer::decompose output of both strings.
  */
 UBool RTTest::isSame(const UnicodeString& a, const UnicodeString& b) {
     const UBool rawMatch = (a == b) ||
         (a.caseCompare(b, U_FOLD_CASE_DEFAULT) == 0 && isCamel(a));
     if (rawMatch) {
         return TRUE;
     }

     UErrorCode ec = U_ZERO_ERROR;
     UnicodeString aDecomp;
     UnicodeString bDecomp;
     Normalizer::decompose(a, FALSE, 0, aDecomp, ec);
     Normalizer::decompose(b, FALSE, 0, bDecomp, ec);

     if (aDecomp == bDecomp) {
         return TRUE;
     }
     return aDecomp.caseCompare(bDecomp, U_FOLD_CASE_DEFAULT) == 0 && isCamel(aDecomp);
 }

 /** TRUE if at least one code point of `a` is a member of `set`. */
 UBool RTTest::includesSome(const UnicodeSet& set, const UnicodeString& a) {
     int32_t pos = 0;
     while (pos < a.length()) {
         const UChar32 cp = a.char32At(pos);
         if (set.contains(cp)) {
             return TRUE;
         }
         pos += UTF_CHAR_LENGTH(cp);   // step over a full code point
     }
     return FALSE;
 }

 /**
  * TRUE if the string has the shape "aB": some lowercase or titlecase
  * letter is followed, anywhere later, by an uppercase or titlecase letter.
  */
 UBool RTTest::isCamel(const UnicodeString& a) {
     UBool sawLower = FALSE;
     int32_t pos = 0;
     while (pos < a.length()) {
         const UChar32 cp = a.char32At(pos);
         const int8_t kind = u_charType(cp);

         const UBool isUpper = (kind == U_UPPERCASE_LETTER);
         const UBool isTitle = (kind == U_TITLECASE_LETTER);
         const UBool isLower = (kind == U_LOWERCASE_LETTER);

         if ((isUpper || isTitle) && sawLower) {
             return TRUE;
         }
         // A titlecase letter also counts as "lower" for what follows,
         // since its second letter is lowercase.
         if (isTitle || isLower) {
             sawLower = TRUE;
         }
         pos += UTF_CHAR_LENGTH(cp);
     }
     return FALSE;
 }

 void RTTest::test(const UnicodeString& sourceRangeVal,
                   const UnicodeString& targetRangeVal,
                   const char* roundtripExclusions,
                   IntlTest* logVal,
                   Legal* adoptedLegal) {

     UErrorCode status = U_ZERO_ERROR;

     this->log = logVal;
     this->legalSource = adoptedLegal;
     LegalDeleter cleaner(adoptedLegal, this->legalSource);

     if (sourceRangeVal.length() > 0) {
         this->sourceRange.applyPattern(sourceRangeVal, status);
         if (U_FAILURE(status)) {
             log->errln("FAIL: UnicodeSet::applyPattern(" +
                        sourceRangeVal + ")");
             return;
         }
     } else {
         this->sourceRange.applyPattern("[a-zA-Z]", status);
         if (U_FAILURE(status)) {
             log->errln("FAIL: UnicodeSet::applyPattern([a-z])");
             return;
         }
     }
     this->targetRange.clear();
     if (targetRangeVal.length() > 0) {
         this->targetRange.applyPattern(targetRangeVal, status);
         if (U_FAILURE(status)) {
             log->errln("FAIL: UnicodeSet::applyPattern(" +
                        targetRangeVal + ")");
             return;
         }
     }
     this->roundtripExclusions.clear();
     if (roundtripExclusions != NULL) {
         UErrorCode ec = U_ZERO_ERROR;
         this->roundtripExclusions.applyPattern(roundtripExclusions, ec);
     }
     if (badCharacters.isEmpty()) {
         UErrorCode ec = U_ZERO_ERROR;
         badCharacters.applyPattern("[:Other:]", ec);
     }

     test2();

     if (errorCount > 0) {
         log->errln(transliteratorID + " errors: " + errorCount); // + ", see " + logFileName);
     } else {
         log->logln(transliteratorID + " ok");
     }
 }

 void RTTest::logWrongScript(const UnicodeString& label,
                             const UnicodeString& from,
                             const UnicodeString& to) {
     log->errln((UnicodeString)"Fail " +
                label + ": " +
                from + "(" + TestUtility::hex(from) + ") => " +
                to + "(" + TestUtility::hex(to) + ")");
     ++errorCount;
 }

 /**
  * Reports a canonical-equivalence failure and counts it.
  *
  * @param label  names the direction being tested
  * @param from   original input
  * @param to     result obtained from `from`
  * @param toCan  the string supplied by the caller for comparison with `to`
  *
  * Fix: the hex dump following `toCan` now shows `toCan`; it used to repeat
  * the hex of `to`, so the message never revealed the differing code units.
  */
 void RTTest::logNotCanonical(const UnicodeString& label,
                              const UnicodeString& from,
                              const UnicodeString& to,
                              const UnicodeString& toCan) {
     log->errln((UnicodeString)"Fail (can.equiv)" +
                label + ": " +
                from + "(" + TestUtility::hex(from) + ") => " +
                to + "(" + TestUtility::hex(to) + ")" +
                toCan + " (" +
                TestUtility::hex(toCan) + ")"
                );
     ++errorCount;
 }

 void RTTest::logRoundTripFailure(const UnicodeString& from,
                                  const UnicodeString& to,
                                  const UnicodeString& back) {
     if (!legalSource->is(from)) return; // skip illegals

     log->errln((UnicodeString)"Fail Roundtrip: " +
                from + "(" + TestUtility::hex(from) + ") => " +
                to + "(" + TestUtility::hex(to) + ") => " +
                back + "(" + TestUtility::hex(back) + ") => ");
     ++errorCount;
 }

 /*
  * Source characters to test: alphabetic characters of the source script
  * that are also members of sourceRange.
  */
 UBool RTTest::isSource(UChar c) {
     if (TestUtility::getScript(c) != sourceScript) {
         return FALSE;
     }
     if (!u_isalpha(c)) {
         return FALSE;
     }
     return sourceRange.contains(c) ? TRUE : FALSE;
 }

 /*
  * Characters acceptable in output that is supposed to be in the source
  * script: anything whose script is the source script or COMMON_SCRIPT.
  */
 inline UBool
 RTTest::isReceivingSource(UChar c) {
     const int8_t scriptOfC = TestUtility::getScript(c);
     if (scriptOfC == sourceScript) {
         return TRUE;
     }
     return scriptOfC == TestUtility::COMMON_SCRIPT;
 }

 /*
  * Target characters to test: alphabetic characters of the target script.
  * A non-empty targetRange restricts them further to its members.
  */
 inline UBool
 RTTest::isTarget(UChar c) {
     if (TestUtility::getScript(c) != targetScript || !u_isalpha(c)) {
         return FALSE;
     }
     if (targetRange.isEmpty()) {
         return TRUE;
     }
     return targetRange.contains(c) ? TRUE : FALSE;
 }

 /*
  * Characters acceptable in output that is supposed to be in the target
  * script: anything whose script is the target script or COMMON_SCRIPT.
  */
 inline UBool
 RTTest::isReceivingTarget(UChar c) {
     const int8_t scriptOfC = TestUtility::getScript(c);
     if (scriptOfC == targetScript) {
         return TRUE;
     }
     return scriptOfC == TestUtility::COMMON_SCRIPT;
 }

 /** TRUE when every code unit of s passes isSource(UChar); TRUE if s is empty. */
 UBool RTTest::isSource(const UnicodeString& s) {
     const int32_t count = s.length();
     int32_t pos = 0;
     while (pos < count && isSource(s.charAt(pos))) {
         ++pos;
     }
     return pos == count;
 }

 /** TRUE when every code unit of s passes isTarget(UChar); TRUE if s is empty. */
 UBool RTTest::isTarget(const UnicodeString& s) {
     const int32_t count = s.length();
     int32_t pos = 0;
     while (pos < count && isTarget(s.charAt(pos))) {
         ++pos;
     }
     return pos == count;
 }

 /** TRUE when every code unit of s passes isReceivingSource(UChar); TRUE if s is empty. */
 UBool RTTest::isReceivingSource(const UnicodeString& s) {
     const int32_t count = s.length();
     int32_t pos = 0;
     while (pos < count && isReceivingSource(s.charAt(pos))) {
         ++pos;
     }
     return pos == count;
 }

 /** TRUE when every code unit of s passes isReceivingTarget(UChar); TRUE if s is empty. */
 UBool RTTest::isReceivingTarget(const UnicodeString& s) {
     const int32_t count = s.length();
     int32_t pos = 0;
     while (pos < count && isReceivingTarget(s.charAt(pos))) {
         ++pos;
     }
     return pos == count;
 }

 //--------------------------------------------------------------------
 // Specific Tests
 //--------------------------------------------------------------------

 void TransliteratorRoundTripTest::TestHiragana() {
     RTTest test("Latin-Hiragana",
                 TestUtility::LATIN_SCRIPT, TestUtility::HIRAGANA_SCRIPT);
     test.test("[a-z]", UnicodeString("[\\u3040-\\u3094]", ""), NULL, this, new Legal());
 }

 void TransliteratorRoundTripTest::TestKatakana() {
     RTTest test("Latin-Katakana",
                 TestUtility::LATIN_SCRIPT, TestUtility::KATAKANA_SCRIPT);
     test.test("[a-z]", UnicodeString("[\\u30A1-\\u30FA\\u30FC]", ""), NULL, this, new Legal());
 }

 // Latin-Arabic round trip: currently disabled, so this function does nothing
 // (its CASE entry in runIndexedTest is commented out as well).
 // NOTE: the commented-out call below passes four arguments, while
 // RTTest::test() takes five (the roundtripExclusions pattern is missing);
 // it must be updated before being re-enabled.
 void TransliteratorRoundTripTest::TestArabic() {
 //  RTTest test("Latin-Arabic",
 //              TestUtility::LATIN_SCRIPT, TestUtility::ARABIC_SCRIPT);
 //  test.test("[a-z]", UnicodeString("[\\u0620-\\u065F-[\\u0640]]", ""), this, new Legal());
 }

 // Latin-Hebrew round trip: currently disabled, so this function does nothing
 // (its CASE entry in runIndexedTest is commented out as well).
 // NOTE: the commented-out call below passes four arguments, while
 // RTTest::test() takes five (the roundtripExclusions pattern is missing);
 // it must be updated before being re-enabled.
 void TransliteratorRoundTripTest::TestHebrew() {
 //  RTTest test("Latin-Hebrew",
 //              TestUtility::LATIN_SCRIPT, TestUtility::HEBREW_SCRIPT);
 //  test.test("", UnicodeString("[\\u05D0-\\u05EF]", ""), this, new Legal());
 }

 void TransliteratorRoundTripTest::TestJamo() {
     RTTest t("Latin-Jamo",
              TestUtility::LATIN_SCRIPT, TestUtility::JAMO_SCRIPT);
     t.setErrorLimit(200); // Don't run full test -- too long
     t.test("", "", NULL, this, new Legal());
 }

 void TransliteratorRoundTripTest::TestJamoHangul() {
     RTTest t("Latin-Hangul",
              TestUtility::LATIN_SCRIPT, TestUtility::HANGUL_SCRIPT);
     t.setErrorLimit(50); // Don't run full test -- too long
     t.test("", "", NULL, this, new Legal());
 }

 void TransliteratorRoundTripTest::TestGreek() {
     RTTest test("Latin-Greek",
                 TestUtility::LATIN_SCRIPT, TestUtility::GREEK_SCRIPT);
     test.test("", UnicodeString("[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]", ""),
               "[\\u037A\\u03D0-\\u03F5]", /* exclusions */
               this, new LegalGreek(TRUE));
 }

 void TransliteratorRoundTripTest::Testel() {
     RTTest test("Latin-el",
                 TestUtility::LATIN_SCRIPT, TestUtility::GREEK_SCRIPT);
     test.test("", "[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]",
               "[\\u037A\\u03D0-\\u03F5]", /* exclusions */
               this, new LegalGreek(FALSE));
     }

 void TransliteratorRoundTripTest::TestCyrillic() {
     RTTest test("Latin-Cyrillic",
                 TestUtility::LATIN_SCRIPT, TestUtility::CYRILLIC_SCRIPT);
     test.test("", UnicodeString("[\\u0400-\\u045F]", ""), NULL, this, new Legal());
 }


 //----------------------------------
 // Inter-Indic Tests
 //----------------------------------
 void TransliteratorRoundTripTest::TestDevanagariLatin() {
     RTTest test("Latin-DEVANAGARI",
                 TestUtility::LATIN_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
     test.test("", "[:Devanagari:]", NULL, this, new Legal());
 }

 void TransliteratorRoundTripTest::TestDevanagariBengali()  {
     RTTest test("BENGALI-DEVANAGARI",
         TestUtility::BENGALI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
          test.test("[:BENGALI:]", "[:Devanagari:]",
                 "[\\u0950\\u0935\\u0912\\u0933\\u090e\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u09F0\\u09F1]", /*roundtrip exclusions*/
                 this, new Legal());
     RTTest test1("DEVANAGARI-BENGALI",
             TestUtility::DEVANAGARI_SCRIPT, TestUtility::BENGALI_SCRIPT );
           test1.test( "[:Devanagari:]", "[:BENGALI:]",
                   "[\\u0950\\u0935\\u0912\\u0933\\u090e\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u09F0\\u09F1]", /*roundtrip exclusions*/
                   this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariGurmukhi()  {
     RTTest test("GURMUKHI-DEVANAGARI",
       TestUtility::GURMUKHI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:GURMUKHI:]", "[:Devanagari:]",
             "[\\u0950\\u090D\\u090e\\u0912\\u0911\\u090b\\u090c\\u0934\\u0960\\u0961\\u0937\\u0a72\\u0a73\\u0a74\\u093d]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-GURMUKHI",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::GURMUKHI_SCRIPT );
       test1.test( "[:Devanagari:]", "[:GURMUKHI:]",
               "[\\u0950\\u090D\\u090e\\u0912\\u0911\\u090b\\u090c\\u0934\\u0960\\u0961\\u0937\\u0a72\\u0a73\\u0a74\\u093d]", /*roundtrip exclusions*/
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariGujarati()  {
     RTTest test("GUJARATI-DEVANAGARI",
       TestUtility::GUJARATI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:GUJARATI:]", "[:Devanagari:]",
             "[\\u0961\\u090c\\u090e\\u0912]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-GUJARATI",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::GUJARATI_SCRIPT );
       test1.test( "[:Devanagari:]", "[:GUJARATI:]",NULL,
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariOriya()  {
     RTTest test("ORIYA-DEVANAGARI",
       TestUtility::ORIYA_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:ORIYA:]", "[:Devanagari:]",
             "[\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931\\u0935]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-ORIYA",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::ORIYA_SCRIPT );
       test1.test( "[:Devanagari:]", "[:ORIYA:]",
               "[\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931\\u0935]", /*roundtrip exclusions*/
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariTamil()  {
     RTTest test("Tamil-DEVANAGARI",
       TestUtility::TAMIL_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:tamil:]", "[:Devanagari:]",
               "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]"
               "\\u090B\\u090C\\u0916\\u0917\\u0918\\u091B\\u091D\\u0920\\u0921"
               "\\u0922\\u0925\\u0926\\u0927\\u092B\\u092C\\u092D\\u0936\\u0960\\u0961]", /*roundtrip exclusions*/
               this, new Legal());
     RTTest test1("DEVANAGARI-Tamil",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::TAMIL_SCRIPT );
       test1.test( "[:Devanagari:]", "[:tamil:]",
               "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariTelugu()  {
     RTTest test("Telugu-DEVANAGARI",
       TestUtility::TELUGU_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:telugu:]", "[:Devanagari:]",
             "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-TELUGU",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::TELUGU_SCRIPT );
       test1.test( "[:Devanagari:]", "[:TELUGU:]",
               "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariKannada()  {
     RTTest test("KANNADA-DEVANAGARI",
       TestUtility::KANNADA_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:KANNADA:]", "[:Devanagari:]",
             "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-KANNADA",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::KANNADA_SCRIPT );
       test1.test( "[:Devanagari:]", "[:KANNADA:]",
               "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
               this, new Legal());
 }
 void TransliteratorRoundTripTest::TestDevanagariMalayalam()  {
     RTTest test("MALAYALAM-DEVANAGARI",
       TestUtility::MALAYALAM_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
       test.test("[:MALAYALAM:]", "[:Devanagari:]",
             "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
             this, new Legal());
     RTTest test1("DEVANAGARI-MALAYALAM",
         TestUtility::DEVANAGARI_SCRIPT, TestUtility::MALAYALAM_SCRIPT );
       test1.test( "[:Devanagari:]", "[:MALAYALAM:]",
               "[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
               this, new Legal());
 }
 //---------------
 // End Indic
 //---------------

 void RTTest::test2() {

     UChar c;
     UnicodeString cs, targ, reverse;
     int8_t *type = new int8_t[0xFFFF];
     UParseError parseError;
     UErrorCode status = U_ZERO_ERROR;
     Transliterator* sourceToTarget = Transliterator::createInstance(transliteratorID, UTRANS_FORWARD, parseError, status);
     if (sourceToTarget == NULL) {
         log->errln("Fail: createInstance(" + transliteratorID +
                    ") returned NULL");
         return;
     }
     Transliterator* targetToSource = sourceToTarget->createInverse(status);
     if (targetToSource == NULL) {
         log->errln("Fail: " + transliteratorID +
                    ".createInverse() returned NULL");
         delete sourceToTarget;
         return;
     }

     log->logln("Initializing type array");

     for (c = 0; c < 0xFFFF; ++c) {
         type[c] = u_charType(c);
     }

     BitSet failSourceTarg;

     log->logln("Checking that all source characters convert to target - Singles");

     for (c = 0; c < 0xFFFF; ++c) {
         if (type[c] == U_UNASSIGNED || !isSource(c))
             continue;
         cs.remove();
         cs.append(c);
         targ = cs;
         sourceToTarget->transliterate(targ);
         if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
             logWrongScript("Source-Target", cs, targ);
             failSourceTarg.set(c);
             if (errorCount >= errorLimit)
                 return;
         } else {
             UnicodeString cs2;
             UErrorCode ec = U_ZERO_ERROR;
             Normalizer::decompose(cs, FALSE, 0, cs2, ec);
             UnicodeString targ2 = cs2;
             sourceToTarget->transliterate(targ2);
             if (targ != targ2) {
                 logNotCanonical("Source-Target", cs, targ, targ2);
                 if (errorCount >= errorLimit)
                     return;
             }
         }
     }

     log->logln("Checking that all source characters convert to target - Doubles");

     for (c = 0; c < 0xFFFF; ++c) {
         if (type[c] == U_UNASSIGNED ||
             !isSource(c)) continue;
         if (failSourceTarg.get(c)) continue;

         for (UChar d = 0; d < 0xFFFF; ++d) {
             if (type[d] == U_UNASSIGNED || !isSource(d))
                 continue;
             if (failSourceTarg.get(d)) continue;

             cs.remove();
             cs.append(c).append(d);
             targ = cs;
             sourceToTarget->transliterate(targ);
             if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
                 logWrongScript("Source-Target", cs, targ);
                 if (errorCount >= errorLimit)
                     return;
             } else {
                 UnicodeString cs2;
                 UErrorCode ec = U_ZERO_ERROR;
                 Normalizer::decompose(cs, FALSE, 0, cs2, ec);
                 UnicodeString targ2 = cs2;
                 sourceToTarget->transliterate(targ2);
                 if (targ != targ2) {
                     logNotCanonical("Source-Target", cs, targ, targ2);
                     if (errorCount >= errorLimit)
                         return;
                 }
             }
         }
     }

     log->logln("Checking that target characters convert to source and back - Singles");

     BitSet failTargSource;
     BitSet failRound;

     for (c = 0; c < 0xFFFF; ++c) {
         if (type[c] == U_UNASSIGNED || !isTarget(c))
             continue;
         cs.remove();
         cs.append(c);
         targ = cs;
         targetToSource->transliterate(targ);
         reverse = targ;
         sourceToTarget->transliterate(reverse);
         if (!isReceivingSource(targ) || includesSome(badCharacters, targ)) {
             logWrongScript("Target-Source", cs, targ);
             failTargSource.set(c);
             if (errorCount >= errorLimit)
                 return;
         } else if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)) {
             logRoundTripFailure(cs, targ, reverse);
             failRound.set(c);
             if (errorCount >= errorLimit)
                 return;
         } else {
             UnicodeString targ2;
             UErrorCode ec = U_ZERO_ERROR;
             Normalizer::decompose(targ, FALSE, 0, targ2, ec);
             UnicodeString reverse2 = targ2;
             sourceToTarget->transliterate(reverse2);
             if (reverse != reverse2) {
                 logNotCanonical("Target-Source", cs, targ, targ2);
                 if (errorCount >= errorLimit)
                     return;
             }
         }
     }

     log->logln("Checking that target characters convert to source and back - Doubles");
     int32_t count = 0;
     cs = UNICODE_STRING("aa", 2);
     for (c = 0; c < 0xFFFF; ++c) {
         if (type[c] == U_UNASSIGNED || !isTarget(c))
             continue;
         if (++count > pairLimit) {
             //throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
             log->logln("");
             log->logln((UnicodeString)"Test truncated at " + pairLimit + " x 64k pairs");
             return;
         }
         cs.setCharAt(0, c);
         log->log(TestUtility::hex(c));
         for (UChar d = 0; d < 0xFFFF; ++d) {
             if (type[d] == U_UNASSIGNED || !isTarget(d))
                 continue;
             cs.setCharAt(1, d);
             targ = cs;
             targetToSource->transliterate(targ);
             reverse = targ;
             sourceToTarget->transliterate(reverse);
             if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)
                 || includesSome(badCharacters, targ)) {
                 logWrongScript("Target-Source", cs, targ);
                 if (errorCount >= errorLimit)
                     return;
             } else if (!isSame(cs, reverse) && !failRound.get(c) && !failRound.get(d)
                        && !roundtripExclusions.contains(c) && !roundtripExclusions.contains(d)) {
                 logRoundTripFailure(cs, targ, reverse);
                 if (errorCount >= errorLimit)
                     return;
             } else {
                 UnicodeString targ2;
                 UErrorCode ec = U_ZERO_ERROR;
                 Normalizer::decompose(targ, FALSE, 0, targ2, ec);
                 UnicodeString reverse2 = targ2;
                 sourceToTarget->transliterate(reverse2);
                 if (reverse != reverse2) {
                     logNotCanonical("Target-Source", cs, targ, targ2);
                     if (errorCount >= errorLimit)
                         return;
                 }
             }
         }
     }
     log->logln("");
     delete []type;
     delete sourceToTarget;
     delete targetToSource;
 }