src/com/ibm/icu/dev/test/normalizer/BasicTest.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 1996-2000, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/normalizer/BasicTest.java,v $
  * $Date: 2002/02/25 22:43:58 $
  * $Revision: 1.10 $
  *
  *****************************************************************************************
  */
 package com.ibm.icu.dev.test.normalizer;

 import com.ibm.icu.dev.test.*;
 import com.ibm.icu.lang.*;
 import com.ibm.icu.text.*;
 import com.ibm.icu.impl.Utility;
 import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;

 /**
  * Table-driven tests for {@code Normalizer}: each table row holds an input
  * string plus the results expected from decomposition and composition, and
  * the helpers at the bottom of the class run those rows through both the
  * static {@code Normalizer.normalize} call and the iterator-style API.
  */
 public class BasicTest extends TestFmwk {
     /**
      * Command-line entry point: creates a {@code BasicTest} and hands the
      * arguments to its {@code run} method.
      *
      * @param args command-line arguments, passed through unchanged
      * @throws Exception anything thrown by {@code run}
      */
     public static void main(String[] args) throws Exception {
         new BasicTest().run(args);
     }

     /**
      * Canonical normalization cases.  Column 0 is the input, column 1 the
      * expected canonical decomposition, column 2 the expected canonical
      * composition.
      */
     String[][] canonTests = {
         // Input                Decomposed              Composed
         { "cat",                "cat",                  "cat"               },
         { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

         { "\u1e0a",             "D\u0307",              "\u1e0a"            }, // D-dot_above
         { "D\u0307",            "D\u0307",              "\u1e0a"            }, // D dot_above

         { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_below dot_above
         { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_above dot_below
         { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      }, // D dot_below dot_above

         { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
         { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below

         { "\u1E14",             "E\u0304\u0300",        "\u1E14"            }, // E-macron-grave
         { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            }, // E-macron + grave
         { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      }, // E-grave + macron

         { "\u212b",             "A\u030a",              "\u00c5"            }, // angstrom_sign
         { "\u00c5",             "A\u030a",              "\u00c5"            }, // A-ring

 //        { "\u00fdffin",              "A\u0308ffin",          "\u00fdffin"             },
         { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        }, //updated with 3.0
 //        { "\u00fd\uFB03n",           "A\u0308\uFB03n",       "\u00fd\uFB03n"          },
         { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     }, //updated with 3.0

         { "Henry IV",           "Henry IV",             "Henry IV"          },
         { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

         { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
         { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
         { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      }, // hw_ka + hw_ten
         { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      }, // ka + hw_ten
         { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      }, // hw_ka + ten

         { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
     };

     /**
      * Compatibility normalization cases.  Column 0 is the input, column 1
      * the expected compatibility decomposition, column 2 the expected
      * compatibility composition.
      */
     String[][] compatTests = {
             // Input                Decomposed              Composed
         { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     }, // Alef-Lamed vs. Alef, Lamed

 //        { "\u00fdffin",              "A\u0308ffin",          "\u00fdffin"             },
 //       { "\u00fd\uFB03n",           "A\u0308ffin",          "\u00fdffin"             }, // ffi ligature -> f + f + i
         { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        }, //updated for 3.0
         { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        }, // ffi ligature -> f + f + i

         { "Henry IV",           "Henry IV",             "Henry IV"          },
         { "Henry \u2163",       "Henry IV",             "Henry IV"          },

         { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
         { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten

         { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + ten

         /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later
         { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + hw_ten
         { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // ka + hw_ten
         */
     };

     // With Canonical decomposition, Hangul syllables should get decomposed
     // into Jamo, but Jamo characters should not be decomposed into
     // conjoining Jamo
     String[][] hangulCanon = {
         // Input                Decomposed              Composed
         { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
         { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
     };

     // With compatibility decomposition turned on,
     // it should go all the way down to conjoining Jamo characters.
     // THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
     // The table is intentionally empty; the calls that take it therefore
     // loop zero times.
     String[][] hangulCompat = {
         // Input        Decomposed                          Composed
         // { "\ud4db",     "\u1111\u116e\u1175\u11af\u11c2",   "\ud478\u1175\u11af\u11c2"  },
     };

     /**
      * Checks Hangul composition (column 2 of the Hangul tables) through the
      * static API, the forward iterator, and forward/backward iteration.
      */
     public void TestHangulCompose() {
         // Make sure that the static composition methods work
         logln("Canonical composition...");
         staticTest(Normalizer.COMPOSE,        0, hangulCanon,  2);
         logln("Compatibility composition...");
         staticTest(Normalizer.COMPOSE_COMPAT, 0, hangulCompat, 2);

         // Now try iterative composition....
         // (The message used to read "Static composition...", which did not
         // match the iterator checks that follow; TestHangulDecomp already
         // logs "Iterative decomposition..." at the same point.)
         logln("Iterative composition...");
         Normalizer norm = new Normalizer("", Normalizer.COMPOSE, 0);
         iterateTest(norm, hangulCanon, 2);

         norm.setMode(Normalizer.COMPOSE_COMPAT);
         iterateTest(norm, hangulCompat, 2);

         // And finally, make sure you can do it in reverse too
         logln("Reverse iteration...");
         norm.setMode(Normalizer.COMPOSE);
         backAndForth(norm, hangulCanon);
     }

     /**
      * Checks Hangul decomposition (column 1 of the Hangul tables) through
      * the static API, the forward iterator, and forward/backward iteration.
      */
     public void TestHangulDecomp() {
         // Make sure that the static decomposition methods work
         logln("Canonical decomposition...");
         staticTest(Normalizer.DECOMP,        0, hangulCanon,  1);
         logln("Compatibility decomposition...");
         staticTest(Normalizer.DECOMP_COMPAT, 0, hangulCompat, 1);

         // Now the iterative decomposition methods...
         logln("Iterative decomposition...");
         Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);
         iterateTest(norm, hangulCanon, 1);

         norm.setMode(Normalizer.DECOMP_COMPAT);
         iterateTest(norm, hangulCompat, 1);

         // And finally, make sure you can do it in reverse too
         logln("Reverse iteration...");
         norm.setMode(Normalizer.DECOMP);
         backAndForth(norm, hangulCanon);
     }

     /**
      * Checks that iterating backwards over the {@link #canonTests} inputs
      * yields the same text as iterating forwards, in both DECOMP and
      * COMPOSE modes.
      */
     public void TestPrevious() {
         Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);

         logln("testing decomp...");
         backAndForth(norm, canonTests);

         logln("testing compose...");
         norm.setMode(Normalizer.COMPOSE);
         backAndForth(norm, canonTests);
     }

     /**
      * Canonical decomposition of {@link #canonTests} (expected: column 1),
      * first through the iterator and then through the static API.
      */
     public void TestDecomp() {
         Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);
         iterateTest(norm, canonTests, 1);

         staticTest(Normalizer.DECOMP, 0, canonTests, 1);
     }

     /**
      * Compatibility decomposition of {@link #compatTests} (expected:
      * column 1), first through the iterator and then through the static API.
      */
     public void TestCompatDecomp() {
         Normalizer norm = new Normalizer("", Normalizer.DECOMP_COMPAT, 0);
         iterateTest(norm, compatTests, 1);

         staticTest(Normalizer.DECOMP_COMPAT, 0, compatTests, 1);
     }

     /**
      * Canonical composition of {@link #canonTests} (expected: column 2),
      * first through the iterator and then through the static API.
      */
     public void TestCanonCompose() {
         Normalizer norm = new Normalizer("", Normalizer.COMPOSE, 0);
         iterateTest(norm, canonTests, 2);

         staticTest(Normalizer.COMPOSE, 0, canonTests, 2);
     }

     /**
      * Compatibility composition of {@link #compatTests} (expected:
      * column 2), first through the iterator and then through the static API.
      */
     public void TestCompatCompose() {
         Normalizer norm = new Normalizer("", Normalizer.COMPOSE_COMPAT, 0);
         iterateTest(norm, compatTests, 2);

         staticTest(Normalizer.COMPOSE_COMPAT, 0, compatTests, 2);
     }

     /**
      * Exercises the long-s characters listed below, whose compatibility
      * decomposition changes the base letter.  The canonical table is run
      * through the static API only; the compatibility table is additionally
      * run through the iterator in both directions.
      */
     public void TestExplodingBase() {
         // \u017f - Latin small letter long s
         // \u0307 - combining dot above
         // \u1e61 - Latin small letter s with dot above
         // \u1e9b - Latin small letter long s with dot above
         String[][] canon = {
             // Input                Decomposed              Composed
             { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"       },
             { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"       },
         };
         String[][] compat = {
             // Input                Decomposed              Composed
             { "\u017f",        "s",              "s"           },
             { "\u1e9b",        "s\u0307",        "\u1e61"      },
         };

         staticTest(Normalizer.DECOMP,           0, canon,  1);
         staticTest(Normalizer.COMPOSE,          0, canon,  2);

         staticTest(Normalizer.DECOMP_COMPAT,    0, compat, 1);
         staticTest(Normalizer.COMPOSE_COMPAT,   0, compat, 2);

         Normalizer norm = new Normalizer("", Normalizer.DECOMP_COMPAT);
         iterateTest(norm, compat, 1);
         backAndForth(norm, compat);

         norm.setMode(Normalizer.COMPOSE_COMPAT);
         iterateTest(norm, compat, 2);
         backAndForth(norm, compat);
     }

     /**
      * The Tibetan vowel sign AA, 0f71, was messed up prior to Unicode version 2.1.9.
      * An earlier note asked for this test to be enabled once 2.1.9 or 3.0
      * was released; the test is active now.
      */
     public void TestTibetan() {
         // Unlike the class-level tables, the columns here are
         // { input, expected canonical result, expected compatibility result }:
         // column 1 is checked against the canonical mode and column 2
         // against the compatibility mode of the same operation.
         String[][] decomp = {
             { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
         };
         String[][] compose = {
             { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
         };

         staticTest(Normalizer.DECOMP,           0, decomp, 1);
         staticTest(Normalizer.DECOMP_COMPAT,    0, decomp, 2);
         staticTest(Normalizer.COMPOSE,          0, compose, 1);
         staticTest(Normalizer.COMPOSE_COMPAT,   0, compose, 2);
     }

     /**
      * Make sure characters in the CompositionExclusion.txt list do not get
      * composed to.  Each listed character is decomposed with DECOMP_COMPAT
      * and the result recomposed with COMPOSE; getting the original
      * character back is reported as a failure.
      */
     public void TestCompositionExclusion() {
         // This list is generated from CompositionExclusion.txt.
         // Update whenever the normalizer tables are updated.  Note
         // that we test all characters listed, even those that can be
         // derived from the Unicode DB and are therefore commented
         // out.
         String EXCLUDED =
             "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
             "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
             "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
             "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
             "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
             "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
             "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB" +
             "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
             "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
             "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
             "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
             "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
             "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
             "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
         for (int i=0; i<EXCLUDED.length(); ++i) {
             String a = String.valueOf(EXCLUDED.charAt(i));
             String b = Normalizer.normalize(a, Normalizer.DECOMP_COMPAT, 0);
             String c = Normalizer.normalize(b, Normalizer.COMPOSE, 0);
             // An excluded character must not survive the round trip.
             if (c.equals(a)) {
                 errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
                       hex(b) + " x COMPOSE => " +
                       hex(c));
             } else if (isVerbose()) {
                 logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
                       hex(b) + " x COMPOSE => " +
                       hex(c));
             }
         }
         // The following method works too, but it is circular.  It
         // uses UInfo, which is the same database that
         // NormalizerBuilder uses, so if something is wrong with
         // UInfo, the following test won't show it.  All it will show
         // is that NormalizerBuilder has been run with whatever the
         // current UInfo is.
         //
         // We comment this out in favor of the test above, which
         // provides independent verification (but also requires
         // independent updating).
 //      logln("---");
 //      UInfo uinfo = new UInfo();
 //      for (int i=0; i<=0xFFFF; ++i) {
 //          if (!uinfo.isExcludedComposition((char)i) ||
 //              (!uinfo.hasCanonicalDecomposition((char)i) &&
 //               !uinfo.hasCompatibilityDecomposition((char)i))) continue;
 //          String a = String.valueOf((char)i);
 //          String b = Normalizer.normalize(a, Normalizer.DECOMP_COMPAT, 0);
 //          String c = Normalizer.normalize(b, Normalizer.COMPOSE, 0);
 //          if (c.equals(a)) {
 //              errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
 //                    hex(b) + " x COMPOSE => " +
 //                    hex(c));
 //          } else if (isVerbose()) {
 //              logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
 //                    hex(b) + " x COMPOSE => " +
 //                    hex(c));
 //          }
 //      }
     }

     /**
      * Test for a problem that showed up just before ICU 1.6 release
      * having to do with combining characters with an index of zero.
      * Such characters do not participate in any canonical
      * decompositions.  However, having an index of zero means that
      * they all share one typeMask[] entry, that is, they all have to
      * map to the same canonical class, which is not the case, in
      * reality.
      */
     public void TestZeroIndex() {
         // Flat table read three entries at a time.
         String[] DATA = {
             // Expect col1 x COMPOSE_COMPAT => col2
             // Expect col2 x DECOMP => col3
             "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
             "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
             "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
             "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
             "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
         };

         for (int i=0; i<DATA.length; i+=3) {
             String a = DATA[i];
             String b = Normalizer.normalize(a, Normalizer.COMPOSE_COMPAT, 0);
             String exp = DATA[i+1];
             if (b.equals(exp)) {
                 logln("Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b));
             } else {
                 errln("FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) +
                       ", expect " + hex(exp));
             }
             // The second step decomposes the actual composed result (b),
             // not the expected value from the table.
             a = Normalizer.normalize(b, Normalizer.DECOMP, 0);
             exp = DATA[i+2];
             if (a.equals(exp)) {
                 logln("Ok: " + hex(b) + " x DECOMP => " + hex(a));
             } else {
                 errln("FAIL: " + hex(b) + " x DECOMP => " + hex(a) +
                       ", expect " + hex(exp));
             }
         }
     }

     /**
      * Test for a problem found by Verisign.  Problem is that
      * characters at the start of a string are not put in canonical
      * order correctly by compose() if there is no starter.
      * Both {@code Normalizer.decompose} and {@code Normalizer.compose} are
      * expected to return the same reordered string for each input.
      */
     public void TestVerisign() {
         String[] inputs = {
             "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
             "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
         };
         String[] outputs = {
             "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
             "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
         };

         for (int i = 0; i < inputs.length; ++i) {
             String input = inputs[i];
             String output = outputs[i];
             String result = Normalizer.decompose(input, false, 0);
             if (!result.equals(output)) {
                 errln("FAIL input: " + Utility.escape(input));
                 errln(" decompose: " + Utility.escape(result));
                 errln("  expected: " + Utility.escape(output));
             }
             result = Normalizer.compose(input, false, 0);
             if (!result.equals(output)) {
                 errln("FAIL input: " + Utility.escape(input));
                 errln("   compose: " + Utility.escape(result));
                 errln("  expected: " + Utility.escape(output));
             }
         }
     }

     //------------------------------------------------------------------------
     // Internal utilities
     //

     /**
      * Sets {@code input} on the iterator, collects its output once from
      * first() via next() and once from last() via previous(), and reports
      * an error if the two collected strings differ.
      *
      * @param iter  normalizer to drive; its text is replaced by {@code input}
      * @param input text to normalize
      */
     private void backAndForth(Normalizer iter, String input)
     {
         iter.setText(input);

         // Run through the iterator forwards and stick it into a StringBuffer
         StringBuffer forward =  new StringBuffer();
         for (char ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
             forward.append(ch);
         }

         // Now do it backwards; inserting at the front restores reading order
         StringBuffer reverse = new StringBuffer();
         for (char ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
             reverse.insert(0, ch);
         }

         if (!forward.toString().equals(reverse.toString())) {
             errln("FAIL: Forward/reverse mismatch for input " + hex(input)
                   + ", forward: " + hex(forward) + ", backward: " + hex(reverse));
         } else if (isVerbose()) {
             logln("Ok: Forward/reverse for input " + hex(input)
                   + ", forward: " + hex(forward) + ", backward: " + hex(reverse));
         }
     }

     /**
      * Runs the single-string forward/backward check on column 0 of every
      * row in {@code tests}.
      *
      * @param iter  normalizer to drive
      * @param tests table whose first column holds the input strings
      */
     private void backAndForth(Normalizer iter, String[][] tests)
     {
         for (int i = 0; i < tests.length; i++)
         {
             // tests[i][0] is a String, so this resolves to the overload above.
             backAndForth(iter, tests[i][0]);
         }
     }

     /**
      * Normalizes column 0 of every row with the static
      * {@code Normalizer.normalize} call and reports an error when the
      * result differs from the row's {@code outCol} column.
      *
      * @param mode    normalization mode passed to {@code normalize}
      * @param options options value passed to {@code normalize}
      * @param tests   table of test rows
      * @param outCol  index of the column holding the expected result
      */
     private void staticTest(Normalizer.Mode mode, int options, String[][] tests, int outCol)
     {
         for (int i = 0; i < tests.length; i++)
         {
             String input = tests[i][0];
             String expect = tests[i][outCol];

             logln("Normalizing '" + input + "' (" + hex(input) + ")" );

             String output = Normalizer.normalize(input, mode, options);

             if (!output.equals(expect)) {
                 errln("FAIL: case " + i
                     + " expected '" + expect + "' (" + hex(expect) + ")"
                     + " but got '" + output + "' (" + hex(output) + ")" );
             }
         }
     }

     /**
      * Sets column 0 of every row on the iterator and compares the
      * characters it produces with the row's {@code outCol} column.
      *
      * @param iter   normalizer to drive, already set to the mode under test
      * @param tests  table of test rows
      * @param outCol index of the column holding the expected result
      */
     private void iterateTest(Normalizer iter, String[][] tests, int outCol)
     {
         for (int i = 0; i < tests.length; i++)
         {
             String input = tests[i][0];
             String expect = tests[i][outCol];

             logln("Normalizing '" + input + "' (" + hex(input) + ")" );

             iter.setText(input);
             assertEqual(expect, iter, "case " + i + " ");
         }
     }

     /**
      * Walks the iterator from first() and compares each character with the
      * one at the same index of {@code expected}.  Every mismatch is
      * reported; an extra character past the end of {@code expected} is
      * reported once and stops the walk; too few characters is reported
      * after the walk.
      *
      * @param expected text the iterator should produce
      * @param iter     normalizer whose output is checked
      * @param msg      prefix placed in front of each failure message
      */
     private void assertEqual(String expected, Normalizer iter, String msg)
     {
         int index = 0;
         for (char ch = iter.first(); ch != Normalizer.DONE; ch = iter.next())
         {
             if (index >= expected.length()) {
                 errln("FAIL: " + msg + "Unexpected character '" + ch + "' (" + hex(ch) + ")"
                         + " at index " + index);
                 break;
             }
             char want = expected.charAt(index);
             if (ch != want) {
                 errln("FAIL: " + msg + "got '" + ch + "' (" + hex(ch) + ")"
                         + " but expected '" + want + "' (" + hex(want) + ")"
                         + " at index " + index);
             }
             index++;
         }
         if (index < expected.length()) {
             errln("FAIL: " + msg + "Only got " + index + " chars, expected " + expected.length());
         }
     }
 }
	/*
	*******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/normalizer/BasicTest.java,v $
	* $Date: 2002/02/25 22:43:58 $
	* $Revision: 1.10 $
	*
	*****************************************************************************************
	*/
	package com.ibm.icu.dev.test.normalizer;

	import com.ibm.icu.dev.test.*;
	import com.ibm.icu.lang.*;
	import com.ibm.icu.text.*;
	import com.ibm.icu.impl.Utility;
	import java.text.CharacterIterator;
	import java.text.StringCharacterIterator;

	public class BasicTest extends TestFmwk {
	public static void main(String[] args) throws Exception {
	new BasicTest().run(args);
	}

	String[][] canonTests = {
	// Input Decomposed Composed
	{ "cat", "cat", "cat" },
	{ "\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark", },

	{ "\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above
	{ "D\u0307", "D\u0307", "\u1e0a" }, // D dot_above

	{ "\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above
	{ "\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below
	{ "D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above

	{ "\u1e10\u0307\u0323", "D\u0327\u0323\u0307", "\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
	{ "D\u0307\u0328\u0323","D\u0328\u0323\u0307", "\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below

	{ "\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave
	{ "\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave
	{ "\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron

	{ "\u212b", "A\u030a", "\u00c5" }, // angstrom_sign
	{ "\u00c5", "A\u030a", "\u00c5" }, // A-ring

	// { "\u00fdffin", "A\u0308ffin", "\u00fdffin" },
	{ "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0
	// { "\u00fd\uFB03n", "A\u0308\uFB03n", "\u00fd\uFB03n" },
	{ "\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0

	{ "Henry IV", "Henry IV", "Henry IV" },
	{ "Henry \u2163", "Henry \u2163", "Henry \u2163" },

	{ "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
	{ "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
	{ "\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten
	{ "\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten
	{ "\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten

	{ "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
	};

	String[][] compatTests = {
	// Input Decomposed Composed
	{ "\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC", }, // Alef-Lamed vs. Alef, Lamed

	// { "\u00fdffin", "A\u0308ffin", "\u00fdffin" },
	// { "\u00fd\uFB03n", "A\u0308ffin", "\u00fdffin" }, // ffi ligature -> f + f + i
	{ "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0
	{ "\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i

	{ "Henry IV", "Henry IV", "Henry IV" },
	{ "Henry \u2163", "Henry IV", "Henry IV" },

	{ "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
	{ "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten

	{ "\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten

	/* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later
	{ "\uFF76\uFF9E", "\u30AB\u3099", "\u30AC" }, // hw_ka + hw_ten
	{ "\u30AB\uFF9E", "\u30AB\u3099", "\u30AC" }, // ka + hw_ten
	*/
	};

	// With Canonical decomposition, Hangul syllables should get decomposed
	// into Jamo, but Jamo characters should not be decomposed into
	// conjoining Jamo
	String[][] hangulCanon = {
	// Input Decomposed Composed
	{ "\ud4db", "\u1111\u1171\u11b6", "\ud4db" },
	{ "\u1111\u1171\u11b6", "\u1111\u1171\u11b6", "\ud4db" },
	};

	// With compatibility decomposition turned on,
	// it should go all the way down to conjoining Jamo characters.
	// THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
	String[][] hangulCompat = {
	// Input Decomposed Composed
	// { "\ud4db", "\u1111\u116e\u1175\u11af\u11c2", "\ud478\u1175\u11af\u11c2" },
	};

	public void TestHangulCompose() {
	// Make sure that the static composition methods work
	logln("Canonical composition...");
	staticTest(Normalizer.COMPOSE, 0, hangulCanon, 2);
	logln("Compatibility composition...");
	staticTest(Normalizer.COMPOSE_COMPAT, 0, hangulCompat, 2);

	// Now try iterative composition....
	logln("Static composition...");
	Normalizer norm = new Normalizer("", Normalizer.COMPOSE, 0);
	iterateTest(norm, hangulCanon, 2);

	norm.setMode(Normalizer.COMPOSE_COMPAT);
	iterateTest(norm, hangulCompat, 2);

	// And finally, make sure you can do it in reverse too
	logln("Reverse iteration...");
	norm.setMode(Normalizer.COMPOSE);
	backAndForth(norm, hangulCanon);
	}

	public void TestHangulDecomp() {
	// Make sure that the static decomposition methods work
	logln("Canonical decomposition...");
	staticTest(Normalizer.DECOMP, 0, hangulCanon, 1);
	logln("Compatibility decomposition...");
	staticTest(Normalizer.DECOMP_COMPAT, 0, hangulCompat, 1);

	// Now the iterative decomposition methods...
	logln("Iterative decomposition...");
	Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);
	iterateTest(norm, hangulCanon, 1);

	norm.setMode(Normalizer.DECOMP_COMPAT);
	iterateTest(norm, hangulCompat, 1);

	// And finally, make sure you can do it in reverse too
	logln("Reverse iteration...");
	norm.setMode(Normalizer.DECOMP);
	backAndForth(norm, hangulCanon);
	}

	public void TestPrevious() {
	Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);

	logln("testing decomp...");
	backAndForth(norm, canonTests);

	logln("testing compose...");
	norm.setMode(Normalizer.COMPOSE);
	backAndForth(norm, canonTests);
	}

	public void TestDecomp() {
	Normalizer norm = new Normalizer("", Normalizer.DECOMP, 0);
	iterateTest(norm, canonTests, 1);

	staticTest(Normalizer.DECOMP, 0, canonTests, 1);
	}

	public void TestCompatDecomp() {
	Normalizer norm = new Normalizer("", Normalizer.DECOMP_COMPAT, 0);
	iterateTest(norm, compatTests, 1);

	staticTest(Normalizer.DECOMP_COMPAT, 0, compatTests, 1);
	}

	public void TestCanonCompose() {
	Normalizer norm = new Normalizer("", Normalizer.COMPOSE, 0);
	iterateTest(norm, canonTests, 2);

	staticTest(Normalizer.COMPOSE, 0, canonTests, 2);
	}

	public void TestCompatCompose() {
	Normalizer norm = new Normalizer("", Normalizer.COMPOSE_COMPAT, 0);
	iterateTest(norm, compatTests, 2);

	staticTest(Normalizer.COMPOSE_COMPAT, 0, compatTests, 2);
	}

	public void TestExplodingBase() {
	// \u017f - Latin small letter long s
	// \u0307 - combining dot above
	// \u1e61 - Latin small letter s with dot above
	// \u1e9b - Latin small letter long s with dot above
	String[][] canon = {
	// Input Decomposed Composed
	{ "Tschu\u017f", "Tschu\u017f", "Tschu\u017f" },
	{ "Tschu\u1e9b", "Tschu\u017f\u0307", "Tschu\u1e9b" },
	};
	String[][] compat = {
	// Input Decomposed Composed
	{ "\u017f", "s", "s" },
	{ "\u1e9b", "s\u0307", "\u1e61" },
	};

	staticTest(Normalizer.DECOMP, 0, canon, 1);
	staticTest(Normalizer.COMPOSE, 0, canon, 2);

	staticTest(Normalizer.DECOMP_COMPAT, 0, compat, 1);
	staticTest(Normalizer.COMPOSE_COMPAT, 0, compat, 2);

	Normalizer norm = new Normalizer("", Normalizer.DECOMP_COMPAT);
	iterateTest(norm, compat, 1);
	backAndForth(norm, compat);

	norm.setMode(Normalizer.COMPOSE_COMPAT);
	iterateTest(norm, compat, 2);
	backAndForth(norm, compat);
	}

	/**
	* The Tibetan vowel sign AA, 0f71, was messed up prior to Unicode version 2.1.9.
	* An earlier note asked for this test to be enabled once 2.1.9 or 3.0
	* was released; the test is active now.
	*/
	public void TestTibetan() {
	String[][] decomp = {
	{ "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
	};
	String[][] compose = {
	{ "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
	};

	staticTest(Normalizer.DECOMP, 0, decomp, 1);
	staticTest(Normalizer.DECOMP_COMPAT, 0, decomp, 2);
	staticTest(Normalizer.COMPOSE, 0, compose, 1);
	staticTest(Normalizer.COMPOSE_COMPAT, 0, compose, 2);
	}

	/**
	* Make sure characters in the CompositionExclusion.txt list do not get
	* composed to.
	*/
	public void TestCompositionExclusion() {
	// This list is generated from CompositionExclusion.txt.
	// Update whenever the normalizer tables are updated. Note
	// that we test all characters listed, even those that can be
	// derived from the Unicode DB and are therefore commented
	// out.
	String EXCLUDED =
	"\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
	"\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
	"\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
	"\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
	"\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
	"\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
	"\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB" +
	"\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
	"\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
	"\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
	"\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
	"\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
	"\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
	"\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
	for (int i=0; i<EXCLUDED.length(); ++i) {
	String a = String.valueOf(EXCLUDED.charAt(i));
	String b = Normalizer.normalize(a, Normalizer.DECOMP_COMPAT, 0);
	String c = Normalizer.normalize(b, Normalizer.COMPOSE, 0);
	if (c.equals(a)) {
	errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
	hex(b) + " x COMPOSE => " +
	hex(c));
	} else if (isVerbose()) {
	logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
	hex(b) + " x COMPOSE => " +
	hex(c));
	}
	}
	// The following method works too, but it is somewhat
	// incestuous. It uses UInfo, which is the same database that
	// NormalizerBuilder uses, so if something is wrong with
	// UInfo, the following test won't show it. All it will show
	// is that NormalizerBuilder has been run with whatever the
	// current UInfo is.
	//
	// We comment this out in favor of the test above, which
	// provides independent verification (but also requires
	// independent updating).
	// logln("---");
	// UInfo uinfo = new UInfo();
	// for (int i=0; i<=0xFFFF; ++i) {
	// if (!uinfo.isExcludedComposition((char)i) ||
	// (!uinfo.hasCanonicalDecomposition((char)i) &&
	// !uinfo.hasCompatibilityDecomposition((char)i))) continue;
	// String a = String.valueOf((char)i);
	// String b = Normalizer.normalize(a, Normalizer.DECOMP_COMPAT, 0);
	// String c = Normalizer.normalize(b, Normalizer.COMPOSE, 0);
	// if (c.equals(a)) {
	// errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
	// hex(b) + " x COMPOSE => " +
	// hex(c));
	// } else if (isVerbose()) {
	// logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
	// hex(b) + " x COMPOSE => " +
	// hex(c));
	// }
	// }
	}

	/**
	 * Regression test for a problem that surfaced just before the
	 * ICU 1.6 release, involving combining characters whose index is
	 * zero. Such characters take no part in any canonical
	 * decomposition, but because they all have index zero they share
	 * a single typeMask[] entry and would therefore all have to map to
	 * the same canonical class -- which is not true in reality.
	 */
	public void TestZeroIndex() {
	    // One row per case:
	    //   [0] source text
	    //   [1] expected result of source x COMPOSE_COMPAT
	    //   [2] expected result of the composed text x DECOMP
	    String[][] cases = {
	        { "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300" },
	        { "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300" },
	        { "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300" },
	        { "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327" },
	        { "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321" },
	    };

	    for (int row = 0; row < cases.length; ++row) {
	        String source = cases[row][0];

	        String composed = Normalizer.normalize(source, Normalizer.COMPOSE_COMPAT, 0);
	        String wantComposed = cases[row][1];
	        if (!composed.equals(wantComposed)) {
	            errln("FAIL: " + hex(source) + " x COMPOSE_COMPAT => " + hex(composed) +
	                  ", expect " + hex(wantComposed));
	        } else {
	            logln("Ok: " + hex(source) + " x COMPOSE_COMPAT => " + hex(composed));
	        }

	        // The second step starts from what was actually produced
	        // above, not from the expected composed form.
	        String decomposed = Normalizer.normalize(composed, Normalizer.DECOMP, 0);
	        String wantDecomposed = cases[row][2];
	        if (!decomposed.equals(wantDecomposed)) {
	            errln("FAIL: " + hex(composed) + " x DECOMP => " + hex(decomposed) +
	                  ", expect " + hex(wantDecomposed));
	        } else {
	            logln("Ok: " + hex(composed) + " x DECOMP => " + hex(decomposed));
	        }
	    }
	}

	/**
	 * Regression test for a problem reported by Verisign: when a
	 * string has no starter, compose() failed to put the characters
	 * at the start of the string into canonical order.
	 */
	public void TestVerisign() {
	    // One row per case: [0] is the input, [1] is the text that both
	    // decompose() and compose() are expected to return for it.
	    String[][] cases = {
	        { "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
	          "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f" },
	        { "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad",
	          "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4" }
	    };

	    for (int k = 0; k < cases.length; ++k) {
	        String source = cases[k][0];
	        String wanted = cases[k][1];

	        String decomposed = Normalizer.decompose(source, false, 0);
	        if (!decomposed.equals(wanted)) {
	            errln("FAIL input: " + Utility.escape(source));
	            errln(" decompose: " + Utility.escape(decomposed));
	            errln(" expected: " + Utility.escape(wanted));
	        }

	        String composed = Normalizer.compose(source, false, 0);
	        if (!composed.equals(wanted)) {
	            errln("FAIL input: " + Utility.escape(source));
	            errln(" compose: " + Utility.escape(composed));
	            errln(" expected: " + Utility.escape(wanted));
	        }
	    }
	}

	//------------------------------------------------------------------------
	// Internal utilities
	//

	/**
	 * Checks that walking the iterator forwards over the given input
	 * yields the same characters as walking it backwards. A mismatch
	 * is reported with errln(); a match is logged only in verbose mode.
	 *
	 * @param iter  the iterator to exercise; its text is replaced
	 * @param input the text handed to the iterator
	 */
	private void backAndForth(Normalizer iter, String input)
	{
	    iter.setText(input);

	    // Forward pass: first() .. next() until DONE, appending as we go.
	    StringBuffer forward = new StringBuffer();
	    char c = iter.first();
	    while (c != iter.DONE) {
	        forward.append(c);
	        c = iter.next();
	    }

	    // Backward pass: last() .. previous() until DONE. Each character
	    // is inserted at the front so the buffer ends up in reading order.
	    StringBuffer reverse = new StringBuffer();
	    c = iter.last();
	    while (c != iter.DONE) {
	        reverse.insert(0, c);
	        c = iter.previous();
	    }

	    boolean sameText = forward.toString().equals(reverse.toString());
	    if (sameText) {
	        if (isVerbose()) {
	            logln("Ok: Forward/reverse for input " + hex(input)
	                  + ", forward: " + hex(forward) + ", backward: " + hex(reverse));
	        }
	    } else {
	        errln("FAIL: Forward/reverse mismatch for input " + hex(input)
	              + ", forward: " + hex(forward) + ", backward: " + hex(reverse));
	    }
	}

	/**
	 * Runs the forward/backward iteration check on the first column of
	 * every row in a test table.
	 *
	 * The per-input work (resetting the iterator's text, walking it in
	 * both directions and reporting any mismatch) is done by the
	 * single-input overload, so the check and its messages are defined
	 * in exactly one place.
	 *
	 * @param iter  the iterator to exercise; its text is replaced for
	 *              each row
	 * @param tests test table; only element [0] of each row is used
	 */
	private void backAndForth(Normalizer iter, String[][] tests)
	{
	    for (int i = 0; i < tests.length; i++)
	    {
	        // tests[i][0] is a String, so this resolves to the
	        // (Normalizer, String) overload rather than recursing.
	        backAndForth(iter, tests[i][0]);
	    }
	}

	/**
	 * Normalizes the first column of every row with the static
	 * Normalizer.normalize() call and compares the result with the
	 * column selected by outCol, reporting each mismatch via errln().
	 *
	 * @param mode    normalization mode passed to normalize()
	 * @param options options passed to normalize()
	 * @param tests   test table; element [0] of each row is the input
	 * @param outCol  index of the column holding the expected output
	 */
	private void staticTest(Normalizer.Mode mode, int options, String[][] tests, int outCol)
	{
	    for (int caseNo = 0; caseNo < tests.length; ++caseNo) {
	        String[] row = tests[caseNo];
	        String source = row[0];
	        String wanted = row[outCol];

	        logln("Normalizing '" + source + "' (" + hex(source) + ")" );

	        String actual = Normalizer.normalize(source, mode, options);
	        if (actual.equals(wanted)) {
	            continue;
	        }
	        errln("FAIL: case " + caseNo
	              + " expected '" + wanted + "' (" + hex(wanted) + ")"
	              + " but got '" + actual + "' (" + hex(actual) + ")" );
	    }
	}

	/**
	 * Feeds the first column of every row to the iterator and checks,
	 * via assertEqual(), that iterating over it produces the text in
	 * the column selected by outCol.
	 *
	 * @param iter   the iterator to exercise; its text is replaced for
	 *               each row
	 * @param tests  test table; element [0] of each row is the input
	 * @param outCol index of the column holding the expected output
	 */
	private void iterateTest(Normalizer iter, String[][] tests, int outCol)
	{
	    for (int caseNo = 0; caseNo < tests.length; ++caseNo) {
	        String[] row = tests[caseNo];
	        String source = row[0];
	        String wanted = row[outCol];

	        logln("Normalizing '" + source + "' (" + hex(source) + ")" );

	        iter.setText(source);
	        assertEqual(wanted, iter, "case " + caseNo + " ");
	    }
	}

	/**
	 * Walks the iterator from first() to DONE and compares each
	 * character with the one at the same position in the expected text.
	 * Every differing character is reported; a surplus character stops
	 * the walk after one report, and a shortfall is reported at the end.
	 *
	 * @param expected the text the iterator should produce
	 * @param iter     the iterator to read from
	 * @param msg      prefix placed after "FAIL: " in each report
	 */
	private void assertEqual(String expected, Normalizer iter, String msg)
	{
	    int pos = 0;
	    char got = iter.first();
	    while (got != iter.DONE) {
	        if (pos >= expected.length()) {
	            // The iterator produced more characters than expected.
	            errln("FAIL: " + msg + "Unexpected character '" + got + "' (" + hex(got) + ")"
	                  + " at index " + pos);
	            break;
	        }

	        char wanted = expected.charAt(pos);
	        if (got != wanted) {
	            errln("FAIL: " + msg + "got '" + got + "' (" + hex(got) + ")"
	                  + " but expected '" + wanted + "' (" + hex(wanted) + ")"
	                  + " at index " + pos);
	        }

	        ++pos;
	        got = iter.next();
	    }

	    // The iterator ran out before the expected text did.
	    if (pos < expected.length()) {
	        errln("FAIL: " + msg + "Only got " + pos + " chars, expected " + expected.length());
	    }
	}
	}