 // main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationTest.java - external/github.com/unicode-org/icu - Git at Google

 /**
  *******************************************************************************
  * Copyright (C) 2001-2014, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  * CollationTest.java, ported from collationtest.cpp
  * C++ version created on: 2012apr27
  * created by: Markus W. Scherer
  */
 package com.ibm.icu.dev.test.collator;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.text.ParseException;
 import java.util.HashSet;
 import java.util.Set;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.dev.test.TestUtil;
 import com.ibm.icu.impl.Norm2AllModes;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.impl.coll.Collation;
 import com.ibm.icu.impl.coll.CollationData;
 import com.ibm.icu.impl.coll.CollationFCD;
 import com.ibm.icu.impl.coll.CollationIterator;
 import com.ibm.icu.impl.coll.CollationRoot;
 import com.ibm.icu.impl.coll.CollationRootElements;
 import com.ibm.icu.impl.coll.CollationRuleParser;
 import com.ibm.icu.impl.coll.CollationWeights;
 import com.ibm.icu.impl.coll.FCDIterCollationIterator;
 import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
 import com.ibm.icu.impl.coll.UTF16CollationIterator;
 import com.ibm.icu.impl.coll.UVector32;
 import com.ibm.icu.text.CollationElementIterator;
 import com.ibm.icu.text.CollationKey;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.Collator.ReorderCodes;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.RawCollationKey;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.text.UCharacterIterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.util.IllformedLocaleException;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;

 public class CollationTest extends TestFmwk {
     /**
      * Command-line entry point: creates a fresh CollationTest and passes the
      * arguments on to its run() method.
      *
      * @param args command-line arguments, forwarded unchanged
      * @throws Exception whatever run() throws
      */
     public static void main(String[] args) throws Exception {
         CollationTest suite = new CollationTest();
         suite.run(args);
     }

     /** Creates the test object; no fields are initialized here. */
     public CollationTest() {
         super();
     }

     // Fields
     Normalizer2 fcd, nfd;
     Collator coll;
     String fileLine;
     int fileLineNumber;
     String fileTestName;
     Throwable error;

     // package private methods ----------------------------------------------

     /**
      * Checks that {@code col} orders {@code source} relative to {@code target}
      * as {@code result} says, checks the comparison with swapped operands as
      * well, and then runs the forward/backward collation element iteration
      * check over both strings.
      *
      * @param test   framework object that receives the error reports
      * @param col    collator under test
      * @param source first string
      * @param target second string
      * @param result expected result of comparing source with target (-1, 0 or 1)
      */
     static void doTest(TestFmwk test, RuleBasedCollator col, String source,
                        String target, int result)
     {
         // Expected outcome once the operands are swapped; any value other
         // than -1 or 1 is treated as "equal".
         int swappedResult;
         switch (result) {
         case -1:
             swappedResult = 1;
             break;
         case 1:
             swappedResult = -1;
             break;
         default:
             swappedResult = 0;
             break;
         }

         doTestVariant(test, col, source, target, result);
         doTestVariant(test, col, target, source, swappedResult);

         CollationElementIterator elements = col.getCollationElementIterator(source);
         backAndForth(test, elements);
         elements.setText(target);
         backAndForth(test, elements);
     }

     /**
      * Collects every collation order produced by repeated calls to
      * {@code iter.next()}, stopping at NULLORDER.
      *
      * @param iter iterator to drain, starting from its current position
      * @return an array holding exactly the orders that were returned, in order
      */
     static int[] getOrders(CollationElementIterator iter)
     {
         int capacity = 100;
         int[] buffer = new int[capacity];
         int count = 0;

         for (int ce = iter.next(); ce != CollationElementIterator.NULLORDER; ce = iter.next()) {
             if (count == capacity) {
                 // Buffer is full: double it and carry over what we have.
                 capacity *= 2;
                 int[] bigger = new int[capacity];
                 System.arraycopy(buffer, 0, bigger, 0, count);
                 buffer = bigger;
             }
             buffer[count] = ce;
             ++count;
         }

         if (count == capacity) {
             // Exactly full already, nothing to trim.
             return buffer;
         }
         int[] exact = new int[count];
         System.arraycopy(buffer, 0, exact, 0, count);
         return exact;
     }

     /**
      * Iterates {@code iter} forward to collect all collation orders, then
      * iterates it backward and reports through {@code test} when the backward
      * sequence does not mirror the forward one.
      * <p>
      * Orders equal to 0 are tolerated on either side: a backward 0 that does
      * not match is skipped, and forward 0 entries are skipped before a
      * mismatch is declared. If the backward pass does not consume the whole
      * forward array (ignoring leading 0 entries), both sequences are dumped
      * via {@code test.err()}.
      *
      * @param test framework object that receives the error reports
      * @param iter iterator to exercise; it is reset by this method
      */
     static void backAndForth(TestFmwk test, CollationElementIterator iter)
     {
         // Run through the iterator forwards and stick it into an array
         iter.reset();
         int[] orders = getOrders(iter);

         // Now go through it backwards and make sure we get the same values
         int index = orders.length;
         int o;

         // reset the iterator
         // NOTE(review): previous() right after reset() is expected to start
         // from the end of the text -- confirm against the
         // CollationElementIterator documentation.
         iter.reset();

         while ((o = iter.previous()) != CollationElementIterator.NULLORDER) {
             if (o != orders[--index]) {
                 if (o == 0) {
                     // Extra 0 order on the backward side: undo the decrement
                     // so the same forward entry is compared again.
                     index ++;
                 } else {
                     // Skip 0 orders on the forward side before giving up.
                     while (index > 0 && orders[index] == 0) {
                         index --;
                     }
                     if (o != orders[index]) {
                         test.errln("Mismatch at index " + index + ": 0x"
                             + Utility.hex(orders[index]) + " vs 0x" + Utility.hex(o));
                         break;
                     }
                 }
             }
         }

         // Remaining 0 orders at the front of the forward array do not count
         // as unconsumed elements.
         while (index != 0 && orders[index - 1] == 0) {
           index --;
         }

         if (index != 0) {
             String msg = "Didn't get back to beginning - index is ";
             test.errln(msg + index);

             // Dump both directions to help diagnose the failure.
             iter.reset();
             test.err("next: ");
             while ((o = iter.next()) != CollationElementIterator.NULLORDER) {
                 String hexString = "0x" + Utility.hex(o) + " ";
                 test.err(hexString);
             }
             test.errln("");
             test.err("prev: ");
             while ((o = iter.previous()) != CollationElementIterator.NULLORDER) {
                 String hexString = "0x" + Utility.hex(o) + " ";
                  test.err(hexString);
             }
             test.errln("");
         }
     }

     /**
      * Returns {@code target} with a readable name for a comparison result
      * appended: "LESS" for -1, "EQUAL" for 0, "GREATER" for 1, and "?"
      * followed by the number for any other value.
      *
      * @param result comparison result to describe
      * @param target text to append to
      * @return the concatenated string
      */
     static final String appendCompareResult(int result, String target){
         final String label;
         switch (result) {
         case -1:
             label = "LESS";
             break;
         case 0:
             label = "EQUAL";
             break;
         case 1:
             label = "GREATER";
             break;
         default:
             label = "?" + result;
             break;
         }
         return target + label;
     }

     /**
      * Formats all bytes of a CollationKey as a bracketed list of two-digit
      * hex values.
      *
      * @param key collation key whose byte array is printed
      * @return the formatted string
      */
     static final String prettify(CollationKey key) {
         final byte[] keyBytes = key.toByteArray();
         final int count = keyBytes.length;
         return prettify(keyBytes, count);
     }

     /**
      * Formats the first {@code key.size} bytes of a RawCollationKey as a
      * bracketed list of two-digit hex values.
      *
      * @param key raw collation key to print
      * @return the formatted string
      */
     static final String prettify(RawCollationKey key) {
         final byte[] keyBytes = key.bytes;
         final int count = key.size;
         return prettify(keyBytes, count);
     }

     /**
      * Formats the first {@code length} bytes of {@code skBytes} as
      * "[xx yy zz ]": each byte becomes two lowercase hex digits followed by
      * a space, and the whole list is wrapped in square brackets.
      *
      * @param skBytes sort key bytes
      * @param length  number of leading bytes to print
      * @return the formatted string
      */
     static final String prettify(byte[] skBytes, int length) {
         final String hexDigits = "0123456789abcdef";
         StringBuilder out = new StringBuilder(length * 3 + 2);
         out.append('[');

         int i = 0;
         while (i < length) {
             int unsigned = skBytes[i] & 0xff;
             out.append(hexDigits.charAt(unsigned >>> 4));
             out.append(hexDigits.charAt(unsigned & 0xf));
             out.append(' ');
             ++i;
         }
         return out.append(']').toString();
     }

     /**
      * Compares {@code source} with {@code target} in three ways -- directly
      * through the collator, through CollationKeys and through
      * RawCollationKeys -- and reports every comparison whose outcome differs
      * from {@code result}.
      * <p>
      * When {@code test.isModularBuild()} is true, mismatches are not reported
      * as errors; a single warning is issued at the end instead.
      *
      * @param test        framework object that receives errors and warnings
      * @param myCollation collator under test
      * @param source      first string
      * @param target      second string
      * @param result      expected comparison result (-1, 0 or 1)
      */
     private static void doTestVariant(TestFmwk test,
                                       RuleBasedCollator myCollation,
                                       String source, String target, int result)
     {
         boolean printInfo = false;

         // 1. Direct string comparison.
         int compareResult  = myCollation.compare(source, target);
         if (compareResult != result) {

             // !!! if not mod build, error, else nothing.
             // warnln if not build, error, else always print warning.
             // do we need a 'quiet warning?' (err or log).  Hmmm,
             // would it work to have the 'verbose' flag let you
             // suppress warnings?  Are there ever some warnings you
             // want to suppress, and others you don't?
             if(!test.isModularBuild()){
                 test.errln("Comparing \"" + Utility.hex(source) + "\" with \""
                            + Utility.hex(target) + "\" expected " + result
                            + " but got " + compareResult);
             }else{
                 printInfo = true;
             }
         }

         // 2. Comparison of CollationKeys.
         CollationKey ssk = myCollation.getCollationKey(source);
         CollationKey tsk = myCollation.getCollationKey(target);
         compareResult = ssk.compareTo(tsk);
         if (compareResult != result) {

             if(!test.isModularBuild()){
                 test.errln("Comparing CollationKeys of \"" + Utility.hex(source)
                            + "\" with \"" + Utility.hex(target)
                            + "\" expected " + result + " but got "
                            + compareResult);
             }else{
                 printInfo = true;
             }
         }

         // 3. Comparison of RawCollationKeys. The raw keys themselves must be
         // compared here; comparing ssk/tsk again would only repeat step 2.
         RawCollationKey srsk = new RawCollationKey();
         myCollation.getRawCollationKey(source, srsk);
         RawCollationKey trsk = new RawCollationKey();
         myCollation.getRawCollationKey(target, trsk);
         compareResult = srsk.compareTo(trsk);
         if (compareResult != result) {

             if(!test.isModularBuild()){
                 test.errln("Comparing RawCollationKeys of \""
                            + Utility.hex(source)
                            + "\" with \"" + Utility.hex(target)
                            + "\" expected " + result + " but got "
                            + compareResult);
             }else{
                 printInfo = true;
             }
         }
         // hmmm, but here we issue a warning
         // only difference is, one warning or two, and detailed info or not?
         // hmmm, does seem preferable to omit detail if we know it is due to missing resource data.
         // well, if we label the errors as warnings, we can let people know the details, but
         // also know they may be due to missing resource data.  basically this code is asserting
         // that the errors are due to missing resource data, which may or may not be true.
         if (printInfo) {
             test.warnln("Could not load locale data skipping.");
         }
     }

     /**
      * Checks the collation elements that the root collator produces for the
      * two-character string U+FFFE U+FFFF: exactly two CEs are expected, the
      * first built from Collation.MERGE_SEPARATOR_PRIMARY and the second from
      * Collation.MAX_PRIMARY.
      */
     public void TestMinMax() {
         setRootCollator();
         final RuleBasedCollator root = (RuleBasedCollator)coll;

         final long[] elements = root.internalGetCEs("\uFFFE\uFFFF");
         if (elements.length != 2) {
             errln("expected 2 CEs for <FFFE, FFFF>, got " + elements.length);
             return;
         }

         // First CE: the merge separator.
         final long minCE = elements[0];
         if (minCE != Collation.makeCE(Collation.MERGE_SEPARATOR_PRIMARY)) {
             errln("CE(U+fffe)=0x" + Utility.hex(minCE) + " != 02..");
         }

         // Second CE: the maximum primary.
         final long maxCE = elements[1];
         if (maxCE != Collation.makeCE(Collation.MAX_PRIMARY)) {
             errln("CE(U+ffff)=0x" + Utility.hex(maxCE) + " != max..");
         }
     }

     /**
      * Checks the CEs of characters that get implicit primary weights (core
      * Han, other Han, unassigned/surrogate/private-use): each must map to
      * exactly one CE with common secondary/tertiary weights, and within the
      * subset whose order is known the primaries must strictly increase.
      * See http://www.unicode.org/reports/tr10/#Implicit_Weights
      */
     public void TestImplicits() {
         CollationData rootData = CollationRoot.getData();

         // Core Han Unified Ideographs.
         UnicodeSet coreHan = new UnicodeSet("[\\p{unified_ideograph}&"
                                  + "[\\p{Block=CJK_Unified_Ideographs}"
                                  + "\\p{Block=CJK_Compatibility_Ideographs}]]");
         // All other Unified Han ideographs.
         UnicodeSet otherHan = new UnicodeSet("[\\p{unified ideograph}-"
                                  + "[\\p{Block=CJK_Unified_Ideographs}"
                                  + "\\p{Block=CJK_Compatibility_Ideographs}]]");

         UnicodeSet unassigned = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]");
         unassigned.remove(0xfffe, 0xffff);  // These have special CLDR root mappings.

         // Starting with CLDR 26/ICU 54, the root Han order may instead be
         // the Unihan radical-stroke order.
         // The test should pass either way, so the order is only checked for a
         // small set of Han characters whose radical-stroke order equals their
         // code point order.
         UnicodeSet someHanInCPOrder = new UnicodeSet(
                 "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48" +
                 "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]");
         UnicodeSet inOrder = new UnicodeSet(someHanInCPOrder);
         inOrder.addAll(unassigned).freeze();

         UnicodeSet[] groups = { coreHan, otherHan, unassigned };
         int prev = 0;
         long prevPrimary = 0;
         UTF16CollationIterator ceIter = new UTF16CollationIterator(rootData, false, "", 0);
         for (UnicodeSet group : groups) {
             for (UnicodeSetIterator members = new UnicodeSetIterator(group); members.next();) {
                 String text = members.getString();
                 int c = text.codePointAt(0);
                 ceIter.setText(false, text, 0);
                 long firstCE = ceIter.nextCE();
                 long secondCE = ceIter.nextCE();
                 boolean exactlyOne = firstCE != Collation.NO_CE && secondCE == Collation.NO_CE;
                 if (!exactlyOne) {
                     errln("CollationIterator.nextCE(0x" + Utility.hex(c)
                             + ") did not yield exactly one CE");
                     continue;
                 }
                 long secTer = firstCE & 0xffffffffL;
                 if (secTer != Collation.COMMON_SEC_AND_TER_CE) {
                     errln("CollationIterator.nextCE(U+" + Utility.hex(c, 4)
                             + ") has non-common sec/ter weights: 0x" + Utility.hex(firstCE & 0xffffffffL, 8));
                     continue;
                 }
                 long primary = firstCE >>> 32;
                 // Only complain when both this and the previous code point
                 // belong to the set with a known relative order.
                 if (primary <= prevPrimary && inOrder.contains(c) && inOrder.contains(prev)) {
                     errln("CE(U+" + Utility.hex(c) + ")=0x" + Utility.hex(primary)
                             + ".. not greater than CE(U+" + Utility.hex(prev)
                             + ")=0x" + Utility.hex(prevPrimary) + "..");
                 }
                 prev = c;
                 prevPrimary = primary;
             }
         }
     }

     // ICU4C: TestNulTerminated / renamed for ICU4J
     /**
      * Iterates over "abab" twice, once starting at offset 0 and once at
      * offset 2, and checks that the first two CEs of both iterations agree.
      */
     public void TestSubSequence() {
         final String text = "abab"; // { 0x61, 0x62, 0x61, 0x62 }
         CollationData rootData = CollationRoot.getData();

         UTF16CollationIterator fromStart = new UTF16CollationIterator(rootData, false, text, 0);
         UTF16CollationIterator fromMiddle = new UTF16CollationIterator(rootData, false, text, 2);

         int ceIndex = 0;
         while (ceIndex < 2) {
             long first = fromStart.nextCE();
             long second = fromMiddle.nextCE();
             if (first != second) {
                 errln("CollationIterator.nextCE(with start position at 0) != "
                       + "nextCE(with start position at 2) at CE " + ceIndex);
             }
             ++ceIndex;
         }
     }


     // ICU4C: TestIllegalUTF8 / not applicable to ICU4J


     /**
      * For every block of 0x400 supplementary code points of which
      * {@code src} contains at least one, adds the lead surrogate of that
      * block to {@code dest}.
      *
      * @param src  set that is inspected for supplementary code points
      * @param dest set that receives the lead surrogates (may be the same set)
      */
     private static void addLeadSurrogatesForSupplementary(UnicodeSet src, UnicodeSet dest) {
         final int blockSize = 0x400;  // code points that share one lead surrogate
         for (int blockStart = 0x10000; blockStart < 0x110000; blockStart += blockSize) {
             int blockEnd = blockStart + blockSize - 1;
             if (!src.containsSome(blockStart, blockEnd)) {
                 continue;
             }
             dest.add(UTF16.getLeadSurrogate(blockStart));
         }
     }

     /**
      * Cross-checks CollationFCD.hasLccc() and CollationFCD.hasTccc() for all
      * BMP code units against sets built from the lccc/tccc Unicode
      * properties (plus surrogates), in both directions: expected minus
      * actual and actual minus expected must each be empty.
      */
     public void TestShortFCDData() {
         UnicodeSet expectedLccc = new UnicodeSet("[:^lccc=0:]");
         expectedLccc.add(0xdc00, 0xdfff);   // add all trail surrogates
         addLeadSurrogatesForSupplementary(expectedLccc, expectedLccc);

         UnicodeSet lccc = new UnicodeSet(); // actual
         for (int c = 0; c <= 0xffff; ++c) {
             if (CollationFCD.hasLccc(c)) {
                 lccc.add(c);
             }
         }

         UnicodeSet diff = new UnicodeSet(expectedLccc);
         diff.removeAll(lccc);
         diff.remove(0x10000, 0x10ffff);  // hasLccc() only works for the BMP

         String empty = "[]";
         String diffString;

         diffString = diff.toPattern(true);
         assertEquals("CollationFCD::hasLccc() expected-actual", empty, diffString);

         // diff aliases lccc from here on; lccc is not needed afterwards.
         diff = lccc;
         diff.removeAll(expectedLccc);
         diffString = diff.toPattern(true);
         assertEquals("CollationFCD::hasLccc() actual-expected", empty, diffString);

         UnicodeSet expectedTccc = new UnicodeSet("[:^tccc=0:]");
         addLeadSurrogatesForSupplementary(expectedLccc, expectedTccc);
         addLeadSurrogatesForSupplementary(expectedTccc, expectedTccc);

         UnicodeSet tccc = new UnicodeSet(); // actual
         for(int c = 0; c <= 0xffff; ++c) {
             if (CollationFCD.hasTccc(c)) {
                 tccc.add(c);
             }
         }

         diff = new UnicodeSet(expectedTccc);
         diff.removeAll(tccc);
         diff.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
         // Recompute the pattern for the tccc difference; without this the
         // assertion would look at the stale lccc result.
         diffString = diff.toPattern(true);
         assertEquals("CollationFCD::hasTccc() expected-actual", empty, diffString);

         // diff aliases tccc from here on; tccc is not needed afterwards.
         diff = tccc;
         diff.removeAll(expectedTccc);
         diffString = diff.toPattern(true);
         assertEquals("CollationFCD::hasTccc() actual-expected", empty, diffString);
     }

     /**
      * Minimal bidirectional cursor over a fixed array of code points.
      * next() and previous() return Collation.SENTINEL_CP once the cursor
      * is at the respective end.
      */
     private static class CodePointIterator {
         int[] cp;
         int length;
         int pos;

         /** Wraps {@code cp} (not copied) with the cursor at the start. */
         CodePointIterator(int[] cp) {
             this.cp = cp;
             length = cp.length;
             pos = 0;
         }

         /** Moves the cursor back to index 0. */
         void resetToStart() {
             pos = 0;
         }

         /** Returns the code point at the cursor and advances, or the sentinel at the end. */
         int next() {
             if (pos >= length) {
                 return Collation.SENTINEL_CP;
             }
             return cp[pos++];
         }

         /** Steps the cursor back and returns that code point, or the sentinel at the start. */
         int previous() {
             if (pos <= 0) {
                 return Collation.SENTINEL_CP;
             }
             --pos;
             return cp[pos];
         }

         /** Number of code points in the wrapped array. */
         int getLength() {
             return length;
         }

         /** Current cursor position. */
         int getIndex() {
             return pos;
         }
     }

     /**
      * Walks a CollationIterator and a reference CodePointIterator in
      * lockstep and reports the first position where their code points
      * differ. The walk goes forward to the limit, backward about two thirds
      * of the way, forward to the limit again and finally backward to the
      * start.
      *
      * @param name label for the iterator under test, used in error messages
      * @param ci   iterator under test
      * @param cpi  reference iterator that delivers the expected code points
      */
     private void checkFCD(String name, CollationIterator ci, CodePointIterator cpi) {
         // Iterate forward to the limit.
         for (;;) {
             int c1 = ci.nextCodePoint();
             int c2 = cpi.next();
             if (c1 != c2) {
                 // Report both values: actual (c1) and expected (c2).
                 errln(name + ".nextCodePoint(to limit, 1st pass) = U+" + Utility.hex(c1)
                         + " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
                 return;
             }
             if (c1 < 0) {
                 // Both iterators returned the end sentinel.
                 break;
             }
         }

         // Iterate backward most of the way.
         for (int n = (cpi.getLength() * 2) / 3; n > 0; --n) {
             int c1 = ci.previousCodePoint();
             int c2 = cpi.previous();
             if (c1 != c2) {
                 errln(name + ".previousCodePoint() = U+" + Utility.hex(c1) +
                         " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
                 return;
             }
         }

         // Forward again.
         for (;;) {
             int c1 = ci.nextCodePoint();
             int c2 = cpi.next();
             if (c1 != c2) {
                 errln(name + ".nextCodePoint(to limit again) = U+" + Utility.hex(c1)
                         + " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
                 return;
             }
             if (c1 < 0) {
                 break;
             }
         }

         // Iterate backward to the start.
         for (;;) {
             int c1 = ci.previousCodePoint();
             int c2 = cpi.previous();
             if (c1 != c2) {
                 // This pass calls previousCodePoint(), so name it correctly.
                 errln(name + ".previousCodePoint(to start) = U+" + Utility.hex(c1)
                         + " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
                 return;
             }
             if (c1 < 0) {
                 break;
             }
         }
     }

     /**
      * Feeds a string that is not in FCD form to FCDUTF16CollationIterator
      * and FCDIterCollationIterator and checks, via checkFCD(), that both
      * deliver the expected sequence of code points.
      */
     public void TestFCD() {
         CollationData rootData = CollationRoot.getData();

         // Input string, not FCD.
         StringBuilder input = new StringBuilder();
         input.append("\u0308\u00e1\u0062\u0301\u0327\u0430\u0062");
         input.appendCodePoint(0x1D15F);   // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
         input.append("\u0327\u0308");     // ccc=202, 230
         input.appendCodePoint(0x1D16D);   // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
         input.appendCodePoint(0x1D15F);
         input.appendCodePoint(0x1D16D);
         input.append("\uac01");
         input.append("\u00e7");           // Character with tccc!=0 decomposed together with mis-ordered sequence.
         input.appendCodePoint(0x1D16D);
         input.appendCodePoint(0x1D165);
         input.append("\u00e1");           // Character with tccc!=0 decomposed together with decomposed sequence.
         input.append("\u0f73\u0f75");     // Tibetan composite vowels must be decomposed.
         input.append("\u4e00\u0f81");
         final String text = input.toString();

         // Expected code points.
         final int[] expected = {
             0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
             0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
             0x1D15F, 0x1D16D,
             0xac01,
             0x63, 0x327, 0x1D165, 0x1D16D,
             0x61,
             0xf71, 0xf71, 0xf72, 0xf74, 0x301,
             0x4e00, 0xf71, 0xf80
         };
         CodePointIterator reference = new CodePointIterator(expected);

         // Pass 1: iterator over the UTF-16 string.
         FCDUTF16CollationIterator stringIter = new FCDUTF16CollationIterator(rootData, false, text, 0);
         checkFCD("FCDUTF16CollationIterator", stringIter, reference);

         // Pass 2: iterator over a UCharacterIterator, same expectations.
         reference.resetToStart();
         UCharacterIterator chars = UCharacterIterator.getInstance(text);
         FCDIterCollationIterator charIter = new FCDIterCollationIterator(rootData, false, chars, 0);
         checkFCD("FCDIterCollationIterator", charIter, reference);
     }

     /**
      * Allocates {@code n} weights between (excluding) {@code lowerLimit} and
      * {@code upperLimit} and verifies the result: the allocation must
      * succeed, all {@code n} weights must be delivered, each weight must be
      * greater than the one before it (the first one greater than
      * {@code lowerLimit}) and below {@code upperLimit}, and at least
      * {@code minCount} of them must have the byte length {@code someLength}.
      *
      * @param cw         weight allocator, already initialized for a level
      * @param lowerLimit exclusive lower bound for the weights
      * @param upperLimit exclusive upper bound for the weights
      * @param n          number of weights to allocate
      * @param someLength weight length (in bytes) that is counted
      * @param minCount   minimum number of weights expected to have that length
      */
     private void checkAllocWeights(CollationWeights cw, long lowerLimit, long upperLimit,
             int n, int someLength, int minCount) {

         // Common prefix of all messages; n is printed in decimal, so it
         // gets no "0x" prefix.
         final String call = "CollationWeights::allocWeights(0x"
                 + Utility.hex(lowerLimit) + ",0x"
                 + Utility.hex(upperLimit) + ","
                 + n + ")";

         if (!cw.allocWeights(lowerLimit, upperLimit, n)) {
             errln(call + " = false");
             return;
         }
         long previous = lowerLimit;
         int count = 0; // number of weights that have someLength
         for (int i = 0; i < n; ++i) {
             long w = cw.nextWeight();
             if (w == 0xffffffffL) {
                 errln(call + ".nextWeight() returns only "
                         + i + " weights");
                 return;
             }
             if (!(previous < w && w < upperLimit)) {
                 errln(call + ".nextWeight() number "
                         + (i + 1) + " -> 0x" + Utility.hex(w)
                         + " not between "
                         + Utility.hex(previous) + " and "
                         + Utility.hex(upperLimit));
                 return;
             }
             if (CollationWeights.lengthOfWeight(w) == someLength) {
                 ++count;
             }
             // Remember this weight so the next one is checked to be larger.
             previous = w;
         }
         if (count < minCount) {
             errln(call + ".nextWeight() returns only "
                     + count + " < " + minCount + " weights of length "
                     + someLength);

         }
     }

     /**
      * Runs checkAllocWeights() over a series of limits and counts for
      * non-compressible primaries, compressible primaries, secondary and
      * tertiary weights.
      */
     public void TestCollationWeights() {
         final CollationWeights weights = new CollationWeights();

         // Limits that are shared by several checks below.
         final long lower10 = 0x10000000L;
         final long upper13 = 0x13000000L;
         final long lower1020ff = 0x1020ff00L;
         final long upper102104 = 0x10210400L;

         // Non-compressible primaries use 254 second bytes 02..FF.
         logln("CollationWeights.initForPrimary(non-compressible)");
         weights.initForPrimary(false);
         // Expect 1 weight 11 and 254 weights 12xx.
         checkAllocWeights(weights, lower10, upper13, 255, 1, 1);
         checkAllocWeights(weights, lower10, upper13, 255, 2, 254);
         // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
         checkAllocWeights(weights, 0x10fefe40L, 0x12030300L, 260, 2, 255);
         // Expect 254 two-byte weights from the ranges 10ff and 11xx.
         checkAllocWeights(weights, 0x10fefe40L, 0x12030300L, 600, 2, 254);
         // Expect 254^2=64516 three-byte weights.
         // During computation, there should be 3 three-byte ranges
         // 10ffff, 11xxxx, 120202.
         // The middle one should be split 64515:1,
         // and the newly-split-off range and the last range lengthened.
         final int manyWeights = 1 + 64516 + 254 + 1;
         checkAllocWeights(weights, 0x10fffe00L, 0x12020300L, manyWeights, 3, 64516);
         // Expect weights 1102 & 1103.
         checkAllocWeights(weights, 0x10ff0000L, 0x11040000L, 2, 2, 2);
         // Expect weights 102102 & 102103.
         checkAllocWeights(weights, lower1020ff, upper102104, 2, 3, 2);

         // Compressible primaries use 251 second bytes 04..FE.
         logln("CollationWeights.initForPrimary(compressible)");
         weights.initForPrimary(true);
         // Expect 1 weight 11 and 251 weights 12xx.
         checkAllocWeights(weights, lower10, upper13, 252, 1, 1);
         checkAllocWeights(weights, lower10, upper13, 252, 2, 251);
         // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
         checkAllocWeights(weights, 0x10fdfe40L, 0x12050300L, 260, 2, 252);
         // Expect weights 1104 & 1105.
         checkAllocWeights(weights, 0x10fe0000L, 0x11060000L, 2, 2, 2);
         // Expect weights 102102 & 102103.
         checkAllocWeights(weights, lower1020ff, upper102104, 2, 3, 2);

         // Secondary and tertiary weights use only bytes 3 & 4.
         logln("CollationWeights.initForSecondary()");
         weights.initForSecondary();
         // Expect weights fbxx and all four fc..ff.
         checkAllocWeights(weights, 0xfb20L, 0x10000L, 20, 3, 4);

         logln("CollationWeights.initForTertiary()");
         weights.initForTertiary();
         // Expect weights 3dxx and both 3e & 3f.
         checkAllocWeights(weights, 0x3d02L, 0x4000L, 10, 3, 2);
     }

     /**
      * Checks whether a collation element, given as separate primary,
      * secondary and case/tertiary/quaternary parts, is well-formed.
      * <p>
      * The checks cover byte-level rules (no leading or intermediate zero
      * bytes, lead byte limits, level separator avoidance, the second primary
      * byte range depending on compressibility) and structural rules for
      * completely ignorable, tertiary, secondary and primary CEs.
      *
      * @param re   root elements; supplies the secondary/tertiary boundaries
      *             and the last common secondary weight
      * @param data collation data; asked whether a primary lead byte is compressible
      * @param p    primary weight (examined as four bytes)
      * @param s    secondary weight (examined as two bytes)
      * @param ctq  case bits, tertiary weight and quaternary bits
      * @return true if none of the checks rejects the CE
      */
     private static boolean isValidCE(CollationRootElements re, CollationData data, long p, long s, long ctq) {
         // Split the weights into their individual bytes.
         long p1 = p >>> 24;
         long p2 = (p >>> 16) & 0xff;
         long p3 = (p >>> 8) & 0xff;
         long p4 = p & 0xff;
         long s1 = s >>> 8;
         long s2 = s & 0xff;
         // ctq = Case, Tertiary, Quaternary
         long c = (ctq & Collation.CASE_MASK) >>> 14;
         long t = ctq & Collation.ONLY_TERTIARY_MASK;
         long t1 = t >>> 8;
         long t2 = t & 0xff;
         long q = ctq & Collation.QUATERNARY_MASK;
         // No leading zero bytes.
         if ((p != 0 && p1 == 0) || (s != 0 && s1 == 0) || (t != 0 && t1 == 0)) {
             return false;
         }
         // No intermediate zero bytes.
         if (p1 != 0 && p2 == 0 && (p & 0xffff) != 0) {
             return false;
         }
         if (p2 != 0 && p3 == 0 && p4 != 0) {
             return false;
         }
         // Minimum & maximum lead bytes.
         if ((p1 != 0 && p1 <= Collation.MERGE_SEPARATOR_BYTE)
                 || s1 == Collation.LEVEL_SEPARATOR_BYTE
                 || t1 == Collation.LEVEL_SEPARATOR_BYTE || t1 > 0x3f) {
             return false;
         }
         // Case bits value 3 is rejected.
         if (c > 2) {
             return false;
         }
         // The valid byte range for the second primary byte depends on compressibility.
         if (p2 != 0) {
             if (data.isCompressibleLeadByte((int)p1)) {
                 if (p2 <= Collation.PRIMARY_COMPRESSION_LOW_BYTE
                         || Collation.PRIMARY_COMPRESSION_HIGH_BYTE <= p2) {
                     return false;
                 }
             } else {
                 if (p2 <= Collation.LEVEL_SEPARATOR_BYTE) {
                     return false;
                 }
             }
         }
         // Other bytes just need to avoid the level separator.
         // Trailing zeros are ok.
         // assert (Collation.LEVEL_SEPARATOR_BYTE == 1);
         if (p3 == Collation.LEVEL_SEPARATOR_BYTE || p4 == Collation.LEVEL_SEPARATOR_BYTE
                 || s2 == Collation.LEVEL_SEPARATOR_BYTE || t2 == Collation.LEVEL_SEPARATOR_BYTE) {
             return false;
         }
         // Well-formed CEs.
         if (p == 0) {
             if (s == 0) {
                 if (t == 0) {
                     // Completely ignorable CE.
                     // Quaternary CEs are not supported.
                     if (c != 0 || q != 0) {
                         return false;
                     }
                 } else {
                     // Tertiary CE.
                     // Must be at or above the tertiary boundary and carry case bits 2.
                     if (t < re.getTertiaryBoundary() || c != 2) {
                         return false;
                     }
                 }
             } else {
                 // Secondary CE.
                 // Secondary at or above its boundary, tertiary non-zero and below its boundary.
                 if (s < re.getSecondaryBoundary() || t == 0 || t >= re.getTertiaryBoundary()) {
                     return false;
                 }
             }
         } else {
             // Primary CE.
             // Secondary must be non-zero, below the secondary boundary, and
             // outside the range (COMMON_WEIGHT16, last common secondary].
             if (s == 0 || (Collation.COMMON_WEIGHT16 < s && s <= re.getLastCommonSecondary())
                     || s >= re.getSecondaryBoundary()) {
                 return false;
             }
             if (t == 0 || t >= re.getTertiaryBoundary()) {
                 return false;
             }
         }
         return true;
     }

     /**
      * Convenience overload: splits a 64-bit CE into its primary (upper 32
      * bits), secondary (next 16 bits) and case/tertiary/quaternary (lowest
      * 16 bits) parts and validates them.
      *
      * @param re   root elements used for the boundary checks
      * @param data collation data used for the compressibility check
      * @param ce   64-bit collation element
      * @return true if the CE passes all checks
      */
     private static boolean isValidCE(CollationRootElements re, CollationData data, long ce) {
         final long primary = ce >>> 32;
         final long lower32 = ce & 0xffffffffL;
         final long secondary = lower32 >>> 16;
         final long caseTerQuat = lower32 & 0xffff;
         return isValidCE(re, data, primary, secondary, caseTerQuat);
     }

     /**
      * Enumerates the CEs described by the root elements table of a
      * CollationData object. Each successful call to next() updates the
      * current primary and/or secondary+tertiary value, which are then read
      * through getPrimary() and getSecTer(). Primary ranges stored with a
      * step value are expanded into their individual primaries.
      */
     private static class RootElementsIterator {
         CollationData data;
         // The root elements table and its length.
         long[] elements;
         int length;

         // Current primary and secondary+tertiary weights.
         long pri;
         long secTer;
         // Index of the next table entry to look at.
         int index;

         /**
          * Starts the iteration at the table index stored in the
          * IX_FIRST_TERTIARY_INDEX slot, with both weights set to 0.
          */
         RootElementsIterator(CollationData root) {
             data = root;
             elements = root.rootElements;
             length = elements.length;
             pri = 0;
             secTer = 0;
             index = (int)elements[CollationRootElements.IX_FIRST_TERTIARY_INDEX];
         }

         /**
          * Advances to the next CE.
          *
          * @return false at the end of the table or at the primary sentinel,
          *         true if a new CE is available
          */
         boolean next() {
             if (index >= length) {
                 return false;
             }
             long p = elements[index];
             if (p == CollationRootElements.PRIMARY_SENTINEL) {
                 return false;
             }
             if ((p & CollationRootElements.SEC_TER_DELTA_FLAG) != 0) {
                 // Secondary/tertiary entry: keep the current primary,
                 // replace only the sec/ter weights (flag bit removed).
                 ++index;
                 secTer = p & ~CollationRootElements.SEC_TER_DELTA_FLAG;
                 return true;
             }
             if ((p & CollationRootElements.PRIMARY_STEP_MASK) != 0) {
                 // End of a range, enumerate the primaries in the range.
                 int step = (int)p & CollationRootElements.PRIMARY_STEP_MASK;
                 // The int literal sign-extends to a long with all upper bits
                 // set, so this clears only the low 8 bits of p.
                 p &= 0xffffff00;
                 if (pri == p) {
                     // Finished the range, return the next CE after it.
                     ++index;
                     return next();
                 }
                 assert (pri < p);
                 // Return the next primary in this range.
                 // index is not advanced: the range-end entry is revisited
                 // until pri reaches it.
                 boolean isCompressible = data.isCompressiblePrimary(pri);
                 if ((pri & 0xffff) == 0) {
                     pri = Collation.incTwoBytePrimaryByOffset(pri, isCompressible, step);
                 } else {
                     pri = Collation.incThreeBytePrimaryByOffset(pri, isCompressible, step);
                 }
                 return true;
             }
             // Simple primary CE.
             ++index;
             pri = p;
             secTer = Collation.COMMON_SEC_AND_TER_CE;
             return true;
         }

         /** Returns the primary weight of the current CE. */
         long getPrimary() {
             return pri;
         }

         /** Returns the combined secondary and tertiary weights of the current CE. */
         long getSecTer() {
             return secTer;
         }
     }

     /**
      * Walks all root collation elements with a {@code RootElementsIterator} and, for each one,
      * reports an error if
      * <ul>
      * <li>its case/quaternary bits are not zero,</li>
      * <li>{@code isValidCE()} rejects it,</li>
      * <li>it duplicates the preceding element, or</li>
      * <li>{@link CollationWeights} cannot allocate one weight strictly between it and the
      *     preceding element on the first level where the two differ.</li>
      * </ul>
      */
     public void TestRootElements() {
         CollationData root = CollationRoot.getData();

         CollationRootElements rootElements = new CollationRootElements(root.rootElements);
         RootElementsIterator iter = new RootElementsIterator(root);

         // We check each root CE for validity,
         // and we also verify that there is a tailoring gap between each two CEs.
         CollationWeights cw1c = new CollationWeights(); // compressible primary weights
         CollationWeights cw1u = new CollationWeights(); // uncompressible primary weights
         CollationWeights cw2 = new CollationWeights();  // secondary weights
         CollationWeights cw3 = new CollationWeights();  // tertiary weights

         cw1c.initForPrimary(true);
         cw1u.initForPrimary(false);
         cw2.initForSecondary();
         cw3.initForTertiary();

         // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
         // nor the special merge-separator CE for U+FFFE.
         // Weights of the previously visited element, for the gap checks below.
         long prevPri = 0;
         long prevSec = 0;
         long prevTer = 0;

         while (iter.next()) {
             long pri = iter.getPrimary();
             long secTer = iter.getSecTer();
             // CollationRootElements CEs must have 0 case and quaternary bits.
             if ((secTer & Collation.CASE_AND_QUATERNARY_MASK) != 0) {
                 errln("CollationRootElements CE has non-zero case and/or quaternary bits: "
                         + "0x" + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
             }
             // Split the combined value: secondary in the upper 16 bits, tertiary below.
             long sec = secTer >>> 16;
             long ter = secTer & Collation.ONLY_TERTIARY_MASK;
             long ctq = ter;
             if (pri == 0 && sec == 0 && ter != 0) {
                 // Tertiary CEs must have uppercase bits,
                 // but they are not stored in the CollationRootElements.
                 ctq |= 0x8000;
             }
             if (!isValidCE(rootElements, root, pri, sec, ctq)) {
                 errln("invalid root CE 0x"
                         + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
             } else {
                 // Check the gap on the first level that differs from the previous element.
                 if (pri != prevPri) {
                     // newWeight stays 0 when no primary gap is required or allocation failed.
                     long newWeight = 0;
                     if (prevPri == 0 || prevPri >= Collation.FFFD_PRIMARY) {
                         // There is currently no tailoring gap after primary ignorables,
                         // and we forbid tailoring after U+FFFD and U+FFFF.
                     } else if (root.isCompressiblePrimary(prevPri)) {
                         if (!cw1c.allocWeights(prevPri, pri, 1)) {
                             errln("no primary/compressible tailoring gap between "
                                     + "0x" + Utility.hex(prevPri, 8)
                                     + " and 0x" + Utility.hex(pri, 8));
                         } else {
                             newWeight = cw1c.nextWeight();
                         }
                     } else {
                         if (!cw1u.allocWeights(prevPri, pri, 1)) {
                             errln("no primary/uncompressible tailoring gap between "
                                     + "0x" + Utility.hex(prevPri, 8)
                                     + " and 0x" + Utility.hex(pri, 8));
                         } else {
                             newWeight = cw1u.nextWeight();
                         }
                     }
                     // An allocated weight must lie strictly between the two primaries.
                     if (newWeight != 0 && !(prevPri < newWeight && newWeight < pri)) {
                         errln("mis-allocated primary weight, should get "
                                 + "0x" + Utility.hex(prevPri, 8)
                                 + " < 0x" + Utility.hex(newWeight, 8)
                                 + " < 0x" + Utility.hex(pri, 8));
                     }
                 } else if (sec != prevSec) {
                     // After a zero secondary, allocate from just below the secondary boundary.
                     long lowerLimit = prevSec == 0 ?
                             rootElements.getSecondaryBoundary() - 0x100 : prevSec;
                     if (!cw2.allocWeights(lowerLimit, sec, 1)) {
                         errln("no secondary tailoring gap between "
                                 + "0x" + Utility.hex(lowerLimit)
                                 + " and 0x" + Utility.hex(sec));
                     } else {
                         long newWeight = cw2.nextWeight();
                         // Note: the check uses prevSec while the message prints lowerLimit.
                         if (!(prevSec < newWeight && newWeight < sec)) {
                             errln("mis-allocated secondary weight, should get "
                                     + "0x" + Utility.hex(lowerLimit)
                                     + " < 0x" + Utility.hex(newWeight)
                                     + " < 0x" + Utility.hex(sec));
                         }
                     }
                 } else if (ter != prevTer) {
                     // After a zero tertiary, allocate from just below the tertiary boundary.
                     long lowerLimit = prevTer == 0 ?
                             rootElements.getTertiaryBoundary() - 0x100 : prevTer;
                     if (!cw3.allocWeights(lowerLimit, ter, 1)) {
                         errln("no tertiary tailoring gap between "
                                 + "0x" + Utility.hex(lowerLimit)
                                 + " and 0x" + Utility.hex(ter));
                     } else {
                         long newWeight = cw3.nextWeight();
                         // Note: the check uses prevTer while the message prints lowerLimit.
                         if (!(prevTer < newWeight && newWeight < ter)) {
                             errln("mis-allocated tertiary weight, should get "
                                     + "0x" + Utility.hex(lowerLimit)
                                     + " < 0x" + Utility.hex(newWeight)
                                     + " < 0x" + Utility.hex(ter));
                         }
                     }
                 } else {
                     // All three levels equal the previous element.
                     errln("duplicate root CE 0x"
                             + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
                 }
             }
             // Remember this element even if it was invalid.
             prevPri = pri;
             prevSec = sec;
             prevTer = ter;
         }
     }

     /**
      * For "root" and every locale returned by {@link Collator#getAvailableULocales()},
      * and for each collation type of that locale, fetches the collator's tailored set
      * and reports an error for every CE of every tailored string that
      * {@code isValidCE()} rejects.
      * Collators whose actual locale has already been seen are skipped, as are
      * collators that are not {@link RuleBasedCollator} instances.
      * Also reports an error if a "private-" collation keyword is returned.
      */
     public void TestTailoredElements() {
         CollationData root = CollationRoot.getData();
         CollationRootElements rootElements = new CollationRootElements(root.rootElements);

         // Actual locale names that need no (further) checking.
         Set<String> prevLocales = new HashSet<String>();
         prevLocales.add("");
         prevLocales.add("root");
         prevLocales.add("root@collation=standard");

         long[] ces;
         ULocale[] locales = Collator.getAvailableULocales();

         // Index -1 stands for "root"; indexes 0..length-1 cover every available locale.
         // (Advancing the locale in the loop's update clause, as was done before,
         // ended the loop before the last available locale was ever tested.)
         for (int locIdx = -1; locIdx < locales.length; ++locIdx) {
             String localeID = (locIdx < 0) ? "root" : locales[locIdx].getName();
             ULocale locale = new ULocale(localeID);
             String[] types = Collator.getKeywordValuesForLocale("collation", locale, false);
             for (int typeIdx = 0; typeIdx < types.length; ++typeIdx) {
                 String type = types[typeIdx];  // first: default type
                 if (type.startsWith("private-")) {
                     errln("Collator.getKeywordValuesForLocale(" + localeID +
                             ") returns private collation keyword: " + type);
                 }
                 ULocale localeWithType = locale.setKeywordValue("collation", type);
                 Collator coll = Collator.getInstance(localeWithType);
                 ULocale actual = coll.getLocale(ULocale.ACTUAL_LOCALE);
                 if (prevLocales.contains(actual.getName())) {
                     // Same data as a collator that was already checked.
                     continue;
                 }
                 prevLocales.add(actual.getName());
                 logln("TestTailoredElements(): requested " + localeWithType.getName()
                         + " -> actual " + actual.getName());
                 if (!(coll instanceof RuleBasedCollator)) {
                     continue;
                 }
                 RuleBasedCollator rbc = (RuleBasedCollator) coll;

                 // Note: It would be better to get tailored strings such that we can
                 // identify the prefix, and only get the CEs for the prefix+string,
                 // not also for the prefix.
                 // There is currently no API for that.
                 // It would help in an unusual case where a contraction starting in the prefix
                 // extends past its end, and we do not see the intended mapping.
                 // For example, for a mapping p|st, if there is also a contraction ps,
                 // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
                 UnicodeSet tailored = coll.getTailoredSet();
                 UnicodeSetIterator iter = new UnicodeSetIterator(tailored);
                 while (iter.next()) {
                     String s = iter.getString();
                     ces = rbc.internalGetCEs(s);
                     for (int i = 0; i < ces.length; ++i) {
                         long ce = ces[i];
                         if (!isValidCE(rootElements, root, ce)) {
                             logln(prettify(s));
                             errln("invalid tailored CE 0x" + Utility.hex(ce, 16)
                                     + " at CE index " + i + " from string:");
                         }
                     }
                 }
             }
         }
     }

     /** Returns true for the characters the test file format treats as spaces: TAB, SPACE, U+3000. */
     private static boolean isSpace(char c) {
         switch (c) {
         case 0x09:
         case 0x20:
         case 0x3000:
             return true;
         default:
             return false;
         }
     }

     /** Returns true if c is one of the characters that begin a new test-file section: '%', '*', '@'. */
     private static boolean isSectionStarter(char c) {
         return "%*@".indexOf(c) >= 0;
     }

     /**
      * Skips space characters (see {@code isSpace}) in the current {@code fileLine}.
      *
      * @param i index in {@code fileLine} at which to start
      * @return the index of the first non-space character at or after i,
      *         or {@code fileLine.length()} if only spaces (or nothing) remain
      */
     private int skipSpaces(int i) {
         // Bound the scan by the line length so that a start index at or past the end
         // of the line (for example, on a line that consists only of "%")
         // does not throw StringIndexOutOfBoundsException.
         int length = fileLine.length();
         while (i < length && isSpace(fileLine.charAt(i))) {
             ++i;
         }
         return i;
     }

     /**
      * Formats sort key bytes for log output: bytes are separated by single spaces,
      * a 00 byte is shown as '.', a 01 byte as '|', and every other byte as
      * two lowercase hex digits.
      *
      * @param p the sort key bytes
      * @return the formatted string; empty for an empty array
      */
     private String printSortKey(byte[] p) {
         StringBuilder out = new StringBuilder();
         String separator = "";
         for (byte b : p) {
             out.append(separator);
             separator = " ";
             switch (b) {
             case 0:
                 out.append('.');
                 break;
             case 1:
                 out.append('|');
                 break;
             default:
                 out.append(String.format("%02x", b & 0xff));
                 break;
             }
         }
         return out.toString();
     }

     /** Formats the bytes of the given collation key via {@code printSortKey}. */
     private String printCollationKey(CollationKey key) {
         return printSortKey(key.toByteArray());
     }

     /**
      * Reads the next line of the test file into {@code fileLine}, with any '#' comment
      * and trailing spaces removed, and increments {@code fileLineNumber}.
      *
      * @param in the reader for the test file
      * @return false at end of input (in which case {@code fileLine} is set to null),
      *         otherwise true
      * @throws IOException if reading from {@code in} fails
      */
     private boolean readLine(BufferedReader in) throws IOException {
         String raw = in.readLine();
         if (raw == null) {
             fileLine = null;
             return false;
         }
         ++fileLineNumber;
         // The useful part of the line ends at the first '#', or at the end of the line.
         int hashPos = raw.indexOf('#');
         int end = (hashPos >= 0) ? hashPos : raw.length();
         // Back up over spaces that precede the comment or the end of the line.
         while (end > 0 && isSpace(raw.charAt(end - 1))) {
             --end;
         }

         fileLine = end < raw.length() ? raw.substring(0, end) : raw;
         return true;
     }

     /**
      * Parses one space-delimited token of {@code fileLine}, of the form
      * {@code string} or {@code prefix|string}, and unescapes both parts.
      * On an empty prefix or empty string, logs the line, sets {@code error}
      * to a {@link ParseException} and reports the error.
      *
      * @param start  index in {@code fileLine} where the token begins
      * @param prefix receives the unescaped prefix, or null if there is none (or it is empty)
      * @param s      receives the unescaped string; set to null if the string part is empty
      * @return the index just past the token on success; on error, the index where
      *         parsing of the failing part began
      */
     private int parseString(int start, Output<String> prefix, Output<String> s) {
         // Find the end of the token: the next space or the end of the line.
         int lineLength = fileLine.length();
         int end = start;
         while (end < lineLength && !isSpace(fileLine.charAt(end))) {
             ++end;
         }
         // A '|' only counts as a prefix separator if it lies inside the token.
         int bar = fileLine.indexOf('|', start);
         boolean hasPrefix = bar >= 0 && bar < end;
         if (!hasPrefix) {
             prefix.value = null;
         } else {
             String unescapedPrefix = Utility.unescape(fileLine.substring(start, bar));
             if (unescapedPrefix.length() == 0) {
                 prefix.value = null;
                 logln(fileLine);
                 error = new ParseException("empty prefix on line " + fileLineNumber, fileLineNumber);
                 errln("empty prefix on line " + fileLineNumber);
                 return start;
             }
             prefix.value = unescapedPrefix;
             start = bar + 1;
         }

         String text = Utility.unescape(fileLine.substring(start, end));
         if (text.length() == 0) {
             s.value = null;
             logln(fileLine);
             error = new ParseException("empty string on line " + fileLineNumber, fileLineNumber);
             errln("empty string on line " + fileLineNumber);
             return start;
         }
         s.value = text;
         return end;
     }

     /**
      * Parses a test line of the form {@code <relation> <string>} from {@code fileLine},
      * where the relation is one of {@code = < <1 <2 <c <3 <4 <i}.
      * On any syntax error, logs the line, sets {@code error} to a
      * {@link ParseException}, reports the error and returns {@code Collation.NO_LEVEL}.
      * <p>
      * {@code fileLine} must not be empty.
      *
      * @param s receives the unescaped test string
      * @return the level constant for the relation: {@code Collation.ZERO_LEVEL} for '=',
      *         {@code Collation.NO_LEVEL} for a plain '&lt;', otherwise the level named
      *         after the '&lt;'
      */
     private int parseRelationAndString(Output<String> s) {
         int relation = Collation.NO_LEVEL;
         int start;
         if (fileLine.charAt(0) == '<') {
             // A line that consists only of "<" has no second character; treat it like
             // a plain "<" so that the check below reports it as a parse error
             // instead of throwing StringIndexOutOfBoundsException.
             char second = fileLine.length() > 1 ? fileLine.charAt(1) : '\0';
             start = 2;
             switch(second) {
             case 0x31:  // <1
                 relation = Collation.PRIMARY_LEVEL;
                 break;
             case 0x32:  // <2
                 relation = Collation.SECONDARY_LEVEL;
                 break;
             case 0x33:  // <3
                 relation = Collation.TERTIARY_LEVEL;
                 break;
             case 0x34:  // <4
                 relation = Collation.QUATERNARY_LEVEL;
                 break;
             case 0x63:  // <c
                 relation = Collation.CASE_LEVEL;
                 break;
             case 0x69:  // <i
                 relation = Collation.IDENTICAL_LEVEL;
                 break;
             default:  // just <
                 relation = Collation.NO_LEVEL;
                 start = 1;
                 break;
             }
         } else if (fileLine.charAt(0) == '=') {
             relation = Collation.ZERO_LEVEL;
             start = 1;
         } else {
             start = 0;
         }

         // The relation must be present and followed by a space.
         // A line that ends right after the relation (e.g. "=" or "<1") is also an error.
         if (start == 0 || start >= fileLine.length() || !isSpace(fileLine.charAt(start))) {
             logln(fileLine);
             error = new ParseException("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line "
                                         + fileLineNumber, fileLineNumber);
             errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line " + fileLineNumber);
             return Collation.NO_LEVEL;
         }

         start = skipSpaces(start);
         Output<String> prefixOut = new Output<String>();
         start = parseString(start, prefixOut, s);
         if (error == null && prefixOut.value != null) {
             // prefix|string is only meaningful in rules, not for a string to be compared.
             logln(fileLine);
             error = new ParseException("prefix string not allowed for test string: on line "
                                         + fileLineNumber, fileLineNumber);
             errln("prefix string not allowed for test string: on line " + fileLineNumber);
             return Collation.NO_LEVEL;
         }
         if (start < fileLine.length()) {
             logln(fileLine);
             error = new ParseException("unexpected line contents after test string on line "
                                         + fileLineNumber, fileLineNumber);
             errln("unexpected line contents after test string on line " + fileLineNumber);
             return Collation.NO_LEVEL;
         }

         return relation;
     }

     /**
      * Parses an attribute line from {@code fileLine} and applies it to {@code coll}.
      * The text after the first character (and optional spaces) is either
      * {@code attribute=value} or, if the line contains no '=', {@code reorder <codes>},
      * which is delegated to {@code parseAndSetReorderCodes()}.
      * <p>
      * Supported attributes: maxVariable, backwards, alternate, caseFirst, caseLevel,
      * strength, numeric.
      * <p>
      * On success {@code fileLine} is set to null. On a syntax error the line is logged,
      * {@code error} is set to a {@link ParseException} and the error is reported.
      */
     private void parseAndSetAttribute() {
         int start = skipSpaces(1);
         int equalPos = fileLine.indexOf('=');
         if (equalPos < 0) {
             if (fileLine.regionMatches(start, "reorder", 0, 7)) {
                 parseAndSetReorderCodes(start + 7);
                 return;
             }
             logln(fileLine);
             error = new ParseException("missing '=' on line " + fileLineNumber, fileLineNumber);
             errln("missing '=' on line " + fileLineNumber);
             return;
         }

         String attrString = fileLine.substring(start,  equalPos);
         String valueString = fileLine.substring(equalPos + 1);
         // maxVariable is set on the Collator itself; it is handled before the
         // cast to RuleBasedCollator that the remaining attributes need.
         if (attrString.equals("maxVariable")) {
             int max;
             if (valueString.equals("space")) {
                 max = ReorderCodes.SPACE;
             } else if(valueString.equals("punct")) {
                 max = ReorderCodes.PUNCTUATION;
             } else if(valueString.equals("symbol")) {
                 max = ReorderCodes.SYMBOL;
             } else if(valueString.equals("currency")) {
                 max = ReorderCodes.CURRENCY;
             } else {
                 logln(fileLine);
                 error = new ParseException("invalid attribute value name on line "
                                             + fileLineNumber, fileLineNumber);
                 errln("invalid attribute value name on line " + fileLineNumber);
                 return;
             }
             coll.setMaxVariable(max);
             fileLine = null;
             return;
         }

         // Becomes false when the attribute is known but the value is not valid for it.
         boolean parsed = true;
         RuleBasedCollator rbc = (RuleBasedCollator)coll;
         if (attrString.equals("backwards")) {
             if (valueString.equals("on")) {
                 rbc.setFrenchCollation(true);
             } else if (valueString.equals("off")) {
                 rbc.setFrenchCollation(false);
             } else if (valueString.equals("default")) {
                 rbc.setFrenchCollationDefault();
             } else {
                 parsed = false;
             }
         } else if (attrString.equals("alternate")) {
             if (valueString.equals("non-ignorable")) {
                 rbc.setAlternateHandlingShifted(false);
             } else if (valueString.equals("shifted")) {
                 rbc.setAlternateHandlingShifted(true);
             } else if (valueString.equals("default")) {
                 rbc.setAlternateHandlingDefault();
             } else {
                 parsed = false;
             }
         } else if (attrString.equals("caseFirst")) {
             if (valueString.equals("upper")) {
                 rbc.setUpperCaseFirst(true);
             } else if (valueString.equals("lower")) {
                 rbc.setLowerCaseFirst(true);
             } else if (valueString.equals("default")) {
                 rbc.setCaseFirstDefault();
             } else {
                 parsed = false;
             }
         } else if (attrString.equals("caseLevel")) {
             if (valueString.equals("on")) {
                 rbc.setCaseLevel(true);
             } else if (valueString.equals("off")) {
                 rbc.setCaseLevel(false);
             } else if (valueString.equals("default")) {
                 rbc.setCaseLevelDefault();
             } else {
                 parsed = false;
             }
         } else if (attrString.equals("strength")) {
             if (valueString.equals("primary")) {
                 rbc.setStrength(Collator.PRIMARY);
             } else if (valueString.equals("secondary")) {
                 rbc.setStrength(Collator.SECONDARY);
             } else if (valueString.equals("tertiary")) {
                 rbc.setStrength(Collator.TERTIARY);
             } else if (valueString.equals("quaternary")) {
                 rbc.setStrength(Collator.QUATERNARY);
             } else if (valueString.equals("identical")) {
                 rbc.setStrength(Collator.IDENTICAL);
             } else if (valueString.equals("default")) {
                 rbc.setStrengthDefault();
             } else {
                 parsed = false;
             }
         } else if (attrString.equals("numeric")) {
             if (valueString.equals("on")) {
                 rbc.setNumericCollation(true);
             } else if (valueString.equals("off")) {
                 rbc.setNumericCollation(false);
             } else if (valueString.equals("default")) {
                 rbc.setNumericCollationDefault();
             } else {
                 parsed = false;
             }
         } else {
             // The attribute name itself is unknown (as opposed to its value).
             logln(fileLine);
             error = new ParseException("invalid attribute name on line "
                                         + fileLineNumber, fileLineNumber);
             errln("invalid attribute name on line " + fileLineNumber);
             return;
         }
         if (!parsed) {
             logln(fileLine);
             error = new ParseException("invalid attribute=value combination on line "
                                         + fileLineNumber, fileLineNumber);
             errln("invalid attribute=value combination on line " + fileLineNumber);
             return;
         }

         fileLine = null;
     }

     /**
      * Parses the space-separated reorder code names in {@code fileLine} beginning at
      * {@code start} and sets them on {@code coll}.
      * Names are resolved with {@code CollationRuleParser.getReorderCode()}; the name
      * "default" (case-insensitive) maps to {@code ReorderCodes.DEFAULT}.
      * <p>
      * On success {@code fileLine} is set to null. On an unrecognized name the line is
      * logged, {@code error} is set to a {@link ParseException}, the error is reported,
      * and the collator is left unchanged.
      *
      * @param start index in {@code fileLine} just past the "reorder" keyword
      */
     private void parseAndSetReorderCodes(int start) {
         UVector32 reorderCodes = new UVector32();
         while (start < fileLine.length()) {
             start = skipSpaces(start);
             int limit = start;
             while (limit < fileLine.length() && !isSpace(fileLine.charAt(limit))) {
                 ++limit;
             }
             String name = fileLine.substring(start, limit);
             int code = CollationRuleParser.getReorderCode(name);
             // Any negative result means "not recognized". Testing for < 0 rather than
             // < -1 ensures that an unknown name which yields -1 is rejected instead of
             // being silently accepted as ReorderCodes.DEFAULT (which is also -1).
             if (code < 0) {
                 if (name.equalsIgnoreCase("default")) {
                     code = ReorderCodes.DEFAULT;  // -1
                 } else {
                     logln(fileLine);
                     error = new ParseException("invalid reorder code '" + name + "' on line "
                                                 + fileLineNumber, fileLineNumber);
                     errln("invalid reorder code '" + name + "' on line " + fileLineNumber);
                     return;
                 }
             }
             reorderCodes.addElement(code);
             start = limit;
         }
         // Copy only the used part of the vector's buffer.
         int[] reorderCodesArray = new int[reorderCodes.size()];
         System.arraycopy(reorderCodes.getBuffer(), 0,
                 reorderCodesArray, 0, reorderCodes.size());
         coll.setReorderCodes(reorderCodesArray);

         fileLine = null;
     }

     /**
      * Reads rule lines up to the next section starter (or end of input), unescapes and
      * concatenates them, and replaces {@code coll} with a new
      * {@link RuleBasedCollator} built from the result.
      * If the collator cannot be built, the rules are logged, the failure is reported,
      * and {@code error} is set to the exception.
      *
      * @param in the reader for the test file
      * @throws IOException if reading from {@code in} fails
      */
     private void buildTailoring(BufferedReader in) throws IOException {
         StringBuilder rules = new StringBuilder();
         while (readLine(in)) {
             if (fileLine.length() != 0) {
                 if (isSectionStarter(fileLine.charAt(0))) {
                     // Start of the next section; leave it in fileLine.
                     break;
                 }
                 rules.append(Utility.unescape(fileLine));
             }
         }

         String ruleString = rules.toString();
         try {
             coll = new RuleBasedCollator(ruleString);
         } catch (Exception e) {
             logln(ruleString);
             errln("RuleBasedCollator(rules) failed - " + e.getMessage());
             error = e;
         }
     }

     /** Replaces {@code coll} with the collator for the root locale. */
     private void setRootCollator() {
         Collator rootCollator = Collator.getInstance(ULocale.ROOT);
         coll = rootCollator;
     }

     /**
      * Replaces {@code coll} with the collator for the locale named on the current
      * {@code "@ locale <langTag>"} line.
      * If the line has no locale ID after the first nine characters, or the ID is
      * rejected, the line is logged, the error is reported and {@code error} is set
      * to a {@link ParseException}; {@code coll} is then left unchanged.
      */
     private void setLocaleCollator() {
         ULocale locale = null;
         if (fileLine.length() > 9) {
             String localeID = fileLine.substring(9); // "@ locale <langTag>"
             try {
                 locale = new ULocale(localeID);  // either locale ID or language tag
             } catch (IllformedLocaleException e) {
                 locale = null;
             }
         }
         if (locale == null) {
             logln(fileLine);
             errln("invalid language tag on line " + fileLineNumber);
             // Same text as the errln() message above (the exception text used to be misspelled).
             error = new ParseException("invalid language tag on line " + fileLineNumber, fileLineNumber);
             return;
         }

         logln("creating a collator for locale ID " + locale.getName());
         coll = Collator.getInstance(locale);
     }

     /**
      * Tells whether s must be compared with normalization turned on.
      *
      * @param s the string to check
      * @return true if s fails the {@code fcd} check, or if it contains U+0F71
      *         immediately followed by U+0F73, U+0F75 or U+0F81
      */
     private boolean needsNormalization(String s) {
         if (!fcd.isNormalized(s)) {
             return true;
         }
         // In some sequences with Tibetan composite vowel signs,
         // even if the string passes the FCD check,
         // those composites must be decomposed.
         // Look at the character after each occurrence of 0F71.
         for (int pos = s.indexOf(0xf71); pos >= 0; pos = s.indexOf(0xf71, pos + 1)) {
             int following = pos + 1;
             if (following < s.length()) {
                 char c = s.charAt(following);
                 if (c == 0xf73 || c == 0xf75 || c == 0xf81) {
                     return true;
                 }
             }
         }
         return false;
     }

     /**
      * Gets the collation key for s from {@code coll} and checks its structure:
      * the key must be non-empty and end with a 00 byte, must not contain another
      * 00 byte, and must contain one fewer 01 level separator than the number of levels
      * implied by the collator's strength and case-level setting.
      *
      * @param norm   description of the normalization mode, used in error messages
      * @param line   the test file line that s came from, logged on failure
      * @param s      the string to get the key for
      * @param keyOut receives the key (even when a check fails)
      * @return true if the key passed all checks
      */
     private boolean getCollationKey(String norm, String line, String s, Output<CollationKey> keyOut) {
         CollationKey key = coll.getCollationKey(s);
         keyOut.value = key;

         byte[] bytes = key.toByteArray();
         int lastIndex = bytes.length - 1;
         if (lastIndex < 0 || bytes[lastIndex] != 0) {
             logln(fileTestName);
             logln(line);
             logln(printCollationKey(key));
             errln("Collator(" + norm + ").getCollationKey() wrote an empty or unterminated key");
             return false;
         }

         // Strengths below IDENTICAL produce strength+1 levels; IDENTICAL produces 5.
         int strength = coll.getStrength();
         int expectedLevels = (strength < Collator.IDENTICAL) ? strength + 1 : 5;
         if (((RuleBasedCollator)coll).isCaseLevel()) {
             ++expectedLevels;
         }
         // Scan everything before the terminator.
         int separatorCount = 0;
         for (int i = 0; i < lastIndex; ++i) {
             byte b = bytes[i];
             if (b == 0) {
                 logln(fileTestName);
                 logln(line);
                 logln(printCollationKey(key));
                 errln("Collator(" + norm + ").getCollationKey() contains a 00 byte");
                 return false;
             }
             if (b == 1) {
                 ++separatorCount;
             }
         }
         if (separatorCount != (expectedLevels - 1)) {
             logln(fileTestName);
             logln(line);
             logln(printCollationKey(key));
             errln("Collator(" + norm + ").getCollationKey() has "
                     + separatorCount + " level separators for "
                     + expectedLevels + " levels");
             return false;
         }

         // No nextSortKeyPart support in ICU4J

         return true;
     }

     /**
      * Replaces the key with the merge of the collation keys of the substrings of s
      * that are separated by U+FFFE.
      * If s contains no U+FFFE, the key is left untouched.
      *
      * @param s   the string to split at U+FFFE
      * @param key receives the merged key
      * @return true if the key was replaced, false if s contains no U+FFFE
      */
     private boolean getMergedCollationKey(String s, Output<CollationKey> key) {
         int separator = s.indexOf('\uFFFE');
         if (separator < 0) {
             // s does not contain any U+FFFE.
             return false;
         }
         CollationKey merged = null;
         int segmentStart = 0;
         while (true) {
             // The current segment runs up to the next separator, or to the end of s.
             int segmentLimit = (separator >= 0) ? separator : s.length();
             CollationKey segmentKey = coll.getCollationKey(s.substring(segmentStart, segmentLimit));
             merged = (merged == null) ? segmentKey : merged.merge(segmentKey);
             if (separator < 0) {
                 // That was the last segment.
                 break;
             }
             segmentStart = separator + 1;
             separator = s.indexOf('\uFFFE', segmentStart);
         }
         key.value = merged;
         return true;
     }

     /**
      * Determines the collation level at which two collation keys first differ, by
      * counting level separator bytes in their common prefix.
      *
      * @param prevKey          the first key
      * @param key              the second key
      * @param order            the comparison result for the two keys
      * @param collHasCaseLevel whether the keys contain a case level; if not, the
      *                         case level is skipped when counting
      * @return {@code Collation.NO_LEVEL} if order is {@code Collation.EQUAL},
      *         otherwise the level of the first differing byte
      */
     private static int getDifferenceLevel(CollationKey prevKey, CollationKey key,
             int order, boolean collHasCaseLevel) {
         if (order == Collation.EQUAL) {
             return Collation.NO_LEVEL;
         }
         byte[] left = prevKey.toByteArray();
         byte[] right = key.toByteArray();
         int level = Collation.PRIMARY_LEVEL;
         int pos = 0;
         // Walk the common prefix; the keys are expected to differ somewhere.
         while (left[pos] == right[pos]) {
             if ((int)left[pos] == Collation.LEVEL_SEPARATOR_BYTE) {
                 ++level;
                 if (level == Collation.CASE_LEVEL && !collHasCaseLevel) {
                     ++level;
                 }
             }
             ++pos;
         }
         return level;
     }

     /**
      * Compares prevString with s in several ways and reports an error at the first
      * mismatch with the expected result:
      * <ol>
      * <li>both collation keys are fetched and structurally checked
      *     (see {@code getCollationKey()}),</li>
      * <li>{@code coll.compare()} in both directions,</li>
      * <li>comparison of the two collation keys,</li>
      * <li>the level of the first key difference, if a level is expected,</li>
      * <li>if either string contains U+FFFE: the same order and level checks on the
      *     merged keys of the between-U+FFFE segments.</li>
      * </ol>
      *
      * @param norm          description of the normalization mode, used in messages
      * @param prevFileLine  the test file line that prevString came from
      * @param prevString    the previous test string
      * @param s             the current test string (from {@code fileLine})
      * @param expectedOrder the expected result of comparing prevString with s
      * @param expectedLevel the expected difference level, or
      *                      {@code Collation.NO_LEVEL} if none is specified
      * @return true if all checks passed
      */
     private boolean checkCompareTwo(String norm, String prevFileLine, String prevString, String s,
                                     int expectedOrder, int expectedLevel) {
         // Get the sort keys first, for error debug output.
         Output<CollationKey> prevKeyOut = new Output<CollationKey>();
         CollationKey prevKey;
         // prevString came from prevFileLine, so that is the line to log if its key is bad.
         if (!getCollationKey(norm, prevFileLine, prevString, prevKeyOut)) {
             return false;
         }
         prevKey = prevKeyOut.value;

         Output<CollationKey> keyOut = new Output<CollationKey>();
         CollationKey key;
         if (!getCollationKey(norm, fileLine, s, keyOut)) {
             return false;
         }
         key = keyOut.value;

         int order = coll.compare(prevString, s);
         if (order != expectedOrder) {
             logln(fileTestName);
             logln(prevFileLine);
             logln(fileLine);
             logln(printCollationKey(prevKey));
             logln(printCollationKey(key));
             errln("line " + fileLineNumber
                     + " Collator(" + norm + ").compare(previous, current) wrong order: "
                     + order + " != " + expectedOrder);
             return false;
         }
         // Swapping the arguments must negate the result.
         order = coll.compare(s, prevString);
         if (order != -expectedOrder) {
             logln(fileTestName);
             logln(prevFileLine);
             logln(fileLine);
             logln(printCollationKey(prevKey));
             logln(printCollationKey(key));
             errln("line " + fileLineNumber
                     + " Collator(" + norm + ").compare(current, previous) wrong order: "
                     + order + " != " + -expectedOrder);
             return false;
         }

         order = prevKey.compareTo(key);
         if (order != expectedOrder) {
             logln(fileTestName);
             logln(prevFileLine);
             logln(fileLine);
             logln(printCollationKey(prevKey));
             logln(printCollationKey(key));
             errln("line " + fileLineNumber
                     + " Collator(" + norm + ").getCollationKey(previous, current).compareTo() wrong order: "
                     + order + " != " + expectedOrder);
             return false;
         }
         boolean collHasCaseLevel = ((RuleBasedCollator)coll).isCaseLevel();
         int level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
         if (order != Collation.EQUAL && expectedLevel != Collation.NO_LEVEL) {
             if (level != expectedLevel) {
                 logln(fileTestName);
                 logln(prevFileLine);
                 logln(fileLine);
                 logln(printCollationKey(prevKey));
                 logln(printCollationKey(key));
                 errln("line " + fileLineNumber
                         + " Collator(" + norm + ").getCollationKey(previous, current).compareTo()="
                         + order + " wrong level: " + level + " != " + expectedLevel);
                 return false;
             }
         }

         // If either string contains U+FFFE, then their sort keys must compare the same as
         // the merged sort keys of each string's between-FFFE segments.
         //
         // It is not required that
         //   sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
         // only that those two methods yield the same order.
         //
         // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
         // The outputs start out as the unmerged keys, so a string without U+FFFE
         // keeps its regular key.
         Output<CollationKey> outPrevKey = new Output<CollationKey>(prevKey);
         Output<CollationKey> outKey = new Output<CollationKey>(key);
         if (getMergedCollationKey(prevString, outPrevKey) | getMergedCollationKey(s, outKey)) {
             prevKey = outPrevKey.value;
             key = outKey.value;
             order = prevKey.compareTo(key);
             if (order != expectedOrder) {
                 logln(fileTestName);
                 errln("line " + fileLineNumber
                         + " Collator(" + norm + ").getCollationKey"
                         + "(previous, current segments between U+FFFE)).merge().compareTo() wrong order: "
                         + order + " != " + expectedOrder);
                 logln(prevFileLine);
                 logln(fileLine);
                 logln(printCollationKey(prevKey));
                 logln(printCollationKey(key));
                 return false;
             }
             // The merged keys must differ on the same level as the unmerged keys.
             int mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
             if (order != Collation.EQUAL && expectedLevel != Collation.NO_LEVEL) {
                 if(mergedLevel != level) {
                     logln(fileTestName);
                     errln("line " + fileLineNumber
                         + " Collator(" + norm + ").getCollationKey"
                         + "(previous, current segments between U+FFFE)).merge().compareTo()="
                         + order + " wrong level: " + mergedLevel + " != " + level);
                     logln(prevFileLine);
                     logln(fileLine);
                     logln(printCollationKey(prevKey));
                     logln(printCollationKey(key));
                     return false;
                 }
             }
         }
         return true;
     }

     /**
      * Processes the test lines that follow a "* compare" header.
      * <p>
      * Lines are read until the input ends or a section-starter line is seen. Empty lines are
      * skipped. Every other line is parsed with parseRelationAndString() and its string is
      * compared against the string of the preceding test line (the empty string for the first
      * one). Up to three checks run per pair, each only if the earlier ones passed:
      * <ol>
      * <li>normalization off, only when neither string needs normalization,</li>
      * <li>normalization on,</li>
      * <li>NFD-normalized copies of both strings, only when at least one is not already NFD.</li>
      * </ol>
      *
      * @param in reader positioned after the "* compare" line
      * @throws IOException if reading a line fails
      */
     private void checkCompareStrings(BufferedReader in) throws IOException {
         String previousLine = "(none)";
         String previous = "";
         Output<String> parsed = new Output<String>();
         while (readLine(in)) {
             if (fileLine.length() == 0) {
                 continue;
             }
             if (isSectionStarter(fileLine.charAt(0))) {
                 // fileLine is left untouched on this exit, so it still holds the starter line.
                 break;
             }

             // The parsed relation doubles as the expected difference level.
             int relation = parseRelationAndString(parsed);
             String current = parsed.value;
             int expectedOrder;
             if (relation == Collation.ZERO_LEVEL) {
                 expectedOrder = Collation.EQUAL;
             } else {
                 expectedOrder = Collation.LESS;
             }

             boolean passed = true;
             boolean anyNeedsNormalization =
                     needsNormalization(previous) || needsNormalization(current);
             if (!anyNeedsNormalization) {
                 coll.setDecomposition(Collator.NO_DECOMPOSITION);
                 passed = checkCompareTwo("normalization=off", previousLine, previous, current,
                         expectedOrder, relation);
             }
             if (passed) {
                 coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
                 passed = checkCompareTwo("normalization=on", previousLine, previous, current,
                         expectedOrder, relation);
             }
             if (passed && !(nfd.isNormalized(previous) && nfd.isNormalized(current))) {
                 // Runs with whatever decomposition mode the preceding check left on coll.
                 checkCompareTwo("NFD input", previousLine,
                         nfd.normalize(previous), nfd.normalize(current),
                         expectedOrder, relation);
             }

             previousLine = fileLine;
             previous = current;
         }
     }

     /**
      * Runs the data-driven collation tests described in "collationtest.txt".
      * <p>
      * The file is read as UTF-8 and a leading U+FEFF (byte order mark) on the first line is
      * removed. The main loop then expects every non-empty line it sees to begin with a
      * section-starter character and dispatches on it:
      * <ul>
      * <li>"** test: ..." records the test name,</li>
      * <li>"@ root" / "@ locale ..." / "@ rules" select or build the collator,</li>
      * <li>"% ..." sets a collator attribute,</li>
      * <li>"* compare" runs the string comparisons that follow.</li>
      * </ul>
      * Anything else is reported as a syntax error and ends the test. The loop also ends when
      * the input is exhausted or when the {@code error} field becomes non-null.
      */
     public void TestDataDriven() {
         nfd = Normalizer2.getNFDInstance();
         fcd = Norm2AllModes.getFCDNormalizer2();

         BufferedReader in = null;

         try {
             in = TestUtil.getDataReader("collationtest.txt", "UTF-8");

             // Read the first line and remove the BOM if present.
             // The length check keeps charAt(0) from throwing when the first line is empty.
             readLine(in);
             if (fileLine != null && fileLine.length() > 0 && fileLine.charAt(0) == '\uFEFF') {
                 fileLine = fileLine.substring(1);
             }

             while (error == null) {
                 // Fetch a new line when the current one has been consumed or is empty.
                 if (fileLine == null || fileLine.length() == 0) {
                     if (!readLine(in)) {
                         break;
                     }
                     continue;
                 }
                 if (!isSectionStarter(fileLine.charAt(0))) {
                     logln(fileLine);
                     errln("syntax error on line " + fileLineNumber);
                     return;
                 }
                 if (fileLine.startsWith("** test: ")) {
                     fileTestName = fileLine;
                     logln(fileLine);
                     fileLine = null;
                 } else if (fileLine.equals("@ root")) {
                     setRootCollator();
                     fileLine = null;
                 } else if (fileLine.startsWith("@ locale ")) {
                     setLocaleCollator();
                     fileLine = null;
                 } else if (fileLine.equals("@ rules")) {
                     // NOTE(review): fileLine is not cleared for this and the next two branches;
                     // presumably these sub-parsers advance fileLine themselves - confirm.
                     buildTailoring(in);
                 } else if (fileLine.charAt(0) == '%'
                         && fileLine.length() > 1 && isSpace(fileLine.charAt(1))) {
                     parseAndSetAttribute();
                 } else if (fileLine.equals("* compare")) {
                     checkCompareStrings(in);
                 } else {
                     logln(fileLine);
                     errln("syntax error on line " + fileLineNumber);
                     return;
                 }
             }
         } catch (IOException e) {
             errln(e.getMessage());
         } finally {
             // Always release the reader, including on the early syntax-error returns.
             try {
                 if (in != null) {
                     in.close();
                 }
             } catch (IOException e) {
                 e.printStackTrace();
             }
         }
     }
 }