src/com/ibm/icu/dev/test/rbbi/BreakIteratorTest.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 1996-2005, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  */
 package com.ibm.icu.dev.test.rbbi;

 import com.ibm.icu.dev.test.*;
 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.RuleBasedBreakIterator_Old;
 import java.text.StringCharacterIterator;
 import java.util.Locale;
 import java.util.Vector;

 public class BreakIteratorTest extends TestFmwk
 {
     private BreakIterator characterBreak;
     private BreakIterator wordBreak;
     private BreakIterator lineBreak;
     private BreakIterator sentenceBreak;
     private BreakIterator titleBreak;

     /**
      * Command-line entry point: builds a fresh test object and passes the
      * arguments straight to its {@code run} method.
      *
      * @param args command-line arguments, forwarded unchanged to {@code run}
      * @throws Exception anything propagated by {@code run}
      */
     public static void main(String[] args) throws Exception {
         BreakIteratorTest test = new BreakIteratorTest();
         test.run(args);
     }

     /**
      * Creates the test object. No work is done here; the iterator fields are
      * assigned by {@code init()}, not by the constructor.
      */
     public BreakIteratorTest() {
         // intentionally empty
     }
     /**
      * Fills the five iterator fields with the character, word, line, sentence
      * and title instances returned by the no-argument {@code BreakIterator}
      * factory methods.
      * NOTE(review): presumably invoked by the test framework before the test
      * methods run -- confirm against TestFmwk.
      */
     protected void init() {
         this.characterBreak = BreakIterator.getCharacterInstance();
         this.wordBreak      = BreakIterator.getWordInstance();
         this.lineBreak      = BreakIterator.getLineInstance();
         this.sentenceBreak  = BreakIterator.getSentenceInstance();
         this.titleBreak     = BreakIterator.getTitleInstance();
     }
     //=========================================================================
     // general test subroutines
     //=========================================================================

     /**
      * Runs the full battery of consistency checks on one iterator.
      *
      * The expected fragments are concatenated to form the text under test.
      * Forward and backward iteration are compared with each other, and -- only
      * if that comparison logged no new errors -- the forward result is compared
      * with the expected fragments. Afterwards following(), preceding(),
      * isBoundary() and next(n) are checked against the boundary offsets implied
      * by the expected fragments.
      *
      * @param bi             the iterator to exercise; its text is replaced
      * @param expectedResult Vector of String fragments, in text order
      */
     private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {
         final int fragmentCount = expectedResult.size();

         // Glue the expected fragments together to obtain the text to analyze.
         StringBuffer joined = new StringBuffer();
         for (int idx = 0; idx < fragmentCount; idx++) {
             joined.append((String)expectedResult.elementAt(idx));
         }
         String text = joined.toString();
         bi.setText(text);

         Vector forward = _testFirstAndNext(bi, text);
         Vector backward = _testLastAndPrevious(bi, text);

         logln("comparing forward and backward...");
         int errorsBefore = getErrorCount();
         compareFragmentLists("forward iteration", "backward iteration", forward, backward);
         boolean directionsAgree = (getErrorCount() == errorsBefore);
         if (directionsAgree) {
             logln("comparing expected and actual...");
             compareFragmentLists("expected result", "actual result", expectedResult, forward);
         }

         // Boundary table layout: DONE, 0, end of fragment 0, ..., end of text, DONE.
         // The DONE sentinels at both ends let the helper checks index one slot
         // before the first boundary and one slot after the last.
         int[] boundaries = new int[fragmentCount + 3];
         boundaries[0] = BreakIterator.DONE;
         boundaries[1] = 0;
         int runningOffset = 0;
         for (int idx = 0; idx < fragmentCount; idx++) {
             runningOffset += ((String)expectedResult.elementAt(idx)).length();
             boundaries[idx + 2] = runningOffset;
         }
         boundaries[boundaries.length - 1] = BreakIterator.DONE;

         _testFollowing(bi, text, boundaries);
         _testPreceding(bi, text, boundaries);
         _testIsBoundary(bi, text, boundaries);

         doMultipleSelectionTest(bi, text);
     }

     /**
      * Walks the iterator from first() with repeated next() calls and collects
      * the text between consecutive boundaries.
      *
      * Errors are logged when first() is not 0, when next() does not advance,
      * and when DONE is returned before the end of the text was reached.
      *
      * @param bi   iterator already positioned on {@code text}
      * @param text the text the iterator was set to
      * @return Vector of String fragments in forward order
      */
     private Vector _testFirstAndNext(BreakIterator bi, String text) {
         Vector fragments = new Vector();
         int current = bi.first();
         int previous = current;

         if (current != 0)
             errln("first() returned " + current + " instead of 0");

         while (current != BreakIterator.DONE) {
             current = bi.next();
             if (current == BreakIterator.DONE) {
                 // The iterator is exhausted; it must have stopped at the text's end.
                 if (previous != text.length())
                     errln("next() returned DONE prematurely: offset was "
                                     + previous + " instead of " + text.length());
             }
             else {
                 if (current <= previous)
                     errln("next() failed to move forward: next() on position "
                                     + previous + " yielded " + current);

                 fragments.addElement(text.substring(previous, current));
             }
             previous = current;
         }
         return fragments;
     }

     /**
      * Walks the iterator from last() with repeated previous() calls and
      * collects the text between consecutive boundaries.
      *
      * Each fragment is inserted at the front of the result, so the returned
      * list is in text order even though it is discovered back to front.
      * Errors are logged when last() is not the text length, when previous()
      * does not move backward, and when DONE is returned before offset 0.
      *
      * @param bi   iterator already positioned on {@code text}
      * @param text the text the iterator was set to
      * @return Vector of String fragments in text order
      */
     private Vector _testLastAndPrevious(BreakIterator bi, String text) {
         Vector fragments = new Vector();
         int current = bi.last();
         int later = current;

         if (current != text.length())
             errln("last() returned " + current + " instead of " + text.length());

         while (current != BreakIterator.DONE) {
             current = bi.previous();
             if (current == BreakIterator.DONE) {
                 // The iterator is exhausted; it must have stopped at offset 0.
                 if (later != 0)
                     errln("previous() returned DONE prematurely: offset was "
                                     + later + " instead of 0");
             }
             else {
                 if (current >= later)
                     errln("previous() failed to move backward: previous() on position "
                                     + later + " yielded " + current);

                 fragments.insertElementAt(text.substring(current, later), 0);
             }
             later = current;
         }
         return fragments;
     }

     /**
      * Compares two fragment lists element by element and reports every region
      * where they disagree.
      *
      * Matching fragments are echoed through debugLogln(). On a mismatch, the
      * side whose consumed length is shorter is extended fragment by fragment
      * until both sides have consumed the same number of characters (or one side
      * runs out). The fragments of that region are logged for both lists, one
      * error is reported, and comparison resumes after the region.
      *
      * The previous implementation added the mismatching fragment's length to
      * its running totals more than once, so the resynchronisation point could
      * be wrong and fragments after a mismatch could be skipped or misaligned.
      * The totals here count every fragment exactly once.
      *
      * @param f1Name label for the first list in log output
      * @param f2Name label for the second list in log output
      * @param f1     Vector of String fragments
      * @param f2     Vector of String fragments
      */
     private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {
         int p1 = 0;   // index of the next unexamined fragment of f1
         int p2 = 0;   // index of the next unexamined fragment of f2
         int t1 = 0;   // total length of the f1 fragments before p1
         int t2 = 0;   // total length of the f2 fragments before p2

         while (p1 < f1.size() && p2 < f2.size()) {
             String s1 = (String)f1.elementAt(p1);
             String s2 = (String)f2.elementAt(p2);

             if (s1.equals(s2)) {
                 debugLogln("   >" + s1 + "<");
                 t1 += s1.length();
                 t2 += s2.length();
                 ++p1;
                 ++p2;
                 continue;
             }

             // Mismatch. end1/end2 are exclusive end indices of the disagreeing
             // region; endT1/endT2 are the text offsets reached at those indices.
             int end1 = p1 + 1;
             int end2 = p2 + 1;
             int endT1 = t1 + s1.length();
             int endT2 = t2 + s2.length();

             // Extend whichever side is behind until the offsets line up again.
             while (endT1 != endT2) {
                 if (endT1 < endT2) {
                     if (end1 >= f1.size())
                         break;      // f1 exhausted; cannot resynchronise
                     endT1 += ((String)f1.elementAt(end1)).length();
                     ++end1;
                 }
                 else {
                     if (end2 >= f2.size())
                         break;      // f2 exhausted; cannot resynchronise
                     endT2 += ((String)f2.elementAt(end2)).length();
                     ++end2;
                 }
             }

             logln("*** " + f1Name + " has:");
             for (; p1 < end1; ++p1) {
                 debugLogln(" *** >" + (String)f1.elementAt(p1) + "<");
             }
             logln("***** " + f2Name + " has:");
             for (; p2 < end2; ++p2) {
                 debugLogln(" ***** >" + (String)f2.elementAt(p2) + "<");
             }
             t1 = endT1;
             t2 = endT2;

             errln("Discrepancy between " + f1Name + " and " + f2Name);
         }
     }

     /**
      * Checks following(i) for every offset from 0 through the text length.
      *
      * @param bi         iterator already positioned on {@code text}
      * @param text       the text the iterator was set to
      * @param boundaries expected boundary offsets, bracketed by DONE sentinels
      *                   (index 0 and the last index)
      */
     private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
         logln("testFollowing():");
         final int textLength = text.length();
         // Starts at slot 2: slot 1 holds offset 0, so the first boundary
         // strictly after offset 0 is in slot 2.
         int expectedIdx = 2;
         int offset = 0;
         while (offset <= textLength) {
             // Once the offset reaches the expected boundary, the boundary that
             // follows it is the next table entry.
             if (boundaries[expectedIdx] == offset)
                 ++expectedIdx;

             int actual = bi.following(offset);
             logln("bi.following(" + offset + ") -> " + actual);
             if (actual != boundaries[expectedIdx])
                 errln("Wrong result from following() for " + offset + ": expected "
                                 + boundaries[expectedIdx] + ", got " + actual);
             ++offset;
         }
     }

     /**
      * Checks preceding(i) for every offset from 0 through the text length.
      *
      * @param bi         iterator already positioned on {@code text}
      * @param text       the text the iterator was set to
      * @param boundaries expected boundary offsets, bracketed by DONE sentinels
      *                   (index 0 and the last index)
      */
     private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
         logln("testPreceding():");
         final int textLength = text.length();
         // Starts at slot 0 (DONE): nothing precedes offset 0.
         int expectedIdx = 0;
         int offset = 0;
         while (offset <= textLength) {
             int actual = bi.preceding(offset);
             logln("bi.preceding(" + offset + ") -> " + actual);
             if (actual != boundaries[expectedIdx])
                 errln("Wrong result from preceding() for " + offset + ": expected "
                                 + boundaries[expectedIdx] + ", got " + actual);

             // After passing a boundary, that boundary becomes the expected
             // answer for the offsets that follow it.
             if (boundaries[expectedIdx + 1] == offset)
                 ++expectedIdx;
             ++offset;
         }
     }

     /**
      * Checks isBoundary(i) for every offset from 0 through the text length.
      *
      * @param bi         iterator already positioned on {@code text}
      * @param text       the text the iterator was set to
      * @param boundaries expected boundary offsets, bracketed by DONE sentinels
      *                   (index 0 and the last index)
      */
     private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
         logln("testIsBoundary():");
         final int textLength = text.length();
         // Slot 1 holds the first real boundary (offset 0).
         int nextBoundaryIdx = 1;
         for (int offset = 0; offset <= textLength; offset++) {
             boolean actual = bi.isBoundary(offset);
             logln("bi.isBoundary(" + offset + ") -> " + actual);

             boolean expected = (offset == boundaries[nextBoundaryIdx]);
             if (expected) {
                 if (!actual)
                     errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
                 ++nextBoundaryIdx;
             }
             else if (actual) {
                 errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
             }
         }
     }

     /**
      * Verifies that next(n) lands on the same boundary as n single steps.
      *
      * Forward pass: a clone of the iterator is reset with first() and advanced
      * with next(count), while the original is stepped one boundary at a time
      * with next(); both must agree at every step, including the final DONE.
      * Backward pass: the clone is reset with last() and moved with
      * next(count) for count = 0, -1, -2, ..., while the original is stepped
      * with previous().
      *
      * The backward pass now reports mismatches as a next(n)/previous()
      * disagreement; it used to reuse the forward-pass message, which named
      * next() although previous() was the method being compared.
      *
      * @param iterator iterator with its text already set; its position is changed
      * @param testText the text being iterated (not used by this method)
      */
     private void doMultipleSelectionTest(BreakIterator iterator, String testText)
     {
         logln("Multiple selection test...");
         BreakIterator testIterator = (BreakIterator)iterator.clone();
         int offset = iterator.first();
         int testOffset;
         int count = 0;

         do {
             // Reset the clone; only the next(count) result is of interest.
             testIterator.first();
             testOffset = testIterator.next(count);
             logln("next(" + count + ") -> " + testOffset);
             if (offset != testOffset)
                 errln("next(n) and next() not returning consistent results: for step " + count
                         + ", next(n) returned " + testOffset + " and next() had " + offset);

             if (offset != BreakIterator.DONE) {
                 count++;
                 offset = iterator.next();
             }
         } while (offset != BreakIterator.DONE);

         // now do it backwards...
         offset = iterator.last();
         count = 0;

         do {
             // Reset the clone to the end; a negative count moves it backward.
             testIterator.last();
             testOffset = testIterator.next(count);
             logln("next(" + count + ") -> " + testOffset);
             if (offset != testOffset)
                 errln("next(n) and previous() not returning consistent results: for step " + count
                         + ", next(n) returned " + testOffset + " and previous() had " + offset);

             if (offset != BreakIterator.DONE) {
                 count--;
                 offset = iterator.previous();
             }
         } while (offset != BreakIterator.DONE);
     }

     /**
      * Checks the invariant that a break always occurs right after CR, LF,
      * paragraph separator (U+2029) and line separator (U+2028).
      *
      * For every terminator, a three-character string is built from each pair
      * of characters in {@code testChars} with the terminator in the middle,
      * and the iterator must report a boundary at offset 2. Combinations where
      * CR is followed by LF, U+2029, U+2028 or U+0003 are skipped because no
      * break is expected to be guaranteed there.
      *
      * @param tb        iterator to test; its text is replaced repeatedly
      * @param testChars characters to place before and after the terminator
      */
     private void doBreakInvariantTest(BreakIterator tb, String testChars)
     {
         // a break should always occur after CR (unless followed by LF), LF, PS, and LS,
         // unless they're followed by a non-spacing mark or a format character
         final String terminators = "\r\n\u2029\u2028";
         // characters after which a preceding CR is exempt from the check
         final String exemptAfterCr = "\n\u2029\u2028\u0003";
         final int charCount = testChars.length();
         StringBuffer sample = new StringBuffer("aaa");

         for (int t = 0; t < terminators.length(); t++) {
             char terminator = terminators.charAt(t);
             sample.setCharAt(1, terminator);

             for (int before = 0; before < charCount; before++) {
                 sample.setCharAt(0, testChars.charAt(before));

                 for (int after = 0; after < charCount; after++) {
                     char trailing = testChars.charAt(after);

                     // if a cr is followed by lf, ps, ls or etx, don't do the check
                     // (that's not supposed to work)
                     if (terminator == '\r' && exemptAfterCr.indexOf(trailing) >= 0)
                         continue;

                     sample.setCharAt(2, trailing);
                     tb.setText(sample.toString());

                     // Visit every boundary and remember whether offset 2 was one of them.
                     boolean breakAfterTerminator = false;
                     int pos = tb.first();
                     while (pos != BreakIterator.DONE) {
                         if (pos == 2)
                             breakAfterTerminator = true;
                         pos = tb.next();
                     }

                     if (!breakAfterTerminator) {
                         errln("No break between U+" + Integer.toHexString((int)terminator)
                                     + " and U+" + Integer.toHexString((int)trailing));
                     }
                 }
             }
         }
     }

     /**
      * Checks two "never break here" invariants.
      *
      * First, no boundary may fall between CR and LF: every pair of characters
      * from {@code testChars} is placed around a CR LF pair and a boundary at
      * offset 2 is an error. Second, no boundary may fall before a
      * non-spacing or enclosing mark unless the preceding character is a line
      * terminator: every non-terminator character is followed by every mark in
      * {@code testChars} and a boundary at offset 2 is an error.
      *
      * The method gives up after 75 reported errors in total.
      *
      * @param tb        iterator to test; its text is replaced repeatedly
      * @param testChars characters used to build the sample strings
      */
     private void doOtherInvariantTest(BreakIterator tb, String testChars)
     {
         final int maxErrors = 75;
         final int charCount = testChars.length();
         int errorCount = 0;

         // a break should never occur between CR and LF
         StringBuffer crlfSample = new StringBuffer("a\r\na");
         for (int lead = 0; lead < charCount; lead++) {
             crlfSample.setCharAt(0, testChars.charAt(lead));
             for (int trail = 0; trail < charCount; trail++) {
                 crlfSample.setCharAt(3, testChars.charAt(trail));
                 tb.setText(crlfSample.toString());

                 int pos = tb.first();
                 while (pos != BreakIterator.DONE) {
                     if (pos == 2) {
                         errln("Break between CR and LF in string U+" + Integer.toHexString(
                                 (int)(crlfSample.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
                                 (int)(crlfSample.charAt(3))));
                         errorCount++;
                         if (errorCount >= maxErrors)
                             return;
                     }
                     pos = tb.next();
                 }
             }
         }

         // a break should never occur before a non-spacing mark, unless it's preceded
         // by a line terminator
         final String terminators = "\n\r\u2029\u2028\u0003";
         StringBuffer markSample = new StringBuffer("aaaa");
         for (int lead = 0; lead < charCount; lead++) {
             char base = testChars.charAt(lead);
             if (terminators.indexOf(base) >= 0)
                 continue;
             markSample.setCharAt(1, base);

             for (int trail = 0; trail < charCount; trail++) {
                 char mark = testChars.charAt(trail);
                 int type = Character.getType(mark);
                 boolean isMark = (type == Character.NON_SPACING_MARK
                         || type == Character.ENCLOSING_MARK);
                 if (!isMark)
                     continue;
                 markSample.setCharAt(2, mark);
                 tb.setText(markSample.toString());

                 int pos = tb.first();
                 while (pos != BreakIterator.DONE) {
                     if (pos == 2) {
                         errln("Break between U+" + Integer.toHexString((int)(markSample.charAt(1)))
                                 + " and U+" + Integer.toHexString((int)(markSample.charAt(2))));
                         errorCount++;
                         if (errorCount >= maxErrors)
                             return;
                     }
                     pos = tb.next();
                 }
             }
         }
     }

     /**
      * Logs a string with every character outside the printable ASCII range
      * (space through tilde) replaced by a backslash, the letter u, and the
      * character's code as four lowercase hex digits.
      *
      * @param s the text to escape and pass to logln()
      */
     public void debugLogln(String s) {
         StringBuffer escaped = new StringBuffer();
         final int length = s.length();
         for (int idx = 0; idx < length; idx++) {
             char ch = s.charAt(idx);
             boolean printableAscii = (ch >= ' ' && ch < 0x7f);
             if (printableAscii) {
                 escaped.append(ch);
                 continue;
             }
             String hex = Integer.toHexString((int)ch);
             escaped.append("\\u");
             // left-pad the hex digits with zeros to a width of four
             for (int width = hex.length(); width < 4; width++) {
                 escaped.append('0');
             }
             escaped.append(hex);
         }
         logln(escaped.toString());
     }

     //=========================================================================
     // tests
     //=========================================================================

     /**
      * General word-break test: numbers with separators, currency signs,
      * hyphenated words, punctuation runs, and CR/LF combinations.
      *
      * The expectations only apply to the old rule-based implementation, so
      * the test is skipped (with a log message) for any other iterator class.
      * The type is checked with instanceof instead of a cast wrapped in a
      * catch of ClassCastException; a ClassCastException raised inside the
      * test body is therefore no longer mistaken for "skip".
      */
     public void TestWordBreak() {
         // A null iterator is deliberately not treated as "skip"; it falls
         // through and fails in the test body, as it did before.
         if (wordBreak != null && !(wordBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector wordSelectionData = new Vector();

         wordSelectionData.addElement("12,34");

         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("\u00A2"); //cent sign
         wordSelectionData.addElement("\u00A3"); //pound sign
         wordSelectionData.addElement("\u00A4"); //currency sign
         wordSelectionData.addElement("\u00A5"); //yen sign
         wordSelectionData.addElement("alpha-beta-gamma");
         wordSelectionData.addElement(".");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("Badges");
         wordSelectionData.addElement("?");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("BADGES");
         wordSelectionData.addElement("!");
         wordSelectionData.addElement("?");
         wordSelectionData.addElement("!");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("We");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("don't");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("need");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("no");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("STINKING");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("BADGES");
         wordSelectionData.addElement("!");
         wordSelectionData.addElement("!");
         wordSelectionData.addElement("!");

         wordSelectionData.addElement("012.566,5");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("123.3434,900");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("1000,233,456.000");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("1,23.322%");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("123.1222");

         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("\u0024123,000.20");

         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("179.01\u0025");

         wordSelectionData.addElement("Hello");
         wordSelectionData.addElement(",");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("how");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("are");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("you");
         wordSelectionData.addElement(" ");
         wordSelectionData.addElement("X");
         wordSelectionData.addElement(" ");

         wordSelectionData.addElement("Now");
         wordSelectionData.addElement("\r");
         wordSelectionData.addElement("is");
         wordSelectionData.addElement("\n");
         wordSelectionData.addElement("the");
         wordSelectionData.addElement("\r\n");
         wordSelectionData.addElement("time");
         wordSelectionData.addElement("\n");
         wordSelectionData.addElement("\r");
         wordSelectionData.addElement("for");
         wordSelectionData.addElement("\r");
         wordSelectionData.addElement("\r");
         wordSelectionData.addElement("all");
         wordSelectionData.addElement(" ");

         generalIteratorTest(wordBreak, wordSelectionData);
     }

     /**
      * @bug 4097779
      */
     public void TestBug4097779() {
         // Expected word fragments: a word containing U+0300 between letters
         // must stay in one piece, followed by a single space.
         final String[] fragments = { "aa\u0300a", " " };

         Vector wordSelectionData = new Vector();
         for (int idx = 0; idx < fragments.length; idx++) {
             wordSelectionData.addElement(fragments[idx]);
         }

         generalIteratorTest(wordBreak, wordSelectionData);
     }

     /**
      * @bug 4098467
      */
     public void TestBug4098467Words() {
         // What follows is a string of Korean characters (found in the Yellow Pages
         // ad for the Korean Presbyterian Church of San Francisco, hopefully
         // transcribed correctly), first as precomposed syllables, and then as
         // conjoining jamo. Both sequences should be semantically identical and
         // break the same way.
         final String[] fragments = {
             // precomposed syllables...
             "\uc0c1\ud56d",
             " ",
             "\ud55c\uc778",
             " ",
             "\uc5f0\ud569",
             " ",
             "\uc7a5\ub85c\uad50\ud68c",
             " ",
             // conjoining jamo...
             "\u1109\u1161\u11bc\u1112\u1161\u11bc",
             " ",
             "\u1112\u1161\u11ab\u110b\u1175\u11ab",
             " ",
             "\u110b\u1167\u11ab\u1112\u1161\u11b8",
             " ",
             "\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c",
             " ",
         };

         Vector wordSelectionData = new Vector();
         for (int idx = 0; idx < fragments.length; idx++) {
             wordSelectionData.addElement(fragments[idx]);
         }

         generalIteratorTest(wordBreak, wordSelectionData);
     }

     /**
      * @bug 4117554
      */
     public void TestBug4117554Words() {
         // These expectations only apply to the old rule-based implementation;
         // skip for any other iterator class. The type is checked with
         // instanceof rather than by catching ClassCastException, so a
         // ClassCastException from the test body is no longer reported as a skip.
         // A null iterator is not treated as "skip" and fails in the test body.
         if (wordBreak != null && !(wordBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector wordSelectionData = new Vector();

         // this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
         // count as a Kanji character for the purposes of word breaking
         wordSelectionData.addElement("abc");
         wordSelectionData.addElement("\u4e01\u4e02\u3005\u4e03\u4e03");
         wordSelectionData.addElement("abc");

         generalIteratorTest(wordBreak, wordSelectionData);
     }

     /**
      * General sentence-break test covering terminators followed by spaces,
      * closing punctuation and quotes, paragraph separators, and
      * abbreviation-like periods.
      *
      * The expectations only apply to the old rule-based implementation, so
      * the test is skipped (with a log message) for any other iterator class.
      * The type is checked with instanceof instead of a cast wrapped in a
      * catch of ClassCastException; a ClassCastException raised inside the
      * test body is therefore no longer mistaken for "skip".
      */
     public void TestSentenceBreak() {
         // A null iterator is deliberately not treated as "skip"; it falls
         // through and fails in the test body, as it did before.
         if (sentenceBreak != null && !(sentenceBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector sentenceSelectionData = new Vector();

         sentenceSelectionData.addElement("This is a simple sample sentence. ");
         sentenceSelectionData.addElement("(This is it.) ");
         sentenceSelectionData.addElement("This is a simple sample sentence. ");
         sentenceSelectionData.addElement("\"This isn\'t it.\" ");
         sentenceSelectionData.addElement("Hi! ");
         sentenceSelectionData.addElement("This is a simple sample sentence. ");
         sentenceSelectionData.addElement("It does not have to make any sense as you can see. ");
         sentenceSelectionData.addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
         sentenceSelectionData.addElement("Che la dritta via aveo smarrita. ");
         sentenceSelectionData.addElement("He said, that I said, that you said!! ");

         sentenceSelectionData.addElement("Don't rock the boat.\u2029");

         sentenceSelectionData.addElement("Because I am the daddy, that is why. ");
         sentenceSelectionData.addElement("Not on my time (el timo.)! ");

         sentenceSelectionData.addElement("So what!!\u2029");

         sentenceSelectionData.addElement("\"But now,\" he said, \"I know!\" ");
         sentenceSelectionData.addElement("Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");
         sentenceSelectionData.addElement("One species, B. anthracis, is highly virulent.\n");
         sentenceSelectionData.addElement("Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");
         sentenceSelectionData.addElement("Have you ever said, \"This is where \tI shall live\"? ");
         sentenceSelectionData.addElement("He answered, \"You may not!\" ");
         sentenceSelectionData.addElement("Another popular saying is: \"How do you do?\". ");
         sentenceSelectionData.addElement("Yet another popular saying is: \'I\'m fine thanks.\' ");
         sentenceSelectionData.addElement("What is the proper use of the abbreviation pp.? ");
         sentenceSelectionData.addElement("Yes, I am definatelly 12\" tall!!");

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4113835
      */
     public void TestBug4113835() {
         // These expectations only apply to the old rule-based implementation;
         // skip for any other iterator class. The type is checked with
         // instanceof rather than by catching ClassCastException, so a
         // ClassCastException from the test body is no longer reported as a skip.
         // A null iterator is not treated as "skip" and fails in the test body.
         if (sentenceBreak != null && !(sentenceBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector sentenceSelectionData = new Vector();

         // test for bug #4113835: \n and \r count as spaces, not as paragraph breaks
         sentenceSelectionData.addElement("Now\ris\nthe\r\ntime\n\rfor\r\rall\u2029");

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4111338
      */
     public void TestBug4111338() {
         // test for bug #4111338: Don't break sentences at the boundary between CJK
         // and other letters
         final String[] sentences = {
             "\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
                 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
                 + "\u611d\u57b6\u2510\u5d46\".\u2029",
             "\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
                 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
                 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029",
             "\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
                 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
                 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029",
             "He said, \"I can go there.\"\u2029",
         };

         Vector sentenceSelectionData = new Vector();
         for (int idx = 0; idx < sentences.length; idx++) {
             sentenceSelectionData.addElement(sentences[idx]);
         }

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4117554
      */
     public void TestBug4117554Sentences() {
         // These expectations only apply to the old rule-based implementation;
         // skip for any other iterator class. The type is checked with
         // instanceof rather than by catching ClassCastException, so a
         // ClassCastException from the test body is no longer reported as a skip.
         // A null iterator is not treated as "skip" and fails in the test body.
         if (sentenceBreak != null && !(sentenceBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector sentenceSelectionData = new Vector();

         // Treat fullwidth variants of .!? the same as their
         // normal counterparts
         sentenceSelectionData.addElement("I know I'm right\uff0e ");
         sentenceSelectionData.addElement("Right\uff1f ");
         sentenceSelectionData.addElement("Right\uff01 ");

         // Don't break sentences at boundary between CJK and digits
         sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
                 + "\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
                 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");

         // Break sentence between a sentence terminator and
         // opening punctuation
         sentenceSelectionData.addElement("no?");
         sentenceSelectionData.addElement("(yes)");

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4158381
      */
     public void TestBug4158381() {
         // These expectations only apply to the old rule-based implementation;
         // skip for any other iterator class. The type is checked with
         // instanceof rather than by catching ClassCastException, so a
         // ClassCastException from the test body is no longer reported as a skip.
         // A null iterator is not treated as "skip" and fails in the test body.
         if (sentenceBreak != null && !(sentenceBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector sentenceSelectionData = new Vector();

         // Don't break sentence after period if it isn't followed by a space
         sentenceSelectionData.addElement("Test <code>Flags.Flag</code> class.  ");
         sentenceSelectionData.addElement("Another test.\u2029");

         // No breaks when there are no terminators around
         sentenceSelectionData.addElement("<P>Provides a set of "
                 + "&quot;lightweight&quot; (all-java<FONT SIZE=\"-2\"><SUP>TM"
                 + "</SUP></FONT> language) components that, "
                 + "to the maximum degree possible, work the same on all platforms.  ");
         sentenceSelectionData.addElement("Another test.\u2029");

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4143071
      */
     public void TestBug4143071() {
         // Make sure sentences that end with digits work right
         final String[] sentences = {
             "Today is the 27th of May, 1998.  ",
             "Tomorrow will be 28 May 1998.  ",
             "The day after will be the 30th.\u2029",
         };

         Vector sentenceSelectionData = new Vector();
         for (int idx = 0; idx < sentences.length; idx++) {
             sentenceSelectionData.addElement(sentences[idx]);
         }

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4152416
      */
     public void TestBug4152416() {
         // Make sure sentences ending with a capital letter are treated correctly
         final String[] sentences = {
             "The type of all primitive "
                 + "<code>boolean</code> values accessed in the target VM.  ",
             "Calls to xxx will return an "
                 + "implementor of this interface.\u2029",
         };

         Vector sentenceSelectionData = new Vector();
         for (int idx = 0; idx < sentences.length; idx++) {
             sentenceSelectionData.addElement(sentences[idx]);
         }

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * @bug 4152117
      */
     public void TestBug4152117() {
         // Make sure sentence breaking is handling punctuation correctly
         // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
         // IT DOESN'T CROP UP]
         final String[] sentences = {
             "Constructs a randomly generated "
                 + "BigInteger, uniformly distributed over the range <tt>0</tt> "
                 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ",
             "The uniformity of the distribution "
                 + "assumes that a fair source of random bits is provided in "
                 + "<tt>rnd</tt>.  ",
             "Note that this constructor always "
                 + "constructs a non-negative BigInteger.\u2029",
         };

         Vector sentenceSelectionData = new Vector();
         for (int idx = 0; idx < sentences.length; idx++) {
             sentenceSelectionData.addElement(sentences[idx]);
         }

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * General line-break test: break opportunities after hyphens and runs of
      * spaces, and after LF, CR, CR LF, U+2028 and tab.
      */
     public void TestLineBreak() {
         final String[] fragments = {
             "Multi-",
             "Level ",
             "example ",
             "of ",
             "a ",
             "semi-",
             "idiotic ",
             "non-",
             "sensical ",
             "(non-",
             "important) ",
             "sentence. ",

             "Hi  ",
             "Hello ",
             "How\n",
             "are\r",
             "you\u2028",
             "fine.\t",
             "good.  ",

             "Now\r",
             "is\n",
             "the\r\n",
             "time\n",
             "\r",
             "for\r",
             "\r",
             "all",
         };

         Vector lineSelectionData = new Vector();
         for (int idx = 0; idx < fragments.length; idx++) {
             lineSelectionData.addElement(fragments[idx]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4068133
      *
      * Line breaking of CJK ideographs followed by ideographic and ASCII
      * punctuation: in the expected data, trailing punctuation stays in the same
      * segment as the ideograph that precedes it.
      */
     public void TestBug4068133() {
         final String[] expectedSegments = {
             "\u96f6",
             "\u4e00\u3002",
             "\u4e8c\u3001",
             "\u4e09\u3002\u3001",
             "\u56db\u3001\u3002\u3001",
             "\u4e94,",
             "\u516d.",
             "\u4e03.\u3001,\u3002",
             "\u516b"
         };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4086052
      *
      * Text joined by a no-break space (U+00A0) is expected to come back as a
      * single line-break segment.
      */
     public void TestBug4086052() {
         final String[] expectedSegments = {
             "foo\u00a0bar "
             // disabled in the original test data:
             //     "foo\ufeffbar"
         };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4097920
      *
      * Line breaking around commas without following spaces and around
      * adjacent parenthesised words.
      */
     public void TestBug4097920() {
         final String[] expectedSegments = {
             "dog,cat,mouse ",
             "(one)",
             "(two)\n"
         };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4035266
      *
      * Line breaking around currency amounts with leading signs and currency
      * symbols.  The expectations only apply to the old rule-based implementation,
      * so the test is skipped (with a log message) for any other iterator type.
      */
     public void TestBug4035266() {
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the test itself.
         if (!(lineBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector lineSelectionData = new Vector();

         lineSelectionData.addElement("The ");
         lineSelectionData.addElement("balance ");
         lineSelectionData.addElement("is ");
         lineSelectionData.addElement("$-23,456.78, ");
         lineSelectionData.addElement("not ");
         lineSelectionData.addElement("-$32,456.78!\n");

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4098467
      *
      * Korean text must line-break the same way whether it is written as
      * precomposed syllables or as conjoining jamo.  The expectations only apply
      * to the old rule-based implementation, so the test is skipped (with a log
      * message) for any other iterator type.
      */
     public void TestBug4098467Lines() {
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the test itself.
         if (!(lineBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector lineSelectionData = new Vector();

         // What follows is a string of Korean characters (I found it in the Yellow Pages
         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
         // it correctly), first as precomposed syllables, and then as conjoining jamo.
         // Both sequences should be semantically identical and break the same way.
         // precomposed syllables...
         lineSelectionData.addElement("\uc0c1\ud56d ");
         lineSelectionData.addElement("\ud55c\uc778 ");
         lineSelectionData.addElement("\uc5f0\ud569 ");
         lineSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c ");
         // conjoining jamo...
         lineSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc ");
         lineSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab ");
         lineSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8 ");
         lineSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * Line breaking of pure Thai text with the Thai-locale line iterator,
      * including the handling of the paiyannoi character (U+0E2F).
      */
     public void TestThaiLineBreak() {
         Vector lineSelectionData = new Vector();

         // U+0E2F -- the Thai paiyannoi character -- isn't a letter.  It's a symbol that
         // represents elided letters at the end of a long word.  It should be bound to
         // the end of the word and not treated as an independent punctuation mark.
         lineSelectionData.addElement("\u0e2a\u0e16\u0e32\u0e19\u0e35\u0e2f");
         lineSelectionData.addElement("\u0e08\u0e30");
         lineSelectionData.addElement("\u0e23\u0e30\u0e14\u0e21");
         lineSelectionData.addElement("\u0e40\u0e08\u0e49\u0e32");
 //        lineSelectionData.addElement("\u0e2b\u0e19\u0e49\u0e32");
 //        lineSelectionData.addElement("\u0e17\u0e35\u0e48");
 // I think the above two lines are the preferred reading of this text, but our current
 // dictionary yields the following:
         // This element must be exactly the two disabled segments above joined together;
         // its second character is U+0E19, matching the first disabled segment.
         lineSelectionData.addElement("\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48");
         lineSelectionData.addElement("\u0e2d\u0e2d\u0e01");
         lineSelectionData.addElement("\u0e21\u0e32");
         lineSelectionData.addElement("\u0e40\u0e23\u0e48\u0e07");
         lineSelectionData.addElement("\u0e23\u0e30\u0e1a\u0e32\u0e22");
         lineSelectionData.addElement("\u0e2d\u0e22\u0e48\u0e32\u0e07");
         lineSelectionData.addElement("\u0e40\u0e15\u0e47\u0e21");

         // the one time where the paiyannoi occurs somewhere other than at the end
         // of a word is in the Thai abbreviation for "etc.", which both begins and
         // ends with a paiyannoi
         lineSelectionData.addElement("\u0e2f\u0e25\u0e2f");
         lineSelectionData.addElement("\u0e17\u0e35\u0e48");
         lineSelectionData.addElement("\u0e19\u0e31\u0e49\u0e19");

         generalIteratorTest(BreakIterator.getLineInstance(new Locale("th", "", "")),
                 lineSelectionData);
     }

     /**
      * Line breaking of Thai text mixed with Arabic numerals, Thai numerals,
      * punctuation and English words, using the Thai-locale line iterator.
      * A larger set of historical expectations is kept below in a disabled block.
      */
     public void TestMixedThaiLineBreak() {
         // @suwit -- Test Arabic numerals, Thai numerals, Punctuation and English characters
         // start
         final String[] expectedSegments = {
             "\u0E1B\u0E35",
             "\u0E1E\u0E38\u0E17\u0E18\u0E28\u0E31\u0E01\u0E23\u0E32\u0E0A ",
             "2545 ",
             "\u0E40\u0E1B\u0E47\u0E19",
             "\u0E1B\u0E35",
             "\u0E09\u0E25\u0E2D\u0E07",
             "\u0E04\u0E23\u0E1A",
             "\u0E23\u0E2D\u0E1A ",
             "\"\u0E52\u0E52\u0E50 ",
             "\u0E1b\u0E35\" ",
             "\u0E02\u0E2d\u0E07",
             "\u0E01\u0E23\u0E38\u0E07",
             "\u0E23\u0E31\u0E15\u0E19\u0E42\u0E01\u0E2A\u0E34\u0E19\u0E17\u0E23\u0E4C ",
             "(\u0E01\u0E23\u0E38\u0E07\u0E40\u0E17\u0E1e\u0E2F",
             "\u0E2B\u0E23\u0E37\u0E2D ",
             "Bangkok)"
         };
         // @suwit - end of changes

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         // Arabic numerals should always be separated from surrounding Thai text
 /*
         lineSelectionData.addElement("\u0e04\u0e48\u0e32");
         lineSelectionData.addElement("\u0e40\u0e07\u0e34\u0e19");
         lineSelectionData.addElement("\u0e1a\u0e32\u0e17");
         lineSelectionData.addElement("\u0e41\u0e15\u0e30");
         lineSelectionData.addElement("\u0e23\u0e30\u0e14\u0e31\u0e1a");
         lineSelectionData.addElement("39");
         lineSelectionData.addElement("\u0e1a\u0e32\u0e17 ");

         // words in non-Thai scripts should always be separated from surrounding Thai text
         lineSelectionData.addElement("\u0e17\u0e14");
         lineSelectionData.addElement("\u0e2a\u0e2d\u0e1a");
         lineSelectionData.addElement("Java");
         lineSelectionData.addElement("\u0e1a\u0e19");
         lineSelectionData.addElement("\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07");
         lineSelectionData.addElement("\u0e44\u0e2d\u0e1a\u0e35\u0e40\u0e2d\u0e47\u0e21 ");

         // Thai numerals should always be separated from the text surrounding them
         lineSelectionData.addElement("\u0e04\u0e48\u0e32");
         lineSelectionData.addElement("\u0e40\u0e07\u0e34\u0e19");
         lineSelectionData.addElement("\u0e1a\u0e32\u0e17");
         lineSelectionData.addElement("\u0e41\u0e15\u0e30");
         lineSelectionData.addElement("\u0e23\u0e30\u0e14\u0e31\u0e1a");
         lineSelectionData.addElement("\u0e53\u0e59");
         lineSelectionData.addElement("\u0e1a\u0e32\u0e17 ");

         // Thai text should interact correctly with punctuation and symbols
         lineSelectionData.addElement("\u0e44\u0e2d\u0e1a\u0e35\u0e40\u0e2d\u0e47\u0e21");
 //        lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28");
 //        lineSelectionData.addElement("\u0e44\u0e17\u0e22)");
 // I think the above lines represent the preferred reading for this text, but our current
 // dictionary file yields the following:
         lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22)");
         lineSelectionData.addElement("\u0e08\u0e33\u0e01\u0e31\u0e14");
         lineSelectionData.addElement("\u0e40\u0e1b\u0e34\u0e14");
         lineSelectionData.addElement("\u0e15\u0e31\u0e27\"");

         lineSelectionData.addElement("\u0e2e\u0e32\u0e23\u0e4c\u0e14\u0e14\u0e34\u0e2a\u0e01\u0e4c\"");
         lineSelectionData.addElement("\u0e23\u0e38\u0e48\u0e19");
         lineSelectionData.addElement("\u0e43\u0e2b\u0e21\u0e48");
         lineSelectionData.addElement("\u0e40\u0e14\u0e37\u0e2d\u0e19\u0e21\u0e34.");
         lineSelectionData.addElement("\u0e22.");
         lineSelectionData.addElement("\u0e19\u0e35\u0e49");
         lineSelectionData.addElement("\u0e23\u0e32\u0e04\u0e32");
         lineSelectionData.addElement("$200");
         lineSelectionData.addElement("\u0e40\u0e17\u0e48\u0e32");
         lineSelectionData.addElement("\u0e19\u0e31\u0e49\u0e19 ");
         lineSelectionData.addElement("(\"\u0e2e\u0e32\u0e23\u0e4c\u0e14\u0e14\u0e34\u0e2a\u0e01\u0e4c\").");
 */

         BreakIterator thaiLineBreak = BreakIterator.getLineInstance(new Locale("th", "", ""));
         generalIteratorTest(thaiLineBreak, lineSelectionData);
     }

     /**
      * Line breaking around the Thai maiyamok character (U+0E46), using the
      * Thai-locale line iterator.
      */
     public void TestMaiyamok() {
         // the Thai maiyamok character is a shorthand symbol that means "repeat the previous
         // word".  Instead of appearing as a word unto itself, however, it's kept together
         // with the word before it
         final String[] expectedSegments = {
             "\u0e44\u0e1b\u0e46",
             "\u0e21\u0e32\u0e46",
             "\u0e23\u0e30\u0e2b\u0e27\u0e48\u0e32\u0e07",
             "\u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e",
             "\u0e41\u0e25\u0e30",
             "\u0e40\u0e03\u0e35\u0e22\u0e07",
             "\u0e43\u0e2b\u0e21\u0e48"
         };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         BreakIterator thaiLineBreak = BreakIterator.getLineInstance(new Locale("th", "", ""));
         generalIteratorTest(thaiLineBreak, lineSelectionData);
     }

     /**
      * @bug 4117554
      *
      * Fullwidth full stop, exclamation mark and question mark must stay attached
      * to the ideograph that precedes them when line breaking.
      */
     public void TestBug4117554Lines() {
         // Fullwidth .!? should be treated as postJwrd
         final String[] expectedSegments = {
             "\u4e01\uff0e",
             "\u4e02\uff01",
             "\u4e03\uff1f"
         };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * Mixed letter/digit tokens must not be split by the line-break iterator.
      */
     public void TestLettersAndDigits() {
         // a character sequence such as "X11" or "30F3" or "native2ascii" should
         // be kept together as a single word
         final String[] expectedSegments = { "X11 ", "30F3 ", "native2ascii" };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * @bug 4217703
      *
      * Line breaking around sentence-ending punctuation followed by a closing
      * quote, and around '$' and '.' inside identifiers.  The expectations only
      * apply to the old rule-based implementation, so the test is skipped (with a
      * log message) for any other iterator type.
      */
     public void TestBug4217703() {
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the test itself.
         if (!(lineBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector lineSelectionData = new Vector();

         // There shouldn't be a line break between sentence-ending punctuation
         // and a closing quote
         lineSelectionData.addElement("He ");
         lineSelectionData.addElement("said ");
         lineSelectionData.addElement("\"Go!\"  ");
         lineSelectionData.addElement("I ");
         lineSelectionData.addElement("went.  ");

         lineSelectionData.addElement("Hashtable$Enumeration ");
         lineSelectionData.addElement("getText().");
         lineSelectionData.addElement("getIndex()");

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     // Two-character sequences: a base Latin letter followed by one combining mark.
     // TestCharacterBreak passes each of them as a single expected element.
     // 'S' + U+0300 (combining grave accent)
     private static final String graveS = "S\u0300";
     // 'i' + U+0317 (combining acute accent below)
     private static final String acuteBelowI = "i\u0317";
     // 'e' + U+0301 (combining acute accent)
     private static final String acuteE = "e\u0301";
     // 'a' + U+0302 (combining circumflex accent)
     private static final String circumflexA = "a\u0302";
     // 'e' + U+0303 (combining tilde)
     private static final String tildeE = "e\u0303";

     /**
      * Runs the character-break iterator over Latin text containing base letters
      * with combining marks and the various CR/LF line endings.  Each array entry
      * is one expected segment.
      */
     public void TestCharacterBreak() {
         final String[] expectedSegments = {
             graveS, acuteBelowI, "m", "p", "l", acuteE,
             " ",
             "s", circumflexA, "m", "p", "l", tildeE,
             ".",
             "w", circumflexA, "w", "a", "f", "q",
             // line endings: CRLF is listed as one segment, lone CR and LF separately
             "\n", "\r", "\r\n", "\n"
         };

         Vector characterSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             characterSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(characterBreak, characterSelectionData);
     }

     /**
      * @bug 4098467
      *
      * Character breaking of Korean text: a conjoining-jamo sequence is listed as
      * one segment, in parallel with the corresponding precomposed syllable.
      */
     public void TestBug4098467Characters() {
         // What follows is a string of Korean characters (I found it in the Yellow Pages
         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
         // it correctly), first as precomposed syllables, and then as conjoining jamo.
         // Both sequences should be semantically identical and break the same way.
         final String[] expectedSegments = {
             // precomposed syllables...
             "\uc0c1", "\ud56d", " ",
             "\ud55c", "\uc778", " ",
             "\uc5f0", "\ud569", " ",
             "\uc7a5", "\ub85c", "\uad50", "\ud68c", " ",
             // conjoining jamo...
             "\u1109\u1161\u11bc", "\u1112\u1161\u11bc", " ",
             "\u1112\u1161\u11ab", "\u110b\u1175\u11ab", " ",
             "\u110b\u1167\u11ab", "\u1112\u1161\u11b8", " ",
             "\u110c\u1161\u11bc", "\u1105\u1169", "\u1100\u116d", "\u1112\u116c"
         };

         Vector characterSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             characterSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(characterBreak, characterSelectionData);
     }

     /**
      * Runs the title-break iterator over a short English sentence preceded by
      * leading spaces.  Each array entry is one expected segment.
      */
     public void TestTitleBreak()
     {
         final String[] expectedSegments = {
             "   ", "This ", "is ", "a ", "simple ", "sample ", "sentence. ", "This "
         };

         Vector titleData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             titleData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(titleBreak, titleData);
     }


     /**
      * @bug 4153072
      *
      * When the iterator's text is a sub-range of a string, isBoundary() is
      * expected to throw IllegalArgumentException for offsets before the begin
      * index and to accept the begin index itself.
      */
     public void TestBug4153072() {
         BreakIterator iter = BreakIterator.getWordInstance();
         final String str = "...Hello, World!...";
         final int begin = 3;
         final int end = str.length() - 3;

         iter.setText(new StringCharacterIterator(str, begin, end, begin));

         // probe every offset from -1 up to and including the begin index
         for (int offset = -1; offset <= begin; offset++) {
             boolean rejected;
             try {
                 iter.isBoundary(offset);
                 rejected = false;
             }
             catch (IllegalArgumentException e) {
                 rejected = true;
             }

             if (rejected) {
                 if (offset >= begin) {
                     errln("Got exception with offset = " + offset +
                                     " and begin index = " + begin);
                 }
             }
             else if (offset < begin) {
                 errln("Didn't get exception with offset = " + offset +
                                 " and begin index = " + begin);
             }
         }
     }

     /**
      * Bug 4146175 (sentences): a sentence break is expected between a period and
      * opening punctuation that follows it directly, and the fullwidth period is an
      * unambiguous sentence terminator.  The expectations only apply to the old
      * rule-based implementation, so the test is skipped (with a log message) for
      * any other iterator type.
      */
     public void TestBug4146175Sentences() {
         // The guard must look at the iterator this test actually exercises
         // (sentenceBreak, not lineBreak).  An explicit type check is used because
         // catching ClassCastException around the whole body would also hide a
         // ClassCastException raised by the test itself.
         if (!(sentenceBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector sentenceSelectionData = new Vector();

         // break between periods and opening punctuation even when there's no
         // intervening space
         sentenceSelectionData.addElement("end.");
         sentenceSelectionData.addElement("(This is\u2029");

         // treat the fullwidth period as an unambiguous sentence terminator
         sentenceSelectionData.addElement("\u7d42\u308f\u308a\uff0e");
         sentenceSelectionData.addElement("\u300c\u3053\u308c\u306f");

         generalIteratorTest(sentenceBreak, sentenceSelectionData);
     }

     /**
      * Bug 4146175 (lines): no line break is expected between a Japanese
      * character and a fullwidth comma that follows it.
      */
     public void TestBug4146175Lines() {
         // the fullwidth comma should stick to the preceding Japanese character
         final String[] expectedSegments = { "\u7d42\uff0c", "\u308f" };

         Vector lineSelectionData = new Vector();
         for (int i = 0; i < expectedSegments.length; i++) {
             lineSelectionData.addElement(expectedSegments[i]);
         }

         generalIteratorTest(lineBreak, lineSelectionData);
     }

     /**
      * Bug 4214367: hiragana/katakana iteration marks and the long vowel mark are
      * expected to stay inside the surrounding word.  The expectations only apply
      * to the old rule-based implementation, so the test is skipped (with a log
      * message) for any other iterator type.
      */
     public void TestBug4214367() {
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the test itself.
         if (!(wordBreak instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         Vector wordSelectionData = new Vector();

         // the hiragana and katakana iteration marks and the long vowel mark
         // are not being treated correctly by the word-break iterator
         wordSelectionData.addElement("\u3042\u3044\u309d\u3042\u309e\u3042\u30fc\u3042");
         wordSelectionData.addElement("\u30a2\u30a4\u30fd\u30a2\u30fe\u30a2\u30fc\u30a2");

         generalIteratorTest(wordBreak, wordSelectionData);
     }

     // Shared pool of sample characters for the Test*Invariants methods below; each
     // of them appends a few iterator-specific characters before running its checks.
     // The pool mixes ASCII control characters, punctuation, digits and letters with
     // Latin-1 symbols, modifier letters (U+02B0..), combining marks (U+0300..),
     // Hebrew letters, Devanagari signs, general-punctuation spaces/format characters
     // and separators (U+2000..U+2040), enclosing marks (U+20DD..) and Roman numerals.
     private static final String cannedTestChars
         = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
         + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
         + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
         + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
         + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
         + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";

     /**
      * Runs doOtherInvariantTest on a fresh sentence iterator, using the canned
      * character pool plus some sentence-relevant punctuation and kana.
      */
     public void TestSentenceInvariants()
     {
         BreakIterator sentenceIterator = BreakIterator.getSentenceInstance();
         String testChars = cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff";
         doOtherInvariantTest(sentenceIterator, testChars);
     }

     /**
      * Runs both invariant checks on a fresh word iterator, using the canned
      * character pool plus apostrophe, comma, period, kana and a few ideographs.
      * Skipped (with a log message) unless the iterator is the old rule-based
      * implementation.
      */
     public void TestWordInvariants()
     {
         BreakIterator e = BreakIterator.getWordInstance();
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the invariant checks.
         if (!(e instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         // the same character set feeds both checks
         String testChars = cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
                 + "\u30a3\u4e00\u4e01\u4e02";
         doBreakInvariantTest(e, testChars);
         doOtherInvariantTest(e, testChars);
     }

     /**
      * Runs the generic invariant checks on a fresh line iterator and then two
      * line-specific ones over three-character strings:
      * no break on either side of a non-breaking character, and a break after a
      * dash unless the following character is one of the exempt kinds listed below.
      * Returns early once these two checks have reported 75 failures.  Skipped
      * (with a log message) unless the iterator is the old rule-based implementation.
      */
     public void TestLineInvariants()
     {
         BreakIterator e = BreakIterator.getLineInstance();
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the checks themselves.
         if (!(e instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         String testChars = cannedTestChars + ".,;:\u3001\u3002\u3041\u3042\u3043\u3044\u3045"
                 + "\u30a3\u4e00\u4e01\u4e02";
         doBreakInvariantTest(e, testChars);
         doOtherInvariantTest(e, testChars);

         int errorCount = 0;

         // in addition to the other invariants, a line-break iterator should make sure that:
         // it doesn't break around the non-breaking characters
         String noBreak = "\u00a0\u2007\u2011\ufeff";
         // three-character scratch string: [first char][char under test][last char]
         StringBuffer work = new StringBuffer("aaa");
         for (int i = 0; i < testChars.length(); i++) {
             char c = testChars.charAt(i);
             // these are not used as the first character in this check
             if (c == '\r' || c == '\n' || c == '\u2029' || c == '\u2028' || c == '\u0003') {
                 continue;
             }
             work.setCharAt(0, c);
             for (int j = 0; j < noBreak.length(); j++) {
                 work.setCharAt(1, noBreak.charAt(j));
                 for (int k = 0; k < testChars.length(); k++) {
                     work.setCharAt(2, testChars.charAt(k));
                     e.setText(work.toString());
                     for (int l = e.first(); l != BreakIterator.DONE; l = e.next()) {
                         // a boundary at 1 or 2 is a break adjacent to the non-breaking char
                         if (l == 1 || l == 2) {
                             errln("Got break between U+" + Integer.toHexString((int)
                                     (work.charAt(l - 1))) + " and U+" + Integer.toHexString(
                                             (int)(work.charAt(l))));
                             errorCount++;
                             if (errorCount >= 75) {
                                 return;
                             }
                         }
                     }
                 }
             }
         }

         // it does break after dashes (unless they're followed by a digit, a non-spacing mark,
         // a currency symbol, a space, a format-control character, a regular control character,
         // a line or paragraph separator, or another dash)
         String dashes = "-\u00ad\u2010\u2012\u2013\u2014";
         for (int i = 0; i < testChars.length(); i++) {
             work.setCharAt(0, testChars.charAt(i));
             for (int j = 0; j < dashes.length(); j++) {
                 work.setCharAt(1, dashes.charAt(j));
                 for (int k = 0; k < testChars.length(); k++) {
                     char c = testChars.charAt(k);
                     int type = Character.getType(c);
                     if (type == Character.DECIMAL_DIGIT_NUMBER ||
                             type == Character.OTHER_NUMBER ||
                             type == Character.NON_SPACING_MARK ||
                             type == Character.ENCLOSING_MARK ||
                             type == Character.CURRENCY_SYMBOL ||
                             type == Character.DASH_PUNCTUATION ||
                             type == Character.SPACE_SEPARATOR ||
                             type == Character.FORMAT ||
                             type == Character.CONTROL ||
                             c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029' ||
                             c == '\u0003' || c == '\u2007' || c == '\u2011' ||
                             c == '\ufeff') {
                         continue;
                     }
                     work.setCharAt(2, c);
                     e.setText(work.toString());
                     // look for a boundary right after the dash (offset 2)
                     boolean saw2 = false;
                     for (int l = e.first(); l != BreakIterator.DONE; l = e.next()) {
                         if (l == 2) {
                             saw2 = true;
                         }
                     }
                     if (!saw2) {
                         errln("Didn't get break between U+" + Integer.toHexString((int)
                                 (work.charAt(1))) + " and U+" + Integer.toHexString(
                                         (int)(work.charAt(2))));
                         errorCount++;
                         if (errorCount >= 75) {
                             return;
                         }
                     }
                 }
             }
         }
     }

     /**
      * Runs both invariant checks on a fresh character iterator, using the canned
      * character pool plus a few Hangul jamo (U+1100.., U+1160.., U+11A8..).
      * Skipped (with a log message) unless the iterator is the old rule-based
      * implementation.
      */
     public void TestCharacterInvariants()
     {
         BreakIterator e = BreakIterator.getCharacterInstance();
         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the invariant checks.
         if (!(e instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         // the same character set feeds both checks
         String testChars = cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"
                 + "\u11a9\u11aa";
         doBreakInvariantTest(e, testChars);
         doOtherInvariantTest(e, testChars);
     }

     /**
      * Runs the general iterator test on the line iterator with a single empty
      * string as the expected data.
      */
     public void TestEmptyString()
     {
         Vector expected = new Vector();
         expected.addElement("");

         generalIteratorTest(lineBreak, expected);
     }

     /**
      * Sanity check only: both locale-listing methods must return a non-empty
      * array.  The contents of the lists are not verified.
      */
     public void TestGetAvailableLocales()
     {
         Locale[] locales = BreakIterator.getAvailableLocales();
         if (locales.length == 0) {
             errln("getAvailableLocales() returned an empty list!");
         }
         // I have no idea how to test this function...

         com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
         if (ulocales.length != 0) {
             logln("getAvailableULocales() returned " + ulocales.length + " locales");
         } else {
             errln("getAvailableULocales() returned an empty list!");
         }
     }

     /**
      * @bug 4095322
      *
      * With the Japanese line iterator, places each candidate character between
      * two ideographs and checks the boundaries: for "preceding" characters
      * (opening punctuation, currency signs, opening quotes) the expected
      * boundaries are 0, 1, 3; for "following" characters (closing punctuation,
      * small kana, marks and so on) they are 0, 2, 3.  Skipped (with a log
      * message) unless the iterator is the old rule-based implementation.
      */
     public void TestJapaneseLineBreak()
     {
         StringBuffer testString = new StringBuffer("\u4e00x\u4e8c");
         String precedingChars = "([{\u00ab$\u00a5\u00a3\u00a4\u2018\u201a\u201c\u201e\u201b\u201f";
         String followingChars = ")]}\u00bb!%,.\u3001\u3002\u3063\u3083\u3085\u3087\u30c3\u30e3\u30e5\u30e7\u30fc:;\u309b\u309c\u3005\u309d\u309e\u30fd\u30fe\u2019\u201d\u00b0\u2032\u2033\u2034\u2030\u2031\u2103\u2109\u00a2\u0300\u0301\u0302";
         BreakIterator iter = BreakIterator.getLineInstance(Locale.JAPAN);

         // Explicit type check: catching ClassCastException around the whole body
         // would also hide a ClassCastException raised by the checks themselves.
         if (!(iter instanceof RuleBasedBreakIterator_Old)) {
             logln("New Break Iterator, skipping old test");
             return;
         }

         // characters that must stay attached to what follows them
         for (int i = 0; i < precedingChars.length(); i++) {
             char c = precedingChars.charAt(i);
             testString.setCharAt(1, c);
             iter.setText(testString.toString());
             int j = iter.first();
             if (j != 0) {
                 errln("ja line break failure: failed to start at 0");
             }
             j = iter.next();
             if (j != 1) {
                 errln("ja line break failure: failed to stop before '" + c
                         + "' (" + ((int)c) + ")");
             }
             j = iter.next();
             if (j != 3) {
                 errln("ja line break failure: failed to skip position after '" + c
                         + "' (" + ((int)c) + ")");
             }
         }

         // characters that must stay attached to what precedes them
         for (int i = 0; i < followingChars.length(); i++) {
             char c = followingChars.charAt(i);
             testString.setCharAt(1, c);
             iter.setText(testString.toString());
             int j = iter.first();
             if (j != 0) {
                 errln("ja line break failure: failed to start at 0");
             }
             j = iter.next();
             if (j != 2) {
                 errln("ja line break failure: failed to skip position before '" + c
                         + "' (" + ((int)c) + ")");
             }
             j = iter.next();
             if (j != 3) {
                 errln("ja line break failure: failed to stop after '" + c
                         + "' (" + ((int)c) + ")");
             }
         }
     }

     /**
      * Bug 4638433.
      *
      * Checks word and line boundaries around characters that, according to the
      * notes below, were added or re-categorised relative to Unicode 3.0.0.
      *
      * The whole test is written against the old rule-based implementation: the
      * word iterator is cast to RuleBasedBreakIterator_Old up front, and if that
      * cast raises ClassCastException the test logs a message and stops.
      */
     public void TestLineBreakBasedOnUnicode3_0_0() {
         BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
         int pos;

         try {
             // Probe only: the variable is never read. The cast is here so that a
             // non-"_Old" iterator throws and we fall into the catch block below.
             RuleBasedBreakIterator_Old probe = (RuleBasedBreakIterator_Old) bi;

             // Latin Extended-B characters 0x0218-0x0233, which have been added
             // since Unicode 3.0.0: the five letters must form a single word.
             bi.setText("\u0216\u0217\u0218\u0219\u021A");
             bi.first();
             pos = bi.next();
             if (pos != 5) {
                 errln("Word break failure: failed to stop at 5 and bounded at " + pos);
             }

             // ---- line breaking, US locale ----
             bi = BreakIterator.getLineInstance(Locale.US);

             // <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>
             // U+301F has changed its category from Ps to Pe since Unicode 2.1.
             bi.setText("32\u301f1");
             bi.first();
             pos = bi.next();
             if (pos != 3) {
                 errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + pos);
             }

             // Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>,
             // which have been added since Unicode 3.0.0.
             bi.setText("\u1820\u1806\u1821");
             bi.first();
             pos = bi.next();
             if (pos != 2) {
                 errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + pos);
             }

             /* Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)>, which have been
              * added since Unicode 3.0.0.
              *
              * Disabled -- Richard: fail to pass, refer to #3550
              iter.setText("\u17E0\u17DB\u17E1");
              i = iter.first();
              i = iter.next();
              if (i != 1) {
              errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + i);
              }
              i = iter.next();
              if (i != 3) {
              errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + i);
              }*/

             // Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)>, which have
             // been added since Unicode 3.0.0.
             bi.setText("\u1692\u1680\u1696");
             bi.first();
             pos = bi.next();
             if (pos != 2) {
                 errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + pos);
             }

             // ---- line breaking, Thai locale ----
             // Confirm changes in BreakIteratorRules_th.java have been reflected.
             bi = BreakIterator.getLineInstance(new Locale("th", ""));

             // Thai <Seven(Nd)>
             //      <Left Double Quotation Mark(Pi)>
             //      <Five(Nd)>
             //      <Right Double Quotation Mark(Pf)>
             //      <Three(Nd)>
             bi.setText("\u0E57\u201C\u0E55\u201D\u0E53");
             bi.first();
             pos = bi.next();
             if (pos != 1) {
                 errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + pos);
             }
             pos = bi.next();
             if (pos != 4) {
                 errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + pos);
             }
         }
         catch (ClassCastException notOldIterator) {
             logln("New Break Iterator, skipping old test");
         }
     }

     /**
      * Walks a default word iterator over "boo." and reports an error unless it
      * yields boundaries at 0 (start), 3 (before the period) and 4 (end).
      *
      * @bug 4068137
      */
     public void TestEndBehavior()
     {
         BreakIterator words = BreakIterator.getWordInstance();
         words.setText("boo.");

         if (words.first() != 0) {
             errln("Didn't get break at beginning of string.");
         }

         if (words.next() != 3) {
             errln("Didn't get break before period in \"boo.\"");
         }

         // The end boundary is accepted either at the iterator's current position
         // or one step further on; next() is only called when current() is not 4.
         boolean reachedEnd = words.current() == 4 || words.next() == 4;
         if (!reachedEnd) {
             errln("Didn't get break at end of string.");
         }
     }

     // The following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
     /**
      * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
      * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
      **/
     /**
      * Exercises following(), preceding() and isBoundary() on a word iterator
      * (default locale) over the text "aaa bbb ccc".
      **/
     public void TestPreceding() {
         BreakIterator words = BreakIterator.getWordInstance(Locale.getDefault());
         words.setText("aaa bbb ccc");
         words.first();

         // The first four boundaries after the start of the text. stop[k] is the
         // value the error messages below refer to as p(k+1).
         int[] stop = new int[4];
         for (int k = 0; k < stop.length; k++) {
             stop[k] = words.next();
         }

         // Probe one position past the second boundary, forwards then backwards.
         int probe = stop[1] + 1;
         int after = words.following(probe);
         int before = words.preceding(probe);

         if (after != stop[2]) {
             errln("IntlTestTextBoundary::TestPreceding: f!=p3");
         }
         if (before != stop[1]) {
             errln("IntlTestTextBoundary::TestPreceding: p!=p2");
         }

         // Boundaries 1-2 and 3-4 are each expected to be exactly one apart.
         if (stop[0] + 1 != stop[1]) {
             errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
         }
         if (stop[2] + 1 != stop[3]) {
             errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
         }

         boolean boundariesOk = words.isBoundary(stop[1])
                 && !words.isBoundary(probe)
                 && words.isBoundary(stop[2]);
         if (!boundariesOk) {
             errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
         }
     }

     /**
      * Test Thai word break: builds the list of expected Thai words and hands it,
      * together with a Thai-locale word iterator, to generalIteratorTest().
      **/
     public void TestThaiWordBreak() {
         // @suwit -- Thai sample data from GVT Guideline.
         // The number after each entry is the running offset at which that word
         // ends once all entries are laid end to end.
         //
         // An older sample, taken from a Thai translation of the Wizard of Oz,
         // used to live here; it was removed because it is not a good test case
         // for the word-break API.
         String[] sampleWords = {
             "\u0E2B\u0E19\u0E36\u0E48\u0E07",               // 5
             "\u0E04\u0E33",                                 // 7
             "\u0E44\u0E17\u0E22",                           // 10
             "\u0E2A\u0E32\u0E21\u0E32\u0E23\u0E16",         // 16
             "\u0E1B\u0E23\u0E30\u0E01\u0E2D\u0E1A",         // 22
             "\u0E14\u0E49\u0E27\u0E22",                     // 26
             "\u0e2b\u0e25\u0e32\u0e22",                     // 30
             "\u0e1e\u0e22\u0e32\u0e07\u0e04\u0e4c"          // 36
         };

         Vector thaiWordSelection = new Vector();
         for (int k = 0; k < sampleWords.length; k++) {
             thaiWordSelection.addElement(sampleWords[k]);
         }

         BreakIterator thaiWords = BreakIterator.getWordInstance(new Locale("th","",""));
         generalIteratorTest(thaiWords, thaiWordSelection);
     }

     /**
      * Bug 4450804.
      *
      * Runs the shared line-break iterator through generalIteratorTest() with a
      * sentence containing an apostrophe-quoted word and a contraction; each
      * entry below is one expected piece, so "Don't " and "'foobles'. " are
      * expected to stay whole.
      */
     public void TestLineBreakContractions() {
         String[] pieces = {
             "These ",
             "are ",
             "'foobles'. ",
             "Don't ",
             "you ",
             "like ",
             "them?"
         };

         Vector expected = new Vector();
         for (int k = 0; k < pieces.length; k++) {
             expected.addElement(pieces[k]);
         }
         generalIteratorTest(lineBreak, expected);
     }
 }