// icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java - external/github.com/unicode-org/icu - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html#License
 /*
  *******************************************************************************
  * Copyright (C) 1996-2016, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */
 package com.ibm.icu.dev.test.rbbi;

 //Regression testing of RuleBasedBreakIterator
 //
 //  TODO:  These tests should be mostly retired.
 //          Much of the test data that was originally here was removed when the RBBI rules
 //            were updated to match the Unicode boundary TRs, and the data was found to be invalid.
 //          Much of the remaining data has been moved into the rbbitst.txt test data file,
 //            which is common between ICU4C and ICU4J.  The remaining test data should also be moved,
 //            or simply retired if it is no longer interesting.
 import java.text.CharacterIterator;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;

 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.RBBIDataWrapper;
 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.RuleBasedBreakIterator;
 import com.ibm.icu.util.CodePointTrie;
 import com.ibm.icu.util.ULocale;


 @RunWith(JUnit4.class)
 public class RBBITest extends TestFmwk {
     /** Creates the test instance; the constructor performs no setup of its own. */
     public RBBITest() {
         super();
     }


     /**
      * Exercises the Thai ("th") word and line break iterators: forward and backward word
      * iteration over mixed Latin/Thai text, word and line iteration over a Thai sequence
      * containing spaces, and a few extra calls (preceding, following, getRuleStatusVec)
      * made for code coverage.
      */
     @Test
     public void TestThaiDictionaryBreakIterator() {
         // Boundaries expected while iterating forward, immediately followed by the
         // boundaries expected while iterating back; one cursor walks the whole array.
         final int[] expectedBreaks = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
         final char[] mixedChars = {
                 0x0041, 0x0020,
                 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07,
                 0x0020, 0x0041
         };
         final String mixedText = new String(mixedChars);

         final ULocale thaiLocale = ULocale.createCanonical("th");
         final BreakIterator wordIter = BreakIterator.getWordInstance(thaiLocale);
         wordIter.setText(mixedText);

         int cursor = 0;

         // Forward pass
         for (int pos = wordIter.next(); pos != BreakIterator.DONE; pos = wordIter.next()) {
             final int want = expectedBreaks[cursor];
             cursor++;
             if (pos != want) {
                 errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + pos + ".\nShould have been " + want);
             }
         }

         // Backward pass; the cursor deliberately continues from where the forward pass stopped.
         for (int pos = wordIter.previous(); pos != BreakIterator.DONE; pos = wordIter.previous()) {
             final int want = expectedBreaks[cursor];
             cursor++;
             if (pos != want) {
                 errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + pos + ".\nShould have been " + want);
             }
         }

         // Invalid sequence and spaces
         final char[] invalidChars = {
                 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
                 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
                 0x0E16, 0x0E49, 0x0E33
         };
         final int[] expectedWordResult = {
                 2, 3, 6, 10, 11, 15, 17, 20, 22
         };
         final int[] expectedLineResult = {
                 3, 6, 11, 15, 17, 20, 22
         };
         final String invalidText = new String(invalidChars);

         // Word breaks; the boundary at the end of the text is not compared.
         BreakIterator thaiIter = BreakIterator.getWordInstance(new ULocale("th"));
         thaiIter.setText(invalidText);
         cursor = 0;
         for (int pos = thaiIter.next();
                 pos != BreakIterator.DONE && pos < invalidChars.length;
                 pos = thaiIter.next()) {
             final int want = expectedWordResult[cursor];
             cursor++;
             if (pos != want) {
                 errln("Incorrect break given by thai word break iterator. Expected: " + want + " Got: " + pos);
             }
         }

         // Line breaks; the boundary at the end of the text is not compared.
         thaiIter = BreakIterator.getLineInstance(new ULocale("th"));
         thaiIter.setText(invalidText);
         cursor = 0;
         for (int pos = thaiIter.next();
                 pos != BreakIterator.DONE && pos < invalidChars.length;
                 pos = thaiIter.next()) {
             final int want = expectedLineResult[cursor];
             cursor++;
             if (pos != want) {
                 errln("Incorrect break given by thai line break iterator. Expected: " + want + " Got: " + pos);
             }
         }

         // Improve code coverage
         final int before = thaiIter.preceding(expectedLineResult[1]);
         if (before != expectedLineResult[0]) {
             errln("Incorrect preceding position.");
         }
         final int after = thaiIter.following(expectedLineResult[1]);
         if (after != expectedLineResult[2]) {
             errln("Incorrect following position.");
         }
         final int[] statusValues = new int[2];
         final int statusCount = ((RuleBasedBreakIterator) thaiIter).getRuleStatusVec(statusValues);
         if (!(statusCount == 1 && statusValues[0] == 0)) {
             errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
         }
     }


    // TODO: Move these test cases to rbbitst.txt if they aren't there already, then remove this test. It is redundant.
     /**
      * Runs a small table of (break type, locale, text, expected offsets) cases. Each case
      * is iterated forwards and compared with the expected offsets; if that matches, the
      * iterator is walked backwards and compared with the offsets found going forward.
      */
     @Test
     public void TestTailoredBreaks() {
         class TBItem {
             // Upper bound on the number of boundaries collected for one case.
             private static final int MAX_OFFSETS = 128;

             private final int     kind;
             private final ULocale where;
             private final String  sample;
             private final int[]   wanted;

             TBItem(int typ, ULocale loc, String txt, int[] eOffs) {
                 kind   = typ;
                 where  = loc;
                 sample = txt;
                 wanted = eOffs;
             }

             // True when the first 'count' entries of 'found' equal the expected offsets exactly.
             private boolean offsetsMatchExpected(int[] found, int count) {
                 boolean same = (count == wanted.length);
                 for (int i = 0; same && i < count; i++) {
                     same = (found[i] == wanted[i]);
                 }
                 return same;
             }

             // Renders the first 'length' offsets as " a b c" for use in failure messages.
             private String formatOffsets(int[] offsets, int length) {
                 StringBuilder out = new StringBuilder(4 * MAX_OFFSETS);
                 for (int i = 0; i < length; i++) {
                     out.append(' ').append(offsets[i]);
                 }
                 return out.toString();
             }

             // Common leading part of both failure messages: the case description and expectations.
             private String failurePrefix() {
                 String shown = sample;
                 if (sample.length() > 16) {
                     shown = sample.substring(0, 16);
                 }
                 return "For type " + kind + " " + where + ", text \"" + shown + "...\"" +
                         "; expect " + wanted.length + " offsets:" + formatOffsets(wanted, wanted.length);
             }

             public void doTest() {
                 BreakIterator iter;
                 switch (kind) {
                     case BreakIterator.KIND_CHARACTER:
                         iter = BreakIterator.getCharacterInstance(where);
                         break;
                     case BreakIterator.KIND_WORD:
                         iter = BreakIterator.getWordInstance(where);
                         break;
                     case BreakIterator.KIND_LINE:
                         iter = BreakIterator.getLineInstance(where);
                         break;
                     case BreakIterator.KIND_SENTENCE:
                         iter = BreakIterator.getSentenceInstance(where);
                         break;
                     default:
                         errln("Unsupported break iterator type " + kind);
                         return;
                 }
                 iter.setText(sample);

                 // Forward iteration: collect boundaries until DONE or the buffer is full.
                 final int[] found = new int[MAX_OFFSETS];
                 int foundCount = 0;
                 while (foundCount < MAX_OFFSETS) {
                     final int pos = iter.next();
                     if (pos == BreakIterator.DONE) {
                         break;
                     }
                     found[foundCount++] = pos;
                 }

                 if (!offsetsMatchExpected(found, foundCount)) {
                     errln(failurePrefix() +
                             "; found " + foundCount + " offsets fwd:" + formatOffsets(found, foundCount));
                     return;
                 }

                 // Backward iteration: starting from the last boundary found, each previous()
                 // must return the boundary that precedes it in the forward list.
                 for (int idx = foundCount - 2; idx >= 0; idx--) {
                     final int back = iter.previous();
                     if (back != found[idx]) {
                         errln(failurePrefix() +
                                 "; found rev offset " + back + " where expect " + found[idx]);
                         break;
                     }
                 }
             }
         }

         // KIND_SENTENCE "el"
         final String elSentText     = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
                                       "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
         final int[]  elSentTOffsets = { 8, 14, 20, 27, 35, 36 };
         final int[]  elSentROffsets = {        20, 27, 35, 36 };
         // KIND_CHARACTER "th"
         final String thCharText     = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " +
                                       "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " +
                                       "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 ";
         final int[]  thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
                                         12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
                                         29, 30, 32, 33, 35, 37, 38, 39, 40, 41 };
         // Starting in Unicode 6.1, root behavior should be the same as Thai above, so the
         // root case below reuses thCharTOffsets. The former root-specific expectations were:
         //final int[]  thCharROffsets = { 1,    3, 5, 6, 7, 8, 9,     11,
         //                                12, 13, 15,     17, 19, 20, 22,     24,     26, 27, 28,
         //                                29,     32, 33, 35, 37, 38,     40, 41 };

         final TBItem[] cases = {
             new TBItem( BreakIterator.KIND_SENTENCE,  new ULocale("el"),          elSentText,   elSentTOffsets   ),
             new TBItem( BreakIterator.KIND_SENTENCE,  ULocale.ROOT,               elSentText,   elSentROffsets   ),
             new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"),          thCharText,   thCharTOffsets   ),
             new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT,               thCharText,   thCharTOffsets   ),
         };
         for (TBItem item : cases) {
             item.doTest();
         }
     }

     /*
      * Tests the method public Object clone(): cloning an iterator whose text has been
      * set to null must not throw, and the clone must also report null text.
      */
     @Test
     public void TestClone() {
         final RuleBasedBreakIterator source = new RuleBasedBreakIterator(".;");
         try {
             source.setText((CharacterIterator) null);
             final RuleBasedBreakIterator copy = (RuleBasedBreakIterator) source.clone();
             final CharacterIterator copiedText = copy.getText();
             if (copiedText != null) {
                 errln("RuleBasedBreakIterator.clone() was suppose to return "
                         + "the same object because fText is set to null.");
             }
         } catch (Exception e) {
             errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception.");
         }
     }

     /*
      * Tests the method public boolean equals(Object that) for iterators with null text
      * and for arguments that are not break iterators.
      */
     @Test
     public void TestEquals() {
         final RuleBasedBreakIterator first = new RuleBasedBreakIterator(".;");
         final RuleBasedBreakIterator second = new RuleBasedBreakIterator(".;");

         // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true

         // Only one of the two iterators has null text: they must compare unequal.
         first.setText((CharacterIterator) null);
         final boolean equalWithOneNullText = first.equals(second);
         if (equalWithOneNullText) {
             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
                     + "true when the other object has a null fText.");
         }

         // Both iterators have null text: they must compare equal.
         second.setText((CharacterIterator) null);
         final boolean equalWithBothNullText = first.equals(second);
         if (!equalWithBothNullText) {
             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
                     + "false when both objects has a null fText.");
         }

         // Arguments of unrelated types must compare unequal.
         final boolean equalsInteger = first.equals(0);
         if (equalsInteger) {
             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0.");
         }
         final boolean equalsDouble = first.equals(0.0);
         if (equalsDouble) {
             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0.");
         }
         final boolean equalsString = first.equals("0");
         if (equalsString) {
             errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
                     + "false when comparing to string '0'.");
         }
     }

     /*
      * Tests the method public int first(), with null text and with ordinary text.
      */
     @Test
     public void TestFirst() {
         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(".;");

         // With null text, first() is expected to report DONE.
         iter.setText((CharacterIterator) null);
         final int firstOnNullText = iter.first();
         assertEquals("RuleBasedBreakIterator.first()", BreakIterator.DONE, firstOnNullText);

         // With real text, first() is expected at 0 and the following boundary at 1.
         iter.setText("abc");
         final int firstOnText = iter.first();
         assertEquals("RuleBasedBreakIterator.first()", 0, firstOnText);
         final int nextOnText = iter.next();
         assertEquals("RuleBasedBreakIterator.next()", 1, nextOnText);
     }

     /*
      * Tests the method public int last() on an iterator whose text is null.
      */
     @Test
     public void TestLast() {
         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(".;");
         iter.setText((CharacterIterator) null);

         // With null text, last() is expected to report DONE.
         final int lastOnNullText = iter.last();
         if (lastOnNullText == BreakIterator.DONE) {
             return;
         }
         errln("RuleBasedBreakIterator.last() was suppose to return "
                 + "BreakIterator.DONE when the object has a null fText.");
     }

     /*
      * Tests the method public int following(int offset) with an offset that lies
      * before the start of the text.
      */
     @Test
     public void TestFollowing() {
         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(".;");
         iter.setText("dummy");

         // An offset below the start of the text is expected to yield boundary 0.
         final int boundary = iter.following(-1);
         if (boundary == 0) {
             return;
         }
         errln("RuleBasedBreakIterator.following(-1) was suppose to return "
                 + "0 when the object has a fText of dummy.");
     }

     /*
      * Tests the method public int preceding(int offset), first on an iterator whose
      * text is null and then with an offset that lies before the start of real text.
      */
     @Test
     public void TestPreceding() {
         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;");
         // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
         rbbi.setText((CharacterIterator)null);
         if (rbbi.preceding(-1) != BreakIterator.DONE) {
             // The check above expects DONE, so the message must say DONE rather than 0.
             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
                     + "BreakIterator.DONE when the object has a fText of null.");
         }

         // Tests when "else if (offset < fText.getBeginIndex())" is true
         rbbi.setText("dummy");
         if (rbbi.preceding(-1) != 0) {
             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
                     + "0 when the object has a fText of dummy.");
         }
     }

     /* Tests the method public int current(), with null text and with ordinary text. */
     @Test
     public void TestCurrent() {
         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(".;");

         // With null text, current() is expected to report DONE.
         iter.setText((CharacterIterator) null);
         final int currentOnNullText = iter.current();
         if (currentOnNullText != BreakIterator.DONE) {
             errln("RuleBasedBreakIterator.current() was suppose to return "
                     + "BreakIterator.DONE when the object has a fText of null.");
         }

         // Right after setText, current() is expected at 0.
         iter.setText("dummy");
         final int currentOnText = iter.current();
         if (currentOnText != 0) {
             errln("RuleBasedBreakIterator.current() was suppose to return "
                     + "0 when the object has a fText of dummy.");
         }
     }

     /**
      * Bug 7547: constructing a RuleBasedBreakIterator from an empty rule string must
      * throw IllegalArgumentException; any other outcome fails the test.
      */
     @Test
     public void TestBug7547() {
         boolean rejected = false;
         try {
             new RuleBasedBreakIterator("");
         } catch (IllegalArgumentException expected) {
             // expected exception with empty rules.
             rejected = true;
         } catch (Exception e) {
             fail("TestBug7547: Unexpected exception while creating RuleBasedBreakIterator: " + e);
         }
         if (!rejected) {
             fail("TestBug7547: RuleBasedBreakIterator constructor failed to throw an exception with empty rules.");
         }
     }

     /**
      * Bug 12797: with the chained rules below, the first boundary after the start of
      * "abc" is expected at offset 3.
      */
     @Test
     public void TestBug12797() {
         final String chainedRules = "!!chain; !!forward; $v=b c; a b; $v; !!reverse; .*;";
         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(chainedRules);
         iter.setText("abc");

         iter.first();
         final int firstBreak = iter.next();
         assertEquals("Rule chaining test", 3,  firstBreak);
     }


     /**
      * Bug 12873: clones of a break iterator must be usable concurrently. Several threads
      * each clone one shared line break iterator and repeatedly iterate over the same text.
      */
     @Test
     public void TestBug12873() {
         // The bug: RuleBasedBreakIterator's internal structure for recording potential
         // look-ahead matches was not cloned along with the iterator, so the original and
         // its clones collided when used concurrently.

         // The Line Break rules for Regional Indicators use look-ahead rules and show the bug.
         // 1F1E6 = \uD83C\uDDE6 = REGIONAL INDICATOR SYMBOL LETTER A
         // Regional indicators group into pairs, so breaks are expected after every two code
         // points, which is after every four 16 bit code units.

         final String dataToBreak = "\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6";
         final RuleBasedBreakIterator sharedIter = (RuleBasedBreakIterator)BreakIterator.getLineInstance();
         // Holds an assertion failure raised inside a worker thread.
         final AssertionError[] workerFailure = new AssertionError[1];

         class CloneWorker implements Runnable {
             @Override
             public void run() {
                 try {
                     final RuleBasedBreakIterator ownIter = (RuleBasedBreakIterator)sharedIter.clone();
                     ownIter.setText(dataToBreak);
                     for (int pass = 0; pass < 100; pass++) {
                         int expectedBreak = 0;
                         int actualBreak = ownIter.first();
                         while (actualBreak != BreakIterator.DONE) {
                             assertEquals("", expectedBreak, actualBreak);
                             actualBreak = ownIter.next();
                             expectedBreak += 4;
                         }
                         // The counter has stepped once past the final boundary at the text end.
                         assertEquals("", dataToBreak.length()+4, expectedBreak);
                     }
                 } catch (AssertionError e) {
                     workerFailure[0] = e;
                 }
             }
         }

         final List<Thread> workers = new ArrayList<>();
         for (int n = 0; n < 4; ++n) {
             workers.add(new Thread(new CloneWorker()));
         }
         for (Thread worker : workers) {
             worker.start();
         }
         for (Thread worker : workers) {
             try {
                 worker.join();
             } catch (InterruptedException e) {
                 fail(e.toString());
             }
         }

         // JUnit wont see failures from within the worker threads, so
         // rethrow here if one was recorded.
         final AssertionError recorded = workerFailure[0];
         if (recorded != null) {
             throw recorded;
         }
     }

     /**
      * Builds a text containing a four-code-point "word" for every code point, separated
      * by spaces, and checks for every break iterator kind that the boundaries returned
      * by first()/next() are strictly increasing.
      */
     @Test
     public void TestBreakAllChars() {
         // Make a "word" from each code point, separated by spaces.
         // For dictionary based breaking, runs the start-of-range
         // logic with all possible dictionary characters.
         StringBuilder sb = new StringBuilder();
         for (int c=0; c<0x110000; ++c) {
             sb.appendCodePoint(c);
             sb.appendCodePoint(c);
             sb.appendCodePoint(c);
             sb.appendCodePoint(c);
             sb.append(' ');
         }
         String s = sb.toString();

         for (int breakKind=BreakIterator.KIND_CHARACTER; breakKind<=BreakIterator.KIND_TITLE; ++breakKind) {
             RuleBasedBreakIterator bi =
                     (RuleBasedBreakIterator)BreakIterator.getBreakInstance(ULocale.ENGLISH, breakKind);
             bi.setText(s);
             int lastb = -1;
             for (int b = bi.first(); b != BreakIterator.DONE; b = bi.next()) {
                 assertTrue("(lastb < b) : (" + lastb + " < " + b + ")", lastb < b);
                 // Remember this boundary so the next one is compared against it;
                 // without this update the assertion always compared against -1.
                 lastb = b;
             }
         }
     }

     /**
      * Bug 12918: iterates word boundaries over a string that crashed ICU4C and checks
      * that each boundary returned by next() lies beyond the previous one.
      */
     @Test
     public void TestBug12918() {
         // This test triggered an assertion failure in ICU4C, in dictbe.cpp
         // The equivalent code in ICU4J is structured slightly differently,
         // and does not appear vulnerable to the same issue.
         //
         // \u3325 decomposes with normalization, then the CJK dictionary
         // finds a break within the decomposition.

         String crasherString = "\u3325\u4a16";
         BreakIterator iter = BreakIterator.getWordInstance(ULocale.ENGLISH);
         iter.setText(crasherString);
         iter.first();
         int pos;
         int lastPos = -1;
         while((pos = iter.next()) != BreakIterator.DONE) {
             assertTrue("", pos > lastPos);
             // Track the previous boundary; without this update the assertion
             // always compared against -1 and could never fail.
             lastPos = pos;
         }
     }

     /**
      * Bug 12519: word break iterators for different locales compare equal (locale does not
      * take part in equality), and a clone keeps both equality with, and the valid locale
      * of, the iterator it was cloned from.
      */
     @Test
     public void TestBug12519() {
         final RuleBasedBreakIterator english =
                 (RuleBasedBreakIterator)BreakIterator.getWordInstance(ULocale.ENGLISH);
         final RuleBasedBreakIterator french =
                 (RuleBasedBreakIterator)BreakIterator.getWordInstance(ULocale.FRANCE);

         final ULocale englishValid = english.getLocale(ULocale.VALID_LOCALE);
         assertEquals("", ULocale.ENGLISH, englishValid);
         final ULocale frenchValid = french.getLocale(ULocale.VALID_LOCALE);
         assertEquals("", ULocale.FRENCH, frenchValid);
         assertEquals("Locales do not participate in BreakIterator equality.", english, french);

         // Clone of the English iterator
         final RuleBasedBreakIterator englishCopy = (RuleBasedBreakIterator)english.clone();
         assertEquals("", english, englishCopy);
         assertEquals("", ULocale.ENGLISH, englishCopy.getLocale(ULocale.VALID_LOCALE));

         // Clone of the French iterator
         final RuleBasedBreakIterator frenchCopy = (RuleBasedBreakIterator)french.clone();
         assertEquals("", french, frenchCopy);
         assertEquals("", ULocale.FRENCH, frenchCopy.getLocale(ULocale.VALID_LOCALE));
     }

     /**
      * Worker thread that repeatedly computes the word boundaries of a fixed text using
      * clones of one shared break iterator, and compares each result with the boundaries
      * computed once in the constructor.
      */
     static class T13512Thread extends Thread {
         private final String fText;
         /** Boundaries from the most recent iteration of run(); null until run() has computed one. */
         public List<Integer> fBoundaries;
         /** Boundaries computed in the constructor, used as the reference result. */
         public List<Integer> fExpectedBoundaries;

         T13512Thread(String text) {
             fText = text;
             fExpectedBoundaries = getBoundary(fText);
         }

         /**
          * Recomputes the boundaries up to 10000 times, stopping at the first result that
          * differs from the reference so that the mismatching value stays in fBoundaries.
          */
         @Override
         public void run() {
             for (int i= 0; i<10000; ++i) {
                 fBoundaries = getBoundary(fText);
                 if (!fBoundaries.equals(fExpectedBoundaries)) {
                     break;
                 }
             }
         }

         // Shared prototype; each call to getBoundary() works on its own clone of it.
         private static final BreakIterator BREAK_ITERATOR_CACHE = BreakIterator.getWordInstance(ULocale.ROOT);

         /**
          * Returns every boundary in toParse, from first() up to (not including) DONE,
          * as reported by a clone of the shared root-locale word break iterator.
          */
         public static List<Integer> getBoundary(String toParse) {
             List<Integer> retVal = new ArrayList<>();
             BreakIterator bi = (BreakIterator) BREAK_ITERATOR_CACHE.clone();
             bi.setText(toParse);
             for (int boundary=bi.first(); boundary != BreakIterator.DONE; boundary = bi.next()) {
                 retVal.add(boundary);
             }
             return retVal;
         }
     }

     /**
      * Bug 13512: runs two T13512Thread workers concurrently, one on Thai text and one on
      * Japanese text, and checks that the boundaries each worker last computed equal the
      * boundaries it computed up front.
      */
     @Test
     public void TestBug13512() {
         final String japaneseText = "コンピューターは、本質的には数字しか扱うことができません。コンピューターは、文字や記号などのそれぞれに番号を割り振る"
                 + "ことによって扱えるようにします。ユニコードが出来るまでは、これらの番号を割り振る仕組みが何百種類も存在しました。どの一つをとっても、十分な"
                 + "文字を含んではいませんでした。例えば、欧州連合一つを見ても、そのすべての言語をカバーするためには、いくつかの異なる符号化の仕"
                 + "組みが必要でした。英語のような一つの言語に限っても、一つだけの符号化の仕組みでは、一般的に使われるすべての文字、句読点、技術"
                 + "的な記号などを扱うには不十分でした。";

         final String thaiText = "โดยพื้นฐานแล้ว, คอมพิวเตอร์จะเกี่ยวข้องกับเรื่องของตัวเลข. คอมพิวเตอร์จัดเก็บตัวอักษรและอักขระอื่นๆ"
                 + " โดยการกำหนดหมายเลขให้สำหรับแต่ละตัว. ก่อนหน้าที่๊ Unicode จะถูกสร้างขึ้น, ได้มีระบบ encoding "
                 + "อยู่หลายร้อยระบบสำหรับการกำหนดหมายเลขเหล่านี้. ไม่มี encoding ใดที่มีจำนวนตัวอักขระมากเพียงพอ: ยกตัวอย่างเช่น, "
                 + "เฉพาะในกลุ่มสหภาพยุโรปเพียงแห่งเดียว ก็ต้องการหลาย encoding ในการครอบคลุมทุกภาษาในกลุ่ม. "
                 + "หรือแม้แต่ในภาษาเดี่ยว เช่น ภาษาอังกฤษ ก็ไม่มี encoding ใดที่เพียงพอสำหรับทุกตัวอักษร, "
                 + "เครื่องหมายวรรคตอน และสัญลักษณ์ทางเทคนิคที่ใช้กันอยู่ทั่วไป.\n" +
                 "ระบบ encoding เหล่านี้ยังขัดแย้งซึ่งกันและกัน. นั่นก็คือ, ในสอง encoding สามารถใช้หมายเลขเดียวกันสำหรับตัวอักขระสองตัวที่แตกต่างกัน,"
                 + "หรือใช้หมายเลขต่างกันสำหรับอักขระตัวเดียวกัน. ในระบบคอมพิวเตอร์ (โดยเฉพาะเซิร์ฟเวอร์) ต้องมีการสนับสนุนหลาย"
                 + " encoding; และเมื่อข้อมูลที่ผ่านไปมาระหว่างการเข้ารหัสหรือแพล็ตฟอร์มที่ต่างกัน, ข้อมูลนั้นจะเสี่ยงต่อการผิดพลาดเสียหาย.";

         final T13512Thread thaiWorker = new T13512Thread(thaiText);
         final T13512Thread japaneseWorker = new T13512Thread(japaneseText);
         try {
             // Start both workers before waiting on either so that they overlap.
             thaiWorker.start();
             japaneseWorker.start();
             thaiWorker.join();
             japaneseWorker.join();
         } catch (Exception e) {
             fail(e.toString());
         }
         assertEquals("", thaiWorker.fExpectedBoundaries, thaiWorker.fBoundaries);
         assertEquals("", japaneseWorker.fExpectedBoundaries, japaneseWorker.fBoundaries);
     }

     /**
      * Bug 12677: the rule text returned by toString() has comments stripped, and '#'
      * characters that do not introduce a comment (inside a set, inside quotes) survive.
      */
     @Test
     public void TestBug12677() {
         // Each source line carries a trailing comment; two of them also contain a '#'
         // that is part of the rule itself.
         final StringBuilder source = new StringBuilder();
         source.append("!!forward; \n");
         source.append("$x = [ab#];  # a set with a # literal. \n");
         source.append(" # .;        # a comment that looks sort of like a rule.   \n");
         source.append(" '#' '?';    # a rule with a quoted #   \n");

         final RuleBasedBreakIterator iter = new RuleBasedBreakIterator(source.toString());
         final String strippedRules = iter.toString();        // getRules() in C++
         assertEquals("Break Iterator rule stripping test", "!!forward;$x=[ab#];'#''?';",  strippedRules);
     }

     /**
      * Rebuilds the English line break iterator from its rule source and checks that the
      * resulting forward state table has no duplicate character-category columns and no
      * duplicate state rows.
      */
     @Test
     public void TestTableRedundancies() {
         // Build a break iterator from source rules.
         // Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
         final RuleBasedBreakIterator prebuilt =
                 (RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH);
         final RuleBasedBreakIterator rebuilt = new RuleBasedBreakIterator(prebuilt.toString());

         final RBBIDataWrapper data = rebuilt.fRData;
         final RBBIDataWrapper.RBBIStateTable forward = data.fFTable;
         final int categoryCount = data.fHeader.fCatCount;

         // One string per column (character category), holding that column's entry
         // from every state row starting with row 1.
         final List<String> columnSignatures = new ArrayList<>();
         for (int col = 0; col < categoryCount; col++) {
             final StringBuilder signature = new StringBuilder();
             for (int state = 1; state < forward.fNumStates; state++) {
                 final int rowStart = data.getRowIndex(state);
                 final char cell = forward.fTable[rowStart + RBBIDataWrapper.NEXTSTATES + col];
                 signature.append(cell);
             }
             columnSignatures.add(signature.toString());
         }

         // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
         // A column below fDictCategoriesStart is compared only with later columns below that
         // index; any other column is compared with every later column.
         for (int left = 1; left < categoryCount; left++) {
             final int limit;
             if (left < forward.fDictCategoriesStart) {
                 limit = forward.fDictCategoriesStart;
             } else {
                 limit = categoryCount;
             }
             final String leftSignature = columnSignatures.get(left);
             for (int right = left + 1; right < limit; right++) {
                 assertFalse(String.format("Duplicate columns (%d, %d)", left, right),
                         leftSignature.equals(columnSignatures.get(right)));
             }
         }

         // One string per state row: accepting, lookahead and tags fields followed by the
         // next-state entry for every column.
         final List<String> rowSignatures = new ArrayList<>();
         for (int state = 0; state < forward.fNumStates; state++) {
             final int rowStart = data.getRowIndex(state);
             final StringBuilder signature = new StringBuilder();
             signature.append(forward.fTable[rowStart + RBBIDataWrapper.ACCEPTING]);
             signature.append(forward.fTable[rowStart + RBBIDataWrapper.LOOKAHEAD]);
             signature.append(forward.fTable[rowStart + RBBIDataWrapper.TAGSIDX]);
             for (int col = 0; col < categoryCount; col++) {
                 final char cell = forward.fTable[rowStart + RBBIDataWrapper.NEXTSTATES + col];
                 signature.append(cell);
             }
             rowSignatures.add(signature.toString());
         }

         // Check for duplicate states.
         for (int upper = 0; upper < forward.fNumStates; upper++) {
             final String upperSignature = rowSignatures.get(upper);
             for (int lower = upper + 1; lower < forward.fNumStates; lower++) {
                 assertFalse(String.format("Duplicate states (%d, %d)", upper, lower),
                         upperSignature.equals(rowSignatures.get(lower)));
             }
         }
     }

     /**
      * Bug 13447: getRuleStatus() must return the value corresponding to current(),
      * even after next() has returned DONE.
      */
     @Test
     public void TestBug13447() {
         final RuleBasedBreakIterator wordIter =
                 (RuleBasedBreakIterator)BreakIterator.getWordInstance(Locale.ENGLISH);
         wordIter.setText("1234");

         // Before any iteration the status is WORD_NONE.
         assertEquals("", BreakIterator.WORD_NONE, wordIter.getRuleStatus());

         // The whole number is a single word ending at offset 4.
         final int firstBreak = wordIter.next();
         assertEquals("", 4, firstBreak);
         assertEquals("", BreakIterator.WORD_NUMBER, wordIter.getRuleStatus());

         // Running off the end must leave both the position and the status unchanged.
         final int pastEnd = wordIter.next();
         assertEquals("", BreakIterator.DONE, pastEnd);
         assertEquals("", 4, wordIter.current());
         assertEquals("", BreakIterator.WORD_NUMBER, wordIter.getRuleStatus());
     }

     /**
      * Verifies that rebuilding the state tables from rule source for the standard break
      * iterator types yields the same forward and reverse tables as the ones imported
      * from ICU4C as part of the default data.
      */
     @Test
     public void TestTableRebuild() {
         final List<RuleBasedBreakIterator> standardIterators = new ArrayList<>();
         standardIterators.add((RuleBasedBreakIterator)BreakIterator.getCharacterInstance(Locale.ENGLISH));
         standardIterators.add((RuleBasedBreakIterator)BreakIterator.getWordInstance(Locale.ENGLISH));
         standardIterators.add((RuleBasedBreakIterator)BreakIterator.getSentenceInstance(Locale.ENGLISH));
         standardIterators.add((RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH));

         for (RuleBasedBreakIterator prebuilt : standardIterators) {
             // Round-trip through the rule source to force a rebuild of the tables.
             final RuleBasedBreakIterator rebuilt = new RuleBasedBreakIterator(prebuilt.toString());

             final boolean forwardSame =
                     RBBIDataWrapper.equals(prebuilt.fRData.fFTable, rebuilt.fRData.fFTable);
             assertTrue("Forward Table",      forwardSame);

             final boolean reverseSame =
                     RBBIDataWrapper.equals(prebuilt.fRData.fRTable, rebuilt.fRData.fRTable);
             assertTrue("Reverse Table",      reverseSame);
         }
     }

     /**
      * Shared driver for the tests that exercise 8 or 16 bit tries combined with
      * 8 or 16 bit state tables.
      *
      * Builds a break iterator from {@code numChar} rules, each a quoted literal made of one
      * character (starting at U+4E00) doubled, followed by a "." rule. It then checks the
      * sizes and storage widths of the compiled data and walks the test text forwards and
      * backwards, checking every boundary.
      *
      * @param numChar the number of doubled-character literal rules to generate
      * @param expectUCPTrieValueWidthIn8Bits whether the compiled trie is expected to use
      *        8 bit values
      * @param expectStateRowIn8Bits whether the forward and reverse state tables are expected
      *        to be flagged as having 8 bit rows
      */
     private void testTrieStateTable(int numChar, boolean expectUCPTrieValueWidthIn8Bits,
             boolean expectStateRowIn8Bits) {
         // The text is every character from U+4E00 to U+4FFF, each one doubled.
         StringBuilder textBuilder = new StringBuilder(2 * (0x5000 - 0x4e00));
         for (char ch = 0x4e00; ch < 0x5000; ch++) {
             textBuilder.append(ch).append(ch);
         }
         String text = textBuilder.toString();

         // Generate rules that are expected to produce numChar+4 character classes and
         // numChar+3 states (both are asserted below).
         StringBuilder ruleBuilder = new StringBuilder(100 + 6 * numChar);
         ruleBuilder.append("!!quoted_literals_only;");
         for (char ch = 0x4e00; ch < 0x4e00 + numChar; ch++) {
             ruleBuilder.append('\'').append(ch).append(ch).append("';");
         }
         ruleBuilder.append(".;");
         String rules = ruleBuilder.toString();

         RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);

         RBBIDataWrapper data = bi.fRData;
         RBBIDataWrapper.RBBIStateTable forwardTable = data.fFTable;
         RBBIDataWrapper.RBBIStateTable reverseTable = data.fRTable;

         boolean forwardRowsIn8Bits = (forwardTable.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
         boolean reverseRowsIn8Bits = (reverseTable.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
         boolean trieIn8Bits = data.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;

         assertEquals("Number of char classes mismatch numChar=" + numChar,
                 numChar + 4, data.fHeader.fCatCount);
         assertEquals("Number of states in Forward Table mismatch numChar=" + numChar,
                 numChar + 3, forwardTable.fNumStates);
         assertEquals("Number of states in Reverse Table mismatch numChar=" + numChar,
                 numChar + 3, reverseTable.fNumStates);
         assertEquals("Trie width mismatch numChar=" + numChar,
                 expectUCPTrieValueWidthIn8Bits, trieIn8Bits);
         assertEquals("Bits of Forward State table mismatch numChar=" + numChar,
                 expectStateRowIn8Bits, forwardRowsIn8Bits);
         assertEquals("Bits of Reverse State table mismatch numChar=" + numChar,
                 expectStateRowIn8Bits, reverseRowsIn8Bits);

         bi.setText(text);

         // Forward: the first numChar boundaries each follow a doubled pair (2, 4, 6, ...);
         // after that there is a boundary after every single character.
         int boundaryIndex = 0;
         for (int pos = bi.next(); pos > 0; pos = bi.next()) {
             boundaryIndex++;
             int expected = (boundaryIndex <= numChar) ? boundaryIndex * 2 : boundaryIndex + numChar;
             assertEquals("next() mismatch numChar=" + numChar, expected, pos);
         }

         // Backward: retrace the same boundaries in reverse order, stopping at offset 0.
         // boundaryIndex counts back down from the number of boundaries found going forward.
         for (int pos = bi.previous(); pos > 0; pos = bi.previous()) {
             boundaryIndex--;
             int expected = (boundaryIndex < numChar) ? boundaryIndex * 2 : boundaryIndex + numChar;
             assertEquals("previous() mismatch numChar=" + numChar, expected, pos);
         }
     }

     /**
      * 251 literal rules: testTrieStateTable() then expects 255 character classes and
      * 254 states, with both the trie and the state table rows stored in 8 bits.
      */
     @Test
     public void Test8BitsTrieWith8BitStateTable() {
         final boolean trieIn8Bits = true;
         final boolean stateRowsIn8Bits = true;
         testTrieStateTable(251, trieIn8Bits, stateRowsIn8Bits);
     }

     /**
      * 252 literal rules: testTrieStateTable() then expects 256 character classes and
      * 255 states, with the trie no longer in 8 bits but the state table rows still in 8 bits.
      */
     @Test
     public void Test16BitsTrieWith8BitStateTable() {
         final boolean trieIn8Bits = false;
         final boolean stateRowsIn8Bits = true;
         testTrieStateTable(252, trieIn8Bits, stateRowsIn8Bits);
     }

     /**
      * 253 literal rules: testTrieStateTable() then expects 257 character classes and
      * 256 states, with neither the trie nor the state table rows stored in 8 bits.
      */
     @Test
     public void Test16BitsTrieWith16BitStateTable() {
         final boolean trieIn8Bits = false;
         final boolean stateRowsIn8Bits = false;
         testTrieStateTable(253, trieIn8Bits, stateRowsIn8Bits);
     }

     /**
      * Test an 8 bit trie (UCPTRIE_VALUE_BITS_8) combined with 16 bit state table rows.
      *
      * Unlike the other trie/state-table width tests, the rule here is a single long literal
      * of one repeated character, which is a different way of getting a state table with
      * 16 bit rows.
      */
     @Test
     public void Test8BitsTrieWith16BitStateTable() {
         final int textLength = 510;
         final int literalLength = 254;

         // The text is 510 'a' characters.
         StringBuilder textBuilder = new StringBuilder(510);
         for (int n = 0; n < textLength; n++) {
             textBuilder.append('a');
         }
         String text = textBuilder.toString();

         // A literal of 254 'a' in the rule will cause 256 states.
         StringBuilder ruleBuilder = new StringBuilder(550);
         ruleBuilder.append("!!quoted_literals_only;'");
         for (int n = 0; n < literalLength; n++) {
             ruleBuilder.append('a');
         }
         ruleBuilder.append("';.;");
         String rules = ruleBuilder.toString();

         RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);

         RBBIDataWrapper data = bi.fRData;
         RBBIDataWrapper.RBBIStateTable forwardTable = data.fFTable;

         boolean rowsIn8Bits = (forwardTable.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
         boolean trieIn8Bits = data.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;
         assertFalse("State table should be in 16 bits", rowsIn8Bits);
         assertTrue("Trie should be in 8 bits", trieIn8Bits);

         bi.setText(text);

         // Expected break positions: 254, 508, 509, 510
         assertEquals("next()", 254, bi.next());

         // The remaining forward boundaries are consecutive, starting at 508.
         int expectedNext = 508;
         for (int pos = bi.next(); pos > 0; pos = bi.next()) {
             assertEquals("next()", expectedNext, pos);
             expectedNext++;
         }

         // Going backward from the end: 509, 508, then 254. The loop stops at offset 0.
         int stepsBack = 0;
         for (int pos = bi.previous(); pos > 0; pos = bi.previous()) {
             stepsBack++;
             int expectedPrevious = (pos < 508) ? 254 : 510 - stepsBack;
             assertEquals("previous()", expectedPrevious, pos);
         }
     }

     /**
      * Test that both compact (8 bit) and full sized (16 bit) rbbi tables work, and
      * that there are no problems with rules at the size that transitions between the two.
      *
      * A rule that matches a literal string, like 'abcdefghij', will require one state and
      * one character class per character in the string. So we can make a rule to tickle the
      * boundaries by using literal strings of various lengths.
      *
      * The exact literal length at which the rule builder switches from 8 bit to 16 bit data
      * is not pinned by this test. Instead it runs the literal over the range of 120 - 260
      * characters, requiring 8 bit data for the shortest rule and 16 bit data for the longest.
      * That allows some margin for changes to the number of values reserved by the rule
      * builder without breaking the test.
      */
     @Test
     public void TestTable_8_16_Bits() {
         // testStr serves as both the source of the rule string (truncated to the desired length)
         // and as test data to check matching behavior. A break rule consisting of the first 120
         // characters of testStr will match the first 120 chars of the full-length testStr.
         StringBuilder builder = new StringBuilder(0x200);
         for (char c=0x3000; c<0x3200; ++c) {
             builder.append(c);
         }
         String testStr = builder.toString();

         int startLength = 120;   // The shortest rule string to test.
         int endLength = 260;     // The longest rule string to test
         int increment = 1;
         for (int ruleLen=startLength; ruleLen <= endLength; ruleLen += increment) {
             // '#' is a placeholder for the literal; testStr itself contains no '#'.
             String ruleString = "!!quoted_literals_only; '#';"
                 .replace("#", testStr.substring(0, ruleLen));
             RuleBasedBreakIterator bi = new RuleBasedBreakIterator(ruleString);

             // Verify that the break iterator is functioning - that the first boundary found
             // in testStr is at the length of the rule string.
             bi.setText(testStr);
             assertEquals("The first boundary found in testStr should be at the length of the rule string",
                 ruleLen, bi.next());

             // Reverse iteration. Do a setText() first, to flush the break iterator's internal cache
             // of previously detected boundaries, thus forcing the engine to run the safe reverse rules.
             bi.setText(testStr);
             int result = bi.preceding(ruleLen);
             assertEquals("Reverse iteration should find the boundary at 0", 0, result);

             // Verify that the range of rule lengths being tested covers the transitions
             // from 8 to 16 bit data: the shortest rule must still be 8 bit and the
             // longest must already be 16 bit, for both the state table and the trie.
             RBBIDataWrapper dw = bi.fRData;
             RBBIDataWrapper.RBBIStateTable fwtbl = dw.fFTable;

             boolean has8BitRowData = (fwtbl.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
             boolean has8BitsTrie = dw.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;
             if (ruleLen == startLength) {
                 assertTrue("State table should be in 8 bits", has8BitRowData);
                 assertTrue("Trie should be in 8 bits", has8BitsTrie);
             }
             if (ruleLen == endLength) {
                 assertFalse("State table should be in 16 bits", has8BitRowData);
                 assertFalse("Trie should be in 16 bits", has8BitsTrie);
             }
         }
     }

     /* Test handling of a large number of look-ahead rules.
      * The number of rules in the test exceeds the implementation limits prior to the
      * improvements introduced with #13590.
      *
      * The test look-ahead rules have the form "AB / CE"; "CD / EG"; ...
      * The text being matched is sequential, "ABCDEFGHI..."
      *
      * The upshot is that the look-ahead rules all match on their preceding context,
      * and consequently must save a potential result, but then fail to match on their
      * trailing context, so that they don't actually cause a boundary.
      *
      * Additionally, a ".*" rule is included, so there are no boundaries unless a
      * look-ahead hard-break rule forces one.
      */
     @Test
     public void TestBug13590() {
         final int lookAheadRuleCount = 50;
         final char startingChar = '\u5000';

         StringBuilder ruleBuilder = new StringBuilder("!!quoted_literals_only; !!chain; .*;\n");

         // Each rule starts two characters after the previous one. After the loop,
         // ruleFirstChar is left holding the first character of the last rule added.
         char ruleFirstChar = 0;
         for (int ruleNum = 0; ruleNum < lookAheadRuleCount; ++ruleNum) {
             ruleFirstChar = (char) (startingChar + ruleNum*2);
             ruleBuilder.append('\'').append(ruleFirstChar).append((char)(ruleFirstChar+1))
                        .append("' / '")
                        .append((char)(ruleFirstChar+2)).append((char)(ruleFirstChar+4))
                        .append("';\n");
         }

         // Change the last rule added from the form "UV / WY" to "UV / WX".
         // Changes the rule so that it will match - all 4 chars are in ascending sequence.
         char lastRuleFinalChar = (char)(ruleFirstChar+4);
         char inSequenceChar = (char)(ruleFirstChar+3);
         String rulesStr = ruleBuilder.toString().replace(lastRuleFinalChar, inSequenceChar);

         RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rulesStr);
         // For debugging: bi.dump(System.out);

         // Sequential text, beginning 200 characters before the first character used in the rules.
         StringBuilder textBuilder = new StringBuilder();
         for (char c = (char) (startingChar-200); c < startingChar + lookAheadRuleCount*4; ++c) {
             textBuilder.append(c);
         }
         bi.setText(textBuilder);

         int breaksFound = 0;
         for (int pos = bi.next(); pos != BreakIterator.DONE; pos = bi.next()) {
             breaksFound++;
         }

         // Two matches are expected, one from the last rule that was explicitly modified,
         // and one at the end of the text.
         assertEquals("Wrong number of breaks found", 2, breaksFound);
     }

 }