| /* |
| ********************************************************************** |
| * Copyright (c) 2002-2008, International Business Machines * |
| * Corporation and others. All Rights Reserved. * |
| ********************************************************************** |
| */ |
| package com.ibm.icu.dev.test.perf; |
| |
| import java.io.FileInputStream; |
| import java.util.ArrayList; |
| |
| public class BreakIteratorPerformanceTest extends PerfTest { |
| |
| String fileContents; |
| |
| com.ibm.icu.text.BreakIterator iSentenceIter; |
| com.ibm.icu.text.BreakIterator iWordIter; |
| com.ibm.icu.text.BreakIterator iLineIter; |
| com.ibm.icu.text.BreakIterator iCharacterIter; |
| java.text.BreakIterator jSentenceIter; |
| java.text.BreakIterator jWordIter; |
| java.text.BreakIterator jLineIter; |
| java.text.BreakIterator jCharacterIter; |
| String[] iSentences; |
| String[] iWords; |
| String[] iLines; |
| String[] iCharacters; |
| String[] jSentences; |
| String[] jWords; |
| String[] jLines; |
| String[] jCharacters; |
| |
| public static void main(String[] args) throws Exception { |
| new BreakIteratorPerformanceTest().run(args); |
| } |
| |
| protected void setup(String[] args) { |
| try { |
| // read in the input file, being careful with a possible BOM |
| FileInputStream in = new FileInputStream(fileName); |
| BOMFreeReader reader = new BOMFreeReader(in, encoding); |
| fileContents = new String(readToEOS(reader)); |
| |
| // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator |
| // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " "); |
| // String res = ""; |
| // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ "); |
| // while (tokenizer.hasMoreTokens()) |
| // res += tokenizer.nextToken() + " "; |
| // fileContents = res.trim(); |
| |
| // create the break iterators with respect to locale |
| if (locale == null) { |
| iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(); |
| iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(); |
| iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(); |
| iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(); |
| |
| jSentenceIter = java.text.BreakIterator.getSentenceInstance(); |
| jWordIter = java.text.BreakIterator.getWordInstance(); |
| jLineIter = java.text.BreakIterator.getLineInstance(); |
| jCharacterIter = java.text.BreakIterator.getCharacterInstance(); |
| } else { |
| iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale); |
| iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale); |
| iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale); |
| iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale); |
| |
| jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale); |
| jWordIter = java.text.BreakIterator.getWordInstance(locale); |
| jLineIter = java.text.BreakIterator.getLineInstance(locale); |
| jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale); |
| } |
| |
| iSentences = init(iSentenceIter); |
| iWords = init(iWordIter); |
| iLines = init(iLineIter); |
| iCharacters = init(iCharacterIter); |
| jSentences = init(jSentenceIter); |
| jWords = init(jWordIter); |
| jLines = init(jLineIter); |
| jCharacters = init(jCharacterIter); |
| |
| } catch (Exception ex) { |
| ex.printStackTrace(); |
| throw new RuntimeException(ex.getMessage()); |
| } |
| |
| // we created some heavy objects, so lets try to clean up a little before running the tests |
| gc(); |
| } |
| |
| private String[] init(com.ibm.icu.text.BreakIterator iter) { |
| // set the string to iterate on |
| iter.setText(fileContents); |
| |
| // produce a token list |
| ArrayList tokenList = new ArrayList(); |
| int start = iter.first(); |
| for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next()) |
| tokenList.add(fileContents.substring(start, end)); |
| |
| // return the token list as a string array |
| return (String[]) tokenList.toArray(new String[0]); |
| } |
| |
| private String[] init(java.text.BreakIterator iter) { |
| // set the string to iterate on |
| iter.setText(fileContents); |
| |
| // produce a token list |
| ArrayList tokenList = new ArrayList(); |
| int start = iter.first(); |
| for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next()) |
| tokenList.add(fileContents.substring(start, end)); |
| |
| // return the token list as a string array |
| return (String[]) tokenList.toArray(new String[0]); |
| } |
| |
| PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct, |
| final String breakType) { |
| return new PerfTest.Function() { |
| public void call() { |
| int k = 0; |
| int start = iIter.first(); |
| for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iIter |
| .next()) |
| if (!correct[k++].equals(fileContents.substring(start, end))) |
| throw new RuntimeException("ICU4J BreakIterator gave the wrong answer for " + breakType + " " |
| + (k - 1) + " during the performance test. Cannot continue the performance test."); |
| if (k != correct.length) |
| throw new RuntimeException("ICU4J BreakIterator gave the wrong number of " + breakType |
| + "s during the performance test. Cannot continue the performance test."); |
| } |
| |
| public long getOperationsPerIteration() { |
| return fileContents.length(); |
| } |
| }; |
| } |
| |
| PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) { |
| return new PerfTest.Function() { |
| public void call() { |
| int k = 0; |
| int start = jIter.first(); |
| for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next()) |
| if (!correct[k++].equals(fileContents.substring(start, end))) |
| throw new RuntimeException("Java BreakIterator gave the wrong answer for " + breakType + " " |
| + (k - 1) + " during the performance test. Cannot continue the performance test."); |
| if (k != correct.length) |
| throw new RuntimeException("Java BreakIterator gave the wrong number of " + breakType |
| + "s during the performance test. Cannot continue the performance test."); |
| } |
| |
| public long getOperationsPerIteration() { |
| return fileContents.length(); |
| } |
| }; |
| } |
| |
| PerfTest.Function TestICUSentences() { |
| return createTestICU(iSentenceIter, iSentences, "sentence"); |
| } |
| |
| PerfTest.Function TestICUWords() { |
| return createTestICU(iWordIter, iWords, "word"); |
| } |
| |
| PerfTest.Function TestICULines() { |
| return createTestICU(iLineIter, iLines, "line"); |
| } |
| |
| PerfTest.Function TestICUCharacters() { |
| return createTestICU(iCharacterIter, iCharacters, "character"); |
| } |
| |
| PerfTest.Function TestJavaSentences() { |
| return createTestJava(jSentenceIter, jSentences, "sentence"); |
| } |
| |
| PerfTest.Function TestJavaWords() { |
| return createTestJava(jWordIter, jWords, "word"); |
| } |
| |
| PerfTest.Function TestJavaLines() { |
| return createTestJava(jLineIter, jLines, "line"); |
| } |
| |
| PerfTest.Function TestJavaCharacters() { |
| return createTestJava(jCharacterIter, jCharacters, "character"); |
| } |
| } |