// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 **********************************************************************
 * Copyright (c) 2002-2008, International Business Machines           *
 * Corporation and others.  All Rights Reserved.                      *
 **********************************************************************
 */
package com.ibm.icu.dev.test.perf;

import java.io.FileInputStream;
import java.util.ArrayList;

/**
 * Performance test comparing the ICU4J ({@code com.ibm.icu.text.BreakIterator}) and JDK
 * ({@code java.text.BreakIterator}) break iterators on the contents of an input file.
 *
 * <p>{@link #setup(String[])} reads the file, creates sentence, word, line and character
 * iterators for both implementations, and records the tokens each iterator produces. Every
 * {@code Test*} method then returns a {@link PerfTest.Function} that re-runs one iterator over
 * the same text and checks that it reproduces the recorded tokens.
 *
 * <p>NOTE(review): the {@code Test*} methods are presumably discovered and invoked by the
 * {@code PerfTest} framework; nothing in this class calls them directly.
 */
public class BreakIteratorPerformanceTest extends PerfTest {

    /** Full text of the input file; every iterator in this class is bound to this string. */
    String fileContents;

    // ICU4J iterators, one per break type.
    com.ibm.icu.text.BreakIterator iSentenceIter;
    com.ibm.icu.text.BreakIterator iWordIter;
    com.ibm.icu.text.BreakIterator iLineIter;
    com.ibm.icu.text.BreakIterator iCharacterIter;

    // JDK iterators, one per break type.
    java.text.BreakIterator jSentenceIter;
    java.text.BreakIterator jWordIter;
    java.text.BreakIterator jLineIter;
    java.text.BreakIterator jCharacterIter;

    // Tokens recorded during setup; each timed run is verified against these.
    String[] iSentences;
    String[] iWords;
    String[] iLines;
    String[] iCharacters;
    String[] jSentences;
    String[] jWords;
    String[] jLines;
    String[] jCharacters;

    /**
     * Command-line entry point: runs this test with the given arguments.
     *
     * @param args command-line arguments, passed through to {@code run}
     * @throws Exception if {@code run} fails
     */
    public static void main(String[] args) throws Exception {
        new BreakIteratorPerformanceTest().run(args);
    }

    /**
     * Loads the input file, creates all break iterators and records the expected tokens.
     *
     * <p>Uses the inherited {@code fileName}, {@code encoding} and {@code locale} members. When
     * {@code locale} is {@code null} the no-argument factory methods are used.
     *
     * @param args command-line arguments (not used by this method)
     * @throws RuntimeException wrapping any exception raised while reading the file or building
     *         the iterators and token lists; the original exception is attached as the cause
     */
    protected void setup(String[] args) {
        try {
            // read in the input file, being careful with a possible BOM
            FileInputStream in = new FileInputStream(fileName);
            try {
                BOMFreeReader reader = new BOMFreeReader(in, encoding);
                fileContents = new String(readToEOS(reader));
            } finally {
                // release the file handle whether or not reading succeeded
                in.close();
            }

            // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator
            // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " ");
            // String res = "";
            // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ ");
            // while (tokenizer.hasMoreTokens())
            // res += tokenizer.nextToken() + " ";
            // fileContents = res.trim();

            // create the break iterators with respect to locale
            if (locale == null) {
                iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance();
                iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance();
                iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance();
                iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance();

                jSentenceIter = java.text.BreakIterator.getSentenceInstance();
                jWordIter = java.text.BreakIterator.getWordInstance();
                jLineIter = java.text.BreakIterator.getLineInstance();
                jCharacterIter = java.text.BreakIterator.getCharacterInstance();
            } else {
                iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale);
                iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale);
                iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale);
                iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale);

                jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale);
                jWordIter = java.text.BreakIterator.getWordInstance(locale);
                jLineIter = java.text.BreakIterator.getLineInstance(locale);
                jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale);
            }

            // bind each iterator to the text and record the tokens it produces
            iSentences = init(iSentenceIter);
            iWords = init(iWordIter);
            iLines = init(iLineIter);
            iCharacters = init(iCharacterIter);
            jSentences = init(jSentenceIter);
            jWords = init(jWordIter);
            jLines = init(jLineIter);
            jCharacters = init(jCharacterIter);

        } catch (Exception ex) {
            ex.printStackTrace();
            // keep the original exception as the cause so its stack trace is not lost
            throw new RuntimeException(ex.getMessage(), ex);
        }

        // we created some heavy objects, so lets try to clean up a little before running the tests
        gc();
    }

    /**
     * Binds an ICU4J iterator to {@link #fileContents} and collects the tokens it yields.
     *
     * @param iter the ICU4J iterator to initialize; its text is set by this call
     * @return the substrings of {@code fileContents} between consecutive boundaries, in order
     */
    private String[] init(com.ibm.icu.text.BreakIterator iter) {
        // set the string to iterate on
        iter.setText(fileContents);

        // produce a token list
        ArrayList<String> tokenList = new ArrayList<String>();
        int start = iter.first();
        for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next()) {
            tokenList.add(fileContents.substring(start, end));
        }

        // return the token list as a string array
        return tokenList.toArray(new String[0]);
    }

    /**
     * Binds a JDK iterator to {@link #fileContents} and collects the tokens it yields.
     *
     * @param iter the JDK iterator to initialize; its text is set by this call
     * @return the substrings of {@code fileContents} between consecutive boundaries, in order
     */
    private String[] init(java.text.BreakIterator iter) {
        // set the string to iterate on
        iter.setText(fileContents);

        // produce a token list
        ArrayList<String> tokenList = new ArrayList<String>();
        int start = iter.first();
        for (int end = iter.next(); end != java.text.BreakIterator.DONE; start = end, end = iter.next()) {
            tokenList.add(fileContents.substring(start, end));
        }

        // return the token list as a string array
        return tokenList.toArray(new String[0]);
    }

    /** Builds the failure message for a token that differs from the recorded one. */
    private static String wrongAnswerMessage(String implementation, String breakType, int index) {
        return implementation + " BreakIterator gave the wrong answer for " + breakType + " " + index
                + " during the performance test. Cannot continue the performance test.";
    }

    /** Builds the failure message for a run that yields a different number of tokens. */
    private static String wrongCountMessage(String implementation, String breakType) {
        return implementation + " BreakIterator gave the wrong number of " + breakType
                + "s during the performance test. Cannot continue the performance test.";
    }

    /**
     * Creates a timed function that walks {@code iIter} from its first boundary and verifies
     * each token against {@code correct}.
     *
     * @param iIter the ICU4J iterator to exercise; expected to be bound to {@link #fileContents}
     * @param correct the tokens the iterator is expected to produce, in order
     * @param breakType human-readable break type used in failure messages
     * @return a function whose {@code call()} throws {@link RuntimeException} when a token
     *         differs or the token count does not match {@code correct.length}
     */
    PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct,
            final String breakType) {
        return new PerfTest.Function() {
            public void call() {
                int k = 0;
                int start = iIter.first();
                for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE;
                        start = end, end = iIter.next()) {
                    // more tokens than recorded: report it instead of indexing out of bounds
                    if (k >= correct.length) {
                        throw new RuntimeException(wrongCountMessage("ICU4J", breakType));
                    }
                    if (!correct[k].equals(fileContents.substring(start, end))) {
                        throw new RuntimeException(wrongAnswerMessage("ICU4J", breakType, k));
                    }
                    k++;
                }
                // fewer tokens than recorded
                if (k != correct.length) {
                    throw new RuntimeException(wrongCountMessage("ICU4J", breakType));
                }
            }

            public long getOperationsPerIteration() {
                // one operation per char of the input text
                return fileContents.length();
            }
        };
    }

    /**
     * Creates a timed function that walks {@code jIter} from its first boundary and verifies
     * each token against {@code correct}.
     *
     * @param jIter the JDK iterator to exercise; expected to be bound to {@link #fileContents}
     * @param correct the tokens the iterator is expected to produce, in order
     * @param breakType human-readable break type used in failure messages
     * @return a function whose {@code call()} throws {@link RuntimeException} when a token
     *         differs or the token count does not match {@code correct.length}
     */
    PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) {
        return new PerfTest.Function() {
            public void call() {
                int k = 0;
                int start = jIter.first();
                for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next()) {
                    // more tokens than recorded: report it instead of indexing out of bounds
                    if (k >= correct.length) {
                        throw new RuntimeException(wrongCountMessage("Java", breakType));
                    }
                    if (!correct[k].equals(fileContents.substring(start, end))) {
                        throw new RuntimeException(wrongAnswerMessage("Java", breakType, k));
                    }
                    k++;
                }
                // fewer tokens than recorded
                if (k != correct.length) {
                    throw new RuntimeException(wrongCountMessage("Java", breakType));
                }
            }

            public long getOperationsPerIteration() {
                // one operation per char of the input text
                return fileContents.length();
            }
        };
    }

    /** @return the timed function for ICU4J sentence breaking */
    PerfTest.Function TestICUSentences() {
        return createTestICU(iSentenceIter, iSentences, "sentence");
    }

    /** @return the timed function for ICU4J word breaking */
    PerfTest.Function TestICUWords() {
        return createTestICU(iWordIter, iWords, "word");
    }

    /** @return the timed function for ICU4J line breaking */
    PerfTest.Function TestICULines() {
        return createTestICU(iLineIter, iLines, "line");
    }

    /** @return the timed function for ICU4J character breaking */
    PerfTest.Function TestICUCharacters() {
        return createTestICU(iCharacterIter, iCharacters, "character");
    }

    /** @return the timed function for JDK sentence breaking */
    PerfTest.Function TestJavaSentences() {
        return createTestJava(jSentenceIter, jSentences, "sentence");
    }

    /** @return the timed function for JDK word breaking */
    PerfTest.Function TestJavaWords() {
        return createTestJava(jWordIter, jWords, "word");
    }

    /** @return the timed function for JDK line breaking */
    PerfTest.Function TestJavaLines() {
        return createTestJava(jLineIter, jLines, "line");
    }

    /** @return the timed function for JDK character breaking */
    PerfTest.Function TestJavaCharacters() {
        return createTestJava(jCharacterIter, jCharacters, "character");
    }
}
