// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 **********************************************************************
 * Copyright (c) 2002-2008, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
package com.ibm.icu.dev.test.perf;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.impl.LocaleUtility;

/**
 * Base class for performance testing framework. To use, the subclass can simply
 * define one or more instance methods with names beginning with "test" (case
 * ignored). The prototype of the method is
 *
 * PerfTest.Function testTheName()
 *
 * The actual performance test will execute on the returned Command object
 * (refer to Command Pattern). To call a test from command line, the 'test'
 * prefix of the test method name can be ignored/removed.
 *
 * In addition, the subclass should define a main() method that calls
 * PerfTest.run() as defined here.
 *
 * If the subclass uses any command line arguments (beyond those handled
 * automatically by this class) then it should override PerfTest.setup() to
 * handle its arguments. If the subclass needs more sophisticated management
 * for controlling finding/calling test method, it can replace the default
 * implementation for PerfTest.testProvider before calling PerfTest.run().
 *
 * Example invocation: java -cp classes -verbose:gc
 * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100
 * UnicodeSetAdd [[:l:][:c:]]
 *
 * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K),
 * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC
 * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 =
 * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 =
 * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 =
 * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 =
 * testUnicodeSetAdd end 11978 1109044
 *
 * The [] lines are emitted by the JVM as a result of the -verbose:gc switch.
 *
 * Lines beginning with '=' are emitted by PerfTest: = testUnicodeSetAdd begin
 * 100 A 'begin' statement contains the name of the setup method, which
 * determines what test function is measured, and the number of iterations that
 * will be timed. = testUnicodeSetAdd end 12047 1109044 An 'end' statement gives
 * the name of the setup method again, and then two integers. The first is the
 * total elapsed time in milliseconds, and the second is the number of events
 * per iteration. In this example, the time per event is 12047 / (100 * 1109044)
 * or 108.6 ns/event.
 *
 * Raw times are given as integer ms, because this is what the system measures.
 *
 * @author Alan Liu
 * @since ICU 2.4
 */
public abstract class PerfTest {
    // Command-line options set these (all of them are reset and then filled
    // in by parseOptions() each time run() is invoked):

    // True when the VERBOSE option occurred; run() and getIteration() then
    // print more detailed progress lines.
    protected boolean verbose;
    // Value of the SOURCEDIR option, or null when the option was not given.
    protected String sourceDir;
    // Value of the FILE_NAME option, or null when the option was not given.
    protected String fileName;

    // protected String resolvedFileName;
    // Value of the ENCODING option; the empty string when not given.
    protected String encoding;
    // Value of the TEST_NAME option, or null when the option was not given.
    protected String testName;
    // True when the USELEN option occurred. Not read by this class itself.
    protected boolean uselen;
    // Iteration count from the ITERATIONS option; -1 when the TIME option is
    // used instead (exactly one of the two must be given).
    protected int iterations;
    // Number of timed passes per test, from the (mandatory) PASSES option.
    protected int passes;
    // Target duration in seconds from the TIME option; -1 when the
    // ITERATIONS option is used instead.
    protected int time;
    // True when the LINE_MODE option occurred (mutually exclusive with bulk).
    protected boolean line_mode;
    // True when the BULK_MODE option occurred (mutually exclusive with line).
    protected boolean bulk_mode;
    // Locale built from the LOCALE option via LocaleUtility.getLocaleFromName,
    // or null when the option was not given.
    protected Locale locale;
    // Taken from the GARBAGE_COLLECT option slot; when true, parseOptions()
    // calls gc() once before returning the test list.
    protected boolean doPriorGC;
    // Number of threads used by performLoops(), from the THREADS option;
    // defaults to 1 and is rejected if not greater than 0.
    protected int threads;

    // Strategy used to enumerate and look up test commands. The default
    // treats methods of this object whose names start with "test" as tests.
    protected TestCmdProvider testProvider = new TestPrefixProvider(this);

    static interface TestCmdProvider {
        /**
         * Lists every test command this provider knows about.
         *
         * @return The names for all available tests.
         */
        Set getAllTestCmdNames();

        /**
         * Decides whether a name refers to a test. An implementation may
         * apply more sophisticated naming rules than a plain lookup, so
         * TestCmdProvider.isTestCmd() != Set.contains() on the set returned
         * by getAllTestCmdNames().
         *
         * @param name the candidate test name
         * @return Whether the given name is a test name.
         */
        boolean isTestCmd(String name);

        /**
         * Produces the command object for a test.
         *
         * @param name the test name
         * @return the test Command or null
         */
        PerfTest.Function getTestCmd(String name);
    }

    /**
     * Treats every method of a given object whose name begins with the 'test'
     * prefix (ignoring case) as a test method.
     *
     * Names are case-folded with Locale.ROOT so that matching does not depend
     * on the default locale (for example the Turkish dotless-i mapping).
     */
    static class TestPrefixProvider implements TestCmdProvider {
        private Map theTests = null; // Map<string(no case), string(with case)>
        private Set orgNames = null; // values of theTests, kept for better output
        private Object refer;        // object whose declared methods are scanned

        /**
         * @param theProvider the object whose declared methods supply the tests
         */
        TestPrefixProvider(Object theProvider) {
            refer = theProvider;
        }

        /**
         * Scans the declared methods of the target object on first use and
         * caches the result.
         *
         * @return the original (case-preserved) names of all methods whose
         *         lower-cased name starts with "test" and is longer than
         *         "test" itself
         * @throws Error if two methods have the same name ignoring case
         */
        public Set getAllTestCmdNames() {
            if (theTests == null) {
                theTests = new HashMap();
                orgNames = new HashSet();
                Method[] methods = refer.getClass().getDeclaredMethods();
                for (int i = 0; i < methods.length; i++) {
                    String org = methods[i].getName();
                    // Locale-independent folding: identifiers must not be
                    // subject to locale-specific case rules.
                    String name = org.toLowerCase(Locale.ROOT);
                    // beginning with 'test'
                    // Note: methods named 'test()' are ignored
                    if (name.length() > 4 && name.startsWith("test")) {
                        if (theTests.containsKey(name)) {
                            throw new Error(
                                    "Duplicate method name ignoring case: "
                                            + name);
                        }
                        theTests.put(name, org);
                        orgNames.add(org);
                    }
                }
            }
            return orgNames; // beginning with 'test', keeping case
        }

        /**
         * The given name will map to a method of the same name, or a method
         * named "test" + name. Case is ignored.
         *
         * @return the lower-cased key into theTests, or null if neither
         *         spelling is a known test
         */
        private String isTestCmd_impl(String name) {
            getAllTestCmdNames(); // make sure the lookup table exists
            String tn1 = name.toLowerCase(Locale.ROOT);
            String tn2 = "test" + tn1;
            if (theTests.containsKey(tn1)) {
                return tn1;
            } else if (theTests.containsKey(tn2)) {
                return tn2;
            }
            return null;
        }

        public boolean isTestCmd(String name) {
            return isTestCmd_impl(name) != null;
        }

        /**
         * Looks up the test method for the given name and invokes it without
         * arguments on the target object.
         *
         * @param aname the test name, with or without the 'test' prefix
         * @return the Function returned by the test method, or null when the
         *         name is not a known test
         * @throws Error if the method cannot be found or invoked
         */
        public Function getTestCmd(String aname) {
            // Resolve the key first: this initializes theTests. Evaluating
            // theTests.get(...) with the lookup as its argument would read the
            // (possibly still null) field before the table is built.
            String key = isTestCmd_impl(aname);
            String name = (String) theTests.get(key);
            if (name == null) {
                return null;
            }

            try {
                Method m = refer.getClass().getDeclaredMethod(name,
                        (Class[]) null);
                return (Function) m.invoke(refer, new Object[] {});
            } catch (Exception e) {
                throw new Error(
                        "TestPrefixProvider implementation error. Finding: "
                                + name, e);
            }
        }
    }

    /**
     * The unit of work that the framework times. A test method of a PerfTest
     * subclass returns an instance of a Function subclass whose call() (or
     * call(int)) method contains the code to be measured.
     *
     * The two call overloads delegate to each other by default, so a subclass
     * has to override at least one of them.
     */
    public abstract static class Function {

        // Value stored by setID() and returned by getID().
        private int id;

        /**
         * The action to be measured, for actions that are thread-safe.
         * The default implementation forwards to call(0).
         */
        public void call() {
            call(0);
        }

        /**
         * The action to be measured, for actions that are not thread-safe.
         * The default implementation forwards to call().
         *
         * @param i the index passed in by the caller (the framework passes
         *          the index of the calling thread when running
         *          multi-threaded)
         */
        public void call(int i) {
            call();
        }

        /**
         * Number of operations performed by a single call(). Subclasses may
         * override this to return another positive value.
         *
         * @return 1 unless overridden
         */
        public long getOperationsPerIteration() {
            return 1;
        }

        /**
         * Number of events produced by a single call(), e.g. the number of
         * breaks found per iteration by a break iterator. Subclasses may
         * override this to return a positive or negative value.
         *
         * @return -1 unless overridden, meaning events do not apply
         */
        public long getEventsPerIteration() {
            return -1;
        }

        /**
         * Runs call() n times back to back and reports the wall-clock time
         * taken. For small n and a fast call() the result may be zero; small
         * results have limited meaning, depending on the VM and OS.
         *
         * @param n how many times to invoke call(); nothing runs if n <= 0
         * @return elapsed milliseconds
         */
        public final long time(long n) {
            final long begin = System.currentTimeMillis();
            for (long remaining = n; remaining > 0; remaining--) {
                call();
            }
            return System.currentTimeMillis() - begin; // ms
        }

        /**
         * Hook invoked each time before looping through call. Does nothing by
         * default.
         */
        public void init() {
        }

        public final void setID(int id) {
            this.id = id;
        }

        public final int getID() {
            return id;
        }
    }
    
    private class FunctionRunner implements Runnable {
        public FunctionRunner(Function f, long loops, int id) {
            this.f = f;
            this.loops = loops;
            this.id = id;
        }

        public void run() {
            long n = loops;
            while (n-- > 0)
                f.call(id);
        }
        
        private Function f;

        private long loops;
        private int id;
    }
    

    /**
     * Signals that the command line was used incorrectly (or that help was
     * requested).
     */
    public static class UsageException extends Exception {
        /**
         * For serialization
         */
        private static final long serialVersionUID = -1201256240606806242L;

        /** Creates a usage error without a detail message. */
        public UsageException() {
            super();
        }

        /**
         * Creates a usage error carrying an explanation.
         *
         * @param message the detail message
         */
        public UsageException(String message) {
            super(message);
        }
    }

    /**
     * Constructor. Performs no work; the option fields are initialized by
     * parseOptions() when run() is called.
     */
    protected PerfTest() {
    }

    /**
     * Framework hook that receives the command-line arguments not consumed by
     * run(). This default accepts no extra arguments at all; subclasses that
     * take their own arguments override it, and such overrides should NOT call
     * this base implementation.
     *
     * @param args the leftover arguments
     * @throws RuntimeException if any argument is left over
     */
    protected void setup(String[] args) {
        if (args.length == 0) {
            return;
        }
        throw new RuntimeException("Extra arguments received");
    }

    /**
     * Indices into the array returned by getOptions(). Each constant must
     * equal the position of the corresponding entry in that array, so these
     * must be kept in sync with getOptions().
     */
    static final int HELP1 = 0;
    static final int HELP2 = 1;
    static final int VERBOSE = 2;
    static final int SOURCEDIR = 3;
    static final int ENCODING = 4;
    static final int USELEN = 5;
    static final int FILE_NAME = 6;
    static final int PASSES = 7;
    static final int ITERATIONS = 8;
    static final int TIME = 9;
    static final int LINE_MODE = 10;
    static final int BULK_MODE = 11;
    static final int LOCALE = 12;
    static final int TEST_NAME = 13;
    static final int THREADS = 14;

    // Options above here are identical to those in C; keep in sync with C
    // Options below here are unique to Java; shift down as necessary
    // (they follow "threads" in getOptions(), hence 15 and 16 -- reusing 14
    // would make --gc read the threads slot and --list read the gc slot)
    static final int GARBAGE_COLLECT = 15;
    static final int LIST = 16;

    /**
     * Builds a fresh option table for UOption.parseArgs(). The position of
     * each entry is significant: callers index the returned array with the
     * option index constants of this class.
     *
     * @return a new array of 17 option descriptors
     */
    UOption[] getOptions() {
        UOption[] table = {
                UOption.HELP_H(),                                    // 0
                UOption.HELP_QUESTION_MARK(),                        // 1
                UOption.VERBOSE(),                                   // 2
                UOption.SOURCEDIR(),                                 // 3
                UOption.ENCODING(),                                  // 4
                UOption.DEF("uselen",     'u', UOption.NO_ARG),      // 5
                UOption.DEF("filename",   'f', UOption.REQUIRES_ARG),// 6
                UOption.DEF("passes",     'p', UOption.REQUIRES_ARG),// 7
                UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG),// 8
                UOption.DEF("time",       't', UOption.REQUIRES_ARG),// 9
                UOption.DEF("line-mode",  'l', UOption.NO_ARG),      // 10
                UOption.DEF("bulk-mode",  'b', UOption.NO_ARG),      // 11
                UOption.DEF("locale",     'L', UOption.REQUIRES_ARG),// 12
                UOption.DEF("testname",   'T', UOption.REQUIRES_ARG),// 13
                UOption.DEF("threads",    'r', UOption.REQUIRES_ARG),// 14

                // Options above here are identical to those in C; keep in sync
                // Options below here are unique to Java

                UOption.DEF("gc", 'g', UOption.NO_ARG),              // 15
                UOption.DEF("list", (char) -1, UOption.NO_ARG),      // 16
        };
        return table;
    }

    /**
     * Entry point for subclasses, to be called from their main(). Parses the
     * command line (handing any arguments it does not consume to setup()),
     * then executes every selected test for the requested number of passes,
     * printing a "begin" and an "end" line per pass. See the class
     * description for details.
     *
     * @param args the command-line arguments
     * @throws Exception on usage errors or failures while running a test
     */
    protected final void run(String[] args) throws Exception {
        Set testList = parseOptions(args);

        Iterator names = testList.iterator();
        while (names.hasNext()) {
            String meth = (String) names.next();

            // Ask the provider for the command object and sanity-check it.
            Function testFunction = testProvider.getTestCmd(meth);
            if (testFunction == null) {
                throw new RuntimeException(meth
                        + " failed to return a test function");
            }
            if (testFunction.getOperationsPerIteration() < 1) {
                throw new RuntimeException(meth
                        + " returned an illegal operations/iteration()");
            }

            // Fixed iteration count, or one calibrated from the time budget.
            long loops = getIteration(meth, testFunction);

            for (int pass = 0; pass < passes; ++pass) {
                // "begin" line: the detail after "begin " is verbose-only.
                String beginLine = "= " + meth + " begin ";
                if (verbose) {
                    if (iterations > 0) {
                        beginLine += iterations;
                    } else {
                        beginLine += time + " seconds";
                    }
                }
                System.out.println(beginLine);

                long elapsed = performLoops(testFunction, loops);
                long events = testFunction.getEventsPerIteration();

                // "end" line: same fields in both modes, labelled only when
                // verbose; the events field is omitted when it is -1.
                String loopsTag = verbose ? " loops: " : " ";
                String opsTag = verbose ? " operations: " : " ";
                String eventsTag = verbose ? " events: " : " ";

                String endLine = "= " + meth + " end " + (elapsed / 1000.0)
                        + loopsTag + loops
                        + opsTag + testFunction.getOperationsPerIteration();
                if (events != -1) {
                    endLine += eventsTag + events;
                }
                System.out.println(endLine);
            }
        }
    }

    /**
     * Resets all option fields, parses the command line into them, selects the
     * tests to run and forwards the remaining arguments to setup().
     *
     * After the options, leading arguments that name a test are collected;
     * the first argument that is not a test name and everything after it is
     * passed to setup(). If no test is named, all tests of the provider are
     * selected. With the "list" option the available tests are printed and
     * the VM exits.
     *
     * @param args the command-line arguments
     * @return the method list to call
     * @throws UsageException if help was requested, no arguments were given,
     *             a mandatory option is missing, options conflict, or a
     *             numeric option value is not an integer
     */
    private Set parseOptions(String[] args) throws UsageException {

        // Start from a clean state so that repeated calls do not see stale
        // values.
        doPriorGC = false;
        encoding = "";
        uselen = false;
        fileName = null;
        sourceDir = null;
        line_mode = false;
        verbose = false;
        bulk_mode = false;
        passes = iterations = time = -1;
        locale = null;
        testName = null;
        threads = 1;

        UOption[] options = getOptions();
        // NOTE(review): the code below relies on parseArgs leaving the
        // non-option arguments in args[0..remainingArgc) -- confirm against
        // UOption.
        int remainingArgc = UOption.parseArgs(args, options);

        if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
            throw new UsageException();
        }

        if (options[LIST].doesOccur) {
            System.err.println("Available tests:");
            Set testNames = testProvider.getAllTestCmdNames();
            for (Iterator iter = testNames.iterator(); iter.hasNext();) {
                String name = (String) iter.next();
                System.err.println(" " + name);
            }
            System.exit(0);
        }

        // Exactly one of -t and -i must be present.
        if (options[TIME].doesOccur && options[ITERATIONS].doesOccur) {
            throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'");
        } else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur) {
            throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified");
        } else if (options[ITERATIONS].doesOccur) {
            try {
                iterations = Integer.parseInt(options[ITERATIONS].value);
            } catch (NumberFormatException ex) {
                throw new UsageException("'-i <iterations>' requires an integer number of iterations");
            }
        } else { //if (options[TIME].doesOccur)
            try {
                time = Integer.parseInt(options[TIME].value);
            } catch (NumberFormatException ex) {
                // The time option is '-t' ('-r' is the threads option).
                throw new UsageException("'-t <seconds>' requires an integer number of seconds");
            }
        }

        if (!options[PASSES].doesOccur) {
            throw new UsageException("'-p <passes>' must be specified");
        }
        try {
            passes = Integer.parseInt(options[PASSES].value);
        } catch (NumberFormatException ex) {
            // Report a malformed value like the other numeric options do,
            // instead of letting the NumberFormatException escape.
            throw new UsageException("'-p <passes>' requires an integer number of passes");
        }

        if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur) {
            throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)");
        }

        if (options[THREADS].doesOccur) {
            try {
                threads = Integer.parseInt(options[THREADS].value);
            } catch (NumberFormatException ex) {
                throw new UsageException("'-r <threads>' requires an integer number of threads");
            }
            if (threads <= 0) {
                throw new UsageException("'-r <threads>' requires an number of threads greater than 0");
            }
        }

        line_mode = options[LINE_MODE].doesOccur;
        bulk_mode = options[BULK_MODE].doesOccur;
        verbose   = options[VERBOSE].doesOccur;
        uselen    = options[USELEN].doesOccur;
        doPriorGC = options[GARBAGE_COLLECT].doesOccur;

        if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value;
        if (options[ENCODING].doesOccur)  encoding  = options[ENCODING].value;
        if (options[FILE_NAME].doesOccur) fileName  = options[FILE_NAME].value;
        if (options[TEST_NAME].doesOccur) testName  = options[TEST_NAME].value;
        if (options[LOCALE].doesOccur)    locale    = LocaleUtility.getLocaleFromName(options[LOCALE].value);

        // build the test list: consume leading arguments that name a test.
        // The first argument that is not a test name, and everything after
        // it, belongs to the subclass.
        Set testList = new HashSet();
        int consumed = 0;
        while (consumed < remainingArgc && testProvider.isTestCmd(args[consumed])) {
            testList.add(args[consumed]);
            consumed++;
        }

        // if no tests were specified, put all the tests in the test list
        if (testList.isEmpty()) {
            testList.addAll(testProvider.getAllTestCmdNames());
        }

        // pass remaining arguments, if any, through to the subclass via setup() method.
        String[] subclassArgs = new String[remainingArgc - consumed];
        System.arraycopy(args, consumed, subclassArgs, 0, subclassArgs.length);
        setup(subclassArgs);

        // Put the heap in a consistent state
        if (doPriorGC) {
            gc();
        }

        return testList;
    }

    /**
     * Translate '-t time' to iterations (or just return '-i iteration').
     *
     * When a time budget is in effect, the function is run repeatedly with a
     * growing or linearly rescaled iteration count until one run takes within
     * 10% of the requested duration.
     *
     * @param methName the test name, used only for the verbose progress line
     * @param fn the function to calibrate against
     * @return the number of iterations to use for each timed pass
     * @throws InterruptedException if interrupted while a calibration run
     *             waits for its worker threads
     * @throws RuntimeException if the rescaled iteration count drops to zero
     */
    private long getIteration(String methName, Function fn) throws InterruptedException {
        long iter = 0;
        if (time < 0) { // && iterations > 0
            iter = iterations;
        } else { // && iterations < 0
            // Translate time to iteration
            // Assuming there is a linear relation between time and iterations

            if (verbose) {
                System.out.println("= " + methName + " calibrating " + time
                        + " seconds");
            }

            // Target duration in ms. Multiply as long so that a large number
            // of seconds cannot overflow int arithmetic before widening.
            long base = time * 1000L;
            // System.out.println("base :" + base);
            long seed = 1;
            long t = 0;
            while (t < base * 0.9 || base * 1.1 < t) { // + - 10%
                if (iter == 0 || t == 0) {
                    iter = seed; // start up from 1
                    seed *= 100; // if the method is too fast (t == 0),
                    // multiply 100 times
                    // 100 is rational because 'base' is always larger than 1000
                } else {
                    // If 't' is large enough, use linear function to calculate
                    // new iteration
                    //
                    // new iter(base) old iter
                    // -------------- = -------- = k
                    // new time old time
                    //
                    // System.out.println("before guess t: " + t);
                    // System.out.println("before guess iter: " + iter);
                    iter = (long) ((double) iter / t * base); // avoid long
                    // cut, eg. 1/10
                    // == 0
                    if (iter == 0) {
                        throw new RuntimeException(
                                "Unable to converge on desired duration");
                    }
                }
                t = performLoops(fn, iter);
            }
            // System.out.println("final t : " + t);
            // System.out.println("final i : " + iter);
        }
        return iter;
    }
    
    
    /**
     * Runs the function's init() hook and then executes it loops times,
     * either on the calling thread or, when more than one thread is
     * configured, loops times on each of 'threads' freshly created threads.
     *
     * @param function the function to execute
     * @param loops number of calls (per thread)
     * @return elapsed wall-clock time in milliseconds; in the multi-threaded
     *         case measured from just before the first thread is started
     *         until all threads have been joined
     * @throws InterruptedException if interrupted while joining the threads
     */
    private long performLoops(Function function, long loops) throws InterruptedException {
        function.init();

        if (threads <= 1) {
            return function.time(loops); // ms
        }

        // Create all workers up front so thread construction is not timed.
        Thread[] workers = new Thread[threads];
        for (int w = 0; w < threads; w++) {
            workers[w] = new Thread(new FunctionRunner(function, loops, w));
        }

        long begin = System.currentTimeMillis();
        for (int w = 0; w < threads; w++) {
            workers[w].start();
        }
        for (int w = 0; w < threads; w++) {
            workers[w].join();
        }
        return System.currentTimeMillis() - begin;
    }
    

    /**
     * Asks the runtime to collect garbage and run finalizers, twice, pausing
     * 100 ms after each request. This is the procedure from "Java Platform
     * Performance", recommended by Javasoft.
     *
     * If the calling thread is interrupted while pausing, the remaining steps
     * are skipped and the thread's interrupt status is restored so that the
     * caller can still observe the interruption.
     */
    protected void gc() {
        try {
            System.gc();
            Thread.sleep(100);
            System.runFinalization();
            Thread.sleep(100);

            System.gc();
            Thread.sleep(100);
            System.runFinalization();
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // Thread.sleep cleared the flag when it threw; set it again
            // rather than silently discarding the interruption.
            Thread.currentThread().interrupt();
        }
    }


    /**
     * Reads all remaining characters from a reader. The data is collected in
     * chunks that start at 256 chars and double up to a maximum of 32K, and
     * is then copied into one exactly-sized array.
     *
     * An IOException during reading is swallowed: reading stops and whatever
     * was read up to that point is returned.
     *
     * @param reader the source to drain; it is not closed
     * @return the characters read
     */
    public static char[] readToEOS(Reader reader) {
        final int chunkLimit = 0x8000; // max buffer size - 32K
        ArrayList chunks = new ArrayList();
        int total = 0;
        int chunkSize = 0x80; // doubled before first use
        int filled;
        do {
            chunkSize = Math.min(chunkSize * 2, chunkLimit);
            char[] chunk = new char[chunkSize];
            filled = 0;
            try {
                // Keep reading until this chunk is full or the end is reached.
                while (filled < chunkSize) {
                    int got = reader.read(chunk, filled, chunkSize - filled);
                    if (got == -1) {
                        break;
                    }
                    filled += got;
                }
            } catch (IOException e) {
            }
            chunks.add(chunk);
            total += filled;
            // A chunk that is not completely full means the input ended.
        } while (filled == chunkSize);

        // Every chunk except possibly the last one is full, so copying
        // min(chunk length, chars still missing) from each reassembles the
        // input.
        char[] result = new char[total];
        int written = 0;
        for (int k = 0; k < chunks.size() && written < total; k++) {
            char[] chunk = (char[]) chunks.get(k);
            int missing = total - written;
            int take = missing < chunk.length ? missing : chunk.length;
            System.arraycopy(chunk, 0, result, written, take);
            written += take;
        }
        return result;
    }
    /**
     * Reads all remaining bytes from a stream. The data is collected in
     * chunks that start at 256 bytes and double up to a maximum of 32K, and
     * is then copied into one exactly-sized array.
     *
     * An IOException during reading is swallowed: reading stops and whatever
     * was read up to that point is returned.
     *
     * @param stream the source to drain; it is not closed
     * @return the bytes read
     */
    public static byte[] readToEOS(InputStream stream) {
        final int chunkLimit = 0x8000; // max buffer size - 32K
        ArrayList chunks = new ArrayList();
        int total = 0;
        int chunkSize = 0x80; // doubled before first use
        int filled;
        do {
            chunkSize = Math.min(chunkSize * 2, chunkLimit);
            byte[] chunk = new byte[chunkSize];
            filled = 0;
            try {
                // Keep reading until this chunk is full or the end is reached.
                while (filled < chunkSize) {
                    int got = stream.read(chunk, filled, chunkSize - filled);
                    if (got == -1) {
                        break;
                    }
                    filled += got;
                }
            } catch (IOException e) {
            }
            chunks.add(chunk);
            total += filled;
            // A chunk that is not completely full means the input ended.
        } while (filled == chunkSize);

        // Every chunk except possibly the last one is full, so copying
        // min(chunk length, bytes still missing) from each reassembles the
        // input.
        byte[] result = new byte[total];
        int written = 0;
        for (int k = 0; k < chunks.size() && written < total; k++) {
            byte[] chunk = (byte[]) chunks.get(k);
            int missing = total - written;
            int take = missing < chunk.length ? missing : chunk.length;
            System.arraycopy(chunk, 0, result, written, take);
            written += take;
        }
        return result;
    }

    /**
     * Reads a text file through readDataLine() and returns its non-empty
     * lines. The file is closed before returning.
     *
     * If the file cannot be opened or read, an error is printed to System.err
     * and the VM exits with status 1.
     *
     * @param filename path of the file to read
     * @param srcEncoding name of the charset used to decode the file
     * @param bulkMode if true, all lines are concatenated (without any
     *            separator) into a single string
     * @return the lines, one per element; or a one-element array holding the
     *         concatenation when bulkMode is true
     */
    protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) {
        FileInputStream fis = null;
        BufferedReader br = null;
        try {
            fis = new FileInputStream(filename);
            br = new BufferedReader(new InputStreamReader(fis, srcEncoding));
        } catch (Exception e) {
            System.err.println("Error: File access exception: " + e.getMessage() + "!");
            System.exit(1);
        }

        ArrayList list = new ArrayList();
        try {
            while (true) {
                String line = null;
                try {
                    line = readDataLine(br);
                } catch (Exception e) {
                    System.err.println("Read File Error" + e.getMessage() + "!");
                    System.exit(1);
                }
                if (line == null) break;           // end of file
                if (line.length() == 0) continue;  // skip blank lines
                list.add(line);
            }
        } finally {
            // Release the file handle; closing the reader also closes the
            // underlying stream. A failure to close is not actionable here.
            try {
                if (br != null) {
                    br.close();
                } else if (fis != null) {
                    fis.close();
                }
            } catch (IOException ignored) {
                // best effort
            }
        }

        int size = list.size();
        String[] lines;

        if (bulkMode) {
            StringBuffer buffer = new StringBuffer("");
            for (int i = 0; i < size; ++i) {
                buffer.append((String) list.get(i));
            }
            lines = new String[] { buffer.toString() };
        } else {
            lines = (String[]) list.toArray(new String[size]);
        }

        return lines;
    }

    /**
     * Reads the next line and normalizes it: a leading U+FEFF (BOM) is
     * removed, everything from the first '#' on is dropped as a comment, and
     * surrounding whitespace is trimmed.
     *
     * @param br the reader to take the line from
     * @return the normalized line (possibly empty), or null at end of input
     * @throws Exception if reading or processing the line fails; the message
     *             contains the raw line, the line as processed so far and the
     *             original exception, which is also attached as the cause
     */
    public String readDataLine(BufferedReader br) throws Exception {
        String originalLine = "";
        String line = "";
        try {
            line = originalLine = br.readLine();
            if (line == null) return null;
            if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
            int commentPos = line.indexOf('#');
            if (commentPos >= 0) line = line.substring(0, commentPos);
            line = line.trim();
        } catch (Exception e) {
            // Put the two lines where the old, never-formatted "{0}"/"{1}"
            // placeholders were meant to go, and keep the cause.
            throw new Exception("Line \"" + originalLine + "\",  \"" + line + "\" "
                    + e.toString(), e);
        }
        return line;
    }

    
    /**
     * Reader over an InputStream that skips a leading Unicode signature (BOM).
     * When no encoding is supplied, the encoding is inferred from the BOM and,
     * if the runtime supports it, used to decode the stream.
     */
    public static class BOMFreeReader extends Reader {
        InputStreamReader reader;
        // The requested encoding, or the one inferred from the BOM when none
        // was requested; null if neither is available.
        String encoding;
        // Length of the longest signature recognized (a UTF-7 variant).
        int MAX_BOM_LENGTH = 5;

        /**
         * Creates a new reader, detecting the encoding from the BOM and
         * skipping it. Equivalent to BOMFreeReader(in, null).
         *
         * @param in
         *            The input stream.
         * @throws IOException
         *             Thrown if reading for a BOM causes an IOException.
         */
        public BOMFreeReader(InputStream in) throws IOException {
            this(in, null);
        }

        /**
         * Creates a new reader, skipping a BOM associated with the given
         * encoding. If encoding is null, attempts to detect the encoding by the
         * BOM; a detected encoding is used for decoding when the runtime
         * supports it, otherwise the platform default charset is used.
         *
         * @param in
         *            The input stream.
         * @param encoding
         *            The encoding to use. Can be null.
         * @throws IOException
         *             Thrown if reading for a BOM causes an IOException, or
         *             if the given encoding is not supported.
         */
        public BOMFreeReader(InputStream in, String encoding) throws IOException {
            PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH);
            this.encoding = encoding;

            // Pre-fill with a byte that occurs in no signature, so positions
            // beyond a short input can never match.
            byte[] start = new byte[MAX_BOM_LENGTH];
            Arrays.fill(start, (byte)0xa5);

            // A single read() may deliver fewer bytes than requested; keep
            // reading until the window is full or the stream ends.
            int amountRead = 0;
            while (amountRead < MAX_BOM_LENGTH) {
                int n = pushback.read(start, amountRead, MAX_BOM_LENGTH - amountRead);
                if (n < 0) {
                    break;
                }
                amountRead += n;
            }

            // Give back everything that is not part of the signature.
            int bomLength = detectBOMLength(start);
            if (amountRead > bomLength)
                pushback.unread(start, bomLength, amountRead - bomLength);

            if (encoding != null) {
                reader = new InputStreamReader(pushback, encoding);
            } else if (this.encoding != null && java.nio.charset.Charset.isSupported(this.encoding)) {
                // Encoding inferred from the BOM. (The field, not the null
                // parameter, holds the detected name.)
                reader = new InputStreamReader(pushback, this.encoding);
            } else {
                // No BOM, or a signature for a charset this runtime lacks:
                // fall back to the platform default.
                reader = new InputStreamReader(pushback);
            }
        }

        /**
         * Determines the length of a BOM in the beginning of start. Assumes
         * start is at least a length 5 array. If the encoding field is null,
         * the check is not encoding specific and a detected signature sets
         * the encoding field of this BOMFreeReader; otherwise only the
         * signature of that encoding is accepted.
         *
         * @param start
         *            The starting bytes.
         * @return The length of a detected BOM, or 0 if none was recognized.
         */
        private int detectBOMLength(byte[] start) {
            if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
                if (encoding == null) this.encoding = "UTF-16BE";
                return 2; // "UTF-16BE";
            } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
                // FF FE starts both the UTF-32LE and the UTF-16LE signature;
                // two following zero bytes select UTF-32LE.
                if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00
                        && start[3] == (byte) 0x00) {
                    if (encoding == null) this.encoding = "UTF-32LE";
                    return 4; // "UTF-32LE";
                } else if ((encoding == null || "UTF-16LE".equals(encoding))) {
                    if (encoding == null) this.encoding = "UTF-16LE";
                    return 2; // "UTF-16LE";
                }
            } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF
                    && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) {
                if (encoding == null) this.encoding = "UTF-8";
                return 3; // "UTF-8";
            } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00
                    && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
                if (encoding == null) this.encoding = "UTF-32BE";
                return 4; // "UTF-32BE";
            } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E
                    && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) {
                if (encoding == null) this.encoding = "SCSU";
                return 3; // "SCSU";
            } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB
                    && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) {
                if (encoding == null) this.encoding = "BOCU-1";
                return 3; // "BOCU-1";
            } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B
                    && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) {
                if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
                    if (encoding == null) this.encoding = "UTF-7";
                    return 5; // "UTF-7";
                } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B
                        || start[3] == (byte) 0x2F) {
                    if (encoding == null) this.encoding = "UTF-7";
                    return 4; // "UTF-7";
                }
            } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD
                    && start[1] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
                // Signature is DD 73 66 73; each of the four bytes is checked
                // at its own position.
                if (encoding == null) this.encoding = "UTF-EBCDIC";
                return 4; // "UTF-EBCDIC";
            }

            /* no known Unicode signature byte sequence recognized */
            return 0;
        }

        /** Delegates to the underlying decoding reader. */
        public int read(char[] cbuf, int off, int len) throws IOException {
            return reader.read(cbuf, off, len);
        }

        /** Closes the underlying reader and, with it, the input stream. */
        public void close() throws IOException {
            reader.close();
        }
    }
}



// eof
