/*
 *******************************************************************************
 * Copyright (C) 2002-2006, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /icu/icuhtml/icu.sf.net/docs/eclipse_howto/eclipse3x.html,v 
 com.ibm.icu.dev.test.charset/TestConversion.java,v $ 
 * $Date: 2006/09/18 21:30:45 $ 
 * $Revision: 1.5 $ 
 *
 *******************************************************************************
 */

package com.ibm.icu.dev.test.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Iterator;

import com.ibm.icu.charset.CharsetDecoderICU;
import com.ibm.icu.charset.CharsetProviderICU;
import com.ibm.icu.dev.test.ModuleTest;
import com.ibm.icu.dev.test.TestDataModule.DataMap;
import com.ibm.icu.impl.ICUResourceBundle;


/**
 * This maps to convtest.c which tests the test file for data-driven conversion tests. 
 * 
 */
public class TestConversion extends ModuleTest {
    /**
     * This maps to the C struct of conversion case in convtest.h that stores the
     * data for a conversion test
     * 
     */
    private class ConversionCase {
        int caseNr;                                         // testcase index   
        String option = null;                               // callback options
        CodingErrorAction cbErrorAction = null;             // callback action type

        // data retrieved from a test case conversion.txt
        String charset;                                     // charset
        String unicode;                                     // unicode string
        ByteBuffer bytes;                                   // bytes
        int[] offsets;                                      // offsets
        boolean finalFlush;                                 // flush
        boolean fallbacks;                                  // fallback
        String outErrorCode;                                // errorCode
        String cbopt;                                       // callback 

        // TestGetUnicodeSet variables
        String map;
        String mapnot;
        int which;
    }

    // public methods --------------------------------------------------------

    public static void main(String[] args) throws Exception {
        new TestConversion().run(args);
    }

    public TestConversion() {
        super("com/ibm/icu/dev/data/testdata/", "conversion");
    }

    /*
     * This method maps to the convtest.cpp runIndexedTest() method to run each
     * type of conversion.
     */
    public void processModules() {
        try { 
            int testFromUnicode = 0;
            String testName = t.getName().toString();
            int testToUnicode = 0;
            // Iterate through and get each of the test case to process
            for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
                DataMap testcase = (DataMap) iter.next();

                if (testName.equalsIgnoreCase("toUnicode")) { 
                    TestToUnicode(testcase, testToUnicode);
                    testToUnicode++;
                } else if (testName.equalsIgnoreCase("fromUnicode")) {                    
                    TestFromUnicode(testcase, testFromUnicode);
                    testFromUnicode++;
                } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
                    TestGetUnicodeSet(testcase);
                } else {
                    warnln("Could not load the test cases for conversion");
                    continue;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    // private methods -------------------------------------------------------

    private void TestToUnicode(DataMap testcase, int caseNr) {
        // create Conversion case to store the test case data
        ConversionCase cc = new ConversionCase();

        try {
            // retrieve test case data
            cc.caseNr = caseNr;
            cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                    .getString();
            cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
                    .getBinary();
            cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
                    .getString();
            cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
                    .getIntVector();
            cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
                    .getUInt() != 0;
            cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
                    .getUInt() != 0;
            cc.outErrorCode = ((ICUResourceBundle) testcase
                    .getObject("errorCode")).getString();
            cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
                    .getString();
        } catch (Exception e) { 
            errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr);
            return;
        }
//      ----for debugging only 
        logln("\nTestToUnicode[" + caseNr + "] "
                + cc.charset + " ");
        logln("Bytes:");
        printbytes(cc.bytes, cc.bytes.limit());
        logln("");
        logln("Unicode: " + hex(cc.unicode));    
        logln("Callback: (" + cc.cbopt + ")");  
        logln("\n...............................................");

//         ----for debugging only
        
        //This test case is skipped due to limitation in java's API for decoder replacement 
        // { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{""} }
        if(cc.caseNr == 63)
        {
            logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
            logln("Skipping test due to limitation in Java API - callback replacement value");
            return;
        }
        // process the retrieved test data case
        if (cc.offsets.length == 0) {
            cc.offsets = null;
        } else if (cc.offsets.length != cc.unicode.length()) { 
            errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode[" + cc.unicode.length()
                    + "] and offsets[" + cc.offsets.length
                    + "] must have the same length");
            return;
        }
        // check for the callback replacement value for unmappable
        // characters or malformed errors
        if (cc.cbopt.length() > 0) {
            switch ((cc.cbopt).charAt(0)) {
            case '?':           //CALLBACK_SUBSTITUTE 
                cc.cbErrorAction = CodingErrorAction.REPLACE;
                break;
            case '0':           //CALLBACK_SKIP 
                cc.cbErrorAction = CodingErrorAction.IGNORE;
                break;
            case '.':           //CALLBACK_STOP 
                cc.cbErrorAction = CodingErrorAction.REPORT;
                break;
            case '&':           //CALLBACK_ESCAPE 
                cc.cbErrorAction = CodingErrorAction.REPORT;
                break;
            default:     
                cc.cbErrorAction = null;
                break;
            }
        }
        // check for any options for the callback value 
        cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);
        if (cc.option == null) {
            cc.option = null;
        }

        logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
        ToUnicodeCase(cc);

    }

    private void ToUnicodeCase(ConversionCase cc) {

        // create converter for charset and decoder for each test case
        CharsetProviderICU provider = new CharsetProviderICU();
        CharsetDecoderICU decoder = null;
        Charset charset = null;

        try {
            charset = (Charset) provider.charsetForName(cc.charset);
            decoder = (CharsetDecoderICU) charset.newDecoder();
            decoder.onMalformedInput(CodingErrorAction.REPLACE);
            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

        } catch (Exception e) {

            logln("Skipping test:(" + cc.charset
                    + ") due to ICU Charset not supported at this time");
            return;
        }

        // set the callback for the decoder
        if (cc.cbErrorAction != null) {
            decoder.onMalformedInput(cc.cbErrorAction);
            decoder.onUnmappableCharacter(cc.cbErrorAction);

            // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
            if (cc.option.equals("i")) {
                decoder.onMalformedInput(CodingErrorAction.REPORT);
            }

            // if callback action is replace, and there is a subchar
            // replace the decoder's default replacement value
            // if substring, skip test due to current api not supporting
            // substring replacement
            if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
                if (cc.cbopt.length() > 1) {
                    if (cc.cbopt.charAt(1) == '=') {
                        logln("Skipping test due to limitation in Java API - substitution string not supported");

                    } else {
                        // // read NUL-separated subchar first, if any
                        // copy the subchar from Latin-1 characters
                        // start after the NUL
                        if (cc.cbopt.charAt(1) == 0x00) {
                            cc.cbopt = cc.cbopt.substring(2);

                            try {
                                decoder.replaceWith(cc.cbopt);
                            } catch (Exception e) {
                                logln("Skipping test due to limitation in Java API - substitution character sequence size error");

                            }
                        }
                    }
                }
            }
        } 

        // decode source to unicode
        ByteBuffer source = ByteBuffer.wrap(cc.bytes.array());
        CharBuffer out = CharBuffer.allocate((int) (decoder
                .averageCharsPerByte() * source.remaining()));
        do {
            CoderResult cr = decoder.decode(source, out, true);
            if (cr.isOverflow()) {
                int pos = out.position();
                char[] temp = out.array();
                out = CharBuffer.allocate(temp.length * 4);
                out.put(temp);
                out.position(pos);
            } else if (cr.isError()) {
                checkResultsToUnicode(cc,cc.unicode, out); 
                return;
            }
        } while (source.remaining() > 0);

        checkResultsToUnicode(cc,cc.unicode, out);
        return;
    }

    private void TestFromUnicode(DataMap testcase, int caseNr) {

        ConversionCase cc = new ConversionCase();
        cc.caseNr = caseNr;

        try {
            // retrieve test case data
            cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                    .getString();
            cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
                    .getString();
            cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
                    .getBinary();
            cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
                    .getIntVector();
            cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
                    .getUInt() != 0;
            cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
                    .getUInt() != 0;
            cc.outErrorCode = ((ICUResourceBundle) testcase
                    .getObject("errorCode")).getString();
            cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
                    .getString();


        } catch (Exception e) {
            errln("Skipping test:");
            errln("error parsing conversion/toUnicode test case " + cc.caseNr);
            return;
        }
        // ----for debugging only
        logln("\nTestFromUnicode[" + caseNr + "] "
                + cc.charset + " "); 
        logln("Unicode: " + cc.unicode);
        logln("Bytes:");
        printbytes(cc.bytes, cc.bytes.limit());
        logln("");
        logln("Callback: (" + cc.cbopt + ")"); 
        logln("...............................................");

//         ----for debugging only
    
 
        // TODO: ***Currently skipping test for charset ibm-1390, gb18030,
        // ibm-930 due to external mapping need to be fix
        if (cc.charset.equalsIgnoreCase("ibm-1390")
                || cc.charset.equalsIgnoreCase("gb18030")
                || cc.charset.equalsIgnoreCase("ibm-970")) {
            logln("Skipping test:("
                    + cc.charset
                    + ") due to ICU Charset external mapping not supported at this time");
            return;
        }

        // process the retrieved test data case
        if (cc.offsets.length == 0) {
            cc.offsets = null;
        } else if (cc.offsets.length != cc.bytes.limit()) {
            errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
                    + "] and offsets[" + cc.offsets.length
                    + "] must have the same length");
            return;
        }

        // check the callback replacement value
        if (cc.cbopt.length() > 0) {

            switch ((cc.cbopt).charAt(0)) {
            case '?': 
                cc.cbErrorAction = CodingErrorAction.REPLACE;
                break;
            case '0': 
                cc.cbErrorAction = CodingErrorAction.IGNORE;
                break;
            case '.': 
                cc.cbErrorAction = CodingErrorAction.REPORT;
                break;
            case '&': 
                cc.cbErrorAction = CodingErrorAction.REPORT;
                break;
            default: 
                cc.cbErrorAction = null;
                break;
            }

            // check for any options for the callback value -- 
            cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);
            if (cc.option == null) {
                cc.option = null;
            }
        }
        logln("TestFromUnicode[" + cc.caseNr + "] " + cc.charset);
        FromUnicodeCase(cc);

        return;

    }

    private void FromUnicodeCase(ConversionCase cc) {

        // create charset encoder for conversion test
        CharsetProviderICU provider = new CharsetProviderICU();
        CharsetEncoder encoder = null;
        Charset charset = null;
        try {
            charset = (Charset) provider.charsetForName(cc.charset);
            encoder = (CharsetEncoder) charset.newEncoder();
            encoder.onMalformedInput(CodingErrorAction.REPLACE);
            encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

        } catch (Exception e) {

            logln("Skipping test:(" + cc.charset
                    + ") due to ICU Charset not supported at this time");
            return;

        }
 
        // set the callback for the encoder 
        if (cc.cbErrorAction != null) {
            encoder.onUnmappableCharacter(cc.cbErrorAction);
            encoder.onMalformedInput(cc.cbErrorAction);

            // if action has an option, put in the option for the case
            if (cc.option.equals("i")) {
                encoder.onMalformedInput(CodingErrorAction.REPORT);
            }

            // if callback action is replace, and there is a subchar
            // replace the decoder's default replacement value
            // if substring, skip test due to current api not supporting
            // substring
            if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
                if (cc.cbopt.length() > 1) {
                    if (cc.cbopt.length() > 1 && cc.cbopt.charAt(1) == '=') {
                        logln("Skipping test due to limitation in Java API - substitution string not supported");
                        return;
                    } else {
                        // // read NUL-separated subchar first, if any
                        // copy the subchar from Latin-1 characters
                        // start after the NUL
                        if (cc.cbopt.charAt(1) == 0x00) {
                            cc.cbopt = cc.cbopt.substring(2);
                            try {
                                encoder.replaceWith(toByteArray(cc.cbopt));
                            } catch (Exception e) {
                                logln("Skipping test due to limitation in Java API - substitution character sequence size error");
                                return;
                            }
                        }
                    }
                }
            }
        } 
        // do charset encoding from unicode
        CharBuffer uniStr = CharBuffer.wrap(cc.unicode.toCharArray());
        ByteBuffer out = ByteBuffer.allocate((int) (encoder
                .averageBytesPerChar() * uniStr.remaining()));
        do {
            CoderResult cr = encoder.encode(uniStr, out, true);
            if (cr.isOverflow()) {
                int pos = out.position();
                byte[] temp = out.array();
                out = ByteBuffer.allocate(temp.length * 4);
                out.put(temp);
                out.position(pos);
            } else if (cr.isError()) {
                // check the stopped test for current output and match the
                // expected results
                checkResultsFromUnicode(cc,cc.bytes, out);
                return;
            }
            else {
                cr = encoder.flush(out);
            }

        } while (uniStr.remaining() > 0);

        checkResultsFromUnicode(cc,cc.bytes, out);
        return;

    }
    private byte[] toByteArray(String str){
        byte[] ret = new byte[ str.length() ];
        for(int i=0; i<ret.length;i++){
            char ch =  str.charAt(i);
            if(ch<=0xFF){
                ret[i]= (byte)ch;
            }else{
                throw new IllegalArgumentException(" byte value out of range: " + ch);
            }
        }
        return ret;
    }
    private void TestGetUnicodeSet(DataMap testcase) {
        /*
         * charset - will be opened, and ucnv_getUnicodeSet() called on it //
         * map - set of code points and strings that must be in the returned set //
         * mapnot - set of code points and strings that must *not* be in the //
         * returned set // which - numeric UConverterUnicodeSet value Headers {
         * "charset", "map", "mapnot", "which" }
         */
        ConversionCase cc = new ConversionCase();
        // retrieve test case data
        cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                .getString();
        cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString();
        cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
                .getString();
        cc.which = ((ICUResourceBundle) testcase.getObject("which")).getUInt();
 
        // create charset and encoder for each test case
        logln("Test not supported at this time");

    }

    /**
     * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
     * start of the stream for example U+FEFF (the Unicode BOM/signature
     * character) that can be ignored.
     * 
     * Detects Unicode signature byte sequences at the start of the byte stream
     * and returns number of bytes of the BOM of the indicated Unicode charset.
     * 0 is returned when no Unicode signature is recognized.
     * 
     */

    private String detectUnicodeSignature(ByteBuffer source) {
        int signatureLength = 0; // number of bytes of the signature
        final int SIG_MAX_LEN = 5;
        String sigUniCharset = null; // states what unicode charset is the BOM
        int i = 0;

        /*
         * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
         * don't misdetect something
         */
        byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
                (byte) 0xa5 };

        while (i < source.remaining() && i < SIG_MAX_LEN) {
            start[i] = source.get(i);
            i++;
        }

        if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
            signatureLength = 2;
            sigUniCharset = "UTF-16BE";
            source.position(signatureLength);
            return sigUniCharset;
        } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
            if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
                signatureLength = 4;
                sigUniCharset = "UTF-32LE";
                source.position(signatureLength);
                return sigUniCharset;
            } else {
                signatureLength = 2;
                sigUniCharset = "UTF-16LE";
                source.position(signatureLength);
                return sigUniCharset;
            }
        } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
                && start[2] == (byte) 0xBF) {
            signatureLength = 3;
            sigUniCharset = "UTF-8";
            source.position(signatureLength);
            return sigUniCharset;
        } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
                && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
            signatureLength = 4;
            sigUniCharset = "UTF-32BE";
            source.position(signatureLength);
            return sigUniCharset;
        } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
                && start[2] == (byte) 0xFF) {
            signatureLength = 3;
            sigUniCharset = "SCSU";
            source.position(signatureLength);
            return sigUniCharset;
        } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
                && start[2] == (byte) 0x28) {
            signatureLength = 3;
            sigUniCharset = "BOCU-1";
            source.position(signatureLength);
            return sigUniCharset;
        } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
                && start[2] == (byte) 0x76) {

            if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
                signatureLength = 5;
                sigUniCharset = "UTF-7";
                source.position(signatureLength);
                return sigUniCharset;
            } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
                    || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
                signatureLength = 4;
                sigUniCharset = "UTF-7";
                source.position(signatureLength);
                return sigUniCharset;
            }
        } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
                && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
            signatureLength = 4;
            sigUniCharset = "UTF-EBCDIC";
            source.position(signatureLength);
            return sigUniCharset;
        }

        /* no known Unicode signature byte sequence recognized */
        return null;
    }

    void printbytes(ByteBuffer buf, int pos) {
        int cur = buf.position();
        log(" (" + pos + ")==[");
        for (int i = 0; i < pos; i++) {
            log("(" + i + ")" + hex(buf.get(i) & 0xff) + " ");
        }
        log("]");
        buf.position(cur);
    }

    void printchar(CharBuffer buf, int pos) {
        int cur = buf.position();
        log(" (" + pos + ")==[");
        for (int i = 0; i < pos; i++) {
            log("(" + i + ")" + hex(buf.get(i)) + " ");
        }
        log("]");
        buf.position(cur);
    }

    private void checkResultsFromUnicode(ConversionCase cc, ByteBuffer source, ByteBuffer target) {
 
        int len = target.position();
        source.rewind();
        target.rewind();
        
        // remove any BOM signature before checking
        detectUnicodeSignature(target);

        // test to see if the conversion matches actual results
        // remove any BOM signature before checking
        detectUnicodeSignature(target);
        
        len = len-target.position();
        
        if (len != source.remaining()) {
            errln("Test failed: output does not match expected\n"); 
            logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
            printbytes(target, len); 
            return;
        }
        for (int i = 0; i < source.remaining(); i++) {
            if (target.get() != source.get()) {
                errln("Test failed: output does not match expected\n"); 
                logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
                printbytes(target, len); 
                return;
            }
        }
        logln("["+ cc.caseNr + "]:"+cc.charset);
        log("output=" ); 
        printbytes(target, len);  
        logln("\nPassed\n");
        return;
    }

    private void checkResultsToUnicode(ConversionCase cc, String source, CharBuffer target) {
 
        int len = target.position(); 
        target.rewind(); 

        // test to see if the conversion matches actual results
        if (len != source.length()) {
            errln("Test failed: output does not match expected\n"); 
            logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
            printchar(target,len); 
            return;
        }
        for (int i = 0; i < source.length(); i++) {
            if ( ! (hex(target.get(i)).equals(hex(source.charAt(i)))) ) {
                errln("Test failed: output does not match expected\n"); 
                logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
                printchar(target,len); 
                return;
            }
        }
        logln("["+ cc.caseNr + "]:"+cc.charset);
        log("output=" ); 
         printchar(target,len);  
        logln("\nPassed\n"); 
        return;
    }
}
