src/com/ibm/icu/dev/test/charset/TestConversion.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2002-2006, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  *
  * $Source: /icu/icuhtml/icu.sf.net/docs/eclipse_howto/eclipse3x.html,v
  com.ibm.icu.dev.test.charset/TestConversion.java,v $
  * $Date: 2006/09/18 21:30:45 $
  * $Revision: 1.5 $
  *
  *******************************************************************************
  */

 package com.ibm.icu.dev.test.charset;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Iterator;

 import com.ibm.icu.charset.CharsetDecoderICU;
 import com.ibm.icu.charset.CharsetProviderICU;
 import com.ibm.icu.dev.test.ModuleTest;
 import com.ibm.icu.dev.test.TestDataModule.DataMap;
 import com.ibm.icu.impl.ICUResourceBundle;


 /**
  * Data-driven conversion tests. This class corresponds to convtest.c and runs
  * the test cases read from the conversion test data file.
  *
  */
 public class TestConversion extends ModuleTest {
     /**
      * Holder for the data of a single conversion test case. It mirrors the
      * conversion-case C struct declared in convtest.h.
      */
     private class ConversionCase {
         /** Index of the test case. */
         int caseNr;
         /** Callback options; stays null until a callback string is parsed. */
         String option;
         /** Callback action type; stays null until a callback string is parsed. */
         CodingErrorAction cbErrorAction;

         // ---- values read from a test case in conversion.txt ----

         /** Charset name. */
         String charset;
         /** Unicode string. */
         String unicode;
         /** Bytes. */
         ByteBuffer bytes;
         /** Offsets. */
         int[] offsets;
         /** Flush flag. */
         boolean finalFlush;
         /** Fallback flag. */
         boolean fallbacks;
         /** Error code. */
         String outErrorCode;
         /** Callback string. */
         String cbopt;

         // ---- values used by TestGetUnicodeSet only ----

         String map;
         String mapnot;
         int which;
     }

     // public methods --------------------------------------------------------

     /**
      * Command-line entry point: creates the test and hands it the arguments.
      *
      * @param args arguments passed through unchanged to run()
      * @throws Exception whatever run() propagates
      */
     public static void main(String[] args) throws Exception {
         TestConversion test = new TestConversion();
         test.run(args);
     }

     /**
      * Creates the test by passing the strings "com/ibm/icu/dev/data/testdata/"
      * and "conversion" to the ModuleTest constructor.
      * NOTE(review): presumably these are the test data location and the name
      * of the data module - confirm against ModuleTest.
      */
     public TestConversion() {
         super("com/ibm/icu/dev/data/testdata/", "conversion");
     }

     /**
      * Runs every test case supplied by the current test data, dispatching on
      * the test name ("toUnicode", "fromUnicode" or "getUnicodeSet", compared
      * case-insensitively). This method maps to the convtest.cpp
      * runIndexedTest() method.
      *
      * An unexpected exception ends the iteration. It is reported through
      * errln so that the run is marked as failed; printing the stack trace
      * alone would let an aborted run look like a pass.
      */
     public void processModules() {
         try {
             String testName = t.getName().toString();
             // separate running indices for the two numbered test kinds
             int testToUnicode = 0;
             int testFromUnicode = 0;
             // Iterate through and get each of the test case to process
             for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
                 DataMap testcase = (DataMap) iter.next();

                 if (testName.equalsIgnoreCase("toUnicode")) {
                     TestToUnicode(testcase, testToUnicode);
                     testToUnicode++;
                 } else if (testName.equalsIgnoreCase("fromUnicode")) {
                     TestFromUnicode(testcase, testFromUnicode);
                     testFromUnicode++;
                 } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
                     TestGetUnicodeSet(testcase);
                 } else {
                     warnln("Could not load the test cases for conversion");
                 }
             }
         } catch (Exception e) {
             e.printStackTrace();
             errln("Unexpected exception while running conversion test cases: " + e);
         }

     }

     // private methods -------------------------------------------------------

     /**
      * Loads one toUnicode test case from the data map, validates it, parses
      * its callback string and hands the case to ToUnicodeCase.
      *
      * @param testcase data map supplying the fields of the test case
      * @param caseNr   index of the test case; used in log output and for the
      *                 hard-coded skip of case 63
      */
     private void TestToUnicode(DataMap testcase, int caseNr) {
         // create Conversion case to store the test case data
         ConversionCase cc = new ConversionCase();
         cc.caseNr = caseNr;

         try {
             // retrieve test case data
             cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                     .getString();
             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
                     .getBinary();
             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
                     .getString();
             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
                     .getIntVector();
             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
                     .getUInt() != 0;
             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
                     .getUInt() != 0;
             cc.outErrorCode = ((ICUResourceBundle) testcase
                     .getObject("errorCode")).getString();
             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
                     .getString();
         } catch (Exception e) {
             // include the cause so a broken data file can be diagnosed
             errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr
                     + ": " + e);
             return;
         }

         // ---- for debugging only ----
         logln("\nTestToUnicode[" + caseNr + "] "
                 + cc.charset + " ");
         logln("Bytes:");
         printbytes(cc.bytes, cc.bytes.limit());
         logln("");
         logln("Unicode: " + hex(cc.unicode));
         logln("Callback: (" + cc.cbopt + ")");
         logln("\n...............................................");
         // ---- for debugging only ----

         //This test case is skipped due to limitation in java's API for decoder replacement
         // { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{""} }
         // NOTE(review): the skip is keyed on the case index, so it silently
         // moves to another case if the data file is reordered.
         if (cc.caseNr == 63) {
             logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
             logln("Skipping test due to limitation in Java API - callback replacement value");
             return;
         }

         // process the retrieved test data case
         if (cc.offsets.length == 0) {
             cc.offsets = null;
         } else if (cc.offsets.length != cc.unicode.length()) {
             errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode[" + cc.unicode.length()
                     + "] and offsets[" + cc.offsets.length
                     + "] must have the same length");
             return;
         }

         // check for the callback replacement value for unmappable
         // characters or malformed errors
         if (cc.cbopt.length() > 0) {
             switch (cc.cbopt.charAt(0)) {
             case '?':           //CALLBACK_SUBSTITUTE
                 cc.cbErrorAction = CodingErrorAction.REPLACE;
                 break;
             case '0':           //CALLBACK_SKIP
                 cc.cbErrorAction = CodingErrorAction.IGNORE;
                 break;
             case '.':           //CALLBACK_STOP
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             case '&':           //CALLBACK_ESCAPE (handled the same as STOP here)
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             default:
                 cc.cbErrorAction = null;
                 break;
             }
         }

         // Everything after a recognized action character is the option
         // string; without a recognized action the whole callback string is.
         cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);

         logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
         ToUnicodeCase(cc);

     }

     /**
      * Decodes the bytes of a test case with the ICU decoder for its charset
      * and compares the decoded text with the expected Unicode string.
      *
      * The case is skipped (logged only) when the charset cannot be opened or
      * when the callback string asks for a substitution the Java API cannot
      * express.
      *
      * @param cc test case whose callback string has already been parsed into
      *           cbErrorAction and option
      */
     private void ToUnicodeCase(ConversionCase cc) {

         // create converter for charset and decoder for each test case
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetDecoderICU decoder = null;
         Charset charset = null;

         try {
             charset = (Charset) provider.charsetForName(cc.charset);
             decoder = (CharsetDecoderICU) charset.newDecoder();
             decoder.onMalformedInput(CodingErrorAction.REPLACE);
             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

         } catch (Exception e) {

             logln("Skipping test:(" + cc.charset
                     + ") due to ICU Charset not supported at this time");
             return;
         }

         // set the callback for the decoder
         if (cc.cbErrorAction != null) {
             decoder.onMalformedInput(cc.cbErrorAction);
             decoder.onUnmappableCharacter(cc.cbErrorAction);

             // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
             if ("i".equals(cc.option)) {
                 decoder.onMalformedInput(CodingErrorAction.REPORT);
             }

             // if callback action is replace, and there is a subchar
             // replace the decoder's default replacement value
             // if substring, skip test due to current api not supporting
             // substring replacement
             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)
                     && cc.cbopt.length() > 1) {
                 if (cc.cbopt.charAt(1) == '=') {
                     logln("Skipping test due to limitation in Java API - substitution string not supported");
                     // really skip, as the message says and as the
                     // encoder path does
                     return;
                 }
                 // read NUL-separated subchar first, if any
                 // copy the subchar from Latin-1 characters
                 // start after the NUL
                 if (cc.cbopt.charAt(1) == 0x00) {
                     cc.cbopt = cc.cbopt.substring(2);

                     try {
                         decoder.replaceWith(cc.cbopt);
                     } catch (Exception e) {
                         logln("Skipping test due to limitation in Java API - substitution character sequence size error");
                         return;
                     }
                 }
             }
         }

         // decode source to unicode; work on a view of the test bytes from
         // index 0 up to their limit so cc.bytes itself is left untouched
         ByteBuffer source = cc.bytes.duplicate();
         source.rewind();

         // Never start with an empty buffer: growing multiplies the capacity,
         // so a capacity of 0 could never grow and the loop would not end.
         int initialCapacity = (int) (decoder.averageCharsPerByte() * source.remaining());
         CharBuffer out = CharBuffer.allocate(Math.max(initialCapacity, 1));

         CoderResult cr;
         do {
             cr = decoder.decode(source, out, true);
             if (cr.isOverflow()) {
                 // carry the chars decoded so far over into a larger buffer
                 CharBuffer grown = CharBuffer.allocate(out.capacity() * 4);
                 out.flip();
                 grown.put(out);
                 out = grown;
             } else if (cr.isError()) {
                 // conversion stopped: compare what was produced up to here
                 checkResultsToUnicode(cc, cc.unicode, out);
                 return;
             }
             // retry after an overflow even when the input is used up, so
             // output that did not fit is not dropped
         } while (cr.isOverflow() || source.remaining() > 0);

         checkResultsToUnicode(cc, cc.unicode, out);
     }

     /**
      * Loads one fromUnicode test case from the data map, validates it,
      * parses its callback string and hands the case to FromUnicodeCase.
      *
      * @param testcase data map supplying the fields of the test case
      * @param caseNr   index of the test case, used in log output
      */
     private void TestFromUnicode(DataMap testcase, int caseNr) {

         ConversionCase cc = new ConversionCase();
         cc.caseNr = caseNr;

         try {
             // retrieve test case data
             cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                     .getString();
             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
                     .getString();
             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
                     .getBinary();
             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
                     .getIntVector();
             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
                     .getUInt() != 0;
             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
                     .getUInt() != 0;
             cc.outErrorCode = ((ICUResourceBundle) testcase
                     .getObject("errorCode")).getString();
             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
                     .getString();

         } catch (Exception e) {
             // one message, naming the right test kind and the cause
             errln("Skipping test: error parsing conversion/fromUnicode test case " + cc.caseNr
                     + ": " + e);
             return;
         }

         // ---- for debugging only ----
         logln("\nTestFromUnicode[" + caseNr + "] "
                 + cc.charset + " ");
         logln("Unicode: " + cc.unicode);
         logln("Bytes:");
         printbytes(cc.bytes, cc.bytes.limit());
         logln("");
         logln("Callback: (" + cc.cbopt + ")");
         logln("...............................................");
         // ---- for debugging only ----

         // TODO: ***Currently skipping test for some charsets because their
         // external mappings need to be fixed.
         // NOTE(review): the original comment listed ibm-1390, gb18030 and
         // ibm-930, but the code checks ibm-970 - confirm which is intended.
         if (cc.charset.equalsIgnoreCase("ibm-1390")
                 || cc.charset.equalsIgnoreCase("gb18030")
                 || cc.charset.equalsIgnoreCase("ibm-970")) {
             logln("Skipping test:("
                     + cc.charset
                     + ") due to ICU Charset external mapping not supported at this time");
             return;
         }

         // process the retrieved test data case
         if (cc.offsets.length == 0) {
             cc.offsets = null;
         } else if (cc.offsets.length != cc.bytes.limit()) {
             // report the byte count, not the ByteBuffer object itself
             errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes.limit()
                     + "] and offsets[" + cc.offsets.length
                     + "] must have the same length");
             return;
         }

         // check the callback replacement value
         if (cc.cbopt.length() > 0) {
             switch (cc.cbopt.charAt(0)) {
             case '?':           //CALLBACK_SUBSTITUTE
                 cc.cbErrorAction = CodingErrorAction.REPLACE;
                 break;
             case '0':           //CALLBACK_SKIP
                 cc.cbErrorAction = CodingErrorAction.IGNORE;
                 break;
             case '.':           //CALLBACK_STOP
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             case '&':           //CALLBACK_ESCAPE (handled the same as STOP here)
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             default:
                 cc.cbErrorAction = null;
                 break;
             }
         }

         // Everything after a recognized action character is the option
         // string; without a recognized action the whole callback string is.
         // Set unconditionally, as TestToUnicode does, so option is never
         // left null.
         cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);

         logln("TestFromUnicode[" + cc.caseNr + "] " + cc.charset);
         FromUnicodeCase(cc);

     }

     /**
      * Encodes the Unicode string of a test case with the ICU encoder for
      * its charset and compares the produced bytes with the expected bytes.
      *
      * The case is skipped (logged only) when the charset cannot be opened or
      * when the callback string asks for a substitution the Java API cannot
      * express.
      *
      * @param cc test case whose callback string has already been parsed into
      *           cbErrorAction and option
      */
     private void FromUnicodeCase(ConversionCase cc) {

         // create charset encoder for conversion test
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetEncoder encoder = null;
         Charset charset = null;
         try {
             charset = (Charset) provider.charsetForName(cc.charset);
             encoder = (CharsetEncoder) charset.newEncoder();
             encoder.onMalformedInput(CodingErrorAction.REPLACE);
             encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

         } catch (Exception e) {

             logln("Skipping test:(" + cc.charset
                     + ") due to ICU Charset not supported at this time");
             return;

         }

         // set the callback for the encoder
         if (cc.cbErrorAction != null) {
             encoder.onUnmappableCharacter(cc.cbErrorAction);
             encoder.onMalformedInput(cc.cbErrorAction);

             // if action has an option, put in the option for the case
             if ("i".equals(cc.option)) {
                 encoder.onMalformedInput(CodingErrorAction.REPORT);
             }

             // if callback action is replace, and there is a subchar
             // replace the encoder's default replacement value
             // if substring, skip test due to current api not supporting
             // substring
             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)
                     && cc.cbopt.length() > 1) {
                 if (cc.cbopt.charAt(1) == '=') {
                     logln("Skipping test due to limitation in Java API - substitution string not supported");
                     return;
                 }
                 // read NUL-separated subchar first, if any
                 // copy the subchar from Latin-1 characters
                 // start after the NUL
                 if (cc.cbopt.charAt(1) == 0x00) {
                     cc.cbopt = cc.cbopt.substring(2);
                     try {
                         encoder.replaceWith(toByteArray(cc.cbopt));
                     } catch (Exception e) {
                         logln("Skipping test due to limitation in Java API - substitution character sequence size error");
                         return;
                     }
                 }
             }
         }

         // do charset encoding from unicode
         CharBuffer uniStr = CharBuffer.wrap(cc.unicode.toCharArray());

         // Never start with an empty buffer: growing multiplies the capacity,
         // so a capacity of 0 could never grow and the loop would not end.
         int initialCapacity = (int) (encoder.averageBytesPerChar() * uniStr.remaining());
         ByteBuffer out = ByteBuffer.allocate(Math.max(initialCapacity, 1));

         CoderResult cr;
         do {
             cr = encoder.encode(uniStr, out, true);
             if (cr.isOverflow()) {
                 out = growBuffer(out);
             } else if (cr.isError()) {
                 // check the stopped test for current output and match the
                 // expected results
                 checkResultsFromUnicode(cc, cc.bytes, out);
                 return;
             }
             // retry after an overflow even when the input is used up, so
             // output that did not fit is not dropped
         } while (cr.isOverflow() || uniStr.remaining() > 0);

         // all input is encoded: flush, growing the buffer if the flushed
         // bytes do not fit
         cr = encoder.flush(out);
         while (cr.isOverflow()) {
             out = growBuffer(out);
             cr = encoder.flush(out);
         }

         checkResultsFromUnicode(cc, cc.bytes, out);
     }

     /**
      * Returns a buffer with four times the capacity of {@code full} that
      * holds the bytes written to {@code full} so far and is positioned
      * right after them.
      */
     private static ByteBuffer growBuffer(ByteBuffer full) {
         ByteBuffer grown = ByteBuffer.allocate(full.capacity() * 4);
         full.flip();
         grown.put(full);
         return grown;
     }
     /**
      * Converts a string to bytes, one byte per character, keeping the low
      * eight bits of each character.
      *
      * @param str string whose characters must all be at most 0xFF
      * @return array with one byte per character of {@code str}
      * @throws IllegalArgumentException if a character is greater than 0xFF
      */
     private byte[] toByteArray(String str){
         final int length = str.length();
         byte[] bytes = new byte[length];
         int index = 0;
         while (index < length) {
             char unit = str.charAt(index);
             if (unit > 0xFF) {
                 throw new IllegalArgumentException(" byte value out of range: " + unit);
             }
             bytes[index] = (byte) unit;
             index++;
         }
         return bytes;
     }
     /**
      * Reads the fields of a getUnicodeSet test case and logs that the test
      * is not supported; no conversion is performed.
      *
      * Fields read from the data map (headers "charset", "map", "mapnot",
      * "which"):
      *   charset - will be opened, and ucnv_getUnicodeSet() called on it
      *   map     - set of code points and strings that must be in the
      *             returned set
      *   mapnot  - set of code points and strings that must *not* be in the
      *             returned set
      *   which   - numeric UConverterUnicodeSet value
      *
      * @param testcase data map supplying the fields of the test case
      */
     private void TestGetUnicodeSet(DataMap testcase) {
         ConversionCase cc = new ConversionCase();

         // retrieve test case data, one field at a time
         ICUResourceBundle charsetItem = (ICUResourceBundle) testcase.getObject("charset");
         cc.charset = charsetItem.getString();

         ICUResourceBundle mapItem = (ICUResourceBundle) testcase.getObject("map");
         cc.map = mapItem.getString();

         ICUResourceBundle mapnotItem = (ICUResourceBundle) testcase.getObject("mapnot");
         cc.mapnot = mapnotItem.getString();

         ICUResourceBundle whichItem = (ICUResourceBundle) testcase.getObject("which");
         cc.which = whichItem.getUInt();

         // create charset and encoder for each test case
         logln("Test not supported at this time");

     }

     /**
      * Detects a Unicode signature (BOM) byte sequence at the start of the
      * buffer, following ucnv_detectUnicodeSignature() in ucnv.c.
      *
      * Bytes are read by absolute index starting at 0, and at most
      * min(source.remaining(), 5) of them are examined. When a signature is
      * recognized, the buffer position is set to the signature length so
      * that reading continues right after it; otherwise the buffer is left
      * as it was.
      *
      * @param source buffer to inspect; the caller in this class rewinds it
      *               before the call
      * @return name of the charset whose signature was recognized, or null
      *         when no known signature is found
      */
     private String detectUnicodeSignature(ByteBuffer source) {
         final int SIG_MAX_LEN = 5;

         /*
          * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
          * don't misdetect something
          */
         byte[] start = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
                 (byte) 0xa5 };

         int available = Math.min(source.remaining(), SIG_MAX_LEN);
         for (int i = 0; i < available; i++) {
             start[i] = source.get(i);
         }

         String sigUniCharset = null; // states what unicode charset is the BOM
         int signatureLength = 0;     // number of bytes of the signature

         if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
             signatureLength = 2;
             sigUniCharset = "UTF-16BE";
         } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
             // FF FE is also the beginning of the UTF-32LE signature
             if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
                 signatureLength = 4;
                 sigUniCharset = "UTF-32LE";
             } else {
                 signatureLength = 2;
                 sigUniCharset = "UTF-16LE";
             }
         } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
                 && start[2] == (byte) 0xBF) {
             signatureLength = 3;
             sigUniCharset = "UTF-8";
         } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
                 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
             signatureLength = 4;
             sigUniCharset = "UTF-32BE";
         } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
                 && start[2] == (byte) 0xFF) {
             signatureLength = 3;
             sigUniCharset = "SCSU";
         } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
                 && start[2] == (byte) 0x28) {
             signatureLength = 3;
             sigUniCharset = "BOCU-1";
         } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
                 && start[2] == (byte) 0x76) {
             // UTF-7: the fourth byte decides; 38 2D gives the 5-byte form
             if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
                 signatureLength = 5;
                 sigUniCharset = "UTF-7";
             } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
                     || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
                 signatureLength = 4;
                 sigUniCharset = "UTF-7";
             }
         } else if (start[0] == (byte) 0xDD && start[1] == (byte) 0x73
                 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
             // The signature is DD 73 66 73. The second byte must be taken
             // from start[1]; testing start[2] against two different values
             // could never match.
             signatureLength = 4;
             sigUniCharset = "UTF-EBCDIC";
         }

         if (sigUniCharset != null) {
             // step over the signature
             source.position(signatureLength);
         }

         /* null here means no known Unicode signature byte sequence recognized */
         return sigUniCharset;
     }

     /**
      * Logs the bytes at indices 0 to pos-1 of the buffer, each as
      * "(index)hex", enclosed in brackets and preceded by the count.
      * The buffer position is the same after the call as before it.
      *
      * @param buf buffer to print from
      * @param pos number of bytes to print, starting at index 0
      */
     void printbytes(ByteBuffer buf, int pos) {
         final int savedPosition = buf.position();
         log(" (" + pos + ")==[");
         int index = 0;
         while (index < pos) {
             // mask to print the byte as an unsigned value
             int unsigned = buf.get(index) & 0xff;
             log("(" + index + ")" + hex(unsigned) + " ");
             index++;
         }
         log("]");
         buf.position(savedPosition);
     }

     /**
      * Logs the chars at indices 0 to pos-1 of the buffer, each as
      * "(index)hex", enclosed in brackets and preceded by the count.
      * The buffer position is the same after the call as before it.
      *
      * @param buf buffer to print from
      * @param pos number of chars to print, starting at index 0
      */
     void printchar(CharBuffer buf, int pos) {
         final int savedPosition = buf.position();
         log(" (" + pos + ")==[");
         int index = 0;
         while (index < pos) {
             char unit = buf.get(index);
             log("(" + index + ")" + hex(unit) + " ");
             index++;
         }
         log("]");
         buf.position(savedPosition);
     }

     /**
      * Compares the bytes produced by an encoder with the expected bytes and
      * reports a mismatch through errln. A Unicode signature at the start of
      * the produced bytes is stepped over before comparing.
      *
      * @param cc     test case, used for the log output
      * @param source expected bytes; rewound here and compared up to its limit
      * @param target produced bytes; its position on entry marks the end of
      *               the output
      */
     private void checkResultsFromUnicode(ConversionCase cc, ByteBuffer source, ByteBuffer target) {

         int len = target.position();
         source.rewind();
         target.rewind();

         // Remove any BOM signature before checking. This is done once only:
         // a second detection can read fewer bytes and settle on a shorter
         // signature.
         detectUnicodeSignature(target);

         len = len - target.position();

         // test to see if the conversion matches actual results
         boolean matches = (len == source.remaining());

         // Compare until the expected bytes are used up. The relative get()
         // calls shrink remaining(), so an index counted against remaining()
         // would stop after only half of the bytes.
         while (matches && source.hasRemaining()) {
             matches = (target.get() == source.get());
         }

         if (!matches) {
             errln("Test failed: output does not match expected\n");
             logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
             printbytes(target, len);
             return;
         }
         logln("["+ cc.caseNr + "]:"+cc.charset);
         log("output=" );
         printbytes(target, len);
         logln("\nPassed\n");
     }

     /**
      * Compares the chars produced by a decoder with the expected string and
      * reports a mismatch through errln.
      *
      * @param cc     test case, used for the log output
      * @param source expected text
      * @param target produced chars; its position on entry marks the end of
      *               the output
      */
     private void checkResultsToUnicode(ConversionCase cc, String source, CharBuffer target) {

         final int produced = target.position();
         target.rewind();

         // test to see if the conversion matches actual results:
         // same length first, then char by char via the hex representation
         boolean same = (produced == source.length());
         for (int index = 0; same && index < source.length(); index++) {
             String actual = hex(target.get(index));
             String expected = hex(source.charAt(index));
             same = actual.equals(expected);
         }

         if (!same) {
             errln("Test failed: output does not match expected\n");
             logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
             printchar(target, produced);
             return;
         }

         logln("["+ cc.caseNr + "]:"+cc.charset);
         log("output=" );
         printchar(target, produced);
         logln("\nPassed\n");
     }
 }
	/*
	*******************************************************************************
	* Copyright (C) 2002-2006, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /icu/icuhtml/icu.sf.net/docs/eclipse_howto/eclipse3x.html,v
	com.ibm.icu.dev.test.charset/TestConversion.java,v $
	* $Date: 2006/09/18 21:30:45 $
	* $Revision: 1.5 $
	*
	*******************************************************************************
	*/

	package com.ibm.icu.dev.test.charset;

	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CoderResult;
	import java.nio.charset.CodingErrorAction;
	import java.util.Iterator;

	import com.ibm.icu.charset.CharsetDecoderICU;
	import com.ibm.icu.charset.CharsetProviderICU;
	import com.ibm.icu.dev.test.ModuleTest;
	import com.ibm.icu.dev.test.TestDataModule.DataMap;
	import com.ibm.icu.impl.ICUResourceBundle;


	/**
	* Data-driven conversion tests. This class corresponds to convtest.c and runs
	* the test cases read from the conversion test data file.
	*
	*/
	public class TestConversion extends ModuleTest {
	/**
	 * Holder for the data of a single conversion test case. It mirrors the
	 * conversion-case C struct declared in convtest.h.
	 */
	private class ConversionCase {
	    /** Index of the test case. */
	    int caseNr;
	    /** Callback options; stays null until a callback string is parsed. */
	    String option;
	    /** Callback action type; stays null until a callback string is parsed. */
	    CodingErrorAction cbErrorAction;

	    // ---- values read from a test case in conversion.txt ----

	    /** Charset name. */
	    String charset;
	    /** Unicode string. */
	    String unicode;
	    /** Bytes. */
	    ByteBuffer bytes;
	    /** Offsets. */
	    int[] offsets;
	    /** Flush flag. */
	    boolean finalFlush;
	    /** Fallback flag. */
	    boolean fallbacks;
	    /** Error code. */
	    String outErrorCode;
	    /** Callback string. */
	    String cbopt;

	    // ---- values used by TestGetUnicodeSet only ----

	    String map;
	    String mapnot;
	    int which;
	}

	// public methods --------------------------------------------------------

	/**
	 * Command-line entry point: creates the test and hands it the arguments.
	 *
	 * @param args arguments passed through unchanged to run()
	 * @throws Exception whatever run() propagates
	 */
	public static void main(String[] args) throws Exception {
	    TestConversion test = new TestConversion();
	    test.run(args);
	}

	/**
	 * Creates the test by passing the strings "com/ibm/icu/dev/data/testdata/"
	 * and "conversion" to the ModuleTest constructor.
	 * NOTE(review): presumably these are the test data location and the name
	 * of the data module - confirm against ModuleTest.
	 */
	public TestConversion() {
	super("com/ibm/icu/dev/data/testdata/", "conversion");
	}

	/**
	 * Runs every test case supplied by the current test data, dispatching on
	 * the test name ("toUnicode", "fromUnicode" or "getUnicodeSet", compared
	 * case-insensitively). This method maps to the convtest.cpp
	 * runIndexedTest() method.
	 *
	 * An unexpected exception ends the iteration. It is reported through
	 * errln so that the run is marked as failed; printing the stack trace
	 * alone would let an aborted run look like a pass.
	 */
	public void processModules() {
	    try {
	        String testName = t.getName().toString();
	        // separate running indices for the two numbered test kinds
	        int testToUnicode = 0;
	        int testFromUnicode = 0;
	        // Iterate through and get each of the test case to process
	        for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
	            DataMap testcase = (DataMap) iter.next();

	            if (testName.equalsIgnoreCase("toUnicode")) {
	                TestToUnicode(testcase, testToUnicode);
	                testToUnicode++;
	            } else if (testName.equalsIgnoreCase("fromUnicode")) {
	                TestFromUnicode(testcase, testFromUnicode);
	                testFromUnicode++;
	            } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
	                TestGetUnicodeSet(testcase);
	            } else {
	                warnln("Could not load the test cases for conversion");
	            }
	        }
	    } catch (Exception e) {
	        e.printStackTrace();
	        errln("Unexpected exception while running conversion test cases: " + e);
	    }

	}

	// private methods -------------------------------------------------------

	/**
	 * Loads one toUnicode test case from the data map, validates it, parses
	 * its callback string and hands the case to ToUnicodeCase.
	 *
	 * @param testcase data map supplying the fields of the test case
	 * @param caseNr   index of the test case; used in log output and for the
	 *                 hard-coded skip of case 63
	 */
	private void TestToUnicode(DataMap testcase, int caseNr) {
	    // create Conversion case to store the test case data
	    ConversionCase cc = new ConversionCase();
	    cc.caseNr = caseNr;

	    try {
	        // retrieve test case data
	        cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
	                .getString();
	        cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
	                .getBinary();
	        cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
	                .getString();
	        cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
	                .getIntVector();
	        cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
	                .getUInt() != 0;
	        cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
	                .getUInt() != 0;
	        cc.outErrorCode = ((ICUResourceBundle) testcase
	                .getObject("errorCode")).getString();
	        cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
	                .getString();
	    } catch (Exception e) {
	        // include the cause so a broken data file can be diagnosed
	        errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr
	                + ": " + e);
	        return;
	    }

	    // ---- for debugging only ----
	    logln("\nTestToUnicode[" + caseNr + "] "
	            + cc.charset + " ");
	    logln("Bytes:");
	    printbytes(cc.bytes, cc.bytes.limit());
	    logln("");
	    logln("Unicode: " + hex(cc.unicode));
	    logln("Callback: (" + cc.cbopt + ")");
	    logln("\n...............................................");
	    // ---- for debugging only ----

	    //This test case is skipped due to limitation in java's API for decoder replacement
	    // { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{""} }
	    // NOTE(review): the skip is keyed on the case index, so it silently
	    // moves to another case if the data file is reordered.
	    if (cc.caseNr == 63) {
	        logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
	        logln("Skipping test due to limitation in Java API - callback replacement value");
	        return;
	    }

	    // process the retrieved test data case
	    if (cc.offsets.length == 0) {
	        cc.offsets = null;
	    } else if (cc.offsets.length != cc.unicode.length()) {
	        errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode[" + cc.unicode.length()
	                + "] and offsets[" + cc.offsets.length
	                + "] must have the same length");
	        return;
	    }

	    // check for the callback replacement value for unmappable
	    // characters or malformed errors
	    if (cc.cbopt.length() > 0) {
	        switch (cc.cbopt.charAt(0)) {
	        case '?': //CALLBACK_SUBSTITUTE
	            cc.cbErrorAction = CodingErrorAction.REPLACE;
	            break;
	        case '0': //CALLBACK_SKIP
	            cc.cbErrorAction = CodingErrorAction.IGNORE;
	            break;
	        case '.': //CALLBACK_STOP
	            cc.cbErrorAction = CodingErrorAction.REPORT;
	            break;
	        case '&': //CALLBACK_ESCAPE (handled the same as STOP here)
	            cc.cbErrorAction = CodingErrorAction.REPORT;
	            break;
	        default:
	            cc.cbErrorAction = null;
	            break;
	        }
	    }

	    // Everything after a recognized action character is the option
	    // string; without a recognized action the whole callback string is.
	    cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);

	    logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
	    ToUnicodeCase(cc);

	}

	/**
	 * Decodes the bytes of one conversion test case with the ICU decoder for
	 * {@code cc.charset} and hands the decoded characters to
	 * {@link #checkResultsToUnicode} for comparison against {@code cc.unicode}.
	 *
	 * The case is skipped (logged, not failed) when the charset or its decoder
	 * cannot be created.
	 *
	 * @param cc the conversion case; {@code cc.cbopt} may be rewritten to the
	 *           bare substitution character when a NUL-separated subchar is present
	 */
	private void ToUnicodeCase(ConversionCase cc) {

	    // create converter for charset and decoder for each test case
	    CharsetProviderICU provider = new CharsetProviderICU();
	    CharsetDecoderICU decoder = null;
	    Charset charset = null;

	    try {
	        charset = (Charset) provider.charsetForName(cc.charset);
	        decoder = (CharsetDecoderICU) charset.newDecoder();
	        decoder.onMalformedInput(CodingErrorAction.REPLACE);
	        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
	    } catch (Exception e) {
	        logln("Skipping test:(" + cc.charset
	                + ") due to ICU Charset not supported at this time");
	        return;
	    }

	    // set the callback for the decoder
	    if (cc.cbErrorAction != null) {
	        decoder.onMalformedInput(cc.cbErrorAction);
	        decoder.onUnmappableCharacter(cc.cbErrorAction);

	        // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
	        if ("i".equals(cc.option)) {
	            decoder.onMalformedInput(CodingErrorAction.REPORT);
	        }

	        // if callback action is replace, and there is a subchar
	        // replace the decoder's default replacement value
	        // if substring, the current api does not support substring replacement
	        if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE) && cc.cbopt.length() > 1) {
	            if (cc.cbopt.charAt(1) == '=') {
	                // NOTE(review): this logs "Skipping" but the case still runs below,
	                // unlike FromUnicodeCase which returns here - confirm which is intended.
	                logln("Skipping test due to limitation in Java API - substitution string not supported");
	            } else if (cc.cbopt.charAt(1) == 0x00) {
	                // read NUL-separated subchar: the replacement starts after the NUL
	                cc.cbopt = cc.cbopt.substring(2);
	                try {
	                    decoder.replaceWith(cc.cbopt);
	                } catch (Exception e) {
	                    // NOTE(review): logged as skipped, but decoding continues with
	                    // the decoder's previous replacement value.
	                    logln("Skipping test due to limitation in Java API - substitution character sequence size error");
	                }
	            }
	        }
	    }

	    // decode source to unicode
	    ByteBuffer source = ByteBuffer.wrap(cc.bytes.array());
	    // The estimate can truncate to 0 (e.g. one byte at 0.5 chars/byte). A
	    // zero-capacity buffer could never grow by multiplication, so the loop
	    // below would spin forever on OVERFLOW; keep at least one char of room.
	    int estimate = (int) (decoder.averageCharsPerByte() * source.remaining());
	    CharBuffer out = CharBuffer.allocate(Math.max(estimate, 1));
	    do {
	        CoderResult cr = decoder.decode(source, out, true);
	        if (cr.isOverflow()) {
	            // quadruple the buffer, keeping what was decoded so far
	            CharBuffer grown = CharBuffer.allocate(out.capacity() * 4);
	            out.flip();
	            grown.put(out);
	            out = grown;
	        } else if (cr.isError()) {
	            // decoding stopped on an error: compare what was produced so far
	            checkResultsToUnicode(cc, cc.unicode, out);
	            return;
	        }
	    } while (source.remaining() > 0);

	    checkResultsToUnicode(cc, cc.unicode, out);
	}

	private void TestFromUnicode(DataMap testcase, int caseNr) {

	ConversionCase cc = new ConversionCase();
	cc.caseNr = caseNr;

	try {
	// retrieve test case data
	cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
	.getString();
	cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode"))
	.getString();
	cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
	.getBinary();
	cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets"))
	.getIntVector();
	cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush"))
	.getUInt() != 0;
	cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks"))
	.getUInt() != 0;
	cc.outErrorCode = ((ICUResourceBundle) testcase
	.getObject("errorCode")).getString();
	cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback"))
	.getString();


	} catch (Exception e) {
	errln("Skipping test:");
	errln("error parsing conversion/toUnicode test case " + cc.caseNr);
	return;
	}
	// ----for debugging only
	logln("\nTestFromUnicode[" + caseNr + "] "
	+ cc.charset + " ");
	logln("Unicode: " + cc.unicode);
	logln("Bytes:");
	printbytes(cc.bytes, cc.bytes.limit());
	logln("");
	logln("Callback: (" + cc.cbopt + ")");
	logln("...............................................");

	// ----for debugging only


	// TODO: ***Currently skipping test for charset ibm-1390, gb18030,
	// ibm-930 due to external mapping need to be fix
	if (cc.charset.equalsIgnoreCase("ibm-1390")
	\|\| cc.charset.equalsIgnoreCase("gb18030")
	\|\| cc.charset.equalsIgnoreCase("ibm-970")) {
	logln("Skipping test:("
	+ cc.charset
	+ ") due to ICU Charset external mapping not supported at this time");
	return;
	}

	// process the retrieved test data case
	if (cc.offsets.length == 0) {
	cc.offsets = null;
	} else if (cc.offsets.length != cc.bytes.limit()) {
	errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
	+ "] and offsets[" + cc.offsets.length
	+ "] must have the same length");
	return;
	}

	// check the callback replacement value
	if (cc.cbopt.length() > 0) {

	switch ((cc.cbopt).charAt(0)) {
	case '?':
	cc.cbErrorAction = CodingErrorAction.REPLACE;
	break;
	case '0':
	cc.cbErrorAction = CodingErrorAction.IGNORE;
	break;
	case '.':
	cc.cbErrorAction = CodingErrorAction.REPORT;
	break;
	case '&':
	cc.cbErrorAction = CodingErrorAction.REPORT;
	break;
	default:
	cc.cbErrorAction = null;
	break;
	}

	// check for any options for the callback value --
	cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt.substring(1);
	if (cc.option == null) {
	cc.option = null;
	}
	}
	logln("TestFromUnicode[" + cc.caseNr + "] " + cc.charset);
	FromUnicodeCase(cc);

	return;

	}

	/**
	 * Encodes the Unicode string of one conversion test case with the ICU
	 * encoder for {@code cc.charset} and hands the produced bytes to
	 * {@link #checkResultsFromUnicode} for comparison against {@code cc.bytes}.
	 *
	 * The case is skipped (logged, not failed) when the charset or encoder
	 * cannot be created, when the callback asks for a substitution string, or
	 * when the encoder rejects the requested substitution bytes.
	 *
	 * @param cc the conversion case; {@code cc.cbopt} may be rewritten to the
	 *           bare substitution character when a NUL-separated subchar is present
	 */
	private void FromUnicodeCase(ConversionCase cc) {

	    // create charset encoder for conversion test
	    CharsetProviderICU provider = new CharsetProviderICU();
	    CharsetEncoder encoder = null;
	    Charset charset = null;
	    try {
	        charset = (Charset) provider.charsetForName(cc.charset);
	        encoder = (CharsetEncoder) charset.newEncoder();
	        encoder.onMalformedInput(CodingErrorAction.REPLACE);
	        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
	    } catch (Exception e) {
	        logln("Skipping test:(" + cc.charset
	                + ") due to ICU Charset not supported at this time");
	        return;
	    }

	    // set the callback for the encoder
	    if (cc.cbErrorAction != null) {
	        encoder.onUnmappableCharacter(cc.cbErrorAction);
	        encoder.onMalformedInput(cc.cbErrorAction);

	        // if action has an option, put in the option for the case
	        if ("i".equals(cc.option)) {
	            encoder.onMalformedInput(CodingErrorAction.REPORT);
	        }

	        // if callback action is replace, and there is a subchar
	        // replace the encoder's default replacement value
	        // if substring, skip test due to current api not supporting substring
	        if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE) && cc.cbopt.length() > 1) {
	            if (cc.cbopt.charAt(1) == '=') {
	                logln("Skipping test due to limitation in Java API - substitution string not supported");
	                return;
	            }
	            // read NUL-separated subchar: the replacement bytes are the
	            // Latin-1 characters that follow the NUL
	            if (cc.cbopt.charAt(1) == 0x00) {
	                cc.cbopt = cc.cbopt.substring(2);
	                try {
	                    encoder.replaceWith(toByteArray(cc.cbopt));
	                } catch (Exception e) {
	                    logln("Skipping test due to limitation in Java API - substitution character sequence size error");
	                    return;
	                }
	            }
	        }
	    }

	    // do charset encoding from unicode
	    CharBuffer uniStr = CharBuffer.wrap(cc.unicode.toCharArray());
	    // Keep at least one byte of room: a zero-capacity buffer could never grow
	    // by multiplication, so an OVERFLOW would repeat forever.
	    int estimate = (int) (encoder.averageBytesPerChar() * uniStr.remaining());
	    ByteBuffer out = ByteBuffer.allocate(Math.max(estimate, 1));
	    do {
	        CoderResult cr = encoder.encode(uniStr, out, true);
	        if (cr.isOverflow()) {
	            out = expandEncodeBuffer(out);
	        } else if (cr.isError()) {
	            // check the stopped test for current output and match the
	            // expected results
	            checkResultsFromUnicode(cc, cc.bytes, out);
	            return;
	        } else {
	            // flush may itself need more room; retry instead of silently
	            // dropping the trailing bytes
	            while (encoder.flush(out).isOverflow()) {
	                out = expandEncodeBuffer(out);
	            }
	        }
	    } while (uniStr.remaining() > 0);

	    checkResultsFromUnicode(cc, cc.bytes, out);
	}

	/**
	 * Returns a buffer four times the capacity of {@code full}, holding the
	 * bytes written to {@code full} so far and positioned right after them.
	 */
	private static ByteBuffer expandEncodeBuffer(ByteBuffer full) {
	    ByteBuffer grown = ByteBuffer.allocate(full.capacity() * 4);
	    full.flip();
	    grown.put(full);
	    return grown;
	}
	/**
	 * Narrows each character of {@code str} to a single byte.
	 *
	 * @param str string whose characters must all be at most U+00FF
	 * @return one byte per character, in order
	 * @throws IllegalArgumentException if a character is above 0xFF
	 */
	private byte[] toByteArray(String str){
	    final int count = str.length();
	    byte[] narrowed = new byte[count];
	    int index = 0;
	    while (index < count) {
	        char unit = str.charAt(index);
	        if (unit > 0xFF) {
	            throw new IllegalArgumentException(" byte value out of range: " + unit);
	        }
	        narrowed[index] = (byte) unit;
	        index++;
	    }
	    return narrowed;
	}
	/**
	 * Reads one getUnicodeSet test case (headers "charset", "map", "mapnot",
	 * "which") into a ConversionCase and then only logs that the test is not
	 * supported; nothing is checked against a converter.
	 *
	 * @param testcase the data-driven test case record
	 */
	private void TestGetUnicodeSet(DataMap testcase) {
	    ConversionCase cc = new ConversionCase();

	    // charset - the charset the set would be queried from
	    ICUResourceBundle charsetEntry = (ICUResourceBundle) testcase.getObject("charset");
	    cc.charset = charsetEntry.getString();

	    // map - code points and strings that must be in the returned set
	    ICUResourceBundle mapEntry = (ICUResourceBundle) testcase.getObject("map");
	    cc.map = mapEntry.getString();

	    // mapnot - code points and strings that must not be in the returned set
	    ICUResourceBundle mapnotEntry = (ICUResourceBundle) testcase.getObject("mapnot");
	    cc.mapnot = mapnotEntry.getString();

	    // which - numeric UConverterUnicodeSet value
	    ICUResourceBundle whichEntry = (ICUResourceBundle) testcase.getObject("which");
	    cc.which = whichEntry.getUInt();

	    // the data is parsed but not exercised yet
	    logln("Test not supported at this time");
	}

	/**
	* This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
	* start of the stream for example U+FEFF (the Unicode BOM/signature
	* character) that can be ignored.
	*
	* Detects Unicode signature byte sequences at the start of the byte stream
	* and returns number of bytes of the BOM of the indicated Unicode charset.
	* 0 is returned when no Unicode signature is recognized.
	*
	*/

	private String detectUnicodeSignature(ByteBuffer source) {
	int signatureLength = 0; // number of bytes of the signature
	final int SIG_MAX_LEN = 5;
	String sigUniCharset = null; // states what unicode charset is the BOM
	int i = 0;

	/*
	* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
	* don't misdetect something
	*/
	byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
	(byte) 0xa5 };

	while (i < source.remaining() && i < SIG_MAX_LEN) {
	start[i] = source.get(i);
	i++;
	}

	if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
	signatureLength = 2;
	sigUniCharset = "UTF-16BE";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
	if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
	signatureLength = 4;
	sigUniCharset = "UTF-32LE";
	source.position(signatureLength);
	return sigUniCharset;
	} else {
	signatureLength = 2;
	sigUniCharset = "UTF-16LE";
	source.position(signatureLength);
	return sigUniCharset;
	}
	} else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
	&& start[2] == (byte) 0xBF) {
	signatureLength = 3;
	sigUniCharset = "UTF-8";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
	&& start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
	signatureLength = 4;
	sigUniCharset = "UTF-32BE";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
	&& start[2] == (byte) 0xFF) {
	signatureLength = 3;
	sigUniCharset = "SCSU";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
	&& start[2] == (byte) 0x28) {
	signatureLength = 3;
	sigUniCharset = "BOCU-1";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
	&& start[2] == (byte) 0x76) {

	if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
	signatureLength = 5;
	sigUniCharset = "UTF-7";
	source.position(signatureLength);
	return sigUniCharset;
	} else if (start[3] == (byte) 0x38 \|\| start[3] == (byte) 0x39
	\|\| start[3] == (byte) 0x2B \|\| start[3] == (byte) 0x2F) {
	signatureLength = 4;
	sigUniCharset = "UTF-7";
	source.position(signatureLength);
	return sigUniCharset;
	}
	} else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
	&& start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
	signatureLength = 4;
	sigUniCharset = "UTF-EBCDIC";
	source.position(signatureLength);
	return sigUniCharset;
	}

	/* no known Unicode signature byte sequence recognized */
	return null;
	}

	/**
	 * Logs the first {@code pos} bytes of {@code buf} as indexed hex values.
	 * Bytes are read by absolute index; the buffer's position is set back to
	 * its value on entry before returning.
	 *
	 * @param buf buffer to dump
	 * @param pos number of bytes to print, starting at index 0
	 */
	void printbytes(ByteBuffer buf, int pos) {
	    final int savedPosition = buf.position();
	    log(" (" + pos + ")==[");
	    int index = 0;
	    while (index < pos) {
	        // mask to get the unsigned value of the byte
	        int unsigned = buf.get(index) & 0xff;
	        log("(" + index + ")" + hex(unsigned) + " ");
	        index++;
	    }
	    log("]");
	    buf.position(savedPosition);
	}

	/**
	 * Logs the first {@code pos} chars of {@code buf} as indexed hex values.
	 * Chars are read by absolute index; the buffer's position is set back to
	 * its value on entry before returning.
	 *
	 * @param buf buffer to dump
	 * @param pos number of chars to print, starting at index 0
	 */
	void printchar(CharBuffer buf, int pos) {
	    final int savedPosition = buf.position();
	    log(" (" + pos + ")==[");
	    int index = 0;
	    while (index < pos) {
	        char unit = buf.get(index);
	        log("(" + index + ")" + hex(unit) + " ");
	        index++;
	    }
	    log("]");
	    buf.position(savedPosition);
	}

	/**
	 * Compares the bytes produced by an encoder with the expected bytes of a
	 * test case and reports the outcome through errln/logln.
	 *
	 * A leading Unicode signature (BOM) in the produced output is skipped
	 * before comparing; the expected bytes are compared as given.
	 *
	 * @param cc     the conversion case, used for log messages
	 * @param source the expected bytes; rewound before comparison
	 * @param target the encoder output, with its position just past the last
	 *               byte written; flipped by this method
	 */
	private void checkResultsFromUnicode(ConversionCase cc, ByteBuffer source, ByteBuffer target) {

	    source.rewind();
	    // flip rather than rewind: the limit becomes the number of bytes
	    // written, so signature detection cannot read unwritten bytes
	    target.flip();

	    // remove any BOM signature before checking
	    detectUnicodeSignature(target);

	    int len = target.remaining();
	    // capture the count up front: source.remaining() shrinks with each
	    // get(), so using it as the loop bound would compare only half the bytes
	    final int expectedLen = source.remaining();

	    // test to see if the conversion matches actual results
	    boolean matches = (len == expectedLen);
	    for (int i = 0; matches && i < expectedLen; i++) {
	        matches = (target.get() == source.get());
	    }

	    if (!matches) {
	        errln("Test failed: output does not match expected\n");
	        logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
	        printbytes(target, len);
	        return;
	    }
	    logln("["+ cc.caseNr + "]:"+cc.charset);
	    log("output=" );
	    printbytes(target, len);
	    logln("\nPassed\n");
	}

	/**
	 * Compares the characters produced by a decoder with the expected string
	 * of a test case and reports the outcome through errln/logln.
	 *
	 * @param cc     the conversion case, used for log messages
	 * @param source the expected Unicode string
	 * @param target the decoder output, with its position just past the last
	 *               char written; rewound by this method
	 */
	private void checkResultsToUnicode(ConversionCase cc, String source, CharBuffer target) {

	    final int len = target.position();
	    target.rewind();

	    // the output matches when the lengths agree and every code unit has
	    // the same hex form; stop at the first difference
	    boolean matches = (len == source.length());
	    int index = 0;
	    while (matches && index < source.length()) {
	        String actualHex = hex(target.get(index));
	        String expectedHex = hex(source.charAt(index));
	        matches = actualHex.equals(expectedHex);
	        index++;
	    }

	    if (matches) {
	        logln("["+ cc.caseNr + "]:"+cc.charset);
	        log("output=" );
	        printchar(target,len);
	        logln("\nPassed\n");
	    } else {
	        errln("Test failed: output does not match expected\n");
	        logln("["+ cc.caseNr + "]:"+cc.charset+"\noutput=" );
	        printchar(target,len);
	    }
	}
	}