main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2002-2014, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */

 package com.ibm.icu.dev.test.charset;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Iterator;

 import com.ibm.icu.charset.CharsetCallback;
 import com.ibm.icu.charset.CharsetDecoderICU;
 import com.ibm.icu.charset.CharsetEncoderICU;
 import com.ibm.icu.charset.CharsetICU;
 import com.ibm.icu.charset.CharsetProviderICU;
 import com.ibm.icu.dev.test.ModuleTest;
 import com.ibm.icu.dev.test.TestDataModule.DataMap;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.text.UnicodeSet;

 /**
  * This maps to convtest.c which tests the test file for data-driven conversion tests.
  *
  */
 public class TestConversion extends ModuleTest {
     /**
      * This maps to the C struct of conversion case in convtest.h that stores the
      * data for a conversion test
      *
      */
     private class ConversionCase {
         int caseNr;                                             // testcase index
         String option = null;                                   // callback options
         CodingErrorAction cbErrorAction = null;                 // callback action type
         CharBuffer toUnicodeResult = null;
         ByteBuffer fromUnicodeResult = null;

         // data retrieved from a test case conversion.txt
         String charset;                                         // charset
         String unicode;                                         // unicode string
         ByteBuffer bytes;                                       // byte
         int[] offsets;                                          // offsets
         boolean finalFlush;                                     // flush
         boolean fallbacks;                                      // fallback
         String outErrorCode;                                    // errorCode
         String cbopt;                                           // callback

         // TestGetUnicodeSet variables
         String map;
         String mapnot;
         int which;

         // CharsetCallback encoder and decoder
         CharsetCallback.Decoder cbDecoder = null;
         CharsetCallback.Encoder cbEncoder = null;

         String caseNrAsString() {
             return "[" + caseNr + "]";
         }
     }

     /* In the data-driven conversion test, converters that are not available in
      * ICU4J are marked with the following leading symbol.
      */
     private static final char UNSUPPORTED_CHARSET_SYMBOL = '+';

     // public methods --------------------------------------------------------

     public static void main(String[] args) throws Exception {
         new TestConversion().run(args);
     }

     public TestConversion() {
         super("com/ibm/icu/dev/data/testdata/", "conversion");
     }

     /*
      * This method maps to the convtest.cpp runIndexedTest() method to run each
      * type of conversion.
      */
     public void processModules() {
         try {
             int testFromUnicode = 0;
             int testToUnicode = 0;
             String testName = t.getName().toString();

             // Iterate through and get each of the test case to process
             for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
                 DataMap testcase = (DataMap) iter.next();

                 if (testName.equalsIgnoreCase("toUnicode")) {
                     TestToUnicode(testcase, testToUnicode);
                     testToUnicode++;

                 } else if (testName.equalsIgnoreCase("fromUnicode")) {
                     TestFromUnicode(testcase, testFromUnicode);
                     testFromUnicode++;
                 } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
                     TestGetUnicodeSet(testcase);
                 } else {
                     warnln("Could not load the test cases for conversion");
                     continue;
                 }
             }
         } catch (Exception e) {
             e.printStackTrace();
         }

     }

     // private methods -------------------------------------------------------


     // fromUnicode test worker functions ---------------------------------------
     private void TestFromUnicode(DataMap testcase, int caseNr) {

         ConversionCase cc = new ConversionCase();

        try {
             // retrieve test case data
             cc.caseNr = caseNr;
             cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
             cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();

         } catch (Exception e) {
             errln("Skipping test:");
             errln("error parsing conversion/toUnicode test case " + cc.caseNr);
             return;
         }

         /*
          * Skip the following data driven converter tests.
          * These tests were added to the data driven conversion test in ICU
          * to test direct-from-UTF-8 m:n Unicode:charset conversion.
          * This feature is not in ICU4J.
          * See #9601
          */
         String [] testsToSkip = {
                 "*test2"
         };
         for (int i = 0; i < testsToSkip.length; i++) {
             if (cc.charset.equals(testsToSkip[i])) {
                 logln("");
                 logln("Skipping: " + cc.charset);
                 logln("...............................................");
                 return;
             }
         }

         // ----for debugging only
         logln("");
         logln("TestFromUnicode[" + caseNr + "] " + cc.charset + " ");
         logln("Unicode:   " + cc.unicode);
         logln("Bytes:    " + printbytes(cc.bytes, cc.bytes.limit()));
         ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
         logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
         logln("...............................................");

         // process the retrieved test data case
         if (cc.offsets.length == 0) {
             cc.offsets = null;
         } else if (cc.offsets.length != cc.bytes.limit()) {
             errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
                     + "] and offsets[" + cc.offsets.length
                     + "] must have the same length");
             return;
         }

         // check the callback replacement value
         if (cc.cbopt.length() > 0) {

             switch ((cc.cbopt).charAt(0)) {
             case '?':
                 cc.cbErrorAction = CodingErrorAction.REPLACE;
                 break;
             case '0':
                 cc.cbErrorAction = CodingErrorAction.IGNORE;
                 break;
             case '.':
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             case '&':
                 cc.cbErrorAction = CodingErrorAction.REPLACE;
                 cc.cbEncoder = CharsetCallback.FROM_U_CALLBACK_ESCAPE;
                 break;
             default:
                 cc.cbErrorAction = null;
                 break;
             }

             // check for any options for the callback value --
             cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt
                     .substring(1);
             if (cc.option == null) {
                 cc.option = null;
             }
         }
         FromUnicodeCase(cc);
     }


     private void FromUnicodeCase(ConversionCase cc) {
         // create charset encoder for conversion test
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetEncoder encoder = null;
         Charset charset = null;
         try {
             // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
             charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                     : (Charset) provider.charsetForName(cc.charset);
             if (charset != null) {
                 encoder = (CharsetEncoder) charset.newEncoder();
                 encoder.onMalformedInput(CodingErrorAction.REPLACE);
                 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
                 if (encoder instanceof CharsetEncoderICU) {
                     ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
                     if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
                         errln("Fallback could not be set for " + cc.charset);
                     }
                 }
             }
         } catch (Exception e) {
             encoder = null;
         }
         if (encoder == null) {
             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
             } else {
                 errln(cc.charset + " was not found");
             }
             return;
         }

         // set the callback for the encoder
         if (cc.cbErrorAction != null) {
             if (cc.cbEncoder != null) {
                 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.malformedForLength(1), cc.cbEncoder, cc.option);
                 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), cc.cbEncoder, cc.option);
             } else {
                 encoder.onUnmappableCharacter(cc.cbErrorAction);
                 encoder.onMalformedInput(cc.cbErrorAction);
             }

             // if action has an option, put in the option for the case
             if (cc.option.equals("i")) {
                 encoder.onMalformedInput(CodingErrorAction.REPORT);
             }

             // if callback action is replace,
           //   and there is a subchar
             // replace the decoder's default replacement value
             // if substring, skip test due to current api not supporting
             // substring
             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
                 if (cc.cbopt.length() > 1) {
                     if (cc.cbopt.length() > 1 && cc.cbopt.charAt(1) == '=') {
                         logln("Skipping test due to limitation in Java API - substitution string not supported");
                         return;
                     } else {
                         // // read NUL-separated subchar first, if any
                         // copy the subchar from Latin-1 characters
                         // start after the NUL
                         if (cc.cbopt.charAt(1) == 0x00) {
                             cc.cbopt = cc.cbopt.substring(2);

                             try {
                                 encoder.replaceWith(toByteArray(cc.cbopt));
                             } catch (Exception e) {
                                 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
                                 return;
                             }
                         }
                     }
                 }
             }
         }

         // do charset encoding from unicode

         // testing by steps using charset.encoder(in,out,flush)
         int resultLength;
         boolean ok;
         String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
                 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
         int i, step;

         ok = true;

         for (i = 0; i < steps.length && ok; ++i) {
             step = Integer.parseInt(steps[i][0]);

             logln("Testing step:[" + step + "]");
             try {
                 resultLength = stepFromUnicode(cc, encoder, step);
                 ok = checkFromUnicode(cc, resultLength);
             } catch (Exception ex) {
                 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
                 ex.printStackTrace(System.out);
                 return;
             }

         }
         // testing by whole buffer using out = charset.encoder(in)
         while (ok && cc.finalFlush) {
             logln("Testing java API charset.encoder(in):");
             cc.fromUnicodeResult = null;
             ByteBuffer out = null;

             try {
                 out = encoder.encode(CharBuffer.wrap(cc.unicode.toCharArray()));
                 out.position(out.limit());
                 if (out.limit() != out.capacity() || cc.finalFlush) {
                     int pos = out.position();
                     byte[] temp = out.array();
                     out = ByteBuffer.allocate(temp.length * 4);
                     out.put(temp);
                     out.position(pos);
                     CoderResult cr = encoder.flush(out);
                     if (cr.isOverflow()) {
                         logln("Overflow error with flushing encoder");
                     }
                 }
                 cc.fromUnicodeResult = out;

                 ok = checkFromUnicode(cc, out.limit());
                 if (!ok) {
                     break;
                 }
             } catch (Exception e) {
                 //check the error code to see if it matches cc.errorCode
                 logln("Encoder returned an error code");
                 logln("ErrorCode expected is: " + cc.outErrorCode);
                 logln("Error Result is: " + e.toString());
             }
             break;
         }
     }

     private int stepFromUnicode(ConversionCase cc, CharsetEncoder encoder, int step) {
         if (step < 0) {
             errln("Negative step size, test internal error.");
             return 0;
         }

         int sourceLen = cc.unicode.length();
         int targetLen = cc.bytes.capacity() + 20;  // for BOM, and to let failures produce excess output
         CharBuffer source = CharBuffer.wrap(cc.unicode.toCharArray());
         ByteBuffer target = ByteBuffer.allocate(targetLen);
         cc.fromUnicodeResult = null;
         encoder.reset();

         int currentSourceLimit;
         int currentTargetLimit;
         if (step > 0) {
             currentSourceLimit = Math.min(step, sourceLen);
             currentTargetLimit = Math.min(step, targetLen);
         } else {
             currentSourceLimit = sourceLen;
             currentTargetLimit = targetLen;
         }

         CoderResult cr = null;

         for (;;) {
             source.limit(currentSourceLimit);
             target.limit(currentTargetLimit);

             cr = encoder.encode(source, target, currentSourceLimit == sourceLen);

             if (cr.isUnderflow()) {
                 if (currentSourceLimit == sourceLen) {
                     if (target.position() == cc.bytes.limit()) {
                         // target contains the correct number of bytes
                         break;
                     }
                     // Do a final flush for cleanup, then break out
                     // Encode loop, exits with cr==underflow in normal operation.
                     //target.limit(targetLen);
                     target.limit(targetLen);
                     cr = encoder.flush(target);
                     if (cr.isUnderflow()) {
                         // good
                     } else if (cr.isOverflow()) {
                         errln(cc.caseNrAsString() + " Flush is producing excessive output");
                     } else {
                         errln(cc.caseNrAsString() + " Flush operation failed.  CoderResult = \""
                                 + cr.toString() + "\"");
                     }
                     break;
                 }
                 currentSourceLimit = Math.min(currentSourceLimit + step, sourceLen);
             } else if (cr.isOverflow()) {
                 if (currentTargetLimit == targetLen) {
                     errln(cc.caseNrAsString() + " encode() is producing excessive output");
                     break;
                 }
                 currentTargetLimit = Math.min(currentTargetLimit + step, targetLen);
             } else {
                 // check the error code to see if it matches cc.errorCode
                 logln("Encoder returned an error code");
                 logln("ErrorCode expected is: " + cc.outErrorCode);
                 logln("Error Result is: " + cr.toString());
                 break;
             }

         }

         cc.fromUnicodeResult = target;
         return target.position();
     }

     private boolean checkFromUnicode(ConversionCase cc, int resultLength) {
         return checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult);
     }

     // toUnicode test worker functions ----------------------------------------- ***

     private void TestToUnicode(DataMap testcase, int caseNr) {
         // create Conversion case to store the test case data
         ConversionCase cc = new ConversionCase();

         try {
             // retrieve test case data
             cc.caseNr = caseNr;
             cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
             cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();

         } catch (Exception e) {
             errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr);
             return;
         }

         // ----for debugging only
         logln("");
         logln("TestToUnicode[" + caseNr + "] " + cc.charset + " ");
         logln("Unicode:   " + hex(cc.unicode));
         logln("Bytes:    " + printbytes(cc.bytes, cc.bytes.limit()));
         ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
         logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
         logln("...............................................");

         // process the retrieved test data case
         if (cc.offsets.length == 0) {
             cc.offsets = null;
         } else if (cc.offsets.length != cc.unicode.length()) {
             errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode["
                     + cc.unicode.length() + "] and offsets["
                     + cc.offsets.length + "] must have the same length");
             return;
         }
         // check for the callback replacement value for unmappable
         // characters or malformed errors
         if (cc.cbopt.length() > 0) {
             switch ((cc.cbopt).charAt(0)) {
             case '?': // CALLBACK_SUBSTITUTE
                 cc.cbErrorAction = CodingErrorAction.REPLACE;
                 break;
             case '0': // CALLBACK_SKIP
                 cc.cbErrorAction = CodingErrorAction.IGNORE;
                 break;
             case '.': // CALLBACK_STOP
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 break;
             case '&': // CALLBACK_ESCAPE
                 cc.cbErrorAction = CodingErrorAction.REPORT;
                 cc.cbDecoder = CharsetCallback.TO_U_CALLBACK_ESCAPE;
                 break;
             default:
                 cc.cbErrorAction = null;
                 break;
             }
         }
         // check for any options for the callback value
         cc.option = cc.cbErrorAction == null ? null : cc.cbopt.substring(1);
         if (cc.option == null) {
             cc.option = null;
         }

         ToUnicodeCase(cc);

     }

     private void ToUnicodeCase(ConversionCase cc) {

         // create converter for charset and decoder for each test case
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetDecoder decoder = null;
         Charset charset = null;

         try {
             // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
             charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                     : (Charset) provider.charsetForName(cc.charset);
             if (charset != null) {
                 decoder = (CharsetDecoder) charset.newDecoder();
                 decoder.onMalformedInput(CodingErrorAction.REPLACE);
                 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
             }
         } catch (Exception e) {
             // TODO implement loading of test data.
             decoder = null;
         }
         if (decoder == null) {
             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
             } else {
                 errln(cc.charset + " was not found");
             }
             return;
         }

         // set the callback for the decoder
         if (cc.cbErrorAction != null) {
             if (cc.cbDecoder != null) {
                 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), cc.cbDecoder, cc.option);
                 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.unmappableForLength(1), cc.cbDecoder, cc.option);
             } else {
                 decoder.onMalformedInput(cc.cbErrorAction);
                 decoder.onUnmappableCharacter(cc.cbErrorAction);
             }

             // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
             if (cc.option.equals("i")) {
                 decoder.onMalformedInput(CodingErrorAction.REPORT);
             }

             // if callback action is replace, and there is a subchar
             // replace the decoder's default replacement value
             // if substring, skip test due to current api not supporting
             // substring replacement
             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
                 if (cc.cbopt.length() > 1) {
                     if (cc.cbopt.charAt(1) == '=') {
                         logln("Skipping test due to limitation in Java API - substitution string not supported");

                     } else {
                         // // read NUL-separated subchar first, if any
                         // copy the subchar from Latin-1 characters
                         // start after the NUL
                         if (cc.cbopt.charAt(1) == 0x00) {
                             cc.cbopt = cc.cbopt.substring(2);

                             try {
                                 decoder.replaceWith(cc.cbopt);
                             } catch (Exception e) {
                                 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
                             }
                         }
                     }
                 }
             }
         }

         //      Check the step to unicode
         boolean ok;
         int resultLength;

         String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
                 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
         /* TODO: currently not supported test steps, getNext API is not supported for now
          { "-1", "getNext" },
          { "-2", "toU(bulk)+getNext" },
          { "-3", "getNext+toU(bulk)" },
          { "-4", "toU(1)+getNext" },
          { "-5", "getNext+toU(1)" },
          { "-12", "toU(5)+getNext" },
          { "-13", "getNext+toU(5)" }};*/

         ok = true;
         int step;
         // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api
         for (int i = 0; i < steps.length && ok; ++i) {
             step = Integer.parseInt(steps[i][0]);

             if (step < 0 && !cc.finalFlush) {
                 continue;
             }
             logln("Testing step:[" + step + "]");

             try {
                 resultLength = stepToUnicode(cc, decoder, step);
                 ok = checkToUnicode(cc, resultLength);
             } catch (Exception ex) {
                 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
                 ex.printStackTrace(System.out);
                 return;
             }
         }

         //testing the java's out = charset.decoder(in) api
         while (ok && cc.finalFlush) {
             logln("Testing java charset.decoder(in):");
             cc.toUnicodeResult = null;
             CharBuffer out = null;

             try {
                 cc.bytes.rewind();
                 out = decoder.decode(cc.bytes);
                 out.position(out.limit());
                 if (out.limit() < cc.unicode.length()) {
                     int pos = out.position();
                     char[] temp = out.array();
                     out = CharBuffer.allocate(cc.bytes.limit());
                     out.put(temp);
                     out.position(pos);
                     CoderResult cr = decoder.flush(out);
                     if (cr.isOverflow()) {
                         logln("Overflow error with flushing decodering");
                     }
                 }

                 cc.toUnicodeResult = out;

                 ok = checkToUnicode(cc, out.limit());
                 if (!ok) {
                     break;
                 }
             } catch (Exception e) {
                 //check the error code to see if it matches cc.errorCode
                 logln("Decoder returned an error code");
                 logln("ErrorCode expected is: " + cc.outErrorCode);
                 logln("Error Result is: " + e.toString());
             }
             break;
         }

         return;
     }


     private int stepToUnicode(ConversionCase cc, CharsetDecoder decoder,
             int step)

     {
         ByteBuffer source;
         CharBuffer target;
         boolean flush = false;
         int sourceLen;
         source = cc.bytes;
         sourceLen = cc.bytes.limit();
         source.position(0);
         target = CharBuffer.allocate(cc.unicode.length() + 4);
         target.position(0);
         cc.toUnicodeResult = null;
         decoder.reset();

         if (step >= 0) {

             int iStep = step;
             int oStep = step;

             for (;;) {

                 if (step != 0) {
                     source.limit((iStep <= sourceLen) ? iStep : sourceLen);
                     target.limit((oStep <= target.capacity()) ? oStep : target
                             .capacity());
                     flush = (cc.finalFlush && source.limit() == sourceLen);

                 } else {
                     //bulk mode
                     source.limit(sourceLen);
                     target.limit(target.capacity());
                     flush = cc.finalFlush;
                 }
                 // convert
                 CoderResult cr = null;
                 if (source.hasRemaining()) {

                     cr = decoder.decode(source, target, flush);
                     // check pointers and errors
                     if (cr.isOverflow()) {
                         // the partial target is filled, set a new limit,
                         oStep = (target.position() + step);
                         target.limit((oStep < target.capacity()) ? oStep
                                 : target.capacity());
                         if (target.limit() > target.capacity()) {
                             //target has reached its limit, an error occurred or test case has an error code
                             //check error code
                             logln("UnExpected error: Target Buffer is larger than capacity");
                             break;
                         }

                     } else if (cr.isError()) {
                         //check the error code to see if it matches cc.errorCode
                         logln("Decoder returned an error code");
                         logln("ErrorCode expected is: " + cc.outErrorCode);
                         logln("Error Result is: " + cr.toString());
                         break;
                     }

                 } else {
                     if (source.limit() == sourceLen) {

                         cr = decoder.decode(source, target, true);

                         //due to limitation of the API we need to check for target limit for expected
                         if (target.position() != cc.unicode.length()) {
                             if (target.limit() != cc.unicode.length()) {
                                 target.limit(cc.unicode.length());
                             }
                             cr = decoder.flush(target);
                             if (cr.isError()) {
                                 errln("Flush operation failed");
                             }
                         }
                         break;
                     }
                 }
                 iStep += step;

             }

         }// if(step ==0)

         //--------------------------------------------------------------------------
         else /* step<0 */{
             /*
              * step==-1: call only ucnv_getNextUChar()
              * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
              *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
              *   else give it at most (-step-2)/2 bytes
              */

             for (;;) {
                 // convert
                 if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) {

                     target.limit(target.position() < target.capacity() ? target
                             .position() + 1 : target.capacity());

                     // decode behavior is return to output target 1 character
                     CoderResult cr = null;

                     //similar to getNextUChar() , input is the whole string, while outputs only 1 character
                     source.limit(sourceLen);
                     while (target.position() != target.limit()
                             && source.hasRemaining()) {
                         cr = decoder.decode(source, target,
                                 source.limit() == sourceLen);

                         if (cr.isOverflow()) {

                             if (target.limit() >= target.capacity()) {
                                 // target has reached its limit, an error occurred
                                 logln("UnExpected error: Target Buffer is larger than capacity");
                                 break;
                             } else {
                                 //1 character has been consumed
                                 target.limit(target.position() + 1);
                                 break;
                             }
                         } else if (cr.isError()) {
                             logln("Decoder returned an error code");
                             logln("ErrorCode expected is: " + cc.outErrorCode);
                             logln("Error Result is: " + cr.toString());

                             cc.toUnicodeResult = target;
                             return target.position();
                         }

                         else {
                             // one character has been consumed
                             if (target.limit() == target.position()) {
                                 target.limit(target.position() + 1);
                                 break;
                             }
                         }

                     }
                     if (source.position() == sourceLen) {

                         // due to limitation of the API we need to check
                         // for target limit for expected
                         cr = decoder.decode(source, target, true);
                         if (target.position() != cc.unicode.length()) {

                             target.limit(cc.unicode.length());
                             cr = decoder.flush(target);
                             if (cr.isError()) {
                                 errln("Flush operation failed");
                             }
                         }
                         break;
                     }
                     // alternate between -n-1 and -n but leave -1 alone
                     if (step < -1) {
                         ++step;
                     }
                 } else {/* step is even */
                     // allow only one UChar output

                     target.limit(target.position() < target.capacity() ? target
                             .position() + 1 : target.capacity());
                     if (step == -2) {
                         source.limit(sourceLen);
                     } else {
                         source.limit(source.position() + (-step - 2) / 2);
                         if (source.limit() > sourceLen) {
                             source.limit(sourceLen);
                         }
                     }
                     CoderResult cr = decoder.decode(source, target, source
                             .limit() == sourceLen);
                     // check pointers and errors
                     if (cr.isOverflow()) {
                         // one character has been consumed
                         if (target.limit() >= target.capacity()) {
                             // target has reached its limit, an error occurred
                             logln("Unexpected error: Target Buffer is larger than capacity");
                             break;
                         }
                     } else if (cr.isError()) {
                         logln("Decoder returned an error code");
                         logln("ErrorCode expected is: " + cc.outErrorCode);
                         logln("Error Result is: " + cr.toString());
                         break;
                     }

                     --step;
                 }
             }
         }

         //--------------------------------------------------------------------------

         cc.toUnicodeResult = target;
         return target.position();
     }


     private boolean checkToUnicode(ConversionCase cc, int resultLength) {
         return checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult);
     }


     private void TestGetUnicodeSet(DataMap testcase) {
         /*
          * charset - will be opened, and ucnv_getUnicodeSet() called on it //
          * map - set of code points and strings that must be in the returned set //
          * mapnot - set of code points and strings that must *not* be in the //
          * returned set // which - numeric UConverterUnicodeSet value Headers {
          * "charset", "map", "mapnot", "which" }
          */


         // retrieve test case data
         ConversionCase cc = new ConversionCase();
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetICU charset  ;


         UnicodeSet mapset = new UnicodeSet();
         UnicodeSet mapnotset = new UnicodeSet();
         UnicodeSet unicodeset = new UnicodeSet();
         String ellipsis = "0x2e";
         cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                 .getString();
         cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString();
         cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
                 .getString();


         cc.which = ((ICUResourceBundle) testcase.getObject("which")).getInt(); // only checking for ROUNDTRIP_SET

         // ----for debugging only
         logln("");
         logln("TestGetUnicodeSet[" + cc.charset + "] ");
         logln("...............................................");

         try{
            // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
            charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
                     ? (CharsetICU) provider.charsetForName(cc.charset.substring(1),
                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                     : (CharsetICU) provider.charsetForName(cc.charset);

            //checking for converter that are not supported at this point
            try{
                 if(charset==null ||
                         charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
                       charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
                       charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
                       charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
                     logln("Converter not supported at this point :" + cc.charset);
                    return;
                }

                if(cc.which==1){
                    logln("Fallback set not supported at this point for converter : "+charset.displayName());
                   return;
                }

            }catch(Exception e){
                return;
            }

            mapset.clear();
            mapnotset.clear();

            mapset.applyPattern(cc.map,false);
            mapnotset.applyPattern(cc.mapnot,false);

            charset.getUnicodeSet(unicodeset, cc.which);
            UnicodeSet diffset = new UnicodeSet();

            //are there items that must be in unicodeset but are not?
            (diffset = mapset).removeAll(unicodeset);
            if(!diffset.isEmpty()){
                StringBuffer s = new StringBuffer(diffset.toPattern(true));
                if(s.length()>100){
                    s.replace(0, 0x7fffffff, ellipsis);
                }
                errln("error in missing items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
            }

           //are the items that must not be in unicodeset but are?
            (diffset=mapnotset).retainAll(unicodeset);
            if(!diffset.isEmpty()){
                StringBuffer s = new StringBuffer(diffset.toPattern(true));
                if(s.length()>100){
                    s.replace(0, 0x7fffffff, ellipsis);
                }
                errln("contains unexpected items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
            }
          } catch (Exception e) {
              errln("getUnicodeSet returned an error code");
              errln("ErrorCode expected is: " + cc.outErrorCode);
              errln("Error Result is: " + e.toString());
              return;
          }
     }

     /**
      * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
      * start of the stream for example U+FEFF (the Unicode BOM/signature
      * character) that can be ignored.
      *
      * Detects Unicode signature byte sequences at the start of the byte stream
      * and returns number of bytes of the BOM of the indicated Unicode charset.
      * 0 is returned when no Unicode signature is recognized.
      *
      */

     private String detectUnicodeSignature(ByteBuffer source) {
         int signatureLength = 0; // number of bytes of the signature
         final int SIG_MAX_LEN = 5;
         String sigUniCharset = null; // states what unicode charset is the BOM
         int i = 0;

         /*
          * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
          * don't misdetect something
          */
         byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
                 (byte) 0xa5 };

         while (i < source.limit() && i < SIG_MAX_LEN) {
             start[i] = source.get(i);
             i++;
         }

         if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
             signatureLength = 2;
             sigUniCharset = "UTF-16BE";
             source.position(signatureLength);
             return sigUniCharset;
         } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
             if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
                 signatureLength = 4;
                 sigUniCharset = "UTF-32LE";
                 source.position(signatureLength);
                 return sigUniCharset;
             } else {
                 signatureLength = 2;
                 sigUniCharset = "UTF-16LE";
                 source.position(signatureLength);
                 return sigUniCharset;
             }
         } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
                 && start[2] == (byte) 0xBF) {
             signatureLength = 3;
             sigUniCharset = "UTF-8";
             source.position(signatureLength);
             return sigUniCharset;
         } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
                 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
             signatureLength = 4;
             sigUniCharset = "UTF-32BE";
             source.position(signatureLength);
             return sigUniCharset;
         } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
                 && start[2] == (byte) 0xFF) {
             signatureLength = 3;
             sigUniCharset = "SCSU";
             source.position(signatureLength);
             return sigUniCharset;
         } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
                 && start[2] == (byte) 0x28) {
             signatureLength = 3;
             sigUniCharset = "BOCU-1";
             source.position(signatureLength);
             return sigUniCharset;
         } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
                 && start[2] == (byte) 0x76) {

             if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
                 signatureLength = 5;
                 sigUniCharset = "UTF-7";
                 source.position(signatureLength);
                 return sigUniCharset;
             } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
                     || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
                 signatureLength = 4;
                 sigUniCharset = "UTF-7";
                 source.position(signatureLength);
                 return sigUniCharset;
             }
         } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
                 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
             signatureLength = 4;
             sigUniCharset = "UTF-EBCDIC";
             source.position(signatureLength);
             return sigUniCharset;
         }

         /* no known Unicode signature byte sequence recognized */
         return null;
     }

     String printbytes(ByteBuffer buf, int pos) {
         int cur = buf.position();
         String res = " (" + pos + ")==[";
         for (int i = 0; i < pos; i++) {
             res += "(" + i + ")" + hex(buf.get(i) & 0xff).substring(2) + " ";
         }
         buf.position(cur);
         return res + "]";
     }

     String printchars(CharBuffer buf, int pos) {
         int cur = buf.position();
         String res = " (" + pos + ")==[";
         for (int i = 0; i < pos; i++) {
             res += "(" + i + ")" + hex(buf.get(i)) + " ";
         }
         buf.position(cur);
         return res + "]";
     }

     private boolean checkResultsFromUnicode(ConversionCase cc, ByteBuffer expected,
             ByteBuffer output) {

         boolean res = true;
         expected.rewind();
         output.limit(output.position());
         output.rewind();

         // remove any BOM signature before checking
         if (!cc.charset.contains("UnicodeLittle") && !cc.charset.contains("UnicodeBig")) {
             detectUnicodeSignature(output); // sets the position to after the BOM
             output = output.slice(); // removes anything before the current position
         }

         if (output.limit() != expected.limit()) {
             errln("Test failed: output length does not match expected for charset: " + cc.charset
                     + " [" + cc.caseNr + "]");
             res = false;
         } else {
             while (output.hasRemaining()) {
                 if (output.get() != expected.get()) {
                     errln("Test failed: output does not match expected for charset: " + cc.charset
                             + " [" + cc.caseNr + "]");
                     res = false;
                     break;
                 }
             }
         }

         if (res) {
             logln("[" + cc.caseNr + "]:" + cc.charset);
             logln("Input:       " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
             logln("Output:      " + printbytes(output, output.limit()));
             logln("Expected:    " + printbytes(expected, expected.limit()));
             logln("Passed");
         }
         else {
             errln("[" + cc.caseNr + "]:" + cc.charset);
             errln("Input:       " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
             errln("Output:      " + printbytes(output, output.limit()));
             errln("Expected:    " + printbytes(expected, expected.limit()));
             errln("Failed");
         }
         return res;
     }

     private boolean checkResultsToUnicode(ConversionCase cc, String expected, CharBuffer output) {

         boolean res = true;
         output.limit(output.position());
         output.rewind();

         // test to see if the conversion matches actual results
         if (output.limit() != expected.length()) {
             errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
             res = false;
         } else {
             for (int i = 0; i < expected.length(); i++) {
                 if (output.get(i) != expected.charAt(i)) {
                     errln("Test failed: output does not match expected for charset: " + cc.charset
                             + " [" + cc.caseNr + "]");
                     res = false;
                     break;
                 }
             }
         }

         if (res) {
             logln("[" + cc.caseNr + "]:" + cc.charset);
             logln("Input:       " + printbytes(cc.bytes, cc.bytes.limit()));
             logln("Output:      " + printchars(output, output.limit()));
             logln("Expected:    " + printchars(CharBuffer.wrap(expected), expected.length()));
             logln("Passed");
         } else {
             errln("[" + cc.caseNr + "]:" + cc.charset);
             errln("Input:       " + printbytes(cc.bytes, cc.bytes.limit()));
             errln("Output:      " + printchars(output, output.limit()));
             errln("Expected:    " + printchars(CharBuffer.wrap(expected), expected.length()));
             errln("Failed");
         }
         return res;
     }

     private byte[] toByteArray(String str) {
         byte[] ret = new byte[str.length()];
         for (int i = 0; i < ret.length; i++) {
             char ch = str.charAt(i);
             if (ch <= 0xFF) {
                 ret[i] = (byte) ch;
             } else {
                 throw new IllegalArgumentException(" byte value out of range: " + ch);
             }
         }
         return ret;
     }
 }