| /* |
| ******************************************************************************* |
| * Copyright (C) 2002-2009, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| ******************************************************************************* |
| */ |
| |
| package com.ibm.icu.dev.test.charset; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CodingErrorAction; |
| import java.util.Iterator; |
| |
| import com.ibm.icu.charset.CharsetCallback; |
| import com.ibm.icu.charset.CharsetEncoderICU; |
| import com.ibm.icu.charset.CharsetDecoderICU; |
| import com.ibm.icu.charset.CharsetICU; |
| import com.ibm.icu.charset.CharsetProviderICU; |
| import com.ibm.icu.dev.test.ModuleTest; |
| import com.ibm.icu.dev.test.TestDataModule.DataMap; |
| import com.ibm.icu.impl.ICUResourceBundle; |
| import com.ibm.icu.text.UnicodeSet; |
| |
| /** |
| * This maps to convtest.c which tests the test file for data-driven conversion tests. |
| * |
| */ |
| public class TestConversion extends ModuleTest { |
| /** |
| * This maps to the C struct of conversion case in convtest.h that stores the |
| * data for a conversion test |
| * |
| */ |
| private class ConversionCase { |
| int caseNr; // testcase index |
| String option = null; // callback options |
| CodingErrorAction cbErrorAction = null; // callback action type |
| CharBuffer toUnicodeResult = null; |
| ByteBuffer fromUnicodeResult = null; |
| |
| // data retrieved from a test case conversion.txt |
| String charset; // charset |
| String unicode; // unicode string |
| ByteBuffer bytes; // byte |
| int[] offsets; // offsets |
| boolean finalFlush; // flush |
| boolean fallbacks; // fallback |
| String outErrorCode; // errorCode |
| String cbopt; // callback |
| |
| // TestGetUnicodeSet variables |
| String map; |
| String mapnot; |
| int which; |
| |
| // CharsetCallback encoder and decoder |
| CharsetCallback.Decoder cbDecoder = null; |
| CharsetCallback.Encoder cbEncoder = null; |
| |
| String caseNrAsString() { |
| return "[" + caseNr + "]"; |
| } |
| } |
| |
| // public methods -------------------------------------------------------- |
| |
| public static void main(String[] args) throws Exception { |
| new TestConversion().run(args); |
| } |
| |
| public TestConversion() { |
| super("com/ibm/icu/dev/data/testdata/", "conversion"); |
| } |
| |
| /* |
| * This method maps to the convtest.cpp runIndexedTest() method to run each |
| * type of conversion. |
| */ |
| public void processModules() { |
| try { |
| int testFromUnicode = 0; |
| int testToUnicode = 0; |
| String testName = t.getName().toString(); |
| |
| // Iterate through and get each of the test case to process |
| for (Iterator iter = t.getDataIterator(); iter.hasNext();) { |
| DataMap testcase = (DataMap) iter.next(); |
| |
| if (testName.equalsIgnoreCase("toUnicode")) { |
| TestToUnicode(testcase, testToUnicode); |
| testToUnicode++; |
| |
| } else if (testName.equalsIgnoreCase("fromUnicode")) { |
| TestFromUnicode(testcase, testFromUnicode); |
| testFromUnicode++; |
| } else if (testName.equalsIgnoreCase("getUnicodeSet")) { |
| TestGetUnicodeSet(testcase); |
| } else { |
| warnln("Could not load the test cases for conversion"); |
| continue; |
| } |
| } |
| } catch (Exception e) { |
| e.printStackTrace(); |
| } |
| |
| } |
| |
| // private methods ------------------------------------------------------- |
| |
| |
| // fromUnicode test worker functions --------------------------------------- |
| private void TestFromUnicode(DataMap testcase, int caseNr) { |
| |
| ConversionCase cc = new ConversionCase(); |
| |
| try { |
| // retrieve test case data |
| cc.caseNr = caseNr; |
| cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString(); |
| cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString(); |
| cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary(); |
| cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector(); |
| cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0; |
| cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0; |
| cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString(); |
| cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString(); |
| |
| } catch (Exception e) { |
| errln("Skipping test:"); |
| errln("error parsing conversion/toUnicode test case " + cc.caseNr); |
| return; |
| } |
| |
| // ----for debugging only |
| logln(""); |
| logln("TestFromUnicode[" + caseNr + "] " + cc.charset + " "); |
| logln("Unicode: " + cc.unicode); |
| logln("Bytes: " + printbytes(cc.bytes, cc.bytes.limit())); |
| ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes()); |
| logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")"); |
| logln("..............................................."); |
| |
| // process the retrieved test data case |
| if (cc.offsets.length == 0) { |
| cc.offsets = null; |
| } else if (cc.offsets.length != cc.bytes.limit()) { |
| errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes |
| + "] and offsets[" + cc.offsets.length |
| + "] must have the same length"); |
| return; |
| } |
| |
| // check the callback replacement value |
| if (cc.cbopt.length() > 0) { |
| |
| switch ((cc.cbopt).charAt(0)) { |
| case '?': |
| cc.cbErrorAction = CodingErrorAction.REPLACE; |
| break; |
| case '0': |
| cc.cbErrorAction = CodingErrorAction.IGNORE; |
| break; |
| case '.': |
| cc.cbErrorAction = CodingErrorAction.REPORT; |
| break; |
| case '&': |
| cc.cbErrorAction = CodingErrorAction.REPLACE; |
| cc.cbEncoder = CharsetCallback.FROM_U_CALLBACK_ESCAPE; |
| break; |
| default: |
| cc.cbErrorAction = null; |
| break; |
| } |
| |
| // check for any options for the callback value -- |
| cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt |
| .substring(1); |
| if (cc.option == null) { |
| cc.option = null; |
| } |
| } |
| FromUnicodeCase(cc); |
| } |
| |
| |
| private void FromUnicodeCase(ConversionCase cc) { |
| |
| // create charset encoder for conversion test |
| CharsetProviderICU provider = new CharsetProviderICU(); |
| CharsetEncoder encoder = null; |
| Charset charset = null; |
| try { |
| // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata |
| charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*') |
| ? (Charset) provider.charsetForName(cc.charset.substring(1), |
| "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader()) |
| : (Charset) provider.charsetForName(cc.charset); |
| encoder = (CharsetEncoder) charset.newEncoder(); |
| encoder.onMalformedInput(CodingErrorAction.REPLACE); |
| encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| if (encoder instanceof CharsetEncoderICU) { |
| ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks); |
| if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) { |
| errln("Fallback could not be set for " + cc.charset); |
| } |
| } |
| |
| } catch (Exception e) { |
| if (skipIfBeforeICU(4,2,0)) { // TIME BOMB |
| logln("Skipping test:(" + cc.charset + ") due to ICU Charset not supported at this time"); |
| } else { |
| errln(cc.charset + " was not found"); |
| } |
| return; |
| } |
| |
| // set the callback for the encoder |
| if (cc.cbErrorAction != null) { |
| if (cc.cbEncoder != null) { |
| ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.malformedForLength(1), cc.cbEncoder, cc.option); |
| ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), cc.cbEncoder, cc.option); |
| } else { |
| encoder.onUnmappableCharacter(cc.cbErrorAction); |
| encoder.onMalformedInput(cc.cbErrorAction); |
| } |
| |
| // if action has an option, put in the option for the case |
| if (cc.option.equals("i")) { |
| encoder.onMalformedInput(CodingErrorAction.REPORT); |
| } |
| |
| // if callback action is replace, |
| // and there is a subchar |
| // replace the decoder's default replacement value |
| // if substring, skip test due to current api not supporting |
| // substring |
| if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) { |
| if (cc.cbopt.length() > 1) { |
| if (cc.cbopt.length() > 1 && cc.cbopt.charAt(1) == '=') { |
| logln("Skipping test due to limitation in Java API - substitution string not supported"); |
| return; |
| } else { |
| // // read NUL-separated subchar first, if any |
| // copy the subchar from Latin-1 characters |
| // start after the NUL |
| if (cc.cbopt.charAt(1) == 0x00) { |
| cc.cbopt = cc.cbopt.substring(2); |
| |
| try { |
| encoder.replaceWith(toByteArray(cc.cbopt)); |
| } catch (Exception e) { |
| logln("Skipping test due to limitation in Java API - substitution character sequence size error"); |
| return; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // do charset encoding from unicode |
| |
| // testing by steps using charset.encoder(in,out,flush) |
| int resultLength; |
| boolean ok; |
| String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked |
| { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } }; |
| int i, step; |
| |
| ok = true; |
| |
| for (i = 0; i < steps.length && ok; ++i) { |
| step = Integer.parseInt(steps[i][0]); |
| |
| logln("Testing step:[" + step + "]"); |
| try { |
| resultLength = stepFromUnicode(cc, encoder, step); |
| ok = checkFromUnicode(cc, resultLength); |
| } catch (Exception ex) { |
| errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]"); |
| ex.printStackTrace(System.out); |
| return; |
| } |
| |
| } |
| // testing by whole buffer using out = charset.encoder(in) |
| while (ok && cc.finalFlush) { |
| logln("Testing java API charset.encoder(in):"); |
| cc.fromUnicodeResult = null; |
| ByteBuffer out = null; |
| |
| try { |
| out = encoder.encode(CharBuffer.wrap(cc.unicode.toCharArray())); |
| out.position(out.limit()); |
| if (out.limit() != out.capacity() || cc.finalFlush) { |
| int pos = out.position(); |
| byte[] temp = out.array(); |
| out = ByteBuffer.allocate(temp.length * 4); |
| out.put(temp); |
| out.position(pos); |
| CoderResult cr = encoder.flush(out); |
| if (cr.isOverflow()) { |
| logln("Overflow error with flushing encoder"); |
| } |
| } |
| cc.fromUnicodeResult = out; |
| |
| ok = checkFromUnicode(cc, out.limit()); |
| if (!ok) { |
| break; |
| } |
| } catch (Exception e) { |
| //check the error code to see if it matches cc.errorCode |
| logln("Encoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + e.toString()); |
| } |
| break; |
| } |
| } |
| |
| private int stepFromUnicode(ConversionCase cc, CharsetEncoder encoder, int step) { |
| if (step < 0) { |
| errln("Negative step size, test internal error."); |
| return 0; |
| } |
| |
| int sourceLen = cc.unicode.length(); |
| int targetLen = cc.bytes.capacity() + 20; // for BOM, and to let failures produce excess output |
| CharBuffer source = CharBuffer.wrap(cc.unicode.toCharArray()); |
| ByteBuffer target = ByteBuffer.allocate(targetLen); |
| cc.fromUnicodeResult = null; |
| encoder.reset(); |
| |
| int currentSourceLimit; |
| int currentTargetLimit; |
| if (step > 0) { |
| currentSourceLimit = Math.min(step, sourceLen); |
| currentTargetLimit = Math.min(step, targetLen); |
| } else { |
| currentSourceLimit = sourceLen; |
| currentTargetLimit = targetLen; |
| } |
| |
| CoderResult cr = null; |
| |
| for (;;) { |
| source.limit(currentSourceLimit); |
| target.limit(currentTargetLimit); |
| |
| cr = encoder.encode(source, target, currentSourceLimit == sourceLen); |
| |
| if (cr.isUnderflow()) { |
| if (currentSourceLimit == sourceLen) { |
| if (target.position() == cc.bytes.limit()) { |
| // target contains the correct number of bytes |
| break; |
| } |
| // Do a final flush for cleanup, then break out |
| // Encode loop, exits with cr==underflow in normal operation. |
| //target.limit(targetLen); |
| target.limit(targetLen); |
| cr = encoder.flush(target); |
| if (cr.isUnderflow()) { |
| // good |
| } else if (cr.isOverflow()) { |
| errln(cc.caseNrAsString() + " Flush is producing excessive output"); |
| } else { |
| errln(cc.caseNrAsString() + " Flush operation failed. CoderResult = \"" |
| + cr.toString() + "\""); |
| } |
| break; |
| } |
| currentSourceLimit = Math.min(currentSourceLimit + step, sourceLen); |
| } else if (cr.isOverflow()) { |
| if (currentTargetLimit == targetLen) { |
| errln(cc.caseNrAsString() + " encode() is producing excessive output"); |
| break; |
| } |
| currentTargetLimit = Math.min(currentTargetLimit + step, targetLen); |
| } else { |
| // check the error code to see if it matches cc.errorCode |
| logln("Encoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + cr.toString()); |
| break; |
| } |
| |
| } |
| |
| cc.fromUnicodeResult = target; |
| return target.position(); |
| } |
| |
| private boolean checkFromUnicode(ConversionCase cc, int resultLength) { |
| return checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult); |
| } |
| |
| // toUnicode test worker functions ----------------------------------------- *** |
| |
| private void TestToUnicode(DataMap testcase, int caseNr) { |
| // create Conversion case to store the test case data |
| ConversionCase cc = new ConversionCase(); |
| |
| try { |
| // retrieve test case data |
| cc.caseNr = caseNr; |
| cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString(); |
| cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary(); |
| cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString(); |
| cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector(); |
| cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0; |
| cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0; |
| cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString(); |
| cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString(); |
| |
| } catch (Exception e) { |
| errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr); |
| return; |
| } |
| |
| // ----for debugging only |
| logln(""); |
| logln("TestToUnicode[" + caseNr + "] " + cc.charset + " "); |
| logln("Unicode: " + hex(cc.unicode)); |
| logln("Bytes: " + printbytes(cc.bytes, cc.bytes.limit())); |
| ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes()); |
| logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")"); |
| logln("..............................................."); |
| |
| // process the retrieved test data case |
| if (cc.offsets.length == 0) { |
| cc.offsets = null; |
| } else if (cc.offsets.length != cc.unicode.length()) { |
| errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode[" |
| + cc.unicode.length() + "] and offsets[" |
| + cc.offsets.length + "] must have the same length"); |
| return; |
| } |
| // check for the callback replacement value for unmappable |
| // characters or malformed errors |
| if (cc.cbopt.length() > 0) { |
| switch ((cc.cbopt).charAt(0)) { |
| case '?': // CALLBACK_SUBSTITUTE |
| cc.cbErrorAction = CodingErrorAction.REPLACE; |
| break; |
| case '0': // CALLBACK_SKIP |
| cc.cbErrorAction = CodingErrorAction.IGNORE; |
| break; |
| case '.': // CALLBACK_STOP |
| cc.cbErrorAction = CodingErrorAction.REPORT; |
| break; |
| case '&': // CALLBACK_ESCAPE |
| cc.cbErrorAction = CodingErrorAction.REPORT; |
| cc.cbDecoder = CharsetCallback.TO_U_CALLBACK_ESCAPE; |
| break; |
| default: |
| cc.cbErrorAction = null; |
| break; |
| } |
| } |
| // check for any options for the callback value |
| cc.option = cc.cbErrorAction == null ? null : cc.cbopt.substring(1); |
| if (cc.option == null) { |
| cc.option = null; |
| } |
| |
| ToUnicodeCase(cc); |
| |
| } |
| |
| private void ToUnicodeCase(ConversionCase cc) { |
| |
| // create converter for charset and decoder for each test case |
| CharsetProviderICU provider = new CharsetProviderICU(); |
| CharsetDecoder decoder = null; |
| Charset charset = null; |
| |
| try { |
| // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata |
| charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*') |
| ? (Charset) provider.charsetForName(cc.charset.substring(1), |
| "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader()) |
| : (Charset) provider.charsetForName(cc.charset); |
| decoder = (CharsetDecoder) charset.newDecoder(); |
| decoder.onMalformedInput(CodingErrorAction.REPLACE); |
| decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| |
| } catch (Exception e) { |
| // TODO implement loading of test data. |
| if (skipIfBeforeICU(4,2,0)) { |
| logln("Skipping test:(" + cc.charset + ") due to ICU Charset not supported at this time"); |
| } else { |
| errln(cc.charset + " was not found"); |
| } |
| return; |
| } |
| |
| // set the callback for the decoder |
| if (cc.cbErrorAction != null) { |
| if (cc.cbDecoder != null) { |
| ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), cc.cbDecoder, cc.option); |
| ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.unmappableForLength(1), cc.cbDecoder, cc.option); |
| } else { |
| decoder.onMalformedInput(cc.cbErrorAction); |
| decoder.onUnmappableCharacter(cc.cbErrorAction); |
| } |
| |
| // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback |
| if (cc.option.equals("i")) { |
| decoder.onMalformedInput(CodingErrorAction.REPORT); |
| } |
| |
| // if callback action is replace, and there is a subchar |
| // replace the decoder's default replacement value |
| // if substring, skip test due to current api not supporting |
| // substring replacement |
| if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) { |
| if (cc.cbopt.length() > 1) { |
| if (cc.cbopt.charAt(1) == '=') { |
| logln("Skipping test due to limitation in Java API - substitution string not supported"); |
| |
| } else { |
| // // read NUL-separated subchar first, if any |
| // copy the subchar from Latin-1 characters |
| // start after the NUL |
| if (cc.cbopt.charAt(1) == 0x00) { |
| cc.cbopt = cc.cbopt.substring(2); |
| |
| try { |
| decoder.replaceWith(cc.cbopt); |
| } catch (Exception e) { |
| logln("Skipping test due to limitation in Java API - substitution character sequence size error"); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Check the step to unicode |
| boolean ok; |
| int resultLength; |
| |
| String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked |
| { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } }; |
| /* TODO: currently not supported test steps, getNext API is not supported for now |
| { "-1", "getNext" }, |
| { "-2", "toU(bulk)+getNext" }, |
| { "-3", "getNext+toU(bulk)" }, |
| { "-4", "toU(1)+getNext" }, |
| { "-5", "getNext+toU(1)" }, |
| { "-12", "toU(5)+getNext" }, |
| { "-13", "getNext+toU(5)" }};*/ |
| |
| ok = true; |
| int step; |
| // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api |
| for (int i = 0; i < steps.length && ok; ++i) { |
| step = Integer.parseInt(steps[i][0]); |
| |
| if (step < 0 && !cc.finalFlush) { |
| continue; |
| } |
| logln("Testing step:[" + step + "]"); |
| |
| try { |
| resultLength = stepToUnicode(cc, decoder, step); |
| ok = checkToUnicode(cc, resultLength); |
| } catch (Exception ex) { |
| errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]"); |
| ex.printStackTrace(System.out); |
| return; |
| } |
| } |
| |
| //testing the java's out = charset.decoder(in) api |
| while (ok && cc.finalFlush) { |
| logln("Testing java charset.decoder(in):"); |
| cc.toUnicodeResult = null; |
| CharBuffer out = null; |
| |
| try { |
| out = decoder.decode(ByteBuffer.wrap(cc.bytes.array())); |
| out.position(out.limit()); |
| if (out.limit() < cc.unicode.length()) { |
| int pos = out.position(); |
| char[] temp = out.array(); |
| out = CharBuffer.allocate(cc.bytes.limit()); |
| out.put(temp); |
| out.position(pos); |
| CoderResult cr = decoder.flush(out); |
| if (cr.isOverflow()) { |
| logln("Overflow error with flushing decodering"); |
| } |
| } |
| |
| cc.toUnicodeResult = out; |
| |
| ok = checkToUnicode(cc, out.limit()); |
| if (!ok) { |
| break; |
| } |
| } catch (Exception e) { |
| //check the error code to see if it matches cc.errorCode |
| logln("Decoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + e.toString()); |
| } |
| break; |
| } |
| |
| return; |
| } |
| |
| |
| |
| |
| private int stepToUnicode(ConversionCase cc, CharsetDecoder decoder, |
| int step) |
| |
| { |
| ByteBuffer source; |
| CharBuffer target; |
| boolean flush = false; |
| int sourceLen; |
| source = cc.bytes; |
| sourceLen = cc.bytes.limit(); |
| source.position(0); |
| target = CharBuffer.allocate(cc.unicode.length() + 4); |
| target.position(0); |
| cc.toUnicodeResult = null; |
| decoder.reset(); |
| |
| if (step >= 0) { |
| |
| int iStep = step; |
| int oStep = step; |
| |
| for (;;) { |
| |
| if (step != 0) { |
| source.limit((iStep <= sourceLen) ? iStep : sourceLen); |
| target.limit((oStep <= target.capacity()) ? oStep : target |
| .capacity()); |
| flush = (cc.finalFlush && source.limit() == sourceLen); |
| |
| } else { |
| //bulk mode |
| source.limit(sourceLen); |
| target.limit(target.capacity()); |
| flush = cc.finalFlush; |
| } |
| // convert |
| CoderResult cr = null; |
| if (source.hasRemaining()) { |
| |
| cr = decoder.decode(source, target, flush); |
| // check pointers and errors |
| if (cr.isOverflow()) { |
| // the partial target is filled, set a new limit, |
| oStep = (target.position() + step); |
| target.limit((oStep < target.capacity()) ? oStep |
| : target.capacity()); |
| if (target.limit() > target.capacity()) { |
| //target has reached its limit, an error occurred or test case has an error code |
| //check error code |
| logln("UnExpected error: Target Buffer is larger than capacity"); |
| break; |
| } |
| |
| } else if (cr.isError()) { |
| //check the error code to see if it matches cc.errorCode |
| logln("Decoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + cr.toString()); |
| break; |
| } |
| |
| } else { |
| if (source.limit() == sourceLen) { |
| |
| cr = decoder.decode(source, target, true); |
| |
| //due to limitation of the API we need to check for target limit for expected |
| if (target.limit() != cc.unicode.length()) { |
| target.limit(cc.unicode.length()); |
| cr = decoder.flush(target); |
| if (cr.isError()) { |
| errln("Flush operation failed"); |
| } |
| } |
| break; |
| } |
| } |
| iStep += step; |
| oStep += step; |
| |
| } |
| |
| }// if(step ==0) |
| |
| //-------------------------------------------------------------------------- |
| else /* step<0 */{ |
| /* |
| * step==-1: call only ucnv_getNextUChar() |
| * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar() |
| * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input, |
| * else give it at most (-step-2)/2 bytes |
| */ |
| |
| for (;;) { |
| // convert |
| if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) { |
| |
| target.limit(target.position() < target.capacity() ? target |
| .position() + 1 : target.capacity()); |
| |
| // decode behavior is return to output target 1 character |
| CoderResult cr = null; |
| |
| //similar to getNextUChar() , input is the whole string, while outputs only 1 character |
| source.limit(sourceLen); |
| while (target.position() != target.limit() |
| && source.hasRemaining()) { |
| cr = decoder.decode(source, target, |
| source.limit() == sourceLen); |
| |
| if (cr.isOverflow()) { |
| |
| if (target.limit() >= target.capacity()) { |
| // target has reached its limit, an error occurred |
| logln("UnExpected error: Target Buffer is larger than capacity"); |
| break; |
| } else { |
| //1 character has been consumed |
| target.limit(target.position() + 1); |
| break; |
| } |
| } else if (cr.isError()) { |
| logln("Decoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + cr.toString()); |
| |
| cc.toUnicodeResult = target; |
| return target.position(); |
| } |
| |
| else { |
| // one character has been consumed |
| if (target.limit() == target.position()) { |
| target.limit(target.position() + 1); |
| break; |
| } |
| } |
| |
| } |
| if (source.position() == sourceLen) { |
| |
| // due to limitation of the API we need to check |
| // for target limit for expected |
| cr = decoder.decode(source, target, true); |
| if (target.position() != cc.unicode.length()) { |
| |
| target.limit(cc.unicode.length()); |
| cr = decoder.flush(target); |
| if (cr.isError()) { |
| errln("Flush operation failed"); |
| } |
| } |
| break; |
| } |
| // alternate between -n-1 and -n but leave -1 alone |
| if (step < -1) { |
| ++step; |
| } |
| } else {/* step is even */ |
| // allow only one UChar output |
| |
| target.limit(target.position() < target.capacity() ? target |
| .position() + 1 : target.capacity()); |
| if (step == -2) { |
| source.limit(sourceLen); |
| } else { |
| source.limit(source.position() + (-step - 2) / 2); |
| if (source.limit() > sourceLen) { |
| source.limit(sourceLen); |
| } |
| } |
| CoderResult cr = decoder.decode(source, target, source |
| .limit() == sourceLen); |
| // check pointers and errors |
| if (cr.isOverflow()) { |
| // one character has been consumed |
| if (target.limit() >= target.capacity()) { |
| // target has reached its limit, an error occurred |
| logln("Unexpected error: Target Buffer is larger than capacity"); |
| break; |
| } |
| } else if (cr.isError()) { |
| logln("Decoder returned an error code"); |
| logln("ErrorCode expected is: " + cc.outErrorCode); |
| logln("Error Result is: " + cr.toString()); |
| break; |
| } |
| |
| --step; |
| } |
| } |
| } |
| |
| //-------------------------------------------------------------------------- |
| |
| cc.toUnicodeResult = target; |
| return target.position(); |
| } |
| |
| |
| |
| private boolean checkToUnicode(ConversionCase cc, int resultLength) { |
| return checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult); |
| } |
| |
| |
| private void TestGetUnicodeSet(DataMap testcase) { |
| /* |
| * charset - will be opened, and ucnv_getUnicodeSet() called on it // |
| * map - set of code points and strings that must be in the returned set // |
| * mapnot - set of code points and strings that must *not* be in the // |
| * returned set // which - numeric UConverterUnicodeSet value Headers { |
| * "charset", "map", "mapnot", "which" } |
| */ |
| |
| |
| // retrieve test case data |
| ConversionCase cc = new ConversionCase(); |
| CharsetProviderICU provider = new CharsetProviderICU(); |
| CharsetICU charset ; |
| |
| |
| UnicodeSet mapset = new UnicodeSet(); |
| UnicodeSet mapnotset = new UnicodeSet(); |
| UnicodeSet unicodeset = new UnicodeSet(); |
| String ellipsis = "0x2e"; |
| cc.charset = ((ICUResourceBundle) testcase.getObject("charset")) |
| .getString(); |
| cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString(); |
| cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot")) |
| .getString(); |
| |
| |
| int which = ((ICUResourceBundle) testcase.getObject("which")).getInt(); // only checking for ROUNDTRIP_SET |
| |
| // ----for debugging only |
| logln(""); |
| logln("TestGetUnicodeSet[" + cc.charset + "] "); |
| logln("..............................................."); |
| |
| try{ |
| // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata |
| charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*') |
| ? (CharsetICU) provider.charsetForName(cc.charset.substring(1), |
| "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader()) |
| : (CharsetICU) provider.charsetForName(cc.charset); |
| |
| //checking for converter that are not supported at this point |
| try{ |
| if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" || |
| charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" || |
| charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" || |
| charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){ |
| |
| logln("Converter not supported at this point :" +charset.displayName()); |
| return; |
| } |
| |
| if(which==1){ |
| logln("Fallback set not supported at this point for converter : "+charset.displayName()); |
| return; |
| } |
| |
| }catch(Exception e){ |
| return; |
| } |
| |
| mapset.clear(); |
| mapnotset.clear(); |
| |
| mapset.applyPattern(cc.map,false); |
| mapnotset.applyPattern(cc.mapnot,false); |
| |
| charset.getUnicodeSet(unicodeset, which); |
| UnicodeSet diffset = new UnicodeSet(); |
| |
| //are there items that must be in unicodeset but are not? |
| (diffset = mapset).removeAll(unicodeset); |
| if(!diffset.isEmpty()){ |
| StringBuffer s = new StringBuffer(diffset.toPattern(true)); |
| if(s.length()>100){ |
| s.replace(0, 0x7fffffff, ellipsis); |
| } |
| errln("error in missing items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString()); |
| } |
| |
| //are the items that must not be in unicodeset but are? |
| (diffset=mapnotset).retainAll(unicodeset); |
| if(!diffset.isEmpty()){ |
| StringBuffer s = new StringBuffer(diffset.toPattern(true)); |
| if(s.length()>100){ |
| s.replace(0, 0x7fffffff, ellipsis); |
| } |
| errln("contains unexpected items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString()); |
| } |
| } catch (Exception e) { |
| errln("getUnicodeSet returned an error code"); |
| errln("ErrorCode expected is: " + cc.outErrorCode); |
| errln("Error Result is: " + e.toString()); |
| return; |
| } |
| } |
| |
| /** |
| * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the |
| * start of the stream for example U+FEFF (the Unicode BOM/signature |
| * character) that can be ignored. |
| * |
| * Detects Unicode signature byte sequences at the start of the byte stream |
| * and returns number of bytes of the BOM of the indicated Unicode charset. |
| * 0 is returned when no Unicode signature is recognized. |
| * |
| */ |
| |
| private String detectUnicodeSignature(ByteBuffer source) { |
| int signatureLength = 0; // number of bytes of the signature |
| final int SIG_MAX_LEN = 5; |
| String sigUniCharset = null; // states what unicode charset is the BOM |
| int i = 0; |
| |
| /* |
| * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we |
| * don't misdetect something |
| */ |
| byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, |
| (byte) 0xa5 }; |
| |
| while (i < source.limit() && i < SIG_MAX_LEN) { |
| start[i] = source.get(i); |
| i++; |
| } |
| |
| if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) { |
| signatureLength = 2; |
| sigUniCharset = "UTF-16BE"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) { |
| if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) { |
| signatureLength = 4; |
| sigUniCharset = "UTF-32LE"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else { |
| signatureLength = 2; |
| sigUniCharset = "UTF-16LE"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } |
| } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB |
| && start[2] == (byte) 0xBF) { |
| signatureLength = 3; |
| sigUniCharset = "UTF-8"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00 |
| && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) { |
| signatureLength = 4; |
| sigUniCharset = "UTF-32BE"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE |
| && start[2] == (byte) 0xFF) { |
| signatureLength = 3; |
| sigUniCharset = "SCSU"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE |
| && start[2] == (byte) 0x28) { |
| signatureLength = 3; |
| sigUniCharset = "BOCU-1"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F |
| && start[2] == (byte) 0x76) { |
| |
| if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) { |
| signatureLength = 5; |
| sigUniCharset = "UTF-7"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 |
| || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) { |
| signatureLength = 4; |
| sigUniCharset = "UTF-7"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } |
| } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73 |
| && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) { |
| signatureLength = 4; |
| sigUniCharset = "UTF-EBCDIC"; |
| source.position(signatureLength); |
| return sigUniCharset; |
| } |
| |
| /* no known Unicode signature byte sequence recognized */ |
| return null; |
| } |
| |
| String printbytes(ByteBuffer buf, int pos) { |
| int cur = buf.position(); |
| String res = " (" + pos + ")==["; |
| for (int i = 0; i < pos; i++) { |
| res += "(" + i + ")" + hex(buf.get(i) & 0xff).substring(2) + " "; |
| } |
| buf.position(cur); |
| return res + "]"; |
| } |
| |
| String printchars(CharBuffer buf, int pos) { |
| int cur = buf.position(); |
| String res = " (" + pos + ")==["; |
| for (int i = 0; i < pos; i++) { |
| res += "(" + i + ")" + hex(buf.get(i)) + " "; |
| } |
| buf.position(cur); |
| return res + "]"; |
| } |
| |
| private boolean checkResultsFromUnicode(ConversionCase cc, ByteBuffer expected, |
| ByteBuffer output) { |
| |
| boolean res = true; |
| expected.rewind(); |
| output.limit(output.position()); |
| output.rewind(); |
| |
| // remove any BOM signature before checking |
| detectUnicodeSignature(output); // sets the position to after the BOM |
| output = output.slice(); // removes anything before the current position |
| |
| if (output.limit() != expected.limit()) { |
| errln("Test failed: output length does not match expected for charset: " + cc.charset |
| + " [" + cc.caseNr + "]"); |
| res = false; |
| } else { |
| while (output.hasRemaining()) { |
| if (output.get() != expected.get()) { |
| errln("Test failed: output does not match expected for charset: " + cc.charset |
| + " [" + cc.caseNr + "]"); |
| res = false; |
| break; |
| } |
| } |
| } |
| |
| if (res) { |
| logln("[" + cc.caseNr + "]:" + cc.charset); |
| logln("Input: " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length())); |
| logln("Output: " + printbytes(output, output.limit())); |
| logln("Expected: " + printbytes(expected, expected.limit())); |
| logln("Passed"); |
| } |
| else { |
| errln("[" + cc.caseNr + "]:" + cc.charset); |
| errln("Input: " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length())); |
| errln("Output: " + printbytes(output, output.limit())); |
| errln("Expected: " + printbytes(expected, expected.limit())); |
| errln("Failed"); |
| } |
| return res; |
| } |
| |
| private boolean checkResultsToUnicode(ConversionCase cc, String expected, CharBuffer output) { |
| |
| boolean res = true; |
| output.limit(output.position()); |
| output.rewind(); |
| |
| // test to see if the conversion matches actual results |
| if (output.limit() != expected.length()) { |
| if (skipIfBeforeICU(4,2,0)) { // TIME BOMB |
| logln("Skipping test:(" + cc.charset + ") due to time bomb"); |
| } else { |
| errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]"); |
| } |
| res = false; |
| } else { |
| for (int i = 0; i < expected.length(); i++) { |
| if (output.get(i) != expected.charAt(i)) { |
| errln("Test failed: output does not match expected for charset: " + cc.charset |
| + " [" + cc.caseNr + "]"); |
| res = false; |
| break; |
| } |
| } |
| } |
| |
| if (res) { |
| logln("[" + cc.caseNr + "]:" + cc.charset); |
| logln("Input: " + printbytes(cc.bytes, cc.bytes.limit())); |
| logln("Output: " + printchars(output, output.limit())); |
| logln("Expected: " + printchars(CharBuffer.wrap(expected), expected.length())); |
| logln("Passed"); |
| } |
| else if (skipIfBeforeICU(4,2,0)) { |
| // TIME BOMB |
| } else { |
| errln("[" + cc.caseNr + "]:" + cc.charset); |
| errln("Input: " + printbytes(cc.bytes, cc.bytes.limit())); |
| errln("Output: " + printchars(output, output.limit())); |
| errln("Expected: " + printchars(CharBuffer.wrap(expected), expected.length())); |
| errln("Failed"); |
| } |
| return res; |
| } |
| |
| private byte[] toByteArray(String str) { |
| byte[] ret = new byte[str.length()]; |
| for (int i = 0; i < ret.length; i++) { |
| char ch = str.charAt(i); |
| if (ch <= 0xFF) { |
| ret[i] = (byte) ch; |
| } else { |
| throw new IllegalArgumentException(" byte value out of range: " + ch); |
| } |
| } |
| return ret; |
| } |
| } |