ICU-6039 Fixed! Also removed a redundant operation from CharsetUTF8 and ported a new test from icu4c to icu4j.
X-SVN-Rev: 22973
diff --git a/icu4j/src/com/ibm/icu/charset/CharsetASCII.java b/icu4j/src/com/ibm/icu/charset/CharsetASCII.java
index cfc5477..e8777af 100644
--- a/icu4j/src/com/ibm/icu/charset/CharsetASCII.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetASCII.java
@@ -347,10 +347,7 @@
}
} else {
fromUChar32 = lead;
- if (flush)
- return CoderResult.malformedForLength(1);
- else
- return CoderResult.UNDERFLOW;
+ return CoderResult.UNDERFLOW;
}
}
diff --git a/icu4j/src/com/ibm/icu/charset/CharsetUTF8.java b/icu4j/src/com/ibm/icu/charset/CharsetUTF8.java
index 0893d90..9d46c17a 100644
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF8.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF8.java
@@ -112,9 +112,7 @@
toUnicodeStatus = char32;
mode = bytes;
toULength = i;
- cr = (flush)
- ? CoderResult.malformedForLength(i)
- : CoderResult.UNDERFLOW;
+ cr = CoderResult.UNDERFLOW;
break;
}
if (((ch = sourceArray[sourceIndex++]) & 0xc0) != 0x80) {
@@ -232,9 +230,7 @@
toUnicodeStatus = char32;
mode = bytes;
toULength = i;
- cr = (flush)
- ? CoderResult.malformedForLength(i)
- : CoderResult.UNDERFLOW;
+ cr = CoderResult.UNDERFLOW;
break;
}
if (((ch = source.get(sourceIndex++)) & 0xc0) != 0x80) {
@@ -405,7 +401,7 @@
}
/* reach the next char into char32 */
- char32 = sourceArray[sourceIndex++] & 0xffff;
+ char32 = sourceArray[sourceIndex++];
if (char32 <= 0x7f) {
/* 1 byte to encode from char32 */
@@ -504,7 +500,7 @@
}
/* reach the next char into char32 */
- char32 = source.get(sourceIndex++) & 0xffff;
+ char32 = source.get(sourceIndex++);
if (char32 <= 0x7f) {
/* 1 byte to encode from char32 */
@@ -570,7 +566,7 @@
/* we need to read another char to match up the surrogate stored in char32 */
if (sourceIndex >= sourceLimit) {
fromUChar32 = char32;
- return (flush) ? CoderResult.malformedForLength(1) : CoderResult.UNDERFLOW;
+ return CoderResult.UNDERFLOW;
}
try {
@@ -618,7 +614,7 @@
/* we need to read another char to match up the surrogate stored in char32 */
if (sourceIndex >= sourceLimit) {
fromUChar32 = char32;
- return (flush) ? CoderResult.malformedForLength(1) : CoderResult.UNDERFLOW;
+ return CoderResult.UNDERFLOW;
}
try {
diff --git a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
index 8b071f2..6224a47 100644
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@@ -514,7 +514,7 @@
char[] chars = new char[size[0] + size[1] + size[2]];
int i = 0;
int x, y;
-
+
// 0 to 1 << 7 (1 byters)
for (; i < size[0]; i++) {
bytes[i] = (byte) i;
@@ -628,6 +628,81 @@
}
+ public void TestUTF8Surrogates() {
+ byte[][] in = new byte[][] {
+ { (byte)0x61, },
+ { (byte)0xc2, (byte)0x80, },
+ { (byte)0xe0, (byte)0xa0, (byte)0x80, },
+ { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, },
+ { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, },
+ { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, },
+ };
+
+ /* expected test results */
+ char[][] results = new char[][] {
+ /* expected UTF-16 code units for each input sequence */
+ { '\u0061', },
+ { '\u0080', },
+ { '\u0800', },
+ { '\ud800', '\udc00', }, // 10000
+ { '\udbd0', '\udf21', }, // 104321
+ { '\ud801', '\udc01', }, // 10401
+ };
+
+ /* error test input */
+ byte[][] in2 = new byte[][] {
+ { (byte)0x61, },
+ { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */
+ (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */
+ (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */
+ (byte)0xc0, (byte)0xc0, /* illegal trail byte */
+ (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */
+ (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */
+ (byte)0xfe, /* illegal byte altogether */
+ (byte)0x62, },
+ };
+
+ /* expected error test results */
+ char[][] results2 = new char[][] {
+ /* expected UTF-16 code units for each error input; NOTE(review): first row is 'b' although its input is 0x61 ('a') - confirm */
+ { '\u0062', },
+ { '\u0062', },
+ };
+
+ String converter = "UTF-8";
+ CharsetProvider icu = new CharsetProviderICU();
+ Charset icuChar = icu.charsetForName(converter);
+ CharsetDecoder decoder = icuChar.newDecoder();
+
+ int i;
+ try {
+ for (i = 0; i < in.length; i++) {
+ ByteBuffer source = ByteBuffer.wrap(in[i]);
+ CharBuffer expected = CharBuffer.wrap(results[i]);
+ smBufDecode(decoder, converter, source, expected, true, false,
+ true);
+ smBufDecode(decoder, converter, source, expected, true, false,
+ false);
+ }
+ } catch (Exception ex) {
+ errln("Incorrect result in " + converter);
+ }
+ try {
+ for (i = 0; i < in2.length; i++) {
+ ByteBuffer source = ByteBuffer.wrap(in2[i]);
+ CharBuffer expected = CharBuffer.wrap(results2[i]);
+ decoder.onMalformedInput(CodingErrorAction.IGNORE);
+ smBufDecode(decoder, converter, source, expected, true, false,
+ true);
+ smBufDecode(decoder, converter, source, expected, true, false,
+ false);
+ }
+ } catch (Exception ex) {
+ errln("Incorrect result in " + converter);
+ }
+ }
+
+
// public void TestCharsetCallback() {
// String currentTest = "initialization";
// try {