| /** |
| ******************************************************************************* |
| * Copyright (C) 2006, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| ******************************************************************************* |
| */ |
| package com.ibm.icu.charset; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.IntBuffer; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| |
| import com.ibm.icu.text.UTF16; |
| /** |
| * @author Niti Hantaweepant |
| */ |
| class CharsetUTF32 extends CharsetICU { |
| |
| protected byte[] fromUSubstitution = new byte[]{(byte)0, (byte)0, (byte)0xff, (byte)0xfd}; |
| |
| public CharsetUTF32(String icuCanonicalName, String javaCanonicalName, String[] aliases){ |
| super(icuCanonicalName, javaCanonicalName, aliases); |
| maxBytesPerChar = 4; |
| minBytesPerChar = 4; |
| maxCharsPerByte = 1; |
| } |
| class CharsetDecoderUTF32 extends CharsetDecoderICU{ |
| |
| public CharsetDecoderUTF32(CharsetICU cs) { |
| super(cs); |
| } |
| |
| protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){ |
| CoderResult cr = CoderResult.UNDERFLOW; |
| |
| int sourceArrayIndex = source.position(); |
| int ch, i; |
| |
| donefornow: |
| { |
| /* UTF-8 returns here for only non-offset, this needs to change.*/ |
| if (toUnicodeStatus != 0 && target.hasRemaining()) { |
| i = toULength; /* restore # of bytes consumed */ |
| |
| ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/ |
| toUnicodeStatus = 0; |
| toULength =0; |
| |
| while (i < 4) { |
| if (sourceArrayIndex < source.limit()) { |
| ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK); |
| toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++); |
| } |
| else { |
| /* stores a partially calculated target*/ |
| /* + 1 to make 0 a valid character */ |
| toUnicodeStatus = ch + 1; |
| toULength = (byte) i; |
| break donefornow; |
| } |
| } |
| |
| if (ch <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(ch)) { |
| /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
| if (ch <= UConverterConstants.MAXIMUM_UCS2) |
| { |
| /* fits in 16 bits */ |
| target.put((char)ch); |
| } |
| else { |
| /* write out the surrogates */ |
| target.put(UTF16.getLeadSurrogate(ch)); |
| ch = UTF16.getTrailSurrogate(ch); |
| if (target.hasRemaining()) { |
| target.put((char)ch); |
| } |
| else { |
| /* Put in overflow buffer (not handled here) */ |
| charErrorBufferArray[0] = (char) ch; |
| charErrorBufferLength = 1; |
| cr = CoderResult.OVERFLOW; |
| } |
| } |
| } |
| else { |
| toULength = (byte)i; |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| break donefornow; |
| } |
| } |
| |
| while (sourceArrayIndex < source.limit() && target.hasRemaining()) { |
| i = 0; |
| ch = 0; |
| |
| while (i < 4) { |
| if (sourceArrayIndex < source.limit()) { |
| ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK); |
| toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++); |
| } |
| else { |
| /* stores a partially calculated target*/ |
| /* + 1 to make 0 a valid character */ |
| toUnicodeStatus = ch + 1; |
| toULength = (byte) i; |
| break donefornow; |
| } |
| } |
| |
| if (ch <= UConverterSharedData.MAXIMUM_UTF && !isSurrogate(ch)) { |
| /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
| if (ch <= UConverterSharedData.MAXIMUM_UCS2) |
| { |
| /* fits in 16 bits */ |
| target.put((char) ch); |
| } |
| else { |
| /* write out the surrogates */ |
| target.put(UTF16.getLeadSurrogate(ch)); |
| ch = UTF16.getTrailSurrogate(ch); |
| if (target.hasRemaining()) { |
| target.put((char)ch); |
| } |
| else { |
| /* Put in overflow buffer (not handled here) */ |
| charErrorBufferArray[0] = (char) ch; |
| charErrorBufferLength = 1; |
| cr = CoderResult.OVERFLOW; |
| break; |
| } |
| } |
| } |
| else { |
| toULength = (byte)i; |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| break; |
| } |
| } |
| } |
| |
| if (sourceArrayIndex < source.limit() && !target.hasRemaining()) { |
| /* End of target buffer */ |
| cr = CoderResult.OVERFLOW; |
| } |
| |
| source.position(sourceArrayIndex); |
| return cr; |
| } |
| } |
| |
| class CharsetEncoderUTF32 extends CharsetEncoderICU{ |
| |
| public CharsetEncoderUTF32(CharsetICU cs) { |
| super(cs, fromUSubstitution); |
| implReset(); |
| } |
| |
| private final static int NEED_TO_WRITE_BOM = 1; |
| |
| protected void implReset() { |
| super.implReset(); |
| fromUnicodeStatus = NEED_TO_WRITE_BOM; |
| } |
| |
| protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){ |
| CoderResult cr = CoderResult.UNDERFLOW; |
| if(!source.hasRemaining()) { |
| /* no input, nothing to do */ |
| return cr; |
| } |
| |
| /* write the BOM if necessary */ |
| if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) { |
| byte[] bom={ 0, 0, (byte)0xfe, (byte)0xff }; |
| cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1); |
| if(cr.isError()){ |
| return cr; |
| } |
| fromUnicodeStatus=0; |
| } |
| |
| int ch, ch2; |
| int indexToWrite; |
| byte temp[] = new byte[4]; |
| temp[0] = 0; |
| int sourceArrayIndex = source.position(); |
| |
| boolean doloop = true; |
| if (fromUChar32 != 0) { |
| ch = fromUChar32; |
| fromUChar32 = 0; |
| //lowsurogate: |
| if (sourceArrayIndex < source.limit()) { |
| ch2 = source.get(sourceArrayIndex); |
| if (UTF16.isTrailSurrogate((char)ch2)) { |
| ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; |
| sourceArrayIndex++; |
| } |
| else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| fromUChar32 = ch; |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| doloop = false; |
| } |
| } |
| else { |
| /* ran out of source */ |
| fromUChar32 = ch; |
| if (flush) { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| } |
| doloop = false; |
| } |
| |
| /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
| temp[1] = (byte) (ch >>> 16 & 0x1F); |
| temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */ |
| temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */ |
| |
| for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) { |
| if (target.hasRemaining()) { |
| target.put(temp[indexToWrite]); |
| } |
| else { |
| errorBuffer[errorBufferLength++] = temp[indexToWrite]; |
| cr = CoderResult.OVERFLOW; |
| } |
| } |
| } |
| |
| if(doloop) { |
| while (sourceArrayIndex < source.limit() && target.hasRemaining()) { |
| ch = source.get(sourceArrayIndex++); |
| |
| if (UTF16.isSurrogate((char)ch)) { |
| if (UTF16.isLeadSurrogate((char)ch)) { |
| //lowsurogate: |
| if (sourceArrayIndex < source.limit()) { |
| ch2 = source.get(sourceArrayIndex); |
| if (UTF16.isTrailSurrogate((char)ch2)) { |
| ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; |
| sourceArrayIndex++; |
| } |
| else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| fromUChar32 = ch; |
| cr = CoderResult.OVERFLOW; |
| break; |
| } |
| } |
| else { |
| /* ran out of source */ |
| fromUChar32 = ch; |
| if (flush) { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| } |
| break; |
| } |
| } |
| else { |
| fromUChar32 = ch; |
| cr = CoderResult.malformedForLength(sourceArrayIndex); |
| break; |
| } |
| } |
| |
| /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
| temp[1] = (byte) (ch >>> 16 & 0x1F); |
| temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */ |
| temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */ |
| |
| for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) { |
| if (target.hasRemaining()) { |
| target.put(temp[indexToWrite]); |
| } |
| else { |
| errorBuffer[errorBufferLength++] = temp[indexToWrite]; |
| cr = CoderResult.OVERFLOW; |
| } |
| } |
| } |
| } |
| |
| if (sourceArrayIndex < source.limit() && !target.hasRemaining()) { |
| cr = CoderResult.OVERFLOW; |
| } |
| source.position(sourceArrayIndex); |
| return cr; |
| } |
| } |
| public CharsetDecoder newDecoder() { |
| return new CharsetDecoderUTF32(this); |
| } |
| |
| public CharsetEncoder newEncoder() { |
| return new CharsetEncoderUTF32(this); |
| } |
| } |