| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2000, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * |
| * ucnv_cnv.c: |
| * Implements all the low level conversion functions |
| * T_UnicodeConverter_{to,from}Unicode_$ConversionType |
| * |
| * Change history: |
| * |
| * 06/29/2000 helena Major rewrite of the callback APIs. |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "unicode/ucnv_err.h" |
| #include "ucnv_cnv.h" |
| #include "unicode/ucnv.h" |
| #include "cmemory.h" |
| |
| /*Empties the internal unicode output buffer */ |
| void flushInternalUnicodeBuffer (UConverter * _this, |
| UChar * myTarget, |
| int32_t * myTargetIndex, |
| int32_t targetLength, |
| int32_t** offsets, |
| UErrorCode * err) |
| { |
| int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength; |
| |
| if (myUCharErrorBufferLength <= targetLength) |
| { |
| /*we have enough space |
| *So we just copy the whole Error Buffer in to the output stream*/ |
| uprv_memcpy (myTarget, |
| _this->UCharErrorBuffer, |
| sizeof (UChar) * myUCharErrorBufferLength); |
| if (offsets) |
| { |
| int32_t i=0; |
| for (i=0; i<myUCharErrorBufferLength;i++) (*offsets)[i] = -1; |
| *offsets += myUCharErrorBufferLength; |
| } |
| *myTargetIndex += myUCharErrorBufferLength; |
| _this->UCharErrorBufferLength = 0; |
| } |
| else |
| { |
| /* We don't have enough space so we copy as much as we can |
| * on the output stream and update the object |
| * by updating the internal buffer*/ |
| uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength); |
| if (offsets) |
| { |
| int32_t i=0; |
| for (i=0; i< targetLength;i++) (*offsets)[i] = -1; |
| *offsets += targetLength; |
| } |
| uprv_memmove (_this->UCharErrorBuffer, |
| _this->UCharErrorBuffer + targetLength, |
| sizeof (UChar) * (myUCharErrorBufferLength - targetLength)); |
| _this->UCharErrorBufferLength -= (int8_t) targetLength; |
| *myTargetIndex = targetLength; |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| } |
| |
| return; |
| } |
| |
| /*Empties the internal codepage output buffer */ |
| void flushInternalCharBuffer (UConverter * _this, |
| char *myTarget, |
| int32_t * myTargetIndex, |
| int32_t targetLength, |
| int32_t** offsets, |
| UErrorCode * err) |
| { |
| int32_t myCharErrorBufferLength = _this->charErrorBufferLength; |
| |
| /*we have enough space */ |
| if (myCharErrorBufferLength <= targetLength) |
| { |
| uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength); |
| if (offsets) |
| { |
| int32_t i=0; |
| for (i=0; i<myCharErrorBufferLength;i++) (*offsets)[i] = -1; |
| *offsets += myCharErrorBufferLength; |
| } |
| |
| *myTargetIndex += myCharErrorBufferLength; |
| _this->charErrorBufferLength = 0; |
| } |
| else |
| /* We don't have enough space so we copy as much as we can |
| * on the output stream and update the object*/ |
| { |
| uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength); |
| if (offsets) |
| { |
| int32_t i=0; |
| for (i=0; i< targetLength;i++) (*offsets)[i] = -1; |
| *offsets += targetLength; |
| } |
| uprv_memmove (_this->charErrorBuffer, |
| _this->charErrorBuffer + targetLength, |
| (myCharErrorBufferLength - targetLength)); |
| _this->charErrorBufferLength -= (int8_t) targetLength; |
| *myTargetIndex = targetLength; |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| } |
| |
| return; |
| } |
| |
| /** |
| * This function is useful for implementations of getNextUChar(). |
| * After a call to a callback function or to toUnicode(), an output buffer |
| * begins with a Unicode code point that needs to be returned as UChar32, |
| * and all following code units must be prepended to the - potentially |
| * prefilled - overflow buffer in the UConverter. |
| * The buffer should be at least of capacity UTF_MAX_CHAR_LENGTH so that a |
| * complete UChar32's UChars fit into it. |
| * |
| * @param cnv The converter that will get remaining UChars copied to its overflow area. |
| * @param buffer An array of UChars that was passed into a callback function |
| * or a toUnicode() function. |
| * @param length The number of code units (UChars) that are actually in the buffer. |
| * This must be >0. |
| * @return The code point from the first UChars in the buffer. |
| */ |
| U_CFUNC UChar32 |
| ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length) { |
| UChar32 c; |
| int32_t i; |
| |
| if(length<=0) { |
| return 0xffff; |
| } |
| |
| /* get the first code point in the buffer */ |
| i=0; |
| UTF_NEXT_CHAR_SAFE(buffer, i, length, c, FALSE); |
| if(i<length) { |
| /* there are UChars left in the buffer that need to go into the overflow buffer */ |
| UChar *overflow=cnv->UCharErrorBuffer; |
| int32_t j=cnv->UCharErrorBufferLength; |
| |
| if(j>0) { |
| /* move the overflow buffer contents to make room for the extra UChars */ |
| int32_t k; |
| |
| cnv->UCharErrorBufferLength=(int8_t)(k=(length-i)+j); |
| do { |
| overflow[--k]=overflow[--j]; |
| } while(j>0); |
| } else { |
| cnv->UCharErrorBufferLength=(int8_t)(length-i); |
| } |
| |
| /* copy the remaining UChars to the beginning of the overflow buffer */ |
| do { |
| overflow[j++]=buffer[i++]; |
| } while(i<length); |
| } |
| return c; |
| } |
| |
| /* update target offsets after a callback call */ |
| U_CFUNC int32_t * |
| ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex) { |
| if(offsets!=NULL) { |
| if(sourceIndex>=0) { |
| /* add the sourceIndex to the relative offsets that the callback wrote */ |
| while(length>0) { |
| *offsets+=sourceIndex; |
| ++offsets; |
| --length; |
| } |
| } else { |
| /* sourceIndex==-1, set -1 offsets */ |
| while(length>0) { |
| *offsets=-1; |
| ++offsets; |
| --length; |
| } |
| } |
| return offsets; |
| } else { |
| return NULL; |
| } |
| } |
| |
| /* |
| * This is a simple implementation of ucnv_getNextUChar() that uses the |
| * converter's toUnicode() function. See ucnv_cnv.h for details. |
| */ |
| U_CFUNC UChar32 |
| ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs, |
| T_ToUnicodeFunction toU, |
| UBool collectPairs, |
| UErrorCode *pErrorCode) { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| const char *realLimit=pArgs->sourceLimit; |
| |
| pArgs->target=buffer; |
| pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH; |
| |
| while(pArgs->source<realLimit) { |
| /* feed in one byte at a time to make sure to get only one character out */ |
| pArgs->sourceLimit=pArgs->source+1; |
| pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit); |
| |
| /* convert this byte and check the result */ |
| toU(pArgs, pErrorCode); |
| if(U_SUCCESS(*pErrorCode)) { |
| int32_t length=pArgs->target-buffer; |
| |
| /* this test is UTF-16 specific */ |
| if(/* some output and |
| (source consumed or don't collect surrogate pairs or not a surrogate or a surrogate pair) */ |
| length>0 && |
| (pArgs->flush || !collectPairs || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2) |
| ) { |
| return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length); |
| } |
| /* else continue with the loop */ |
| } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| *pErrorCode=U_ZERO_ERROR; |
| return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, UTF_MAX_CHAR_LENGTH); |
| } else { |
| /* U_FAILURE() */ |
| return 0xffff; |
| } |
| } |
| |
| /* no output because of empty input or only state changes and skipping callbacks */ |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0xffff; |
| } |