| /* |
| ********************************************************************** |
| * Copyright (C) 2000, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * file name: ucnvsbcs.cpp |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2000feb03 |
| * created by: Markus W. Scherer |
| * |
| * Change history: |
| * |
| * 05/09/00 helena Added implementation to handle fallback mappings. |
| * 06/20/2000 helena OS/400 port changes; mostly typecast. |
| * 06/29/2000 helena Major rewrite of the callback APIs. |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "cmemory.h" |
| #include "ucmp16.h" |
| #include "ucmp8.h" |
| #include "unicode/ucnv_err.h" |
| #include "ucnv_bld.h" |
| #include "unicode/ucnv.h" |
| #include "ucnv_cnv.h" |
| |
| /* SBCS --------------------------------------------------------------------- */ |
| |
| static void |
| _SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) { |
| const uint8_t *oldraw = raw; |
| sharedData->table->sbcs.toUnicode = (UChar *)raw; |
| raw += sizeof(uint16_t)*256; oldraw = raw; |
| ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode); |
| if (sharedData->staticData->hasFromUnicodeFallback == TRUE) |
| { |
| if(((raw-oldraw)&3)!=0) { |
| raw+=4-((raw-oldraw)&3); /* pad to 4 */ |
| } |
| ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode); |
| } |
| if (sharedData->staticData->hasToUnicodeFallback == TRUE) |
| { |
| if(((raw-oldraw)&3)!=0) { |
| raw+=4-((raw-oldraw)&3); /* pad to 4 */ |
| } |
| sharedData->table->sbcs.toUnicodeFallback = (UChar *)raw; |
| } |
| } |
| |
| static void |
| _SBCSUnload(UConverterSharedData *sharedData) { |
| ucmp8_close (&sharedData->table->sbcs.fromUnicode); |
| if (sharedData->staticData->hasFromUnicodeFallback == TRUE) |
| ucmp8_close (&sharedData->table->sbcs.fromUnicodeFallback); |
| uprv_free (sharedData->table); |
| } |
| |
| U_CFUNC void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args, |
| UErrorCode * err) |
| { |
| char *mySource = (char *) args->source; |
| UChar *myTarget = args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| int32_t targetLength = args->targetLimit - myTarget; |
| int32_t sourceLength = args->sourceLimit - (char *) mySource; |
| UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL; |
| UChar targetUniChar = 0x0000; |
| |
| myToUnicode = args->converter->sharedData->table->sbcs.toUnicode; |
| myToUnicodeFallback = args->converter->sharedData->table->sbcs.toUnicodeFallback; |
| while (mySourceIndex < sourceLength) |
| { |
| |
| /*writing the UniChar to the output stream */ |
| if (myTargetIndex < targetLength) |
| { |
| /*gets the corresponding UniChar */ |
| targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]]; |
| |
| if (targetUniChar < 0xfffe) |
| { |
| /* writes the UniChar to the output stream */ |
| myTarget[myTargetIndex++] = targetUniChar; |
| } |
| else |
| { |
| if ((args->converter->useFallback == TRUE) && |
| (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) |
| { |
| /* Look up in the fallback table first */ |
| UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]]; |
| if (fallbackUniChar < 0xfffe) |
| { |
| myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar; |
| } |
| } |
| if (targetUniChar >= 0xfffe) |
| { |
| const char *saveSource = args->source; |
| UChar *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| UConverterCallbackReason reason; |
| |
| if (targetUniChar == 0xfffe) |
| { |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| |
| args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; |
| args->converter->invalidCharLength = 1; |
| |
| args->target = myTarget + myTargetIndex; |
| args->source = mySource + mySourceIndex; |
| |
| /* to do hsys: add more smarts to the codeUnits and length later */ |
| ToU_CALLBACK_MACRO(args->converter->toUContext, |
| args, |
| args->converter->invalidCharBuffer, |
| args->converter->invalidCharLength, |
| reason, |
| err); |
| /* Hsys: calculate the source and target advancement */ |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| if (U_FAILURE (*err)) break; |
| args->converter->invalidCharLength = 0; |
| } |
| } |
| } |
| else |
| { |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| break; |
| } |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| |
| return; |
| } |
| |
| U_CFUNC void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args, |
| UErrorCode * err) |
| { |
| const UChar *mySource = args->source; |
| unsigned char *myTarget = (unsigned char *) args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| int32_t targetLength = args->targetLimit - (char *) myTarget; |
| int32_t sourceLength = args->sourceLimit - mySource; |
| CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; |
| unsigned char targetChar = 0x00; |
| UConverterCallbackReason reason; |
| |
| myFromUnicode = &args->converter->sharedData->table->sbcs.fromUnicode; |
| myFromUnicodeFallback = &args->converter->sharedData->table->sbcs.fromUnicodeFallback; |
| /*writing the char to the output stream */ |
| /* HSYS : to do : finish the combining of the surrogate characters later */ |
| /* |
| if (args->converter->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex])) |
| { |
| } |
| */ |
| while (mySourceIndex < sourceLength) |
| { |
| targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]); |
| |
| if (myTargetIndex < targetLength) |
| { |
| mySourceIndex++; |
| if (targetChar != 0 || !mySource[mySourceIndex - 1]) |
| { |
| /*writes the char to the output stream */ |
| myTarget[myTargetIndex++] = targetChar; |
| } |
| else if ((args->converter->useFallback == TRUE) && |
| (args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE)) |
| { |
| /* Look up in the fallback table first */ |
| targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]); |
| if (targetChar != 0 || !mySource[mySourceIndex - 1]) |
| { |
| /*writes the char to the output stream */ |
| myTarget[myTargetIndex++] = targetChar; |
| } |
| } |
| if (targetChar == 0 && mySource[mySourceIndex-1] != 0) |
| { |
| *err = U_INVALID_CHAR_FOUND; |
| reason = UCNV_UNASSIGNED; |
| |
| args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; |
| args->converter->invalidUCharLength = 1; |
| if (UTF_IS_LEAD(mySource[mySourceIndex-1])) |
| { |
| /*if (mySource < args->sourceLimit)*/ |
| if(mySourceIndex < sourceLength) |
| { |
| if (UTF_IS_TRAIL(mySource[mySourceIndex])) |
| { |
| args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex]; |
| args->converter->invalidUCharLength++; |
| mySourceIndex++; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| } |
| } |
| else if (args->flush == TRUE) |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_TRUNCATED_CHAR_FOUND; |
| } |
| else |
| { |
| args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0]; |
| /* do not call the callback */ |
| } |
| } |
| if (args->converter->fromUSurrogateLead == 0) |
| { |
| const UChar *saveSource = args->source; |
| char *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| args->target = (char *)myTarget+myTargetIndex; |
| args->source = mySource+mySourceIndex; |
| /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ |
| /* Check if we have encountered a surrogate pair. If first UChar is lead byte |
| and second UChar is trail byte, it's a surrogate char. If UChar is lead byte |
| but second UChar is not trail byte, it's illegal sequence. If neither, it's |
| plain unassigned code point.*/ |
| FromU_CALLBACK_MACRO(args->converter->fromUContext, |
| args, |
| args->converter->invalidUCharBuffer, |
| args->converter->invalidUCharLength, |
| (UChar32) (args->converter->invalidUCharLength == 2 ? |
| UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], |
| args->converter->invalidUCharBuffer[1]) |
| : args->converter->invalidUCharBuffer[0]), |
| reason, |
| err); |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| if (U_FAILURE (*err)) |
| { |
| break; |
| } |
| args->converter->invalidUCharLength = 0; |
| } |
| } |
| } |
| else |
| { |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| break; |
| } |
| |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| |
| |
| return; |
| } |
| |
| U_CFUNC UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args, |
| UErrorCode* err) |
| { |
| UChar myUChar; |
| |
| if (U_FAILURE(*err)) return 0xffff; |
| |
| if (args->source+1 > args->sourceLimit) |
| { |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0xffff; |
| } |
| |
| /*Gets the corresponding codepoint*/ |
| myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)]; |
| |
| if (myUChar < 0xfffe) return myUChar; |
| else |
| { |
| UChar* myUCharPtr = &myUChar; |
| UConverterCallbackReason reason; |
| |
| /* Do the fallback stuff */ |
| if ((args->converter->useFallback == TRUE)&& |
| (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) |
| { |
| UChar fallbackUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)]; |
| if (fallbackUChar < 0xfffe) return fallbackUChar; |
| } |
| |
| if (myUChar == 0xfffe) |
| { |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| |
| /*Calls the ErrorFunctor */ |
| /*It's is very likely that the ErrorFunctor will write to the |
| *internal buffers */ |
| args->target = myUCharPtr; |
| args->targetLimit = myUCharPtr + 1; |
| args->converter->fromCharErrorBehaviour(args->converter->toUContext, |
| args, |
| args->source - 1, |
| 1, |
| reason, |
| err); |
| |
| /*makes the internal caching transparent to the user*/ |
| if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; |
| |
| return myUChar; |
| } |
| } |
| |
| static const UConverterImpl _SBCSImpl={ |
| UCNV_SBCS, |
| |
| _SBCSLoad, |
| _SBCSUnload, |
| |
| NULL, |
| NULL, |
| NULL, |
| |
| T_UConverter_toUnicode_SBCS, |
| NULL, |
| T_UConverter_fromUnicode_SBCS, |
| NULL, |
| T_UConverter_getNextUChar_SBCS, |
| |
| NULL |
| }; |
| |
| |
| /* Static data is in tools/makeconv/ucnvstat.c for data-based |
| * converters. Be sure to update it as well. |
| */ |
| |
| const UConverterSharedData _SBCSData={ |
| sizeof(UConverterSharedData), 1, |
| NULL, NULL, NULL, FALSE, &_SBCSImpl, |
| 0 |
| }; |
| |
| /* DBCS --------------------------------------------------------------------- */ |
| |
| U_CFUNC void |
| _DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) { |
| const uint8_t *oldraw = raw; |
| ucmp16_initFromData(&sharedData->table->dbcs.toUnicode,&raw, pErrorCode); |
| if(((raw-oldraw)&3)!=0) { |
| raw+=4-((raw-oldraw)&3); /* pad to 4 */ |
| } |
| oldraw = raw; |
| ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode); |
| if (sharedData->staticData->hasFromUnicodeFallback == TRUE) |
| { |
| if(((raw-oldraw)&3)!=0) { |
| raw+=4-((raw-oldraw)&3); /* pad to 4 */ |
| } |
| ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode); |
| oldraw = raw; |
| } |
| if (sharedData->staticData->hasToUnicodeFallback == TRUE) |
| { |
| if(((raw-oldraw)&3)!=0) { |
| raw+=4-((raw-oldraw)&3); /* pad to 4 */ |
| } |
| ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode); |
| } |
| } |
| |
| U_CFUNC void |
| _DBCSUnload(UConverterSharedData *sharedData) { |
| ucmp16_close (&sharedData->table->dbcs.fromUnicode); |
| ucmp16_close (&sharedData->table->dbcs.toUnicode); |
| if (sharedData->staticData->hasFromUnicodeFallback == TRUE) |
| ucmp16_close (&sharedData->table->dbcs.fromUnicodeFallback); |
| if (sharedData->staticData->hasToUnicodeFallback == TRUE) |
| ucmp16_close (&sharedData->table->dbcs.toUnicodeFallback); |
| uprv_free (sharedData->table); |
| } |
| |
| U_CFUNC void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args, |
| UErrorCode * err) |
| { |
| const char *mySource = ( char *) args->source; |
| UChar *myTarget = args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| int32_t targetLength = args->targetLimit - myTarget; |
| int32_t sourceLength = args->sourceLimit - (char *) mySource; |
| CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL; |
| UChar targetUniChar = 0x0000; |
| UChar mySourceChar = 0x0000; |
| |
| myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode; |
| myToUnicodeFallback = &args->converter->sharedData->table->dbcs.toUnicodeFallback; |
| |
| while (mySourceIndex < sourceLength) |
| { |
| if (myTargetIndex < targetLength) |
| { |
| /*gets the corresponding UniChar */ |
| mySourceChar = (unsigned char) mySource[mySourceIndex++]; |
| |
| /*We have no internal state, we should */ |
| if (args->converter->toUnicodeStatus == 0x00) |
| { |
| args->converter->toUnicodeStatus = (unsigned char) mySourceChar; |
| } |
| else |
| { |
| if (args->converter->toUnicodeStatus != 0x00) |
| { |
| mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00FF)); |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| |
| targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); |
| |
| /*writing the UniChar to the output stream */ |
| if (targetUniChar < 0xfffe) |
| { |
| /*writes the UniChar to the output stream */ |
| myTarget[myTargetIndex++] = targetUniChar; |
| } |
| else if ((args->converter->useFallback == TRUE) && |
| (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) |
| { |
| UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar); |
| if (fallbackUniChar < 0xfffe) |
| { |
| myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar; |
| } |
| } |
| if (targetUniChar >= 0xfffe) |
| { |
| const char *saveSource = args->source; |
| UChar *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| UConverterCallbackReason reason; |
| |
| if (targetUniChar == 0xfffe) |
| { |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| |
| args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8); |
| args->converter->invalidCharBuffer[1] = (char) mySourceChar; |
| args->converter->invalidCharLength = 2; |
| |
| args->target = myTarget + myTargetIndex; |
| args->source = mySource + mySourceIndex; |
| |
| /* to do hsys: add more smarts to the codeUnits and length later */ |
| ToU_CALLBACK_MACRO(args->converter->toUContext, |
| args, |
| args->converter->invalidCharBuffer, |
| args->converter->invalidCharLength, |
| reason, |
| err); |
| /* Hsys: calculate the source and target advancement */ |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| if (U_FAILURE (*err)) break; |
| args->converter->invalidCharLength = 0; |
| } |
| } |
| } |
| else |
| { |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| break; |
| } |
| } |
| |
| /*If at the end of conversion we are still carrying state information |
| *flush is TRUE, we can deduce that the input stream is truncated |
| */ |
| if ((args->flush == TRUE) |
| && (mySourceIndex == sourceLength) |
| && (args->converter->toUnicodeStatus != 0x00)) |
| { |
| |
| if (U_SUCCESS(*err)) |
| { |
| *err = U_TRUNCATED_CHAR_FOUND; |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| |
| return; |
| } |
| |
| U_CFUNC void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args, |
| UErrorCode * err) |
| { |
| const UChar *mySource = args->source; |
| unsigned char *myTarget = (unsigned char *) args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| int32_t targetLength = args->targetLimit - (char *) myTarget; |
| int32_t sourceLength = args->sourceLimit - mySource; |
| CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; |
| UChar targetUniChar = 0x0000; |
| UChar mySourceChar = 0x0000; |
| UConverterCallbackReason reason; |
| |
| myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode; |
| myFromUnicodeFallback = &args->converter->sharedData->table->dbcs.fromUnicodeFallback; |
| |
| /*writing the char to the output stream */ |
| while (mySourceIndex < sourceLength) |
| { |
| |
| if (myTargetIndex < targetLength) |
| { |
| mySourceChar = (UChar) mySource[mySourceIndex++]; |
| |
| /*Gets the corresponding codepoint */ |
| targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); |
| if (targetUniChar != missingCharMarker) |
| { |
| /*writes the char to the output stream */ |
| myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); |
| if (myTargetIndex < targetLength) |
| { |
| myTarget[myTargetIndex++] = (char) targetUniChar; |
| } |
| else |
| { |
| args->converter->charErrorBuffer[0] = (char) targetUniChar; |
| args->converter->charErrorBufferLength = 1; |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| } |
| } |
| else if ((args->converter->useFallback == TRUE) && |
| (args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE)) |
| { |
| |
| targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar); |
| if (targetUniChar != missingCharMarker) |
| { |
| /*writes the char to the output stream */ |
| myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); |
| if (myTargetIndex < targetLength) |
| { |
| myTarget[myTargetIndex++] = (char) targetUniChar; |
| } |
| else |
| { |
| args->converter->charErrorBuffer[0] = (char) targetUniChar; |
| args->converter->charErrorBufferLength = 1; |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| } |
| } |
| } |
| if (targetUniChar == missingCharMarker) |
| { |
| *err = U_INVALID_CHAR_FOUND; |
| reason = UCNV_UNASSIGNED; |
| |
| args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; |
| args->converter->invalidUCharLength = 1; |
| if (UTF_IS_LEAD(mySource[mySourceIndex-1])) |
| { |
| /*if (mySource < args->sourceLimit) */ |
| if(mySourceIndex < sourceLength) |
| { |
| if (UTF_IS_TRAIL(mySource[mySourceIndex])) |
| { |
| args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex]; |
| args->converter->invalidUCharLength++; |
| mySourceIndex++; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| } |
| } |
| else if (args->flush == TRUE) |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_TRUNCATED_CHAR_FOUND; |
| } |
| else |
| { |
| args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0]; |
| /* do not call the callback */ |
| } |
| } |
| if (args->converter->fromUSurrogateLead == 0) |
| { |
| /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ |
| /* Check if we have encountered a surrogate pair. If first UChar is lead byte |
| and second UChar is trail byte, it's a surrogate char. If UChar is lead byte |
| but second UChar is not trail byte, it's illegal sequence. If neither, it's |
| plain unassigned code point.*/ |
| const UChar *saveSource = args->source; |
| char *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| args->target = (char*)myTarget + myTargetIndex; |
| args->source = mySource + mySourceIndex; |
| FromU_CALLBACK_MACRO(args->converter->fromUContext, |
| args, |
| args->converter->invalidUCharBuffer, |
| args->converter->invalidUCharLength, |
| (UChar32) (args->converter->invalidUCharLength == 2 ? |
| UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], |
| args->converter->invalidUCharBuffer[1]) |
| : args->converter->invalidUCharBuffer[0]), |
| reason, |
| err); |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| if (U_FAILURE (*err)) |
| { |
| break; |
| } |
| args->converter->invalidUCharLength = 0; |
| } |
| } |
| } |
| else |
| { |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| break; |
| } |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex;; |
| |
| |
| return; |
| } |
| |
| U_CFUNC UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args, |
| UErrorCode* err) |
| { |
| UChar myUChar; |
| |
| if (U_FAILURE(*err)) return 0xffff; |
| /*Checks boundaries and set appropriate error codes*/ |
| if (args->source+2 > args->sourceLimit) |
| { |
| if (args->source >= args->sourceLimit) |
| { |
| /*Either caller has reached the end of the byte stream*/ |
| *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| } |
| else if ((args->source+1) == args->sourceLimit) |
| { |
| /* a character was cut in half*/ |
| *err = U_TRUNCATED_CHAR_FOUND; |
| } |
| |
| return 0xffff; |
| } |
| |
| /*Gets the corresponding codepoint*/ |
| myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicode), |
| (uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source+1)))); |
| |
| /*update the input pointer*/ |
| args->source += 2; |
| if (myUChar < 0xfffe) return myUChar; |
| else |
| { |
| UChar* myUCharPtr = &myUChar; |
| UConverterCallbackReason reason; |
| |
| /* Do the fallback stuff */ |
| if ((args->converter->useFallback == TRUE) && |
| (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) |
| { |
| UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback), |
| (uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source-1)))); |
| if (fallbackUChar < 0xfffe) |
| { |
| args->source += 2; |
| return fallbackUChar; |
| } |
| } |
| |
| if (myUChar == 0xfffe) |
| { |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else |
| { |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| |
| args->target = myUCharPtr; |
| args->targetLimit = myUCharPtr + 1; |
| /*It's is very likely that the ErrorFunctor will write to the |
| *internal buffers */ |
| args->converter->fromCharErrorBehaviour(args->converter->toUContext, |
| args, |
| args->source - 2, |
| 2, |
| reason, |
| err); |
| /*makes the internal caching transparent to the user*/ |
| if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; |
| |
| return myUChar; |
| } |
| } |
| |
| static const UConverterImpl _DBCSImpl={ |
| UCNV_DBCS, |
| |
| _DBCSLoad, |
| _DBCSUnload, |
| |
| NULL, |
| NULL, |
| NULL, |
| |
| T_UConverter_toUnicode_DBCS, |
| NULL, |
| T_UConverter_fromUnicode_DBCS, |
| NULL, |
| T_UConverter_getNextUChar_DBCS, |
| |
| NULL |
| }; |
| |
| |
| /* Static data is in tools/makeconv/ucnvstat.c for data-based |
| * converters. Be sure to update it as well. |
| */ |
| |
| const UConverterSharedData _DBCSData={ |
| sizeof(UConverterSharedData), 1, |
| NULL, NULL, NULL, FALSE, &_DBCSImpl, |
| 0, /* tounicodestatus */ |
| }; |