| /* |
| ********************************************************************** |
| * Copyright (C) 2000, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * file name: ucnvhz.c |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2000oct16 |
| * created by: Ram Viswanadha |
| * 10/31/2000 Ram Implemented offsets logic function |
| * |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "cmemory.h" |
| #include "ucmp16.h" |
| #include "ucmp8.h" |
| #include "unicode/ucnv_err.h" |
| #include "ucnv_bld.h" |
| #include "unicode/ucnv.h" |
| #include "ucnv_cnv.h" |
| #include "unicode/ustring.h" |
| #include "cstring.h" |
| |
| #define UCNV_TILDE 0x7E /* ~ */ |
| #define UCNV_OPEN_BRACE 0x7B /* { */ |
| #define UCNV_CLOSE_BRACE 0x7D /* } */ |
| #define SB_ESCAPE "\x7E\x7D" |
| #define DB_ESCAPE "\x7E\x7B" |
| |
| #define TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS, myConverterData, err){ \ |
| if(*err ==U_BUFFER_OVERFLOW_ERROR){ \ |
| /*save the state and return */ \ |
| args->target += myTargetIndex; \ |
| args->source += mySourceIndex; \ |
| myConverterData->sourceIndex = 0; \ |
| myConverterData->targetIndex = 0; \ |
| args->converter->fromUnicodeStatus = isTargetUCharDBCS; \ |
| return; \ |
| } \ |
| } |
| |
| /*********** HZ Converter Protos ***********/ |
| static void _HZOpen(UConverter *cnv, const char *name, const char *locale, uint32_t options,UErrorCode *errorCode); |
| static void _HZClose(UConverter *converter); |
| static void _HZReset(UConverter *converter); |
| |
| U_CFUNC void UConverter_toUnicode_HZ(UConverterToUnicodeArgs *args, |
| UErrorCode *err); |
| |
| U_CFUNC void UConverter_toUnicode_HZ_OFFSETS_LOGIC (UConverterToUnicodeArgs *args, |
| UErrorCode *err); |
| |
| U_CFUNC void UConverter_fromUnicode_HZ(UConverterFromUnicodeArgs *args, |
| UErrorCode *err); |
| |
| U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs *args, |
| UErrorCode *err); |
| |
| U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs *pArgs, |
| UErrorCode *pErrorCode); |
| |
| static UConverterImpl _HZImpl={ |
| UCNV_HZ, |
| |
| NULL, |
| NULL, |
| |
| _HZOpen, |
| _HZClose, |
| _HZReset, |
| |
| UConverter_toUnicode_HZ, |
| UConverter_toUnicode_HZ_OFFSETS_LOGIC, |
| UConverter_fromUnicode_HZ, |
| UConverter_fromUnicode_HZ_OFFSETS_LOGIC, |
| UConverter_getNextUChar_HZ, |
| |
| NULL, |
| NULL |
| }; |
| |
| const UConverterStaticData _HZStaticData={ |
| sizeof(UConverterStaticData), |
| "HZ", |
| 2023, UCNV_IBM, UCNV_HZ, 1, 4, |
| { 0x1a, 0, 0, 0 },1, FALSE, FALSE, |
| { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* reserved */ |
| }; |
| |
| |
| const UConverterSharedData _HZData={ |
| sizeof(UConverterSharedData), ~((uint32_t) 0), |
| NULL, NULL, &_HZStaticData, FALSE, &_HZImpl, |
| 0 |
| }; |
| |
| typedef struct{ |
| int32_t targetIndex; |
| int32_t sourceIndex; |
| UBool isEscapeAppended; |
| UConverter* gbConverter; |
| UBool isStateDBCS; |
| UBool isTargetUCharDBCS; |
| }UConverterDataHZ; |
| |
| |
| |
| static void _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){ |
| cnv->toUnicodeStatus = 0; |
| cnv->fromUnicodeStatus= 0; |
| cnv->mode=0; |
| cnv->fromUSurrogateLead=0x0000; |
| cnv->extraInfo = uprv_malloc (sizeof (UConverterDataHZ)); |
| if(cnv->extraInfo != NULL){ |
| ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode); |
| ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
| ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
| ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
| ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
| ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
| ucnv_setSubstChars(cnv,"\x7E\x7D\x1A", 3, errorCode); |
| } |
| |
| |
| } |
| static void _HZClose(UConverter *cnv){ |
| |
| ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); |
| uprv_free(cnv->extraInfo); |
| |
| } |
| static void _HZReset(UConverter *cnv){ |
| cnv->toUnicodeStatus = 0; |
| cnv->fromUnicodeStatus= 0; |
| cnv->mode=0; |
| cnv->fromUSurrogateLead=0x0000; |
| if(cnv->extraInfo != NULL){ |
| ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
| ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
| ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
| ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
| ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
| } |
| } |
| |
| /**************************************HZ Encoding************************************************* |
| * Rules for HZ encoding |
| * |
| * In ASCII mode, a byte is interpreted as an ASCII character, unless a |
| * '~' is encountered. The character '~' is an escape character. By |
| * convention, it must be immediately followed ONLY by '~', '{' or '\n' |
| * (<LF>), with the following special meaning. |
| |
| * 1. The escape sequence '~~' is interpreted as a '~'. |
| * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. |
| * 3. The escape sequence '~\n' is a line-continuation marker to be |
| * consumed with no output produced. |
| * In GB mode, characters are interpreted two bytes at a time as (pure) |
| * GB codes until the escape-from-GB code '~}' is read. This code |
| * switches the mode from GB back to ASCII. (Note that the escape- |
| * from-GB code '~}' ($7E7D) is outside the defined GB range.) |
| * |
| * Source: RFC 1842 |
| */ |
| |
| U_CFUNC void UConverter_toUnicode_HZ(UConverterToUnicodeArgs *args, |
| UErrorCode* err){ |
| char tempBuf[3]; |
| const char* pBuf; |
| const char *mySource = ( char *) args->source; |
| UChar *myTarget = args->target; |
| char *tempLimit = &tempBuf[2]+1; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| const char *mySourceLimit = args->sourceLimit; |
| UChar32 targetUniChar = 0x0000; |
| UChar mySourceChar = 0x0000; |
| UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
| |
| |
| /*Arguments Check*/ |
| if (U_FAILURE(*err)) |
| return; |
| |
| if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| |
| while(mySource< args->sourceLimit){ |
| |
| if(myTarget < args->targetLimit){ |
| |
| mySourceChar= (unsigned char) *mySource++; |
| |
| /*if( mySourceChar <= 0x20){ |
| myData->isStateDBCS = FALSE; |
| }*/ |
| |
| switch(mySourceChar){ |
| case 0x0A: |
| if(args->converter->mode ==UCNV_TILDE){ |
| args->converter->mode=0; |
| |
| } |
| *(myTarget++)=(UChar)mySourceChar; |
| continue; |
| |
| case UCNV_TILDE: |
| if(args->converter->mode ==UCNV_TILDE){ |
| *(myTarget++)=(UChar)mySourceChar; |
| args->converter->mode=0; |
| continue; |
| |
| } |
| else if(args->converter->toUnicodeStatus !=0){ |
| args->converter->mode=0; |
| break; |
| } |
| else{ |
| args->converter->mode = UCNV_TILDE; |
| continue; |
| } |
| |
| |
| case UCNV_OPEN_BRACE: |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| myData->isStateDBCS = TRUE; |
| continue; |
| } |
| else{ |
| break; |
| } |
| |
| |
| case UCNV_CLOSE_BRACE: |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| myData->isStateDBCS = FALSE; |
| continue; |
| } |
| else{ |
| break; |
| } |
| |
| default: |
| /* if the first byte is equal to TILDE and the trail byte |
| * is not a valid byte then it is an error condition |
| */ |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); |
| goto SAVE_STATE; |
| } |
| |
| break; |
| |
| } |
| |
| if(myData->isStateDBCS){ |
| if(args->converter->toUnicodeStatus == 0x00){ |
| args->converter->toUnicodeStatus = (UChar) mySourceChar; |
| continue; |
| } |
| else{ |
| tempBuf[0] = (char) (args->converter->toUnicodeStatus +0x80); |
| tempBuf[1] = (char) (mySourceChar+0x80); |
| mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); |
| args->converter->toUnicodeStatus =0x00; |
| pBuf = &tempBuf[0]; |
| tempLimit = &tempBuf[2]+1; |
| targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
| &pBuf,tempLimit,args->converter->useFallback); |
| } |
| } |
| else{ |
| if(args->converter->fromUnicodeStatus == 0x00){ |
| tempBuf[0] = (char) mySourceChar; |
| pBuf = &tempBuf[0]; |
| tempLimit = &tempBuf[1]; |
| targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
| &pBuf,tempLimit,args->converter->useFallback); |
| } |
| else{ |
| goto SAVE_STATE; |
| } |
| |
| } |
| if(targetUniChar < 0xfffe){ |
| *(myTarget++)=(UChar)targetUniChar; |
| } |
| else if(targetUniChar>=0xfffe){ |
| SAVE_STATE: |
| { |
| const char *saveSource = args->source; |
| UChar *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| UConverterCallbackReason reason; |
| |
| if(targetUniChar == 0xfffe){ |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else{ |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| if(myData->isStateDBCS){ |
| |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80); |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80); |
| } |
| else{ |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar; |
| } |
| args->target = myTarget; |
| args->source = mySource; |
| ToU_CALLBACK_MACRO( args->converter->toUContext, |
| args, |
| args->converter->invalidCharBuffer, |
| args->converter->invalidCharLength, |
| reason, |
| err); |
| myTarget = args->target; |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| args->converter->invalidCharLength=0; |
| if(U_FAILURE(*err)) |
| break; |
| |
| } |
| } |
| } |
| else{ |
| *err =U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| } |
| if((args->flush==TRUE) |
| && (mySource == mySourceLimit) |
| && ( args->converter->toUnicodeStatus !=0x00)){ |
| if(U_SUCCESS(*err)){ |
| *err = U_TRUNCATED_CHAR_FOUND; |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| } |
| /* Reset the state of converter if we consumed |
| * the source and flush is true |
| */ |
| if( (mySource == mySourceLimit) && args->flush){ |
| _HZReset(args->converter); |
| } |
| |
| args->target = myTarget; |
| args->source = mySource; |
| } |
| |
| |
| U_CFUNC void UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, |
| UErrorCode* err){ |
| char tempBuf[3]; |
| const char* pBuf; |
| const char *mySource = ( char *) args->source; |
| UChar *myTarget = args->target; |
| char *tempLimit = &tempBuf[3]; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| const char *mySourceLimit = args->sourceLimit; |
| UChar32 targetUniChar = 0x0000; |
| UChar mySourceChar = 0x0000; |
| UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
| |
| /*Arguments Check*/ |
| if (U_FAILURE(*err)) |
| return; |
| |
| if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| |
| while(mySource< args->sourceLimit){ |
| |
| if(myTarget < args->targetLimit){ |
| |
| mySourceChar= (unsigned char) *mySource++; |
| |
| |
| /*if( mySourceChar <= 0x20){ |
| myData->isStateDBCS = FALSE; |
| }*/ |
| |
| switch(mySourceChar){ |
| case 0x0A: |
| if(args->converter->mode ==UCNV_TILDE){ |
| args->converter->mode=0; |
| |
| } |
| *(myTarget++)=(UChar)mySourceChar; |
| continue; |
| |
| case UCNV_TILDE: |
| if(args->converter->mode ==UCNV_TILDE){ |
| *(myTarget++)=(UChar)mySourceChar; |
| args->converter->mode=0; |
| continue; |
| |
| } |
| else if(args->converter->toUnicodeStatus !=0){ |
| args->converter->mode=0; |
| break; |
| } |
| else{ |
| args->converter->mode = UCNV_TILDE; |
| continue; |
| } |
| |
| |
| case UCNV_OPEN_BRACE: |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| myData->isStateDBCS = TRUE; |
| continue; |
| } |
| else{ |
| break; |
| } |
| |
| |
| case UCNV_CLOSE_BRACE: |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| myData->isStateDBCS = FALSE; |
| continue; |
| } |
| else{ |
| break; |
| } |
| |
| default: |
| /* if the first byte is equal to TILDE and the trail byte |
| * is not a valid byte then it is an error condition |
| */ |
| if(args->converter->mode == UCNV_TILDE){ |
| args->converter->mode=0; |
| mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); |
| goto SAVE_STATE; |
| } |
| |
| break; |
| |
| } |
| |
| if(myData->isStateDBCS){ |
| if(args->converter->toUnicodeStatus == 0x00){ |
| args->converter->toUnicodeStatus = (UChar) mySourceChar; |
| continue; |
| } |
| else{ |
| tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ; |
| tempBuf[1] = (char) (mySourceChar+0x80); |
| mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); |
| args->converter->toUnicodeStatus =0x00; |
| pBuf = &tempBuf[0]; |
| tempLimit = &tempBuf[2]+1; |
| targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
| &pBuf,tempLimit,args->converter->useFallback); |
| } |
| } |
| else{ |
| if(args->converter->fromUnicodeStatus == 0x00){ |
| tempBuf[0] = (char) mySourceChar; |
| pBuf = &tempBuf[0]; |
| tempLimit = &tempBuf[1]; |
| targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
| &pBuf,tempLimit,args->converter->useFallback); |
| } |
| else{ |
| goto SAVE_STATE; |
| } |
| |
| } |
| if(targetUniChar < 0xfffe){ |
| if(myData->isStateDBCS){ |
| args->offsets[myTarget - args->target]= mySource - args->source - 2; |
| } |
| else{ |
| args->offsets[myTarget - args->target]= mySource - args->source - 1; |
| } |
| *(myTarget++)=(UChar)targetUniChar; |
| } |
| else if(targetUniChar>=0xfffe){ |
| SAVE_STATE: |
| { |
| const char *saveSource = args->source; |
| UChar *saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| |
| UConverterCallbackReason reason; |
| int32_t currentOffset ; |
| int32_t My_i = myTarget - args->target; |
| |
| if(targetUniChar == 0xfffe){ |
| reason = UCNV_UNASSIGNED; |
| *err = U_INVALID_CHAR_FOUND; |
| } |
| else{ |
| reason = UCNV_ILLEGAL; |
| *err = U_ILLEGAL_CHAR_FOUND; |
| } |
| if(myData->isStateDBCS){ |
| |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80); |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80); |
| currentOffset= mySource - args->source -2; |
| |
| } |
| else{ |
| args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar; |
| currentOffset= mySource - args->source -1; |
| } |
| args->offsets = args->offsets?args->offsets+(myTarget - args->target):0; |
| args->target = myTarget; |
| args->source = mySource; |
| myTarget = saveTarget; |
| ToU_CALLBACK_OFFSETS_LOGIC_MACRO( args->converter->toUContext, |
| args, |
| args->converter->invalidCharBuffer, |
| args->converter->invalidCharLength, |
| reason, |
| err); |
| args->converter->invalidCharLength=0; |
| args->source = saveSource; |
| myTarget = args->target; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| if(U_FAILURE(*err)) |
| break; |
| } |
| } |
| } |
| else{ |
| *err =U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| } |
| if((args->flush==TRUE) |
| && (mySource == mySourceLimit) |
| && ( args->converter->toUnicodeStatus !=0x00)){ |
| if(U_SUCCESS(*err)){ |
| *err = U_TRUNCATED_CHAR_FOUND; |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| } |
| /* Reset the state of converter if we consumed |
| * the source and flush is true |
| */ |
| if( (mySource == mySourceLimit) && args->flush){ |
| _HZReset(args->converter); |
| } |
| |
| args->target = myTarget; |
| args->source = mySource; |
| } |
| |
| static void concatEscape(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength, |
| const char* strToAppend,UErrorCode* err,int len,int32_t *sourceIndex){ |
| while(len-->0){ |
| if(*targetIndex < *targetLength){ |
| args->target[*targetIndex] = (unsigned char) *strToAppend; |
| if(args->offsets!=NULL){ |
| args->offsets[*targetIndex] = *sourceIndex-1; |
| } |
| (*targetIndex)++; |
| } |
| else{ |
| args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; |
| *err =U_BUFFER_OVERFLOW_ERROR; |
| } |
| strToAppend++; |
| } |
| } |
| |
| static void concatString(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength, |
| const uint32_t* strToAppend,UErrorCode* err, int32_t *sourceIndex){ |
| |
| if(*strToAppend < 0x00FF){ |
| if( (*targetIndex)+1 >= *targetLength){ |
| args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| }else{ |
| args->target[*targetIndex] = (unsigned char) *strToAppend; |
| |
| if(args->offsets!=NULL){ |
| args->offsets[*targetIndex] = *sourceIndex-1; |
| } |
| (*targetIndex)++; |
| |
| } |
| } |
| else{ |
| if(*targetIndex < *targetLength){ |
| args->target[*targetIndex] =(unsigned char) ((*strToAppend>>8)-0x80 ); |
| if(args->offsets!=NULL){ |
| args->offsets[*targetIndex] = *sourceIndex-1; |
| } |
| (*targetIndex)++; |
| |
| if(*targetIndex < *targetLength){ |
| args->target[(*targetIndex)] =(unsigned char) ((*strToAppend & 0x00FF)-0x80); |
| |
| if(args->offsets!=NULL){ |
| args->offsets[*targetIndex] = *sourceIndex-1; |
| } |
| (*targetIndex)++; |
| } |
| else{ |
| args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend & 0x00FF)-0x80); |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| |
| } |
| |
| } |
| else{ |
| args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend>>8)-0x80); |
| args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend & 0x00FF)-0x80); |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| if(args->offsets!=NULL){ |
| args->offsets[*targetIndex] = *sourceIndex-1; |
| |
| } |
| } |
| } |
| |
| } |
| |
| U_CFUNC void UConverter_fromUnicode_HZ(UConverterFromUnicodeArgs *args, UErrorCode *err){ |
| |
| |
| const UChar *mySource = args->source; |
| unsigned char *myTarget = (unsigned char *) args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| uint32_t targetValue=0; |
| int32_t targetLength = args->targetLimit - args->target; |
| int32_t sourceLength = args->sourceLimit - args->source; |
| int32_t length=0; |
| uint32_t targetUniChar = 0x0000; |
| UChar32 mySourceChar = 0x0000,c=0x0000; |
| UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; |
| UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; |
| UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| UConverterCallbackReason reason; |
| UBool isEscapeAppended =FALSE; |
| |
| /*Arguments Check*/ |
| if (U_FAILURE(*err)) |
| return; |
| |
| if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) { |
| goto getTrail; |
| } |
| /*writing the char to the output stream */ |
| while (mySourceIndex < sourceLength){ |
| |
| if (myTargetIndex < targetLength){ |
| |
| c=mySourceChar = (UChar) args->source[mySourceIndex++]; |
| |
| /*Handle surrogates */ |
| if(UTF_IS_SURROGATE(mySourceChar)) { |
| if(UTF_IS_SURROGATE_FIRST(mySourceChar)) { |
| args->converter->fromUSurrogateLead = (UChar)mySourceChar; |
| getTrail: |
| /*look ahead to find the trail surrogate*/ |
| if(mySourceIndex < sourceLength) { |
| /* test the following code unit */ |
| UChar trail=(UChar) args->source[mySourceIndex]; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++mySourceIndex; |
| mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail); |
| args->converter->fromUSurrogateLead = 0x00; |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| reason=UCNV_ILLEGAL; |
| *err=U_ILLEGAL_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| reason=UCNV_ILLEGAL; |
| *err=U_ILLEGAL_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| } |
| oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| if(mySourceChar == 0x7E){ |
| concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex); |
| continue; |
| } |
| else{ |
| length= _MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
| mySourceChar,&targetValue,args->converter->useFallback); |
| targetUniChar = (UChar32) targetValue; |
| |
| } |
| /* only DBCS or SBCS characters are expected*/ |
| if(length > 2 || length==0){ |
| reason =UCNV_ILLEGAL; |
| *err =U_INVALID_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| /* DB haracters with high bit set to 1 are expected */ |
| if(((targetUniChar & 0x8080) != 0x8080)&& length==2){ |
| reason =UCNV_ILLEGAL; |
| *err =U_INVALID_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| |
| myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); |
| |
| if (targetUniChar != missingCharMarker){ |
| |
| if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ |
| /*Shifting from a double byte to single byte mode*/ |
| if(!isTargetUCharDBCS){ |
| concatEscape(args, &myTargetIndex, &targetLength, SB_ESCAPE,err, 2,&mySourceIndex); |
| myConverterData->isEscapeAppended =isEscapeAppended =TRUE; |
| } |
| else{ /* Shifting from a single byte to double byte mode*/ |
| concatEscape(args, &myTargetIndex, &targetLength, DB_ESCAPE,err, 2,&mySourceIndex); |
| myConverterData->isEscapeAppended =isEscapeAppended =TRUE; |
| |
| } |
| } |
| |
| concatString(args, &myTargetIndex, &targetLength,&targetUniChar,err, &mySourceIndex); |
| |
| TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err); |
| } |
| else{ |
| |
| CALLBACK: |
| { |
| const UChar* saveSource = args->source; |
| char* saveTarget = args->target; |
| int32_t *saveOffsets = args->offsets; |
| *err = U_INVALID_CHAR_FOUND; |
| args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar; |
| args->converter->invalidUCharLength = 1; |
| |
| myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| FromU_CALLBACK_MACRO(args->converter->fromUContext, |
| args, |
| args->converter->invalidUCharBuffer, |
| 1, |
| (UChar32) mySourceChar, |
| UCNV_UNASSIGNED, |
| err); |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| isTargetUCharDBCS=(UBool)myConverterData->isTargetUCharDBCS; |
| args->converter->fromUSurrogateLead =0x00; |
| myConverterData->isEscapeAppended =isEscapeAppended =FALSE; |
| args->converter->invalidUCharLength = 0; |
| if (U_FAILURE (*err)) |
| break; |
| |
| } |
| } |
| } |
| else{ |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| targetUniChar=missingCharMarker; |
| } |
| /*If at the end of conversion we are still carrying state information |
| *flush is TRUE, we can deduce that the input stream is truncated |
| */ |
| if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){ |
| if (U_SUCCESS(*err)){ |
| *err = U_TRUNCATED_CHAR_FOUND; |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| } |
| /* Reset the state of converter if we consumed |
| * the source and flush is true |
| */ |
| if( (mySourceIndex == sourceLength) && args->flush){ |
| _HZReset(args->converter); |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
| |
| return; |
| } |
| |
| |
| U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, |
| UErrorCode * err){ |
| const UChar *mySource = args->source; |
| unsigned char *myTarget = (unsigned char *) args->target; |
| int32_t mySourceIndex = 0; |
| int32_t myTargetIndex = 0; |
| int32_t targetLength = args->targetLimit - args->target; |
| int32_t sourceLength = args->sourceLimit - args->source; |
| int32_t length=0; |
| uint32_t targetUniChar = 0x0000; |
| UChar32 mySourceChar = 0x0000,c=0x0000; |
| UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; |
| UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; |
| UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| UConverterCallbackReason reason; |
| UBool isEscapeAppended =FALSE; |
| |
| /*Arguments Check*/ |
| if (U_FAILURE(*err)) |
| return; |
| |
| if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) { |
| goto getTrail; |
| } |
| /*writing the char to the output stream */ |
| while (mySourceIndex < sourceLength){ |
| |
| if (myTargetIndex < targetLength){ |
| |
| c=mySourceChar = (UChar) args->source[mySourceIndex++]; |
| |
| /*Handle surrogates */ |
| if(UTF_IS_SURROGATE(mySourceChar)) { |
| if(UTF_IS_SURROGATE_FIRST(mySourceChar)) { |
| args->converter->fromUSurrogateLead = (UChar) mySourceChar; |
| getTrail: |
| /*look ahead to find the trail surrogate*/ |
| if(mySourceIndex < sourceLength) { |
| /* test the following code unit */ |
| UChar trail=(UChar) args->source[mySourceIndex]; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++mySourceIndex; |
| mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail); |
| args->converter->fromUSurrogateLead = 0x00; |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| reason=UCNV_ILLEGAL; |
| *err=U_ILLEGAL_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| reason=UCNV_ILLEGAL; |
| *err=U_ILLEGAL_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| } |
| oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| if(mySourceChar == 0x7E){ |
| concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex); |
| continue; |
| } |
| else{ |
| length= _MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
| mySourceChar,&targetUniChar,args->converter->useFallback); |
| |
| } |
| /* only DBCS or SBCS characters are expected*/ |
| if(length > 2 || length==0){ |
| reason =UCNV_ILLEGAL; |
| *err =U_INVALID_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| /* DB haracters with high bit set to 1 are expected */ |
| if(((targetUniChar & 0x8080) != 0x8080)&& length==2){ |
| reason =UCNV_ILLEGAL; |
| *err =U_INVALID_CHAR_FOUND; |
| goto CALLBACK; |
| } |
| |
| myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); |
| |
| if (targetUniChar != missingCharMarker){ |
| |
| if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ |
| /*Shifting from a double byte to single byte mode*/ |
| if(!isTargetUCharDBCS){ |
| concatEscape(args, &myTargetIndex, &targetLength, SB_ESCAPE,err, 2,&mySourceIndex); |
| myConverterData->isEscapeAppended =isEscapeAppended =TRUE; |
| } |
| else{ /* Shifting from a single byte to double byte mode*/ |
| concatEscape(args, &myTargetIndex, &targetLength, DB_ESCAPE,err, 2,&mySourceIndex); |
| myConverterData->isEscapeAppended =isEscapeAppended =TRUE; |
| |
| } |
| } |
| |
| concatString(args, &myTargetIndex, &targetLength,&targetUniChar,err, &mySourceIndex); |
| |
| TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err); |
| } |
| else{ |
| |
| CALLBACK: |
| { |
| int32_t currentOffset = args->offsets[myTargetIndex-1]+1; |
| char * saveTarget = args->target; |
| const UChar* saveSource = args->source; |
| int32_t *saveOffsets = args->offsets; |
| *err = U_INVALID_CHAR_FOUND; |
| args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar; |
| args->converter->invalidUCharLength = 1; |
| |
| /* Breaks out of the loop since behaviour was set to stop */ |
| args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| args->offsets = args->offsets?args->offsets+myTargetIndex:0; |
| FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext, |
| args, |
| args->converter->invalidUCharBuffer, |
| 1, |
| (UChar32)mySourceChar, |
| UCNV_UNASSIGNED, |
| err); |
| args->source = saveSource; |
| args->target = saveTarget; |
| args->offsets = saveOffsets; |
| isTargetUCharDBCS=(UBool)myConverterData->isTargetUCharDBCS; |
| args->converter->fromUSurrogateLead =0x00; |
| myConverterData->isEscapeAppended =isEscapeAppended =FALSE; |
| args->converter->invalidUCharLength = 0; |
| if (U_FAILURE (*err)) |
| break; |
| |
| } |
| } |
| } |
| else{ |
| *err = U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| targetUniChar=missingCharMarker; |
| } |
| /*If at the end of conversion we are still carrying state information |
| *flush is TRUE, we can deduce that the input stream is truncated |
| */ |
| if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){ |
| if (U_SUCCESS(*err)){ |
| *err = U_TRUNCATED_CHAR_FOUND; |
| args->converter->toUnicodeStatus = 0x00; |
| } |
| } |
| /* Reset the state of converter if we consumed |
| * the source and flush is true |
| */ |
| if( (mySourceIndex == sourceLength) && args->flush){ |
| _HZReset(args->converter); |
| } |
| |
| args->target += myTargetIndex; |
| args->source += mySourceIndex; |
| myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
| |
| return; |
| } |
| |
| U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs * pArgs, |
| UErrorCode *pErrorCode){ |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| const char *realLimit=pArgs->sourceLimit; |
| |
| pArgs->target=buffer; |
| pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH; |
| |
| while(pArgs->source<realLimit) { |
| /* feed in one byte at a time to make sure to get only one character out */ |
| pArgs->sourceLimit=pArgs->source+1; |
| pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit); |
| UConverter_toUnicode_HZ(pArgs, pErrorCode); |
| if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
| return 0xffff; |
| } else if(pArgs->target!=buffer) { |
| if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| *pErrorCode=U_ZERO_ERROR; |
| } |
| return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer); |
| } |
| } |
| |
| /* no output because of empty input or only state changes and skipping callbacks */ |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0xffff; |
| } |