| /******************************************************************** |
| * COPYRIGHT: |
| * Copyright (c) 1997-2003, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| /******************************************************************************** |
| * |
| * File CCONVTST.C |
| * |
| * Modification History: |
| * Name Description |
| * Steven R. Loomis 7/8/1999 Adding input buffer test |
| ********************************************************************************* |
| */ |
| #include <stdio.h> |
| #include "cstring.h" |
| #include "unicode/uloc.h" |
| #include "unicode/ucnv.h" |
| #include "unicode/ucnv_err.h" |
| #include "cintltst.h" |
| #include "unicode/utypes.h" |
| #include "unicode/ustring.h" |
| #include "unicode/ucol.h" |
| #include "cmemory.h" |
| |
| static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); |
| static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); |
| #if !UCONFIG_NO_COLLATION |
| static void TestJitterbug981(void); |
| #endif |
| static void TestJitterbug1293(void); |
| static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; |
| static void TestConverterTypesAndStarters(void); |
| static void TestAmbiguous(void); |
| static void TestSignatureDetection(void); |
| static void TestUTF7(void); |
| static void TestIMAP(void); |
| static void TestUTF8(void); |
| static void TestCESU8(void); |
| static void TestUTF16(void); |
| static void TestUTF16BE(void); |
| static void TestUTF16LE(void); |
| static void TestUTF32(void); |
| static void TestUTF32BE(void); |
| static void TestUTF32LE(void); |
| static void TestLATIN1(void); |
| static void TestSBCS(void); |
| static void TestDBCS(void); |
| static void TestMBCS(void); |
| static void TestISO_2022(void); |
| static void TestISO_2022_JP(void); |
| static void TestISO_2022_JP_1(void); |
| static void TestISO_2022_JP_2(void); |
| static void TestISO_2022_KR(void); |
| static void TestISO_2022_KR_1(void); |
| static void TestISO_2022_CN(void); |
| static void TestISO_2022_CN_EXT(void); |
| static void TestJIS(void); |
| static void TestHZ(void); |
| static void TestSCSU(void); |
| static void TestEBCDIC_STATEFUL(void); |
| static void TestGB18030(void); |
| static void TestLMBCS(void); |
| static void TestJitterbug255(void); |
| static void TestEBCDICUS4XML(void); |
| static void TestJitterbug915(void); |
| static void TestISCII(void); |
| static void TestConv(const uint16_t in[], |
| int len, |
| const char* conv, |
| const char* lang, |
| char byteArr[], |
| int byteArrLen); |
| static void TestRoundTrippingAllUTF(void); |
| static void TestCoverageMBCS(void); |
| static void TestJitterbug2346(void); |
| static void TestJitterbug2411(void); |
| void addTestNewConvert(TestNode** root); |
| |
| /* open a converter, using test data if it begins with '@' */ |
| static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); |
| |
| |
/* Element capacity of the scratch output and offset arrays used by the
   conversion helpers; also the default (largest) chunk size. */
#define NEW_MAX_BUFFER 999

/* Input and output chunk sizes used by the conversion helpers. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Human-readable description of the running sub-test, used in log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every argument use and the whole expansion are parenthesized so that
 * operands containing low-precedence operators (|, &, ?:, ...) are compared
 * as a unit.  Each argument may still be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
| |
| static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) |
| { |
| if(cnv && cnv[0] == '@') { |
| return ucnv_openPackage(loadTestData(err), cnv+1, err); |
| } else { |
| return ucnv_open(cnv, err); |
| } |
| } |
| |
/* Writes the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
| |
| static void printUSeq(const UChar* a, int len) |
| { |
| int i=0; |
| log_verbose("{U+"); |
| while (i<len) log_verbose("0x%04x ", a[i++]); |
| log_verbose("}\n"); |
| } |
| |
/* Same output as printSeq(), but written directly to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
| |
| static void printUSeqErr(const UChar* a, int len) |
| { |
| int i=0; |
| fprintf(stderr, "{U+"); |
| while (i<len) |
| fprintf(stderr, "0x%04x ", a[i++]); |
| fprintf(stderr,"}\n"); |
| } |
| |
| static void |
| TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) |
| { |
| const char* s0; |
| const char* s=(char*)source; |
| const int32_t *r=results; |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UChar32 c; |
| |
| while(s<limit) { |
| s0=s; |
| c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); |
| if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { |
| break; /* no more significant input */ |
| } else if(U_FAILURE(errorCode)) { |
| log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); |
| break; |
| } else if( |
| /* test the expected number of input bytes only if >=0 */ |
| (*r>=0 && (int32_t)(s-s0)!=*r) || |
| c!=*(r+1) |
| ) { |
| log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", |
| message, c, (s-s0), *(r+1), *r); |
| break; |
| } |
| r+=2; |
| } |
| } |
| |
| static void |
| TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) |
| { |
| const char* s=(char*)source; |
| UErrorCode errorCode=U_ZERO_ERROR; |
| uint32_t c; |
| c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); |
| if(errorCode != expected){ |
| log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); |
| } |
| if(c != 0xFFFD && c != 0xffff){ |
| log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); |
| } |
| |
| } |
| |
| static void TestInBufSizes(void) |
| { |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); |
| #if 1 |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); |
| TestNewConvertWithBufferSizes(1,1); |
| TestNewConvertWithBufferSizes(2,3); |
| TestNewConvertWithBufferSizes(3,2); |
| #endif |
| } |
| |
| static void TestOutBufSizes(void) |
| { |
| #if 1 |
| TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); |
| TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); |
| TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); |
| TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); |
| TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); |
| TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); |
| |
| #endif |
| } |
| |
| |
| void addTestNewConvert(TestNode** root) |
| { |
| addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); |
| addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); |
| addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); |
| addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); |
| addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); |
| addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); |
| addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); |
| addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); |
| |
| /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ |
| addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); |
| addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); |
| addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); |
| addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); |
| addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); |
| addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); |
| addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); |
| addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); |
| |
| addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); |
| addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); |
| addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); |
| addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); |
| addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); |
| addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); |
| addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); |
| addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); |
| addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); |
| addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); |
| addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); |
| addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); |
| addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); |
| addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); |
| addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); |
| addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); |
| addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); |
| addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); |
| addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); |
| addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); |
| addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); |
| #if !UCONFIG_NO_COLLATION |
| addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); |
| #endif |
| addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); |
| addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); |
| addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); |
| addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); |
| addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); |
| |
| } |
| |
| |
| /* Note that this test already makes use of statics, so it's not really |
| multithread safe. |
| This convenience function lets us make the error messages actually useful. |
| */ |
| |
| static void setNuConvTestName(const char *codepage, const char *direction) |
| { |
| sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", |
| codepage, |
| direction, |
| gInBufferSize, |
| gOutBufferSize); |
| } |
| |
/* Result of one testConvertFromU() / testConvertToU() run. */
typedef enum ETestConvertResult
{
    /* test was OK */
    TC_OK,
    /* Match failed - err was printed */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
| |
| /* Note: This function uses global variables and it will not do offset |
| checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ |
| static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, |
| const char *codepage, const int32_t *expectOffsets , UBool useFallback) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| UConverter *conv = 0; |
| uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */ |
| int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| uint8_t *p; |
| const UChar *src; |
| uint8_t *end; |
| uint8_t *targ; |
| int32_t *offs; |
| int i; |
| int32_t realBufferSize; |
| uint8_t *realBufferEnd; |
| const UChar *realSourceEnd; |
| const UChar *sourceLimit; |
| UBool checkOffsets = TRUE; |
| UBool doFlush; |
| |
| for(i=0;i<NEW_MAX_BUFFER;i++) |
| junkout[i] = 0xF0; |
| for(i=0;i<NEW_MAX_BUFFER;i++) |
| junokout[i] = 0xFF; |
| |
| setNuConvTestName(codepage, "FROM"); |
| |
| log_verbose("\n========= %s\n", gNuConvTestName); |
| |
| conv = my_ucnv_open(codepage, &status); |
| |
| if(U_FAILURE(status)) |
| { |
| log_data_err("Couldn't open converter %s\n",codepage); |
| return TC_FAIL; |
| } |
| if(useFallback){ |
| ucnv_setFallback(conv,useFallback); |
| } |
| |
| log_verbose("Converter opened..\n"); |
| |
| src = source; |
| targ = junkout; |
| offs = junokout; |
| |
| realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| realBufferEnd = junkout + realBufferSize; |
| realSourceEnd = source + sourceLen; |
| |
| if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) |
| checkOffsets = FALSE; |
| |
| do |
| { |
| end = nct_min(targ + gOutBufferSize, realBufferEnd); |
| sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); |
| |
| doFlush = (UBool)(sourceLimit == realSourceEnd); |
| |
| if(targ == realBufferEnd) { |
| log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); |
| return TC_FAIL; |
| } |
| log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); |
| |
| |
| status = U_ZERO_ERROR; |
| |
| ucnv_fromUnicode (conv, |
| (char **)&targ, |
| (const char*)end, |
| &src, |
| sourceLimit, |
| checkOffsets ? offs : NULL, |
| doFlush, /* flush if we're at the end of the input data */ |
| &status); |
| } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); |
| |
| if(U_FAILURE(status)) { |
| log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); |
| return TC_FAIL; |
| } |
| |
| log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", |
| sourceLen, targ-junkout); |
| |
| if(VERBOSITY) |
| { |
| char junk[9999]; |
| char offset_str[9999]; |
| uint8_t *ptr; |
| |
| junk[0] = 0; |
| offset_str[0] = 0; |
| for(ptr = junkout;ptr<targ;ptr++) { |
| sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); |
| sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); |
| } |
| |
| log_verbose(junk); |
| printSeq((const uint8_t *)expect, expectLen); |
| if ( checkOffsets ) { |
| log_verbose("\nOffsets:"); |
| log_verbose(offset_str); |
| } |
| log_verbose("\n"); |
| } |
| ucnv_close(conv); |
| |
| if(expectLen != targ-junkout) { |
| log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); |
| log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); |
| printf("\nGot:"); |
| printSeqErr((const unsigned char*)junkout, targ-junkout); |
| printf("\nExpected:"); |
| printSeqErr((const unsigned char*)expect, expectLen); |
| return TC_MISMATCH; |
| } |
| |
| if (checkOffsets && (expectOffsets != 0) ) { |
| log_verbose("comparing %d offsets..\n", targ-junkout); |
| if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ |
| log_err("did not get the expected offsets. %s\n", gNuConvTestName); |
| printSeqErr((const unsigned char*)junkout, targ-junkout); |
| log_err("\n"); |
| log_err("Got : "); |
| for(p=junkout;p<targ;p++) { |
| log_err("%d,", junokout[p-junkout]); |
| } |
| log_err("\n"); |
| log_err("Expected: "); |
| for(i=0; i<(targ-junkout); i++) { |
| log_err("%d,", expectOffsets[i]); |
| } |
| log_err("\n"); |
| } |
| } |
| |
| log_verbose("comparing..\n"); |
| if(!memcmp(junkout, expect, expectLen)) { |
| log_verbose("Matches!\n"); |
| return TC_OK; |
| } else { |
| log_err("String does not match u->%s\n", gNuConvTestName); |
| printUSeqErr(source, sourceLen); |
| printf("\nGot:"); |
| printSeqErr((const unsigned char *)junkout, expectLen); |
| printf("\nExpected:"); |
| printSeqErr((const unsigned char *)expect, expectLen); |
| |
| return TC_MISMATCH; |
| } |
| } |
| |
| /* Note: This function uses global variables and it will not do offset |
| checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ |
| static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, |
| const char *codepage, const int32_t *expectOffsets, UBool useFallback) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| UConverter *conv = 0; |
| UChar junkout[NEW_MAX_BUFFER]; /* FIX */ |
| int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| const uint8_t *src; |
| const uint8_t *realSourceEnd; |
| const uint8_t *srcLimit; |
| UChar *p; |
| UChar *targ; |
| UChar *end; |
| int32_t *offs; |
| int i; |
| UBool checkOffsets = TRUE; |
| |
| int32_t realBufferSize; |
| UChar *realBufferEnd; |
| |
| |
| for(i=0;i<NEW_MAX_BUFFER;i++) |
| junkout[i] = 0xFFFE; |
| |
| for(i=0;i<NEW_MAX_BUFFER;i++) |
| junokout[i] = -1; |
| |
| setNuConvTestName(codepage, "TO"); |
| |
| log_verbose("\n========= %s\n", gNuConvTestName); |
| |
| conv = my_ucnv_open(codepage, &status); |
| |
| if(U_FAILURE(status)) |
| { |
| log_data_err("Couldn't open converter %s\n",gNuConvTestName); |
| return TC_FAIL; |
| } |
| if(useFallback){ |
| ucnv_setFallback(conv,useFallback); |
| } |
| log_verbose("Converter opened..\n"); |
| |
| src = source; |
| targ = junkout; |
| offs = junokout; |
| |
| realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| realBufferEnd = junkout + realBufferSize; |
| realSourceEnd = src + sourcelen; |
| |
| if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) |
| checkOffsets = FALSE; |
| |
| do |
| { |
| end = nct_min( targ + gOutBufferSize, realBufferEnd); |
| srcLimit = nct_min(realSourceEnd, src + gInBufferSize); |
| |
| if(targ == realBufferEnd) |
| { |
| log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); |
| return TC_FAIL; |
| } |
| log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); |
| |
| /* oldTarg = targ; */ |
| |
| status = U_ZERO_ERROR; |
| |
| ucnv_toUnicode (conv, |
| &targ, |
| end, |
| (const char **)&src, |
| (const char *)srcLimit, |
| checkOffsets ? offs : NULL, |
| (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ |
| &status); |
| |
| /* offs += (targ-oldTarg); */ |
| |
| } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ |
| |
| if(U_FAILURE(status)) |
| { |
| log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); |
| return TC_FAIL; |
| } |
| |
| log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", |
| sourcelen, targ-junkout); |
| if(VERBOSITY) |
| { |
| char junk[9999]; |
| char offset_str[9999]; |
| UChar *ptr; |
| |
| junk[0] = 0; |
| offset_str[0] = 0; |
| |
| for(ptr = junkout;ptr<targ;ptr++) |
| { |
| sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); |
| sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); |
| } |
| |
| log_verbose(junk); |
| printUSeq(expect, expectlen); |
| if ( checkOffsets ) |
| { |
| log_verbose("\nOffsets:"); |
| log_verbose(offset_str); |
| } |
| log_verbose("\n"); |
| } |
| ucnv_close(conv); |
| |
| log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); |
| |
| if (checkOffsets && (expectOffsets != 0)) |
| { |
| if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ |
| log_err("did not get the expected offsets. %s\n",gNuConvTestName); |
| log_err("Got: "); |
| for(p=junkout;p<targ;p++) { |
| log_err("%d,", junokout[p-junkout]); |
| } |
| log_err("\n"); |
| log_err("Expected: "); |
| for(i=0; i<(targ-junkout); i++) { |
| log_err("%d,", expectOffsets[i]); |
| } |
| log_err("\n"); |
| log_err("output: "); |
| for(i=0; i<(targ-junkout); i++) { |
| log_err("%X,", junkout[i]); |
| } |
| log_err("\n"); |
| log_err("input: "); |
| for(i=0; i<(src-source); i++) { |
| log_err("%X,", (unsigned char)source[i]); |
| } |
| log_err("\n"); |
| } |
| } |
| |
| if(!memcmp(junkout, expect, expectlen*2)) |
| { |
| log_verbose("Matches!\n"); |
| return TC_OK; |
| } |
| else |
| { |
| log_err("String does not match. %s\n", gNuConvTestName); |
| log_verbose("String does not match. %s\n", gNuConvTestName); |
| printf("\nGot:"); |
| printUSeqErr(junkout, expectlen); |
| printf("\nExpected:"); |
| printUSeqErr(expect, expectlen); |
| return TC_MISMATCH; |
| } |
| } |
| |
| |
| static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) |
| { |
| /** test chars #1 */ |
| /* 1 2 3 1Han 2Han 3Han . */ |
| UChar sampleText[] = |
| { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; |
| |
| |
| const uint8_t expectedUTF8[] = |
| { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; |
| int32_t toUTF8Offs[] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; |
| int32_t fmUTF8Offs[] = |
| { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d }; |
| |
| /* Same as UTF8, but with ^[%B preceeding */ |
| const uint8_t expectedISO2022[] = |
| { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; |
| int32_t toISO2022Offs[] = |
| { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, |
| 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ |
| int32_t fmISO2022Offs[] = |
| { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ |
| |
| /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ |
| const uint8_t expectedIBM930[] = |
| { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B }; |
| int32_t toIBM930Offs[] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, }; |
| int32_t fmIBM930Offs[] = |
| { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c}; |
| |
| /* 1 2 3 0 h1 h2 h3 . MBCS*/ |
| const uint8_t expectedIBM943[] = |
| { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e }; |
| int32_t toIBM943Offs [] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 }; |
| int32_t fmIBM943Offs[] = |
| { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a}; |
| |
| /* 1 2 3 0 h1 h2 h3 . DBCS*/ |
| const uint8_t expectedIBM9027[] = |
| { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe}; |
| int32_t toIBM9027Offs [] = |
| { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; |
| |
| /* 1 2 3 0 <?> <?> <?> . SBCS*/ |
| const uint8_t expectedIBM920[] = |
| { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e }; |
| int32_t toIBM920Offs [] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; |
| |
| /* 1 2 3 0 <?> <?> <?> . SBCS*/ |
| const uint8_t expectedISO88593[] = |
| { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; |
| int32_t toISO88593Offs[] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; |
| |
| /* 1 2 3 0 <?> <?> <?> . LATIN_1*/ |
| const uint8_t expectedLATIN1[] = |
| { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; |
| int32_t toLATIN1Offs[] = |
| { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; |
| |
| |
| /* etc */ |
| const uint8_t expectedUTF16BE[] = |
| { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e }; |
| int32_t toUTF16BEOffs[]= |
| { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; |
| int32_t fmUTF16BEOffs[] = |
| { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; |
| |
| const uint8_t expectedUTF16LE[] = |
| { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 }; |
| int32_t toUTF16LEOffs[]= |
| { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; |
| int32_t fmUTF16LEOffs[] = |
| { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; |
| |
| const uint8_t expectedUTF32BE[] = |
| { 0x00, 0x00, 0x00, 0x31, |
| 0x00, 0x00, 0x00, 0x32, |
| 0x00, 0x00, 0x00, 0x33, |
| 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x4e, 0x00, |
| 0x00, 0x00, 0x4e, 0x8c, |
| 0x00, 0x00, 0x4e, 0x09, |
| 0x00, 0x00, 0x00, 0x2e }; |
| int32_t toUTF32BEOffs[]= |
| { 0x00, 0x00, 0x00, 0x00, |
| 0x01, 0x01, 0x01, 0x01, |
| 0x02, 0x02, 0x02, 0x02, |
| 0x03, 0x03, 0x03, 0x03, |
| 0x04, 0x04, 0x04, 0x04, |
| 0x05, 0x05, 0x05, 0x05, |
| 0x06, 0x06, 0x06, 0x06, |
| 0x07, 0x07, 0x07, 0x07, |
| 0x08, 0x08, 0x08, 0x08 }; |
| int32_t fmUTF32BEOffs[] = |
| { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; |
| |
| const uint8_t expectedUTF32LE[] = |
| { 0x31, 0x00, 0x00, 0x00, |
| 0x32, 0x00, 0x00, 0x00, |
| 0x33, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x4e, 0x00, 0x00, |
| 0x8c, 0x4e, 0x00, 0x00, |
| 0x09, 0x4e, 0x00, 0x00, |
| 0x2e, 0x00, 0x00, 0x00 }; |
| int32_t toUTF32LEOffs[]= |
| { 0x00, 0x00, 0x00, 0x00, |
| 0x01, 0x01, 0x01, 0x01, |
| 0x02, 0x02, 0x02, 0x02, |
| 0x03, 0x03, 0x03, 0x03, |
| 0x04, 0x04, 0x04, 0x04, |
| 0x05, 0x05, 0x05, 0x05, |
| 0x06, 0x06, 0x06, 0x06, |
| 0x07, 0x07, 0x07, 0x07, |
| 0x08, 0x08, 0x08, 0x08 }; |
| int32_t fmUTF32LEOffs[] = |
| { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; |
| |
| |
| |
| |
| /** Test chars #2 **/ |
| |
| /* Sahha [health], slashed h's */ |
| const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; |
| const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; |
| |
| /* LMBCS */ |
| const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; |
| const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; |
| int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; |
| int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; |
| /*********************************** START OF CODE finally *************/ |
| |
| gInBufferSize = insize; |
| gOutBufferSize = outsize; |
| |
| log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); |
| |
| |
| #if 1 |
| /*UTF-8*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); |
| |
| log_verbose("Test surrogate behaviour for UTF8\n"); |
| { |
| const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; |
| const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, |
| 0xf0, 0x90, 0x90, 0x81, |
| 0xef, 0xbf, 0xbd |
| }; |
| int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; |
| testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), |
| expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); |
| |
| |
| } |
| /*ISO-2022*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); |
| /*UTF16 LE*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); |
| /*UTF16 BE*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); |
| /*UTF32 LE*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); |
| /*UTF32 BE*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); |
| /*LATIN_1*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); |
| /*EBCDIC_STATEFUL*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); |
| |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); |
| |
| /*MBCS*/ |
| |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); |
| /*DBCS*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); |
| /*SBCS*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); |
| /*SBCS*/ |
| testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); |
| |
| |
| /****/ |
| #endif |
| |
| #if 1 |
| /*UTF-8*/ |
| testConvertToU(expectedUTF8, sizeof(expectedUTF8), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); |
| /*ISO-2022*/ |
| testConvertToU(expectedISO2022, sizeof(expectedISO2022), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); |
| /*UTF16 LE*/ |
| testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); |
| /*UTF16 BE*/ |
| testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); |
| /*UTF32 LE*/ |
| testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); |
| /*UTF32 BE*/ |
| testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); |
| /*EBCDIC_STATEFUL*/ |
| testConvertToU(expectedIBM930, sizeof(expectedIBM930), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE); |
| /*MBCS*/ |
| testConvertToU(expectedIBM943, sizeof(expectedIBM943), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE); |
| |
| /* Try it again to make sure it still works */ |
| testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), |
| sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); |
| |
| testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), |
| malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); |
| |
| testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), |
| expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); |
| |
| /*LMBCS*/ |
| testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), |
| expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); |
| testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), |
| LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); |
| |
| /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ |
| { |
| /* encode directly set D and set O */ |
| static const uint8_t utf7[] = { |
| /* |
| Hi Mom -+Jjo--! |
| A+ImIDkQ. |
| +- |
| +ZeVnLIqe |
| */ |
| 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, |
| 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, |
| 0x2b, 0x2d, |
| 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 |
| }; |
| static const UChar unicode[] = { |
| /* |
| Hi Mom -<WHITE SMILING FACE>-! |
| A<NOT IDENTICAL TO><ALPHA>. |
| + |
| [Japanese word "nihongo"] |
| */ |
| 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, |
| 0x41, 0x2262, 0x0391, 0x2e, |
| 0x2b, |
| 0x65e5, 0x672c, 0x8a9e |
| }; |
| static const int32_t toUnicodeOffsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, |
| 15, 17, 19, 23, |
| 24, |
| 27, 29, 32 |
| }; |
| static const int32_t fromUnicodeOffsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, |
| 11, 12, 12, 12, 13, 13, 13, 13, 14, |
| 15, 15, |
| 16, 16, 16, 17, 17, 17, 18, 18, 18 |
| }; |
| |
| /* same but escaping set O (the exclamation mark) */ |
| static const uint8_t utf7Restricted[] = { |
| /* |
| Hi Mom -+Jjo--+ACE- |
| A+ImIDkQ. |
| +- |
| +ZeVnLIqe |
| */ |
| 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, |
| 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, |
| 0x2b, 0x2d, |
| 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 |
| }; |
| static const int32_t toUnicodeOffsetsR[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, |
| 19, 21, 23, 27, |
| 28, |
| 31, 33, 36 |
| }; |
| static const int32_t fromUnicodeOffsetsR[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, |
| 11, 12, 12, 12, 13, 13, 13, 13, 14, |
| 15, 15, |
| 16, 16, 16, 17, 17, 17, 18, 18, 18 |
| }; |
| |
| testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); |
| |
| testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); |
| |
| testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); |
| |
| testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); |
| } |
| |
| /* |
| * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, |
| * modified according to RFC 2060, |
| * and supplemented with the one example in RFC 2060 itself. |
| */ |
| { |
| static const uint8_t imap[] = { |
| /* Hi Mom -&Jjo--! |
| A&ImIDkQ-. |
| &- |
| &ZeVnLIqe- |
| \ |
| ~peter |
| /mail |
| /&ZeVnLIqe- |
| /&U,BTFw- |
| */ |
| 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, |
| 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, |
| 0x26, 0x2d, |
| 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, |
| 0x5c, |
| 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, |
| 0x2f, 0x6d, 0x61, 0x69, 0x6c, |
| 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, |
| 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d |
| }; |
| static const UChar unicode[] = { |
| /* Hi Mom -<WHITE SMILING FACE>-! |
| A<NOT IDENTICAL TO><ALPHA>. |
| & |
| [Japanese word "nihongo"] |
| \ |
| ~peter |
| /mail |
| /<65e5, 672c, 8a9e> |
| /<53f0, 5317> |
| */ |
| 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, |
| 0x41, 0x2262, 0x0391, 0x2e, |
| 0x26, |
| 0x65e5, 0x672c, 0x8a9e, |
| 0x5c, |
| 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, |
| 0x2f, 0x6d, 0x61, 0x69, 0x6c, |
| 0x2f, 0x65e5, 0x672c, 0x8a9e, |
| 0x2f, 0x53f0, 0x5317 |
| }; |
| static const int32_t toUnicodeOffsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, |
| 15, 17, 19, 24, |
| 25, |
| 28, 30, 33, |
| 37, |
| 38, 39, 40, 41, 42, 43, |
| 44, 45, 46, 47, 48, |
| 49, 51, 53, 56, |
| 60, 62, 64 |
| }; |
| static const int32_t fromUnicodeOffsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, |
| 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, |
| 15, 15, |
| 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, |
| 19, |
| 20, 21, 22, 23, 24, 25, |
| 26, 27, 28, 29, 30, |
| 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, |
| 35, 36, 36, 36, 37, 37, 37, 37, 37 |
| }; |
| |
| testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); |
| |
| testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); |
| } |
| |
| /* Test UTF-8 bad data handling*/ |
| { |
| static const uint8_t utf8[]={ |
| 0x61, |
| 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 0x00, |
| 0x62, |
| 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ |
| 0xdf, 0xbf, /* 7ff */ |
| 0xbf, /* truncated tail */ |
| 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ |
| 0x02 |
| }; |
| |
| static const uint16_t utf8Expected[]={ |
| 0x0061, |
| 0xfffd, |
| 0x0000, |
| 0x0062, |
| 0xfffd, |
| 0xfffd, |
| 0xdbff, 0xdfff, |
| 0x07ff, |
| 0xfffd, |
| 0xfffd, |
| 0x0002 |
| }; |
| |
| static const int32_t utf8Offsets[]={ |
| 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 |
| }; |
| testConvertToU(utf8, sizeof(utf8), |
| utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); |
| |
| } |
| |
| /* Test UTF-32BE bad data handling*/ |
| { |
| static const uint8_t utf32[]={ |
| 0x00, 0x00, 0x00, 0x61, |
| 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ |
| 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ |
| 0x00, 0x00, 0x00, 0x62, |
| 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ |
| 0x00, 0x00, 0x01, 0x62, |
| 0x00, 0x00, 0x02, 0x62 |
| }; |
| |
| static const uint16_t utf32Expected[]={ |
| 0x0061, |
| 0xfffd, /* 0x110000 out of range */ |
| 0xDBFF, /* 0x10FFFF in range */ |
| 0xDFFF, |
| 0x0062, |
| 0xfffd, /* 0xffffffff out of range */ |
| 0xfffd, /* 0x7fffffff out of range */ |
| 0x0162, |
| 0x0262 |
| }; |
| |
| static const int32_t utf32Offsets[]={ |
| 0, 4, 8, 8, 12, 16, 20, 24, 28 |
| }; |
| testConvertToU(utf32, sizeof(utf32), |
| utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); |
| |
| } |
| |
| /* Test UTF-32LE bad data handling*/ |
| { |
| static const uint8_t utf32[]={ |
| 0x61, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ |
| 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ |
| 0x62, 0x00, 0x00, 0x00, |
| 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ |
| 0x62, 0x01, 0x00, 0x00, |
| 0x62, 0x02, 0x00, 0x00, |
| }; |
| |
| static const uint16_t utf32Expected[]={ |
| 0x0061, |
| 0xfffd, /* 0x110000 out of range */ |
| 0xDBFF, /* 0x10FFFF in range */ |
| 0xDFFF, |
| 0x0062, |
| 0xfffd, /* 0xffffffff out of range */ |
| 0xfffd, /* 0x7fffffff out of range */ |
| 0x0162, |
| 0x0262 |
| }; |
| |
| static const int32_t utf32Offsets[]={ |
| 0, 4, 8, 8, 12, 16, 20, 24, 28 |
| }; |
| testConvertToU(utf32, sizeof(utf32), |
| utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); |
| |
| } |
| } |
| |
| static void TestCoverageMBCS(){ |
| #if 0 |
| UErrorCode status = U_ZERO_ERROR; |
| const char *directory = loadTestData(&status); |
| char* tdpath = NULL; |
| char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); |
| int len = strlen(directory); |
| char* index=NULL; |
| |
| tdpath = (char*) malloc(sizeof(char) * (len * 2)); |
| uprv_strcpy(saveDirectory,u_getDataDirectory()); |
| log_verbose("Retrieved data directory %s \n",saveDirectory); |
| uprv_strcpy(tdpath,directory); |
| index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); |
| |
| if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ |
| *(index+1)=0; |
| } |
| u_setDataDirectory(tdpath); |
| log_verbose("ICU data directory is set to: %s \n" ,tdpath); |
| #endif |
| |
| /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm |
| which is test file for MBCS conversion with single-byte codepage data.*/ |
| { |
| |
| /* MBCS with single byte codepage data test1.ucm*/ |
| const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; |
| const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; |
| int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; |
| |
| /*from Unicode*/ |
| testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), |
| expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); |
| } |
| |
| /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm |
| which is test file for MBCS conversion with three-byte codepage data.*/ |
| { |
| |
| /* MBCS with three byte codepage data test3.ucm*/ |
| const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; |
| const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; |
| int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; |
| |
| const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; |
| const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; |
| int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; |
| |
| /*from Unicode*/ |
| testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), |
| expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); |
| |
| /*to Unicode*/ |
| testConvertToU(test3input, sizeof(test3input), |
| expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); |
| |
| } |
| |
| /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm |
| which is test file for MBCS conversion with four-byte codepage data.*/ |
| { |
| |
| /* MBCS with three byte codepage data test4.ucm*/ |
| static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; |
| static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; |
| static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; |
| |
| static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; |
| static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; |
| static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; |
| |
| /*from Unicode*/ |
| testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), |
| expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); |
| |
| /*to Unicode*/ |
| testConvertToU(test4input, sizeof(test4input), |
| expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); |
| |
| } |
| #if 0 |
| free(tdpath); |
| /* restore the original data directory */ |
| log_verbose("Setting the data directory to %s \n", saveDirectory); |
| u_setDataDirectory(saveDirectory); |
| free(saveDirectory); |
| #endif |
| |
| } |
| |
| static void TestConverterType(const char *convName, UConverterType convType) { |
| UConverter* myConverter; |
| UErrorCode err = U_ZERO_ERROR; |
| |
| myConverter = my_ucnv_open(convName, &err); |
| |
| if (U_FAILURE(err)) { |
| log_data_err("Failed to create an %s converter\n", convName); |
| return; |
| } |
| else |
| { |
| if (ucnv_getType(myConverter)!=convType) { |
| log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", |
| convName, convType); |
| } |
| else { |
| log_verbose("ucnv_getType %s ok\n", convName); |
| } |
| } |
| ucnv_close(myConverter); |
| } |
| |
| static void TestConverterTypesAndStarters() |
| { |
| UConverter* myConverter; |
| UErrorCode err = U_ZERO_ERROR; |
| UBool mystarters[256]; |
| |
| /* const UBool expectedKSCstarters[256] = { |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ |
| |
| |
| log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); |
| |
| myConverter = ucnv_open("ksc", &err); |
| if (U_FAILURE(err)) { |
| log_data_err("Failed to create an ibm-ksc converter\n"); |
| return; |
| } |
| else |
| { |
| if (ucnv_getType(myConverter)!=UCNV_MBCS) |
| log_err("ucnv_getType Failed for ibm-949\n"); |
| else |
| log_verbose("ucnv_getType ibm-949 ok\n"); |
| |
| if(myConverter!=NULL) |
| ucnv_getStarters(myConverter, mystarters, &err); |
| |
| /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) |
| log_err("Failed ucnv_getStarters for ksc\n"); |
| else |
| log_verbose("ucnv_getStarters ok\n");*/ |
| |
| } |
| ucnv_close(myConverter); |
| |
| TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); |
| TestConverterType("ibm-878", UCNV_SBCS); |
| TestConverterType("iso-8859-1", UCNV_LATIN_1); |
| TestConverterType("ibm-1208", UCNV_UTF8); |
| TestConverterType("utf-8", UCNV_UTF8); |
| TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); |
| TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); |
| TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); |
| TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); |
| TestConverterType("iso-2022", UCNV_ISO_2022); |
| TestConverterType("hz", UCNV_HZ); |
| TestConverterType("scsu", UCNV_SCSU); |
| TestConverterType("x-iscii-de", UCNV_ISCII); |
| TestConverterType("ascii", UCNV_US_ASCII); |
| TestConverterType("utf-7", UCNV_UTF7); |
| TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); |
| TestConverterType("bocu-1", UCNV_BOCU1); |
| } |
| |
| static void |
| TestAmbiguousConverter(UConverter *cnv) { |
| static const char inBytes[2]={ 0x61, 0x5c }; |
| UChar outUnicode[20]={ 0, 0, 0, 0 }; |
| |
| const char *s; |
| UChar *u; |
| UErrorCode errorCode; |
| UBool isAmbiguous; |
| |
| /* try to convert an 'a' and a US-ASCII backslash */ |
| errorCode=U_ZERO_ERROR; |
| s=inBytes; |
| u=outUnicode; |
| ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode); |
| if(U_FAILURE(errorCode)) { |
| /* we do not care about general failures in this test; the input may just not be mappable */ |
| return; |
| } |
| |
| if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) { |
| /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ |
| return; |
| } |
| |
| isAmbiguous=ucnv_isAmbiguous(cnv); |
| |
| /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ |
| if((outUnicode[1]!=0x5c)!=isAmbiguous) { |
| log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", |
| ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous); |
| return; |
| } |
| |
| if(outUnicode[1]!=0x5c) { |
| /* needs fixup, fix it */ |
| ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); |
| if(outUnicode[1]!=0x5c) { |
| /* the fix failed */ |
| log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); |
| return; |
| } |
| } |
| } |
| |
| static void TestAmbiguous() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; |
| const char target[] = { |
| /* "\\usr\\local\\share\\data\\icutest.txt" */ |
| 0x5c, 0x75, 0x73, 0x72, |
| 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, |
| 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, |
| 0x5c, 0x64, 0x61, 0x74, 0x61, |
| 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, |
| 0 |
| }; |
| UChar asciiResult[200], sjisResult[200]; |
| int32_t asciiLength = 0, sjisLength = 0, i; |
| const char *name; |
| |
| /* enumerate all converters */ |
| status=U_ZERO_ERROR; |
| for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { |
| cnv=ucnv_open(name, &status); |
| if(U_SUCCESS(status)) { |
| TestAmbiguousConverter(cnv); |
| ucnv_close(cnv); |
| } else { |
| log_err("error: unable to open available converter \"%s\"\n", name); |
| status=U_ZERO_ERROR; |
| } |
| } |
| |
| sjis_cnv = ucnv_open("ibm-943", &status); |
| if (U_FAILURE(status)) |
| { |
| log_data_err("Failed to create a SJIS converter\n"); |
| return; |
| } |
| ascii_cnv = ucnv_open("LATIN-1", &status); |
| if (U_FAILURE(status)) |
| { |
| log_data_err("Failed to create a LATIN-1 converter\n"); |
| ucnv_close(sjis_cnv); |
| return; |
| } |
| /* convert target from SJIS to Unicode */ |
| sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); |
| if (U_FAILURE(status)) |
| { |
| log_err("Failed to convert the SJIS string.\n"); |
| ucnv_close(sjis_cnv); |
| ucnv_close(ascii_cnv); |
| return; |
| } |
| /* convert target from Latin-1 to Unicode */ |
| asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); |
| if (U_FAILURE(status)) |
| { |
| log_err("Failed to convert the Latin-1 string.\n"); |
| free(sjisResult); |
| ucnv_close(sjis_cnv); |
| ucnv_close(ascii_cnv); |
| return; |
| } |
| if (!ucnv_isAmbiguous(sjis_cnv)) |
| { |
| log_err("SJIS converter should contain ambiguous character mappings.\n"); |
| free(sjisResult); |
| free(asciiResult); |
| ucnv_close(sjis_cnv); |
| ucnv_close(ascii_cnv); |
| return; |
| } |
| if (u_strcmp(sjisResult, asciiResult) == 0) |
| { |
| log_err("File separators for SJIS don't need to be fixed.\n"); |
| } |
| ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); |
| if (u_strcmp(sjisResult, asciiResult) != 0) |
| { |
| log_err("Fixing file separator for SJIS failed.\n"); |
| } |
| ucnv_close(sjis_cnv); |
| ucnv_close(ascii_cnv); |
| } |
| |
| static void |
| TestSignatureDetection(){ |
| /* with null terminated strings */ |
| { |
| static const char* data[] = { |
| "\xFE\xFF\x00\x00", /* UTF-16BE */ |
| "\xFF\xFE\x00\x00", /* UTF-16LE */ |
| "\xEF\xBB\xBF\x00", /* UTF-8 */ |
| "\x0E\xFE\xFF\x00", /* SCSU */ |
| |
| "\xFE\xFF", /* UTF-16BE */ |
| "\xFF\xFE", /* UTF-16LE */ |
| "\xEF\xBB\xBF", /* UTF-8 */ |
| "\x0E\xFE\xFF", /* SCSU */ |
| |
| "\xFE\xFF\x41\x42", /* UTF-16BE */ |
| "\xFF\xFE\x41\x41", /* UTF-16LE */ |
| "\xEF\xBB\xBF\x41", /* UTF-8 */ |
| "\x0E\xFE\xFF\x41", /* SCSU */ |
| |
| "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ |
| "\x2B\x2F\x76\x38\x41", /* UTF-7 */ |
| "\x2B\x2F\x76\x39\x41", /* UTF-7 */ |
| "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ |
| "\x2B\x2F\x76\x2F\x41" /* UTF-7 */ |
| }; |
| static const char* expected[] = { |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| |
| "UTF-7", |
| "UTF-7", |
| "UTF-7", |
| "UTF-7", |
| "UTF-7" |
| }; |
| static const int32_t expectedLength[] ={ |
| 2, |
| 2, |
| 3, |
| 3, |
| |
| 2, |
| 2, |
| 3, |
| 3, |
| |
| 2, |
| 2, |
| 3, |
| 3, |
| |
| 5, |
| 4, |
| 4, |
| 4, |
| 4 |
| }; |
| int i=0; |
| UErrorCode err; |
| int32_t signatureLength = -1; |
| const char* source = NULL; |
| const char* enc = NULL; |
| for( ; i<sizeof(data)/sizeof(char*); i++){ |
| err = U_ZERO_ERROR; |
| source = data[i]; |
| enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); |
| if(U_FAILURE(err)){ |
| log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); |
| continue; |
| } |
| if(enc == NULL || strcmp(enc,expected[i]) !=0){ |
| log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); |
| continue; |
| } |
| if(signatureLength != expectedLength[i]){ |
| log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); |
| } |
| } |
| } |
| { |
| static const char* data[] = { |
| "\xFE\xFF\x00", /* UTF-16BE */ |
| "\xFF\xFE\x00", /* UTF-16LE */ |
| "\xEF\xBB\xBF\x00", /* UTF-8 */ |
| "\x0E\xFE\xFF\x00", /* SCSU */ |
| "\x00\x00\xFE\xFF", /* UTF-32BE */ |
| "\xFF\xFE\x00\x00", /* UTF-32LE */ |
| "\xFE\xFF", /* UTF-16BE */ |
| "\xFF\xFE", /* UTF-16LE */ |
| "\xEF\xBB\xBF", /* UTF-8 */ |
| "\x0E\xFE\xFF", /* SCSU */ |
| "\x00\x00\xFE\xFF", /* UTF-32BE */ |
| "\xFF\xFE\x00\x00", /* UTF-32LE */ |
| "\xFE\xFF\x41\x42", /* UTF-16BE */ |
| "\xFF\xFE\x41\x41", /* UTF-16LE */ |
| "\xEF\xBB\xBF\x41", /* UTF-8 */ |
| "\x0E\xFE\xFF\x41", /* SCSU */ |
| "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ |
| "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ |
| "\xFB\xEE\x28", /* BOCU-1 */ |
| "\xFF\x41\x42" /* NULL */ |
| }; |
| static const int len[] = { |
| 3, |
| 3, |
| 4, |
| 4, |
| 4, |
| 4, |
| 2, |
| 2, |
| 3, |
| 3, |
| 4, |
| 4, |
| 4, |
| 4, |
| 4, |
| 4, |
| 5, |
| 5, |
| 3, |
| 3 |
| }; |
| |
| static const char* expected[] = { |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| "UTF-32BE", |
| "UTF-32LE", |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| "UTF-32BE", |
| "UTF-32LE", |
| "UTF-16BE", |
| "UTF-16LE", |
| "UTF-8", |
| "SCSU", |
| "UTF-32BE", |
| "UTF-32LE", |
| "BOCU-1", |
| NULL |
| }; |
| static const int32_t expectedLength[] ={ |
| 2, |
| 2, |
| 3, |
| 3, |
| 4, |
| 4, |
| 2, |
| 2, |
| 3, |
| 3, |
| 4, |
| 4, |
| 2, |
| 2, |
| 3, |
| 3, |
| 4, |
| 4, |
| 3, |
| 0 |
| }; |
| int i=0; |
| UErrorCode err; |
| int32_t signatureLength = -1; |
| int32_t sourceLength=-1; |
| const char* source = NULL; |
| const char* enc = NULL; |
| for( ; i<sizeof(data)/sizeof(char*); i++){ |
| err = U_ZERO_ERROR; |
| source = data[i]; |
| sourceLength = len[i]; |
| enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); |
| if(U_FAILURE(err)){ |
| log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); |
| continue; |
| } |
| if(enc == NULL || strcmp(enc,expected[i]) !=0){ |
| if(expected[i] !=NULL){ |
| log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); |
| continue; |
| } |
| } |
| if(signatureLength != expectedLength[i]){ |
| log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); |
| } |
| } |
| } |
| } |
| |
| void |
| static TestUTF7() { |
| /* test input */ |
| static const uint8_t in[]={ |
| /* H - +Jjo- - ! +- +2AHcAQ */ |
| 0x48, |
| 0x2d, |
| 0x2b, 0x4a, 0x6a, 0x6f, |
| 0x2d, 0x2d, |
| 0x21, |
| 0x2b, 0x2d, |
| 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 1, 0x48, |
| 1, 0x2d, |
| 4, 0x263a, /* <WHITE SMILING FACE> */ |
| 2, 0x2d, |
| 1, 0x21, |
| 2, 0x2b, |
| 7, 0x10401 |
| }; |
| |
| const char *cnvName; |
| const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("UTF-7", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ |
| return; |
| } |
| TestNextUChar(cnv, source, limit, results, "UTF-7"); |
| /* Test the condition when source >= sourceLimit */ |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| cnvName = ucnv_getName(cnv, &errorCode); |
| if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { |
| log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); |
| } |
| ucnv_close(cnv); |
| } |
| |
| void |
| static TestIMAP() { |
| /* test input */ |
| static const uint8_t in[]={ |
| /* H - &Jjo- - ! &- &2AHcAQ- \ */ |
| 0x48, |
| 0x2d, |
| 0x26, 0x4a, 0x6a, 0x6f, |
| 0x2d, 0x2d, |
| 0x21, |
| 0x26, 0x2d, |
| 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 1, 0x48, |
| 1, 0x2d, |
| 4, 0x263a, /* <WHITE SMILING FACE> */ |
| 2, 0x2d, |
| 1, 0x21, |
| 2, 0x26, |
| 7, 0x10401 |
| }; |
| |
| const char *cnvName; |
| const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ |
| return; |
| } |
| TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); |
| /* Test the condition when source >= sourceLimit */ |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| cnvName = ucnv_getName(cnv, &errorCode); |
| if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { |
| log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); |
| } |
| ucnv_close(cnv); |
| } |
| |
| void |
| static TestUTF8() { |
| /* test input */ |
| static const uint8_t in[]={ |
| 0x61, |
| 0xc2, 0x80, |
| 0xe0, 0xa0, 0x80, |
| 0xf0, 0x90, 0x80, 0x80, |
| 0xf4, 0x84, 0x8c, 0xa1, |
| 0xf0, 0x90, 0x90, 0x81 |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 1, 0x61, |
| 2, 0x80, |
| 3, 0x800, |
| 4, 0x10000, |
| 4, 0x104321, |
| 4, 0x10401 |
| }; |
| |
| /* error test input */ |
| static const uint8_t in2[]={ |
| 0x61, |
| 0xc0, 0x80, /* illegal non-shortest form */ |
| 0xe0, 0x80, 0x80, /* illegal non-shortest form */ |
| 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ |
| 0xc0, 0xc0, /* illegal trail byte */ |
| 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ |
| 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ |
| 0xfe, /* illegal byte altogether */ |
| 0x62 |
| }; |
| |
| /* expected error test results */ |
| static const int32_t results2[]={ |
| /* number of bytes read, code point */ |
| 1, 0x61, |
| 22, 0x62 |
| }; |
| |
| UConverterToUCallback cb; |
| const void *p; |
| |
| const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("UTF-8", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); |
| return; |
| } |
| TestNextUChar(cnv, source, limit, results, "UTF-8"); |
| /* Test the condition when source >= sourceLimit */ |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| /* test error behavior with a skip callback */ |
| ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); |
| source=(const char *)in2; |
| limit=(const char *)(in2+sizeof(in2)); |
| TestNextUChar(cnv, source, limit, results2, "UTF-8"); |
| |
| ucnv_close(cnv); |
| } |
| |
| void |
| static TestCESU8() { |
| /* test input */ |
| static const uint8_t in[]={ |
| 0x61, |
| 0xc2, 0x80, |
| 0xe0, 0xa0, 0x80, |
| 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, |
| 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, |
| 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, |
| 0xef, 0xbf, 0xbc |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 1, 0x61, |
| 2, 0x80, |
| 3, 0x800, |
| 6, 0x10000, |
| 3, 0xdc01, |
| -1,0xd802, /* may read 3 or 6 bytes */ |
| -1,0x10ffff,/* may read 0 or 3 bytes */ |
| 3, 0xfffc |
| }; |
| |
| /* error test input */ |
| static const uint8_t in2[]={ |
| 0x61, |
| 0xc0, 0x80, /* illegal non-shortest form */ |
| 0xe0, 0x80, 0x80, /* illegal non-shortest form */ |
| 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ |
| 0xc0, 0xc0, /* illegal trail byte */ |
| 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ |
| 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ |
| 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ |
| 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ |
| 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ |
| 0xfe, /* illegal byte altogether */ |
| 0x62 |
| }; |
| |
| /* expected error test results */ |
| static const int32_t results2[]={ |
| /* number of bytes read, code point */ |
| 1, 0x61, |
| 34, 0x62 |
| }; |
| |
| UConverterToUCallback cb; |
| const void *p; |
| |
| const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("CESU-8", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); |
| return; |
| } |
| TestNextUChar(cnv, source, limit, results, "CESU-8"); |
| /* Test the condition when source >= sourceLimit */ |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| /* test error behavior with a skip callback */ |
| ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); |
| source=(const char *)in2; |
| limit=(const char *)(in2+sizeof(in2)); |
| TestNextUChar(cnv, source, limit, results2, "CESU-8"); |
| |
| ucnv_close(cnv); |
| } |
| |
| void |
| static TestUTF16() { |
| /* test input */ |
| static const uint8_t in1[]={ |
| 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff |
| }; |
| static const uint8_t in2[]={ |
| 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff |
| }; |
| static const uint8_t in3[]={ |
| 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 |
| }; |
| |
| /* expected test results */ |
| static const int32_t results1[]={ |
| /* number of bytes read, code point */ |
| 4, 0x4e00, |
| 2, 0xfeff |
| }; |
| static const int32_t results2[]={ |
| /* number of bytes read, code point */ |
| 4, 0x004e, |
| 2, 0xfffe |
| }; |
| static const int32_t results3[]={ |
| /* number of bytes read, code point */ |
| 2, 0xfefe, |
| 2, 0x4e00, |
| 2, 0xfeff, |
| 4, 0x20001 |
| }; |
| |
| const char *source, *limit; |
| |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("UTF-16", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); |
| return; |
| } |
| |
| source=(const char *)in1, limit=(const char *)in1+sizeof(in1); |
| TestNextUChar(cnv, source, limit, results1, "UTF-16"); |
| |
| source=(const char *)in2, limit=(const char *)in2+sizeof(in2); |
| ucnv_resetToUnicode(cnv); |
| TestNextUChar(cnv, source, limit, results2, "UTF-16"); |
| |
| source=(const char *)in3, limit=(const char *)in3+sizeof(in3); |
| ucnv_resetToUnicode(cnv); |
| TestNextUChar(cnv, source, limit, results3, "UTF-16"); |
| |
| /* Test the condition when source >= sourceLimit */ |
| ucnv_resetToUnicode(cnv); |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| ucnv_close(cnv); |
| } |
| |
| /* |
| * Checks the "utf-16be" converter. |
| * A fixed big-endian byte sequence is handed to TestNextUChar together with |
| * the expected (number of bytes read, code point) pairs. TestNextUCharError |
| * is then used for an empty source range (U_INDEX_OUTOFBOUNDS_ERROR) and, |
| * with the STOP callback installed, for a single trailing byte |
| * (U_TRUNCATED_CHAR_FOUND). |
| */ |
| static void |
| TestUTF16BE(void) { |
| /* test input */ |
| static const uint8_t in[]={ |
| 0x00, 0x61, |
| 0x00, 0xc0, |
| 0x00, 0x31, |
| 0x00, 0xf4, |
| 0xce, 0xfe, |
| 0xd8, 0x01, 0xdc, 0x01 /* surrogate pair, expected as one code point */ |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 2, 0x61, |
| 2, 0xc0, |
| 2, 0x31, |
| 2, 0xf4, |
| 2, 0xcefe, |
| 4, 0x10401 |
| }; |
| |
| const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("utf-16be", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); |
| return; |
| } |
| TestNextUChar(cnv, source, limit, results, "UTF-16BE"); |
| /* Test the condition when source >= sourceLimit (empty range: limit == source) */ |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| /* Test for the condition where the input ends in the middle of a code unit */ |
| { |
| static const uint8_t source2[]={0x61}; |
| /* install the STOP callback before feeding the truncated input */ |
| ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); |
| TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); |
| } |
| #if 0 |
| /* |
| * Test disabled because currently the UTF-16BE/LE converters are supposed |
| * to not set errors for unpaired surrogates. |
| * This may change with |
| * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 |
| */ |
| |
| /* Test for the condition where there is a lone lead surrogate (no trail unit) */ |
| { |
| static const uint8_t source2[]={0xd8, 0x01}; |
| TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); |
| } |
| #endif |
| ucnv_close(cnv); |
| } |
| |
| /* |
| * UTF-16LE converter check. |
| * A little-endian byte stream is passed to TestNextUChar along with the |
| * (bytes read, code point) pairs it is expected to yield; TestNextUCharError |
| * then covers an empty source range and a single dangling byte with the |
| * STOP callback installed. |
| */ |
| static void |
| TestUTF16LE() { |
| /* little-endian input bytes */ |
| static const uint8_t leBytes[]={ |
| 0x61, 0x00, 0x31, 0x00, 0x4e, 0x2e, 0x4e, 0x00, |
| 0x01, 0xd8, 0x01, 0xdc |
| }; |
| |
| /* pairs of: number of bytes read, resulting code point */ |
| static const int32_t expected[]={ |
| 2, 0x61, 2, 0x31, 2, 0x2e4e, 2, 0x4e, |
| 4, 0x10401 |
| }; |
| |
| /* a single byte, i.e. only half of a 16-bit code unit */ |
| static const uint8_t oneByte[]={0x61}; |
| |
| const char *start, *end; |
| UConverter *converter; |
| UErrorCode status=U_ZERO_ERROR; |
| |
| start=(const char *)leBytes; |
| end=start+sizeof(leBytes); |
| |
| converter=ucnv_open("utf-16le", &status); |
| if(U_FAILURE(status)) { |
| log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(status)); |
| return; |
| } |
| |
| /* regular input */ |
| TestNextUChar(converter, start, end, expected, "UTF-16LE"); |
| |
| /* empty range: the limit equals the start pointer */ |
| TestNextUCharError(converter, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| /* truncated input, checked with the STOP callback in place */ |
| ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status); |
| TestNextUCharError(converter, (const char*)oneByte, (const char*)oneByte+sizeof(oneByte), U_TRUNCATED_CHAR_FOUND, "an invalid character"); |
| |
| #if 0 |
| /* |
| * Kept disabled: the UTF-16BE/LE converters are currently not supposed |
| * to report errors for unpaired surrogates. |
| * Jitterbug 1838 (forbid converting surrogate code points in UTF-16/32) |
| * may change that. |
| */ |
| |
| /* a lone surrogate code unit */ |
| { |
| static const uint8_t loneSurrogate[]={0x01, 0xd8}; |
| TestNextUCharError(converter, (const char*)loneSurrogate, (const char*)loneSurrogate+sizeof(loneSurrogate), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); |
| } |
| #endif |
| |
| ucnv_close(converter); |
| } |
| |
| /* |
| * Checks the generic "UTF-32" converter with three inputs, each passed to |
| * TestNextUChar with its expected (number of bytes read, code point) pairs: |
| * in1 - starts with 00 00 FE FF; the following units are expected big-endian |
| * in2 - starts with FF FE 00 00; the following units are expected little-endian |
| * in3 - no such leading signature; units are expected big-endian and the |
| * two surrogate code points are expected as U+FFFD |
| * The converter is reset between inputs. Finally an empty source range is |
| * expected to produce U_INDEX_OUTOFBOUNDS_ERROR. |
| */ |
| static void |
| TestUTF32(void) { |
| /* test input */ |
| static const uint8_t in1[]={ |
| 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff |
| }; |
| static const uint8_t in2[]={ |
| 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 |
| }; |
| static const uint8_t in3[]={ |
| 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 |
| }; |
| |
| /* expected test results */ |
| static const int32_t results1[]={ |
| /* number of bytes read, code point */ |
| 8, 0x100f00, /* 8 bytes: the leading signature is consumed with the first code point */ |
| 4, 0xfeff |
| }; |
| static const int32_t results2[]={ |
| /* number of bytes read, code point */ |
| 8, 0x0f1000, /* 8 bytes: the leading signature is consumed with the first code point */ |
| 4, 0xfffe |
| }; |
| static const int32_t results3[]={ |
| /* number of bytes read, code point */ |
| 4, 0xfefe, |
| 4, 0x100f00, |
| 4, 0xfffd, /* unmatched surrogate */ |
| 4, 0xfffd /* unmatched surrogate */ |
| }; |
| |
| const char *source, *limit; |
| |
| UErrorCode errorCode=U_ZERO_ERROR; |
| UConverter *cnv=ucnv_open("UTF-32", &errorCode); |
| if(U_FAILURE(errorCode)) { |
| log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); |
| return; |
| } |
| |
| source=(const char *)in1; |
| limit=(const char *)in1+sizeof(in1); |
| TestNextUChar(cnv, source, limit, results1, "UTF-32"); |
| |
| /* reset the converter state before each new input */ |
| source=(const char *)in2; |
| limit=(const char *)in2+sizeof(in2); |
| ucnv_resetToUnicode(cnv); |
| TestNextUChar(cnv, source, limit, results2, "UTF-32"); |
| |
| source=(const char *)in3; |
| limit=(const char *)in3+sizeof(in3); |
| ucnv_resetToUnicode(cnv); |
| TestNextUChar(cnv, source, limit, results3, "UTF-32"); |
| |
| /* Test the condition when source >= sourceLimit (empty range: limit == source) */ |
| ucnv_resetToUnicode(cnv); |
| TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| ucnv_close(cnv); |
| } |
| |
| /* |
| * UTF-32BE converter check. |
| * First a well-formed big-endian stream is passed to TestNextUChar with |
| * its expected (bytes read, code point) pairs, then an empty source range |
| * is passed to TestNextUCharError, and finally a stream containing |
| * out-of-range units is passed to TestNextUChar with the SKIP callback set. |
| */ |
| static void |
| TestUTF32BE() { |
| /* regular input, one 4-byte unit per row */ |
| static const uint8_t beBytes[]={ |
| 0x00, 0x00, 0x00, 0x61, |
| 0x00, 0x00, 0x30, 0x61, |
| 0x00, 0x00, 0xdc, 0x00, /* surrogate code point */ |
| 0x00, 0x00, 0xd8, 0x00, /* surrogate code point */ |
| 0x00, 0x00, 0xdf, 0xff, /* surrogate code point */ |
| 0x00, 0x00, 0xff, 0xfe, |
| 0x00, 0x10, 0xab, 0xcd, |
| 0x00, 0x10, 0xff, 0xff |
| }; |
| |
| /* pairs of: number of bytes read, code point */ |
| static const int32_t expected[]={ |
| 4, 0x61, |
| 4, 0x3061, |
| 4, 0xfffd, 4, 0xfffd, 4, 0xfffd, /* the three surrogate units */ |
| 4, 0xfffe, |
| 4, 0x10abcd, |
| 4, 0x10ffff |
| }; |
| |
| /* input with units above 0x10ffff */ |
| static const uint8_t badBytes[]={ |
| 0x00, 0x00, 0x00, 0x61, |
| 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ |
| 0x00, 0x00, 0x00, 0x62, |
| 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ |
| 0x00, 0x00, 0x01, 0x62, |
| 0x00, 0x00, 0x02, 0x62 |
| }; |
| |
| /* |
| * pairs of: number of bytes read, code point; |
| * the byte counts 8 and 12 include the out-of-range units preceding the result |
| */ |
| static const int32_t expectedWithSkip[]={ |
| 4, 0x61, |
| 8, 0x62, |
| 12, 0x162, |
| 4, 0x262 |
| }; |
| |
| /* out-parameters for ucnv_setToUCallBack; their values are not used afterwards */ |
| UConverterToUCallback oldAction; |
| const void *oldContext; |
| |
| const char *start, *end; |
| UConverter *converter; |
| UErrorCode status=U_ZERO_ERROR; |
| |
| start=(const char *)beBytes; |
| end=start+sizeof(beBytes); |
| |
| converter=ucnv_open("UTF-32BE", &status); |
| if(U_FAILURE(status)) { |
| log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(status)); |
| return; |
| } |
| |
| /* regular input */ |
| TestNextUChar(converter, start, end, expected, "UTF-32BE"); |
| |
| /* empty range: the limit equals the start pointer */ |
| TestNextUCharError(converter, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| /* out-of-range units with the SKIP callback installed */ |
| ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, NULL, &oldAction, &oldContext, &status); |
| start=(const char *)badBytes; |
| end=start+sizeof(badBytes); |
| TestNextUChar(converter, start, end, expectedWithSkip, "UTF-32BE"); |
| |
| ucnv_close(converter); |
| } |
| |
| /* |
| * UTF-32LE converter check. |
| * First a well-formed little-endian stream is passed to TestNextUChar with |
| * its expected (bytes read, code point) pairs, then an empty source range |
| * is passed to TestNextUCharError, and finally a stream containing |
| * out-of-range units is passed to TestNextUChar with the SKIP callback set. |
| */ |
| static void |
| TestUTF32LE() { |
| /* regular input, one 4-byte unit per row */ |
| static const uint8_t leBytes[]={ |
| 0x61, 0x00, 0x00, 0x00, |
| 0x61, 0x30, 0x00, 0x00, |
| 0x00, 0xdc, 0x00, 0x00, /* surrogate code point */ |
| 0x00, 0xd8, 0x00, 0x00, /* surrogate code point */ |
| 0xff, 0xdf, 0x00, 0x00, /* surrogate code point */ |
| 0xfe, 0xff, 0x00, 0x00, |
| 0xcd, 0xab, 0x10, 0x00, |
| 0xff, 0xff, 0x10, 0x00 |
| }; |
| |
| /* pairs of: number of bytes read, code point */ |
| static const int32_t expected[]={ |
| 4, 0x61, |
| 4, 0x3061, |
| 4, 0xfffd, 4, 0xfffd, 4, 0xfffd, /* the three surrogate units */ |
| 4, 0xfffe, |
| 4, 0x10abcd, |
| 4, 0x10ffff |
| }; |
| |
| /* input with units above 0x10ffff */ |
| static const uint8_t badBytes[]={ |
| 0x61, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ |
| 0x62, 0x00, 0x00, 0x00, |
| 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ |
| 0x62, 0x01, 0x00, 0x00, |
| 0x62, 0x02, 0x00, 0x00 |
| }; |
| |
| /* |
| * pairs of: number of bytes read, code point; |
| * the byte counts 8 and 12 include the out-of-range units preceding the result |
| */ |
| static const int32_t expectedWithSkip[]={ |
| 4, 0x61, |
| 8, 0x62, |
| 12, 0x162, |
| 4, 0x262 |
| }; |
| |
| /* out-parameters for ucnv_setToUCallBack; their values are not used afterwards */ |
| UConverterToUCallback oldAction; |
| const void *oldContext; |
| |
| const char *start, *end; |
| UConverter *converter; |
| UErrorCode status=U_ZERO_ERROR; |
| |
| start=(const char *)leBytes; |
| end=start+sizeof(leBytes); |
| |
| converter=ucnv_open("UTF-32LE", &status); |
| if(U_FAILURE(status)) { |
| log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(status)); |
| return; |
| } |
| |
| /* regular input */ |
| TestNextUChar(converter, start, end, expected, "UTF-32LE"); |
| |
| /* empty range: the limit equals the start pointer */ |
| TestNextUCharError(converter, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); |
| |
| /* out-of-range units with the SKIP callback installed */ |
| ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, NULL, &oldAction, &oldContext, &status); |
| start=(const char *)badBytes; |
| end=start+sizeof(badBytes); |
| TestNextUChar(converter, start, end, expectedWithSkip, "UTF-32LE"); |
| |
| ucnv_close(converter); |
| } |
| |
| static void |
| TestLATIN1() { |
| /* test input */ |
| static const uint8_t in[]={ |
| 0x61, |
| 0x31, |
| 0x32, |
| 0xc0, |
| 0xf0, |
| 0xf4, |
| }; |
| |
| /* expected test results */ |
| static const int32_t results[]={ |
| /* number of bytes read, code point */ |
| 1, 0x61, |
| 1, 0x31, |
| 1, 0x32, |
| 1, 0xc0, |
| 1, 0xf0, |
| 1, 0xf4, |
| }; |
| static const uint16_t in1[] = { |
| 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, |
| 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, |
| 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, |
| 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, |
| 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, |
| 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, |
| 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, |
| 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, |
| 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, |
| 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, |
| 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, |
| 0xcb, 0x82 |
| }; |
| static const uint8_t out1[] = { |
| 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, |
| 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, |
| 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, |
| 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, |
| 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, |
| 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, |
| 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, |
| 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, |
| 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, |
| 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, |
| 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, |
| 0xcb, 0x82 |
| }; |
| static const uint16_t in2[]={ |
| 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, |
| 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, |
| 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, |
| 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, |
| 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, |
| 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, |
| 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, |
| 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, |
| 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, |
| 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, |
| 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B
|