/* blob: b25a8ea14beb16bd62c214e77c32fa3f3de149dd [file] [log] [blame] */
/*
**********************************************************************
* Copyright (C) 1998-2011, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*
* File date.c
*
* Modification History:
*
* Date Name Description
* 2011-Jan-16 srl Created.
*******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/uclean.h"
#include "unicode/uloc.h"
#include "unicode/unorm2.h"
#include "unicode/unorm.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
#include "unicode/ustring.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Logging helpers used by the tests below; all three simply expand to printf. */
#define log_data_err printf
#define log_verbose printf
#define log_err printf
/*
 * Prints the current file, line and the name of the ICU error code held in a
 * variable called `error`, which must be in scope wherever ERRMSG is used.
 */
#define ERRMSG printf("%s:%d: error=%s\n", __FILE__,__LINE__,u_errorName(error))
/* The tests call the normalization API, so refuse to build when it is configured out. */
#if UCONFIG_NO_NORMALIZATION
#error UCONFIG_NO_NORMALIZATION set
#endif
/* Element count of a true array (not a pointer), as an int32_t. */
#ifndef LENGTHOF
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#endif
/**
 * Maps a quick-check result to a short printable name.
 *
 * @param res Result value to describe.
 * @return "YES", "NO" or "MAYBE" for the matching UNORM_* constant,
 *         "?" for any other value.
 */
static const char* res2str(UNormalizationCheckResult res) {
    if (res == UNORM_YES) {
        return "YES";
    }
    if (res == UNORM_NO) {
        return "NO";
    }
    if (res == UNORM_MAYBE) {
        return "MAYBE";
    }
    return "?";
}
/** Head of the singly linked list of blocks handed out by c_malloc(). */
void **chainDel = NULL;

/**
 * Use this to allocate stuff (such as test strings) to be cleaned up at the end.
 *
 * Every block is preceded by one pointer-sized link to the previously
 * allocated block; the returned pointer addresses the bytes after that link,
 * so it must not be passed to free() directly.
 *
 * @param s Number of usable bytes requested.
 * @return Pointer to s usable bytes, or NULL when s is so large that adding
 *         the link header would overflow, or when malloc() fails. The chain
 *         is left untouched on failure.
 */
void *c_malloc(size_t s) {
    void **next;

    /* Adding the header must not wrap around to a tiny allocation. */
    if (s > (size_t)-1 - sizeof(void*)) {
        return NULL;
    }
    next = malloc(s + sizeof(void*));
    if (next == NULL) {
        return NULL;
    }
    next[0] = chainDel;   /* link to the previous head */
    chainDel = next;
    return (void*)(next + 1);
}
/**
 * Frees every block currently linked from chainDel and leaves the chain empty.
 */
void c_cleanup() {
    void **node = chainDel;

    while (node != NULL) {
        /* The first slot of each block holds the link to the next one. */
        void **following = *node;
        free(node);
        node = following;
    }
    chainDel = NULL;
}
static void TestQuickCheckResultNO()
{
const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
const int SIZE = 10;
int count = 0;
UErrorCode error = U_ZERO_ERROR;
for (; count < SIZE; count ++)
{
/* if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != */
/* UNORM_NO) */
/* { */
/* log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); */
/* return; */
/* } */
if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
UNORM_NO)
{
log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
return;
}
/* if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != */
/* UNORM_NO) */
/* { */
/* log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); */
/* return; */
/* } */
/* if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != */
/* UNORM_NO) */
/* { */
/* log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); */
/* return; */
/* } */
}
}
static void TestQuickCheckResultYES()
{
const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
const int SIZE = 10;
int count = 0;
UErrorCode error = U_ZERO_ERROR;
UChar cp = 0;
while (cp < 0xA0)
{
/* if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp); */
/* return; */
/* } */
if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
UNORM_YES)
{
ERRMSG;
log_err("ERROR in NFC quick check at U+%04x\n", cp);
return;
}
/* if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFKD quick check at U+%04x\n", cp); */
/* return; */
/* } */
/* if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != */
/* UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFKC quick check at U+%04x\n", cp); */
/* return; */
/* } */
cp ++;
}
for (; count < SIZE; count ++)
{
/* if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != */
/* UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); */
/* return; */
/* } */
if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
!= UNORM_YES)
{
ERRMSG;
log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
return;
}
/* if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != */
/* UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); */
/* return; */
/* } */
/* if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != */
/* UNORM_YES) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); */
/* return; */
/* } */
}
}
static void TestQuickCheckResultMAYBE()
{
const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
const int SIZE = 10;
int count = 0;
UErrorCode error = U_ZERO_ERROR;
/* NFD and NFKD does not have any MAYBE codepoints */
for (; count < SIZE; count ++)
{
UNormalizationCheckResult res;
if ((res=unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)) !=
UNORM_MAYBE)
{
ERRMSG;
log_data_err("ERROR in NFC quick check at U+%04x - %s wanted MAYBE\n", CPNFC[count], res2str(res));
/* return; */
}
/* if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != */
/* UNORM_MAYBE) */
/* { */
/* ERRMSG; */
/* log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); */
/* return; */
/* } */
}
}
/**
 * Runs str through u_unescape() and returns the result in a freshly
 * malloc()ed UChar buffer.
 *
 * The buffer is sized from a preflight call plus one extra unit for the
 * terminator. Plain malloc() is used on purpose: this program is supposed to
 * behave like user code, not like ICU-internal code.
 *
 * @param str Source text, possibly containing backslash escapes.
 * @return Buffer owned by the caller, to be released with free(), or NULL if
 *         the allocation failed.
 */
UChar* CharsToUChars(const char* str) {
    /* Might be faster to just use strlen() as the preflight len - liu */
    int32_t len = u_unescape(str, NULL, 0); /* preflight */
    UChar *buf = (UChar*) malloc(sizeof(UChar) * (len + 1));

    if (buf == NULL) {
        /* Never hand a NULL destination with a non-zero capacity to u_unescape(). */
        return NULL;
    }
    u_unescape(str, buf, len + 1);
    return buf;
}
/**
 * Canonical normalization samples. Each row holds three escaped strings:
 * [0] the input, [1] its decomposed form and [2] its composed form.
 */
static const char* canonTests[][3] = {
    /* Input*/ /*Decomposed*/ /*Composed*/
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },
    { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
    { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
    { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
    { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" } /* A grave grave_below */
};
/**
 * Compatibility normalization samples. Each row holds three escaped strings:
 * [0] the input, [1] its decomposed form and [2] its composed form.
 */
static const char* compatTests[][3] = {
    /* Input*/ /*Decomposed */ /*Composed*/
    { "cat", "cat", "cat" },
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
    /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" } /* ka + hw_ten*/
};
/**
 * Copies a terminated UChar string into a newly allocated char string using
 * u_austrcpy().
 *
 * The buffer comes from c_malloc() (four bytes per unit, terminator
 * included), so it joins the cleanup chain and must not be passed to free().
 *
 * @param unichars Terminated source string.
 * @return The char copy, or NULL if c_malloc() returned NULL.
 */
char *austrdup(const UChar* unichars)
{
    const int length = u_strlen(unichars);
    char *const newString = (char*)c_malloc(sizeof(char) * 4 * (length + 1));

    if (newString != NULL) {
        u_austrcpy(newString, unichars);
    }
    return newString;
}
/**
 * Converts a UChar string to a char string in the platform's invariant
 * charset, with the converter's from-Unicode callback set to
 * UCNV_FROM_U_CALLBACK_ESCAPE / UCNV_ESCAPE_C for anything the charset
 * cannot represent.
 *
 * The buffer comes from c_malloc() (eight bytes per unit, terminator
 * included), so it joins the cleanup chain and must not be passed to free().
 *
 * @param unichars Source string.
 * @param length   Number of UChars to convert, or -1 if unichars is terminated.
 * @return The terminated char string, or NULL if c_malloc() returned NULL.
 *         NOTE(review): if the converter cannot be opened the result looks
 *         like it will be an empty string, on the assumption that the later
 *         ICU calls do nothing once errorCode holds a failure - confirm.
 */
char *aescstrdup(const UChar* unichars,int32_t length){
    char *newString,*targetLimit,*target;
    UConverterFromUCallback cb;   /* receives the previous callback; unused */
    const void *p;                /* receives the previous context; unused */
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter* conv;
    size_t capacity;
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   ifdef OS390
    static const char convName[] = "ibm-1047";
#   else
    static const char convName[] = "ibm-37";
#   endif
#else
    static const char convName[] = "US-ASCII";
#endif

    if(length==-1){
        length = u_strlen( unichars);
    }
    capacity = sizeof(char) * 8 * (length +1);
    newString = (char*)c_malloc(capacity);
    if(newString == NULL){
        /* Same contract as austrdup(): report allocation failure as NULL. */
        return NULL;
    }
    target = newString;
    /* Keep the last byte free so the terminator below always fits. */
    targetLimit = newString + capacity - 1;

    conv = ucnv_open(convName, &errorCode);
    ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, &cb, &p, &errorCode);
    ucnv_fromUnicode(conv,&target,targetLimit, &unichars, unichars+length,NULL,TRUE,&errorCode);
    ucnv_close(conv);
    *target = '\0';
    return newString;
}
/**
 * Compares a normalization result with the expected escaped string.
 *
 * The expectation is converted with CharsToUChars() and compared with
 * u_strcmp(); a mismatch is logged together with the escaped actual value.
 *
 * @param result   Actual terminated UChar string.
 * @param expected Expected string in escaped char form.
 * @param index    Index of the test row, used only in the log message.
 * @return TRUE when both strings are equal, FALSE otherwise.
 */
static UBool assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUni = CharsToUChars(expected);
    const UBool matches = (UBool)(u_strcmp(result, expectedUni) == 0);

    if (!matches) {
        log_err("ERROR in index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
                aescstrdup(result,-1) );
    }
    free(expectedUni);
    return matches;
}
void TestCanonDecompCompose()
{
UErrorCode status = U_ZERO_ERROR;
int32_t x, neededLen, resLen;
UChar *source=NULL, *result=NULL;
status = U_ZERO_ERROR;
resLen=0;
log_verbose("Testing unorm_normalize with Decomp can compose compat\n");
for(x=0; x < LENGTHOF(canonTests); x++)
{
source=CharsToUChars(canonTests[x][0]);
neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, NULL, 0, &status);
if(status==U_BUFFER_OVERFLOW_ERROR)
{
status=U_ZERO_ERROR;
resLen=neededLen+1;
result=(UChar*)malloc(sizeof(UChar*) * resLen);
unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, result, resLen, &status);
if(U_FAILURE(status)){
log_data_err("ERROR in unorm_normalize at %s: %s - (Are you missing data?)\n", austrdup(source),u_errorName(status) );
} else {
if(assertEqual(result, canonTests[x][2], x)) {
log_verbose("OK %d: %s -> %s\n", x, aescstrdup(source,-1), aescstrdup(result,-1));
}
}
free(result);
} else {
log_data_err("ERROR in normalization of %s: status was %s should be U_BUFFER_OVERFLOW_ERROR, neededLen %d, len was %d\n",
austrdup(source), u_errorName(status), neededLen, u_strlen(source));
}
free(source);
}
}
static void TestQuickCheckStringResult()
{
#if 1
int count;
UChar *d = NULL;
UChar *c = NULL;
UErrorCode error = U_ZERO_ERROR;
for (count = 0; count < LENGTHOF(canonTests); count ++)
{
UNormalizationCheckResult res;
d = CharsToUChars(canonTests[count][1]);
c = CharsToUChars(canonTests[count][2]);
/* if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != */
/* UNORM_YES) */
/* { */
/* log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count); */
/* return; */
/* } */
if ((res=unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error)) ==
UNORM_NO)
{
log_err("ERROR in NFC quick check for string at count %d, got %s wanted %s\n", count, res2str(res), res2str(UNORM_NO));
return;
}
free(d);
free(c);
/* printf("OK: %s = NO \n", canonTests[count][2]); */
}
/* for (count = 0; count < LENGTHOF(compatTests); count ++) */
/* { */
/* d = CharsToUChars(compatTests[count][1]); */
/* c = CharsToUChars(compatTests[count][2]); */
/* if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != */
/* UNORM_YES) */
/* { */
/* log_err("ERROR in NFKD quick check for string at count %d\n", count); */
/* return; */
/* } */
/* if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != */
/* UNORM_YES) */
/* { */
/* log_err("ERROR in NFKC quick check for string at count %d\n", count); */
/* return; */
/* } */
/* free(d); */
/* free(c); */
/* } */
#endif
}
/**
 * Runs the four quick-check tests in order: the single-code-unit NO, YES and
 * MAYBE tests, followed by the whole-string test.
 * Nothing is returned; each test reports its own failures through the log
 * macros.
 */
void TestQuickCheck()
{
TestQuickCheckResultNO();
TestQuickCheckResultYES();
TestQuickCheckResultMAYBE();
TestQuickCheckStringResult();
}
int main()
{
char *dl = NULL;
UErrorCode status = U_ZERO_ERROR;
int fail=0;
u_init(&status);
if(U_FAILURE(status)) {
fprintf(stderr, "Error: u_init returned %s - check that data was built properly.\n\n\n\n", u_errorName(status));
return 1;
}
#if 1
dl = uloc_getDefault();
printf("Default Name: %s\n", dl!=NULL?dl:"<NULL>");
printf("u_iscntrl(U+%04X)=%d\n", 0x0009, u_iscntrl(0x0009));
printf("u_iscntrl(U+%04X)=%d\n", 0x0020, u_iscntrl(0x0020));
printf("u_tolower(U+%04X)=U+%04X\n", 0x2C1F, u_tolower(0x2C1F));
printf("u_tolower(U+%04X)=U+%04X\n", 0xA65C, u_tolower(0xA65C));
printf("Testing QuickCheck..");
fflush(stdout);
TestQuickCheck();
printf("Testing norm instance..");
fflush(stdout);
/* norm */
{
UErrorCode errorCode;
const UNormalizer2 *norm2;
int length;
UChar buffer16[300];
UChar source[50] = { 0x000A, 0x0000 };
int rc;
/*
* Test for an example that unorm_getCanonStartSet() delivers
* all characters that compose from the input one,
* even in multiple steps.
* For example, the set for "I" (0049) should contain both
* I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
* In general, the set for the middle such character should be a subset
* of the set for the first.
*/
errorCode=U_ZERO_ERROR;
norm2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("unorm2_getInstance(NFC) failed - %s\n", u_errorName(errorCode));
fail++;
}
memset(buffer16,0,300*sizeof(buffer16[0]));
length=unorm2_normalize(norm2, source, 1, buffer16, LENGTHOF(buffer16), &errorCode);
printf("test1, returned length %d\n", length);
if(length!=1) {
printf("Error, length should be 1\n");
fail++;
}
rc=memcmp(buffer16,source,1);
if(rc) {
printf("Error: comparison failed!\n");
fail++;
}
memset(buffer16,0,300*sizeof(buffer16[0]));
length=unorm2_normalize(norm2, source, -1, buffer16, LENGTHOF(buffer16), &errorCode);
printf("test2, returned length %d\n", length);
if(length!=1) {
printf("Error, length should be 1\n");
fail++;
}
rc=memcmp(buffer16,source,1);
if(rc) {
printf("Error: comparison failed!\n");
fail++;
}
unorm2_close(norm2);
}
#endif
TestCanonDecompCompose();
#if 1
{
UNormalizationCheckResult res;
UChar cpnfc = 0x0306;
UErrorCode error = U_ZERO_ERROR;
res = unorm_quickCheck(&cpnfc, 1, UNORM_NFC, &error);
printf("CH: U+%04X, res=%s, err=%s\n", cpnfc, res2str(res), u_errorName(error));
}
#endif
printf("Pure C test OK: %s\n", u_errorName(status));
fflush(stdout);
c_cleanup();
u_cleanup();
printf("u_cleanup() OK: %s\n", u_errorName(status));
fflush(stdout);
return status || fail;
}