blob: eec25394d69af2a9d672335f185f8fc50ca4c8de [file] [log] [blame]
/* look for TODO or 'unimp' */
/*
*******************************************************************************
*
* Copyright (C) 2009-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: normalizer2.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009nov22
* created by: Markus W. Scherer
*
* ported from normalizer2.cpp on 2011-feb-15 by srl into C
*/
#include "unicode/utypes.h"
#include "unicode/unorm.h"
#include "umutex.h"
/*#include "normalizer2impl.h"*/
#include "ucln_cmn.h"
#include "uhash.h"
#include "cmemory.h"
#include "udatamem.h"
#if defined(ICU4C0)
#ifndef UNORM_DEBUG
/* #define UNORM_DEBUG 1 */
#endif
#ifdef UNORM_DEBUG
#include <stdio.h>
#include <ctype.h>
static char dbg_buf[256];
/**
 * Debug helper: renders the first (at most 20) code units of a
 * NUL-terminated UChar string into the static dbg_buf and returns it.
 * Printable ASCII code units are rendered as 'x', everything else as U+XXXX.
 * The result lives in a single static buffer, so each call overwrites the
 * previous result.
 *
 * @param u NUL-terminated string of UChars to render
 * @return pointer to the static, NUL-terminated text buffer
 */
const char *dbg_uchars(UChar *u) {
    int32_t i;
    int32_t c=0;    /* number of chars written into dbg_buf so far */
    dbg_buf[0]=0;
    /* Check the bound before reading u[i]. */
    for(i=0;(i<20)&&u[i];i++) {
        UChar ch = u[i];
        /*
         * Each entry needs at most 8 chars ("U+FFFF, "), so 20 entries
         * plus the terminating NUL always fit into the 256-byte buffer.
         * sprintf() returns the number of chars written, which advances c.
         */
        if(ch<0x7f && isprint((unsigned char)ch)) {
            c+=sprintf(dbg_buf+c,"'%c', ", (char)ch);
        } else {
            c+=sprintf(dbg_buf+c,"U+%04X, ", (unsigned int)ch);
        }
    }
    return dbg_buf;
}
#endif
#define UNORM_ENABLE_FCD 0 /* enables FCD and other modes. Not implemented. */
#include "ustr_imp.h"
#include "unicode/ustring.h"
#include "norm2imp.h"
#if 0
#ifndef UNORM_DEBUG
/* for unimp */
#include <stdio.h>
#endif
/*
 * Flags an unimplemented code path: stores U_REGEX_UNIMPLEMENTED in *e,
 * prints the source location (f = file, l = line) and always returns FALSE.
 */
static UBool _unimp(UErrorCode *e, const char *f, int l) {
    *e = U_REGEX_UNIMPLEMENTED;
    printf("%s:%d: ERROR: unimplemented!!!\n", f, l);
    return FALSE;
}
#define unimp(e) _unimp(e,__FILE__,__LINE__)
#else
/* no definition of unimp */
#endif
/*
 * Accessor and classification macros.
 * Every macro below that mentions _this expands to an expression that reads a
 * variable named _this (a Normalizer2 *) from the scope where the macro is
 * used, so such a variable must be visible at each use site.
 */
/* The FCD trie stored in the normalizer instance. */
#define fcdTrie() (_this->newFCDTrie)
/* 16-bit FCD trie value for c. */
#define getFCD16(c) UTRIE2_GET16(_this->newFCDTrie, c)
/* FCD trie value looked up via UTRIE2_GET16_FROM_U16_SINGLE_LEAD. */
#define getFCD16FromSingleLead(c) UTRIE2_GET16_FROM_U16_SINGLE_LEAD(fcdTrie(), c)
/* FCD trie value looked up via UTRIE2_GET16_FROM_SUPP. */
#define getFCD16FromSupplementary(c) UTRIE2_GET16_FROM_SUPP(fcdTrie(), c)
/* Combines the pair (c, x) with U16_GET_SUPPLEMENTARY, then looks up its FCD value. */
#define getFCD16FromSurrogatePair(c,x) getFCD16FromSupplementary(U16_GET_SUPPLEMENTARY(c, x))
/* Pointer into extraData at offset x. */
#define getMapping(x) (_this->extraData+(x))
/* 16-bit normTrie value for x. */
#define getNorm16(x) (UTRIE2_GET16(_this->normTrie,(x)))
/* True if x is below minNoNo. */
#define isCompYesAndZeroCC(x) ((x)<_this->minNoNo)
/* True if norm16 is at or above minMaybeYes. */
#define isMaybeOrNonZeroCC(norm16) ((norm16)>=_this->minMaybeYes)
/* True if norm16 is at or above limitNoNo. */
#define isDecompNoAlgorithmic(norm16) ((norm16)>=_this->limitNoNo)
/* True if norm16 is exactly 0. */
#define isInert(norm16) ((norm16)==0)
/* True if norm16 lies in the closed range [minMaybeYes, JAMO_VT]. */
#define isMaybe(norm16) (_this->minMaybeYes<=(norm16) && (norm16)<=JAMO_VT)
/* Applies the delta encoded in norm16 to c: c + norm16 - (minMaybeYes - MAX_DELTA - 1). */
#define mapAlgorithmic(c, norm16) ((c)+(norm16)-(_this->minMaybeYes-MAX_DELTA-1))
/* some prototypes - not all */
static UBool ReorderingBuffer_appendZeroCCStr(ReorderingBuffer *buffer, const UChar *s, const UChar *sLimit, UErrorCode *errorCode);
static UBool
Normalizer2_comp_compose(Normalizer2 *_this, const UChar *src, const UChar *limit,
UBool onlyContiguous,
UBool doCompose,
ReorderingBuffer *buffer,
UErrorCode *errorCode) ;
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_get2Instance(const char *packageName,
const char *name,
UNormalizationMode mode,
UErrorCode *errorCode);
static const UChar *
Normalizer2_decomp_decompose(Normalizer2 *_this, const UChar *src, const UChar *limit,
ReorderingBuffer *buffer,
UErrorCode *errorCode);
static const UChar *Normalizer2Impl_findPreviousCompBoundary(Normalizer2 *_this, const UChar *start, const UChar *p);
static const UChar *Normalizer2Impl_findNextCompBoundary(Normalizer2 *_this, const UChar *p, const UChar *limit);
/** end prototypes **/
#ifdef UNORM_DEBUG
/*
 * Debug-only name tables. Each table is one string literal made of
 * fixed-size records: 4 characters plus a terminating NUL = 5 bytes each.
 */
#define MODENAME_STR \
"____\0" \
"NONE\0" \
"NFD \0" \
"NFKD\0" \
"NFC \0" \
"NFKC\0" \
"FCD \0" \
"!!!!\0"
/*
 * Returns the 5-byte record number x of MODENAME_STR.
 * NOTE(review): x is presumably a UNormalizationMode value and is not
 * range-checked here; the argument is also not parenthesized in the expansion.
 */
#define MODENAME(x) (MODENAME_STR+((int)x)*5)
#define MODE2NAME_STR \
"COMP\0" \
"DECM\0" \
"FCD \0" \
"FCC \0" \
"???1\0" \
"???2\0" \
"???3\0" \
"!!!!\0"
/* Returns the record of MODE2NAME_STR at index (x - UNORM2_COMPOSE); x is not range-checked. */
#define MODE2NAME(x) (MODE2NAME_STR+((int)x-(int)UNORM2_COMPOSE)*5)
#endif
/* ---- FACTORY ---- */
/* Lazily allocated table of cached instances, indexed by UNormalizationMode. */
static Normalizer2 **singletons = NULL;
/**
 * Returns the cached normalizer instance for a mode, creating and caching it
 * on first use.
 *
 * The table and its entries are published under the global mutex
 * (umtx_lock(NULL)). If two callers race, the loser closes or frees its own
 * copy and returns the instance that won.
 *
 * @param mode      normalization mode; used as the index into the table
 * @param str       data name handed to unorm2_get2Instance() (may be NULL)
 * @param errorCode in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if
 *                  the table or the instance cannot be allocated, and to
 *                  U_ILLEGAL_ARGUMENT_ERROR if mode is outside the table
 * @return the cached instance, or NULL on failure
 */
static UNormalizer2 *getSingleton(UNormalizationMode mode, const char *str, UErrorCode *errorCode) {
    Normalizer2 *ret = NULL;
    Normalizer2 *newOne = NULL;
    Normalizer2 **theSingletons = NULL;

    /* The table has exactly UNORM_MODE_COUNT slots; never index outside it. */
    if((int)mode<0 || (int)mode>=(int)UNORM_MODE_COUNT) {
        *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    UMTX_CHECK(NULL,singletons,theSingletons);
    if(theSingletons == NULL) {
        Normalizer2 **list = (Normalizer2**)uprv_malloc(sizeof(Normalizer2*)*UNORM_MODE_COUNT);
        if(list!=NULL) {
            /* memset(dest, value, size): every slot must start out as NULL. */
            uprv_memset(list, 0, sizeof(Normalizer2*)*UNORM_MODE_COUNT);
        }
        umtx_lock(NULL);
        if(singletons == NULL) {
            singletons=list;
            list=NULL;
        }
        theSingletons=singletons;
        umtx_unlock(NULL);
        if(list!=NULL) {
            uprv_free(list); /* someone beat us to it. */
        }
        if(theSingletons==NULL) {
            *errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }
    UMTX_CHECK(NULL,(singletons[(int)mode]),ret);
    if(ret!=NULL) {
        return (UNormalizer2*)ret;
    }
    /* make up */
    newOne = (Normalizer2*)unorm2_get2Instance(NULL,str,mode,errorCode);
    if(U_FAILURE(*errorCode)) {
        unorm2_close((UNormalizer2*)newOne);
        return NULL;
    } else if(newOne==NULL) {
        *errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    ret = newOne;
    /* put it in the cache */
    umtx_lock(NULL);
    if(singletons[(int)mode]==NULL) {
        singletons[(int)mode] = newOne;
        newOne = NULL;
    } else {
        /* Another thread cached an instance first; use that one. */
        ret = singletons[(int)mode];
    }
    umtx_unlock(NULL);
    if(newOne!=NULL) {
        unorm2_close((UNormalizer2*)newOne);
    }
    return (UNormalizer2*)ret;
}
/*
 * Maps a UNormalizationMode to its cached normalizer instance via
 * getSingleton(). Returns NULL immediately if *errorCode already indicates a
 * failure. The cases guarded by UNORM_ENABLE_FCD are only compiled when that
 * macro is non-zero; otherwise those modes are handled by the default case.
 */
static const UNormalizer2 *
Normalizer2Factory_getInstance(UNormalizationMode mode, UErrorCode *errorCode) {
if(U_FAILURE(*errorCode)) {
return NULL;
}
switch(mode) {
#if UNORM_ENABLE_FCD
case UNORM_NFD:
return getSingleton(mode, "nfc", errorCode);
case UNORM_NFKD:
return getSingleton(mode, "nfkc", errorCode);
#endif
case UNORM_NFC:
return getSingleton(mode, "nfc", errorCode);
#if UNORM_ENABLE_FCD
case UNORM_NFKC:
return getSingleton(mode, "nfkc", errorCode);
case UNORM_FCD:
return getSingleton(mode, "nfc", errorCode);
#endif
default: /* UNORM_NONE */
/* Any mode without its own case: flag it as unimplemented ... */
*errorCode = U_REGEX_UNIMPLEMENTED; /* not implemented */
#if defined(UNORM_DEBUG)
fprintf(stderr, "Loading noop for mode #%d=%s\n",(int)mode, MODENAME(mode));
#endif
/* ... and fall through: getSingleton() is still called, with *errorCode already set. */
/* NOTE(review): mode is passed on unchecked; confirm that getSingleton() copes with out-of-range values. */
case UNORM_NONE:
return getSingleton(mode, NULL, errorCode);
}
}
/* INSTANCE */
/*
 * Data-acceptance callback handed to udata_openChoice().
 * Accepts a data item only if its UDataInfo is at least 20 bytes, matches
 * this platform's endianness and charset family, carries the data format
 * "Nrm2" and has format version 1. On acceptance the 4-byte data version is
 * copied into the Normalizer2 passed as context.
 */
static UBool U_CALLCONV
_isAcceptable(void *context,
              const char *type, const char *name,
              const UDataInfo *pInfo) {
    Normalizer2 *me;

    if(pInfo->size<20) {
        return FALSE;
    }
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="Nrm2" */
    if(pInfo->dataFormat[0]!=0x4e ||
       pInfo->dataFormat[1]!=0x72 ||
       pInfo->dataFormat[2]!=0x6d ||
       pInfo->dataFormat[3]!=0x32) {
        return FALSE;
    }
    if(pInfo->formatVersion[0]!=1) {
        return FALSE;
    }
    me=(Normalizer2 *)context;
    uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
/*
 * Opens the "nrm" data item packageName/name and initializes _this from it:
 * the threshold fields read from the index array, the normalization trie,
 * and the maybeYesCompositions / extraData pointers into the loaded data.
 * Ported from Normalizer2Impl::load in normalizer2impl.cpp.
 * On any failure *errorCode is set and the function returns early.
 */
static void Normalizer2_load(Normalizer2 *_this, const char *packageName, const char *name, UErrorCode *errorCode) {
    const uint8_t *inBytes;
    const int32_t *inIndexes;
    int32_t indexesLength;
    int32_t trieOffset;
    int32_t extraDataOffset;

    _this->memory=udata_openChoice(packageName, "nrm", name, _isAcceptable, _this, errorCode);
    if(U_FAILURE(*errorCode)) {
#if defined(UNORM_DEBUG)
        fprintf(stderr, "%s:%d: error %s, can't open data %s/%s.%s\n", __FILE__, __LINE__, u_errorName(*errorCode), packageName?packageName:"<NULL>", name, "nrm");
#endif
        return;
    }

    inBytes=(const uint8_t *)udata_getMemory(_this->memory);
    inIndexes=(const int32_t *)inBytes;
    /* The index array ends where the trie starts; each index is 4 bytes. */
    indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
    if(indexesLength<=IX_MIN_MAYBE_YES) {
        *errorCode=U_INVALID_FORMAT_ERROR; /* Not enough indexes. */
        return;
    }

    _this->minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
    _this->minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
    _this->minYesNo=inIndexes[IX_MIN_YES_NO];
    _this->minNoNo=inIndexes[IX_MIN_NO_NO];
    _this->limitNoNo=inIndexes[IX_LIMIT_NO_NO];
    _this->minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];

    /* The serialized trie occupies the bytes between the two offsets. */
    trieOffset=inIndexes[IX_NORM_TRIE_OFFSET];
    extraDataOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
    _this->normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
                                              inBytes+trieOffset, extraDataOffset-trieOffset, NULL,
                                              errorCode);
    if(U_FAILURE(*errorCode)) {
        return;
    }

    _this->maybeYesCompositions=(const uint16_t *)(inBytes+extraDataOffset);
    _this->extraData=_this->maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-_this->minMaybeYes);
}
#if UNORM_ENABLE_FCD
/*
 * Derives a 16-bit FCD value from norm16 and stores it for the code point
 * range [start, end] in newFCDTrie via utrie2_setRange32().
 * As assembled below, the stored value carries lccc in its upper byte and
 * tccc in its lower byte. Nothing is stored for values classified as
 * "no decomposition or Hangul syllable".
 * For algorithmic 1:1 mappings the function either follows the mapping of a
 * single code point (and loops once more) or recurses per code point.
 */
static void Normalizer2Impl_setFCD16FromNorm16(Normalizer2 *_this, UChar32 start, UChar32 end, uint16_t norm16,
UTrie2 *newFCDTrie, UErrorCode *errorCode) {
/* Only loops for 1:1 algorithmic mappings. */
for(;;) {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
/* Keep the low byte and duplicate it into the high byte. */
norm16&=0xff;
norm16|=norm16<<8;
} else if(norm16<=_this->minYesNo || _this->minMaybeYes<=norm16) {
/* no decomposition or Hangul syllable, all zeros */
break;
} else if(_this->limitNoNo<=norm16) {
/* Algorithmic mapping: same arithmetic as the mapAlgorithmic() macro. */
int32_t delta=norm16-(_this->minMaybeYes-MAX_DELTA-1);
if(start==end) {
/* Single code point: continue the loop with the mapped character's norm16. */
start+=delta;
norm16=getNorm16(start);
} else {
/* the same delta leads from different original characters to different mappings */
do {
UChar32 c=start+delta;
Normalizer2Impl_setFCD16FromNorm16(_this,c, c, getNorm16(c), newFCDTrie, errorCode);
} while(++start<=end);
break;
}
} else {
/* c decomposes, get everything from the variable-length extra data */
const uint16_t *mapping= _this->extraData+norm16; /* getMapping(norm16); */
uint16_t firstUnit=*mapping;
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
/* A character that is deleted (maps to an empty string) must */
/* get the worst-case lccc and tccc values because arbitrary */
/* characters on both sides will become adjacent. */
norm16=0x1ff;
} else {
if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
norm16=mapping[1]&0xff00; /* lccc */
} else {
norm16=0;
}
norm16|=firstUnit>>8; /* tccc */
}
}
/* Store the computed value for the whole range and stop. */
utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, errorCode);
break;
}
}
/*
 * Trie enumeration callback: ORs each range's value into the uint32_t that
 * context points to. Used to collect the combined FCD value of all
 * supplementary characters that share one lead surrogate code unit.
 * Always returns TRUE.
 */
static UBool U_CALLCONV
enumRangeOrValue(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    uint32_t *accumulator=(uint32_t *)context;
    *accumulator|=value;
    return TRUE;
}
/*
 * Trie enumeration callback: for a range of characters sharing one non-zero
 * norm16 value, sets the corresponding FCD value in the instance's
 * newFCDTrie. context is the Normalizer2 instance; errors are recorded in
 * its fcdErrorCode. Returns whether fcdErrorCode still indicates success.
 */
static UBool U_CALLCONV
enumRangeHandler(void *context, UChar32 start, UChar32 end, uint32_t value) {
    Normalizer2 *impl = (Normalizer2*)context;
    UErrorCode *status = &(impl->fcdErrorCode);
    if(value!=0) {
        Normalizer2Impl_setFCD16FromNorm16(impl, start, end, (uint16_t)value, impl->newFCDTrie, status);
    }
    return (U_SUCCESS(*status));
}
/*
 * Builds the FCD trie for me from its normalization trie and stores it in
 * me->newFCDTrie.
 * Steps: open an empty trie, fill it by enumerating normTrie, give every
 * lead surrogate code unit a value derived from the ORed values of its
 * supplementary characters, then freeze the trie with 16-bit values.
 * Returns the frozen trie. On failure the trie is closed, me->newFCDTrie is
 * reset to NULL and NULL is returned.
 */
static UTrie2 *FCDTrieSingleton_createInstance(Normalizer2 *me, UErrorCode *errorCode) {
    UTrie2 *trie=utrie2_open(0, 0, errorCode);
    me->newFCDTrie=trie;
    if(U_SUCCESS(*errorCode)) {
        UChar lead=0xd800;
        utrie2_enum(me->normTrie, NULL, enumRangeHandler, me);
        while(lead<0xdc00) {
            uint32_t oredValue=utrie2_get32(trie, lead);
            utrie2_enumForLeadSurrogate(trie, lead, NULL, enumRangeOrValue, &oredValue);
            if(oredValue!=0) {
                /* Set a "bad" value for makeFCD() to break the quick check loop */
                /* and look up the value for the supplementary code point. */
                /* If there is any lccc, then set the worst-case lccc of 1. */
                /* The ORed-together value's tccc is already the worst case. */
                if(oredValue>0xff) {
                    oredValue=0x100|(oredValue&0xff);
                }
                utrie2_set32ForLeadSurrogateCodeUnit(trie, lead, oredValue, errorCode);
            }
            ++lead;
        }
        utrie2_freeze(trie, UTRIE2_16_VALUE_BITS, errorCode);
        if(U_SUCCESS(*errorCode)) {
            return trie;
        }
    }
    /* Failure: discard whatever was built. */
    utrie2_close(trie);
    me->newFCDTrie=NULL;
    return NULL;
}
#endif
/*
 * Releases the resources held by a normalizer instance: the FCD trie (only
 * when UNORM_ENABLE_FCD is non-zero), the loaded data memory and the
 * normalization trie. The Normalizer2 struct itself is not freed here.
 */
static void U_CALLCONV Normalizer2_close(struct Normalizer2* _this) {
#if UNORM_ENABLE_FCD
utrie2_close(_this->newFCDTrie);
#endif
udata_close(_this->memory);
utrie2_close(_this->normTrie);
}
/*
 * Returns the combining class encoded in norm16: its low byte when norm16 is
 * at or above MIN_NORMAL_MAYBE_YES, otherwise 0.
 */
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
    if(norm16<MIN_NORMAL_MAYBE_YES) {
        return 0;
    }
    return (uint8_t)norm16;
}
/*
 * True if norm16 is below minYesNo, or equals MIN_NORMAL_MAYBE_YES or JAMO_VT.
 * Used by the decomposition fast path to skip such characters.
 */
static UBool isMostDecompYesAndZeroCC(Normalizer2* _this, uint16_t norm16) {
    if(norm16<_this->minYesNo) {
        return 1;
    }
    return norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
}
/* True if norm16 is below minYesNo or at/above minMaybeYes, i.e. the character does not decompose. */
static UBool isDecompYes(Normalizer2* _this,uint16_t norm16) {
    if(norm16<_this->minYesNo) {
        return 1;
    }
    return _this->minMaybeYes<=norm16;
}
/* Quick check for the no-op normalizer: ignores all arguments and always answers UNORM_YES. */
static UNormalizationCheckResult U_CALLCONV Normalizer2_noop_quickCheck(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
    (void)n;
    (void)s;
    (void)length;
    (void)pErrorCode;
    return UNORM_YES;
}
/*
 * Decomposes the Hangul syllable c arithmetically into its Jamo and writes
 * them to buffer. Returns the number of Jamo written: 2 when the syllable
 * has no trailing consonant, otherwise 3.
 */
static int32_t Hangul_decompose(UChar32 c, UChar buffer[3]) {
    UChar32 syllableIndex=c-HANGUL_BASE;
    UChar32 trailIndex=syllableIndex%JAMO_T_COUNT;
    UChar32 leadVowelIndex=syllableIndex/JAMO_T_COUNT;

    buffer[0]=(UChar)(JAMO_L_BASE+leadVowelIndex/JAMO_V_COUNT);
    buffer[1]=(UChar)(JAMO_V_BASE+leadVowelIndex%JAMO_V_COUNT);
    if(trailIndex!=0) {
        buffer[2]=(UChar)(JAMO_T_BASE+trailIndex);
        return 3;
    }
    return 2;
}
/* Returns the buffer's limit pointer (one past the last code unit written). */
static UChar *ReorderingBuffer_getLimit(ReorderingBuffer* buffer) {
    UChar *end=buffer->limit;
    return end;
}
/* Returns the buffer's start pointer (the first code unit of the contents). */
static UChar *ReorderingBuffer_getStart(ReorderingBuffer* buffer) {
    UChar *begin=buffer->start;
    return begin;
}
/* True if the buffer holds no code units (limit equals start). */
static UBool ReorderingBuffer_isEmpty(ReorderingBuffer* buffer) {
    return buffer->limit==buffer->start;
}
/* Overwrites the last code unit in the buffer with c. The buffer must not be empty. */
static void ReorderingBuffer_setLastChar(ReorderingBuffer* buffer, UChar c) {
    buffer->limit[-1]=c;
}
/*
 * Moves the buffer's limit to newLimit, adjusts remainingCapacity by the
 * number of code units between the old and the new limit, and restarts
 * reordering at the new limit (reorderStart=limit, lastCC=0).
 */
static void ReorderingBuffer_setReorderingLimit(ReorderingBuffer* buffer, UChar* newLimit) {
    int32_t reclaimed=(int32_t)(buffer->limit-newLimit);
    buffer->remainingCapacity+=reclaimed;
    buffer->limit=newLimit;
    buffer->reorderStart=newLimit;
    buffer->lastCC=0;
}
/*
 * Initializes buffer as an empty reordering buffer that writes into the
 * caller-provided array dest with room for capacity UChars.
 *
 * reorderStart must begin at the start of the contents, not NULL:
 * ReorderingBuffer_previousCC() stops its backward scan only when
 * reorderStart>=codePointStart, and ReorderingBuffer_resize() computes
 * reorderStart-start. With a NULL reorderStart the scan could run past the
 * beginning of the buffer whenever the first appended character has a
 * combining class greater than 1.
 *
 * @param buffer   the buffer to initialize
 * @param n        the normalizer whose data the buffer uses for lookups
 * @param dest     caller-owned destination array
 * @param capacity capacity of dest, in UChars
 */
static void ReorderingBuffer_construct(ReorderingBuffer *buffer, Normalizer2 *n, UChar *dest, int32_t capacity) {
    buffer->impl = n;
    buffer->str = dest;
    buffer->lastCC=0;
    buffer->start=dest;
    buffer->limit = dest;
    buffer->reorderStart=dest;
    /* Give the private backward iterator a defined initial state. */
    buffer->codePointStart=dest;
    buffer->codePointLimit=dest;
    buffer->capacity = capacity;
    buffer->remainingCapacity=capacity;
}
/*
 * Grows the buffer so that at least appendLength more UChars fit.
 * The new capacity is the largest of: current length + appendLength, twice
 * the current capacity, and 256 UChars.
 *
 * If the buffer still uses the caller's array (start==str), a heap block is
 * allocated and the current contents are copied into it; otherwise the
 * existing heap block is reallocated. All capacities are counted in UChars,
 * so allocation sizes are multiplied by the UChar size.
 *
 * @param buffer       the buffer to grow
 * @param appendLength number of UChars the caller is about to append
 * @param errorCode    set to U_MEMORY_ALLOCATION_ERROR if allocation fails
 * @return TRUE on success; FALSE on allocation failure, in which case the
 *         buffer keeps its previous storage and contents
 */
static UBool ReorderingBuffer_resize(ReorderingBuffer *buffer, int32_t appendLength, UErrorCode *errorCode) {
    int32_t reorderStartIndex=(int32_t)(buffer->reorderStart-buffer->start);
    int32_t length=(int32_t)(buffer->limit-buffer->start);
    int32_t newCapacity=length+appendLength;
    int32_t doubleCapacity=2*buffer->capacity;
    UChar *newStart;
    if(newCapacity<doubleCapacity) {
        newCapacity=doubleCapacity;
    }
    if(newCapacity<256) {
        newCapacity=256;
    }
    if(buffer->start!=buffer->str) {
        /* it's not the string passed in -- resize it */
        /* Use a temporary so the old block is not lost if realloc fails. */
        newStart=(UChar *)uprv_realloc(buffer->start, newCapacity*sizeof(buffer->start[0]));
    } else {
        /* ran out of room -- make a new buffer */
        newStart=(UChar *)uprv_malloc(newCapacity*sizeof(buffer->start[0]));
        if(newStart!=NULL && length>0) {
            uprv_memcpy(newStart, buffer->str, length*sizeof(buffer->start[0]));
        }
    }
    if(newStart==NULL) {
        *errorCode=U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    buffer->start=newStart;
    buffer->capacity = newCapacity;
    /* Re-base the interior pointers onto the new block. */
    buffer->reorderStart=buffer->start+reorderStartIndex;
    buffer->limit=buffer->start+length;
    buffer->remainingCapacity=buffer->capacity-length;
    return TRUE;
}
/* True if the buffer contents are identical, in length and code units, to [oStart, oLimit[. */
static UBool ReorderingBuffer_equals(ReorderingBuffer *b, const UChar *oStart, const UChar *oLimit) {
    int32_t bufferLength=(int32_t)(b->limit-b->start);
    int32_t otherLength=(int32_t)(oLimit-oStart);
    if(bufferLength!=otherLength) {
        return 0;
    }
    return 0==u_memcmp(b->start, oStart, bufferLength);
}
/* Empties the buffer: limit and reorderStart return to start and the full capacity becomes available again. */
static void ReorderingBuffer_remove(ReorderingBuffer *b) {
    b->limit=b->start;
    b->reorderStart=b->start;
    b->lastCC=0;
    b->remainingCapacity=b->capacity;
}
/*
 * Writes code point c at p as UTF-16: one unit for c<=0xffff, otherwise a
 * lead/trail surrogate pair at p[0] and p[1].
 */
static void ReorderingBuffer_writeCodePoint(UChar *p, UChar32 c) {
    if(c>0xffff) {
        p[0]=U16_LEAD(c);
        p[1]=U16_TRAIL(c);
        return;
    }
    *p=(UChar)c;
}
#define setIterator() (buffer->codePointStart=buffer->limit)
/*
 * Moves the private backward iterator one code point towards the start:
 * codePointLimit becomes the old codePointStart, and codePointStart moves
 * back by one code unit, or by two when that lands on the trail half of a
 * surrogate pair. Requires start<codePointStart.
 */
static void ReorderingBuffer_skipPrevious(ReorderingBuffer *buffer)
{
    UChar *p=buffer->codePointStart;
    UChar unit;

    buffer->codePointLimit=p;
    unit=*--p;
    if(U16_IS_TRAIL(unit) && buffer->start<p && U16_IS_LEAD(*(p-1))) {
        --p;
    }
    buffer->codePointStart=p;
}
/*
 * Steps the private backward iterator to the previous code point and returns
 * its combining class. Returns 0 without moving if the iterator has reached
 * reorderStart, and 0 for code units below MIN_CCC_LCCC_CP.
 */
static uint8_t ReorderingBuffer_previousCC(ReorderingBuffer *buffer) {
    Normalizer2 *_this = buffer->impl; /* read by the getNorm16() macro */
    UChar32 c;
    UChar lead;

    buffer->codePointLimit=buffer->codePointStart;
    if(buffer->codePointStart<=buffer->reorderStart) {
        return 0;
    }
    c=*--(buffer->codePointStart);
    if(c<MIN_CCC_LCCC_CP) {
        return 0;
    }
    /* Combine a trail surrogate with a preceding lead surrogate, if any. */
    if(U16_IS_TRAIL(c) && buffer->start<buffer->codePointStart) {
        lead=*(buffer->codePointStart-1);
        if(U16_IS_LEAD(lead)) {
            --(buffer->codePointStart);
            c=U16_GET_SUPPLEMENTARY(lead, c);
        }
    }
    return getCCFromYesOrMaybe(getNorm16(c));
}
/*
 * Inserts code point c with combining class cc in canonical order: scans
 * backward past all characters whose combining class is greater than cc,
 * shifts them up by the UTF-16 length of c, and writes c into the gap.
 * The caller must already have ensured capacity and adjusts
 * remainingCapacity itself.
 */
static void ReorderingBuffer_insert(ReorderingBuffer *buffer, UChar32 c, uint8_t cc) {
    UChar *from;
    UChar *to;

    setIterator();
    ReorderingBuffer_skipPrevious(buffer);
    while(ReorderingBuffer_previousCC(buffer)>cc) {
        /* keep stepping back */
    }
    /* insert c at codePointLimit, after the character with prevCC<=cc */
    from=buffer->limit;
    buffer->limit+=U16_LENGTH(c);
    to=buffer->limit;
    do {
        *--to=*--from;
    } while(from!=buffer->codePointLimit);
    ReorderingBuffer_writeCodePoint(from, c);
    if(cc<=1) {
        buffer->reorderStart=to;
    }
}
/*
 * Drops the last suffixLength code units from the buffer, or everything if
 * suffixLength is not smaller than the current length, then restarts
 * reordering at the new limit.
 */
static void ReorderingBuffer_removeSuffix(ReorderingBuffer *b, int32_t suffixLength) {
    if(suffixLength>=(b->limit-b->start)) {
        b->limit=b->start;
        b->remainingCapacity=b->capacity;
    } else {
        b->limit-=suffixLength;
        b->remainingCapacity+=suffixLength;
    }
    b->reorderStart=b->limit;
    b->lastCC=0;
}
/* Returns the number of code units currently in the buffer. */
static int32_t ReorderingBuffer_length(ReorderingBuffer *buffer) {
    int32_t count=(int32_t)(buffer->limit-buffer->start);
    return count;
}
/* Returns the lastCC value recorded by the most recent buffer operation. */
static uint8_t ReorderingBuffer_getLastCC(ReorderingBuffer *buffer) {
    uint8_t cc=buffer->lastCC;
    return cc;
}
/*
 * Appends the supplementary code point c (two UTF-16 code units) with
 * combining class cc, growing the buffer if fewer than two units remain.
 * If cc is non-zero and lower than lastCC, c is inserted in canonical order
 * instead of being placed at the end.
 * Returns FALSE only if the buffer could not be grown.
 */
static UBool ReorderingBuffer_appendSupplementary(ReorderingBuffer *buffer, UChar32 c, uint8_t cc, UErrorCode *errorCode) {
    if(buffer->remainingCapacity<2) {
        if(!ReorderingBuffer_resize(buffer, 2, errorCode)) {
            return FALSE;
        }
    }
    if(cc!=0 && cc<buffer->lastCC) {
        ReorderingBuffer_insert(buffer, c, cc);
    } else {
        UChar *dest=buffer->limit;
        dest[0]=U16_LEAD(c);
        dest[1]=U16_TRAIL(c);
        buffer->limit=dest+2;
        buffer->lastCC=cc;
        if(cc<=1) {
            buffer->reorderStart=buffer->limit;
        }
    }
    buffer->remainingCapacity-=2;
    return TRUE;
}
/*
 * Appends the single code unit c with combining class cc, growing the
 * buffer if no capacity remains. If cc is non-zero and lower than lastCC,
 * c is inserted in canonical order instead of being placed at the end.
 * Returns FALSE only if the buffer could not be grown.
 */
static UBool ReorderingBuffer_appendBMP(ReorderingBuffer *buffer, UChar c, uint8_t cc, UErrorCode *errorCode) {
    if(buffer->remainingCapacity==0) {
        if(!ReorderingBuffer_resize(buffer, 1, errorCode)) {
            return FALSE;
        }
    }
    if(cc!=0 && cc<buffer->lastCC) {
        ReorderingBuffer_insert(buffer, c, cc);
    } else {
        *(buffer->limit)=c;
        ++(buffer->limit);
        buffer->lastCC=cc;
        if(cc<=1) {
            buffer->reorderStart=buffer->limit;
        }
    }
    buffer->remainingCapacity-=1;
    return TRUE;
}
/* Appends code point c with combining class cc, dispatching on whether c fits in one UTF-16 code unit. */
static UBool ReorderingBuffer_append(ReorderingBuffer *buffer, UChar32 c, uint8_t cc, UErrorCode *errorCode) {
    if(c<=0xffff) {
        return ReorderingBuffer_appendBMP(buffer,(UChar)c, cc, errorCode);
    }
    return ReorderingBuffer_appendSupplementary(buffer,c, cc, errorCode);
}
/*
 * Frees the buffer's storage if, and only if, the buffer allocated it
 * itself. The caller's original array ("str") is never freed. A NULL buffer
 * or NULL storage is ignored.
 */
static void ReorderingBuffer_close(ReorderingBuffer *buffer) {
    if(buffer==NULL) {
        return;
    }
    if(buffer->start==NULL) {
        return;
    }
    if(buffer->start==buffer->str) {
        /* still the user's original buffer: not ours to free */
        return;
    }
    uprv_free(buffer->start);
}
/*
 * Placeholder kept for parity with the C++ API: all initialization is done
 * by ReorderingBuffer_construct(), so this ignores its arguments and always
 * returns TRUE.
 */
static UBool ReorderingBuffer_init(ReorderingBuffer *buffer, int32_t destCapacity, UErrorCode *pErrorCode) {
    (void)buffer;
    (void)destCapacity;
    (void)pErrorCode;
    return TRUE;
}
/*
 * Delivers the buffer contents to the caller's destination array and
 * terminates it.
 *
 * If the buffer outgrew the caller's array and now lives in its own heap
 * block, as many code units as fit are copied back into dest. The copy is
 * length-based (u_memcpy), so contents that include U+0000 are copied in
 * full; a string copy would stop at the first NUL.
 *
 * @param buffer     the buffer to extract from
 * @param n          unused
 * @param dest       destination array
 * @param capacity   capacity of dest, in UChars
 * @param pErrorCode passed to u_terminateUChars(), which reports overflow
 * @return the full length of the buffer contents, as returned by
 *         u_terminateUChars()
 */
static int32_t ReorderingBuffer_extract(ReorderingBuffer *buffer, Normalizer2 *n, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    int32_t length;
    (void)n;
    if(buffer->start==NULL) {
        /* No storage at all (e.g. preflighting with a NULL dest): nothing to copy. */
        return u_terminateUChars(dest,capacity,0,pErrorCode);
    }
    length = (int32_t)(buffer->limit - buffer->start);
    if(buffer->str!=buffer->start) { /* did we make a new buffer? then copy */
        int32_t tlen = length;
        if(tlen > capacity) {
            tlen = capacity;
        }
        if(tlen > 0) {
            u_memcpy(dest,buffer->start,tlen);
        }
    }
    return u_terminateUChars(dest,capacity,length,pErrorCode);
}
/*
 * Appends the code units [s, sLimit[ verbatim, growing the buffer if needed,
 * and restarts reordering after them (lastCC=0, reorderStart=limit).
 * An empty range is a successful no-op.
 * Returns FALSE only if the buffer could not be grown.
 */
static UBool ReorderingBuffer_appendZeroCCStr(ReorderingBuffer *buffer, const UChar *s, const UChar *sLimit, UErrorCode *errorCode) {
    int32_t count;

    if(s==sLimit) {
        return TRUE;
    }
    count=(int32_t)(sLimit-s);
    if(buffer->remainingCapacity<count) {
        if(!ReorderingBuffer_resize(buffer, count, errorCode)) {
            return FALSE;
        }
    }
    u_memcpy(buffer->limit, s, count);
    buffer->limit+=count;
    buffer->reorderStart=buffer->limit;
    buffer->lastCC=0;
    buffer->remainingCapacity-=count;
    return TRUE;
}
/*
 * Appends the string s of the given length, whose first code point has
 * combining class leadCC and whose last has trailCC.
 * s must be in NFD, otherwise change the implementation.
 * If the string can simply follow the current contents (lastCC<=leadCC or
 * leadCC==0) it is copied as a whole; otherwise its first code point is
 * inserted in canonical order and the remaining code points are appended
 * one by one. Returns FALSE only if the initial resize fails.
 */
static UBool ReorderingBuffer_appendLeadTrail(ReorderingBuffer *buffer, const UChar *s, int32_t length,
uint8_t leadCC, uint8_t trailCC,
UErrorCode *errorCode) {
if(length==0) {
return TRUE;
}
if(buffer->remainingCapacity<length && !ReorderingBuffer_resize(buffer, length, errorCode)) {
return FALSE;
}
/* The whole string is accounted for up front. */
buffer->remainingCapacity-=length;
if(buffer->lastCC<=leadCC || leadCC==0) {
const UChar *sLimit=s+length;
if(trailCC<=1) {
buffer->reorderStart=buffer->limit+length;
} else if(leadCC<=1) {
buffer->reorderStart=buffer->limit+1; /* Ok if not a code point boundary. */
}
do { *buffer->limit++=*s++; } while(s!=sLimit);
buffer->lastCC=trailCC;
} else {
int32_t i=0;
UChar32 c;
U16_NEXT(s, i, length, c);
ReorderingBuffer_insert(buffer, c, leadCC); /* insert first code point */
while(i<length) {
U16_NEXT(s, i, length, c);
if(i<length) {
/* s must be in NFD, otherwise we need to use getCC(). */
Normalizer2 *_this = buffer->impl;
leadCC=getCCFromYesOrMaybe(getNorm16(c));
} else {
leadCC=trailCC;
}
/* NOTE(review): ReorderingBuffer_append() decrements remainingCapacity again for these code points and its result is ignored -- confirm this is intended. */
ReorderingBuffer_append(buffer, c, leadCC, errorCode);
}
}
return TRUE;
}
#if 0
UBool isEmpty() const { return start==limit; }
int32_t length() const { return (int32_t)(limit-start); }
UChar *getStart() { return start; }
UChar *getLimit() { return limit; }
uint8_t getLastCC() const { return lastCC; }
UBool equals(const UChar *start, const UChar *limit) const;
/* For Hangul composition, replacing the Leading consonant Jamo with the syllable. */
void setLastChar(UChar c) {
*(limit-1)=c;
}
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ?
appendBMP((UChar)c, cc, errorCode) :
appendSupplementary(c, cc, errorCode);
}
/* s must be in NFD, otherwise change the implementation. */
UBool append(const UChar *s, int32_t length,
uint8_t leadCC, uint8_t trailCC,
UErrorCode &errorCode);
UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
if(remainingCapacity==0 && !resize(1, errorCode)) {
return FALSE;
}
if(lastCC<=cc || cc==0) {
*limit++=c;
lastCC=cc;
if(cc<=1) {
reorderStart=limit;
}
} else {
insert(c, cc);
}
--remainingCapacity;
return TRUE;
}
UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
void remove();
void removeSuffix(int32_t suffixLength);
void setReorderingLimit(UChar *newLimit) {
remainingCapacity+=(int32_t)(limit-newLimit);
reorderStart=limit=newLimit;
lastCC=0;
}
UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
void insert(UChar32 c, uint8_t cc);
static void writeCodePoint(UChar *p, UChar32 c) {
if(c<=0xffff) {
*p=(UChar)c;
} else {
p[0]=U16_LEAD(c);
p[1]=U16_TRAIL(c);
}
}
UBool resize(int32_t appendLength, UErrorCode &errorCode);
const Normalizer2Impl &impl;
UnicodeString &str;
UChar *start, *reorderStart, *limit;
int32_t remainingCapacity;
uint8_t lastCC;
/* private backward iterator */
void setIterator() { codePointStart=limit; }
void skipPrevious(); /* Requires start<codePointStart. */
uint8_t previousCC(); /* Returns 0 if there is no previous character. */
UChar *codePointStart, *codePointLimit;
#endif
/*
 * normalize() implementation for composing normalizers.
 * Composes src (length code units, or NUL-terminated when length is
 * negative) into dest via a ReorderingBuffer and returns the result length
 * as reported by ReorderingBuffer_extract().
 * Returns 0 immediately if *pErrorCode already indicates a failure.
 */
static int32_t U_CALLCONV Normalizer2_comp_normalize (struct Normalizer2 *_this,
                                                      const UChar *src, int32_t length,
                                                      UChar *dest, int32_t capacity,
                                                      UErrorCode *pErrorCode) {
    ReorderingBuffer buffer;
    const UChar *limit;
    int32_t resultLength;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    ReorderingBuffer_construct(&buffer, _this, dest, capacity);
    /* A NULL limit tells the composer that src is NUL-terminated. */
    limit = length>=0 ? src+length : NULL;
    if(ReorderingBuffer_init(&buffer, capacity, pErrorCode)) {
        Normalizer2_comp_compose(_this, src, limit, _this->onlyContiguous, TRUE, &buffer, pErrorCode);
    }
    resultLength = ReorderingBuffer_extract(&buffer, _this, dest, capacity, pErrorCode);
    ReorderingBuffer_close(&buffer);
    return resultLength;
}
/*
 * normalize() implementation for the no-op normalizer: copies src to dest
 * unchanged.
 *
 * The real source length is determined first (u_strlen() for a negative
 * length, i.e. NUL-terminated input). Both the number of code units copied
 * and the returned length are based on that real length, so a
 * NUL-terminated source longer than capacity is truncated rather than
 * written past the end of dest. The copy is length-based and tolerates
 * src==dest.
 *
 * @param n          unused
 * @param src        source text
 * @param length     number of code units in src, or negative if src is
 *                   NUL-terminated
 * @param dest       destination array
 * @param capacity   capacity of dest, in UChars
 * @param pErrorCode in/out ICU error code; u_terminateUChars() reports
 *                   overflow through it
 * @return the full source length (0 if *pErrorCode was already a failure)
 */
static int32_t U_CALLCONV Normalizer2_noop_normalize (struct Normalizer2 *n,
                                                      const UChar *src, int32_t length,
                                                      UChar *dest, int32_t capacity,
                                                      UErrorCode *pErrorCode) {
    int32_t srcLength;
    int32_t copyLength;
    (void)n;
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    srcLength = (length<0) ? u_strlen(src) : length;
    /* Never copy more than the destination can hold. */
    copyLength = (srcLength<capacity) ? srcLength : capacity;
    if(copyLength>0 && dest!=src) {
        u_memmove(dest,src,copyLength);
    }
    return u_terminateUChars(dest,capacity,srcLength,pErrorCode);
}
/*
 * Support for NUL-terminated input: scans the leading run of code units that
 * are below minNeedDataCP (and not NUL), which need no data lookup, and
 * appends that prefix to buffer if buffer is not NULL.
 * Returns a pointer to the first code unit that was not consumed (the
 * terminating NUL or the first unit >= minNeedDataCP), so the caller can
 * determine the remaining length and process the rest normally.
 */
static const UChar *
Normalizer2_fcd_copyLowPrefixFromNulTerminated(Normalizer2 *_this, const UChar *src,
                                               UChar32 minNeedDataCP,
                                               ReorderingBuffer *buffer,
                                               UErrorCode *errorCode) {
    const UChar *prefixStart=src;
    const UChar *p=src;

    for(;;) {
        UChar c=*p;
        if(c>=minNeedDataCP || c==0) {
            break;
        }
        ++p;
    }
    /* Copy the prefix, if there is one and there is somewhere to put it. */
    if(p!=prefixStart && buffer!=NULL) {
        ReorderingBuffer_appendZeroCCStr(buffer, prefixStart, p, errorCode);
    }
    return p;
}
/*
 * Returns the compositions list for a character that does not decompose, or
 * NULL if norm16 is 0 or at/above MIN_NORMAL_MAYBE_YES.
 * Values below minMaybeYes index into extraData; the others index into
 * maybeYesCompositions relative to minMaybeYes.
 */
static const uint16_t *Normalizer2_getCompositionsListForDecompYes(Normalizer2 *_this, uint16_t norm16) {
    if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
        return NULL;
    }
    if(norm16>=_this->minMaybeYes) {
        return _this->maybeYesCompositions+norm16-_this->minMaybeYes;
    }
    /* for yesYes; if Jamo L: harmless empty list */
    return _this->extraData+norm16;
}
/*
 * Returns the compositions list of a composite. A composite's extra data
 * holds its mapping followed by its compositions list, so the list starts
 * after the first unit, the mapping itself, and the optional ccc/lccc word.
 */
static const uint16_t *Normalizer2_getCompositionsListForComposite(Normalizer2 *_this, uint16_t norm16) {
    const uint16_t *mapping=_this->extraData+norm16; /* composite has both mapping & compositions list */
    uint16_t firstUnit=*mapping;
    int32_t mappingLength=firstUnit&MAPPING_LENGTH_MASK;
    int32_t hasCccLcccWord=(firstUnit>>7)&1; /* 1 if MAPPING_HAS_CCC_LCCC_WORD */

    /* +1 skips the first unit, which carries the mapping length */
    return mapping+1+mappingLength+hasCccLcccWord;
}
#if UNORM_ENABLE_FCD
/* Dual functionality: */
/* buffer!=NULL: normalize */
/* buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes */
/*
 * FCD check / normalization over [src, limit[ (limit==NULL means
 * NUL-terminated). With buffer==NULL, returns the last FCD-safe boundary as
 * soon as a combining-class ordering violation is found. With buffer!=NULL
 * the normalizing paths are stubs that call unimp().
 * This function is only compiled when UNORM_ENABLE_FCD is non-zero.
 * NOTE(review): the unimp() macro is only defined inside an "#if 0" section
 * of this file, so enabling this code needs that definition restored.
 */
const UChar *
Normalizer2_fcd_makeFCD(Normalizer2 *_this, const UChar *src, const UChar *limit,
ReorderingBuffer *buffer,
UErrorCode *errorCode) {
const UChar *prevBoundary=src;
const UChar *prevSrc;
UChar32 c=0;
int32_t prevFCD16=0;
uint16_t fcd16=0;
const UTrie2 *trie=_this->newFCDTrie;
if(limit==NULL) {
src=Normalizer2_fcd_copyLowPrefixFromNulTerminated(_this,src, MIN_CCC_LCCC_CP, buffer, errorCode);
if(U_FAILURE(*errorCode)) {
return src;
}
limit=u_strchr(src, 0);
}
/* Note: In this function we use buffer->appendZeroCC() because we track */
/* the lead and trail combining classes here, rather than leaving it to */
/* the ReorderingBuffer. */
/* The exception is the call to decomposeShort() which uses the buffer */
/* in the normal way. */
/* Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. */
/* Similar to the prevBoundary in the compose() implementation. */
for(;;) {
/* count code units with lccc==0 */
for(prevSrc=src; src!=limit;) {
if((c=*src)<MIN_CCC_LCCC_CP) {
/* Defer the lookup: remember the code unit as a negative value. */
prevFCD16=~c;
++src;
} else if((fcd16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c))<=0xff) {
prevFCD16=fcd16;
++src;
} else if(!U16_IS_SURROGATE(c)) {
break;
} else {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
--src;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
}
if((fcd16=getFCD16(c))<=0xff) {
prevFCD16=fcd16;
src+=U16_LENGTH(c);
} else {
break;
}
}
}
/* copy these code units all at once */
if(src!=prevSrc) {
if(buffer!=NULL && unimp(errorCode) /* !buffer->appendZeroCC(prevSrc, src, errorCode) */) { /* FCD */
break;
}
if(src==limit) {
break;
}
prevBoundary=src;
/* We know that the previous character's lccc==0. */
if(prevFCD16<0) {
/* Fetching the fcd16 value was deferred for this below-U+0300 code point. */
prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16);
if(prevFCD16>1) {
--prevBoundary;
}
} else {
const UChar *p=src-1;
if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
--p;
/* Need to fetch the previous character's FCD value because */
/* prevFCD16 was just for the trail surrogate code point. */
prevFCD16=getFCD16FromSurrogatePair(p[0], p[1]);
/* Still known to have lccc==0 because its lead surrogate unit had lccc==0. */
}
if(prevFCD16>1) {
prevBoundary=p;
}
}
/* The start of the current character (c). */
prevSrc=src;
} else if(src==limit) {
break;
}
src+=U16_LENGTH(c);
/* The current character (c) at [prevSrc..src[ has a non-zero lead combining class. */
/* Check for proper order, and decompose locally if necessary. */
if((prevFCD16&0xff)<=(fcd16>>8)) {
/* proper order: prev tccc <= current lccc */
if((fcd16&0xff)<=1) {
prevBoundary=src;
}
if(buffer!=NULL && unimp(errorCode) /* !buffer->appendZeroCC(c, errorCode) */) { /* FCD */
break;
}
prevFCD16=fcd16;
continue;
} else if(buffer==NULL) {
return prevBoundary; /* quick check "no" */
} else {
unimp(errorCode); /* FCD */
#if 0
/*
* Back out the part of the source that we copied or appended
* already but is now going to be decomposed.
* prevSrc is set to after what was copied/appended.
*/
buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
/*
* Find the part of the source that needs to be decomposed,
* up to the next safe boundary.
*/
src=findNextFCDBoundary(src, limit);
/*
* The source text does not fulfill the conditions for FCD.
* Decompose and reorder a limited piece of the text.
*/
if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) {
break;
}
prevBoundary=src;
prevFCD16=0;
#endif
}
}
return src;
}
#endif
/*
 * Appends the decomposition of code point c (with trie value norm16) to
 * buffer:
 *  - a character that does not decompose is appended with its combining class;
 *  - a Hangul syllable is decomposed arithmetically into Jamo;
 *  - an algorithmic 1:1 mapping is followed and the result re-examined;
 *  - otherwise the mapping stored in the extra data is appended with its
 *    lead and trail combining classes.
 * Returns the result of the buffer append.
 */
static UBool Normalizer2Impl_decomposeChar(Normalizer2 *_this, UChar32 c, uint16_t norm16,
                                           ReorderingBuffer *buffer,
                                           UErrorCode *errorCode) {
    const uint16_t *mapping;
    uint16_t firstUnit;
    int32_t length;
    uint8_t leadCC;
    uint8_t trailCC;

    /* Only loops for 1:1 algorithmic mappings. */
    for(;;) {
        if(isDecompYes(_this,norm16)) {
            /* c does not decompose */
            return ReorderingBuffer_append(buffer, c, getCCFromYesOrMaybe(norm16), errorCode);
        }
        if(isHangul(norm16)) {
            /* Hangul syllable: decompose algorithmically */
            UChar jamos[3];
            return ReorderingBuffer_appendZeroCCStr(buffer, jamos, jamos+Hangul_decompose(c, jamos), errorCode);
        }
        if(!isDecompNoAlgorithmic(norm16)) {
            break;
        }
        c=mapAlgorithmic(c, norm16);
        norm16=getNorm16(c);
    }

    /* c decomposes, get everything from the variable-length extra data */
    mapping=getMapping(norm16);
    firstUnit=mapping[0];
    ++mapping;
    length=firstUnit&MAPPING_LENGTH_MASK;
    trailCC=(uint8_t)(firstUnit>>8);
    leadCC=0;
    if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
        leadCC=(uint8_t)(*mapping>>8);
        ++mapping;
    }
    return ReorderingBuffer_appendLeadTrail(buffer, (const UChar *)mapping, length, leadCC, trailCC, errorCode);
}
/* Dual functionality: */
/* buffer!=NULL: normalize */
/* buffer==NULL: isNormalized/spanQuickCheckYes */
/*
 * Decomposes [src, limit[ (limit==NULL means NUL-terminated).
 *
 * Dual functionality:
 *   buffer!=NULL: normalize -- the decomposed text is appended to buffer.
 *   buffer==NULL: isNormalized/spanQuickCheckYes -- nothing is written; the
 *                 function returns the last safe boundary as soon as it
 *                 finds a character that decomposes or whose combining class
 *                 is out of order.
 *
 * Each round of the main loop first skips a run of code units that need no
 * work (below minDecompNoCP, or "most decomp yes and zero cc"), copies that
 * run in one go, and then handles the single code point that ended the run.
 *
 * @return when normalizing: the point up to which src was consumed (limit
 *         unless an append failed); when quick checking: limit if all text
 *         passed, otherwise the last boundary before the failing character
 */
static const UChar *
Normalizer2_decomp_decompose(Normalizer2 *_this, const UChar *src, const UChar *limit,
                             ReorderingBuffer *buffer,
                             UErrorCode *errorCode) {
    /* only for quick check */
    const UChar *prevBoundary=src;
    uint8_t prevCC=0;
    const UChar *prevSrc;
    UChar32 c=0;
    uint16_t norm16=0;
    UChar32 minNoCP=_this->minDecompNoCP;
    if(limit==NULL) {
        src=Normalizer2_fcd_copyLowPrefixFromNulTerminated(_this, src, minNoCP, buffer, errorCode);
        if(U_FAILURE(*errorCode)) {
            return src;
        }
        limit=u_strchr(src, 0);
    }
    for(;;) {
        /* count code units below the minimum or with irrelevant data for the quick check */
        for(prevSrc=src; src!=limit;) {
            if( (c=*src)<minNoCP ||
                isMostDecompYesAndZeroCC(_this, norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(_this->normTrie, c))
            ) {
                ++src;
            } else if(!U16_IS_SURROGATE(c)) {
                break;
            } else {
                UChar c2;
                if(U16_IS_SURROGATE_LEAD(c)) {
                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
                        c=U16_GET_SUPPLEMENTARY(c, c2);
                    }
                } else /* trail surrogate */ {
                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
                        --src;
                        c=U16_GET_SUPPLEMENTARY(c2, c);
                    }
                }
                if(isMostDecompYesAndZeroCC(_this, norm16=getNorm16(c))) {
                    src+=U16_LENGTH(c);
                } else {
                    break;
                }
            }
        }
        /* copy these code units all at once */
        if(src!=prevSrc) {
            if(buffer!=NULL) {
                /* Stop only if the append FAILED; on success keep going. */
                if(!ReorderingBuffer_appendZeroCCStr(buffer, prevSrc, src, errorCode)) {
                    break;
                }
            } else {
                prevCC=0;
                prevBoundary=src;
            }
        }
        if(src==limit) {
            break;
        }
        /* Check one above-minimum, relevant code point. */
        src+=U16_LENGTH(c);
        if(buffer!=NULL) {
            if(!Normalizer2Impl_decomposeChar(_this, c, norm16, buffer, errorCode)) {
                break;
            }
        } else {
            if(isDecompYes(_this,norm16)) {
                uint8_t cc=getCCFromYesOrMaybe(norm16);
                if(prevCC<=cc || cc==0) {
                    prevCC=cc;
                    if(cc<=1) {
                        prevBoundary=src;
                    }
                    continue;
                }
            }
            return prevBoundary; /* "no" or cc out of order*/
        }
    }
    return src;
}
/*
 * Returns the trailing combining class of the single code point stored in
 * [cpStart, cpLimit[.
 * A one-unit range is read as a BMP code point; anything else is read as a
 * surrogate pair from the first two units.
 * Values at or below minYesNo yield 0; otherwise the high byte of the first
 * mapping unit is returned.
 */
static uint8_t Normalizer2_getTrailCCFromCompYesAndZeroCC(Normalizer2 *_this, const UChar *cpStart, const UChar *cpLimit) {
    UChar32 cp;
    uint16_t n16;
    if(cpStart!=(cpLimit-1)) {
        cp=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
    } else {
        cp=*cpStart;
    }
    n16=getNorm16(cp);
    if(n16>_this->minYesNo) {
        /* tccc from yesNo */
        return (uint8_t)(*getMapping(n16)>>8);
    }
    /* yesYes and Hangul LV/LVT have ccc=tccc=0 */
    return 0;
}
/*
 * Appends [src, limit[ to a buffer that may already hold text, re-composing
 * across the seam between the existing text and the new text.
 *
 * If the buffer is not empty and src does not start at a composition boundary,
 * the "middle" piece -- the buffer tail starting at its last composition
 * boundary plus the source head up to its first composition boundary -- is
 * copied into a temporary array, removed from the buffer, and composed back
 * into the buffer. The remainder of the source is then either composed
 * (doCompose) or appended as-is.
 *
 * On allocation failure *errorCode is set to U_MEMORY_ALLOCATION_ERROR and the
 * buffer is left untouched.
 */
static void Normalizer2Impl_composeAndAppend(Normalizer2 *_this, const UChar *src, const UChar *limit,
                                             UBool doCompose,
                                             UBool onlyContiguous,
                                             ReorderingBuffer *buffer,
                                             UErrorCode *errorCode) {
    if(!ReorderingBuffer_isEmpty(buffer)) {
        const UChar *firstStarterInSrc=Normalizer2Impl_findNextCompBoundary(_this, src, limit);
        if(src!=firstStarterInSrc) {
            UChar *middleStart;
            int32_t destSuffixLength;  /* length of [lastStarterInDest..buffer limit[ */
            int32_t srcPrefixLength;   /* length of [src..firstStarterInSrc[ */
            int32_t middleLength;
            const UChar *lastStarterInDest=Normalizer2Impl_findPreviousCompBoundary(_this, ReorderingBuffer_getStart(buffer),
                                                                                   ReorderingBuffer_getLimit(buffer));
            /*
             * Compute both lengths once, BEFORE the suffix is removed:
             * removeSuffix() changes what getLimit() returns, so the lengths
             * must not be re-derived from the buffer afterwards.
             */
            destSuffixLength=(int32_t)(ReorderingBuffer_getLimit(buffer)-lastStarterInDest);
            srcPrefixLength=(int32_t)(firstStarterInSrc-src);
            middleLength=destSuffixLength+srcPrefixLength;
            middleStart=uprv_malloc(middleLength*sizeof(middleStart[0]));
            if(middleStart==NULL) {
                *errorCode=U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            /* middle := [lastStarterInDest..buffer limit[ */
            uprv_memcpy(middleStart, lastStarterInDest,
                        destSuffixLength*sizeof(middleStart[0]));
            ReorderingBuffer_removeSuffix(buffer, destSuffixLength);
            /* middle append [src..firstStarterInSrc[ */
            uprv_memcpy(middleStart+destSuffixLength, src,
                        srcPrefixLength*sizeof(middleStart[0]));
            Normalizer2_comp_compose(_this, middleStart, middleStart+middleLength, onlyContiguous, TRUE, buffer, errorCode);
            uprv_free(middleStart);
            if(U_FAILURE(*errorCode)) {
                return;
            }
            src=firstStarterInSrc;
        }
    }
    if(doCompose) {
        Normalizer2_comp_compose(_this, src, limit, onlyContiguous, TRUE, buffer, errorCode);
    } else {
        ReorderingBuffer_appendZeroCCStr(buffer, src, limit, errorCode);
    }
}
/**
 * Does c have a composition boundary before it?
 *
 * TRUE when c itself satisfies isCompYesAndZeroCC() (the shortcut that avoids
 * decomposing), or when its decomposition starts with such a character.
 * FALSE when c is a "maybe"/non-zero-cc character, when its mapping is empty,
 * or when the mapping carries a non-zero lead combining class.
 * Algorithmic mappings are followed until one of the above applies.
 */
static UBool Normalizer2Impl_hasCompBoundaryBefore(Normalizer2 *_this, UChar32 c, uint16_t norm16) {
    while(!isCompYesAndZeroCC(norm16)) {
        if(isMaybeOrNonZeroCC(norm16)) {
            return FALSE;
        }
        if(isDecompNoAlgorithmic(norm16)) {
            /* Follow the algorithmic mapping and test its target instead. */
            c=mapAlgorithmic(c, norm16);
            norm16=getNorm16(c);
            continue;
        }
        {
            /* c decomposes, get everything from the variable-length extra data */
            const uint16_t *mapping=getMapping(norm16);
            uint16_t firstUnit=*mapping++;
            UChar32 first;
            int32_t offset=0;
            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
                return FALSE;
            }
            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*mapping++&0xff00)) {
                return FALSE;  /* non-zero leadCC */
            }
            /* Decide based on the first code point of the mapping. */
            U16_NEXT_UNSAFE(mapping, offset, first);
            return isCompYesAndZeroCC(getNorm16(first));
        }
    }
    return TRUE;
}
/*
 * Does c have a composition boundary after it?
 *
 * onlyContiguous: when set, a mapped character only qualifies if its first
 *                 mapping unit is <=0x1ff (see the FCC note below).
 *                 The caller-supplied flag is honored here; it is no longer
 *                 silently replaced by _this->onlyContiguous.
 * testInert:      selects the upper threshold (minNoNo instead of minMaybeYes)
 *                 above which the answer is always FALSE.
 *
 * Algorithmic mappings are followed until a definite answer is found.
 */
static UBool Normalizer2Impl_hasCompBoundaryAfter(Normalizer2 *_this, UChar32 c, UBool onlyContiguous, UBool testInert) {
    for(;;) {
        uint16_t norm16=getNorm16(c);
        if(isInert(norm16)) {
            return TRUE;
        } else if(norm16<=_this->minYesNo) {
            /* Hangul LVT (==minYesNo) has a boundary after it. */
            /* Hangul LV and non-inert yesYes characters combine forward. */
            return isHangul(norm16) && !/*Hangul::*/isHangulWithoutJamoT((UChar)c);
        } else if(norm16>= (testInert ? _this->minNoNo : _this->minMaybeYes)) {
            return FALSE;
        } else if(isDecompNoAlgorithmic(norm16)) {
            c=mapAlgorithmic(c, norm16);
        } else {
            /* c decomposes, get everything from the variable-length extra data. */
            /* If testInert, then c must be a yesNo character which has lccc=0, */
            /* otherwise it could be a noNo. */
            const uint16_t *mapping=getMapping(norm16);
            uint16_t firstUnit=*mapping;
            /* TRUE if */
            /*      c is not deleted, and */
            /*      it and its decomposition do not combine forward, and it has a starter, and */
            /*      if FCC then trailCC<=1 */
            return
                (firstUnit&MAPPING_LENGTH_MASK)!=0 &&
                (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 &&
                (!onlyContiguous || firstUnit<=0x1ff);
        }
    }
}
/* State for walking a UTF-16 string backwards while looking up trie values. */
typedef struct {
    const UTrie2 *trie;            /* trie used for the lookups */
    const UChar *codePointStart;   /* start of the most recently visited code point */
    const UChar *codePointLimit;   /* position the most recent step started from */
    UChar32 codePoint;             /* most recently visited code point, or U_SENTINEL */
    const UChar *start;            /* lower bound of the iteration */
} BackwardsUTrie2StringIterator;
/*
 * Prepares iter to walk backwards from p down to s using trie t.
 * No code point has been visited yet, so codePoint is U_SENTINEL and both
 * code point pointers sit at p.
 */
static void BackwardsUTrie2StringIterator_init(BackwardsUTrie2StringIterator *iter, const UTrie2*t, const UChar *s, const UChar *p) {
    iter->start=s;
    iter->trie=t;
    iter->codePoint=U_SENTINEL;
    iter->codePointLimit=p;
    iter->codePointStart=p;
}
/*
 * Steps one code point backwards and returns its 16-bit trie value.
 * When the start of the text has been reached, codePoint becomes U_SENTINEL
 * and 0 is returned.
 */
static uint16_t BackwardsUTrie2StringIterator_previous16(BackwardsUTrie2StringIterator *iter) {
    uint16_t value=0;
    /* The previous code point's start is the limit of the one visited now. */
    iter->codePointLimit=iter->codePointStart;
    if(iter->start>=iter->codePointStart) {
        iter->codePoint=U_SENTINEL;
    } else {
        UTRIE2_U16_PREV16(iter->trie, iter->start, iter->codePointStart, iter->codePoint, value);
    }
    return value;
}
/* State for walking a UTF-16 string forwards while looking up trie values. */
typedef struct {
    const UTrie2 *trie;            /* trie used for the lookups */
    const UChar *codePointStart;   /* start of the most recently visited code point */
    const UChar *codePointLimit;   /* position just after the most recently visited code point */
    UChar32 codePoint;             /* most recently visited code point, or U_SENTINEL */
    const UChar *limit;            /* upper bound of the iteration */
} ForwardUTrie2StringIterator;
/*
 * Prepares iter to walk forwards from p up to l using trie t.
 * No code point has been visited yet, so codePoint is U_SENTINEL and both
 * code point pointers sit at p.
 */
static void ForwardUTrie2StringIterator_init(ForwardUTrie2StringIterator *iter, const UTrie2*t, const UChar *p, const UChar *l) {
    iter->limit=l;
    iter->trie=t;
    iter->codePoint=U_SENTINEL;
    iter->codePointLimit=p;
    iter->codePointStart=p;
}
/*
 * Steps one code point forwards and returns its 16-bit trie value.
 * When the limit has been reached, codePoint becomes U_SENTINEL
 * and 0 is returned.
 */
static uint16_t ForwardUTrie2StringIterator_next16(ForwardUTrie2StringIterator *iter) {
    uint16_t value=0;
    /* The next code point starts where the previous one ended. */
    iter->codePointStart=iter->codePointLimit;
    if(iter->codePointLimit!=iter->limit) {
        UTRIE2_U16_NEXT16(iter->trie, iter->codePointLimit, iter->limit, iter->codePoint, value);
    } else {
        iter->codePoint=U_SENTINEL;
    }
    return value;
}
/*
 * Scans backwards from p (not going below start) and returns the start of the
 * first code point found that has a composition boundary before it.
 */
static const UChar *Normalizer2Impl_findPreviousCompBoundary(Normalizer2 *_this, const UChar *start, const UChar *p) {
    BackwardsUTrie2StringIterator it;
    BackwardsUTrie2StringIterator_init(&it, _this->normTrie, start, p);
    for(;;) {
        uint16_t n16=BackwardsUTrie2StringIterator_previous16(&it);
        if(Normalizer2Impl_hasCompBoundaryBefore(_this, it.codePoint, n16)) {
            break;
        }
    }
    /* We could also test hasCompBoundaryAfter() and return it.codePointLimit, */
    /* but that's probably not worth the extra cost. */
    return it.codePointStart;
}
/*
 * Scans forwards from p (not going past limit) and returns the start of the
 * first code point found that has a composition boundary before it.
 */
static const UChar *Normalizer2Impl_findNextCompBoundary(Normalizer2 *_this, const UChar *p, const UChar *limit) {
    ForwardUTrie2StringIterator it;
    ForwardUTrie2StringIterator_init(&it, _this->normTrie, p, limit);
    for(;;) {
        uint16_t n16=ForwardUTrie2StringIterator_next16(&it);
        if(Normalizer2Impl_hasCompBoundaryBefore(_this, it.codePoint, n16)) {
            break;
        }
    }
    return it.codePointStart;
}
/*
 * Looks up the recomposition result for a forward-combining "lead" character
 * (given by a pointer to its compositions list) and a backward-combining
 * "trail" character.
 *
 * Returns the "compositeAndFwd" value when the two combine:
 *      Bits 21..1  composite character
 *      Bit      0  set if the composite is a forward-combining starter
 * and -1 when they do not combine.
 *
 * The compositions list consists of (trail, compositeAndFwd) entries stored as
 * pairs or triples of 16-bit units; the last entry has the high bit of its
 * first unit set. Entries are sorted by ascending trail character without
 * duplicates, and the list is searched linearly.
 *
 * See normalizer2impl.h for a more detailed description
 * of the compositions list format.
 */
static int32_t Normalizer2Impl_combine(Normalizer2 *_this, const uint16_t *list, UChar32 trail) {
    uint16_t key1, firstUnit;
    if(trail>=COMP_1_TRAIL_LIMIT) {
        /* trail character is 3400..10FFFF */
        /* result entry has 3 units */
        uint16_t secondUnit;
        uint16_t key2;
        key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
                        ((trail>>COMP_1_TRAIL_SHIFT))&
                         ~COMP_1_TRIPLE);
        key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
        for(;;) {
            firstUnit=*list;
            if(key1>firstUnit) {
                /* Skip an entry that sorts before the wanted one. */
                list+=2+(firstUnit&COMP_1_TRIPLE);
                continue;
            }
            if(key1!=(firstUnit&COMP_1_TRAIL_MASK)) {
                return -1;
            }
            secondUnit=list[1];
            if(key2>secondUnit) {
                if(firstUnit&COMP_1_LAST_TUPLE) {
                    return -1;
                }
                list+=3;
            } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
                return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
            } else {
                return -1;
            }
        }
    }
    /* trail character is 0..33FF */
    /* result entry may have 2 or 3 units */
    key1=(uint16_t)(trail<<1);
    for(firstUnit=*list; key1>firstUnit; firstUnit=*list) {
        list+=2+(firstUnit&COMP_1_TRIPLE);
    }
    if(key1!=(firstUnit&COMP_1_TRAIL_MASK)) {
        return -1;
    }
    if(firstUnit&COMP_1_TRIPLE) {
        return ((int32_t)list[1]<<16)|list[2];
    }
    return list[1];
}
/*
* Recomposes the buffer text starting at recomposeStartIndex
* (which is in NFD - decomposed and canonically ordered),
* and truncates the buffer contents.
*
* Note that recomposition never lengthens the text:
* Any character consists of either one or two code units;
* a composition may contain at most one more code unit than the original starter,
* while the combining mark that is removed has at least one code unit.
*
* Parameters:
*   buffer               the text is rewritten in place between
*                        getStart()+recomposeStartIndex and getLimit();
*                        the new end is stored via setReorderingLimit()
*   recomposeStartIndex  offset from the buffer start where recomposition begins
*   onlyContiguous       when set, any intervening non-starter clears the
*                        pending starter so that only adjacent characters combine
*/
static void Normalizer2Impl_recompose(Normalizer2 *_this, ReorderingBuffer *buffer, int32_t recomposeStartIndex,
UBool onlyContiguous) {
UChar *p;
UChar *limit;
UChar *starter, *pRemove, *q, *r;
const uint16_t *compositionsList;
UChar32 c, compositeAndFwd;
uint16_t norm16;
uint8_t cc, prevCC;
UBool starterIsSupplementary;
p=ReorderingBuffer_getStart(buffer)+recomposeStartIndex;
limit=ReorderingBuffer_getLimit(buffer);
/* Nothing to recompose in an empty range. */
if(p==limit) {
return;
}
/* Some of the following variables are not used until we have a forward-combining starter */
/* and are only initialized now to avoid compiler warnings. */
compositionsList=NULL; /* used as indicator for whether we have a forward-combining starter */
starter=NULL;
starterIsSupplementary=FALSE;
prevCC=0;
for(;;) {
/* Fetch the next code point c and its norm16 value; p ends up after c. */
UTRIE2_U16_NEXT16(_this->normTrie, p, limit, c, norm16);
cc=getCCFromYesOrMaybe(norm16);
if( /* this character combines backward and */
isMaybe(norm16) &&
/* we have seen a starter that combines forward and */
compositionsList!=NULL &&
/* the backward-combining character is not blocked */
(prevCC<cc || prevCC==0)
) {
if(isJamoVT(norm16)) {
/* c is a Jamo V/T, see if we can compose it with the previous character. */
if(c</*Hangul::*/JAMO_T_BASE) {
/* c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. */
UChar prev=(UChar)(*starter-/*Hangul::*/JAMO_L_BASE);
if(prev</*Hangul::*/JAMO_L_COUNT) {
UChar t;
UChar syllable=(UChar)
(/*Hangul::*/HANGUL_BASE+
(prev*/*Hangul::*/JAMO_V_COUNT+(c-/*Hangul::*/JAMO_V_BASE))*
/*Hangul::*/JAMO_T_COUNT);
/* The Jamo V occupies the single code unit just before p. */
pRemove=p-1;
if(p!=limit && (t=(UChar)(*p-/*Hangul::*/JAMO_T_BASE))</*Hangul::*/JAMO_T_COUNT) {
++p;
syllable+=t; /* The next character was a Jamo T. */
}
*starter=syllable;
/* remove the Jamo V/T */
q=pRemove;
r=p;
while(r<limit) {
*q++=*r++;
}
limit=q;
p=pRemove;
}
}
/*
* No "else" for Jamo T:
* Since the input is in NFD, there are no Hangul LV syllables that
* a Jamo T could combine with.
* All Jamo Ts are combined above when handling Jamo Vs.
*/
if(p==limit) {
break;
}
/* Forget the starter after handling a Jamo V/T. */
compositionsList=NULL;
continue;
} else if((compositeAndFwd=Normalizer2Impl_combine(_this, compositionsList, c))>=0) {
/* The starter and the combining mark (c) do combine. */
UChar32 composite;
/* Drop bit 0 (the "combines forward" flag) to get the composite code point. */
composite =compositeAndFwd>>1;
/* Replace the starter with the composite, remove the combining mark. */
pRemove=p-U16_LENGTH(c); /* pRemove & p: start & limit of the combining mark */
if(starterIsSupplementary) {
if(U_IS_SUPPLEMENTARY(composite)) {
/* both are supplementary */
starter[0]=U16_LEAD(composite);
starter[1]=U16_TRAIL(composite);
} else {
*starter=(UChar)composite;
/* The composite is shorter than the starter, */
/* move the intermediate characters forward one. */
starterIsSupplementary=FALSE;
q=starter+1;
r=q+1;
while(r<pRemove) {
*q++=*r++;
}
--pRemove;
}
} else if(U_IS_SUPPLEMENTARY(composite)) {
/* The composite is longer than the starter, */
/* move the intermediate characters back one. */
starterIsSupplementary=TRUE;
++starter; /* temporarily increment for the loop boundary */
q=pRemove;
r=++pRemove;
while(starter<q) {
*--r=*--q;
}
*starter=U16_TRAIL(composite);
*--starter=U16_LEAD(composite); /* undo the temporary increment */
} else {
/* both are on the BMP */
*starter=(UChar)composite;
}
/* remove the combining mark by moving the following text over it */
if(pRemove<p) {
q=pRemove;
r=p;
while(r<limit) {
*q++=*r++;
}
limit=q;
p=pRemove;
}
/* Keep prevCC because we removed the combining mark. */
if(p==limit) {
break;
}
/* Is the composite a starter that combines forward? */
if(compositeAndFwd&1) {
compositionsList=
Normalizer2_getCompositionsListForComposite(_this, getNorm16(composite));
} else {
compositionsList=NULL;
}
/* We combined; continue with looking for compositions. */
continue;
}
}
/* no combination this time */
prevCC=cc;
if(p==limit) {
break;
}
/* If c did not combine, then check if it is a starter. */
if(cc==0) {
/* Found a new starter. */
if((compositionsList=Normalizer2_getCompositionsListForDecompYes(_this, norm16))!=NULL) {
/* It may combine with something, prepare for it. */
/* p is already past c, so step back by c's length in code units. */
if(U_IS_BMP(c)) {
starterIsSupplementary=FALSE;
starter=p-1;
} else {
starterIsSupplementary=TRUE;
starter=p-2;
}
}
} else if(onlyContiguous) {
/* FCC: no discontiguous compositions; any intervening character blocks. */
compositionsList=NULL;
}
}
/* Publish the (possibly shortened) end of the text to the buffer. */
ReorderingBuffer_setReorderingLimit(buffer, limit);
}
/*
 * Decomposes [src, limit[ one code point at a time into the buffer.
 * Meant for short pieces of text which are likely to contain characters that
 * fail the quick check loop and/or where the quick check loop's overhead
 * is unlikely to be amortized.
 * Called by the compose() and makeFCD() implementations.
 * Returns FALSE as soon as decomposing a code point reports failure,
 * TRUE once the whole range has been processed.
 */
static UBool Normalizer2Impl_decomposeShort(Normalizer2 *_this, const UChar *src, const UChar *limit,
                                            ReorderingBuffer *buffer,
                                            UErrorCode *errorCode) {
    UBool ok=TRUE;
    while(ok && src<limit) {
        UChar32 cp;
        uint16_t n16;
        UTRIE2_U16_NEXT16(_this->normTrie, src, limit, cp, n16);
        if(!Normalizer2Impl_decomposeChar(_this, cp, n16, buffer, errorCode)) {
            ok=FALSE;
        }
    }
    return ok;
}
/*
 * Composes [src, limit[ into the buffer, or tests whether it is composed.
 * Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
 *
 *   doCompose:  normalize -- the result is appended to the buffer.
 *   !doCompose: isNormalized (buffer must be empty and initialized);
 *               returns FALSE as soon as the text is known not to be normalized.
 *
 * limit may be NULL for NUL-terminated input.
 * onlyContiguous selects the FCC variant (contiguous composition only).
 *
 * Returns TRUE unless the isNormalized test failed or handling the
 * NUL-terminated prefix reported an error. When an append fails the loop is
 * left early; callers are expected to inspect *errorCode.
 */
static UBool
Normalizer2_comp_compose(Normalizer2 *_this, const UChar *src, const UChar *limit,
                         UBool onlyContiguous,
                         UBool doCompose,
                         ReorderingBuffer *buffer,
                         UErrorCode *errorCode) {
    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     * Keeping track of prevBoundary saves us looking for a composition boundary
     * when we find a "no" or "maybe".
     *
     * When we back out from prevSrc back to prevBoundary,
     * then we also remove those same characters (which had been simply copied
     * or canonically-order-inserted) from the ReorderingBuffer.
     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
     * must correspond 1:1 to destination units at the end of the destination buffer.
     */
    const UChar *prevBoundary=src;
    const UChar *prevSrc;
    UChar32 c=0;
    uint16_t norm16=0;
    /* only for isNormalized */
    uint8_t prevCC=0;
    UChar32 minNoMaybeCP=_this->minCompNoMaybeCP;
    if(limit==NULL) {
        /*
         * When composing, the skipped low prefix must also land in the output
         * buffer, otherwise it is lost and the 1:1 invariant described above
         * no longer holds. Errors are reported through the caller's errorCode.
         */
        src=Normalizer2_fcd_copyLowPrefixFromNulTerminated(_this, src, minNoMaybeCP,
                                                            doCompose ? buffer : NULL,
                                                            errorCode);
        if(U_FAILURE(*errorCode)) {
            return FALSE;
        }
        if(prevBoundary<src) {
            /* Set prevBoundary to the last character in the prefix. */
            prevBoundary=src-1;
        }
        limit=u_strchr(src, 0);
    }
    for(;;) {
        int32_t recomposeStartIndex;
        /* count code units below the minimum or with irrelevant data for the quick check */
        for(prevSrc=src; src!=limit;) {
            if( (c=*src)<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(_this->normTrie, c))
            ) {
                ++src;
            } else if(!U16_IS_SURROGATE(c)) {
                break;
            } else {
                UChar c2;
                if(U16_IS_SURROGATE_LEAD(c)) {
                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
                        c=U16_GET_SUPPLEMENTARY(c, c2);
                    }
                } else /* trail surrogate */ {
                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
                        --src;
                        c=U16_GET_SUPPLEMENTARY(c2, c);
                    }
                }
                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
                    src+=U16_LENGTH(c);
                } else {
                    break;
                }
            }
        }
        /* copy these code units all at once */
        if(src!=prevSrc) {
            if(doCompose) {
                if(!ReorderingBuffer_appendZeroCCStr(buffer, prevSrc, src, errorCode)) {
                    break;
                }
            } else {
                prevCC=0;
            }
            if(src==limit) {
                break;
            }
            /* Set prevBoundary to the last character in the quick check loop. */
            prevBoundary=src-1;
            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
                U16_IS_LEAD(*(prevBoundary-1))
            ) {
                --prevBoundary;
            }
            /* The start of the current character (c). */
            prevSrc=src;
        } else if(src==limit) {
            break;
        }
        src+=U16_LENGTH(c);
        /*
         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
         * or has ccc!=0.
         * Check for Jamo V/T, then for regular characters.
         * c is not a Hangul syllable or Jamo L because those have "yes" properties.
         */
        if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
            UBool needToDecompose=FALSE;
            UChar prev=*(prevSrc-1);
            if(c</* Hangul:: */JAMO_T_BASE) {
                /* c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. */
                prev=(UChar)(prev-/* Hangul:: */JAMO_L_BASE);
                if(prev</* Hangul:: */JAMO_L_COUNT) {
                    UChar t;
                    UChar syllable=(UChar)
                        (/* Hangul:: */HANGUL_BASE+
                         (prev*/* Hangul:: */JAMO_V_COUNT+(c-/* Hangul:: */JAMO_V_BASE))*
                         /* Hangul:: */JAMO_T_COUNT);
                    if(!doCompose) {
                        return FALSE;
                    }
                    if(src!=limit && (t=(UChar)(*src-/* Hangul:: */JAMO_T_BASE))</* Hangul:: */JAMO_T_COUNT) {
                        ++src;
                        syllable+=t;  /* The next character was a Jamo T. */
                        prevBoundary=src;
                        ReorderingBuffer_setLastChar(buffer,syllable);
                        continue;
                    }
                    /* If we see L+V+x where x!=T then we drop to the slow path, */
                    /* decompose and recompose. */
                    /* This is to deal with NFKC finding normal L and V but a */
                    /* compatibility variant of a T. We need to either fully compose that */
                    /* combination here (which would complicate the code and may not work */
                    /* with strange custom data) or use the slow path -- or else our replacing */
                    /* two input characters (L+V) with one output character (LV syllable) */
                    /* would violate the invariant that [prevBoundary..prevSrc[ has the same */
                    /* length as what we appended to the buffer since prevBoundary. */
                    needToDecompose=TRUE;
                }
            } else if(/* Hangul:: */isHangulWithoutJamoT(prev)) {
                /* c is a Jamo Trailing consonant, */
                /* compose with previous Hangul LV that does not contain a Jamo T. */
                if(!doCompose) {
                    return FALSE;
                }
                ReorderingBuffer_setLastChar(buffer, (UChar)(prev+c-/* Hangul:: */JAMO_T_BASE));
                prevBoundary=src;
                continue;
            }
            if(!needToDecompose) {
                /* The Jamo V/T did not compose into a Hangul syllable. */
                if(doCompose) {
                    if(!ReorderingBuffer_appendBMP(buffer, (UChar)c, 0, errorCode)) {
                        break;
                    }
                } else {
                    prevCC=0;
                }
                continue;
            }
        }
        /*
         * Source buffer pointers:
         *
         *  all done      quick check   current char  not yet
         *                "yes" but     (c)           processed
         *                may combine
         *                forward
         * [-------------[-------------[-------------[-------------[
         * |             |             |             |             |
         * orig. src     prevBoundary  prevSrc       src           limit
         *
         *
         * Destination buffer pointers inside the ReorderingBuffer:
         *
         *  all done      might take    not filled yet
         *                characters for
         *                reordering
         * [-------------[-------------[-------------[
         * |             |             |             |
         * start         reorderStart  limit         |
         *                             +remainingCap.+
         */
        if(norm16>=MIN_YES_YES_WITH_CC) {
            uint8_t cc=(uint8_t)norm16;  /* cc!=0 */
            if( onlyContiguous &&  /* FCC */
                (doCompose ? ReorderingBuffer_getLastCC(buffer) : prevCC)==0 &&
                prevBoundary<prevSrc &&
                /* ReorderingBuffer_getLastCC(buffer)==0 && prevBoundary<prevSrc tell us that */
                /* [prevBoundary..prevSrc[ (which is exactly one character under these conditions) */
                /* passed the quick check "yes && ccc==0" test. */
                /* Check whether the last character was a "yesYes" or a "yesNo". */
                /* If a "yesNo", then we get its trailing ccc from its */
                /* mapping and check for canonical order. */
                /* All other cases are ok. */
                Normalizer2_getTrailCCFromCompYesAndZeroCC(_this,prevBoundary, prevSrc)>cc
            ) {
                /* Fails FCD test, need to decompose and contiguously recompose. */
                if(!doCompose) {
                    return FALSE;
                }
            } else if(doCompose) {
                if(!ReorderingBuffer_append(buffer, c, cc, errorCode)) {
                    break;
                }
                continue;
            } else if(prevCC<=cc) {
                prevCC=cc;
                continue;
            } else {
                return FALSE;
            }
        } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
            return FALSE;
        }
        /*
         * Find appropriate boundaries around this character,
         * decompose the source text from between the boundaries,
         * and recompose it.
         *
         * We may need to remove the last few characters from the ReorderingBuffer
         * to account for source text that was copied or appended
         * but needs to take part in the recomposition.
         */
        /*
         * Find the last composition boundary in [prevBoundary..src[.
         * It is either the decomposition of the current character (at prevSrc),
         * or prevBoundary.
         */
        if(Normalizer2Impl_hasCompBoundaryBefore(_this,c, norm16)) {
            prevBoundary=prevSrc;
        } else if(doCompose) {
            ReorderingBuffer_removeSuffix(buffer, (int32_t)(prevSrc-prevBoundary));
        }
        /* Find the next composition boundary in [src..limit[ - */
        /* modifies src to point to the next starter. */
        src=Normalizer2Impl_findNextCompBoundary(_this,src, limit);
        /* Decompose [prevBoundary..src[ into the buffer and then recompose that part of it. */
        recomposeStartIndex=ReorderingBuffer_length(buffer);
        if(!Normalizer2Impl_decomposeShort(_this, prevBoundary, src, buffer, errorCode)) {
            break;
        }
        /* Use the same onlyContiguous flag as the FCC test above. */
        Normalizer2Impl_recompose(_this, buffer, recomposeStartIndex, onlyContiguous);
        if(!doCompose) {
            if(!ReorderingBuffer_equals(buffer, prevBoundary, src)) {
                return FALSE;
            }
            ReorderingBuffer_remove(buffer);
            prevCC=0;
        }
        /* Move to the next starter. We never need to look back before this point again. */
        prevBoundary=src;
    }
    return TRUE;
}
/*
 * Quick-checks [src, limit[ for composition without writing any output.
 * Very similar to compose(): Make the same changes in both places if relevant.
 *
 *   pQCResult==NULL: spanQuickCheckYes -- stops at the first "maybe" or "no".
 *   pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES on entry);
 *                    it is set to UNORM_MAYBE or UNORM_NO as the scan proceeds.
 *
 * limit may be NULL for NUL-terminated input.
 * onlyContiguous enables the additional FCC ordering test.
 *
 * Returns limit when the scan reached the end, otherwise the last boundary
 * (prevBoundary) before the character that stopped the scan.
 */
static const UChar *
Normalizer2_comp_composeQuickCheck(Normalizer2 *_this, const UChar *src, const UChar *limit,
                                   UBool onlyContiguous,
                                   UNormalizationCheckResult *pQCResult) {
    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     */
    const UChar *prevBoundary=src;
    const UChar *prevSrc;
    UChar32 c=0;
    uint16_t norm16=0;
    uint8_t prevCC=0;
    UChar32 minNoMaybeCP=_this->minCompNoMaybeCP;
    if(limit==NULL) {
        /* No buffer is passed, so the helper only skips the prefix; its status is not needed. */
        UErrorCode errorCode=U_ZERO_ERROR;
        src=Normalizer2_fcd_copyLowPrefixFromNulTerminated(_this, src, minNoMaybeCP, NULL, &errorCode);
        if(prevBoundary<src) {
            /*
             * Set prevBoundary to the last character in the prefix, so that
             * [prevBoundary..prevSrc[ never spans more than one character
             * in the FCC test below.
             */
            prevBoundary=src-1;
        }
        limit=u_strchr(src, 0);
    }
    for(;;) {
        /* count code units below the minimum or with irrelevant data for the quick check */
        for(prevSrc=src;;) {
            if(src==limit) {
                return src;
            }
            if( (c=*src)<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(_this->normTrie, c))
            ) {
                ++src;
            } else if(!U16_IS_SURROGATE(c)) {
                break;
            } else {
                UChar c2;
                if(U16_IS_SURROGATE_LEAD(c)) {
                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
                        c=U16_GET_SUPPLEMENTARY(c, c2);
                    }
                } else /* trail surrogate */ {
                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
                        --src;
                        c=U16_GET_SUPPLEMENTARY(c2, c);
                    }
                }
                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
                    src+=U16_LENGTH(c);
                } else {
                    break;
                }
            }
        }
        if(src!=prevSrc) {
            /* Set prevBoundary to the last character in the quick check loop. */
            prevBoundary=src-1;
            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
                U16_IS_LEAD(*(prevBoundary-1))
            ) {
                --prevBoundary;
            }
            prevCC=0;
            /* The start of the current character (c). */
            prevSrc=src;
        }
        src+=U16_LENGTH(c);
        /*
         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
         * or has ccc!=0.
         */
        if(isMaybeOrNonZeroCC(norm16)) {
            uint8_t cc=getCCFromYesOrMaybe(norm16);
            if( onlyContiguous &&  /* FCC */
                cc!=0 &&
                prevCC==0 &&
                prevBoundary<prevSrc &&
                /* prevCC==0 && prevBoundary<prevSrc tell us that */
                /* [prevBoundary..prevSrc[ (which is exactly one character under these conditions) */
                /* passed the quick check "yes && ccc==0" test. */
                /* Check whether the last character was a "yesYes" or a "yesNo". */
                /* If a "yesNo", then we get its trailing ccc from its */
                /* mapping and check for canonical order. */
                /* All other cases are ok. */
                Normalizer2_getTrailCCFromCompYesAndZeroCC(_this,prevBoundary, prevSrc)>cc
            ) {
                /* Fails FCD test. */
            } else if(prevCC<=cc || cc==0) {
                prevCC=cc;
                if(norm16<MIN_YES_YES_WITH_CC) {
                    /* A "maybe": record it for quickCheck, stop for spanQuickCheckYes. */
                    if(pQCResult!=NULL) {
                        *pQCResult=UNORM_MAYBE;
                    } else {
                        return prevBoundary;
                    }
                }
                continue;
            }
        }
        if(pQCResult!=NULL) {
            *pQCResult=UNORM_NO;
        }
        return prevBoundary;
    }
}
#if UNORM_ENABLE_FCD
/* Only compiled when UNORM_ENABLE_FCD is non-zero. */
/* FCD span check: forwards to Normalizer2_fcd_makeFCD() with a NULL buffer argument. */
static UChar* Normalizer2_fcd_spanQuickCheckYes(struct Normalizer2* n, const UChar *s, const UChar* limit, UErrorCode *pErrorCode) {
return Normalizer2_fcd_makeFCD(n, s, limit, NULL, pErrorCode);
}
/* Decomposition span check: runs Normalizer2_decomp_decompose() in its buffer==NULL (quick check) mode. */
/* NOTE(review): the callee returns const UChar * while this wrapper is declared to return UChar * -- confirm intended. */
static UChar* Normalizer2_decomp_spanQuickCheckYes(struct Normalizer2* n, const UChar *s, const UChar* limit, UErrorCode *pErrorCode) {
return Normalizer2_decomp_decompose(n, s, limit, NULL, pErrorCode);
}
#endif
/*
* Composition span check: runs composeQuickCheck() in its pQCResult==NULL
* (spanQuickCheckYes) mode with the instance's onlyContiguous setting and
* returns the pointer it reports.
* pErrorCode is accepted for signature symmetry but not used here.
*/
static const UChar* Normalizer2_comp_spanQuickCheckYes(struct Normalizer2* n, const UChar *s, const UChar* limit, UErrorCode *pErrorCode) {
return Normalizer2_comp_composeQuickCheck(n, s, limit, n->onlyContiguous, NULL);
}
#if UNORM_ENABLE_FCD
/* Only compiled when UNORM_ENABLE_FCD is non-zero. */
/* TRUE when the FCD span check runs all the way to s+length. */
/* NOTE(review): s+length assumes length>=0 -- confirm callers never pass -1 here. */
static UBool Normalizer2_fcd_isNormalized(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
return((s+length)==Normalizer2_fcd_spanQuickCheckYes(n,s,s+length,pErrorCode));
}
/* TRUE when the decomposition span check runs all the way to s+length. */
/* NOTE(review): s+length assumes length>=0 -- confirm callers never pass -1 here. */
static UBool Normalizer2_decomp_isNormalized(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
return((s+length)==Normalizer2_decomp_spanQuickCheckYes(n,s,s+length,pErrorCode));
}
#endif
#if 0
/* Disabled code: TRUE when the composition span check runs all the way to s+length. */
static UBool Normalizer2_comp_isNormalized(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
return((s+length)==Normalizer2_comp_spanQuickCheckYes(n,s,s+length,pErrorCode));
}
#endif
#if UNORM_ENABLE_FCD
/* Only compiled when UNORM_ENABLE_FCD is non-zero. */
/* FCD quick check: maps the isNormalized result to UNORM_YES / UNORM_NO (never UNORM_MAYBE). */
static UNormalizationCheckResult U_CALLCONV Normalizer2_fcd_quickCheck(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
return Normalizer2_fcd_isNormalized(n, s, length, pErrorCode)?UNORM_YES:UNORM_NO;
}
#endif
#if UNORM_ENABLE_FCD
/* Decomposition quick check: maps the isNormalized result to UNORM_YES / UNORM_NO (never UNORM_MAYBE). */
static UNormalizationCheckResult U_CALLCONV Normalizer2_decomp_quickCheck(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
return Normalizer2_decomp_isNormalized(n, s, length, pErrorCode)?UNORM_YES:UNORM_NO;
}
#endif
/*
 * Composition quick check for s[0..length[.
 * A negative length means that s is NUL-terminated; composeQuickCheck()
 * expects limit==NULL for that case, so s+length must not be formed.
 * Returns UNORM_YES unless the scan downgrades the result to UNORM_MAYBE
 * or UNORM_NO. pErrorCode is not used here.
 */
static UNormalizationCheckResult U_CALLCONV Normalizer2_comp_quickCheck(struct Normalizer2* n, const UChar *s, int32_t length, UErrorCode *pErrorCode) {
    UNormalizationCheckResult qcResult=UNORM_YES;
    const UChar *limit=(length>=0) ? s+length : NULL;
    Normalizer2_comp_composeQuickCheck(n, s, limit, n->onlyContiguous, &qcResult);
    return qcResult;
}
/*
 * Creates a Normalizer2 instance for the given data file and legacy mode.
 *
 *   name==NULL: a no-op normalizer (no-op quickCheck and normalize).
 *   otherwise:  the data is loaded via Normalizer2_load() and the function
 *               pointers are selected by mode; UNORM_NFC gets the composing
 *               implementation, unsupported modes fall back to the no-ops.
 *
 * Returns the new instance, or NULL with *errorCode set on failure
 * (U_MEMORY_ALLOCATION_ERROR if the allocation fails). A partially set up
 * instance is released through unorm2_close().
 */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_get2Instance(const char *packageName,
                    const char *name,
                    UNormalizationMode mode,
                    UErrorCode *errorCode) {
    Normalizer2 *_this = NULL;
    if(U_FAILURE(*errorCode)) {
        return NULL;
    }
    _this = uprv_malloc(sizeof(Normalizer2));
    if(_this==NULL) {
        *errorCode = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    /*
     * Zero out the whole struct: (pointer, fill value, byte count).
     * unorm2_close() relies on an unset close pointer being NULL.
     */
    uprv_memset(_this, 0, sizeof(Normalizer2));
    if(name == NULL) {
        /* no-op */
        _this->quickCheck = Normalizer2_noop_quickCheck;
        _this->normalize = Normalizer2_noop_normalize;
    } else {
        Normalizer2_load(_this, packageName, name, errorCode);
        _this->mode = mode;
        /* Set up functions */
        _this->close = Normalizer2_close;
        _this->onlyContiguous = FALSE; /* maybe true for FCC? */
        switch(mode) {
#if UNORM_ENABLE_FCD
        case UNORM_FCD:
            {
                FCDTrieSingleton_createInstance(_this, errorCode);
                _this->quickCheck = Normalizer2_fcd_quickCheck;
                _this->normalize = Normalizer2_noop_normalize;
            }
            break;
        case UNORM_NFD:
            {
                _this->quickCheck = Normalizer2_decomp_quickCheck;
                _this->normalize = Normalizer2_noop_normalize;
            }
            break;
#endif
        case UNORM_NFC:
            {
                _this->quickCheck = Normalizer2_comp_quickCheck;
                _this->normalize = Normalizer2_comp_normalize;
#if defined(UNORM_DEBUG)
                fprintf(stderr, "setting NFC for mode=%s\n", MODENAME(mode));
#endif
            }
            break;
        default:
            {
                _this->quickCheck = Normalizer2_noop_quickCheck;
                _this->normalize = Normalizer2_noop_normalize;
            }
            break;
        }
        /* Debug builds report when a mode ended up with a no-op implementation. */
        if(_this->normalize == Normalizer2_noop_normalize) {
#if defined(UNORM_DEBUG)
            fprintf(stderr, "IMP: using noop for %d=%s [name=%s] normalize\n", (int)mode, MODENAME(mode),name);
#endif
        }
        if(_this->quickCheck == Normalizer2_noop_quickCheck) {
#if defined(UNORM_DEBUG)
            fprintf(stderr, "IMP: using noop for %d=%s [name=%s] quickCheck\n", (int)mode, MODENAME(mode),name);
#endif
        }
    }
    if(U_FAILURE(*errorCode)) {
        goto cleanup;
    }
    return (UNormalizer2*)_this;
cleanup:
    if(_this !=NULL) {
        unorm2_close((UNormalizer2*)_this);
        /* uprv_free(_this);*/
    }
    return NULL;
}
/*
 * Returns a normalizer for the given data and UNormalization2Mode.
 * Only UNORM2_COMPOSE is supported; it is served by unorm2_get2Instance()
 * with UNORM_NFC. Every other mode sets *errorCode to U_REGEX_UNIMPLEMENTED
 * and returns NULL.
 */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *errorCode) {
    if(U_FAILURE(*errorCode)) {
        return NULL;
    }
    if(mode==UNORM2_COMPOSE) {
#if defined(UNORM_DEBUG)
        printf("using UNORM_NFC for: unorm2_getInstance(%s,%s,%s...\n",
               packageName,name,MODE2NAME(mode));
#endif
        return unorm2_get2Instance(packageName,name,UNORM_NFC, errorCode);
    }
    /* Any other mode is not available in this port. */
#if defined(UNORM_DEBUG)
    printf("Unimplemented: unorm2_getInstance(%s,%s,%s...\n",
           packageName,name,MODE2NAME(mode));
#endif
    *errorCode = U_REGEX_UNIMPLEMENTED;
    return NULL;
}
/*
 * Releases a normalizer instance: runs its close callback if one is set,
 * then frees the instance memory. A NULL argument is ignored.
 */
U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 *norm2) {
    if(norm2!=NULL) {
        Normalizer2 *impl = (Normalizer2*)norm2;
        if(impl->close!=NULL) {
            impl->close(impl);
        }
        uprv_free(norm2);
    }
}
/*
 * Quick-checks s[0..length[ with the given normalizer by dispatching to the
 * instance's quickCheck function.
 *
 * Returns UNORM_NO without doing any work if *pErrorCode already indicates a
 * failure. Sets U_ILLEGAL_ARGUMENT_ERROR and returns UNORM_NO if norm2 or s is
 * NULL or length is less than -1.
 */
U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 *norm2,
                  const UChar *s, int32_t length,
                  UErrorCode *pErrorCode) {
    Normalizer2 *norm = (Normalizer2*)norm2;
    if(U_FAILURE(*pErrorCode)) {
        return UNORM_NO;
    }
    /* Reject a missing normalizer as well, instead of dereferencing NULL below. */
    if(norm==NULL || s==NULL || length<-1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return UNORM_NO;
    }
    return norm->quickCheck(norm, s, length, pErrorCode);
}
/*
 * Normalizes src[0..length[ into dest (capacity code units) by dispatching to
 * the instance's normalize function, and returns that function's result.
 *
 * Returns 0 without doing any work if *pErrorCode already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if norm2 or src is NULL,
 * length is less than -1, capacity is negative, dest is NULL with a positive
 * capacity, or src and dest are the same pointer.
 */
U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 *norm2,
                 const UChar *src, int32_t length,
                 UChar *dest, int32_t capacity,
                 UErrorCode *pErrorCode) {
    Normalizer2 *norm = (Normalizer2*)norm2;
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    /* Reject a missing normalizer as well, instead of dereferencing NULL below. */
    if(norm==NULL || src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    return norm->normalize(norm,src,length,dest,capacity,pErrorCode);
}
/**
* Legacy unorm quick check API (kept for tests).
* Obtains the normalizer for the mode from Normalizer2Factory_getInstance()
* and forwards to unorm2_quickCheck(), which returns UNORM_NO right away
* if *pErrorCode indicates a failure at that point.
*/
U_CAPI UNormalizationCheckResult U_EXPORT2
unorm_quickCheck(const UChar *src,
int32_t srcLength,
UNormalizationMode mode,
UErrorCode *pErrorCode) {
const UNormalizer2 *n2= Normalizer2Factory_getInstance(mode, pErrorCode);
return unorm2_quickCheck(n2, src, srcLength, pErrorCode);
}
/**
* Public API for normalizing.
* Obtains the normalizer for the mode from Normalizer2Factory_getInstance()
* and forwards to unorm2_normalize().
* The options argument is currently ignored: the UNORM_UNICODE_3_2 filtering
* path below is commented out.
*/
U_CAPI int32_t U_EXPORT2
unorm_normalize(const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
const UNormalizer2 *n2= Normalizer2Factory_getInstance(mode, pErrorCode);
/* if(options&UNORM_UNICODE_3_2) { */
/* FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); */
/* return unorm2_normalize((const UNormalizer2 *)&fn2, */
/* src, srcLength, dest, destCapacity, pErrorCode); */
/* } else */ {
return unorm2_normalize(n2,
src, srcLength, dest, destCapacity, pErrorCode);
}
}
#endif