|  | /* | 
|  | ******************************************************************************* | 
|  | * | 
|  | *   Copyright (C) 2001-2012, International Business Machines | 
|  | *   Corporation and others.  All Rights Reserved. | 
|  | * | 
|  | ******************************************************************************* | 
|  | *   file name:  ucaelems.cpp | 
|  | *   encoding:   US-ASCII | 
|  | *   tab size:   8 (not used) | 
|  | *   indentation:4 | 
|  | * | 
|  | *   created 02/22/2001 | 
|  | *   created by: Vladimir Weinstein | 
|  | * | 
|  | *   This program reads the Fractional UCA table and generates | 
|  | *   internal format for UCA table as well as inverse UCA table. | 
|  | *   It then writes binary files containing the data: ucadata.dat | 
|  | *   & invuca.dat | 
|  | * | 
|  | *   date        name       comments | 
|  | *   03/02/2001  synwee     added setMaxExpansion | 
|  | *   03/07/2001  synwee     merged UCA's maxexpansion and tailoring's | 
|  | */ | 
|  |  | 
|  | #include "unicode/utypes.h" | 
|  |  | 
|  | #if !UCONFIG_NO_COLLATION | 
|  |  | 
|  | #include "unicode/uchar.h" | 
|  | #include "unicode/unistr.h" | 
|  | #include "unicode/ucoleitr.h" | 
|  | #include "unicode/normlzr.h" | 
|  | #include "unicode/utf16.h" | 
|  | #include "normalizer2impl.h" | 
|  | #include "ucol_elm.h" | 
|  | #include "ucol_tok.h" | 
|  | #include "ucol_cnt.h" | 
|  | #include "unicode/caniter.h" | 
|  | #include "cmemory.h" | 
|  | #include "uassert.h" | 
|  |  | 
|  | U_NAMESPACE_USE | 
|  |  | 
|  | static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status); | 
|  |  | 
|  | U_CDECL_BEGIN | 
|  | static int32_t U_CALLCONV | 
|  | prefixLookupHash(const UHashTok e) { | 
|  | UCAElements *element = (UCAElements *)e.pointer; | 
|  | UChar buf[256]; | 
|  | UHashTok key; | 
|  | key.pointer = buf; | 
|  | uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar)); | 
|  | buf[element->cSize] = 0; | 
|  | //key.pointer = element->cPoints; | 
|  | //element->cPoints[element->cSize] = 0; | 
|  | return uhash_hashUChars(key); | 
|  | } | 
|  |  | 
|  | static int8_t U_CALLCONV | 
|  | prefixLookupComp(const UHashTok e1, const UHashTok e2) { | 
|  | UCAElements *element1 = (UCAElements *)e1.pointer; | 
|  | UCAElements *element2 = (UCAElements *)e2.pointer; | 
|  |  | 
|  | UChar buf1[256]; | 
|  | UHashTok key1; | 
|  | key1.pointer = buf1; | 
|  | uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar)); | 
|  | buf1[element1->cSize] = 0; | 
|  |  | 
|  | UChar buf2[256]; | 
|  | UHashTok key2; | 
|  | key2.pointer = buf2; | 
|  | uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar)); | 
|  | buf2[element2->cSize] = 0; | 
|  |  | 
|  | return uhash_compareUChars(key1, key2); | 
|  | } | 
|  | U_CDECL_END | 
|  |  | 
|  | static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) { | 
|  | if(U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | if(expansions->CEs == NULL) { | 
|  | expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); | 
|  | /* test for NULL */ | 
|  | if (expansions->CEs == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | expansions->size = INIT_EXP_TABLE_SIZE; | 
|  | expansions->position = 0; | 
|  | } | 
|  |  | 
|  | if(expansions->position == expansions->size) { | 
|  | uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t)); | 
|  | if(newData == NULL) { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "out of memory for expansions\n"); | 
|  | #endif | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return -1; | 
|  | } | 
|  | expansions->CEs = newData; | 
|  | expansions->size *= 2; | 
|  | } | 
|  |  | 
|  | expansions->CEs[expansions->position] = value; | 
|  | return(expansions->position++); | 
|  | } | 
|  |  | 
|  | U_CAPI tempUCATable*  U_EXPORT2 | 
|  | uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) { | 
|  | MaxJamoExpansionTable *maxjet; | 
|  | MaxExpansionTable *maxet; | 
|  | tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); | 
|  | /* test for NULL */ | 
|  | if (t == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return NULL; | 
|  | } | 
|  | uprv_memset(t, 0, sizeof(tempUCATable)); | 
|  |  | 
|  | maxet  = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); | 
|  | if (maxet == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | uprv_memset(maxet, 0, sizeof(MaxExpansionTable)); | 
|  | t->maxExpansions       = maxet; | 
|  |  | 
|  | maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); | 
|  | if (maxjet == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable)); | 
|  | t->maxJamoExpansions = maxjet; | 
|  |  | 
|  | t->image = image; | 
|  | t->options = opts; | 
|  |  | 
|  | t->UCA = UCA; | 
|  | t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); | 
|  | /* test for NULL */ | 
|  | if (t->expansions == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | uprv_memset(t->expansions, 0, sizeof(ExpansionTable)); | 
|  |  | 
|  | t->mapping = utrie_open(NULL, NULL, UCOL_ELM_TRIE_CAPACITY, | 
|  | UCOL_SPECIAL_FLAG | (initTag<<24), | 
|  | UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24), | 
|  | TRUE); // Do your own mallocs for the structure, array and have linear Latin 1 | 
|  | if (U_FAILURE(*status)) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, NULL, status); | 
|  | if (U_FAILURE(*status)) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | uhash_setValueDeleter(t->prefixLookup, uprv_free); | 
|  |  | 
|  | t->contractions = uprv_cnttab_open(t->mapping, status); | 
|  | if (U_FAILURE(*status)) { | 
|  | goto cleanup; | 
|  | } | 
|  |  | 
|  | /* copy UCA's maxexpansion and merge as we go along */ | 
|  | if (UCA != NULL) { | 
|  | /* adding an extra initial value for easier manipulation */ | 
|  | maxet->size            = (int32_t)(UCA->lastEndExpansionCE - UCA->endExpansionCE) + 2; | 
|  | maxet->position        = maxet->size - 1; | 
|  | maxet->endExpansionCE  = | 
|  | (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size); | 
|  | /* test for NULL */ | 
|  | if (maxet->endExpansionCE == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | maxet->expansionCESize = | 
|  | (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size); | 
|  | /* test for NULL */ | 
|  | if (maxet->expansionCESize == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | /* initialized value */ | 
|  | *(maxet->endExpansionCE)  = 0; | 
|  | *(maxet->expansionCESize) = 0; | 
|  | uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE, | 
|  | sizeof(uint32_t) * (maxet->size - 1)); | 
|  | uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize, | 
|  | sizeof(uint8_t) * (maxet->size - 1)); | 
|  | } | 
|  | else { | 
|  | maxet->size     = 0; | 
|  | } | 
|  | maxjet->endExpansionCE = NULL; | 
|  | maxjet->isV = NULL; | 
|  | maxjet->size = 0; | 
|  | maxjet->position = 0; | 
|  | maxjet->maxLSize = 1; | 
|  | maxjet->maxVSize = 1; | 
|  | maxjet->maxTSize = 1; | 
|  |  | 
|  | t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); | 
|  | /* test for NULL */ | 
|  | if (t->unsafeCP == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); | 
|  | /* test for NULL */ | 
|  | if (t->contrEndCP == NULL) { | 
|  | goto allocation_failure; | 
|  | } | 
|  | uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | t->cmLookup = NULL; | 
|  | return t; | 
|  |  | 
|  | allocation_failure: | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | cleanup: | 
|  | uprv_uca_closeTempTable(t); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static tempUCATable* U_EXPORT2 | 
|  | uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) { | 
|  | if(U_FAILURE(*status)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); | 
|  | /* test for NULL */ | 
|  | if (r == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return NULL; | 
|  | } | 
|  | uprv_memset(r, 0, sizeof(tempUCATable)); | 
|  |  | 
|  | /* mapping */ | 
|  | if(t->mapping != NULL) { | 
|  | /*r->mapping = ucmpe32_clone(t->mapping, status);*/ | 
|  | r->mapping = utrie_clone(NULL, t->mapping, NULL, 0); | 
|  | } | 
|  |  | 
|  | // a hashing clone function would be very nice. We have none currently... | 
|  | // However, we should be good, as closing should not produce any prefixed elements. | 
|  | r->prefixLookup = NULL; // prefixes are not used in closing | 
|  |  | 
|  | /* expansions */ | 
|  | if(t->expansions != NULL) { | 
|  | r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); | 
|  | /* test for NULL */ | 
|  | if (r->expansions == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | r->expansions->position = t->expansions->position; | 
|  | r->expansions->size = t->expansions->size; | 
|  | if(t->expansions->CEs != NULL) { | 
|  | r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size); | 
|  | /* test for NULL */ | 
|  | if (r->expansions->CEs == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position); | 
|  | } else { | 
|  | r->expansions->CEs = NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | if(t->contractions != NULL) { | 
|  | r->contractions = uprv_cnttab_clone(t->contractions, status); | 
|  | // Check for cloning failure. | 
|  | if (r->contractions == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | r->contractions->mapping = r->mapping; | 
|  | } | 
|  |  | 
|  | if(t->maxExpansions != NULL) { | 
|  | r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); | 
|  | /* test for NULL */ | 
|  | if (r->maxExpansions == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | r->maxExpansions->size = t->maxExpansions->size; | 
|  | r->maxExpansions->position = t->maxExpansions->position; | 
|  | if(t->maxExpansions->endExpansionCE != NULL) { | 
|  | r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size); | 
|  | /* test for NULL */ | 
|  | if (r->maxExpansions->endExpansionCE == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size); | 
|  | uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t)); | 
|  | } else { | 
|  | r->maxExpansions->endExpansionCE = NULL; | 
|  | } | 
|  | if(t->maxExpansions->expansionCESize != NULL) { | 
|  | r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size); | 
|  | /* test for NULL */ | 
|  | if (r->maxExpansions->expansionCESize == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size); | 
|  | uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t)); | 
|  | } else { | 
|  | r->maxExpansions->expansionCESize = NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | if(t->maxJamoExpansions != NULL) { | 
|  | r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); | 
|  | /* test for NULL */ | 
|  | if (r->maxJamoExpansions == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | r->maxJamoExpansions->size = t->maxJamoExpansions->size; | 
|  | r->maxJamoExpansions->position = t->maxJamoExpansions->position; | 
|  | r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize; | 
|  | r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize; | 
|  | r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize; | 
|  | if(t->maxJamoExpansions->size != 0) { | 
|  | r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size); | 
|  | /* test for NULL */ | 
|  | if (r->maxJamoExpansions->endExpansionCE == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t)); | 
|  | r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size); | 
|  | /* test for NULL */ | 
|  | if (r->maxJamoExpansions->isV == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool)); | 
|  | } else { | 
|  | r->maxJamoExpansions->endExpansionCE = NULL; | 
|  | r->maxJamoExpansions->isV = NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | if(t->unsafeCP != NULL) { | 
|  | r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); | 
|  | /* test for NULL */ | 
|  | if (r->unsafeCP == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | } | 
|  |  | 
|  | if(t->contrEndCP != NULL) { | 
|  | r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); | 
|  | /* test for NULL */ | 
|  | if (r->contrEndCP == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | goto cleanup; | 
|  | } | 
|  | uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | } | 
|  |  | 
|  | r->UCA = t->UCA; | 
|  | r->image = t->image; | 
|  | r->options = t->options; | 
|  |  | 
|  | return r; | 
|  | cleanup: | 
|  | uprv_uca_closeTempTable(t); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  |  | 
|  | U_CAPI void  U_EXPORT2 | 
|  | uprv_uca_closeTempTable(tempUCATable *t) { | 
|  | if(t != NULL) { | 
|  | if (t->expansions != NULL) { | 
|  | uprv_free(t->expansions->CEs); | 
|  | uprv_free(t->expansions); | 
|  | } | 
|  | if(t->contractions != NULL) { | 
|  | uprv_cnttab_close(t->contractions); | 
|  | } | 
|  | if (t->mapping != NULL) { | 
|  | utrie_close(t->mapping); | 
|  | } | 
|  |  | 
|  | if(t->prefixLookup != NULL) { | 
|  | uhash_close(t->prefixLookup); | 
|  | } | 
|  |  | 
|  | if (t->maxExpansions != NULL) { | 
|  | uprv_free(t->maxExpansions->endExpansionCE); | 
|  | uprv_free(t->maxExpansions->expansionCESize); | 
|  | uprv_free(t->maxExpansions); | 
|  | } | 
|  |  | 
|  | if (t->maxJamoExpansions->size > 0) { | 
|  | uprv_free(t->maxJamoExpansions->endExpansionCE); | 
|  | uprv_free(t->maxJamoExpansions->isV); | 
|  | } | 
|  | uprv_free(t->maxJamoExpansions); | 
|  |  | 
|  | uprv_free(t->unsafeCP); | 
|  | uprv_free(t->contrEndCP); | 
|  |  | 
|  | if (t->cmLookup != NULL) { | 
|  | uprv_free(t->cmLookup->cPoints); | 
|  | uprv_free(t->cmLookup); | 
|  | } | 
|  |  | 
|  | uprv_free(t); | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Looks for the maximum length of all expansion sequences ending with the same | 
|  | * collation element. The size required for maxexpansion and maxsize is | 
|  | * returned if the arrays are too small. | 
|  | * @param endexpansion the last expansion collation element to be added | 
|  | * @param expansionsize size of the expansion | 
|  | * @param maxexpansion data structure to store the maximum expansion data. | 
|  | * @param status error status | 
|  | * @returns size of the maxexpansion and maxsize used. | 
|  | */ | 
|  | static int uprv_uca_setMaxExpansion(uint32_t           endexpansion, | 
|  | uint8_t            expansionsize, | 
|  | MaxExpansionTable *maxexpansion, | 
|  | UErrorCode        *status) | 
|  | { | 
|  | if (maxexpansion->size == 0) { | 
|  | /* we'll always make the first element 0, for easier manipulation */ | 
|  | maxexpansion->endExpansionCE = | 
|  | (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t)); | 
|  | /* test for NULL */ | 
|  | if (maxexpansion->endExpansionCE == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | *(maxexpansion->endExpansionCE) = 0; | 
|  | maxexpansion->expansionCESize = | 
|  | (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t)); | 
|  | /* test for NULL */; | 
|  | if (maxexpansion->expansionCESize == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | *(maxexpansion->expansionCESize) = 0; | 
|  | maxexpansion->size     = INIT_EXP_TABLE_SIZE; | 
|  | maxexpansion->position = 0; | 
|  | } | 
|  |  | 
|  | if (maxexpansion->position + 1 == maxexpansion->size) { | 
|  | uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, | 
|  | 2 * maxexpansion->size * sizeof(uint32_t)); | 
|  | if (neweece == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | maxexpansion->endExpansionCE  = neweece; | 
|  |  | 
|  | uint8_t  *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize, | 
|  | 2 * maxexpansion->size * sizeof(uint8_t)); | 
|  | if (neweces == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | maxexpansion->expansionCESize = neweces; | 
|  | maxexpansion->size *= 2; | 
|  | } | 
|  |  | 
|  | uint32_t *pendexpansionce = maxexpansion->endExpansionCE; | 
|  | uint8_t  *pexpansionsize  = maxexpansion->expansionCESize; | 
|  | int      pos              = maxexpansion->position; | 
|  |  | 
|  | uint32_t *start = pendexpansionce; | 
|  | uint32_t *limit = pendexpansionce + pos; | 
|  |  | 
|  | /* using binary search to determine if last expansion element is | 
|  | already in the array */ | 
|  | uint32_t *mid; | 
|  | int       result = -1; | 
|  | while (start < limit - 1) { | 
|  | mid = start + ((limit - start) >> 1); | 
|  | if (endexpansion <= *mid) { | 
|  | limit = mid; | 
|  | } | 
|  | else { | 
|  | start = mid; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (*start == endexpansion) { | 
|  | result = (int)(start - pendexpansionce); | 
|  | } | 
|  | else if (*limit == endexpansion) { | 
|  | result = (int)(limit - pendexpansionce); | 
|  | } | 
|  |  | 
|  | if (result > -1) { | 
|  | /* found the ce in expansion, we'll just modify the size if it is | 
|  | smaller */ | 
|  | uint8_t *currentsize = pexpansionsize + result; | 
|  | if (*currentsize < expansionsize) { | 
|  | *currentsize = expansionsize; | 
|  | } | 
|  | } | 
|  | else { | 
|  | /* we'll need to squeeze the value into the array. | 
|  | initial implementation. */ | 
|  | /* shifting the subarray down by 1 */ | 
|  | int      shiftsize     = (int)((pendexpansionce + pos) - start); | 
|  | uint32_t *shiftpos     = start + 1; | 
|  | uint8_t  *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce); | 
|  |  | 
|  | /* okay need to rearrange the array into sorted order */ | 
|  | if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */ | 
|  | *(pendexpansionce + pos + 1) = endexpansion; | 
|  | *(pexpansionsize + pos + 1)  = expansionsize; | 
|  | } | 
|  | else { | 
|  | uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t)); | 
|  | uprv_memmove(sizeshiftpos + 1, sizeshiftpos, | 
|  | shiftsize * sizeof(uint8_t)); | 
|  | *shiftpos     = endexpansion; | 
|  | *sizeshiftpos = expansionsize; | 
|  | } | 
|  | maxexpansion->position ++; | 
|  |  | 
|  | #ifdef UCOL_DEBUG | 
|  | int   temp; | 
|  | UBool found = FALSE; | 
|  | for (temp = 0; temp < maxexpansion->position; temp ++) { | 
|  | if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) { | 
|  | fprintf(stderr, "expansions %d\n", temp); | 
|  | } | 
|  | if (pendexpansionce[temp] == endexpansion) { | 
|  | found =TRUE; | 
|  | if (pexpansionsize[temp] < expansionsize) { | 
|  | fprintf(stderr, "expansions size %d\n", temp); | 
|  | } | 
|  | } | 
|  | } | 
|  | if (pendexpansionce[temp] == endexpansion) { | 
|  | found =TRUE; | 
|  | if (pexpansionsize[temp] < expansionsize) { | 
|  | fprintf(stderr, "expansions size %d\n", temp); | 
|  | } | 
|  | } | 
|  | if (!found) | 
|  | fprintf(stderr, "expansion not found %d\n", temp); | 
|  | #endif | 
|  | } | 
|  |  | 
|  | return maxexpansion->position; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Sets the maximum length of all jamo expansion sequences ending with the same | 
|  | * collation element. The size required for maxexpansion and maxsize is | 
|  | * returned if the arrays are too small. | 
|  | * @param ch the jamo codepoint | 
|  | * @param endexpansion the last expansion collation element to be added | 
|  | * @param expansionsize size of the expansion | 
|  | * @param maxexpansion data structure to store the maximum expansion data. | 
|  | * @param status error status | 
|  | * @returns size of the maxexpansion and maxsize used. | 
|  | */ | 
|  | static int uprv_uca_setMaxJamoExpansion(UChar                  ch, | 
|  | uint32_t               endexpansion, | 
|  | uint8_t                expansionsize, | 
|  | MaxJamoExpansionTable *maxexpansion, | 
|  | UErrorCode            *status) | 
|  | { | 
|  | UBool isV = TRUE; | 
|  | if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) { | 
|  | /* determines L for Jamo, doesn't need to store this since it is never | 
|  | at the end of a expansion */ | 
|  | if (maxexpansion->maxLSize < expansionsize) { | 
|  | maxexpansion->maxLSize = expansionsize; | 
|  | } | 
|  | return maxexpansion->position; | 
|  | } | 
|  |  | 
|  | if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) { | 
|  | /* determines V for Jamo */ | 
|  | if (maxexpansion->maxVSize < expansionsize) { | 
|  | maxexpansion->maxVSize = expansionsize; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) { | 
|  | isV = FALSE; | 
|  | /* determines T for Jamo */ | 
|  | if (maxexpansion->maxTSize < expansionsize) { | 
|  | maxexpansion->maxTSize = expansionsize; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (maxexpansion->size == 0) { | 
|  | /* we'll always make the first element 0, for easier manipulation */ | 
|  | maxexpansion->endExpansionCE = | 
|  | (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t)); | 
|  | /* test for NULL */; | 
|  | if (maxexpansion->endExpansionCE == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | *(maxexpansion->endExpansionCE) = 0; | 
|  | maxexpansion->isV = | 
|  | (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool)); | 
|  | /* test for NULL */; | 
|  | if (maxexpansion->isV == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | uprv_free(maxexpansion->endExpansionCE); | 
|  | maxexpansion->endExpansionCE = NULL; | 
|  | return 0; | 
|  | } | 
|  | *(maxexpansion->isV) = 0; | 
|  | maxexpansion->size     = INIT_EXP_TABLE_SIZE; | 
|  | maxexpansion->position = 0; | 
|  | } | 
|  |  | 
|  | if (maxexpansion->position + 1 == maxexpansion->size) { | 
|  | maxexpansion->size *= 2; | 
|  | maxexpansion->endExpansionCE = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, | 
|  | maxexpansion->size * sizeof(uint32_t)); | 
|  | if (maxexpansion->endExpansionCE == NULL) { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "out of memory for maxExpansions\n"); | 
|  | #endif | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | maxexpansion->isV  = (UBool *)uprv_realloc(maxexpansion->isV, | 
|  | maxexpansion->size * sizeof(UBool)); | 
|  | if (maxexpansion->isV == NULL) { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "out of memory for maxExpansions\n"); | 
|  | #endif | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | uprv_free(maxexpansion->endExpansionCE); | 
|  | maxexpansion->endExpansionCE = NULL; | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | uint32_t *pendexpansionce = maxexpansion->endExpansionCE; | 
|  | int       pos             = maxexpansion->position; | 
|  |  | 
|  | while (pos > 0) { | 
|  | pos --; | 
|  | if (*(pendexpansionce + pos) == endexpansion) { | 
|  | return maxexpansion->position; | 
|  | } | 
|  | } | 
|  |  | 
|  | *(pendexpansionce + maxexpansion->position) = endexpansion; | 
|  | *(maxexpansion->isV + maxexpansion->position) = isV; | 
|  | maxexpansion->position ++; | 
|  |  | 
|  | return maxexpansion->position; | 
|  | } | 
|  |  | 
|  |  | 
|  | static void ContrEndCPSet(uint8_t *table, UChar c) { | 
|  | uint32_t    hash; | 
|  | uint8_t     *htByte; | 
|  |  | 
|  | hash = c; | 
|  | if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { | 
|  | hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; | 
|  | } | 
|  | htByte = &table[hash>>3]; | 
|  | *htByte |= (1 << (hash & 7)); | 
|  | } | 
|  |  | 
|  |  | 
|  | static void unsafeCPSet(uint8_t *table, UChar c) { | 
|  | uint32_t    hash; | 
|  | uint8_t     *htByte; | 
|  |  | 
|  | hash = c; | 
|  | if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { | 
|  | if (hash >= 0xd800 && hash <= 0xf8ff) { | 
|  | /*  Part of a surrogate, or in private use area.            */ | 
|  | /*   These don't go in the table                            */ | 
|  | return; | 
|  | } | 
|  | hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; | 
|  | } | 
|  | htByte = &table[hash>>3]; | 
|  | *htByte |= (1 << (hash & 7)); | 
|  | } | 
|  |  | 
|  | static void | 
|  | uprv_uca_createCMTable(tempUCATable *t, int32_t noOfCM, UErrorCode *status) { | 
|  | t->cmLookup = (CombinClassTable *)uprv_malloc(sizeof(CombinClassTable)); | 
|  | if (t->cmLookup==NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  | t->cmLookup->cPoints=(UChar *)uprv_malloc(noOfCM*sizeof(UChar)); | 
|  | if (t->cmLookup->cPoints ==NULL) { | 
|  | uprv_free(t->cmLookup); | 
|  | t->cmLookup = NULL; | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | t->cmLookup->size=noOfCM; | 
|  | uprv_memset(t->cmLookup->index, 0, sizeof(t->cmLookup->index)); | 
|  |  | 
|  | return; | 
|  | } | 
|  |  | 
|  | static void | 
|  | uprv_uca_copyCMTable(tempUCATable *t, UChar *cm, uint16_t *index) { | 
|  | int32_t count=0; | 
|  |  | 
|  | for (int32_t i=0; i<256; ++i) { | 
|  | if (index[i]>0) { | 
|  | // cPoints is ordered by combining class value. | 
|  | uprv_memcpy(t->cmLookup->cPoints+count, cm+(i<<8), index[i]*sizeof(UChar)); | 
|  | count += index[i]; | 
|  | } | 
|  | t->cmLookup->index[i]=count; | 
|  | } | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* 1. to the UnsafeCP hash table, add all chars with combining class != 0     */ | 
|  | /* 2. build combining marks table for all chars with combining class != 0     */ | 
|  | static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) { | 
|  |  | 
|  | UChar              c; | 
|  | uint16_t           fcd;     // Hi byte is lead combining class. lo byte is trailing combing class. | 
|  | UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table | 
|  | UChar *cm=NULL; | 
|  | uint16_t index[256]; | 
|  | int32_t count=0; | 
|  | const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (buildCMTable) { | 
|  | if (cm==NULL) { | 
|  | cm = (UChar *)uprv_malloc(sizeof(UChar)*UCOL_MAX_CM_TAB); | 
|  | if (cm==NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  | } | 
|  | uprv_memset(index, 0, sizeof(index)); | 
|  | } | 
|  | for (c=0; c<0xffff; c++) { | 
|  | if (U16_IS_LEAD(c)) { | 
|  | fcd = 0; | 
|  | if (nfcImpl->singleLeadMightHaveNonZeroFCD16(c)) { | 
|  | UChar32 supp = U16_GET_SUPPLEMENTARY(c, 0xdc00); | 
|  | UChar32 suppLimit = supp + 0x400; | 
|  | while (supp < suppLimit) { | 
|  | fcd |= nfcImpl->getFCD16FromNormData(supp++); | 
|  | } | 
|  | } | 
|  | } else { | 
|  | fcd = nfcImpl->getFCD16(c); | 
|  | } | 
|  | if (fcd >= 0x100 ||               // if the leading combining class(c) > 0 || | 
|  | (U16_IS_LEAD(c) && fcd != 0)) {//    c is a leading surrogate with some FCD data | 
|  | if (buildCMTable) { | 
|  | uint32_t cClass = fcd & 0xff; | 
|  | //uint32_t temp=(cClass<<8)+index[cClass]; | 
|  | cm[(cClass<<8)+index[cClass]] = c; // | 
|  | index[cClass]++; | 
|  | count++; | 
|  | } | 
|  | unsafeCPSet(t->unsafeCP, c); | 
|  | } | 
|  | } | 
|  |  | 
|  | // copy to cm table | 
|  | if (buildCMTable) { | 
|  | uprv_uca_createCMTable(t, count, status); | 
|  | if(U_FAILURE(*status)) { | 
|  | if (cm!=NULL) { | 
|  | uprv_free(cm); | 
|  | } | 
|  | return; | 
|  | } | 
|  | uprv_uca_copyCMTable(t, cm, index); | 
|  | } | 
|  |  | 
|  | if(t->prefixLookup != NULL) { | 
|  | int32_t i = -1; | 
|  | const UHashElement *e = NULL; | 
|  | UCAElements *element = NULL; | 
|  | UChar NFCbuf[256]; | 
|  | while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) { | 
|  | element = (UCAElements *)e->value.pointer; | 
|  | // codepoints here are in the NFD form. We need to add the | 
|  | // first code point of the NFC form to unsafe, because | 
|  | // strcoll needs to backup over them. | 
|  | unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0, | 
|  | NFCbuf, 256, status); | 
|  | unsafeCPSet(t->unsafeCP, NFCbuf[0]); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (cm!=NULL) { | 
|  | uprv_free(cm); | 
|  | } | 
|  | } | 
|  |  | 
|  | static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE, | 
|  | UCAElements *element, UErrorCode *status) | 
|  | { | 
|  | // currently the longest prefix we're supporting in Japanese is two characters | 
|  | // long. Although this table could quite easily mimic complete contraction stuff | 
|  | // there is no good reason to make a general solution, as it would require some | 
|  | // error prone messing. | 
|  | CntTable *contractions = t->contractions; | 
|  | UChar32 cp; | 
|  | uint32_t cpsize = 0; | 
|  | UChar *oldCP = element->cPoints; | 
|  | uint32_t oldCPSize = element->cSize; | 
|  |  | 
|  |  | 
|  | contractions->currentTag = SPEC_PROC_TAG; | 
|  |  | 
|  | // here, we will normalize & add prefix to the table. | 
|  | uint32_t j = 0; | 
|  | #ifdef UCOL_DEBUG | 
|  | for(j=0; j<element->cSize; j++) { | 
|  | fprintf(stdout, "CP: %04X ", element->cPoints[j]); | 
|  | } | 
|  | fprintf(stdout, "El: %08X Pref: ", CE); | 
|  | for(j=0; j<element->prefixSize; j++) { | 
|  | fprintf(stdout, "%04X ", element->prefix[j]); | 
|  | } | 
|  | fprintf(stdout, "%08X ", element->mapCE); | 
|  | #endif | 
|  |  | 
|  | for (j = 1; j<element->prefixSize; j++) {   /* First add NFD prefix chars to unsafe CP hash table */ | 
|  | // Unless it is a trail surrogate, which is handled algoritmically and | 
|  | // shouldn't take up space in the table. | 
|  | if(!(U16_IS_TRAIL(element->prefix[j]))) { | 
|  | unsafeCPSet(t->unsafeCP, element->prefix[j]); | 
|  | } | 
|  | } | 
|  |  | 
|  | UChar tempPrefix = 0; | 
|  |  | 
|  | for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards | 
|  | // therefore, we will promptly reverse the prefix buffer... | 
|  | tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1); | 
|  | *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j]; | 
|  | element->prefix[j] = tempPrefix; | 
|  | } | 
|  |  | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stdout, "Reversed: "); | 
|  | for(j=0; j<element->prefixSize; j++) { | 
|  | fprintf(stdout, "%04X ", element->prefix[j]); | 
|  | } | 
|  | fprintf(stdout, "%08X\n", element->mapCE); | 
|  | #endif | 
|  |  | 
|  | // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix | 
|  | if(!(U16_IS_TRAIL(element->cPoints[0]))) { | 
|  | unsafeCPSet(t->unsafeCP, element->cPoints[0]); | 
|  | } | 
|  |  | 
|  | // Maybe we need this... To handle prefixes completely in the forward direction... | 
|  | //if(element->cSize == 1) { | 
|  | //  if(!(U16_IS_TRAIL(element->cPoints[0]))) { | 
|  | //    ContrEndCPSet(t->contrEndCP, element->cPoints[0]); | 
|  | //  } | 
|  | //} | 
|  |  | 
|  | element->cPoints = element->prefix; | 
|  | element->cSize = element->prefixSize; | 
|  |  | 
|  | // Add the last char of the contraction to the contraction-end hash table. | 
|  | // unless it is a trail surrogate, which is handled algorithmically and | 
|  | // shouldn't be in the table | 
|  | if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) { | 
|  | ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); | 
|  | } | 
|  |  | 
|  | // First we need to check if contractions starts with a surrogate | 
|  | U16_NEXT(element->cPoints, cpsize, element->cSize, cp); | 
|  |  | 
|  | // If there are any Jamos in the contraction, we should turn on special | 
|  | // processing for Jamos | 
|  | if(UCOL_ISJAMO(element->prefix[0])) { | 
|  | t->image->jamoSpecial = TRUE; | 
|  | } | 
|  | /* then we need to deal with it */ | 
|  | /* we could aready have something in table - or we might not */ | 
|  |  | 
|  | if(!isPrefix(CE)) { | 
|  | /* if it wasn't contraction, we wouldn't end up here*/ | 
|  | int32_t firstContractionOffset = 0; | 
|  | firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); | 
|  | CE =  constructContractCE(SPEC_PROC_TAG, firstContractionOffset); | 
|  | } else { /* we are adding to existing contraction */ | 
|  | /* there were already some elements in the table, so we need to add a new contraction */ | 
|  | /* Two things can happen here: either the codepoint is already in the table, or it is not */ | 
|  | int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status); | 
|  | if(position > 0) {       /* if it is we just continue down the chain */ | 
|  | uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); | 
|  | uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status); | 
|  | } else {                  /* if it isn't, we will have to create a new sequence */ | 
|  | uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status); | 
|  | } | 
|  | } | 
|  |  | 
|  | element->cPoints = oldCP; | 
|  | element->cSize = oldCPSize; | 
|  |  | 
|  | return CE; | 
|  | } | 
|  |  | 
|  | // Note regarding surrogate handling: We are interested only in the single | 
|  | // or leading surrogates in a contraction. If a surrogate is somewhere else | 
|  | // in the contraction, it is going to be handled as a pair of code units, | 
|  | // as it doesn't affect the performance AND handling surrogates specially | 
|  | // would complicate code way too much. | 
|  | static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE, | 
|  | UCAElements *element, UErrorCode *status) | 
|  | { | 
|  | CntTable *contractions = t->contractions; | 
|  | UChar32 cp; | 
|  | uint32_t cpsize = 0; | 
|  |  | 
|  | contractions->currentTag = CONTRACTION_TAG; | 
|  |  | 
|  | // First we need to check if contractions starts with a surrogate | 
|  | U16_NEXT(element->cPoints, cpsize, element->cSize, cp); | 
|  |  | 
|  | if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first | 
|  | uint32_t j = 0; | 
|  | for (j=1; j<element->cSize; j++) {   /* First add contraction chars to unsafe CP hash table */ | 
|  | // Unless it is a trail surrogate, which is handled algoritmically and | 
|  | // shouldn't take up space in the table. | 
|  | if(!(U16_IS_TRAIL(element->cPoints[j]))) { | 
|  | unsafeCPSet(t->unsafeCP, element->cPoints[j]); | 
|  | } | 
|  | } | 
|  | // Add the last char of the contraction to the contraction-end hash table. | 
|  | // unless it is a trail surrogate, which is handled algorithmically and | 
|  | // shouldn't be in the table | 
|  | if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) { | 
|  | ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); | 
|  | } | 
|  |  | 
|  | // If there are any Jamos in the contraction, we should turn on special | 
|  | // processing for Jamos | 
|  | if(UCOL_ISJAMO(element->cPoints[0])) { | 
|  | t->image->jamoSpecial = TRUE; | 
|  | } | 
|  | /* then we need to deal with it */ | 
|  | /* we could aready have something in table - or we might not */ | 
|  | element->cPoints+=cpsize; | 
|  | element->cSize-=cpsize; | 
|  | if(!isContraction(CE)) { | 
|  | /* if it wasn't contraction, we wouldn't end up here*/ | 
|  | int32_t firstContractionOffset = 0; | 
|  | firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); | 
|  | CE =  constructContractCE(CONTRACTION_TAG, firstContractionOffset); | 
|  | } else { /* we are adding to existing contraction */ | 
|  | /* there were already some elements in the table, so we need to add a new contraction */ | 
|  | /* Two things can happen here: either the codepoint is already in the table, or it is not */ | 
|  | int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status); | 
|  | if(position > 0) {       /* if it is we just continue down the chain */ | 
|  | uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); | 
|  | uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status); | 
|  | } else {                  /* if it isn't, we will have to create a new sequence */ | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status); | 
|  | } | 
|  | } | 
|  | element->cPoints-=cpsize; | 
|  | element->cSize+=cpsize; | 
|  | /*ucmpe32_set(t->mapping, cp, CE);*/ | 
|  | utrie_set32(t->mapping, cp, CE); | 
|  | } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */ | 
|  | /*ucmpe32_set(t->mapping, cp, element->mapCE);*/ | 
|  | utrie_set32(t->mapping, cp, element->mapCE); | 
|  | } else { /* fill out the first stage of the contraction with the surrogate CE */ | 
|  | uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status); | 
|  | uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status); | 
|  | } | 
|  | return CE; | 
|  | } | 
|  |  | 
|  |  | 
|  | static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) { | 
|  | int32_t firstContractionOffset = 0; | 
|  | //    uint32_t contractionElement = UCOL_NOT_FOUND; | 
|  |  | 
|  | if(U_FAILURE(*status)) { | 
|  | return UCOL_NOT_FOUND; | 
|  | } | 
|  |  | 
|  | /* end of recursion */ | 
|  | if(element->cSize == 1) { | 
|  | if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) { | 
|  | uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status); | 
|  | uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status); | 
|  | return existingCE; | 
|  | } else { | 
|  | return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */ | 
|  | } | 
|  | } | 
|  |  | 
|  | /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */ | 
|  | /* for both backward and forward cycles */ | 
|  |  | 
|  | /* we encountered either an empty space or a non-contraction element */ | 
|  | /* this means we are constructing a new contraction sequence */ | 
|  | element->cPoints++; | 
|  | element->cSize--; | 
|  | if(!isCntTableElement(existingCE)) { | 
|  | /* if it wasn't contraction, we wouldn't end up here*/ | 
|  | firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); | 
|  | uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status); | 
|  | existingCE =  constructContractCE(contractions->currentTag, firstContractionOffset); | 
|  | } else { /* we are adding to existing contraction */ | 
|  | /* there were already some elements in the table, so we need to add a new contraction */ | 
|  | /* Two things can happen here: either the codepoint is already in the table, or it is not */ | 
|  | int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status); | 
|  | if(position > 0) {       /* if it is we just continue down the chain */ | 
|  | uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status); | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); | 
|  | uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status); | 
|  | } else {                  /* if it isn't, we will have to create a new sequence */ | 
|  | uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); | 
|  | uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status); | 
|  | } | 
|  | } | 
|  | element->cPoints--; | 
|  | element->cSize++; | 
|  | return existingCE; | 
|  | } | 
|  |  | 
|  | static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) { | 
|  | uint32_t CE = UCOL_NOT_FOUND; | 
|  | // This should add a completely ignorable element to the | 
|  | // unsafe table, so that backward iteration will skip | 
|  | // over it when treating contractions. | 
|  | uint32_t i = 0; | 
|  | if(element->mapCE == 0) { | 
|  | for(i = 0; i < element->cSize; i++) { | 
|  | if(!U16_IS_TRAIL(element->cPoints[i])) { | 
|  | unsafeCPSet(t->unsafeCP, element->cPoints[i]); | 
|  | } | 
|  | } | 
|  | } | 
|  | if(element->cSize > 1) { /* we're adding a contraction */ | 
|  | uint32_t i = 0; | 
|  | UChar32 cp; | 
|  |  | 
|  | U16_NEXT(element->cPoints, i, element->cSize, cp); | 
|  | /*CE = ucmpe32_get(t->mapping, cp);*/ | 
|  | CE = utrie_get32(t->mapping, cp, NULL); | 
|  |  | 
|  | CE = uprv_uca_addContraction(t, CE, element, status); | 
|  | } else { /* easy case, */ | 
|  | /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/ | 
|  | CE = utrie_get32(t->mapping, element->cPoints[0], NULL); | 
|  |  | 
|  | if( CE != UCOL_NOT_FOUND) { | 
|  | if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */ | 
|  | if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop | 
|  | // Only expansions and regular CEs can go here... Contractions will never happen in this place | 
|  | uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status); | 
|  | /* This loop has to change the CE at the end of contraction REDO!*/ | 
|  | uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status); | 
|  | } | 
|  | } else { | 
|  | /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/ | 
|  | utrie_set32(t->mapping, element->cPoints[0], element->mapCE); | 
|  | if ((element->prefixSize!=0) && (!isSpecial(CE) || (getCETag(CE)!=IMPLICIT_TAG))) { | 
|  | UCAElements *origElem = (UCAElements *)uprv_malloc(sizeof(UCAElements)); | 
|  | /* test for NULL */ | 
|  | if (origElem== NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | /* copy the original UCA value */ | 
|  | origElem->prefixSize = 0; | 
|  | origElem->prefix = NULL; | 
|  | origElem->cPoints = origElem->uchars; | 
|  | origElem->cPoints[0] = element->cPoints[0]; | 
|  | origElem->cSize = 1; | 
|  | origElem->CEs[0]=CE; | 
|  | origElem->mapCE=CE; | 
|  | origElem->noOfCEs=1; | 
|  | uprv_uca_finalizeAddition(t, origElem, status); | 
|  | uprv_free(origElem); | 
|  | } | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]); | 
|  | //*status = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | #endif | 
|  | } | 
|  | } else { | 
|  | /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/ | 
|  | utrie_set32(t->mapping, element->cPoints[0], element->mapCE); | 
|  | } | 
|  | } | 
|  | return CE; | 
|  | } | 
|  |  | 
|  | /* This adds a read element, while testing for existence */ | 
|  | U_CAPI uint32_t  U_EXPORT2 | 
|  | uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) { | 
|  | U_NAMESPACE_USE | 
|  |  | 
|  | ExpansionTable *expansions = t->expansions; | 
|  |  | 
|  | uint32_t i = 1; | 
|  | uint32_t expansion = 0; | 
|  | uint32_t CE; | 
|  |  | 
|  | if(U_FAILURE(*status)) { | 
|  | return 0xFFFF; | 
|  | } | 
|  |  | 
|  | element->mapCE = 0; // clear mapCE so that we can catch expansions | 
|  |  | 
|  | if(element->noOfCEs == 1) { | 
|  | element->mapCE = element->CEs[0]; | 
|  | } else { | 
|  | /* ICU 2.1 long primaries */ | 
|  | /* unfortunately, it looks like we have to look for a long primary here */ | 
|  | /* since in canonical closure we are going to hit some long primaries from */ | 
|  | /* the first phase, and they will come back as continuations/expansions */ | 
|  | /* destroying the effect of the previous opitimization */ | 
|  | /* A long primary is a three byte primary with starting secondaries and tertiaries */ | 
|  | /* It can appear in long runs of only primary differences (like east Asian tailorings) */ | 
|  | /* also, it should not be an expansion, as expansions would break with this */ | 
|  | // This part came in from ucol_bld.cpp | 
|  | //if(tok->expansion == 0 | 
|  | //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1 | 
|  | //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) { | 
|  | /* we will construct a special CE that will go unchanged to the table */ | 
|  | if(element->noOfCEs == 2 // a two CE expansion | 
|  | && isContinuation(element->CEs[1]) // which  is a continuation | 
|  | && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation, | 
|  | && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary | 
|  | && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary | 
|  | ) | 
|  | { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stdout, "Long primary %04X\n", element->cPoints[0]); | 
|  | #endif | 
|  | element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special | 
|  | | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary | 
|  | | ((element->CEs[1]>>24) & 0xFF);   // third byte of primary | 
|  | } | 
|  | else { | 
|  | expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) | 
|  | | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) | 
|  | & 0xFFFFF0)); | 
|  |  | 
|  | for(i = 1; i<element->noOfCEs; i++) { | 
|  | uprv_uca_addExpansion(expansions, element->CEs[i], status); | 
|  | } | 
|  | if(element->noOfCEs <= 0xF) { | 
|  | expansion |= element->noOfCEs; | 
|  | } else { | 
|  | uprv_uca_addExpansion(expansions, 0, status); | 
|  | } | 
|  | element->mapCE = expansion; | 
|  | uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], | 
|  | (uint8_t)element->noOfCEs, | 
|  | t->maxExpansions, | 
|  | status); | 
|  | if(UCOL_ISJAMO(element->cPoints[0])) { | 
|  | t->image->jamoSpecial = TRUE; | 
|  | uprv_uca_setMaxJamoExpansion(element->cPoints[0], | 
|  | element->CEs[element->noOfCEs - 1], | 
|  | (uint8_t)element->noOfCEs, | 
|  | t->maxJamoExpansions, | 
|  | status); | 
|  | } | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // We treat digits differently - they are "uber special" and should be | 
|  | // processed differently if numeric collation is on. | 
|  | UChar32 uniChar = 0; | 
|  | //printElement(element); | 
|  | if ((element->cSize == 2) && U16_IS_LEAD(element->cPoints[0])){ | 
|  | uniChar = U16_GET_SUPPLEMENTARY(element->cPoints[0], element->cPoints[1]); | 
|  | } else if (element->cSize == 1){ | 
|  | uniChar = element->cPoints[0]; | 
|  | } | 
|  |  | 
|  | // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only | 
|  | // one element to the expansion buffer. When we encounter a digit and we don't | 
|  | // do numeric collation, we will just pick the CE we have and break out of case | 
|  | // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked | 
|  | // a special, further processing will occur. If it's a simple CE, we'll return due | 
|  | // to how the loop is constructed. | 
|  | if (uniChar != 0 && u_isdigit(uniChar)){ | 
|  | expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element | 
|  | if(element->mapCE) { // if there is an expansion, we'll pick it here | 
|  | expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4); | 
|  | } else { | 
|  | expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4); | 
|  | } | 
|  | element->mapCE = expansion; | 
|  |  | 
|  | // Need to go back to the beginning of the digit string if in the middle! | 
|  | if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars | 
|  | unsafeCPSet(t->unsafeCP, (UChar)uniChar); | 
|  | } | 
|  | } | 
|  |  | 
|  | // here we want to add the prefix structure. | 
|  | // I will try to process it as a reverse contraction, if possible. | 
|  | // prefix buffer is already reversed. | 
|  |  | 
|  | if(element->prefixSize!=0) { | 
|  | // We keep the seen prefix starter elements in a hashtable | 
|  | // we need it to be able to distinguish between the simple | 
|  | // codepoints and prefix starters. Also, we need to use it | 
|  | // for canonical closure. | 
|  |  | 
|  | UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements)); | 
|  | /* test for NULL */ | 
|  | if (composed == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | uprv_memcpy(composed, element, sizeof(UCAElements)); | 
|  | composed->cPoints = composed->uchars; | 
|  | composed->prefix = composed->prefixChars; | 
|  |  | 
|  | composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status); | 
|  |  | 
|  |  | 
|  | if(t->prefixLookup != NULL) { | 
|  | UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element); | 
|  | if(uCE != NULL) { // there is already a set of code points here | 
|  | element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status); | 
|  | } else { // no code points, so this spot is clean | 
|  | element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status); | 
|  | uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements)); | 
|  | /* test for NULL */ | 
|  | if (uCE == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return 0; | 
|  | } | 
|  | uprv_memcpy(uCE, element, sizeof(UCAElements)); | 
|  | uCE->cPoints = uCE->uchars; | 
|  | uhash_put(t->prefixLookup, uCE, uCE, status); | 
|  | } | 
|  | if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) { | 
|  | // do it! | 
|  | composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status); | 
|  | } | 
|  | } | 
|  | uprv_free(composed); | 
|  | } | 
|  |  | 
|  | // We need to use the canonical iterator here | 
|  | // the way we do it is to generate the canonically equivalent strings | 
|  | // for the contraction and then add the sequences that pass FCD check | 
|  | if(element->cSize > 1 && !(element->cSize==2 && U16_IS_LEAD(element->cPoints[0]) && U16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included | 
|  | UnicodeString source(element->cPoints, element->cSize); | 
|  | CanonicalIterator it(source, *status); | 
|  | source = it.next(); | 
|  | while(!source.isBogus()) { | 
|  | if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) { | 
|  | element->cSize = source.extract(element->cPoints, 128, *status); | 
|  | uprv_uca_finalizeAddition(t, element, status); | 
|  | } | 
|  | source = it.next(); | 
|  | } | 
|  | CE = element->mapCE; | 
|  | } else { | 
|  | CE = uprv_uca_finalizeAddition(t, element, status); | 
|  | } | 
|  |  | 
|  | return CE; | 
|  | } | 
|  |  | 
|  |  | 
|  | /*void uprv_uca_getMaxExpansionJamo(CompactEIntArray       *mapping, */ | 
|  | static void uprv_uca_getMaxExpansionJamo(UNewTrie       *mapping, | 
|  | MaxExpansionTable     *maxexpansion, | 
|  | MaxJamoExpansionTable *maxjamoexpansion, | 
|  | UBool                  jamospecial, | 
|  | UErrorCode            *status) | 
|  | { | 
|  | const uint32_t VBASE  = 0x1161; | 
|  | const uint32_t TBASE  = 0x11A8; | 
|  | const uint32_t VCOUNT = 21; | 
|  | const uint32_t TCOUNT = 28; | 
|  |  | 
|  | uint32_t v = VBASE + VCOUNT - 1; | 
|  | uint32_t t = TBASE + TCOUNT - 1; | 
|  | uint32_t ce; | 
|  |  | 
|  | while (v >= VBASE) { | 
|  | /*ce = ucmpe32_get(mapping, v);*/ | 
|  | ce = utrie_get32(mapping, v, NULL); | 
|  | if (ce < UCOL_SPECIAL_FLAG) { | 
|  | uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status); | 
|  | } | 
|  | v --; | 
|  | } | 
|  |  | 
|  | while (t >= TBASE) | 
|  | { | 
|  | /*ce = ucmpe32_get(mapping, t);*/ | 
|  | ce = utrie_get32(mapping, t, NULL); | 
|  | if (ce < UCOL_SPECIAL_FLAG) { | 
|  | uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status); | 
|  | } | 
|  | t --; | 
|  | } | 
|  | /*  According to the docs, 99% of the time, the Jamo will not be special */ | 
|  | if (jamospecial) { | 
|  | /* gets the max expansion in all unicode characters */ | 
|  | int     count    = maxjamoexpansion->position; | 
|  | uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize + | 
|  | maxjamoexpansion->maxVSize + | 
|  | maxjamoexpansion->maxTSize); | 
|  | uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize + | 
|  | maxjamoexpansion->maxVSize); | 
|  |  | 
|  | while (count > 0) { | 
|  | count --; | 
|  | if (*(maxjamoexpansion->isV + count) == TRUE) { | 
|  | uprv_uca_setMaxExpansion( | 
|  | *(maxjamoexpansion->endExpansionCE + count), | 
|  | maxVSize, maxexpansion, status); | 
|  | } | 
|  | else { | 
|  | uprv_uca_setMaxExpansion( | 
|  | *(maxjamoexpansion->endExpansionCE + count), | 
|  | maxTSize, maxexpansion, status); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | U_CDECL_BEGIN | 
|  | static inline uint32_t U_CALLCONV | 
|  | getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) | 
|  | { | 
|  | uint32_t value; | 
|  | uint32_t tag; | 
|  | UChar32 limit; | 
|  | UBool inBlockZero; | 
|  |  | 
|  | limit=start+0x400; | 
|  | while(start<limit) { | 
|  | value=utrie_get32(trie, start, &inBlockZero); | 
|  | tag = getCETag(value); | 
|  | if(inBlockZero == TRUE) { | 
|  | start+=UTRIE_DATA_BLOCK_LENGTH; | 
|  | } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) { | 
|  | /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the | 
|  | * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is | 
|  | * nothing in this position and that it should be skipped. | 
|  | */ | 
|  | #ifdef UCOL_DEBUG | 
|  | static int32_t count = 1; | 
|  | fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value); | 
|  | #endif | 
|  | return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset); | 
|  | } else { | 
|  | ++start; | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  | U_CDECL_END | 
|  |  | 
|  | #ifdef UCOL_DEBUG | 
|  | // This is a debug function to print the contents of a trie. | 
|  | // It is used in conjuction with the code around utrie_unserialize call | 
|  | UBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) { | 
|  | if(start<0x10000) { | 
|  | fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value); | 
|  | } else { | 
|  | fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, U16_LEAD(start), U16_TRAIL(start), limit, U16_LEAD(limit), U16_TRAIL(limit), value); | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | int32_t | 
|  | myGetFoldingOffset(uint32_t data) { | 
|  | if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) { | 
|  | return (data&0xFFFFFF); | 
|  | } else { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  | #endif | 
|  |  | 
|  | U_CAPI UCATableHeader* U_EXPORT2 | 
|  | uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) { | 
|  | /*CompactEIntArray *mapping = t->mapping;*/ | 
|  | UNewTrie *mapping = t->mapping; | 
|  | ExpansionTable *expansions = t->expansions; | 
|  | CntTable *contractions = t->contractions; | 
|  | MaxExpansionTable *maxexpansion = t->maxExpansions; | 
|  |  | 
|  | if(U_FAILURE(*status)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar)); | 
|  |  | 
|  | int32_t contractionsSize = 0; | 
|  | contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status); | 
|  |  | 
|  | /* the following operation depends on the trie data. Therefore, we have to do it before */ | 
|  | /* the trie is compacted */ | 
|  | /* sets jamo expansions */ | 
|  | uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions, | 
|  | t->image->jamoSpecial, status); | 
|  |  | 
|  | /*ucmpe32_compact(mapping);*/ | 
|  | /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/ | 
|  | /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/ | 
|  | /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/ | 
|  |  | 
|  | // After setting the jamo expansions, compact the trie and get the needed size | 
|  | int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status); | 
|  |  | 
|  | uint32_t tableOffset = 0; | 
|  | uint8_t *dataStart; | 
|  |  | 
|  | /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */ | 
|  |  | 
|  | uint32_t toAllocate =(uint32_t)(headersize+ | 
|  | paddedsize(expansions->position*sizeof(uint32_t))+ | 
|  | paddedsize(mappingSize)+ | 
|  | paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+ | 
|  | //paddedsize(0x100*sizeof(uint32_t))  /* Latin1 is now included in the trie */ | 
|  | /* maxexpansion array */ | 
|  | + paddedsize(maxexpansion->position * sizeof(uint32_t)) + | 
|  | /* maxexpansion size array */ | 
|  | paddedsize(maxexpansion->position * sizeof(uint8_t)) + | 
|  | paddedsize(UCOL_UNSAFECP_TABLE_SIZE) +   /*  Unsafe chars             */ | 
|  | paddedsize(UCOL_UNSAFECP_TABLE_SIZE));    /*  Contraction Ending chars */ | 
|  |  | 
|  |  | 
|  | dataStart = (uint8_t *)uprv_malloc(toAllocate); | 
|  | /* test for NULL */ | 
|  | if (dataStart == NULL) { | 
|  | *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | UCATableHeader *myData = (UCATableHeader *)dataStart; | 
|  | // Please, do reset all the fields! | 
|  | uprv_memset(dataStart, 0, toAllocate); | 
|  | // Make sure we know this is reset | 
|  | myData->magic = UCOL_HEADER_MAGIC; | 
|  | myData->isBigEndian = U_IS_BIG_ENDIAN; | 
|  | myData->charSetFamily = U_CHARSET_FAMILY; | 
|  | myData->formatVersion[0] = UCA_FORMAT_VERSION_0; | 
|  | myData->formatVersion[1] = UCA_FORMAT_VERSION_1; | 
|  | myData->formatVersion[2] = UCA_FORMAT_VERSION_2; | 
|  | myData->formatVersion[3] = UCA_FORMAT_VERSION_3; | 
|  | myData->jamoSpecial = t->image->jamoSpecial; | 
|  |  | 
|  | // Don't copy stuff from UCA header! | 
|  | //uprv_memcpy(myData, t->image, sizeof(UCATableHeader)); | 
|  |  | 
|  | myData->contractionSize = contractionsSize; | 
|  |  | 
|  | tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader))); | 
|  |  | 
|  | myData->options = tableOffset; | 
|  | uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet)); | 
|  | tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet))); | 
|  |  | 
|  | /* copy expansions */ | 
|  | /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/ | 
|  | myData->expansion = tableOffset; | 
|  | uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t)); | 
|  | tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t))); | 
|  |  | 
|  | /* contractions block */ | 
|  | if(contractionsSize != 0) { | 
|  | /* copy contraction index */ | 
|  | /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/ | 
|  | myData->contractionIndex = tableOffset; | 
|  | uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar)); | 
|  | tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar))); | 
|  |  | 
|  | /* copy contraction collation elements */ | 
|  | /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/ | 
|  | myData->contractionCEs = tableOffset; | 
|  | uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t)); | 
|  | tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t))); | 
|  | } else { | 
|  | myData->contractionIndex = 0; | 
|  | myData->contractionCEs = 0; | 
|  | } | 
|  |  | 
|  | /* copy mapping table */ | 
|  | /*myData->mappingPosition = dataStart+tableOffset;*/ | 
|  | /*myData->mappingPosition = tableOffset;*/ | 
|  | /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/ | 
|  |  | 
|  | myData->mappingPosition = tableOffset; | 
|  | utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status); | 
|  | #ifdef UCOL_DEBUG | 
|  | // This is debug code to dump the contents of the trie. It needs two functions defined above | 
|  | { | 
|  | UTrie UCAt = { 0 }; | 
|  | uint32_t trieWord; | 
|  | utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status); | 
|  | UCAt.getFoldingOffset = myGetFoldingOffset; | 
|  | if(U_SUCCESS(*status)) { | 
|  | utrie_enum(&UCAt, NULL, enumRange, NULL); | 
|  | } | 
|  | trieWord = UTRIE_GET32_FROM_LEAD(&UCAt, 0xDC01); | 
|  | } | 
|  | #endif | 
|  | tableOffset += paddedsize(mappingSize); | 
|  |  | 
|  |  | 
|  | int32_t i = 0; | 
|  |  | 
|  | /* copy max expansion table */ | 
|  | myData->endExpansionCE      = tableOffset; | 
|  | myData->endExpansionCECount = maxexpansion->position - 1; | 
|  | /* not copying the first element which is a dummy */ | 
|  | uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1, | 
|  | (maxexpansion->position - 1) * sizeof(uint32_t)); | 
|  | tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t))); | 
|  | myData->expansionCESize = tableOffset; | 
|  | uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1, | 
|  | (maxexpansion->position - 1) * sizeof(uint8_t)); | 
|  | tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t))); | 
|  |  | 
|  | /* Unsafe chars table.  Finish it off, then copy it. */ | 
|  | uprv_uca_unsafeCPAddCCNZ(t, status); | 
|  | if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */ | 
|  | for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { | 
|  | t->unsafeCP[i] |= t->UCA->unsafeCP[i]; | 
|  | } | 
|  | } | 
|  | myData->unsafeCP = tableOffset; | 
|  | uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); | 
|  |  | 
|  |  | 
|  | /* Finish building Contraction Ending chars hash table and then copy it out.  */ | 
|  | if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */ | 
|  | for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { | 
|  | t->contrEndCP[i] |= t->UCA->contrEndCP[i]; | 
|  | } | 
|  | } | 
|  | myData->contrEndCP = tableOffset; | 
|  | uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); | 
|  | tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); | 
|  |  | 
|  | if(tableOffset != toAllocate) { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset); | 
|  | #endif | 
|  | *status = U_INTERNAL_PROGRAM_ERROR; | 
|  | uprv_free(dataStart); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | myData->size = tableOffset; | 
|  | /* This should happen upon ressurection */ | 
|  | /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/ | 
|  | /*uprv_mstrm_close(ms);*/ | 
|  | return myData; | 
|  | } | 
|  |  | 
|  |  | 
|  | struct enumStruct { | 
|  | tempUCATable *t; | 
|  | UCollator *tempColl; | 
|  | UCollationElements* colEl; | 
|  | const Normalizer2Impl *nfcImpl; | 
|  | UnicodeSet *closed; | 
|  | int32_t noOfClosures; | 
|  | UErrorCode *status; | 
|  | }; | 
|  | U_CDECL_BEGIN | 
|  | static UBool U_CALLCONV | 
|  | _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { | 
|  |  | 
|  | if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later | 
|  | UErrorCode *status = ((enumStruct *)context)->status; | 
|  | tempUCATable *t = ((enumStruct *)context)->t; | 
|  | UCollator *tempColl = ((enumStruct *)context)->tempColl; | 
|  | UCollationElements* colEl = ((enumStruct *)context)->colEl; | 
|  | UCAElements el; | 
|  | UChar decompBuffer[4]; | 
|  | const UChar *decomp; | 
|  | int32_t noOfDec = 0; | 
|  |  | 
|  | UChar32 u32 = 0; | 
|  | UChar comp[2]; | 
|  | uint32_t len = 0; | 
|  |  | 
|  | for(u32 = start; u32 < limit; u32++) { | 
|  | decomp = ((enumStruct *)context)->nfcImpl-> | 
|  | getDecomposition(u32, decompBuffer, noOfDec); | 
|  | //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1 | 
|  | //|| (noOfDec == 1 && *decomp != (UChar)u32)) | 
|  | if(decomp != NULL) | 
|  | { | 
|  | len = 0; | 
|  | U16_APPEND_UNSAFE(comp, len, u32); | 
|  | if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) { | 
|  | #ifdef UCOL_DEBUG | 
|  | fprintf(stderr, "Closure: U+%04X -> ", u32); | 
|  | UChar32 c; | 
|  | int32_t i = 0; | 
|  | while(i < noOfDec) { | 
|  | U16_NEXT(decomp, i, noOfDec, c); | 
|  | fprintf(stderr, "%04X ", c); | 
|  | } | 
|  | fprintf(stderr, "\n"); | 
|  | // print CEs for code point vs. decomposition | 
|  | fprintf(stderr, "U+%04X CEs: ", u32); | 
|  | UCollationElements *iter = ucol_openElements(tempColl, comp, len, status); | 
|  | int32_t ce; | 
|  | while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { | 
|  | fprintf(stderr, "%08X ", ce); | 
|  | } | 
|  | fprintf(stderr, "\nDecomp CEs: "); | 
|  | ucol_setText(iter, decomp, noOfDec, status); | 
|  | while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { | 
|  | fprintf(stderr, "%08X ", ce); | 
|  | } | 
|  | fprintf(stderr, "\n"); | 
|  | ucol_closeElements(iter); | 
|  | #endif | 
|  | if(((enumStruct *)context)->closed != NULL) { | 
|  | ((enumStruct *)context)->closed->add(u32); | 
|  | } | 
|  | ((enumStruct *)context)->noOfClosures++; | 
|  | el.cPoints = (UChar *)decomp; | 
|  | el.cSize = noOfDec; | 
|  | el.noOfCEs = 0; | 
|  | el.prefix = el.prefixChars; | 
|  | el.prefixSize = 0; | 
|  |  | 
|  | UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el); | 
|  | el.cPoints = comp; | 
|  | el.cSize = len; | 
|  | el.prefix = el.prefixChars; | 
|  | el.prefixSize = 0; | 
|  | if(prefix == NULL) { | 
|  | el.noOfCEs = 0; | 
|  | ucol_setText(colEl, decomp, noOfDec, status); | 
|  | while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { | 
|  | el.noOfCEs++; | 
|  | } | 
|  | } else { | 
|  | el.noOfCEs = 1; | 
|  | el.CEs[0] = prefix->mapCE; | 
|  | // This character uses a prefix. We have to add it | 
|  | // to the unsafe table, as it decomposed form is already | 
|  | // in. In Japanese, this happens for \u309e & \u30fe | 
|  | // Since unsafeCPSet is static in ucol_elm, we are going | 
|  | // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function | 
|  | } | 
|  | uprv_uca_addAnElement(t, &el, status); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  | U_CDECL_END | 
|  |  | 
|  | static void | 
|  | uprv_uca_setMapCE(tempUCATable *t, UCAElements *element, UErrorCode *status) { | 
|  | uint32_t expansion = 0; | 
|  | int32_t j; | 
|  |  | 
|  | ExpansionTable *expansions = t->expansions; | 
|  | if(element->noOfCEs == 2 // a two CE expansion | 
|  | && isContinuation(element->CEs[1]) // which  is a continuation | 
|  | && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation, | 
|  | && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary | 
|  | && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary | 
|  | ) { | 
|  | element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special | 
|  | | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary | 
|  | | ((element->CEs[1]>>24) & 0xFF);   // third byte of primary | 
|  | } else { | 
|  | expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) | 
|  | | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) | 
|  | & 0xFFFFF0)); | 
|  |  | 
|  | for(j = 1; j<(int32_t)element->noOfCEs; j++) { | 
|  | uprv_uca_addExpansion(expansions, element->CEs[j], status); | 
|  | } | 
|  | if(element->noOfCEs <= 0xF) { | 
|  | expansion |= element->noOfCEs; | 
|  | } else { | 
|  | uprv_uca_addExpansion(expansions, 0, status); | 
|  | } | 
|  | element->mapCE = expansion; | 
|  | uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], | 
|  | (uint8_t)element->noOfCEs, | 
|  | t->maxExpansions, | 
|  | status); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void | 
|  | uprv_uca_addFCD4AccentedContractions(tempUCATable *t, | 
|  | UCollationElements* colEl, | 
|  | UChar *data, | 
|  | int32_t len, | 
|  | UCAElements *el, | 
|  | UErrorCode *status) { | 
|  | UChar decomp[256], comp[256]; | 
|  | int32_t decLen, compLen; | 
|  |  | 
|  | decLen = unorm_normalize(data, len, UNORM_NFD, 0, decomp, 256, status); | 
|  | compLen = unorm_normalize(data, len, UNORM_NFC, 0, comp, 256, status); | 
|  | decomp[decLen] = comp[compLen] = 0; | 
|  |  | 
|  | el->cPoints = decomp; | 
|  | el->cSize = decLen; | 
|  | el->noOfCEs = 0; | 
|  | el->prefixSize = 0; | 
|  | el->prefix = el->prefixChars; | 
|  |  | 
|  | UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); | 
|  | el->cPoints = comp; | 
|  | el->cSize = compLen; | 
|  | el->prefix = el->prefixChars; | 
|  | el->prefixSize = 0; | 
|  | if(prefix == NULL) { | 
|  | el->noOfCEs = 0; | 
|  | ucol_setText(colEl, decomp, decLen, status); | 
|  | while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { | 
|  | el->noOfCEs++; | 
|  | } | 
|  | uprv_uca_setMapCE(t, el, status); | 
|  | uprv_uca_addAnElement(t, el, status); | 
|  | } | 
|  | el->cPoints=NULL; /* don't leak reference to stack */ | 
|  | } | 
|  |  | 
|  | static void | 
|  | uprv_uca_addMultiCMContractions(tempUCATable *t, | 
|  | UCollationElements* colEl, | 
|  | tempTailorContext *c, | 
|  | UCAElements *el, | 
|  | UErrorCode *status) { | 
|  | CombinClassTable *cmLookup = t->cmLookup; | 
|  | UChar  newDecomp[256]; | 
|  | int32_t maxComp, newDecLen; | 
|  | const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return; | 
|  | } | 
|  | int16_t curClass = nfcImpl->getFCD16(c->tailoringCM) & 0xff; | 
|  | CompData *precomp = c->precomp; | 
|  | int32_t  compLen = c->compLen; | 
|  | UChar *comp = c->comp; | 
|  | maxComp = c->precompLen; | 
|  |  | 
|  | for (int32_t j=0; j < maxComp; j++) { | 
|  | int32_t count=0; | 
|  | do { | 
|  | if ( count == 0 ) {  // Decompose the saved precomposed char. | 
|  | UChar temp[2]; | 
|  | temp[0]=precomp[j].cp; | 
|  | temp[1]=0; | 
|  | newDecLen = unorm_normalize(temp, 1, UNORM_NFD, 0, | 
|  | newDecomp, sizeof(newDecomp)/sizeof(UChar), status); | 
|  | newDecomp[newDecLen++] = cmLookup->cPoints[c->cmPos]; | 
|  | } | 
|  | else {  // swap 2 combining marks when they are equal. | 
|  | uprv_memcpy(newDecomp, c->decomp, sizeof(UChar)*(c->decompLen)); | 
|  | newDecLen = c->decompLen; | 
|  | newDecomp[newDecLen++] = precomp[j].cClass; | 
|  | } | 
|  | newDecomp[newDecLen] = 0; | 
|  | compLen = unorm_normalize(newDecomp, newDecLen, UNORM_NFC, 0, | 
|  | comp, 256, status); | 
|  | if (compLen==1) { | 
|  | comp[compLen++] = newDecomp[newDecLen++] = c->tailoringCM; | 
|  | comp[compLen] = newDecomp[newDecLen] = 0; | 
|  | el->cPoints = newDecomp; | 
|  | el->cSize = newDecLen; | 
|  |  | 
|  | UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); | 
|  | el->cPoints = c->comp; | 
|  | el->cSize = compLen; | 
|  | el->prefix = el->prefixChars; | 
|  | el->prefixSize = 0; | 
|  | if(prefix == NULL) { | 
|  | el->noOfCEs = 0; | 
|  | ucol_setText(colEl, newDecomp, newDecLen, status); | 
|  | while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { | 
|  | el->noOfCEs++; | 
|  | } | 
|  | uprv_uca_setMapCE(t, el, status); | 
|  | uprv_uca_finalizeAddition(t, el, status); | 
|  |  | 
|  | // Save the current precomposed char and its class to find any | 
|  | // other combining mark combinations. | 
|  | precomp[c->precompLen].cp=comp[0]; | 
|  | precomp[c->precompLen].cClass = curClass; | 
|  | c->precompLen++; | 
|  | } | 
|  | } | 
|  | } while (++count<2 && (precomp[j].cClass == curClass)); | 
|  | } | 
|  |  | 
|  | } | 
|  |  | 
|  | static void | 
|  | uprv_uca_addTailCanonicalClosures(tempUCATable *t, | 
|  | UCollationElements* colEl, | 
|  | UChar baseCh, | 
|  | UChar cMark, | 
|  | UCAElements *el, | 
|  | UErrorCode *status) { | 
|  | CombinClassTable *cmLookup = t->cmLookup; | 
|  | const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return; | 
|  | } | 
|  | int16_t maxIndex = nfcImpl->getFCD16(cMark) & 0xff; | 
|  | UCAElements element; | 
|  | uint16_t *index; | 
|  | UChar  decomp[256]; | 
|  | UChar  comp[256]; | 
|  | CompData precomp[256];   // precomposed array | 
|  | int32_t  precompLen = 0; // count for precomp | 
|  | int32_t i, len, decompLen, replacedPos; | 
|  | tempTailorContext c; | 
|  |  | 
|  | if ( cmLookup == NULL ) { | 
|  | return; | 
|  | } | 
|  | index = cmLookup->index; | 
|  | int32_t cClass=nfcImpl->getFCD16(cMark) & 0xff; | 
|  | maxIndex = (int32_t)index[(nfcImpl->getFCD16(cMark) & 0xff)-1]; | 
|  | c.comp = comp; | 
|  | c.decomp = decomp; | 
|  | c.precomp = precomp; | 
|  | c.tailoringCM =  cMark; | 
|  |  | 
|  | if (cClass>0) { | 
|  | maxIndex = (int32_t)index[cClass-1]; | 
|  | } | 
|  | else { | 
|  | maxIndex=0; | 
|  | } | 
|  | decomp[0]=baseCh; | 
|  | for ( i=0; i<maxIndex ; i++ ) { | 
|  | decomp[1] = cmLookup->cPoints[i]; | 
|  | decomp[2]=0; | 
|  | decompLen=2; | 
|  | len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); | 
|  | if (len==1) { | 
|  | // Save the current precomposed char and its class to find any | 
|  | // other combining mark combinations. | 
|  | precomp[precompLen].cp=comp[0]; | 
|  | precomp[precompLen].cClass = | 
|  | index[nfcImpl->getFCD16(decomp[1]) & 0xff]; | 
|  | precompLen++; | 
|  | replacedPos=0; | 
|  | for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) { | 
|  | decomp[decompLen] = el->cPoints[decompLen]; | 
|  | if (decomp[decompLen]==cMark) { | 
|  | replacedPos = decompLen;  // record the position for later use | 
|  | } | 
|  | } | 
|  | if ( replacedPos != 0 ) { | 
|  | decomp[replacedPos]=cmLookup->cPoints[i]; | 
|  | } | 
|  | decomp[decompLen] = 0; | 
|  | len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); | 
|  | comp[len++] = decomp[decompLen++] = cMark; | 
|  | comp[len] = decomp[decompLen] = 0; | 
|  | element.cPoints = decomp; | 
|  | element.cSize = decompLen; | 
|  | element.noOfCEs = 0; | 
|  | element.prefix = el->prefixChars; | 
|  | element.prefixSize = 0; | 
|  |  | 
|  | UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &element); | 
|  | element.cPoints = comp; | 
|  | element.cSize = len; | 
|  | element.prefix = el->prefixChars; | 
|  | element.prefixSize = 0; | 
|  | if(prefix == NULL) { | 
|  | element.noOfCEs = 0; | 
|  | ucol_setText(colEl, decomp, decompLen, status); | 
|  | while((element.CEs[element.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { | 
|  | element.noOfCEs++; | 
|  | } | 
|  | uprv_uca_setMapCE(t, &element, status); | 
|  | uprv_uca_finalizeAddition(t, &element, status); | 
|  | } | 
|  |  | 
|  | // This is a fix for tailoring contractions with accented | 
|  | // character at the end of contraction string. | 
|  | if ((len>2) && | 
|  | (nfcImpl->getFCD16(comp[len-2]) & 0xff00)==0) { | 
|  | uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status); | 
|  | } | 
|  |  | 
|  | if (precompLen >1) { | 
|  | c.compLen = len; | 
|  | c.decompLen = decompLen; | 
|  | c.precompLen = precompLen; | 
|  | c.cmPos = i; | 
|  | uprv_uca_addMultiCMContractions(t, colEl, &c, &element, status); | 
|  | precompLen = c.precompLen; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t U_EXPORT2 | 
|  | uprv_uca_canonicalClosure(tempUCATable *t, | 
|  | UColTokenParser *src, | 
|  | UnicodeSet *closed, | 
|  | UErrorCode *status) | 
|  | { | 
|  | enumStruct context; | 
|  | context.closed = closed; | 
|  | context.noOfClosures = 0; | 
|  | UCAElements el; | 
|  | UColToken *tok; | 
|  | uint32_t i = 0, j = 0; | 
|  | UChar  baseChar, firstCM; | 
|  | context.nfcImpl=Normalizer2Factory::getNFCImpl(*status); | 
|  | if(U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UCollator *tempColl = NULL; | 
|  | tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status); | 
|  | // Check for null pointer | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status); | 
|  | tempColl = ucol_initCollator(tempData, 0, t->UCA, status); | 
|  | if ( tempTable->cmLookup != NULL ) { | 
|  | t->cmLookup = tempTable->cmLookup;  // copy over to t | 
|  | tempTable->cmLookup = NULL; | 
|  | } | 
|  | uprv_uca_closeTempTable(tempTable); | 
|  |  | 
|  | if(U_SUCCESS(*status)) { | 
|  | tempColl->ucaRules = NULL; | 
|  | tempColl->actualLocale = NULL; | 
|  | tempColl->validLocale = NULL; | 
|  | tempColl->requestedLocale = NULL; | 
|  | tempColl->hasRealData = TRUE; | 
|  | tempColl->freeImageOnClose = TRUE; | 
|  | } else if(tempData != 0) { | 
|  | uprv_free(tempData); | 
|  | } | 
|  |  | 
|  | /* produce canonical closure */ | 
|  | UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status); | 
|  | // Check for null pointer | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | context.t = t; | 
|  | context.tempColl = tempColl; | 
|  | context.colEl = colEl; | 
|  | context.status = status; | 
|  | u_enumCharTypes(_enumCategoryRangeClosureCategory, &context); | 
|  |  | 
|  | if ( (src==NULL) || !src->buildCCTabFlag ) { | 
|  | ucol_closeElements(colEl); | 
|  | ucol_close(tempColl); | 
|  | return context.noOfClosures;  // no extra contraction needed to add | 
|  | } | 
|  |  | 
|  | for (i=0; i < src->resultLen; i++) { | 
|  | baseChar = firstCM= (UChar)0; | 
|  | tok = src->lh[i].first; | 
|  | while (tok != NULL && U_SUCCESS(*status)) { | 
|  | el.prefix = el.prefixChars; | 
|  | el.cPoints = el.uchars; | 
|  | if(tok->prefix != 0) { | 
|  | el.prefixSize = tok->prefix>>24; | 
|  | uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar)); | 
|  |  | 
|  | el.cSize = (tok->source >> 24)-(tok->prefix>>24); | 
|  | uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar)); | 
|  | } else { | 
|  | el.prefixSize = 0; | 
|  | *el.prefix = 0; | 
|  |  | 
|  | el.cSize = (tok->source >> 24); | 
|  | uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar)); | 
|  | } | 
|  | if(src->UCA != NULL) { | 
|  | for(j = 0; j<el.cSize; j++) { | 
|  | int16_t fcd = context.nfcImpl->getFCD16(el.cPoints[j]); | 
|  | if ( (fcd & 0xff) == 0 ) { | 
|  | baseChar = el.cPoints[j];  // last base character | 
|  | firstCM=0;  // reset combining mark value | 
|  | } | 
|  | else { | 
|  | if ( (baseChar!=0) && (firstCM==0) ) { | 
|  | firstCM = el.cPoints[j];  // first combining mark | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | if ( (baseChar!= (UChar)0) && (firstCM != (UChar)0) ) { | 
|  | // find all the canonical rules | 
|  | uprv_uca_addTailCanonicalClosures(t, colEl, baseChar, firstCM, &el, status); | 
|  | } | 
|  | tok = tok->next; | 
|  | } | 
|  | } | 
|  | ucol_closeElements(colEl); | 
|  | ucol_close(tempColl); | 
|  |  | 
|  | return context.noOfClosures; | 
|  | } | 
|  |  | 
|  | #endif /* #if !UCONFIG_NO_COLLATION */ |