|  | /* | 
|  | ******************************************************************** | 
|  | * COPYRIGHT: | 
|  | * Copyright (c) 1997-1999, International Business Machines Corporation and | 
|  | * others. All Rights Reserved. | 
|  | ******************************************************************** | 
|  | */ | 
|  |  | 
|  | #ifndef _STDLIB_H | 
|  | #include <stdlib.h> | 
|  | #endif | 
|  |  | 
|  | #ifndef _STDIO_H | 
|  | #include <stdio.h> | 
|  | #endif | 
|  |  | 
|  |  | 
|  | #include "ucmp8.h" | 
|  | #include "cmemory.h" | 
|  |  | 
|  | static int32_t findOverlappingPosition(CompactByteArray* this_obj, | 
|  | uint32_t start, | 
|  | const UChar *tempIndex, | 
|  | int32_t tempIndexCount, | 
|  | uint32_t cycle); | 
|  |  | 
|  | /* internal constants*/ | 
|  |  | 
|  |  | 
|  | int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;} | 
|  | int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;} | 
|  | void  ucmp8_initBogus(CompactByteArray* array) | 
|  | { | 
|  | CompactByteArray* this_obj = array; | 
|  |  | 
|  | if (this_obj == NULL) return; | 
|  |  | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fIndex = NULL; | 
|  | this_obj->fCount = UCMP8_kUnicodeCount; | 
|  | this_obj->fCompact = FALSE; | 
|  | this_obj->fBogus = TRUE; | 
|  | this_obj->fAlias = FALSE; | 
|  | this_obj->fIAmOwned = TRUE; | 
|  | } | 
|  |  | 
|  | /* debug flags*/ | 
|  | /*=======================================================*/ | 
|  | void  ucmp8_init(CompactByteArray* array, int8_t defaultValue) | 
|  | { | 
|  | /* set up the index array and the data array. | 
|  | * the index array always points into particular parts of the data array | 
|  | * it is initially set up to point at regular block boundaries | 
|  | * The following example uses blocks of 4 for simplicity | 
|  | * Example: Expanded | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   8   12  16 ... | 
|  | * ARRAY  abcdeababcedzyabcdea... | 
|  | *        |   |   |   |   |   |... | 
|  | * whenever you set an element in the array, it unpacks to this state | 
|  | * After compression, the index will point to various places in the data array | 
|  | * wherever there is a runs of the same elements as in the original | 
|  | * Example: Compressed | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   1   8   2 ... | 
|  | * ARRAY  abcdeabazyabc... | 
|  | * If you look at the example, index# 2 in the expanded version points | 
|  | * to data position number 8, which has elements "bced". In the compressed | 
|  | * version, index# 2 points to data position 1, which also has "bced" | 
|  | */ | 
|  | CompactByteArray* this_obj = array; | 
|  | int32_t i; | 
|  |  | 
|  | if (this_obj == NULL) return; | 
|  |  | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fIndex = NULL; | 
|  | this_obj->fCount = UCMP8_kUnicodeCount; | 
|  | this_obj->fCompact = FALSE; | 
|  | this_obj->fBogus = FALSE; | 
|  | this_obj->fAlias = FALSE; | 
|  | this_obj->fIAmOwned = TRUE; | 
|  |  | 
|  |  | 
|  | this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount); | 
|  | if (!this_obj->fArray) | 
|  | { | 
|  | this_obj->fBogus = TRUE; | 
|  | return; | 
|  | } | 
|  | this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount); | 
|  | if (!this_obj->fIndex) | 
|  | { | 
|  | uprv_free(this_obj->fArray); | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fBogus = TRUE; | 
|  | return; | 
|  | } | 
|  | for (i = 0; i < UCMP8_kUnicodeCount; ++i) | 
|  | { | 
|  | this_obj->fArray[i] = defaultValue; | 
|  | } | 
|  | for (i = 0; i < UCMP8_kIndexCount; ++i) | 
|  | { | 
|  | this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift); | 
|  | } | 
|  | } | 
|  |  | 
|  | CompactByteArray* ucmp8_open(int8_t defaultValue) | 
|  | { | 
|  | /* set up the index array and the data array. | 
|  | * the index array always points into particular parts of the data array | 
|  | * it is initially set up to point at regular block boundaries | 
|  | * The following example uses blocks of 4 for simplicity | 
|  | * Example: Expanded | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   8   12  16 ... | 
|  | * ARRAY  abcdeababcedzyabcdea... | 
|  | *        |   |   |   |   |   |... | 
|  | * whenever you set an element in the array, it unpacks to this state | 
|  | * After compression, the index will point to various places in the data array | 
|  | * wherever there is a runs of the same elements as in the original | 
|  | * Example: Compressed | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   1   8   2 ... | 
|  | * ARRAY  abcdeabazyabc... | 
|  | * If you look at the example, index# 2 in the expanded version points | 
|  | * to data position number 8, which has elements "bced". In the compressed | 
|  | * version, index# 2 points to data position 1, which also has "bced" | 
|  | */ | 
|  | CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray)); | 
|  | int32_t i; | 
|  |  | 
|  | if (this_obj == NULL) return NULL; | 
|  |  | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fIndex = NULL; | 
|  | this_obj->fCount = UCMP8_kUnicodeCount; | 
|  | this_obj->fCompact = FALSE; | 
|  | this_obj->fBogus = FALSE; | 
|  | this_obj->fAlias = FALSE; | 
|  | this_obj->fIAmOwned = FALSE; | 
|  |  | 
|  |  | 
|  |  | 
|  | this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount); | 
|  | if (!this_obj->fArray) | 
|  | { | 
|  | this_obj->fBogus = TRUE; | 
|  | return NULL; | 
|  | } | 
|  | this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount); | 
|  | if (!this_obj->fIndex) | 
|  | { | 
|  | uprv_free(this_obj->fArray); | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fBogus = TRUE; | 
|  | return NULL; | 
|  | } | 
|  | for (i = 0; i < UCMP8_kUnicodeCount; ++i) | 
|  | { | 
|  | this_obj->fArray[i] = defaultValue; | 
|  | } | 
|  | for (i = 0; i < UCMP8_kIndexCount; ++i) | 
|  | { | 
|  | this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift); | 
|  | } | 
|  |  | 
|  | return this_obj; | 
|  | } | 
|  |  | 
|  | CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray, | 
|  | int8_t *newValues, | 
|  | int32_t count) | 
|  | { | 
|  | CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray)); | 
|  |  | 
|  | ucmp8_initAdopt(this_obj, indexArray, newValues, count); | 
|  | this_obj->fIAmOwned = FALSE; | 
|  | return this_obj; | 
|  | } | 
|  |  | 
|  | CompactByteArray* ucmp8_openAlias(uint16_t *indexArray, | 
|  | int8_t *newValues, | 
|  | int32_t count) | 
|  | { | 
|  | CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray)); | 
|  |  | 
|  | ucmp8_initAlias(this_obj, indexArray, newValues, count); | 
|  | this_obj->fIAmOwned = FALSE; | 
|  | return this_obj; | 
|  | } | 
|  |  | 
|  | /*=======================================================*/ | 
|  |  | 
|  | CompactByteArray* ucmp8_initAdopt(CompactByteArray *this_obj, | 
|  | uint16_t *indexArray, | 
|  | int8_t *newValues, | 
|  | int32_t count) | 
|  | { | 
|  | if (this_obj) { | 
|  | this_obj->fCount = count; | 
|  | this_obj->fBogus = FALSE; | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  |  | 
|  | this_obj->fArray = newValues; | 
|  | this_obj->fIndex = indexArray; | 
|  | this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE); | 
|  | this_obj->fAlias = FALSE; | 
|  | this_obj->fIAmOwned = TRUE; | 
|  | } | 
|  |  | 
|  | return this_obj; | 
|  | } | 
|  |  | 
|  | CompactByteArray* ucmp8_initAlias(CompactByteArray *this_obj, | 
|  | uint16_t *indexArray, | 
|  | int8_t *newValues, | 
|  | int32_t count) | 
|  | { | 
|  | if (this_obj) { | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fIndex = NULL; | 
|  | this_obj->fCount = count; | 
|  | this_obj->fBogus = FALSE; | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  |  | 
|  | this_obj->fArray = newValues; | 
|  | this_obj->fIndex = indexArray; | 
|  | this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE); | 
|  | this_obj->fAlias = TRUE; | 
|  | this_obj->fIAmOwned = TRUE; | 
|  | } | 
|  |  | 
|  | return this_obj; | 
|  | } | 
|  |  | 
|  | /*=======================================================*/ | 
|  |  | 
|  | void ucmp8_close(CompactByteArray* this_obj) | 
|  | { | 
|  | if(this_obj != NULL) { | 
|  | if(!this_obj->fAlias) { | 
|  | if(this_obj->fArray != NULL) { | 
|  | uprv_free(this_obj->fArray); | 
|  | } | 
|  | if(this_obj->fIndex != NULL) { | 
|  | uprv_free(this_obj->fIndex); | 
|  | } | 
|  | } | 
|  | if(!this_obj->fIAmOwned) /* Called if 'init' was called instead of 'open'. */ | 
|  | { | 
|  | uprv_free(this_obj); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /*=======================================================*/ | 
|  |  | 
|  | void ucmp8_expand(CompactByteArray* this_obj) | 
|  | { | 
|  | /* can optimize later. | 
|  | * if we have to expand, then walk through the blocks instead of using Get | 
|  | * this code unpacks the array by copying the blocks to the normalized position. | 
|  | * Example: Compressed | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   1   8   2 ... | 
|  | * ARRAY  abcdeabazyabc... | 
|  | *  turns into | 
|  | * Example: Expanded | 
|  | * INDEX# 0   1   2   3   4 | 
|  | * INDEX  0   4   8   12  16 ... | 
|  | * ARRAY  abcdeababcedzyabcdea... | 
|  | */ | 
|  | int32_t i; | 
|  | if (this_obj->fCompact) | 
|  | { | 
|  | int8_t* tempArray; | 
|  | tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount); | 
|  | if (!tempArray) | 
|  | { | 
|  | this_obj->fBogus = TRUE; | 
|  | return; | 
|  | } | 
|  | for (i = 0; i < UCMP8_kUnicodeCount; ++i) | 
|  | { | 
|  | tempArray[i] = ucmp8_get(this_obj,(UChar)i);  /* HSYS : How expand?*/ | 
|  | } | 
|  | for (i = 0; i < UCMP8_kIndexCount; ++i) | 
|  | { | 
|  | this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift); | 
|  | } | 
|  | uprv_free(this_obj->fArray); | 
|  | this_obj->fArray = tempArray; | 
|  | this_obj->fCompact = FALSE; | 
|  | this_obj->fAlias = FALSE; | 
|  |  | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /*=======================================================*/ | 
|  | /* this_obj->fArray:    an array to be overlapped | 
|  | * start and count: specify the block to be overlapped | 
|  | * tempIndex:   the overlapped array (actually indices back into inputContents) | 
|  | * inputHash:   an index of hashes for tempIndex, where | 
|  | *      inputHash[i] = XOR of values from i-count+1 to i | 
|  | */ | 
|  | static int32_t | 
|  | findOverlappingPosition(CompactByteArray* this_obj, | 
|  | uint32_t start, | 
|  | const UChar* tempIndex, | 
|  | int32_t tempIndexCount, | 
|  | uint32_t cycle) | 
|  | { | 
|  | /* this_obj is a utility routine for finding blocks that overlap. | 
|  | * IMPORTANT: the cycle number is very important. Small cycles take a lot | 
|  | * longer to work. In some cases, they may be able to get better compaction. | 
|  | */ | 
|  |  | 
|  | int32_t i; | 
|  | int32_t j; | 
|  | int32_t currentCount; | 
|  |  | 
|  | for (i = 0; i < tempIndexCount; i += cycle) | 
|  | { | 
|  | currentCount = UCMP8_kBlockCount; | 
|  | if (i + UCMP8_kBlockCount > tempIndexCount) | 
|  | { | 
|  | currentCount = tempIndexCount - i; | 
|  | } | 
|  | for (j = 0; j < currentCount; ++j) | 
|  | { | 
|  | if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]]) break; | 
|  | } | 
|  | if (j == currentCount) break; | 
|  | } | 
|  |  | 
|  | return i; | 
|  | } | 
|  |  | 
|  | UBool | 
|  | ucmp8_isBogus(const CompactByteArray* this_obj) | 
|  | { | 
|  | return (UBool)(this_obj == NULL || this_obj->fBogus); | 
|  | } | 
|  |  | 
|  | const int8_t* | 
|  | ucmp8_getArray(const CompactByteArray* this_obj) | 
|  | { | 
|  | return this_obj->fArray; | 
|  | } | 
|  |  | 
|  | const uint16_t* | 
|  | ucmp8_getIndex(const CompactByteArray* this_obj) | 
|  | { | 
|  | return this_obj->fIndex; | 
|  | } | 
|  |  | 
|  | int32_t | 
|  | ucmp8_getCount(const CompactByteArray* this_obj) | 
|  | { | 
|  | return this_obj->fCount; | 
|  | } | 
|  |  | 
|  |  | 
|  | void | 
|  | ucmp8_set(CompactByteArray* this_obj, | 
|  | UChar c, | 
|  | int8_t value) | 
|  | { | 
|  | if (this_obj->fCompact == TRUE) | 
|  | { | 
|  | ucmp8_expand(this_obj); | 
|  | if (this_obj->fBogus) return; | 
|  | } | 
|  | this_obj->fArray[(int32_t)c] = value; | 
|  | } | 
|  |  | 
|  |  | 
|  | void | 
|  | ucmp8_setRange(CompactByteArray* this_obj, | 
|  | UChar start, | 
|  | UChar end, | 
|  | int8_t value) | 
|  | { | 
|  | int32_t i; | 
|  | if (this_obj->fCompact == TRUE) | 
|  | { | 
|  | ucmp8_expand(this_obj); | 
|  | if (this_obj->fBogus) return; | 
|  | } | 
|  | for (i = start; i <= end; ++i) | 
|  | { | 
|  | this_obj->fArray[i] = value; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /*=======================================================*/ | 
|  |  | 
|  | void | 
|  | ucmp8_compact(CompactByteArray* this_obj, | 
|  | uint32_t cycle) | 
|  | { | 
|  | if (!this_obj->fCompact) | 
|  | { | 
|  | /* this_obj actually does the compaction. | 
|  | * it walks throught the contents of the expanded array, finding the | 
|  | * first block in the data that matches the contents of the current index. | 
|  | * As it works, it keeps an updated pointer to the last position, | 
|  | * so that it knows how big to make the final array | 
|  | * If the matching succeeds, then the index will point into the data | 
|  | * at some earlier position. | 
|  | * If the matching fails, then last position pointer will be bumped, | 
|  | * and the index will point to that last block of data. | 
|  | */ | 
|  | UChar*    tempIndex; | 
|  | int32_t     tempIndexCount; | 
|  | int8_t*     tempArray; | 
|  | int32_t     iBlock, iIndex; | 
|  |  | 
|  | /* fix cycle, must be 0 < cycle <= blockcount*/ | 
|  | if (cycle < 0) cycle = 1; | 
|  | else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount; | 
|  |  | 
|  | /* make temp storage, larger than we need*/ | 
|  | tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount); | 
|  | if (!tempIndex) | 
|  | { | 
|  | this_obj->fBogus = TRUE; | 
|  | return; | 
|  | } | 
|  | /* set up first block.*/ | 
|  | tempIndexCount = UCMP8_kBlockCount; | 
|  | for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex) | 
|  | { | 
|  | tempIndex[iIndex] = (uint16_t)iIndex; | 
|  | }; /* endfor (iIndex = 0; .....)*/ | 
|  | this_obj->fIndex[0] = 0; | 
|  |  | 
|  | /* for each successive block, find out its first position in the compacted array*/ | 
|  | for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock) | 
|  | { | 
|  | int32_t newCount, firstPosition, block; | 
|  | block = iBlock << UCMP8_kBlockShift; | 
|  | /*      if (debugSmall) if (block > debugSmallLimit) break;*/ | 
|  | firstPosition = findOverlappingPosition(this_obj, | 
|  | block, | 
|  | tempIndex, | 
|  | tempIndexCount, | 
|  | cycle); | 
|  |  | 
|  | /* if not contained in the current list, copy the remainder | 
|  | * invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock | 
|  | * we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount] | 
|  | */ | 
|  | newCount = firstPosition + UCMP8_kBlockCount; | 
|  | if (newCount > tempIndexCount) | 
|  | { | 
|  | for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex) | 
|  | { | 
|  | tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block); | 
|  | } /* endfor (iIndex = tempIndexCount....)*/ | 
|  | tempIndexCount = newCount; | 
|  | } /* endif (newCount > tempIndexCount)*/ | 
|  | this_obj->fIndex[iBlock] = (uint16_t)firstPosition; | 
|  | } /* endfor (iBlock = 1.....)*/ | 
|  |  | 
|  | /* now allocate and copy the items into the array*/ | 
|  | tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t)); | 
|  | if (!tempArray) | 
|  | { | 
|  | this_obj->fBogus = TRUE; | 
|  | uprv_free(tempIndex); | 
|  | return; | 
|  | } | 
|  | for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) | 
|  | { | 
|  | tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]]; | 
|  | } | 
|  | uprv_free(this_obj->fArray); | 
|  | this_obj->fArray = tempArray; | 
|  | this_obj->fCount = tempIndexCount; | 
|  |  | 
|  |  | 
|  | /* free up temp storage*/ | 
|  | uprv_free(tempIndex); | 
|  | this_obj->fCompact = TRUE; | 
|  | } /* endif (!this_obj->fCompact)*/ | 
|  | } | 
|  |  | 
|  | U_CAPI  uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, UMemoryStream *MS) | 
|  | { | 
|  | int32_t size = 0; | 
|  |  | 
|  | uprv_mstrm_write32(MS, ICU_UCMP8_VERSION); | 
|  | size += 4; | 
|  |  | 
|  | uprv_mstrm_write32(MS, array->fCount); | 
|  | size += 4; | 
|  |  | 
|  | uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP8_kIndexCount); | 
|  | size += sizeof(array->fIndex[0])*UCMP8_kIndexCount; | 
|  |  | 
|  | uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount); | 
|  | size += sizeof(array->fArray[0])*array->fCount; | 
|  |  | 
|  | while(size%4) /* end padding */ | 
|  | { | 
|  | uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */ | 
|  | size += 1; | 
|  | } | 
|  |  | 
|  | return size; | 
|  | } | 
|  |  | 
|  | /* We use sizeof(*array), etc so that this code can be as portable as | 
|  | possible between the ucmpX_ family. | 
|  | */ | 
|  |  | 
|  | U_CAPI  void U_EXPORT2 ucmp8_initFromData(CompactByteArray *this_obj, const uint8_t **source, UErrorCode *status) | 
|  | { | 
|  | uint32_t i; | 
|  | const uint8_t *oldSource = *source; | 
|  |  | 
|  | if(U_FAILURE(*status)) | 
|  | return; | 
|  |  | 
|  | this_obj->fArray = NULL; | 
|  | this_obj->fIndex = NULL; | 
|  | this_obj->fBogus = FALSE; | 
|  | this_obj->fStructSize = sizeof(CompactByteArray); | 
|  | this_obj->fCompact = TRUE; | 
|  | this_obj->fAlias = TRUE; | 
|  | this_obj->fIAmOwned = TRUE; | 
|  |  | 
|  | i = * ((const uint32_t*) *source); | 
|  | (*source) += 4; | 
|  |  | 
|  | if(i != ICU_UCMP8_VERSION) | 
|  | { | 
|  | *status = U_INVALID_FORMAT_ERROR; | 
|  | return; | 
|  | } | 
|  |  | 
|  | this_obj->fCount = * ((const uint32_t*)*source); | 
|  | (*source) += 4; | 
|  |  | 
|  | this_obj->fIndex = (uint16_t*) *source; | 
|  | (*source) += sizeof(this_obj->fIndex[0])*UCMP8_kIndexCount; | 
|  |  | 
|  | this_obj->fArray = (int8_t*) *source; | 
|  | (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount; | 
|  |  | 
|  | /* eat up padding */ | 
|  | while((*source-(oldSource))%4) | 
|  | (*source)++; | 
|  | } |