| /*============================================================================ |
| * Copyright (C) 1997-1999, International Business Machines Corporation |
| * and others. All Rights Reserved. |
| * |
| * File cmpshrta.cpp |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods. |
| * 3/4/97 aliu Tuned performance of CompactIntArray constructor, |
| * based on performance data indicating that this_obj was slow. |
| * 05/07/97 helena Added isBogus() |
| * 04/26/99 Madhu Ported to C for C Implementation |
| * 11/12/99 srl macroized ucmp32_get() |
| * 11/07/00 weiv aligned implementation with ucmp8 |
| *=============================================================================== |
| */ |
| #include "ucmp32.h" |
| #include "cmemory.h" |
| #include "filestrm.h" |
| #include <stdlib.h> |
| |
| |
| |
| static int32_t ucmp32_findOverlappingPosition(CompactIntArray* this_obj, uint32_t start, |
| const UChar *tempIndex, |
| int32_t tempIndexCount, |
| uint32_t cycle); |
| |
| static UBool debugSmall = FALSE; |
| static uint32_t debugSmallLimit = 30000; |
| |
| /** debug flags |
| *======================================================= |
| */ |
| |
| int32_t ucmp32_getkUnicodeCount() { return UCMP32_kUnicodeCount;} |
| int32_t ucmp32_getkBlockCount() { return UCMP32_kBlockCount;} |
| |
| U_CAPI void ucmp32_streamIn(CompactIntArray* this_obj, FileStream* is) |
| { |
| int32_t newCount, len; |
| char c; |
| if (!T_FileStream_error(is)) |
| { |
| |
| T_FileStream_read(is, &newCount, sizeof(newCount)); |
| if (this_obj->fCount != newCount) |
| { |
| this_obj->fCount = newCount; |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = 0; |
| this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t)); |
| if (!this_obj->fArray) { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| } |
| T_FileStream_read(is, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount); |
| T_FileStream_read(is, &len, sizeof(len)); |
| if (len == 0) |
| { |
| uprv_free(this_obj->fIndex); |
| this_obj->fIndex = 0; |
| } |
| else if (len == UCMP32_kIndexCount) |
| { |
| if (this_obj->fIndex == 0) |
| this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t)); |
| if (!this_obj->fIndex) { |
| this_obj->fBogus = TRUE; |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = 0; |
| return; |
| } |
| T_FileStream_read(is, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount); |
| } |
| else |
| { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| /* char instead of int8_t for Mac compilation*/ |
| T_FileStream_read(is, (char*)&c, sizeof(c)); |
| this_obj->fCompact = (UBool)(c != 0); |
| } |
| } |
| |
| U_CAPI void ucmp32_streamOut(CompactIntArray* this_obj, FileStream* os) |
| { |
| char c; |
| if (!T_FileStream_error(os)) |
| { |
| if (this_obj->fCount != 0 && this_obj->fArray != 0) |
| { |
| T_FileStream_write(os, &(this_obj->fCount), sizeof(this_obj->fCount)); |
| T_FileStream_write(os, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount); |
| } |
| else |
| { |
| int32_t zero = 0; |
| T_FileStream_write(os, &zero, sizeof(zero)); |
| } |
| |
| if (this_obj->fIndex == 0) |
| { |
| int32_t len = 0; |
| T_FileStream_write(os, &len, sizeof(len)); |
| } |
| else |
| { |
| int32_t len = UCMP32_kIndexCount; |
| T_FileStream_write(os, &len, sizeof(len)); |
| T_FileStream_write(os, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount); |
| } |
| c = (char)(this_obj->fCompact ? 1 : 0); /* char instead of int8_t for Mac compilation*/ |
| T_FileStream_write(os, (const char*)&c, sizeof(c)); |
| } |
| } |
| |
| U_CAPI void ucmp32_streamMemIn(CompactIntArray* this_obj, UMemoryStream* is) |
| { |
| int32_t newCount, len; |
| char c; |
| if (!uprv_mstrm_error(is)) |
| { |
| |
| uprv_mstrm_read(is, &newCount, sizeof(newCount)); |
| if (this_obj->fCount != newCount) |
| { |
| this_obj->fCount = newCount; |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = 0; |
| this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t)); |
| if (!this_obj->fArray) { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| } |
| uprv_mstrm_read(is, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount); |
| uprv_mstrm_read(is, &len, sizeof(len)); |
| if (len == 0) |
| { |
| uprv_free(this_obj->fIndex); |
| this_obj->fIndex = 0; |
| } |
| else if (len == UCMP32_kIndexCount) |
| { |
| if (this_obj->fIndex == 0) |
| this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t)); |
| if (!this_obj->fIndex) { |
| this_obj->fBogus = TRUE; |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = 0; |
| return; |
| } |
| uprv_mstrm_read(is, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount); |
| } |
| else |
| { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| /* char instead of int8_t for Mac compilation*/ |
| uprv_mstrm_read(is, (char*)&c, sizeof(c)); |
| this_obj->fCompact = (UBool)(c != 0); |
| } |
| } |
| |
| U_CAPI void ucmp32_streamMemOut(CompactIntArray* this_obj, UMemoryStream* os) |
| { |
| char c; |
| if (!uprv_mstrm_error(os)) |
| { |
| if (this_obj->fCount != 0 && this_obj->fArray != 0) |
| { |
| uprv_mstrm_write(os, (uint8_t *)&(this_obj->fCount), sizeof(this_obj->fCount)); |
| uprv_mstrm_write(os, (uint8_t *)this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount); |
| } |
| else |
| { |
| int32_t zero = 0; |
| uprv_mstrm_write(os, (uint8_t *)&zero, sizeof(zero)); |
| } |
| |
| if (this_obj->fIndex == 0) |
| { |
| int32_t len = 0; |
| uprv_mstrm_write(os, (uint8_t *)&len, sizeof(len)); |
| } |
| else |
| { |
| int32_t len = UCMP32_kIndexCount; |
| uprv_mstrm_write(os, (uint8_t *)&len, sizeof(len)); |
| uprv_mstrm_write(os, (uint8_t *)this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount); |
| } |
| c = (char)(this_obj->fCompact ? 1 : 0); /* char instead of int8_t for Mac compilation*/ |
| uprv_mstrm_write(os, (uint8_t *)&c, sizeof(c)); |
| } |
| } |
| |
| CompactIntArray* ucmp32_open(int32_t defaultValue) |
| { |
| uint16_t i; |
| int32_t *p, *p_end; |
| uint16_t *q, *q_end; |
| CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray)); |
| if (this_obj == NULL) return NULL; |
| |
| this_obj->fStructSize = sizeof(CompactIntArray); |
| this_obj->fArray = NULL; |
| this_obj->fIndex = NULL; |
| this_obj->fCount = UCMP32_kUnicodeCount; |
| this_obj->fCompact = FALSE; |
| this_obj->fBogus = FALSE; |
| this_obj->fAlias = FALSE; |
| this_obj->fIAmOwned = FALSE; |
| |
| /*set up the index array and the data array. |
| * the index array always points into particular parts of the data array |
| * it is initially set up to point at regular block boundaries |
| * The following example uses blocks of 4 for simplicity |
| * Example: Expanded |
| * INDEX# 0 1 2 3 4 |
| * INDEX 0 4 8 12 16 ... |
| * ARRAY abcdeababcedzyabcdea... |
| * | | | | | |... |
| * whenever you set an element in the array, it unpacks to this_obj state |
| * After compression, the index will point to various places in the data array |
| * wherever there is a runs of the same elements as in the original |
| * Example: Compressed |
| * INDEX# 0 1 2 3 4 |
| * INDEX 0 4 1 8 2 ... |
| * ARRAY abcdeabazyabc... |
| * If you look at the example, index# 2 in the expanded version points |
| * to data position number 8, which has elements "bced". In the compressed |
| * version, index# 2 points to data position 1, which also has "bced" |
| */ |
| this_obj->fArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t)); |
| if (this_obj->fArray == NULL) { |
| this_obj->fBogus = TRUE; |
| return NULL; |
| } |
| |
| this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t)); |
| if (!this_obj->fIndex) { |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = NULL; |
| this_obj->fBogus = TRUE; |
| return NULL; |
| } |
| p = this_obj->fArray; |
| p_end = p + UCMP32_kUnicodeCount; |
| while (p < p_end) *p++ = defaultValue; |
| |
| q = this_obj->fIndex; |
| q_end = q + UCMP32_kIndexCount; |
| i = 0; |
| while (q < q_end) |
| { |
| *q++ = i; |
| i += (1 << UCMP32_kBlockShift); |
| } |
| return this_obj; |
| } |
| |
| CompactIntArray* ucmp32_openAdopt(uint16_t *indexArray, |
| int32_t *newValues, |
| int32_t count) |
| { |
| CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray)); |
| |
| ucmp32_initAdopt(this_obj, indexArray, newValues, count); |
| this_obj->fIAmOwned = FALSE; |
| return this_obj; |
| } |
| |
| CompactIntArray* ucmp32_openAlias(uint16_t *indexArray, |
| int32_t *newValues, |
| int32_t count) |
| { |
| CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray)); |
| |
| ucmp32_initAlias(this_obj, indexArray, newValues, count); |
| this_obj->fIAmOwned = FALSE; |
| return this_obj; |
| } |
| |
| CompactIntArray* ucmp32_openFromData( const uint8_t **source, |
| UErrorCode *status) |
| { |
| CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray)); |
| |
| ucmp32_initFromData(this_obj, source, status); |
| this_obj->fIAmOwned = FALSE; |
| return this_obj; |
| } |
| /*=======================================================*/ |
| |
| CompactIntArray* ucmp32_initAdopt(CompactIntArray* this_obj, |
| uint16_t *indexArray, |
| int32_t *newValues, |
| int32_t count) |
| { |
| if (this_obj) { |
| this_obj->fCount = count; |
| this_obj->fBogus = FALSE; |
| this_obj->fStructSize = sizeof(CompactIntArray); |
| |
| this_obj->fArray = newValues; |
| this_obj->fIndex = indexArray; |
| this_obj->fCompact = (UBool)((count < UCMP32_kUnicodeCount) ? TRUE : FALSE); |
| this_obj->fAlias = FALSE; |
| this_obj->fIAmOwned = TRUE; |
| } |
| |
| return this_obj; |
| } |
| |
| CompactIntArray* ucmp32_initAlias(CompactIntArray* this_obj, |
| uint16_t *indexArray, |
| int32_t *newValues, |
| int32_t count) |
| { |
| if (this_obj) { |
| this_obj->fCount = count; |
| this_obj->fBogus = FALSE; |
| this_obj->fStructSize = sizeof(CompactIntArray); |
| |
| this_obj->fArray = newValues; |
| this_obj->fIndex = indexArray; |
| this_obj->fCompact = (UBool)((count < UCMP32_kUnicodeCount) ? TRUE : FALSE); |
| this_obj->fAlias = TRUE; |
| this_obj->fIAmOwned = TRUE; |
| } |
| |
| return this_obj; |
| } |
| /*=======================================================*/ |
| |
| void ucmp32_close(CompactIntArray* this_obj) |
| { |
| if(this_obj != NULL) { |
| if(!this_obj->fAlias) { |
| if(this_obj->fArray != NULL) { |
| uprv_free(this_obj->fArray); |
| } |
| if(this_obj->fIndex != NULL) { |
| uprv_free(this_obj->fIndex); |
| } |
| } |
| if(!this_obj->fIAmOwned) { /* Called if 'init' was called instead of 'open'. */ |
| uprv_free(this_obj); |
| } |
| } |
| } |
| |
| UBool ucmp32_isBogus(const CompactIntArray* this_obj) |
| { |
| return (UBool)(this_obj == NULL || this_obj->fBogus); |
| } |
| |
| void ucmp32_expand(CompactIntArray* this_obj) { |
| /* can optimize later. |
| * if we have to expand, then walk through the blocks instead of using Get |
| * this_obj code unpacks the array by copying the blocks to the normalized position. |
| * Example: Compressed |
| * INDEX# 0 1 2 3 4 |
| * INDEX 0 4 1 8 2 ... |
| * ARRAY abcdeabazyabc... |
| * turns into |
| * Example: Expanded |
| * INDEX# 0 1 2 3 4 |
| * INDEX 0 4 8 12 16 ... |
| * ARRAY abcdeababcedzyabcdea... |
| */ |
| int32_t i; |
| int32_t* tempArray; |
| if (this_obj->fCompact) { |
| tempArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t)); |
| if (tempArray == NULL) { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| for (i = 0; i < UCMP32_kUnicodeCount; ++i) { |
| tempArray[i] = ucmp32_get(this_obj, (UChar)i); /* HSYS : How expand?*/ |
| } |
| for (i = 0; i < UCMP32_kIndexCount; ++i) { |
| this_obj->fIndex[i] = (uint16_t)(i<<UCMP32_kBlockShift); |
| } |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = tempArray; |
| this_obj->fCompact = FALSE; |
| } |
| } |
| |
| uint32_t ucmp32_getCount(const CompactIntArray* this_obj) |
| { |
| return this_obj->fCount; |
| } |
| |
| const int32_t* ucmp32_getArray(const CompactIntArray* this_obj) |
| { |
| return this_obj->fArray; |
| } |
| |
| const uint16_t* ucmp32_getIndex(const CompactIntArray* this_obj) |
| { |
| return this_obj->fIndex; |
| } |
| |
| void ucmp32_set(CompactIntArray* this_obj, UChar c, int32_t value) |
| { |
| if (this_obj->fCompact == TRUE) { |
| ucmp32_expand(this_obj); |
| if (this_obj->fBogus) return; |
| } |
| this_obj->fArray[(int32_t)c] = value; |
| } |
| |
| |
| void ucmp32_setRange(CompactIntArray* this_obj, UChar start, UChar end, int32_t value) |
| { |
| int32_t i; |
| if (this_obj->fCompact == TRUE) { |
| ucmp32_expand(this_obj); |
| if (this_obj->fBogus) return; |
| |
| } |
| for (i = start; i <= end; ++i) { |
| this_obj->fArray[i] = value; |
| } |
| } |
| /*======================================================= |
| * this_obj->fArray: an array to be overlapped |
| * start and count: specify the block to be overlapped |
| * tempIndex: the overlapped array (actually indices back into inputContents) |
| * inputHash: an index of hashes for tempIndex, where |
| * inputHash[i] = XOR of values from i-count+1 to i |
| */ |
| |
| static int32_t ucmp32_findOverlappingPosition(CompactIntArray* this_obj, |
| uint32_t start, |
| const UChar* tempIndex, |
| int32_t tempIndexCount, |
| uint32_t cycle) { |
| /* this_obj is a utility routine for finding blocks that overlap. |
| * IMPORTANT: the cycle number is very important. Small cycles take a lot |
| * longer to work. In some cases, they may be able to get better compaction. |
| */ |
| int32_t i; |
| int32_t j; |
| int32_t currentCount; |
| |
| |
| for (i = 0; i < tempIndexCount; i += cycle) { |
| currentCount = UCMP32_kBlockCount; |
| if (i + UCMP32_kBlockCount > tempIndexCount) { |
| currentCount = tempIndexCount - i; |
| } |
| for (j = 0; j < currentCount; ++j) { |
| if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]]) break; |
| } |
| if (j == currentCount) break; |
| } |
| |
| return i; |
| } |
| /*=======================================================*/ |
| |
| void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) { |
| /* this_obj actually does the compaction. |
| * it walks throught the contents of the expanded array, finding the |
| * first block in the data that matches the contents of the current index. |
| * As it works, it keeps an updated pointer to the last position, |
| * so that it knows how big to make the final array |
| * If the matching succeeds, then the index will point into the data |
| * at some earlier position. |
| * If the matching fails, then last position pointer will be bumped, |
| * and the index will point to that last block of data. |
| */ |
| UChar* tempIndex; |
| int32_t tempIndexCount; |
| int32_t* tempArray; |
| int32_t iBlock, iIndex; |
| int32_t newCount, firstPosition; |
| uint32_t block; |
| if (!this_obj->fCompact) { |
| |
| /* fix cycle, must be 0 < cycle <= blockcount*/ |
| if (cycle < 0) cycle = 1; |
| else if (cycle > UCMP32_kBlockCount) |
| cycle = UCMP32_kBlockCount; |
| |
| /* make temp storage, larger than we need*/ |
| tempIndex =(UChar*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t)); |
| if (tempIndex == NULL) { |
| this_obj->fBogus = TRUE; |
| return; |
| } |
| /* set up first block.*/ |
| tempIndexCount = UCMP32_kBlockCount; |
| for (iIndex = 0; iIndex < UCMP32_kBlockCount; ++iIndex) { |
| tempIndex[iIndex] = (uint16_t)iIndex; |
| }; /* endfor (iIndex = 0; .....)*/ |
| this_obj->fIndex[0] = 0; |
| |
| /* for each successive block, find out its first position in the compacted array*/ |
| for (iBlock = 1; iBlock < UCMP32_kIndexCount; ++iBlock) { |
| |
| block = iBlock<<UCMP32_kBlockShift; |
| if (debugSmall) if (block > debugSmallLimit) break; |
| firstPosition = ucmp32_findOverlappingPosition(this_obj, block, tempIndex, tempIndexCount, cycle); |
| |
| /* if not contained in the current list, copy the remainder |
| * invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock |
| * we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount] |
| */ |
| newCount = firstPosition + UCMP32_kBlockCount; |
| if (newCount > tempIndexCount) { |
| for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex) { |
| tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block); |
| } /* endfor (iIndex = tempIndexCount....)*/ |
| tempIndexCount = newCount; |
| } /*endif (newCount > tempIndexCount)*/ |
| this_obj->fIndex[iBlock] = (uint16_t)firstPosition; |
| } /* endfor (iBlock = 1.....)*/ |
| |
| |
| |
| /* now allocate and copy the items into the array*/ |
| tempArray = (int32_t*)uprv_malloc(tempIndexCount * sizeof(uint32_t)); |
| if (tempArray == NULL) { |
| this_obj->fBogus = TRUE; |
| uprv_free(tempIndex); |
| return; |
| } |
| for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) { |
| tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]]; |
| } |
| uprv_free(this_obj->fArray); |
| this_obj->fArray = tempArray; |
| this_obj->fCount = tempIndexCount; |
| |
| |
| |
| /* free up temp storage*/ |
| uprv_free(tempIndex); |
| this_obj->fCompact = TRUE; |
| |
| #ifdef _DEBUG |
| /*the following line is useful for specific debugging purposes*/ |
| /*fprintf(stderr, "Compacted to %ld with cycle %d\n", fCount, cycle);*/ |
| #endif |
| } /* endif (!this_obj->fCompact)*/ |
| } |
| |
| U_CAPI uint32_t U_EXPORT2 ucmp32_flattenMem (const CompactIntArray* array, UMemoryStream *MS) |
| { |
| int32_t size = 0; |
| |
| uprv_mstrm_write32(MS, ICU_UCMP32_VERSION); |
| size += 4; |
| |
| uprv_mstrm_write32(MS, array->fCount); |
| size += 4; |
| |
| uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP32_kIndexCount); |
| size += sizeof(array->fIndex[0])*UCMP32_kIndexCount; |
| |
| uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount); |
| size += sizeof(array->fArray[0])*array->fCount; |
| |
| while(size%4) /* end padding */ |
| { |
| uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */ |
| size += 1; |
| } |
| |
| return size; |
| } |
| |
| U_CAPI void U_EXPORT2 ucmp32_initFromData(CompactIntArray *this_obj, const uint8_t **source, UErrorCode *status) |
| { |
| uint32_t i; |
| const uint8_t *oldSource = *source; |
| |
| if(U_FAILURE(*status)) |
| return; |
| |
| this_obj->fArray = NULL; |
| this_obj->fIndex = NULL; |
| this_obj->fBogus = FALSE; |
| this_obj->fStructSize = sizeof(CompactIntArray); |
| this_obj->fCompact = TRUE; |
| this_obj->fAlias = TRUE; |
| this_obj->fIAmOwned = TRUE; |
| |
| i = * ((const uint32_t*) *source); |
| (*source) += 4; |
| |
| if(i != ICU_UCMP32_VERSION) |
| { |
| *status = U_INVALID_FORMAT_ERROR; |
| return; |
| } |
| |
| this_obj->fCount = * ((const uint32_t*)*source); |
| (*source) += 4; |
| |
| this_obj->fIndex = (uint16_t*) *source; |
| (*source) += sizeof(this_obj->fIndex[0])*UCMP32_kIndexCount; |
| |
| this_obj->fArray = (int32_t*) *source; |
| (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount; |
| |
| /* eat up padding */ |
| while((*source-(oldSource))%4) |
| (*source)++; |
| } |