source/common/ucmp8.c - external/github.com/unicode-org/icu - Git at Google

 /*
 ********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-1999, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 */

 #ifndef _STDLIB_H
 #include <stdlib.h>
 #endif

 #ifndef _STDIO_H
 #include <stdio.h>
 #endif


 #include "ucmp8.h"
 #include "cmemory.h"

 static int32_t findOverlappingPosition(CompactByteArray* this_obj,
                        uint32_t start,
                        const UChar *tempIndex,
                        int32_t tempIndexCount,
                        uint32_t cycle);

 /* internal constants*/


 int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;}
 int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;}
 void  ucmp8_initBogus(CompactByteArray* array)
 {
   CompactByteArray* this_obj = array;

   if (this_obj == NULL) return;

   this_obj->fStructSize = sizeof(CompactByteArray);
   this_obj->fArray = NULL;
   this_obj->fIndex = NULL;
   this_obj->fCount = UCMP8_kUnicodeCount;
   this_obj->fCompact = FALSE;
   this_obj->fBogus = TRUE;
   this_obj->fAlias = FALSE;
   this_obj->fIAmOwned = TRUE;
 }

 /* debug flags*/
 /*=======================================================*/
 void  ucmp8_init(CompactByteArray* array, int8_t defaultValue)
 {
 /* set up the index array and the data array.
  * the index array always points into particular parts of the data array
  * it is initially set up to point at regular block boundaries
  * The following example uses blocks of 4 for simplicity
  * Example: Expanded
  * INDEX# 0   1   2   3   4
  * INDEX  0   4   8   12  16 ...
  * ARRAY  abcdeababcedzyabcdea...
  *        |   |   |   |   |   |...
  * whenever you set an element in the array, it unpacks to this state
  * After compression, the index will point to various places in the data array
  * wherever there is a runs of the same elements as in the original
  * Example: Compressed
  * INDEX# 0   1   2   3   4
  * INDEX  0   4   1   8   2 ...
  * ARRAY  abcdeabazyabc...
  * If you look at the example, index# 2 in the expanded version points
  * to data position number 8, which has elements "bced". In the compressed
  * version, index# 2 points to data position 1, which also has "bced"
  */
   CompactByteArray* this_obj = array;
   int32_t i;

   if (this_obj == NULL) return;

   this_obj->fStructSize = sizeof(CompactByteArray);
   this_obj->fArray = NULL;
   this_obj->fIndex = NULL;
   this_obj->fCount = UCMP8_kUnicodeCount;
   this_obj->fCompact = FALSE;
   this_obj->fBogus = FALSE;
   this_obj->fAlias = FALSE;
   this_obj->fIAmOwned = TRUE;


   this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
   if (!this_obj->fArray)
     {
       this_obj->fBogus = TRUE;
       return;
     }
   this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
   if (!this_obj->fIndex)
     {
       uprv_free(this_obj->fArray);
       this_obj->fArray = NULL;
       this_obj->fBogus = TRUE;
       return;
     }
   for (i = 0; i < UCMP8_kUnicodeCount; ++i)
     {
       this_obj->fArray[i] = defaultValue;
     }
   for (i = 0; i < UCMP8_kIndexCount; ++i)
     {
       this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
     }
 }

 CompactByteArray* ucmp8_open(int8_t defaultValue)
 {
 /* set up the index array and the data array.
  * the index array always points into particular parts of the data array
  * it is initially set up to point at regular block boundaries
  * The following example uses blocks of 4 for simplicity
  * Example: Expanded
  * INDEX# 0   1   2   3   4
  * INDEX  0   4   8   12  16 ...
  * ARRAY  abcdeababcedzyabcdea...
  *        |   |   |   |   |   |...
  * whenever you set an element in the array, it unpacks to this state
  * After compression, the index will point to various places in the data array
  * wherever there is a runs of the same elements as in the original
  * Example: Compressed
  * INDEX# 0   1   2   3   4
  * INDEX  0   4   1   8   2 ...
  * ARRAY  abcdeabazyabc...
  * If you look at the example, index# 2 in the expanded version points
  * to data position number 8, which has elements "bced". In the compressed
  * version, index# 2 points to data position 1, which also has "bced"
  */
   CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
   int32_t i;

   if (this_obj == NULL) return NULL;

   this_obj->fStructSize = sizeof(CompactByteArray);
   this_obj->fArray = NULL;
   this_obj->fIndex = NULL;
   this_obj->fCount = UCMP8_kUnicodeCount;
   this_obj->fCompact = FALSE;
   this_obj->fBogus = FALSE;
   this_obj->fAlias = FALSE;
   this_obj->fIAmOwned = FALSE;


   this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
   if (!this_obj->fArray)
     {
       this_obj->fBogus = TRUE;
       return NULL;
     }
   this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
   if (!this_obj->fIndex)
     {
       uprv_free(this_obj->fArray);
       this_obj->fArray = NULL;
       this_obj->fBogus = TRUE;
       return NULL;
     }
   for (i = 0; i < UCMP8_kUnicodeCount; ++i)
     {
       this_obj->fArray[i] = defaultValue;
     }
   for (i = 0; i < UCMP8_kIndexCount; ++i)
     {
       this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
     }

   return this_obj;
 }

 CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
                   int8_t *newValues,
                   int32_t count)
 {
   CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

     ucmp8_initAdopt(this_obj, indexArray, newValues, count);
     this_obj->fIAmOwned = FALSE;
     return this_obj;
 }

 CompactByteArray* ucmp8_openAlias(uint16_t *indexArray,
                   int8_t *newValues,
                   int32_t count)
 {
   CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

   ucmp8_initAlias(this_obj, indexArray, newValues, count);
   this_obj->fIAmOwned = FALSE;
   return this_obj;
 }

 /*=======================================================*/

 CompactByteArray* ucmp8_initAdopt(CompactByteArray *this_obj,
                   uint16_t *indexArray,
                   int8_t *newValues,
                   int32_t count)
 {
   if (this_obj) {
     this_obj->fCount = count;
     this_obj->fBogus = FALSE;
     this_obj->fStructSize = sizeof(CompactByteArray);

     this_obj->fArray = newValues;
     this_obj->fIndex = indexArray;
     this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
     this_obj->fAlias = FALSE;
     this_obj->fIAmOwned = TRUE;
   }

   return this_obj;
 }

 CompactByteArray* ucmp8_initAlias(CompactByteArray *this_obj,
                   uint16_t *indexArray,
                   int8_t *newValues,
                   int32_t count)
 {
   if (this_obj) {
     this_obj->fArray = NULL;
     this_obj->fIndex = NULL;
     this_obj->fCount = count;
     this_obj->fBogus = FALSE;
     this_obj->fStructSize = sizeof(CompactByteArray);

     this_obj->fArray = newValues;
     this_obj->fIndex = indexArray;
     this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
     this_obj->fAlias = TRUE;
     this_obj->fIAmOwned = TRUE;
   }

   return this_obj;
 }

 /*=======================================================*/

 void ucmp8_close(CompactByteArray* this_obj)
 {
   if(this_obj != NULL) {
     if(!this_obj->fAlias) {
       if(this_obj->fArray != NULL) {
         uprv_free(this_obj->fArray);
       }
       if(this_obj->fIndex != NULL) {
         uprv_free(this_obj->fIndex);
       }
     }
     if(!this_obj->fIAmOwned) /* Called if 'init' was called instead of 'open'. */
       {
         uprv_free(this_obj);
       }
   }
 }


 /*=======================================================*/

 void ucmp8_expand(CompactByteArray* this_obj)
 {
   /* can optimize later.
    * if we have to expand, then walk through the blocks instead of using Get
    * this code unpacks the array by copying the blocks to the normalized position.
    * Example: Compressed
    * INDEX# 0   1   2   3   4
    * INDEX  0   4   1   8   2 ...
    * ARRAY  abcdeabazyabc...
    *  turns into
    * Example: Expanded
    * INDEX# 0   1   2   3   4
    * INDEX  0   4   8   12  16 ...
    * ARRAY  abcdeababcedzyabcdea...
    */
     int32_t i;
     if (this_obj->fCompact)
     {
       int8_t* tempArray;
       tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
       if (!tempArray)
       {
           this_obj->fBogus = TRUE;
           return;
       }
       for (i = 0; i < UCMP8_kUnicodeCount; ++i)
       {
           tempArray[i] = ucmp8_get(this_obj,(UChar)i);  /* HSYS : How expand?*/
       }
       for (i = 0; i < UCMP8_kIndexCount; ++i)
       {
           this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
       }
       uprv_free(this_obj->fArray);
       this_obj->fArray = tempArray;
       this_obj->fCompact = FALSE;
       this_obj->fAlias = FALSE;

     }
 }


 /*=======================================================*/
 /* this_obj->fArray:    an array to be overlapped
  * start and count: specify the block to be overlapped
  * tempIndex:   the overlapped array (actually indices back into inputContents)
  * inputHash:   an index of hashes for tempIndex, where
  *      inputHash[i] = XOR of values from i-count+1 to i
  */
 static int32_t
 findOverlappingPosition(CompactByteArray* this_obj,
             uint32_t start,
             const UChar* tempIndex,
             int32_t tempIndexCount,
             uint32_t cycle)
 {
   /* this_obj is a utility routine for finding blocks that overlap.
    * IMPORTANT: the cycle number is very important. Small cycles take a lot
    * longer to work. In some cases, they may be able to get better compaction.
    */

   int32_t i;
   int32_t j;
   int32_t currentCount;

   for (i = 0; i < tempIndexCount; i += cycle)
     {
       currentCount = UCMP8_kBlockCount;
       if (i + UCMP8_kBlockCount > tempIndexCount)
     {
       currentCount = tempIndexCount - i;
         }
       for (j = 0; j < currentCount; ++j)
     {
       if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]]) break;
         }
       if (j == currentCount) break;
     }

   return i;
 }

 UBool
 ucmp8_isBogus(const CompactByteArray* this_obj)
 {
   return (UBool)(this_obj == NULL || this_obj->fBogus);
 }

 const int8_t*
 ucmp8_getArray(const CompactByteArray* this_obj)
 {
   return this_obj->fArray;
 }

 const uint16_t*
 ucmp8_getIndex(const CompactByteArray* this_obj)
 {
   return this_obj->fIndex;
 }

 int32_t
 ucmp8_getCount(const CompactByteArray* this_obj)
 {
   return this_obj->fCount;
 }


 void
 ucmp8_set(CompactByteArray* this_obj,
       UChar c,
       int8_t value)
 {
   if (this_obj->fCompact == TRUE)
     {
       ucmp8_expand(this_obj);
       if (this_obj->fBogus) return;
     }
   this_obj->fArray[(int32_t)c] = value;
 }


 void
 ucmp8_setRange(CompactByteArray* this_obj,
            UChar start,
            UChar end,
            int8_t value)
 {
   int32_t i;
   if (this_obj->fCompact == TRUE)
     {
       ucmp8_expand(this_obj);
       if (this_obj->fBogus) return;
     }
   for (i = start; i <= end; ++i)
     {
       this_obj->fArray[i] = value;
     }
 }


 /*=======================================================*/

 void
 ucmp8_compact(CompactByteArray* this_obj,
           uint32_t cycle)
 {
   if (!this_obj->fCompact)
     {
       /* this_obj actually does the compaction.
        * it walks throught the contents of the expanded array, finding the
        * first block in the data that matches the contents of the current index.
        * As it works, it keeps an updated pointer to the last position,
        * so that it knows how big to make the final array
        * If the matching succeeds, then the index will point into the data
        * at some earlier position.
        * If the matching fails, then last position pointer will be bumped,
        * and the index will point to that last block of data.
        */
       UChar*    tempIndex;
       int32_t     tempIndexCount;
       int8_t*     tempArray;
       int32_t     iBlock, iIndex;

       /* fix cycle, must be 0 < cycle <= blockcount*/
       if (cycle < 0) cycle = 1;
       else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;

       /* make temp storage, larger than we need*/
       tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
       if (!tempIndex)
     {
       this_obj->fBogus = TRUE;
       return;
         }
       /* set up first block.*/
       tempIndexCount = UCMP8_kBlockCount;
       for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
     {
       tempIndex[iIndex] = (uint16_t)iIndex;
         }; /* endfor (iIndex = 0; .....)*/
       this_obj->fIndex[0] = 0;

       /* for each successive block, find out its first position in the compacted array*/
       for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
     {
       int32_t newCount, firstPosition, block;
       block = iBlock << UCMP8_kBlockShift;
       /*      if (debugSmall) if (block > debugSmallLimit) break;*/
       firstPosition = findOverlappingPosition(this_obj,
                           block,
                           tempIndex,
                           tempIndexCount,
                           cycle);

       /* if not contained in the current list, copy the remainder
        * invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
        * we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
        */
       newCount = firstPosition + UCMP8_kBlockCount;
       if (newCount > tempIndexCount)
         {
           for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
         {
           tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
         } /* endfor (iIndex = tempIndexCount....)*/
                 tempIndexCount = newCount;
             } /* endif (newCount > tempIndexCount)*/
       this_obj->fIndex[iBlock] = (uint16_t)firstPosition;
         } /* endfor (iBlock = 1.....)*/

       /* now allocate and copy the items into the array*/
       tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
       if (!tempArray)
     {
       this_obj->fBogus = TRUE;
       uprv_free(tempIndex);
       return;
         }
       for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
     {
       tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
         }
       uprv_free(this_obj->fArray);
       this_obj->fArray = tempArray;
       this_obj->fCount = tempIndexCount;


       /* free up temp storage*/
       uprv_free(tempIndex);
       this_obj->fCompact = TRUE;
     } /* endif (!this_obj->fCompact)*/
 }

 U_CAPI  uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, UMemoryStream *MS)
 {
   int32_t size = 0;

   uprv_mstrm_write32(MS, ICU_UCMP8_VERSION);
   size += 4;

   uprv_mstrm_write32(MS, array->fCount);
   size += 4;

   uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP8_kIndexCount);
   size += sizeof(array->fIndex[0])*UCMP8_kIndexCount;

   uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount);
   size += sizeof(array->fArray[0])*array->fCount;

   while(size%4) /* end padding */
   {
       uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */
       size += 1;
   }

   return size;
 }

 /* We use sizeof(*array), etc so that this code can be as portable as
    possible between the ucmpX_ family.
 */

 U_CAPI  void U_EXPORT2 ucmp8_initFromData(CompactByteArray *this_obj, const uint8_t **source, UErrorCode *status)
 {
   uint32_t i;
   const uint8_t *oldSource = *source;

   if(U_FAILURE(*status))
     return;

  this_obj->fArray = NULL;
  this_obj->fIndex = NULL;
  this_obj->fBogus = FALSE;
  this_obj->fStructSize = sizeof(CompactByteArray);
  this_obj->fCompact = TRUE;
  this_obj->fAlias = TRUE;
  this_obj->fIAmOwned = TRUE;

  i = * ((const uint32_t*) *source);
  (*source) += 4;

  if(i != ICU_UCMP8_VERSION)
  {
    *status = U_INVALID_FORMAT_ERROR;
    return;
  }

  this_obj->fCount = * ((const uint32_t*)*source);
  (*source) += 4;

  this_obj->fIndex = (uint16_t*) *source;
  (*source) += sizeof(this_obj->fIndex[0])*UCMP8_kIndexCount;

  this_obj->fArray = (int8_t*) *source;
  (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;

  /* eat up padding */
  while((*source-(oldSource))%4)
     (*source)++;
 }
	/*
	********************************************************************
	* COPYRIGHT:
	* Copyright (c) 1997-1999, International Business Machines Corporation and
	* others. All Rights Reserved.
	********************************************************************
	*/

	#ifndef _STDLIB_H
	#include <stdlib.h>
	#endif

	#ifndef _STDIO_H
	#include <stdio.h>
	#endif


	#include "ucmp8.h"
	#include "cmemory.h"

	static int32_t findOverlappingPosition(CompactByteArray* this_obj,
	uint32_t start,
	const UChar *tempIndex,
	int32_t tempIndexCount,
	uint32_t cycle);

	/* internal constants*/


	int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;}
	int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;}
	void ucmp8_initBogus(CompactByteArray* array)
	{
	CompactByteArray* this_obj = array;

	if (this_obj == NULL) return;

	this_obj->fStructSize = sizeof(CompactByteArray);
	this_obj->fArray = NULL;
	this_obj->fIndex = NULL;
	this_obj->fCount = UCMP8_kUnicodeCount;
	this_obj->fCompact = FALSE;
	this_obj->fBogus = TRUE;
	this_obj->fAlias = FALSE;
	this_obj->fIAmOwned = TRUE;
	}

	/* debug flags*/
	/=======================================================/
	void ucmp8_init(CompactByteArray* array, int8_t defaultValue)
	{
	/* set up the index array and the data array.
	* the index array always points into particular parts of the data array
	* it is initially set up to point at regular block boundaries
	* The following example uses blocks of 4 for simplicity
	* Example: Expanded
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 8 12 16 ...
	* ARRAY abcdeababcedzyabcdea...
	* \| \| \| \| \| \|...
	* whenever you set an element in the array, it unpacks to this state
	* After compression, the index will point to various places in the data array
	* wherever there is a runs of the same elements as in the original
	* Example: Compressed
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 1 8 2 ...
	* ARRAY abcdeabazyabc...
	* If you look at the example, index# 2 in the expanded version points
	* to data position number 8, which has elements "bced". In the compressed
	* version, index# 2 points to data position 1, which also has "bced"
	*/
	CompactByteArray* this_obj = array;
	int32_t i;

	if (this_obj == NULL) return;

	this_obj->fStructSize = sizeof(CompactByteArray);
	this_obj->fArray = NULL;
	this_obj->fIndex = NULL;
	this_obj->fCount = UCMP8_kUnicodeCount;
	this_obj->fCompact = FALSE;
	this_obj->fBogus = FALSE;
	this_obj->fAlias = FALSE;
	this_obj->fIAmOwned = TRUE;


	this_obj->fArray = (int8_t) uprv_malloc(sizeof(int8_t) UCMP8_kUnicodeCount);
	if (!this_obj->fArray)
	{
	this_obj->fBogus = TRUE;
	return;
	}
	this_obj->fIndex = (uint16_t) uprv_malloc(sizeof(uint16_t) UCMP8_kIndexCount);
	if (!this_obj->fIndex)
	{
	uprv_free(this_obj->fArray);
	this_obj->fArray = NULL;
	this_obj->fBogus = TRUE;
	return;
	}
	for (i = 0; i < UCMP8_kUnicodeCount; ++i)
	{
	this_obj->fArray[i] = defaultValue;
	}
	for (i = 0; i < UCMP8_kIndexCount; ++i)
	{
	this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
	}
	}

	CompactByteArray* ucmp8_open(int8_t defaultValue)
	{
	/* set up the index array and the data array.
	* the index array always points into particular parts of the data array
	* it is initially set up to point at regular block boundaries
	* The following example uses blocks of 4 for simplicity
	* Example: Expanded
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 8 12 16 ...
	* ARRAY abcdeababcedzyabcdea...
	* \| \| \| \| \| \|...
	* whenever you set an element in the array, it unpacks to this state
	* After compression, the index will point to various places in the data array
	* wherever there is a runs of the same elements as in the original
	* Example: Compressed
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 1 8 2 ...
	* ARRAY abcdeabazyabc...
	* If you look at the example, index# 2 in the expanded version points
	* to data position number 8, which has elements "bced". In the compressed
	* version, index# 2 points to data position 1, which also has "bced"
	*/
	CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
	int32_t i;

	if (this_obj == NULL) return NULL;

	this_obj->fStructSize = sizeof(CompactByteArray);
	this_obj->fArray = NULL;
	this_obj->fIndex = NULL;
	this_obj->fCount = UCMP8_kUnicodeCount;
	this_obj->fCompact = FALSE;
	this_obj->fBogus = FALSE;
	this_obj->fAlias = FALSE;
	this_obj->fIAmOwned = FALSE;



	this_obj->fArray = (int8_t) uprv_malloc(sizeof(int8_t) UCMP8_kUnicodeCount);
	if (!this_obj->fArray)
	{
	this_obj->fBogus = TRUE;
	return NULL;
	}
	this_obj->fIndex = (uint16_t) uprv_malloc(sizeof(uint16_t) UCMP8_kIndexCount);
	if (!this_obj->fIndex)
	{
	uprv_free(this_obj->fArray);
	this_obj->fArray = NULL;
	this_obj->fBogus = TRUE;
	return NULL;
	}
	for (i = 0; i < UCMP8_kUnicodeCount; ++i)
	{
	this_obj->fArray[i] = defaultValue;
	}
	for (i = 0; i < UCMP8_kIndexCount; ++i)
	{
	this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
	}

	return this_obj;
	}

	CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
	int8_t *newValues,
	int32_t count)
	{
	CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

	ucmp8_initAdopt(this_obj, indexArray, newValues, count);
	this_obj->fIAmOwned = FALSE;
	return this_obj;
	}

	CompactByteArray* ucmp8_openAlias(uint16_t *indexArray,
	int8_t *newValues,
	int32_t count)
	{
	CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

	ucmp8_initAlias(this_obj, indexArray, newValues, count);
	this_obj->fIAmOwned = FALSE;
	return this_obj;
	}

	/=======================================================/

	CompactByteArray* ucmp8_initAdopt(CompactByteArray *this_obj,
	uint16_t *indexArray,
	int8_t *newValues,
	int32_t count)
	{
	if (this_obj) {
	this_obj->fCount = count;
	this_obj->fBogus = FALSE;
	this_obj->fStructSize = sizeof(CompactByteArray);

	this_obj->fArray = newValues;
	this_obj->fIndex = indexArray;
	this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
	this_obj->fAlias = FALSE;
	this_obj->fIAmOwned = TRUE;
	}

	return this_obj;
	}

	CompactByteArray* ucmp8_initAlias(CompactByteArray *this_obj,
	uint16_t *indexArray,
	int8_t *newValues,
	int32_t count)
	{
	if (this_obj) {
	this_obj->fArray = NULL;
	this_obj->fIndex = NULL;
	this_obj->fCount = count;
	this_obj->fBogus = FALSE;
	this_obj->fStructSize = sizeof(CompactByteArray);

	this_obj->fArray = newValues;
	this_obj->fIndex = indexArray;
	this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
	this_obj->fAlias = TRUE;
	this_obj->fIAmOwned = TRUE;
	}

	return this_obj;
	}

	/=======================================================/

	void ucmp8_close(CompactByteArray* this_obj)
	{
	if(this_obj != NULL) {
	if(!this_obj->fAlias) {
	if(this_obj->fArray != NULL) {
	uprv_free(this_obj->fArray);
	}
	if(this_obj->fIndex != NULL) {
	uprv_free(this_obj->fIndex);
	}
	}
	if(!this_obj->fIAmOwned) /* Called if 'init' was called instead of 'open'. */
	{
	uprv_free(this_obj);
	}
	}
	}


	/=======================================================/

	void ucmp8_expand(CompactByteArray* this_obj)
	{
	/* can optimize later.
	* if we have to expand, then walk through the blocks instead of using Get
	* this code unpacks the array by copying the blocks to the normalized position.
	* Example: Compressed
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 1 8 2 ...
	* ARRAY abcdeabazyabc...
	* turns into
	* Example: Expanded
	* INDEX# 0 1 2 3 4
	* INDEX 0 4 8 12 16 ...
	* ARRAY abcdeababcedzyabcdea...
	*/
	int32_t i;
	if (this_obj->fCompact)
	{
	int8_t* tempArray;
	tempArray = (int8_t) uprv_malloc(sizeof(int8_t) UCMP8_kUnicodeCount);
	if (!tempArray)
	{
	this_obj->fBogus = TRUE;
	return;
	}
	for (i = 0; i < UCMP8_kUnicodeCount; ++i)
	{
	tempArray[i] = ucmp8_get(this_obj,(UChar)i); /* HSYS : How expand?*/
	}
	for (i = 0; i < UCMP8_kIndexCount; ++i)
	{
	this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
	}
	uprv_free(this_obj->fArray);
	this_obj->fArray = tempArray;
	this_obj->fCompact = FALSE;
	this_obj->fAlias = FALSE;

	}
	}


	/=======================================================/
	/* this_obj->fArray: an array to be overlapped
	* start and count: specify the block to be overlapped
	* tempIndex: the overlapped array (actually indices back into inputContents)
	* inputHash: an index of hashes for tempIndex, where
	* inputHash[i] = XOR of values from i-count+1 to i
	*/
	static int32_t
	findOverlappingPosition(CompactByteArray* this_obj,
	uint32_t start,
	const UChar* tempIndex,
	int32_t tempIndexCount,
	uint32_t cycle)
	{
	/* this_obj is a utility routine for finding blocks that overlap.
	* IMPORTANT: the cycle number is very important. Small cycles take a lot
	* longer to work. In some cases, they may be able to get better compaction.
	*/

	int32_t i;
	int32_t j;
	int32_t currentCount;

	for (i = 0; i < tempIndexCount; i += cycle)
	{
	currentCount = UCMP8_kBlockCount;
	if (i + UCMP8_kBlockCount > tempIndexCount)
	{
	currentCount = tempIndexCount - i;
	}
	for (j = 0; j < currentCount; ++j)
	{
	if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]]) break;
	}
	if (j == currentCount) break;
	}

	return i;
	}

	UBool
	ucmp8_isBogus(const CompactByteArray* this_obj)
	{
	return (UBool)(this_obj == NULL \|\| this_obj->fBogus);
	}

	const int8_t*
	ucmp8_getArray(const CompactByteArray* this_obj)
	{
	return this_obj->fArray;
	}

	const uint16_t*
	ucmp8_getIndex(const CompactByteArray* this_obj)
	{
	return this_obj->fIndex;
	}

	int32_t
	ucmp8_getCount(const CompactByteArray* this_obj)
	{
	return this_obj->fCount;
	}


	void
	ucmp8_set(CompactByteArray* this_obj,
	UChar c,
	int8_t value)
	{
	if (this_obj->fCompact == TRUE)
	{
	ucmp8_expand(this_obj);
	if (this_obj->fBogus) return;
	}
	this_obj->fArray[(int32_t)c] = value;
	}


	void
	ucmp8_setRange(CompactByteArray* this_obj,
	UChar start,
	UChar end,
	int8_t value)
	{
	int32_t i;
	if (this_obj->fCompact == TRUE)
	{
	ucmp8_expand(this_obj);
	if (this_obj->fBogus) return;
	}
	for (i = start; i <= end; ++i)
	{
	this_obj->fArray[i] = value;
	}
	}


	/=======================================================/

	void
	ucmp8_compact(CompactByteArray* this_obj,
	uint32_t cycle)
	{
	if (!this_obj->fCompact)
	{
	/* this_obj actually does the compaction.
	* it walks throught the contents of the expanded array, finding the
	* first block in the data that matches the contents of the current index.
	* As it works, it keeps an updated pointer to the last position,
	* so that it knows how big to make the final array
	* If the matching succeeds, then the index will point into the data
	* at some earlier position.
	* If the matching fails, then last position pointer will be bumped,
	* and the index will point to that last block of data.
	*/
	UChar* tempIndex;
	int32_t tempIndexCount;
	int8_t* tempArray;
	int32_t iBlock, iIndex;

	/* fix cycle, must be 0 < cycle <= blockcount*/
	if (cycle < 0) cycle = 1;
	else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;

	/* make temp storage, larger than we need*/
	tempIndex = (UChar) uprv_malloc(sizeof(UChar) UCMP8_kUnicodeCount);
	if (!tempIndex)
	{
	this_obj->fBogus = TRUE;
	return;
	}
	/* set up first block.*/
	tempIndexCount = UCMP8_kBlockCount;
	for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
	{
	tempIndex[iIndex] = (uint16_t)iIndex;
	}; /* endfor (iIndex = 0; .....)*/
	this_obj->fIndex[0] = 0;

	/* for each successive block, find out its first position in the compacted array*/
	for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
	{
	int32_t newCount, firstPosition, block;
	block = iBlock << UCMP8_kBlockShift;
	/* if (debugSmall) if (block > debugSmallLimit) break;*/
	firstPosition = findOverlappingPosition(this_obj,
	block,
	tempIndex,
	tempIndexCount,
	cycle);

	/* if not contained in the current list, copy the remainder
	* invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
	* we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
	*/
	newCount = firstPosition + UCMP8_kBlockCount;
	if (newCount > tempIndexCount)
	{
	for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
	{
	tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
	} /* endfor (iIndex = tempIndexCount....)*/
	tempIndexCount = newCount;
	} /* endif (newCount > tempIndexCount)*/
	this_obj->fIndex[iBlock] = (uint16_t)firstPosition;
	} /* endfor (iBlock = 1.....)*/

	/* now allocate and copy the items into the array*/
	tempArray = (int8_t) uprv_malloc(tempIndexCount sizeof(int8_t));
	if (!tempArray)
	{
	this_obj->fBogus = TRUE;
	uprv_free(tempIndex);
	return;
	}
	for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
	{
	tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
	}
	uprv_free(this_obj->fArray);
	this_obj->fArray = tempArray;
	this_obj->fCount = tempIndexCount;


	/* free up temp storage*/
	uprv_free(tempIndex);
	this_obj->fCompact = TRUE;
	} /* endif (!this_obj->fCompact)*/
	}

	U_CAPI uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, UMemoryStream *MS)
	{
	int32_t size = 0;

	uprv_mstrm_write32(MS, ICU_UCMP8_VERSION);
	size += 4;

	uprv_mstrm_write32(MS, array->fCount);
	size += 4;

	uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP8_kIndexCount);
	size += sizeof(array->fIndex[0])*UCMP8_kIndexCount;

	uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount);
	size += sizeof(array->fArray[0])*array->fCount;

	while(size%4) /* end padding */
	{
	uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */
	size += 1;
	}

	return size;
	}

	/* We use sizeof(*array), etc so that this code can be as portable as
	possible between the ucmpX_ family.
	*/

	U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray this_obj, const uint8_t source, UErrorCode status)
	{
	uint32_t i;
	const uint8_t oldSource = source;

	if(U_FAILURE(*status))
	return;

	this_obj->fArray = NULL;
	this_obj->fIndex = NULL;
	this_obj->fBogus = FALSE;
	this_obj->fStructSize = sizeof(CompactByteArray);
	this_obj->fCompact = TRUE;
	this_obj->fAlias = TRUE;
	this_obj->fIAmOwned = TRUE;

	i = * ((const uint32_t) source);
	(*source) += 4;

	if(i != ICU_UCMP8_VERSION)
	{
	*status = U_INVALID_FORMAT_ERROR;
	return;
	}

	this_obj->fCount = * ((const uint32_t)source);
	(*source) += 4;

	this_obj->fIndex = (uint16_t) source;
	(source) += sizeof(this_obj->fIndex[0])UCMP8_kIndexCount;

	this_obj->fArray = (int8_t) source;
	(source) += sizeof(this_obj->fArray[0])this_obj->fCount;

	/* eat up padding */
	while((*source-(oldSource))%4)
	(*source)++;
	}