blob: ba90222fa403d0623838b1b22fd6c8177e8c98a5 [file] [log] [blame]
/*
******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/TrieBuilder.java,v $
* $Date: 2001/03/28 00:01:51 $
* $Revision: 1.2 $
*
******************************************************************************
*/
package com.ibm.util;
/**
* Builder lass to manipulate and generate a trie.
* This is useful for ICU data in primitive types.
* Provides a compact way to store information that is indexed by Unicode
* values, such as character properties, types, keyboard values, etc. This is
* very useful when you have a block of Unicode data that contains significant
* values while the rest of the Unicode data is unused in the application or
* when you have a lot of redundance, such as where all 21,000 Han ideographs
* have the same value. However, lookup is much faster than a hash table.
* A trie of any primitive data type serves two purposes:
* <UL type = round>
* <LI>Fast access of the indexed values.
* <LI>Smaller memory footprint.
* </UL>
* A trie is composed of 2 index array and value array. Combining the 2 index
* array, we could get the indicies of Unicode characters to the value array.
* The first index array will contain indexes corresponding to the first 11
* bits of a 21 bit codepoint, the second index array will contain indexes
* corresponding to the next 6 bits of the code point. The last array will
* contain the values. Hence to access the value of a codepoint, we can use the
* following program.
* <p>
* int firstindex = ch >> FIRST_SET_OF_BITS_SHIFT;<br>
* int secondindex = index1[firstindex] +
* (ch >> NEXT_SET_OF_BITS_SHIFT) & NEXT_SET_OF_BITS_MASK;<br>
* int thirdindex = index2[secondindex] + ch & LAST_SET_OF_BITS_MASK;<br>
* f(ch) = value[thirdindex];<br>
* </p>
* @version $Revision: 1.2 $
* @author Syn Wee Quek
*/
final class TrieBuilder
{
// public methods ----------------------------------------------------
/**
* Takes argument array and forms a compact array into the result arrays.
* The result will be
* <code>
* array[index] == valuearray[indexarray[index]]
* </code>.
* Note : This method is generic, it only takes values from the array.
* @param array value array to be manipulated
* @param start index of the array to process
* @param length of array to process.
* @param blocksize size of each blocks existing in valuearray
* @param indexarray result index array with length = array.length, with
* values which indexes to valuearray.
* @param valuearray result value array compact value array
* @return size of valuearray
*/
static int build(byte array[], int start, int length, int blocksize,
int indexarray[], byte valuearray[])
{
int valuesize = 0;
int valueindex;
int blockcount = 0;
int index = 0;
int min;
while (start < length) {
// for a block of blocksize in the array
// we try to find a similar block in valuearray
for (valueindex = 0; valueindex < valuesize; valueindex ++) {
// testing each block of blocksize at index valueindex in valuearray
// if it is == to array blocks
min = Math.min(blocksize, valuesize - valueindex);
for (blockcount = 0; blockcount < min;blockcount ++) {
if (array[start + blockcount] !=
valuearray[valueindex + blockcount]) {
break;
}
}
if (blockcount == blocksize || valueindex + blockcount == valuesize) {
break;
}
}
// if no similar block is found in value array
// we populate the result arrays with data
for (min = Math.min(blocksize, length - start); blockcount < min;
blockcount ++) {
valuearray[valuesize ++] = array[start + blockcount];
}
indexarray[index ++] = valueindex;
start += blocksize;
}
return valuesize;
}
/**
* Takes argument array and forms a compact array into the result arrays.
* The result will be
* <code>
* array[index] == valuearray[indexarray[index]]
* </code>.
* Note : This method is generic, it only takes values from the array.
* @param array value array to be manipulated
* @param start index of the array to process
* @param length of array to process.
* @param blocksize size of each blocks existing in valuearray
* @param indexarray result index array with length = array.length, with
* values which indexes to valuearray.
* @param valuearray result value array compact value array
* @return size of valuearray
*/
static int build(char array[], int start, int length, int blocksize,
int indexarray[], char valuearray[])
{
int valuesize = 0;
int valueindex;
int blockcount = 0;
int index = 0;
int min;
while (start < length) {
// for a block of blocksize in the array
// we try to find a similar block in valuearray
for (valueindex = 0; valueindex < valuesize; valueindex ++) {
// testing each block of blocksize at index valueindex in valuearray
// if it is == to array blocks
min = Math.min(blocksize, valuesize - valueindex);
for (blockcount = 0; blockcount < min;blockcount ++) {
if (array[start + blockcount] !=
valuearray[valueindex + blockcount]) {
break;
}
}
if (blockcount == blocksize || valueindex + blockcount == valuesize) {
break;
}
}
// if no similar block is found in value array
// we populate the result arrays with data
for (min = Math.min(blocksize, length - start); blockcount < min;
blockcount ++) {
valuearray[valuesize ++] = array[start + blockcount];
}
indexarray[index ++] = valueindex;
start += blocksize;
}
return valuesize;
}
/**
* Takes argument array and forms a compact array into the result arrays.
* The result will be
* <code>
* array[index] == valuearray[indexarray[index]]
* </code>.
* Note : This method is generic, it only takes values from the array.
* @param array value array to be manipulated
* @param start index of the array to process
* @param length of array to process.
* @param blocksize size of each blocks existing in valuearray
* @param indexarray result index array with length = array.length, with
* values which indexes to valuearray.
* @param valuearray result value array compact value array
* @return size of valuearray
*/
static int build(int array[], int start, int length, int blocksize,
int indexarray[], int valuearray[])
{
int valuesize = 0;
int valueindex;
int blockcount = 0;
int index = 0;
int min;
while (start < length) {
// for a block of blocksize in the array
// we try to find a similar block in valuearray
for (valueindex = 0; valueindex < valuesize; valueindex ++) {
// testing each block of blocksize at index valueindex in valuearray
// if it is == to array blocks
min = Math.min(blocksize, valuesize - valueindex);
for (blockcount = 0; blockcount < min; blockcount ++) {
if (array[start + blockcount] !=
valuearray[valueindex + blockcount]) {
break;
}
}
if (blockcount == blocksize || valueindex + blockcount == valuesize) {
break;
}
}
// if no similar block is found in value array
// we populate the result arrays with data
min = Math.min(blocksize, length - start);
for (; blockcount < min; blockcount ++) {
valuearray[valuesize ++] = array[start + blockcount];
}
indexarray[index ++] = valueindex;
start += blocksize;
}
return valuesize;
}
}