src/com/ibm/icu/impl/IntTrieBuilder.java - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/IntTrieBuilder.java,v $
 * $Date: 2002/10/31 01:09:18 $
 * $Revision: 1.3 $
 *
 ******************************************************************************
 */

 package com.ibm.icu.impl;

 import com.ibm.icu.lang.UCharacter;
 import java.util.Arrays;

 /**
  * Builder lass to manipulate and generate a trie.
  * This is useful for ICU data in primitive types.
  * Provides a compact way to store information that is indexed by Unicode
  * values, such as character properties, types, keyboard values, etc. This is
  * very useful when you have a block of Unicode data that contains significant
  * values while the rest of the Unicode data is unused in the application or
  * when you have a lot of redundance, such as where all 21,000 Han ideographs
  * have the same value.  However, lookup is much faster than a hash table.
  * A trie of any primitive data type serves two purposes:
  * <UL type = round>
  *     <LI>Fast access of the indexed values.
  *     <LI>Smaller memory footprint.
  * </UL>
  * This is a direct port from the ICU4C version
  * @version            $Revision: 1.3 $
  * @author             Syn Wee Quek
  */
 public class IntTrieBuilder extends TrieBuilder
 {
 	// public constructor ----------------------------------------------

 	/**
 	 * Copy constructor
 	 */
 	public IntTrieBuilder(IntTrieBuilder table)
     {
     	super(table);
 		m_data_ = new int[m_dataCapacity_];
         System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
         m_initialValue_ = table.m_initialValue_;
     }

     /**
      * Constructs a build table
      * @param aliasdata data to be filled into table
      * @param maxdatalength maximum data length allowed in table
      * @param initialvalue inital data value
      * @param latin1linear is latin 1 to be linear
      * @return updated table
      */
     public IntTrieBuilder(int aliasdata[], int maxdatalength,
                           int initialvalue, boolean latin1linear)
     {
     	super();
     	if (maxdatalength < DATA_BLOCK_LENGTH_ || (latin1linear
 	                                               && maxdatalength < 1024)) {
 	        throw new IllegalArgumentException(
 	                                   "Argument maxdatalength is too small");
 	    }

 	    if (aliasdata != null) {
 	        m_data_ = aliasdata;
 	    }
 	    else {
 	        m_data_ = new int[maxdatalength];
 	    }

 	    // preallocate and reset the first data block (block index 0)
 	    int j = DATA_BLOCK_LENGTH_;

 	    if (latin1linear) {
 	        // preallocate and reset the first block (number 0) and Latin-1
 	        // (U+0000..U+00ff) after that made sure above that
 	        // maxDataLength >= 1024
 	        // set indexes to point to consecutive data blocks
 	        int i = 0;
 	        do {
 	            // do this at least for trie->index[0] even if that block is
 	            // only partly used for Latin-1
 	            m_index_[i ++] = j;
 	            j += DATA_BLOCK_LENGTH_;
 	        } while (i < (256 >> SHIFT_));
 	    }

         m_dataLength_ = j;
 	    // reset the initially allocated blocks to the initial value
         Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
 	    m_initialValue_ = initialvalue;
 	    m_dataCapacity_ = maxdatalength;
 	    m_isLatin1Linear_ = latin1linear;
 	    m_isCompacted_ = false;
 	}

 	// public methods -------------------------------------------------------

     /*public final void print()
     {
         int i = 0;
         int oldvalue = m_index_[i];
         int count = 0;
         System.out.println("index length " + m_indexLength_
                            + " --------------------------");
         while (i < m_indexLength_) {
             if (m_index_[i] != oldvalue) {
                 System.out.println("index has " + count + " counts of "
                                    + Integer.toHexString(oldvalue));
                 count = 0;
                 oldvalue = m_index_[i];
             }
             count ++;
             i ++;
         }
         System.out.println("index has " + count + " counts of "
                            + Integer.toHexString(oldvalue));
         i = 0;
         oldvalue = m_data_[i];
         count = 0;
         System.out.println("data length " + m_dataLength_
                            + " --------------------------");
         while (i < m_dataLength_) {
             if (m_data_[i] != oldvalue) {
                 if ((oldvalue & 0xf1000000) == 0xf1000000) {
                     int temp = oldvalue & 0xffffff;
                     temp += 0x320;
                     oldvalue = 0xf1000000 | temp;
                 }
                 if ((oldvalue & 0xf2000000) == 0xf2000000) {
                     int temp = oldvalue & 0xffffff;
                     temp += 0x14a;
                     oldvalue = 0xf2000000 | temp;
                 }
                 System.out.println("data has " + count + " counts of "
                                    + Integer.toHexString(oldvalue));
                 count = 0;
                 oldvalue = m_data_[i];
             }
             count ++;
             i ++;
         }
         if ((oldvalue & 0xf1000000) == 0xf1000000) {
             int temp = oldvalue & 0xffffff;
             temp += 0x320;
             oldvalue = 0xf1000000 | temp;
         }
         if ((oldvalue & 0xf2000000) == 0xf2000000) {
             int temp = oldvalue & 0xffffff;
             temp += 0x14a;
             oldvalue = 0xf2000000 | temp;
         }
         System.out.println("data has " + count + " counts of "
                            + Integer.toHexString(oldvalue));
     }
     */
     /**
      * Gets a 32 bit data from the table data
      * @param ch codepoint which data is to be retrieved
      * @return the 32 bit data
      */
     public int getValue(int ch)
     {
         // valid, uncompacted trie and valid c?
         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
             return 0;
         }

         int block = m_index_[ch >> SHIFT_];
         return m_data_[Math.abs(block) + (ch & MASK_)];
     }

     /**
      * Sets a 32 bit data in the table data
      * @param ch codepoint which data is to be set
      * @param value to set
      * @return true if the set is successful, otherwise
      *              if the table has been compacted return false
      */
     public boolean setValue(int ch, int value)
     {
         // valid, uncompacted trie and valid c?
         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
             return false;
         }

         int block = getDataBlock(ch);
         if (block < 0) {
             return false;
         }

         m_data_[block + (ch & MASK_)] = value;
         return true;
     }

     /**
      * Serializes the build table with 32 bit data
      * @param datamanipulate builder raw fold method implementation
      * @param triedatamanipulate result trie fold method
      * @return a new trie
      */
     public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
                              Trie.DataManipulate triedatamanipulate)
     {
         if (datamanipulate == null) {
             throw new IllegalArgumentException("Parameters can not be null");
         }
         // fold and compact if necessary, also checks that indexLength is
         // within limits
         if (!m_isCompacted_) {
             // compact once without overlap to improve folding
             compact(false);
             // fold the supplementary part of the index array
             fold(datamanipulate);
             // compact again with overlap for minimum data array length
             compact(true);
             m_isCompacted_ = true;
         }
         // is dataLength within limits?
         if (m_dataLength_ >= MAX_DATA_LENGTH_) {
             throw new ArrayIndexOutOfBoundsException("Data length too small");
         }

         char index[] = new char[m_indexLength_];
         int data[] = new int[m_dataLength_];
         // write the index (stage 1) array and the 32-bit data (stage 2) array
         // write 16-bit index values shifted right by INDEX_SHIFT_
         for (int i = 0; i < m_indexLength_; i ++) {
             index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
         }
         // write 32-bit data values
         System.arraycopy(m_data_, 0, data, 0, m_dataLength_);

         int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
         options |= OPTIONS_DATA_IS_32_BIT_;
         if (m_isLatin1Linear_) {
             options |= OPTIONS_LATIN1_IS_LINEAR_;
         }
         return new IntTrie(index, data, m_initialValue_, options,
                            triedatamanipulate);
     }

 	// public data member ---------------------------------------------

 	protected int m_data_[];
 	protected int m_initialValue_;

 	// private methods ------------------------------------------------------

     /**
      * No error checking for illegal arguments.
      * @param ch codepoint to look for
      * @return -1 if no new data block available (out of memory in data array)
      */
     private int getDataBlock(int ch)
     {
         ch >>= SHIFT_;
         int indexValue = m_index_[ch];
         if (indexValue > 0) {
             return indexValue;
         }

         // allocate a new data block
         int newBlock = m_dataLength_;
         int newTop = newBlock + DATA_BLOCK_LENGTH_;
         if (newTop > m_dataCapacity_) {
             // out of memory in the data array
             return -1;
         }
         m_dataLength_ = newTop;
         m_index_[ch] = newBlock;

         // copy-on-write for a block from a setRange()
         Arrays.fill(m_data_, newBlock, newBlock + DATA_BLOCK_LENGTH_,
                     m_initialValue_);
         return newBlock;
     }

     /**
      * Compact a folded build-time trie.
      * The compaction
      * - removes blocks that are identical with earlier ones
      * - overlaps adjacent blocks as much as possible (if overlap == true)
      * - moves blocks in steps of the data granularity
      *
      * It does not
      * - try to move and overlap blocks that are not already adjacent
      * - try to move and overlap blocks that overlap with multiple values in
      * the overlap region
      * @param overlap flag
      */
     private void compact(boolean overlap)
     {
         if (m_isCompacted_) {
             return; // nothing left to do
         }

         // compaction
         // initialize the index map with "block is used/unused" flags
         findUnusedBlocks();

         // if Latin-1 is preallocated and linear, then do not compact Latin-1
         // data
         int overlapStart = DATA_BLOCK_LENGTH_;
         if (m_isLatin1Linear_ && SHIFT_ <= 8) {
             overlapStart += 256;
         }

         int newStart = DATA_BLOCK_LENGTH_;
         int prevEnd = newStart - 1;
         for (int start = newStart; start < m_dataLength_;) {
             // start: index of first entry of current block
             // prevEnd: index to last entry of previous block
             // newStart: index where the current block is to be moved
             // skip blocks that are not used
             if (m_map_[start >> SHIFT_] < 0) {
                 // advance start to the next block
                 start += DATA_BLOCK_LENGTH_;
                 // leave prevEnd and newStart with the previous block!
                 continue;
             }
             // search for an identical block
             if (start >= overlapStart) {
                 int i = findSameDataBlock(m_data_, newStart, start,
                              overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH_);
                 if (i >= 0) {
                     // found an identical block, set the other block's index
                     // value for the current block
                     m_map_[start >> SHIFT_] = i;
                     // advance start to the next block
                     start += DATA_BLOCK_LENGTH_;
                     // leave prevEnd and newStart with the previous block!
                     continue;
                 }
             }
             // see if the beginning of this block can be overlapped with the
             // end of the previous block
             // x: first value in the current block
             int x = m_data_[start];
             int i = 0;
             if (x == m_data_[prevEnd] && overlap && start >= overlapStart)
             {
                 // overlap by at least one
                 for (i = 1; i < DATA_BLOCK_LENGTH_
                      && x == m_data_[start + i]
                      && x == m_data_[prevEnd - i]; ++ i)
                 {
                 }

                 // overlap by i, rounded down for the data block granularity
                 i &= ~(DATA_GRANULARITY_ - 1);
             }
             if (i > 0) {
                 // some overlap
                 m_map_[start >> SHIFT_] = newStart - i;
                 // move the non-overlapping indexes to their new positions
                 start += i;
                 for (i = DATA_BLOCK_LENGTH_ - i; i > 0; -- i) {
                     m_data_[newStart ++] = m_data_[start ++];
                 }
             }
             else if (newStart < start) {
                 // no overlap, just move the indexes to their new positions
                 m_map_[start >> SHIFT_] = newStart;
                 for (i = DATA_BLOCK_LENGTH_; i > 0; -- i) {
                     m_data_[newStart ++] = m_data_[start ++];
                 }
             }
             else { // no overlap && newStart==start
                 m_map_[start >> SHIFT_] = start;
                 newStart += DATA_BLOCK_LENGTH_;
                 start = newStart;
             }

             prevEnd = newStart - 1;
         }
         // now adjust the index (stage 1) table
         for (int i = 0; i < m_indexLength_; ++ i) {
             m_index_[i] = m_map_[m_index_[i] >>> SHIFT_];
         }
         m_dataLength_ = newStart;
     }

     /**
      * Find the same data block
      * @param data array
      * @param dataLength
      * @param otherBlock
      * @param step
      */
     private static final int findSameDataBlock(int data[], int dataLength,
                                                 int otherBlock, int step)
     {
         // ensure that we do not even partially get past dataLength
         dataLength -= DATA_BLOCK_LENGTH_;

         for (int block = 0; block <= dataLength; block += step) {
             int i = 0;
             for (i = 0; i < DATA_BLOCK_LENGTH_; ++ i) {
                 if (data[block + i] != data[otherBlock + i]) {
                     break;
                 }
             }
             if (i == DATA_BLOCK_LENGTH_) {
                 return block;
             }
         }
         return -1;
     }

     /**
      * Fold the normalization data for supplementary code points into
      * a compact area on top of the BMP-part of the trie index,
      * with the lead surrogates indexing this compact area.
      *
      * Duplicate the index values for lead surrogates:
      * From inside the BMP area, where some may be overridden with folded values,
      * to just after the BMP area, where they can be retrieved for
      * code point lookups.
      * @param manipulate fold implementation
      */
     private final void fold(DataManipulate manipulate)
     {
         int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
         int index[] = m_index_;
         // copy the lead surrogate indexes into a temporary array
         System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
                          SURROGATE_BLOCK_COUNT_);

         // to protect the copied lead surrogate values,
         // mark all their indexes as repeat blocks
         // (causes copy-on-write)
         for (char c = 0xd800; c <= 0xdbff; ++ c) {
             int block = index[c >> SHIFT_];
             if (block > 0) {
                 index[c >> SHIFT_] =- block;
             }
         }

         // Fold significant index values into the area just after the BMP
         // indexes.
         // In case the first lead surrogate has significant data,
         // its index block must be used first (in which case the folding is a
         // no-op).
         // Later all folded index blocks are moved up one to insert the copied
         // lead surrogate indexes.
         int indexLength = BMP_INDEX_LENGTH_;
         // search for any index (stage 1) entries for supplementary code points
         for (int c = 0x10000; c < 0x110000;) {
             if (index[c >> SHIFT_] != 0) {
                 // there is data, treat the full block for a lead surrogate
                 c &= ~0x3ff;
                 // is there an identical index block?
                 int block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
                 // get a folded value for [c..c+0x400[ and, if 0, set it for
                 // the lead surrogate
                 int value = manipulate.getFoldedValue(c,
                                                 block + SURROGATE_BLOCK_COUNT_);
                 if (value != 0) {
                     if (!setValue(0xd7c0 + (c >> 10), value)) {
                         // data table overflow
                         throw new ArrayIndexOutOfBoundsException(
                                                         "Data table overflow");
                     }
                     // if we did not find an identical index block...
                     if (block == indexLength) {
                         // move the actual index (stage 1) entries from the
                         // supplementary position to the new one
                         System.arraycopy(index, c >> SHIFT_, index, indexLength,
                                          SURROGATE_BLOCK_COUNT_);
                         indexLength += SURROGATE_BLOCK_COUNT_;
                     }
                 }
                 c += 0x400;
             }
             else {
                 c += DATA_BLOCK_LENGTH_;
             }
         }

         // index array overflow?
         // This is to guarantee that a folding offset is of the form
         // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
         // If the index is too large, then n>=1024 and more than 10 bits are
         // necessary.
         // In fact, it can only ever become n==1024 with completely unfoldable
         // data and the additional block of duplicated values for lead
         // surrogates.
         if (indexLength >= MAX_INDEX_LENGTH_) {
             throw new ArrayIndexOutOfBoundsException("Index table overflow");
         }
         // make space for the lead surrogate index block and insert it between
         // the BMP indexes and the folded ones
         System.arraycopy(index, BMP_INDEX_LENGTH_, index,
                          BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
                          indexLength - BMP_INDEX_LENGTH_);
         System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
                          SURROGATE_BLOCK_COUNT_);
         indexLength += SURROGATE_BLOCK_COUNT_;
         m_indexLength_ = indexLength;
     }
 }
	/*
	******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/IntTrieBuilder.java,v $
	* $Date: 2002/10/31 01:09:18 $
	* $Revision: 1.3 $
	*
	******************************************************************************
	*/

	package com.ibm.icu.impl;

	import com.ibm.icu.lang.UCharacter;
	import java.util.Arrays;

	/**
	* Builder lass to manipulate and generate a trie.
	* This is useful for ICU data in primitive types.
	* Provides a compact way to store information that is indexed by Unicode
	* values, such as character properties, types, keyboard values, etc. This is
	* very useful when you have a block of Unicode data that contains significant
	* values while the rest of the Unicode data is unused in the application or
	* when you have a lot of redundance, such as where all 21,000 Han ideographs
	* have the same value. However, lookup is much faster than a hash table.
	* A trie of any primitive data type serves two purposes:
	* <UL type = round>
	* <LI>Fast access of the indexed values.
	* <LI>Smaller memory footprint.
	* </UL>
	* This is a direct port from the ICU4C version
	* @version $Revision: 1.3 $
	* @author Syn Wee Quek
	*/
	public class IntTrieBuilder extends TrieBuilder
	{
	// public constructor ----------------------------------------------

	/**
	* Copy constructor
	*/
	public IntTrieBuilder(IntTrieBuilder table)
	{
	super(table);
	m_data_ = new int[m_dataCapacity_];
	System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
	m_initialValue_ = table.m_initialValue_;
	}

	/**
	* Constructs a build table
	* @param aliasdata data to be filled into table
	* @param maxdatalength maximum data length allowed in table
	* @param initialvalue inital data value
	* @param latin1linear is latin 1 to be linear
	* @return updated table
	*/
	public IntTrieBuilder(int aliasdata[], int maxdatalength,
	int initialvalue, boolean latin1linear)
	{
	super();
	if (maxdatalength < DATA_BLOCK_LENGTH_ \|\| (latin1linear
	&& maxdatalength < 1024)) {
	throw new IllegalArgumentException(
	"Argument maxdatalength is too small");
	}

	if (aliasdata != null) {
	m_data_ = aliasdata;
	}
	else {
	m_data_ = new int[maxdatalength];
	}

	// preallocate and reset the first data block (block index 0)
	int j = DATA_BLOCK_LENGTH_;

	if (latin1linear) {
	// preallocate and reset the first block (number 0) and Latin-1
	// (U+0000..U+00ff) after that made sure above that
	// maxDataLength >= 1024
	// set indexes to point to consecutive data blocks
	int i = 0;
	do {
	// do this at least for trie->index[0] even if that block is
	// only partly used for Latin-1
	m_index_[i ++] = j;
	j += DATA_BLOCK_LENGTH_;
	} while (i < (256 >> SHIFT_));
	}

	m_dataLength_ = j;
	// reset the initially allocated blocks to the initial value
	Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
	m_initialValue_ = initialvalue;
	m_dataCapacity_ = maxdatalength;
	m_isLatin1Linear_ = latin1linear;
	m_isCompacted_ = false;
	}

	// public methods -------------------------------------------------------

	/*public final void print()
	{
	int i = 0;
	int oldvalue = m_index_[i];
	int count = 0;
	System.out.println("index length " + m_indexLength_
	+ " --------------------------");
	while (i < m_indexLength_) {
	if (m_index_[i] != oldvalue) {
	System.out.println("index has " + count + " counts of "
	+ Integer.toHexString(oldvalue));
	count = 0;
	oldvalue = m_index_[i];
	}
	count ++;
	i ++;
	}
	System.out.println("index has " + count + " counts of "
	+ Integer.toHexString(oldvalue));
	i = 0;
	oldvalue = m_data_[i];
	count = 0;
	System.out.println("data length " + m_dataLength_
	+ " --------------------------");
	while (i < m_dataLength_) {
	if (m_data_[i] != oldvalue) {
	if ((oldvalue & 0xf1000000) == 0xf1000000) {
	int temp = oldvalue & 0xffffff;
	temp += 0x320;
	oldvalue = 0xf1000000 \| temp;
	}
	if ((oldvalue & 0xf2000000) == 0xf2000000) {
	int temp = oldvalue & 0xffffff;
	temp += 0x14a;
	oldvalue = 0xf2000000 \| temp;
	}
	System.out.println("data has " + count + " counts of "
	+ Integer.toHexString(oldvalue));
	count = 0;
	oldvalue = m_data_[i];
	}
	count ++;
	i ++;
	}
	if ((oldvalue & 0xf1000000) == 0xf1000000) {
	int temp = oldvalue & 0xffffff;
	temp += 0x320;
	oldvalue = 0xf1000000 \| temp;
	}
	if ((oldvalue & 0xf2000000) == 0xf2000000) {
	int temp = oldvalue & 0xffffff;
	temp += 0x14a;
	oldvalue = 0xf2000000 \| temp;
	}
	System.out.println("data has " + count + " counts of "
	+ Integer.toHexString(oldvalue));
	}
	*/
	/**
	* Gets a 32 bit data from the table data
	* @param ch codepoint which data is to be retrieved
	* @return the 32 bit data
	*/
	public int getValue(int ch)
	{
	// valid, uncompacted trie and valid c?
	if (m_isCompacted_ \|\| ch > UCharacter.MAX_VALUE \|\| ch < 0) {
	return 0;
	}

	int block = m_index_[ch >> SHIFT_];
	return m_data_[Math.abs(block) + (ch & MASK_)];
	}

	/**
	* Sets a 32 bit data in the table data
	* @param ch codepoint which data is to be set
	* @param value to set
	* @return true if the set is successful, otherwise
	* if the table has been compacted return false
	*/
	public boolean setValue(int ch, int value)
	{
	// valid, uncompacted trie and valid c?
	if (m_isCompacted_ \|\| ch > UCharacter.MAX_VALUE \|\| ch < 0) {
	return false;
	}

	int block = getDataBlock(ch);
	if (block < 0) {
	return false;
	}

	m_data_[block + (ch & MASK_)] = value;
	return true;
	}

	/**
	* Serializes the build table with 32 bit data
	* @param datamanipulate builder raw fold method implementation
	* @param triedatamanipulate result trie fold method
	* @return a new trie
	*/
	public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
	Trie.DataManipulate triedatamanipulate)
	{
	if (datamanipulate == null) {
	throw new IllegalArgumentException("Parameters can not be null");
	}
	// fold and compact if necessary, also checks that indexLength is
	// within limits
	if (!m_isCompacted_) {
	// compact once without overlap to improve folding
	compact(false);
	// fold the supplementary part of the index array
	fold(datamanipulate);
	// compact again with overlap for minimum data array length
	compact(true);
	m_isCompacted_ = true;
	}
	// is dataLength within limits?
	if (m_dataLength_ >= MAX_DATA_LENGTH_) {
	throw new ArrayIndexOutOfBoundsException("Data length too small");
	}

	char index[] = new char[m_indexLength_];
	int data[] = new int[m_dataLength_];
	// write the index (stage 1) array and the 32-bit data (stage 2) array
	// write 16-bit index values shifted right by INDEX_SHIFT_
	for (int i = 0; i < m_indexLength_; i ++) {
	index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
	}
	// write 32-bit data values
	System.arraycopy(m_data_, 0, data, 0, m_dataLength_);

	int options = SHIFT_ \| (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
	options \|= OPTIONS_DATA_IS_32_BIT_;
	if (m_isLatin1Linear_) {
	options \|= OPTIONS_LATIN1_IS_LINEAR_;
	}
	return new IntTrie(index, data, m_initialValue_, options,
	triedatamanipulate);
	}

	// public data member ---------------------------------------------

	protected int m_data_[];
	protected int m_initialValue_;

	// private methods ------------------------------------------------------

	/**
	* No error checking for illegal arguments.
	* @param ch codepoint to look for
	* @return -1 if no new data block available (out of memory in data array)
	*/
	private int getDataBlock(int ch)
	{
	ch >>= SHIFT_;
	int indexValue = m_index_[ch];
	if (indexValue > 0) {
	return indexValue;
	}

	// allocate a new data block
	int newBlock = m_dataLength_;
	int newTop = newBlock + DATA_BLOCK_LENGTH_;
	if (newTop > m_dataCapacity_) {
	// out of memory in the data array
	return -1;
	}
	m_dataLength_ = newTop;
	m_index_[ch] = newBlock;

	// copy-on-write for a block from a setRange()
	Arrays.fill(m_data_, newBlock, newBlock + DATA_BLOCK_LENGTH_,
	m_initialValue_);
	return newBlock;
	}

	/**
	* Compact a folded build-time trie.
	* The compaction
	* - removes blocks that are identical with earlier ones
	* - overlaps adjacent blocks as much as possible (if overlap == true)
	* - moves blocks in steps of the data granularity
	*
	* It does not
	* - try to move and overlap blocks that are not already adjacent
	* - try to move and overlap blocks that overlap with multiple values in
	* the overlap region
	* @param overlap flag
	*/
	private void compact(boolean overlap)
	{
	if (m_isCompacted_) {
	return; // nothing left to do
	}

	// compaction
	// initialize the index map with "block is used/unused" flags
	findUnusedBlocks();

	// if Latin-1 is preallocated and linear, then do not compact Latin-1
	// data
	int overlapStart = DATA_BLOCK_LENGTH_;
	if (m_isLatin1Linear_ && SHIFT_ <= 8) {
	overlapStart += 256;
	}

	int newStart = DATA_BLOCK_LENGTH_;
	int prevEnd = newStart - 1;
	for (int start = newStart; start < m_dataLength_;) {
	// start: index of first entry of current block
	// prevEnd: index to last entry of previous block
	// newStart: index where the current block is to be moved
	// skip blocks that are not used
	if (m_map_[start >> SHIFT_] < 0) {
	// advance start to the next block
	start += DATA_BLOCK_LENGTH_;
	// leave prevEnd and newStart with the previous block!
	continue;
	}
	// search for an identical block
	if (start >= overlapStart) {
	int i = findSameDataBlock(m_data_, newStart, start,
	overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH_);
	if (i >= 0) {
	// found an identical block, set the other block's index
	// value for the current block
	m_map_[start >> SHIFT_] = i;
	// advance start to the next block
	start += DATA_BLOCK_LENGTH_;
	// leave prevEnd and newStart with the previous block!
	continue;
	}
	}
	// see if the beginning of this block can be overlapped with the
	// end of the previous block
	// x: first value in the current block
	int x = m_data_[start];
	int i = 0;
	if (x == m_data_[prevEnd] && overlap && start >= overlapStart)
	{
	// overlap by at least one
	for (i = 1; i < DATA_BLOCK_LENGTH_
	&& x == m_data_[start + i]
	&& x == m_data_[prevEnd - i]; ++ i)
	{
	}

	// overlap by i, rounded down for the data block granularity
	i &= ~(DATA_GRANULARITY_ - 1);
	}
	if (i > 0) {
	// some overlap
	m_map_[start >> SHIFT_] = newStart - i;
	// move the non-overlapping indexes to their new positions
	start += i;
	for (i = DATA_BLOCK_LENGTH_ - i; i > 0; -- i) {
	m_data_[newStart ++] = m_data_[start ++];
	}
	}
	else if (newStart < start) {
	// no overlap, just move the indexes to their new positions
	m_map_[start >> SHIFT_] = newStart;
	for (i = DATA_BLOCK_LENGTH_; i > 0; -- i) {
	m_data_[newStart ++] = m_data_[start ++];
	}
	}
	else { // no overlap && newStart==start
	m_map_[start >> SHIFT_] = start;
	newStart += DATA_BLOCK_LENGTH_;
	start = newStart;
	}

	prevEnd = newStart - 1;
	}
	// now adjust the index (stage 1) table
	for (int i = 0; i < m_indexLength_; ++ i) {
	m_index_[i] = m_map_[m_index_[i] >>> SHIFT_];
	}
	m_dataLength_ = newStart;
	}

	/**
	* Find the same data block
	* @param data array
	* @param dataLength
	* @param otherBlock
	* @param step
	*/
	private static final int findSameDataBlock(int data[], int dataLength,
	int otherBlock, int step)
	{
	// ensure that we do not even partially get past dataLength
	dataLength -= DATA_BLOCK_LENGTH_;

	for (int block = 0; block <= dataLength; block += step) {
	int i = 0;
	for (i = 0; i < DATA_BLOCK_LENGTH_; ++ i) {
	if (data[block + i] != data[otherBlock + i]) {
	break;
	}
	}
	if (i == DATA_BLOCK_LENGTH_) {
	return block;
	}
	}
	return -1;
	}

	/**
	* Fold the normalization data for supplementary code points into
	* a compact area on top of the BMP-part of the trie index,
	* with the lead surrogates indexing this compact area.
	*
	* Duplicate the index values for lead surrogates:
	* From inside the BMP area, where some may be overridden with folded values,
	* to just after the BMP area, where they can be retrieved for
	* code point lookups.
	* @param manipulate fold implementation
	*/
	private final void fold(DataManipulate manipulate)
	{
	int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
	int index[] = m_index_;
	// copy the lead surrogate indexes into a temporary array
	System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
	SURROGATE_BLOCK_COUNT_);

	// to protect the copied lead surrogate values,
	// mark all their indexes as repeat blocks
	// (causes copy-on-write)
	for (char c = 0xd800; c <= 0xdbff; ++ c) {
	int block = index[c >> SHIFT_];
	if (block > 0) {
	index[c >> SHIFT_] =- block;
	}
	}

	// Fold significant index values into the area just after the BMP
	// indexes.
	// In case the first lead surrogate has significant data,
	// its index block must be used first (in which case the folding is a
	// no-op).
	// Later all folded index blocks are moved up one to insert the copied
	// lead surrogate indexes.
	int indexLength = BMP_INDEX_LENGTH_;
	// search for any index (stage 1) entries for supplementary code points
	for (int c = 0x10000; c < 0x110000;) {
	if (index[c >> SHIFT_] != 0) {
	// there is data, treat the full block for a lead surrogate
	c &= ~0x3ff;
	// is there an identical index block?
	int block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
	// get a folded value for [c..c+0x400[ and, if 0, set it for
	// the lead surrogate
	int value = manipulate.getFoldedValue(c,
	block + SURROGATE_BLOCK_COUNT_);
	if (value != 0) {
	if (!setValue(0xd7c0 + (c >> 10), value)) {
	// data table overflow
	throw new ArrayIndexOutOfBoundsException(
	"Data table overflow");
	}
	// if we did not find an identical index block...
	if (block == indexLength) {
	// move the actual index (stage 1) entries from the
	// supplementary position to the new one
	System.arraycopy(index, c >> SHIFT_, index, indexLength,
	SURROGATE_BLOCK_COUNT_);
	indexLength += SURROGATE_BLOCK_COUNT_;
	}
	}
	c += 0x400;
	}
	else {
	c += DATA_BLOCK_LENGTH_;
	}
	}

	// index array overflow?
	// This is to guarantee that a folding offset is of the form
	// UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
	// If the index is too large, then n>=1024 and more than 10 bits are
	// necessary.
	// In fact, it can only ever become n==1024 with completely unfoldable
	// data and the additional block of duplicated values for lead
	// surrogates.
	if (indexLength >= MAX_INDEX_LENGTH_) {
	throw new ArrayIndexOutOfBoundsException("Index table overflow");
	}
	// make space for the lead surrogate index block and insert it between
	// the BMP indexes and the folded ones
	System.arraycopy(index, BMP_INDEX_LENGTH_, index,
	BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
	indexLength - BMP_INDEX_LENGTH_);
	System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
	SURROGATE_BLOCK_COUNT_);
	indexLength += SURROGATE_BLOCK_COUNT_;
	m_indexLength_ = indexLength;
	}
	}