icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java - external/github.com/unicode-org/icu - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html#License
 /*
 ******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

 package com.ibm.icu.impl;

 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.Arrays;

 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;

 /**
  * Builder class to manipulate and generate a trie.
  * This is useful for ICU data in primitive types.
  * Provides a compact way to store information that is indexed by Unicode
  * values, such as character properties, types, keyboard values, etc. This is
  * very useful when you have a block of Unicode data that contains significant
  * values while the rest of the Unicode data is unused in the application or
  * when you have a lot of redundance, such as where all 21,000 Han ideographs
  * have the same value.  However, lookup is much faster than a hash table.
  * A trie of any primitive data type serves two purposes:
  * <UL type = round>
  *     <LI>Fast access of the indexed values.
  *     <LI>Smaller memory footprint.
  * </UL>
  * This is a direct port from the ICU4C version
  * @author             Syn Wee Quek
  */
 public class IntTrieBuilder extends TrieBuilder
 {
     // public constructor ----------------------------------------------

     /**
      * Copy constructor
      */
     public IntTrieBuilder(IntTrieBuilder table)
     {
         super(table);
         m_data_ = new int[m_dataCapacity_];
         System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
         m_initialValue_ = table.m_initialValue_;
         m_leadUnitValue_ = table.m_leadUnitValue_;
     }

     /**
      * Constructs a build table
      * @param aliasdata data to be filled into table
      * @param maxdatalength maximum data length allowed in table
      * @param initialvalue inital data value
      * @param latin1linear is latin 1 to be linear
      */
     public IntTrieBuilder(int aliasdata[], int maxdatalength,
                           int initialvalue, int leadunitvalue,
                           boolean latin1linear)
     {
         super();
         if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
                                                   && maxdatalength < 1024)) {
             throw new IllegalArgumentException(
                                                "Argument maxdatalength is too small");
         }

         if (aliasdata != null) {
             m_data_ = aliasdata;
         }
         else {
             m_data_ = new int[maxdatalength];
         }

         // preallocate and reset the first data block (block index 0)
         int j = DATA_BLOCK_LENGTH;

         if (latin1linear) {
             // preallocate and reset the first block (number 0) and Latin-1
             // (U+0000..U+00ff) after that made sure above that
             // maxDataLength >= 1024
             // set indexes to point to consecutive data blocks
             int i = 0;
             do {
                 // do this at least for trie->index[0] even if that block is
                 // only partly used for Latin-1
                 m_index_[i ++] = j;
                 j += DATA_BLOCK_LENGTH;
             } while (i < (256 >> SHIFT_));
         }

         m_dataLength_ = j;
         // reset the initially allocated blocks to the initial value
         Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
         m_initialValue_ = initialvalue;
         m_leadUnitValue_ = leadunitvalue;
         m_dataCapacity_ = maxdatalength;
         m_isLatin1Linear_ = latin1linear;
         m_isCompacted_ = false;
     }

     // public methods -------------------------------------------------------

     /*public final void print()
       {
       int i = 0;
       int oldvalue = m_index_[i];
       int count = 0;
       System.out.println("index length " + m_indexLength_
       + " --------------------------");
       while (i < m_indexLength_) {
       if (m_index_[i] != oldvalue) {
       System.out.println("index has " + count + " counts of "
       + Integer.toHexString(oldvalue));
       count = 0;
       oldvalue = m_index_[i];
       }
       count ++;
       i ++;
       }
       System.out.println("index has " + count + " counts of "
       + Integer.toHexString(oldvalue));
       i = 0;
       oldvalue = m_data_[i];
       count = 0;
       System.out.println("data length " + m_dataLength_
       + " --------------------------");
       while (i < m_dataLength_) {
       if (m_data_[i] != oldvalue) {
       if ((oldvalue & 0xf1000000) == 0xf1000000) {
       int temp = oldvalue & 0xffffff;
       temp += 0x320;
       oldvalue = 0xf1000000 | temp;
       }
       if ((oldvalue & 0xf2000000) == 0xf2000000) {
       int temp = oldvalue & 0xffffff;
       temp += 0x14a;
       oldvalue = 0xf2000000 | temp;
       }
       System.out.println("data has " + count + " counts of "
       + Integer.toHexString(oldvalue));
       count = 0;
       oldvalue = m_data_[i];
       }
       count ++;
       i ++;
       }
       if ((oldvalue & 0xf1000000) == 0xf1000000) {
       int temp = oldvalue & 0xffffff;
       temp += 0x320;
       oldvalue = 0xf1000000 | temp;
       }
       if ((oldvalue & 0xf2000000) == 0xf2000000) {
       int temp = oldvalue & 0xffffff;
       temp += 0x14a;
       oldvalue = 0xf2000000 | temp;
       }
       System.out.println("data has " + count + " counts of "
       + Integer.toHexString(oldvalue));
       }
     */
     /**
      * Gets a 32 bit data from the table data
      * @param ch codepoint which data is to be retrieved
      * @return the 32 bit data
      */
     public int getValue(int ch)
     {
         // valid, uncompacted trie and valid c?
         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
             return 0;
         }

         int block = m_index_[ch >> SHIFT_];
         return m_data_[Math.abs(block) + (ch & MASK_)];
     }

     /**
      * Get a 32 bit data from the table data
      * @param ch  code point for which data is to be retrieved.
      * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
      *                      char maps into block zero, otherwise false.
      * @return the 32 bit data value.
      */
     public int getValue(int ch, boolean [] inBlockZero)
     {
         // valid, uncompacted trie and valid c?
         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
             if (inBlockZero != null) {
                 inBlockZero[0] = true;
             }
             return 0;
         }

         int block = m_index_[ch >> SHIFT_];
         if (inBlockZero != null) {
             inBlockZero[0] = (block == 0);
         }
         return m_data_[Math.abs(block) + (ch & MASK_)];
     }


     /**
      * Sets a 32 bit data in the table data
      * @param ch codepoint which data is to be set
      * @param value to set
      * @return true if the set is successful, otherwise
      *              if the table has been compacted return false
      */
     public boolean setValue(int ch, int value)
     {
         // valid, uncompacted trie and valid c?
         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
             return false;
         }

         int block = getDataBlock(ch);
         if (block < 0) {
             return false;
         }

         m_data_[block + (ch & MASK_)] = value;
         return true;
     }

     /**
      * Serializes the build table with 32 bit data
      * @param datamanipulate builder raw fold method implementation
      * @param triedatamanipulate result trie fold method
      * @return a new trie
      */
     public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
                              Trie.DataManipulate triedatamanipulate)
     {
         if (datamanipulate == null) {
             throw new IllegalArgumentException("Parameters can not be null");
         }
         // fold and compact if necessary, also checks that indexLength is
         // within limits
         if (!m_isCompacted_) {
             // compact once without overlap to improve folding
             compact(false);
             // fold the supplementary part of the index array
             fold(datamanipulate);
             // compact again with overlap for minimum data array length
             compact(true);
             m_isCompacted_ = true;
         }
         // is dataLength within limits?
         if (m_dataLength_ >= MAX_DATA_LENGTH_) {
             throw new ArrayIndexOutOfBoundsException("Data length too small");
         }

         char index[] = new char[m_indexLength_];
         int data[] = new int[m_dataLength_];
         // write the index (stage 1) array and the 32-bit data (stage 2) array
         // write 16-bit index values shifted right by INDEX_SHIFT_
         for (int i = 0; i < m_indexLength_; i ++) {
             index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
         }
         // write 32-bit data values
         System.arraycopy(m_data_, 0, data, 0, m_dataLength_);

         int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
         options |= OPTIONS_DATA_IS_32_BIT_;
         if (m_isLatin1Linear_) {
             options |= OPTIONS_LATIN1_IS_LINEAR_;
         }
         return new IntTrie(index, data, m_initialValue_, options,
                            triedatamanipulate);
     }


     /**
      * Serializes the build table to an output stream.
      *
      * Compacts the build-time trie after all values are set, and then
      * writes the serialized form onto an output stream.
      *
      * After this, this build-time Trie can only be serialized again and/or closed;
      * no further values can be added.
      *
      * This function is the rough equivalent of utrie_seriaize() in ICU4C.
      *
      * @param os the output stream to which the seriaized trie will be written.
      *         If nul, the function still returns the size of the serialized Trie.
      * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
      *         serialized form can then be used to create a CharTrie.
      * @param datamanipulate builder raw fold method implementation
      * @return the number of bytes written to the output stream.
      */
      public int serialize(OutputStream os, boolean reduceTo16Bits,
             TrieBuilder.DataManipulate datamanipulate)  throws IOException {
          if (datamanipulate == null) {
              throw new IllegalArgumentException("Parameters can not be null");
          }

          // fold and compact if necessary, also checks that indexLength is
          // within limits
          if (!m_isCompacted_) {
              // compact once without overlap to improve folding
              compact(false);
              // fold the supplementary part of the index array
              fold(datamanipulate);
              // compact again with overlap for minimum data array length
              compact(true);
              m_isCompacted_ = true;
          }

          // is dataLength within limits?
          int length;
          if (reduceTo16Bits) {
              length = m_dataLength_ + m_indexLength_;
          } else {
              length = m_dataLength_;
          }
          if (length >= MAX_DATA_LENGTH_) {
              throw new ArrayIndexOutOfBoundsException("Data length too small");
          }

          //  struct UTrieHeader {
          //      int32_t   signature;
          //      int32_t   options  (a bit field)
          //      int32_t   indexLength
          //      int32_t   dataLength
          length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
          if(reduceTo16Bits) {
              length+=2*m_dataLength_;
          } else {
              length+=4*m_dataLength_;
          }

          if (os == null) {
              // No output stream.  Just return the length of the serialized Trie, in bytes.
              return length;
          }

          DataOutputStream dos = new DataOutputStream(os);
          dos.writeInt(Trie.HEADER_SIGNATURE_);

          int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
          if(!reduceTo16Bits) {
              options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
          }
          if(m_isLatin1Linear_) {
              options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
          }
          dos.writeInt(options);

          dos.writeInt(m_indexLength_);
          dos.writeInt(m_dataLength_);

          /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
          if(reduceTo16Bits) {
              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
              for (int i=0; i<m_indexLength_; i++) {
                  int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
                  dos.writeChar(v);
              }

              /* write 16-bit data values */
              for(int i=0; i<m_dataLength_; i++) {
                  int v = m_data_[i] & 0x0000ffff;
                  dos.writeChar(v);
              }
          } else {
              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
              for (int i=0; i<m_indexLength_; i++) {
                  int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
                  dos.writeChar(v);
              }

              /* write 32-bit data values */
              for(int i=0; i<m_dataLength_; i++) {
                  dos.writeInt(m_data_[i]);
              }
         }

          return length;

     }


     /**
      * Set a value in a range of code points [start..limit].
      * All code points c with start &lt;= c &lt; limit will get the value if
      * overwrite is true or if the old value is 0.
      * @param start the first code point to get the value
      * @param limit one past the last code point to get the value
      * @param value the value
      * @param overwrite flag for whether old non-initial values are to be
      *        overwritten
      * @return false if a failure occurred (illegal argument or data array
      *               overrun)
      */
     public boolean setRange(int start, int limit, int value,
                             boolean overwrite)
     {
         // repeat value in [start..limit[
         // mark index values for repeat-data blocks by setting bit 31 of the
         // index values fill around existing values if any, if(overwrite)

         // valid, uncompacted trie and valid indexes?
         if (m_isCompacted_ || start < UCharacter.MIN_VALUE
             || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
             || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
             return false;
         }

         if (start == limit) {
             return true; // nothing to do
         }

         if ((start & MASK_) != 0) {
             // set partial block at [start..following block boundary[
             int block = getDataBlock(start);
             if (block < 0) {
                 return false;
             }

             int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
             if (nextStart <= limit) {
                 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
                           value, overwrite);
                 start = nextStart;
             }
             else {
                 fillBlock(block, start & MASK_, limit & MASK_,
                           value, overwrite);
                 return true;
             }
         }

         // number of positions in the last, partial block
         int rest = limit & MASK_;

         // round down limit to a block boundary
         limit &= ~MASK_;

         // iterate over all-value blocks
         int repeatBlock = 0;
         if (value == m_initialValue_) {
             // repeatBlock = 0; assigned above
         }
         else {
             repeatBlock = -1;
         }
         while (start < limit) {
             // get index value
             int block = m_index_[start >> SHIFT_];
             if (block > 0) {
                 // already allocated, fill in value
                 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
             }
             else if (m_data_[-block] != value && (block == 0 || overwrite)) {
                 // set the repeatBlock instead of the current block 0 or range
                 // block
                 if (repeatBlock >= 0) {
                     m_index_[start >> SHIFT_] = -repeatBlock;
                 }
                 else {
                     // create and set and fill the repeatBlock
                     repeatBlock = getDataBlock(start);
                     if (repeatBlock < 0) {
                         return false;
                     }

                     // set the negative block number to indicate that it is a
                     // repeat block
                     m_index_[start >> SHIFT_] = -repeatBlock;
                     fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
                 }
             }

             start += DATA_BLOCK_LENGTH;
         }

         if (rest > 0) {
             // set partial block at [last block boundary..limit[
             int block = getDataBlock(start);
             if (block < 0) {
                 return false;
             }
             fillBlock(block, 0, rest, value, overwrite);
         }

         return true;
     }

     // protected data member ------------------------------------------------

     protected int m_data_[];
     protected int m_initialValue_;

     //  private data member ------------------------------------------------

     private int m_leadUnitValue_;

     // private methods ------------------------------------------------------

     private int allocDataBlock()
     {
         int newBlock = m_dataLength_;
         int newTop = newBlock + DATA_BLOCK_LENGTH;
         if (newTop > m_dataCapacity_) {
             // out of memory in the data array
             return -1;
         }
         m_dataLength_ = newTop;
         return newBlock;
     }

     /**
      * No error checking for illegal arguments.
      * @param ch codepoint to look for
      * @return -1 if no new data block available (out of memory in data array)
      */
     private int getDataBlock(int ch)
     {
         ch >>= SHIFT_;
         int indexValue = m_index_[ch];
         if (indexValue > 0) {
             return indexValue;
         }

         // allocate a new data block
         int newBlock = allocDataBlock();
         if (newBlock < 0) {
             // out of memory in the data array
             return -1;
         }
         m_index_[ch] = newBlock;

         // copy-on-write for a block from a setRange()
         System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
                          DATA_BLOCK_LENGTH << 2);
         return newBlock;
     }

     /**
      * Compact a folded build-time trie.
      * The compaction
      * - removes blocks that are identical with earlier ones
      * - overlaps adjacent blocks as much as possible (if overlap == true)
      * - moves blocks in steps of the data granularity
      * - moves and overlaps blocks that overlap with multiple values in the overlap region
      *
      * It does not
      * - try to move and overlap blocks that are not already adjacent
      * @param overlap flag
      */
     private void compact(boolean overlap)
     {
         if (m_isCompacted_) {
             return; // nothing left to do
         }

         // compaction
         // initialize the index map with "block is used/unused" flags
         findUnusedBlocks();

         // if Latin-1 is preallocated and linear, then do not compact Latin-1
         // data
         int overlapStart = DATA_BLOCK_LENGTH;
         if (m_isLatin1Linear_ && SHIFT_ <= 8) {
             overlapStart += 256;
         }

         int newStart = DATA_BLOCK_LENGTH;
         int i;
         for (int start = newStart; start < m_dataLength_;) {
             // start: index of first entry of current block
             // newStart: index where the current block is to be moved
             //           (right after current end of already-compacted data)
             // skip blocks that are not used
             if (m_map_[start >>> SHIFT_] < 0) {
                 // advance start to the next block
                 start += DATA_BLOCK_LENGTH;
                 // leave newStart with the previous block!
                 continue;
             }
             // search for an identical block
             if (start >= overlapStart) {
                 i = findSameDataBlock(m_data_, newStart, start,
                                           overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
                 if (i >= 0) {
                     // found an identical block, set the other block's index
                     // value for the current block
                     m_map_[start >>> SHIFT_] = i;
                     // advance start to the next block
                     start += DATA_BLOCK_LENGTH;
                     // leave newStart with the previous block!
                     continue;
                 }
             }
             // see if the beginning of this block can be overlapped with the
             // end of the previous block
             if(overlap && start>=overlapStart) {
                 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
                 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
                     i>0 && !equal_int(m_data_, newStart-i, start, i);
                     i-=DATA_GRANULARITY_) {}
             } else {
                 i=0;
             }
             if (i > 0) {
                 // some overlap
                 m_map_[start >>> SHIFT_] = newStart - i;
                 // move the non-overlapping indexes to their new positions
                 start += i;
                 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
                     m_data_[newStart ++] = m_data_[start ++];
                 }
             }
             else if (newStart < start) {
                 // no overlap, just move the indexes to their new positions
                 m_map_[start >>> SHIFT_] = newStart;
                 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
                     m_data_[newStart ++] = m_data_[start ++];
                 }
             }
             else { // no overlap && newStart==start
                 m_map_[start >>> SHIFT_] = start;
                 newStart += DATA_BLOCK_LENGTH;
                 start = newStart;
             }
         }
         // now adjust the index (stage 1) table
         for (i = 0; i < m_indexLength_; ++ i) {
             m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
         }
         m_dataLength_ = newStart;
     }

     /**
      * Find the same data block
      * @param data array
      * @param dataLength
      * @param otherBlock
      * @param step
      */
     private static final int findSameDataBlock(int data[], int dataLength,
                                                int otherBlock, int step)
     {
         // ensure that we do not even partially get past dataLength
         dataLength -= DATA_BLOCK_LENGTH;

         for (int block = 0; block <= dataLength; block += step) {
             if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
                 return block;
             }
         }
         return -1;
     }

     /**
      * Fold the normalization data for supplementary code points into
      * a compact area on top of the BMP-part of the trie index,
      * with the lead surrogates indexing this compact area.
      *
      * Duplicate the index values for lead surrogates:
      * From inside the BMP area, where some may be overridden with folded values,
      * to just after the BMP area, where they can be retrieved for
      * code point lookups.
      * @param manipulate fold implementation
      */
     private final void fold(DataManipulate manipulate)
     {
         int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
         int index[] = m_index_;
         // copy the lead surrogate indexes into a temporary array
         System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
                          SURROGATE_BLOCK_COUNT_);

         // set all values for lead surrogate code *units* to leadUnitValue
         // so that by default runtime lookups will find no data for associated
         // supplementary code points, unless there is data for such code points
         // which will result in a non-zero folding value below that is set for
         // the respective lead units
         // the above saved the indexes for surrogate code *points*
         // fill the indexes with simplified code from utrie_setRange32()
         int block = 0;
         if (m_leadUnitValue_ == m_initialValue_) {
             // leadUnitValue == initialValue, use all-initial-value block
             // block = 0; if block here left empty
         }
         else {
             // create and fill the repeatBlock
             block = allocDataBlock();
             if (block < 0) {
                 // data table overflow
                 throw new IllegalStateException("Internal error: Out of memory space");
             }
             fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
             // negative block number to indicate that it is a repeat block
             block = -block;
         }
         for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
             m_index_[c] = block;
         }

         // Fold significant index values into the area just after the BMP
         // indexes.
         // In case the first lead surrogate has significant data,
         // its index block must be used first (in which case the folding is a
         // no-op).
         // Later all folded index blocks are moved up one to insert the copied
         // lead surrogate indexes.
         int indexLength = BMP_INDEX_LENGTH_;
         // search for any index (stage 1) entries for supplementary code points
         for (int c = 0x10000; c < 0x110000;) {
             if (index[c >> SHIFT_] != 0) {
                 // there is data, treat the full block for a lead surrogate
                 c &= ~0x3ff;
                 // is there an identical index block?
                 block = findSameIndexBlock(index, indexLength, c >> SHIFT_);

                 // get a folded value for [c..c+0x400[ and,
                 // if different from the value for the lead surrogate code
                 // point, set it for the lead surrogate code unit

                 int value = manipulate.getFoldedValue(c,
                                                       block + SURROGATE_BLOCK_COUNT_);
                 if (value != getValue(UTF16.getLeadSurrogate(c))) {
                     if (!setValue(UTF16.getLeadSurrogate(c), value)) {
                         // data table overflow
                         throw new ArrayIndexOutOfBoundsException(
                                                                  "Data table overflow");
                     }
                     // if we did not find an identical index block...
                     if (block == indexLength) {
                         // move the actual index (stage 1) entries from the
                         // supplementary position to the new one
                         System.arraycopy(index, c >> SHIFT_, index, indexLength,
                                          SURROGATE_BLOCK_COUNT_);
                         indexLength += SURROGATE_BLOCK_COUNT_;
                     }
                 }
                 c += 0x400;
             }
             else {
                 c += DATA_BLOCK_LENGTH;
             }
         }

         // index array overflow?
         // This is to guarantee that a folding offset is of the form
         // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
         // If the index is too large, then n>=1024 and more than 10 bits are
         // necessary.
         // In fact, it can only ever become n==1024 with completely unfoldable
         // data and the additional block of duplicated values for lead
         // surrogates.
         if (indexLength >= MAX_INDEX_LENGTH_) {
             throw new ArrayIndexOutOfBoundsException("Index table overflow");
         }
         // make space for the lead surrogate index block and insert it between
         // the BMP indexes and the folded ones
         System.arraycopy(index, BMP_INDEX_LENGTH_, index,
                          BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
                          indexLength - BMP_INDEX_LENGTH_);
         System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
                          SURROGATE_BLOCK_COUNT_);
         indexLength += SURROGATE_BLOCK_COUNT_;
         m_indexLength_ = indexLength;
     }

     /**
      * @internal
      */
     private void fillBlock(int block, int start, int limit, int value,
                            boolean overwrite)
     {
         limit += block;
         block += start;
         if (overwrite) {
             while (block < limit) {
                 m_data_[block ++] = value;
             }
         }
         else {
             while (block < limit) {
                 if (m_data_[block] == m_initialValue_) {
                     m_data_[block] = value;
                 }
                 ++ block;
             }
         }
     }
 }