src/com/ibm/icu/lang/UCharacterName.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source:
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
 * $Date: 2002/03/15 22:48:07 $
 * $Revision: 1.15 $
 *
 *******************************************************************************
 */
 package com.ibm.icu.lang;

 import java.util.Locale;
 import java.io.InputStream;
 import java.io.DataInputStream;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.text.UTF16;

 /**
 * Internal class to manage character names.
 * Since data in <a href="UCharacterNameDB.html">UCharacterNameDB</a> is stored
 * in an array of char, by default the indexes used in this class refer to
 * a 2-byte count, unless otherwise stated. In cases where the index refers
 * to a byte count, the index is halved and, depending on whether the index is
 * even or odd, the MSB or LSB of the char at the halved index is
 * returned. For indexes into an array of int, the index is multiplied by 2,
 * and the char at the multiplied index and its following char are returned
 * as an int.
 * <a href="UCharacter.html">UCharacter</a> acts as a public facade for this
 * class.
 * Note: 0 - 0x1F are control characters without names in Unicode 3.0.
 * Information on parsing of the binary data is located at
 * <a href="http://oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html">
 * ReadMe</a>
 * @author Syn Wee Quek
 * @since nov0700
 */

 final class UCharacterName
 {
     // public methods ----------------------------------------------------

     /**
     * Returns a fixed, human readable label for this object. No instance
     * state is included in the result.
     * @return the constant text "names content \n"
     */
     public String toString()
     {
         return "names content \n";
     }

     // package protected inner class -------------------------------------

     /**
     * Algorithmic name class.
     * Describes one range of code points whose names are generated instead of
     * being stored: either a prefix followed by the code point in hexadecimal
     * (TYPE_0_), or a prefix followed by one string chosen from each of several
     * factor blocks (TYPE_1_).
     */
     static final class AlgorithmName
     {
         // protected data members ----------------------------------------

         /**
         * Constant type value of the different AlgorithmName
         */
         protected static final int TYPE_0_ = 0;
         protected static final int TYPE_1_ = 1;

         // protected constructors ----------------------------------------

         /**
         * Constructor. All fields are filled in afterwards through the set
         * methods.
         */
         protected AlgorithmName()
         {
         }

         // protected methods ---------------------------------------------

         /**
         * Sets the information for accessing the algorithmic names
         * @param rangestart starting code point that lies within this name group
         * @param rangeend end code point that lies within this name group
         * @param type algorithm type. There's 2 kinds of algorithmic type. First
         *        which uses code point as part of its name and the other uses
         *        variant postfix strings
         * @param variant algorithmic variant; used as the hex digit count for
         *        TYPE_0_ and as the number of factors for TYPE_1_
         * @return true if values are valid and have been stored, false if
         *         nothing was changed
         */
         protected boolean setInfo(int rangestart, int rangeend, byte type,
                                 byte variant)
         {
             boolean validRange = rangestart >= UCharacter.MIN_VALUE
                                  && rangestart <= rangeend
                                  && rangeend <= UCharacter.MAX_VALUE;
             boolean validType = type == TYPE_0_ || type == TYPE_1_;
             if (!validRange || !validType) {
                 return false;
             }
             m_rangestart_ = rangestart;
             m_rangeend_ = rangeend;
             m_type_ = type;
             m_variant_ = variant;
             return true;
         }

         /**
         * Sets the factor data
         * @param factor array of factors, its length has to match the variant
         *        passed to setInfo
         * @return true if factors are valid and have been stored; false for a
         *         null array or an array of the wrong length
         */
         protected boolean setFactor(char factor[])
         {
             // a null array is rejected like any other invalid argument
             // instead of failing with a NullPointerException
             if (factor != null && factor.length == m_variant_) {
                 m_factor_ = factor;
                 return true;
             }
             return false;
         }

         /**
         * Sets the name prefix
         * @param prefix non-empty prefix shared by all names in the range
         * @return true if prefix is set, false if it was null or empty
         */
         protected boolean setPrefix(String prefix)
         {
             if (prefix == null || prefix.length() == 0) {
                 return false;
             }
             m_prefix_ = prefix;
             return true;
         }

         /**
         * Sets the variant factorized name data
         * @param string variant factorized name data
         * @return always true, no validation is done
         */
         protected boolean setFactorString(byte string[])
         {
             // factor and variant string can be empty for things like
             // hanggul code points
             m_factorstring_ = string;
             return true;
         }

         /**
         * Checks if code point lies within the range of this object
         * @param ch code point
         * @return true if rangestart <= ch <= rangeend
         */
         protected boolean contains(int ch)
         {
             return m_rangestart_ <= ch && ch <= m_rangeend_;
         }

         /**
         * Appends algorithm name of code point into StringBuffer.
         * Note this method does not check for validity of code point in
         * Algorithm, result is undefined if code point does not belong in
         * Algorithm.
         * @param ch code point
         * @param str StringBuffer to append to
         */
         protected void appendName(int ch, StringBuffer str)
         {
             str.append(m_prefix_);
             switch (m_type_)
             {
                 case TYPE_0_:
                     // prefix followed by hex digits indicating variants
                     Utility.hex(ch, m_variant_, str);
                     break;
                 case TYPE_1_:
                     // prefix followed by factorized-elements
                     int indexes[] = new int[m_variant_];
                     factorize(ch - m_rangestart_, indexes);

                     // joining up the factorized strings
                     String s[] = getFactorString(indexes);
                     if (s != null) {
                         for (int i = 0; i < s.length; i ++) {
                             str.append(s[i]);
                         }
                     }
                     break;
             }
         }

         /**
         * Gets the character for the argument algorithmic name
         * @param name name to look up; it has to start with the prefix of this
         *        range exactly as stored
         * @return the algorithmic char or -1 otherwise.
         */
         protected int getAlgorithmChar(String name)
         {
             int prefixlen = m_prefix_.length();
             if (!name.startsWith(m_prefix_)) {
                 return -1;
             }

             switch (m_type_)
             {
                 case TYPE_0_ :
                     String digits = name.substring(prefixlen);
                     // Integer.parseInt also accepts a leading sign and
                     // non-ASCII digits, neither of which can be part of a
                     // character name
                     if (!isHexDigits(digits)) {
                         return -1;
                     }
                     try
                     {
                         int result = Integer.parseInt(digits, 16);
                         // does it fit into the range?
                         if (m_rangestart_ <= result && result <= m_rangeend_) {
                             return result;
                         }
                     }
                     catch (NumberFormatException e)
                     {
                         // too many digits to fit into an int
                         return -1;
                     }
                     break;
                 case TYPE_1_ :
                     // repetitive suffix name comparison done here, every
                     // code point of the range is tried in turn.
                     // factorize overwrites every element, so one array can
                     // be reused for all code points
                     int indexes[] = new int[m_variant_];
                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
                     {
                         factorize(ch - m_rangestart_, indexes);
                         if (compareFactorString(indexes, name, prefixlen)) {
                             return ch;
                         }
                     }
                     break;
             }

             return -1;
         }

         // private data members ------------------------------------------

         /**
         * Algorithmic data information
         */
         private int m_rangestart_;
         private int m_rangeend_;
         private byte m_type_;
         private byte m_variant_;
         private char m_factor_[];
         private String m_prefix_;
         private byte m_factorstring_[];

         // private methods -----------------------------------------------

         /**
         * Checks that the argument is a non-empty sequence of ASCII
         * hexadecimal digits
         * @param digits string to check
         * @return true if every char is one of 0-9, A-F or a-f and there is at
         *         least one char
         */
         private static boolean isHexDigits(String digits)
         {
             int length = digits.length();
             if (length == 0) {
                 return false;
             }
             for (int i = 0; i < length; i ++) {
                 char c = digits.charAt(i);
                 boolean hex = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')
                               || (c >= 'a' && c <= 'f');
                 if (!hex) {
                     return false;
                 }
             }
             return true;
         }

         /**
         * Splits the offset of a code point within the range into one index
         * per factor block using modulo arithmetic
         * @param offset code point - range start
         * @param indexes array of size variant which receives the indexes,
         *        every element is overwritten
         */
         private void factorize(int offset, int indexes[])
         {
             for (int i = m_variant_ - 1; i > 0; i --)
             {
                 // NOTE(review): only the low 8 bits of the factor are used
                 // here while getFactorString and compareFactorString use the
                 // whole char - confirm factors never exceed 0xFF
                 int factor = m_factor_[i] & 0x00FF;
                 indexes[i] = offset % factor;
                 offset /= factor;
             }

             // we don't need to calculate the last modulus because
             // start <= code <= end guarantees here that
             // code <= factors[0]
             indexes[0] = offset;
         }

         /**
         * Gets the indexth string in each of the argument factor block
         * @param index array with each index corresponding to each factor block
         * @return array of indexth factor string in factor block, or null if
         *         index is null or its length differs from the factor count
         */
         private String[] getFactorString(int index[])
         {
             int size = m_factor_.length;
             if (index == null || index.length != size) {
                 return null;
             }

             String result[] = new String[size];
             StringBuffer str = new StringBuffer();
             int count = 0;
             int last = size - 1;
             for (int i = 0; i <= last; i ++) {
                 int factor = m_factor_[i];
                 // move to the selected string of this block
                 count = UCharacterUtil.skipNullTermByteSubString(
                                           m_factorstring_, count, index[i]);
                 // the return value replaces count, so count presumably ends
                 // up behind the string that was read
                 count = UCharacterUtil.getNullTermByteSubString(
                                           str, m_factorstring_, count);
                 if (i != last) {
                     // skip the remaining strings of this block
                     count = UCharacterUtil.skipNullTermByteSubString(
                                                    m_factorstring_, count,
                                                    factor - index[i] - 1);
                 }
                 result[i] = str.toString();
                 str.setLength(0);
             }
             return result;
         }

         /**
         * Compares the indexth string in each of the argument factor block with
         * the argument string
         * @param index array with each index corresponding to each factor block
         * @param str string to compare with
         * @param offset of str to start comparison
         * @return true if string matches, i.e. the selected factor strings
         *         cover str from offset up to its end
         */
         private boolean compareFactorString(int index[], String str,
                                             int offset)
         {
             int size = m_factor_.length;
             if (index == null || index.length != size) {
                 return false;
             }

             int count = 0;
             int strcount = offset;
             int last = size - 1;
             for (int i = 0; i <= last; i ++)
             {
                 int factor = m_factor_[i];
                 count = UCharacterUtil.skipNullTermByteSubString(
                                           m_factorstring_, count, index[i]);
                 // the comparison result is the new position in str, a
                 // negative value is treated as a mismatch
                 strcount = UCharacterUtil.compareNullTermByteSubString(str,
                                           m_factorstring_, strcount, count);
                 if (strcount < 0) {
                     return false;
                 }

                 if (i != last) {
                     // count has not been moved by the comparison, so the
                     // selected string itself is skipped too. this is why
                     // there is no - 1 here unlike in getFactorString
                     count = UCharacterUtil.skipNullTermByteSubString(
                                   m_factorstring_, count, factor - index[i]);
                 }
             }
             // trailing chars in str mean that the name does not match
             return strcount == str.length();
         }
     }

     // protected data members --------------------------------------------

     /**
      * Number of groups in the group table. Stays 0 until
      * setGroupCountSize stores a positive value.
      */
     protected int m_groupcount_ = 0;
     /**
      * Size of one group entry in the group information array, in number of
      * char. Used as the stride when indexing the group information array.
      */
     protected int m_groupsize_ = 0;
     /**
     * Number of lines per group, 32.
     * Has to be kept equal to 1 << GROUP_SHIFT_
     */
     protected static final int LINES_PER_GROUP_ = 1 << 5;

     // protected constructor ---------------------------------------------

     /**
     * <p>Protected constructor for use in UCharacter.</p>
     * <p>Loads the name data from the resource NAME_FILE_NAME_ by handing a
     * buffered stream and this object to a UCharacterNameReader. The stream
     * is closed whether or not reading succeeds.</p>
     * @exception IOException thrown when the data resource can not be found
     *            or when data reading fails
     */
     protected UCharacterName() throws IOException
     {
         InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
         if (i == null) {
             // getResourceAsStream reports a missing resource with null
             throw new IOException("Unable to find character name data "
                                   + NAME_FILE_NAME_);
         }
         BufferedInputStream b = new BufferedInputStream(i,
                                                         NAME_BUFFER_SIZE_);
         try {
             UCharacterNameReader reader = new UCharacterNameReader(b);
             reader.read(this);
         }
         finally {
             // closing the buffered stream closes the underlying stream too
             b.close();
         }
     }

     // protected methods -------------------------------------------------

     /**
     * Retrieve the name of a Unicode code point.
     * Depending on <code>choice</code>, the character name returned is the
     * "modern" name, the name that was defined in Unicode version 1.0 or
     * the extended name.
     * The algorithmic names are tried first. Only when they give no name,
     * the extended name or the group name is looked up.
     *
     * @param ch the code point for which to get the name.
     * @param choice Selector for which name to get.
     * @return name of the code point. null is returned if ch lies outside
     *         UCharacter.MIN_VALUE to UCharacter.MAX_VALUE, if choice is
     *         not less than U_CHAR_NAME_CHOICE_COUNT, or if the lookup
     *         itself results in null
     */
     protected String getName(int ch, int choice)
     {
         boolean validcodepoint = UCharacter.MIN_VALUE <= ch
                                  && ch <= UCharacter.MAX_VALUE;
         if (!validcodepoint
             || choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
             return null;
         }

         String algorithmic = getAlgName(ch, choice);
         if (algorithmic != null && algorithmic.length() != 0) {
             return algorithmic;
         }

         // getting normal character name
         if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
             return getExtendedName(ch);
         }
         return getGroupName(ch, choice);
     }

     /**
     * Find a character by its name and return its code point value.
     * The lookup order is extended names, then algorithmic names (skipped
     * for Unicode 1.0 names), then the group names. The name is case mapped
     * with a fixed English locale, so that the result does not depend on
     * the default locale of the virtual machine (e.g. the Turkish mapping
     * between I and dotless i would otherwise break the lookup).
     * @param choice selector to indicate if argument name is a Unicode 1.0
     *        or the most current version
     * @param name character name
     * @return code point, or -1 if the arguments are illegal or no
     *         character has been found
     */
     protected int getCharFromName(int choice, String name)
     {
         // checks for illegal arguments
         if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT ||
             name == null || name.length() == 0) {
             return -1;
         }

         // try extended names first, any result of -1 or more is final.
         // smaller values mean that the other lookups have to be tried
         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH),
                                      choice);
         if (result >= -1) {
             return result;
         }

         String upperCaseName = name.toUpperCase(Locale.ENGLISH);

         // try algorithmic names next, if fails then try group names.
         // there are no algorithmic Unicode 1.0 names
         if (choice != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME
             && m_algorithm_ != null) {
             for (int count = m_algorithm_.length - 1; count >= 0; count --) {
                 result = m_algorithm_[count].getAlgorithmChar(upperCaseName);
                 if (result >= 0) {
                     return result;
                 }
             }
         }

         if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
             // extended lookup falls back from the modern name to the
             // Unicode 1.0 name
             result = getGroupChar(upperCaseName,
                                   UCharacterNameChoice.U_UNICODE_CHAR_NAME);
             if (result == -1) {
                 result = getGroupChar(upperCaseName,
                               UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
             }
         }
         else {
             result = getGroupChar(upperCaseName, choice);
         }
         return result;
     }

     /**
     * Sets the token data
     * @param token array of tokens
     * @param tokenstring array of string values of the tokens
     * @return false if there is a data error, i.e. one of the arrays is null
     *         or empty. In this case nothing is stored.
     */
     protected boolean setToken(char token[], byte tokenstring[])
     {
         if (token == null || tokenstring == null) {
             return false;
         }
         if (token.length == 0 || tokenstring.length == 0) {
             return false;
         }
         m_tokentable_ = token;
         m_tokenstring_ = tokenstring;
         return true;
     }

     /**
     * Set the algorithm name information array
     * @param alg algorithm information array
     * @return true if the array has been stored, false if it was null or
     *         empty
     */
     protected boolean setAlgorithm(AlgorithmName alg[])
     {
         if (alg == null || alg.length == 0) {
             return false;
         }
         m_algorithm_ = alg;
         return true;
     }

     /**
     * Sets the number of group and size of each group in number of char
     * @param count number of groups, has to be positive
     * @param size size of group in char, has to be positive
     * @return true if group size is set correctly, false if one of the
     *         arguments was not positive and nothing has been stored
     */
     protected boolean setGroupCountSize(int count, int size)
     {
         boolean valid = count > 0 && size > 0;
         if (valid) {
             m_groupcount_ = count;
             m_groupsize_ = size;
         }
         return valid;
     }

     /**
     * Sets the group name data
     * @param group index information array
     * @param groupstring name information array
     * @return false if there is a data error, i.e. one of the arrays is null
     *         or empty. In this case nothing is stored.
     */
     protected boolean setGroup(char group[], byte groupstring[])
     {
         if (group == null || groupstring == null) {
             return false;
         }
         if (group.length == 0 || groupstring.length == 0) {
             return false;
         }
         m_groupinfo_ = group;
         m_groupstring_ = groupstring;
         return true;
     }

     /**
     * Reads a block of compressed lengths of 32 strings and expands them into
     * offsets and lengths for each string. Lengths are stored with a
     * variable-width encoding in consecutive nibbles:
     * If a nibble<0xc, then it is the length itself (0 = empty string).
     * If a nibble>=0xc, then it forms a length value with the following
     * nibble.
     * The offsets and lengths arrays must be at least 33 (one more) long
     * because both nibbles of the last byte are always decoded, so a 33rd
     * length may be written.
     * @param index of group string object in array
     * @param offsets array to store the value of the string offsets
     * @param lengths array to store the value of the string length
     * @return next index of the data string immediately after the lengths
     *         in terms of byte address
     */
     protected int getGroupLengths(int index, char offsets[], char lengths[])
     {
         // marks that no first nibble of a two nibble length is waiting
         final int nopending = -1;
         int pending = nopending;

         // position of the group entry in the group information array
         int infoindex = index * m_groupsize_;
         int stringoffset = UCharacterUtil.toInt(
                                 m_groupinfo_[infoindex + OFFSET_HIGH_OFFSET_],
                                 m_groupinfo_[infoindex + OFFSET_LOW_OFFSET_]);

         offsets[0] = 0;

         // all 32 lengths must be read to get the offset of the first group
         // string
         int line = 0;
         while (line < LINES_PER_GROUP_) {
             byte packed = m_groupstring_[stringoffset];

             // high nibble first, then the low nibble
             for (int shift = 4; shift >= 0; shift -= 4) {
                 int nibble = (packed >> shift) & 0x0F;

                 if (pending == nopending && nibble > SINGLE_NIBBLE_MAX_) {
                     // first half of a two nibble length, keep it until the
                     // next nibble arrives
                     pending = (nibble - 12) << 4;
                     continue;
                 }

                 if (pending == nopending) {
                     lengths[line] = (char)nibble;
                 }
                 else {
                     lengths[line] = (char)((pending | nibble) + 12);
                 }

                 if (line < LINES_PER_GROUP_) {
                     offsets[line + 1] = (char)(offsets[line] + lengths[line]);
                 }

                 pending = nopending;
                 line ++;
             }
             stringoffset ++;
         }
         return stringoffset;
     }

     /**
     * Expands the tokenized name that is stored in the group string data.
     * Each data byte is read as an unsigned value in the range 0 to 255. It
     * is either a letter by itself, a single byte token, or the lead byte of
     * a double byte token. A ';' separates the modern name from the
     * following fields.
     * @param index of the group name string in byte count
     * @param length of the group name string
     * @param choice of Unicode 1.0 name or the most current name
     * @return name of the group, null if the expanded name is empty
     */
     protected String getGroupName(int index, int length, int choice)
     {
         if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
             // the 1.0 name follows the first ';', skip the modern name
             int oldindex = index;
             index += UCharacterUtil.skipByteSubString(m_groupstring_,
                                                index, length, (byte)';');
             length -= (index - oldindex);
         }

         StringBuffer s = new StringBuffer();
         for (int i = 0; i < length;) {
             // bytes are signed in Java. without the mask values from 0x80
             // onwards would be negative, which breaks the token table
             // lookups below
             int b = m_groupstring_[index + i] & 0x00ff;
             i ++;

             if (b >= m_tokentable_.length) {
                 if (b == ';') {
                     break;
                 }
                 // implicit letter. the cast is needed, append(int) would
                 // write the decimal digits of the value instead
                 s.append((char)b);
             }
             else {
                 char token = m_tokentable_[b];
                 if (token == 0xFFFE) {
                     // this is a lead byte for a double-byte token
                     token = m_tokentable_[b << 8 |
                                       (m_groupstring_[index + i] & 0x00ff)];
                     i ++;
                 }
                 if (token == 0xFFFF) {
                     if (b == ';') {
                         // skip the semicolon if we are seeking extended
                         // names and there was no 2.0 name but there
                         // is a 1.0 name.
                         if (s.length() == 0 && choice ==
                                UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
                             continue;
                         }
                         break;
                     }
                     s.append((char)b); // explicit letter
                 }
                 else { // write token word
                     UCharacterUtil.getNullTermByteSubString(s,
                                                      m_tokenstring_, token);
                 }
             }
         }

         if (s.length() == 0) {
             return null;
         }
         return s.toString();
     }

     /**
     * Retrieves the extended name.
     * The modern name is used when there is one. Otherwise the Unicode 1.0
     * name is tried for control characters, and as the last resort the name
     * from getExtendedOr10Name is used.
     * @param ch code point
     * @return extended name of the code point
     */
     protected String getExtendedName(int ch)
     {
         String modern = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
         if (modern != null) {
             return modern;
         }

         String fallback = null;
         if (getType(ch) == UCharacterCategory.CONTROL) {
             fallback = getName(ch,
                                UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
         }
         if (fallback == null) {
             fallback = getExtendedOr10Name(ch);
         }
         return fallback;
     }

     /**
      * Gets the group index for the codepoint, or the group before it.
      * The group table is searched by bisection on the values returned by
      * getGroupMSB.
      * @param codepoint
      * @return group index containing codepoint or the group before it.
      */
     protected int getGroup(int codepoint)
     {
         int msb = getCodepointMSB(codepoint);
         int low = 0;
         int high = m_groupcount_;
         // the result always lies in low <= index < high, halve the interval
         // until only low is left
         while (low < high - 1) {
             int middle = (low + high) >> 1;
             if (msb >= getGroupMSB(middle)) {
                 low = middle;
             }
             else {
                 high = middle;
             }
         }
         return low;
     }

     /**
      * Gets the extended and 1.0 name when the most current unicode names
      * fail.
      * Control characters get their Unicode 1.0 name if there is one. For
      * everything else the result has the form &lt;type-XXXX&gt;, with the
      * name of the character type and the code point in upper case
      * hexadecimal, padded with zeros to at least 4 digits.
      * @param ch codepoint
      * @return name of codepoint extended or 1.0
      */
     protected String getExtendedOr10Name(int ch)
     {
         if (getType(ch) == UCharacterCategory.CONTROL) {
             String oldname = getName(ch,
                                  UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
             if (oldname != null) {
                 return oldname;
             }
         }

         int type = getType(ch);
         String typename;
         if (type < UCharacterCategory.TYPE_NAMES_.length) {
             typename = UCharacterCategory.TYPE_NAMES_[type];
         }
         else {
             // Return unknown if the table of names above is not up to
             // date.
             typename = UCharacterCategory.UNKNOWN_TYPE_NAME_;
         }

         StringBuffer extended = new StringBuffer(typename.length() + 12);
         extended.append('<');
         extended.append(typename);
         extended.append('-');
         String hex = Integer.toHexString(ch).toUpperCase();
         for (int width = hex.length(); width < 4; width ++) {
             extended.append('0');
         }
         extended.append(hex);
         extended.append('>');
         return extended.toString();
     }

     // these are all UCharacterNameIterator use methods -------------------

     /**
      * Gets the MSB from the group index, i.e. the first char of the group
      * entry in the group information array
      * @param gindex group index
      * @return the MSB of the group if gindex is less than the number of
      *         groups, -1 otherwise
      */
     protected int getGroupMSB(int gindex)
     {
         if (gindex < m_groupcount_) {
             int msb = m_groupinfo_[gindex * m_groupsize_];
             return msb;
         }
         return -1;
     }

     /**
      * Gets the MSB of the codepoint. This is the codepoint without its
      * lowest GROUP_SHIFT_ bits.
      * @param codepoint
      * @return the MSB of the codepoint
      */
     protected int getCodepointMSB(int codepoint)
     {
         int msb = codepoint >> GROUP_SHIFT_;
         return msb;
     }

     /**
      * Gets the maximum codepoint + 1 of the group
      * @param msb group MSB, as returned by getCodepointMSB
      * @return limit codepoint of the group, i.e. the minimum codepoint of
      *         the group plus LINES_PER_GROUP_
      */
     protected int getGroupLimit(int msb)
     {
         int first = msb << GROUP_SHIFT_;
         return first + LINES_PER_GROUP_;
     }

     /**
      * Gets the minimum codepoint of the group
      * @param msb group MSB, as returned by getCodepointMSB
      * @return minimum codepoint of the group, i.e. msb shifted left by
      *         GROUP_SHIFT_
      */
     protected int getGroupMin(int msb)
     {
         int first = msb << GROUP_SHIFT_;
         return first;
     }

     /**
      * Gets the offset to a group, i.e. the bits of the codepoint that are
      * selected by GROUP_MASK_
      * @param codepoint
      * @return offset to a group
      */
     protected int getGroupOffset(int codepoint)
     {
         int offset = GROUP_MASK_ & codepoint;
         return offset;
     }

 	/**
      * Gets the minimum codepoint of a group, i.e. the codepoint with the
      * bits of GROUP_MASK_ cleared
      * @param codepoint
      * @return minimum codepoint in the group which codepoint belongs to
      */
     protected int getGroupMinFromCodepoint(int codepoint)
     {
         int withoutoffset = ~GROUP_MASK_ & codepoint;
         return withoutoffset;
     }

     /**
      * Get the Algorithm range length
      * @return number of algorithmic ranges. 0 if no algorithm information
      *         has been set, in line with getCharFromName which treats a
      *         missing array as having no ranges.
      */
     protected int getAlgorithmLength()
     {
         // setAlgorithm refuses empty arrays, so the array may never have
         // been set
         if (m_algorithm_ == null) {
             return 0;
         }
         return m_algorithm_.length;
     }

     /**
      * Gets the start of the range
      * @param index algorithm index
      * @return first code point of the algorithmic range at index
      */
     protected int getAlgorithmStart(int index)
     {
         AlgorithmName range = m_algorithm_[index];
         return range.m_rangestart_;
     }

     /**
      * Gets the end of the range
      * @param index algorithm index
      * @return last code point of the algorithmic range at index
      */
     protected int getAlgorithmEnd(int index)
     {
         AlgorithmName range = m_algorithm_[index];
         return range.m_rangeend_;
     }

     /**
      * Gets the Algorithmic name of the codepoint.
      * There is no check that the codepoint lies within the range at index.
      * @param index algorithmic range index
      * @param codepoint
      * @return algorithmic name of codepoint
      */
     protected String getAlgorithmName(int index, int codepoint)
     {
         AlgorithmName range = m_algorithm_[index];
         StringBuffer name = new StringBuffer();
         range.appendName(codepoint, name);
         return name.toString();
     }


     // private data members ----------------------------------------------

     /**
     * Data used in unames.dat.
     * The token arrays are stored by setToken, the group arrays by setGroup
     * and the algorithm array by setAlgorithm. They stay null until the
     * matching set method has accepted its arguments.
     */
     private char m_tokentable_[];
     private byte m_tokenstring_[];
     private char m_groupinfo_[];
     private byte m_groupstring_[];
     private AlgorithmName m_algorithm_[];

     /**
     * Group use.
     * Buffers which getGroupChar hands to getGroupLengths to receive the
     * offsets and lengths of the strings of one group. They are one element
     * longer than LINES_PER_GROUP_, as getGroupLengths asks for.
     */
     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];

     /**
     * Default name of the name datafile, used as a resource path by the
     * constructor
     */
     private static final String NAME_FILE_NAME_ =
                                            "/com/ibm/icu/impl/data/unames.dat";
     /**
     * Shift count to retrieve group information. Has to match
     * LINES_PER_GROUP_, which is 1 << 5.
     */
     private static final int GROUP_SHIFT_ = 5;
     /**
     * Mask to retrieve the offset for a particular character within a group
     */
     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
     /**
     * Default buffer size of datafile, passed to the BufferedInputStream
     * created in the constructor
     */
     private static final int NAME_BUFFER_SIZE_ = 100000;

     /**
     * Position of offsethigh in group information array, relative to the
     * start of a group entry
     */
     private static final int OFFSET_HIGH_OFFSET_ = 1;

     /**
     * Position of offsetlow in group information array, relative to the
     * start of a group entry
     */
     private static final int OFFSET_LOW_OFFSET_ = 2;
     /**
     * Double nibble indicator, any nibble > this number has to be combined
     * with its following nibble
     */
     private static final int SINGLE_NIBBLE_MAX_ = 11;


     // private methods ---------------------------------------------------

     /**
     * Gets the algorithmic name for the argument character.
     * The ranges are searched from the last to the first one, and the first
     * range that contains the character supplies the name.
     * @param ch character to determine name for
     * @param choice name choice
     * @return the algorithmic name or null if not found. null is also
     *         returned for Unicode 1.0 names and when no algorithm
     *         information has been set.
     */
     private String getAlgName(int ch, int choice)
     {
         // Do not write algorithmic Unicode 1.0 names because Unihan names are
         // the same as the modern ones, extension A was only introduced with
         // Unicode 3.0, and the Hangul syllable block was moved and changed
         // around Unicode 1.1.5.
         if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
             return null;
         }
         // setAlgorithm refuses empty arrays, so the array may never have
         // been set. getCharFromName guards against this in the same way
         if (m_algorithm_ == null) {
             return null;
         }

         for (int index = m_algorithm_.length - 1; index >= 0; index --) {
             AlgorithmName range = m_algorithm_[index];
             if (range.contains(ch)) {
                 StringBuffer s = new StringBuffer();
                 range.appendName(ch, s);
                 return s.toString();
             }
         }
         return null;
     }

     /**
     * Searches every group, in order, for a character whose tokenized name
     * equals the argument name.
     * Synchronized because the shared scratch arrays m_groupoffsets_ and
     * m_grouplengths_ are refilled for each group visited.
     * @param name name of the character to look for
     * @param choice name choice selector
     * @return the code point built from the matching group's MSB value and
     *         the matching line within that group, or -1 if no group
     *         contains the name
     */
     private synchronized int getGroupChar(String name, int choice)
     {
         int group = 0;
         while (group < m_groupcount_) {
             // load the line lengths of this group into the scratch arrays
             int stringstart = getGroupLengths(group, m_groupoffsets_,
                                               m_grouplengths_);

             // match the name against each line of the group
             int line = getGroupChar(stringstart, m_grouplengths_, name,
                                     choice);
             if (line != -1) {
                 int msb = m_groupinfo_[group * m_groupsize_];
                 return (msb << GROUP_SHIFT_) | line;
             }
             group ++;
         }
         return -1;
     }

     /**
     * Compares the argument name against every line of one group and
     * returns the line that matches.
     * Each line of the group string is a sequence of bytes; a byte is either
     * matched literally against the next character of the name or expanded
     * through the token table into a token that is compared against the
     * name.
     * The encoded bytes are treated as unsigned values. Reading them as
     * signed Java bytes would make every byte above 0x7F negative, so the
     * range test against the token table length could never succeed for it
     * and a double-byte token lookup would use a negative array index.
     * @param index index where the lines of the group start in the group
     *        string block
     * @param length list of lengths of the lines in the group
     * @param name character name to search for
     * @param choice of either 1.0 or the most current unicode name
     * @return relative character in the group which matches name, otherwise
     *         if not found, -1 will be returned
     */
     private int getGroupChar(int index, char length[], String name,
                              int choice)
     {
         int namelen = name.length();

         // NOTE(review): the loop visits LINES_PER_GROUP_ + 1 slots, which is
         // the size of the scratch arrays; confirm that the last slot is
         // meant to be examined.
         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
             int nindex = 0;
             int len = length[result];

             if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
                 // skip the leading field up to ';' so that the comparison
                 // starts at the Unicode 1.0 name
                 int oldindex = index;
                 index += UCharacterUtil.skipByteSubString(m_groupstring_,
                                                      index, len, (byte)';');
                 len -= (index - oldindex);
             }

             // number of tokens is > the length of the name
             // compare each letter directly, and compare a token word per
             // token; nindex becomes -1 as soon as a mismatch is found
             int count = 0;
             while (count < len && nindex != -1 && nindex < namelen) {
                 // widen to an unsigned value before any comparison or
                 // indexing
                 int b = m_groupstring_[index + count] & 0xFF;
                 count ++;

                 if (b >= m_tokentable_.length) {
                     // no token table entry: the byte is a literal character
                     if (name.charAt(nindex ++) != b) {
                         nindex = -1;
                     }
                 }
                 else {
                     char token = m_tokentable_[b];
                     if (token == 0xFFFE) {
                         // this is a lead byte for a double-byte token
                         token = m_tokentable_[(b << 8) |
                                    (m_groupstring_[index + count] & 0xFF)];
                         count ++;
                     }
                     if (token == 0xFFFF) {
                         // the byte stands for itself
                         if (name.charAt(nindex ++) != b) {
                             nindex = -1;
                         }
                     }
                     else {
                         // compare token with name
                         nindex = UCharacterUtil.compareNullTermByteSubString(
                                         name, m_tokenstring_, nindex, token);
                     }
                 }
             }

             // a match needs the whole name consumed and the line (or its
             // current ';' separated field) consumed as well
             if (namelen == nindex &&
                 (count == len || m_groupstring_[index + count] == ';')) {
                 return result;
             }

             index += len;
         }
         return -1;
     }

     /**
     * Binary search for the group record whose most significant bits value
     * equals that of the argument code point.
     * The search narrows down to the last record whose MSB value is not
     * greater than the code point's; only an exact match is accepted.
     * @param ch the code point to look for
     * @return the int combined from the offset-high and offset-low entries
     *         of the matching group record, or -1 if no group record has the
     *         code point's MSB value
     */
     private int getGroupStringIndex(int ch)
     {
         // gets the msb
         int msb  = ch >> GROUP_SHIFT_;
         int low  = 0;
         int high = m_groupcount_;

         // halve [low, high) until a single candidate record remains
         while (low < high - 1) {
             int middle = (low + high) >> 1;
             if (msb < m_groupinfo_[middle * m_groupsize_]) {
                 high = middle;
             }
             else {
                 low = middle;
             }
         }

         // the candidate only counts if it is an exact match
         int record = low * m_groupsize_;
         if (msb != m_groupinfo_[record]) {
             return -1;
         }
         return UCharacterUtil.toInt(
                             m_groupinfo_[record + OFFSET_HIGH_OFFSET_],
                             m_groupinfo_[record + OFFSET_LOW_OFFSET_]);
     }

     /**
     * Gets the name of the character from the group data.
     * Synchronized because the shared scratch arrays m_groupoffsets_ and
     * m_grouplengths_ are refilled for the character's group.
     * @param ch character to get the group name
     * @param choice name choice selector to choose a unicode 1.0 or newer name
     * @return the name read from the character's line in its group, or null
     *         if the group found for ch does not carry ch's MSB value
     */
     private synchronized String getGroupName(int ch, int choice)
     {
         int msb   = getCodepointMSB(ch);
         int group = getGroup(ch);

         // only an exact match of the most significant bits is a hit
         if (msb != m_groupinfo_[group * m_groupsize_]) {
             return null;
         }

         int stringstart = getGroupLengths(group, m_groupoffsets_,
                                           m_grouplengths_);
         // line of the character within its group
         int line = ch & GROUP_MASK_;
         return getGroupName(stringstart + m_groupoffsets_[line],
                             m_grouplengths_[line], choice);
     }

     /**
     * Gets the character extended type.
     * The extended type is the value of UCharacter.getType, except that
     * noncharacters are reported as UCharacterCategory.NON_CHARACTER_ and
     * surrogates are split into UCharacterCategory.LEAD_SURROGATE_ and
     * UCharacterCategory.TRAIL_SURROGATE_.
     * @param ch character to be tested
     * @return extended type it is associated with
     */
     private int getType(int ch)
     {
         if (UCharacter.isNonCharacter(ch)) {
             // not a character we return a invalid category count
             return UCharacterCategory.NON_CHARACTER_;
         }

         int type = UCharacter.getType(ch);
         if (type != UCharacterCategory.SURROGATE) {
             return type;
         }

         // surrogates up to the largest lead value are leads, the rest trails
         return ch <= UTF16.LEAD_SURROGATE_MAX_VALUE
                             ? UCharacterCategory.LEAD_SURROGATE_
                             : UCharacterCategory.TRAIL_SURROGATE_;
     }

     /**
     * Getting the character with extended name of the form
     * &lt;category-HEX&gt;, where HEX is the code point in hexadecimal and
     * category is one of UCharacterCategory.TYPE_NAMES_.
     * The first character of name is read unconditionally, so name must not
     * be empty.
     * @param name of the character to be found
     * @param choice name choice
     * @return character associated with the name, -1 if name starts with
     *         '&lt;' but does not resolve to a character (which is always the
     *         case unless choice is
     *         UCharacterNameChoice.U_EXTENDED_CHAR_NAME), and -2 if name
     *         does not start with '&lt;' and we should continue with the
     *         search.
     */
     private int getExtendedChar(String name, int choice)
     {
         if (name.charAt(0) != '<') {
             // not an extended name at all, let the caller keep searching
             return -2;
         }
         if (choice != UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
             return -1;
         }

         int closing = name.length() - 1;
         if (name.charAt(closing) != '>') {
             return -1;
         }

         // the last '-' separates the category from the hex code point
         int dash = name.lastIndexOf('-');
         if (dash < 0) {
             return -1;
         }

         int codepoint;
         try {
             codepoint = Integer.parseInt(name.substring(dash + 1, closing),
                                          16);
         }
         catch (NumberFormatException e) {
             return -1;
         }

         // Now validate the category name. We could use a binary search, or
         // a trie, if we really wanted to.
         String category = name.substring(1, dash);
         String typenames[] = UCharacterCategory.TYPE_NAMES_;
         for (int type = 0; type < typenames.length; ++ type) {
             if (typenames[type].equals(category)) {
                 // the name only counts if the code point really has the
                 // extended type it claims
                 return getType(codepoint) == type ? codepoint : -1;
             }
         }
         return -1;
     }
 }