/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* $Source: 
*     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $ 
* $Date: 2002/03/15 22:48:07 $ 
* $Revision: 1.15 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;

import java.util.Locale;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;

/**
* Internal class to manage character names.
* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
* in an array of char, indexes used in this class refer by default to 
* a 2 byte count, unless otherwise stated. In cases where the index refers 
* to a byte count, the index is halved and, depending on whether the index is 
* even or odd, the MSB or LSB of the char at the halved index is 
* returned. For indexes into an array of int, the index is multiplied by 2, 
* and the char at the multiplied index together with its following char is 
* returned as an int.
* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
* Note : 0 - 0x1F are control characters without names in Unicode 3.0
* Information on parsing of the binary data is located at
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
* ReadMe</a>
* @author Syn Wee Quek
* @since nov0700
*/

final class UCharacterName
{
    // public methods ----------------------------------------------------
    
    /**
    * Returns a fixed, human readable label for this object.
    * No offsets or table contents are included in the result.
    * @return the string "names content " followed by a line feed
    */
    public String toString()
    {
        return "names content \n";
    } 
    
    // package protected inner class -------------------------------------
    
    /**
    * Algorithmic name class.
    * Each instance covers one contiguous range of code points whose names 
    * are computed instead of being looked up. Every name consists of a 
    * common prefix followed either by the code point written by Utility.hex 
    * (TYPE_0_) or by a sequence of suffix strings that are selected by 
    * factorizing the offset of the code point within the range (TYPE_1_).
    */
    static final class AlgorithmName
    {
        // protected data members ----------------------------------------
        
        /**
        * Constant type value of the different AlgorithmName
        */
        protected static final int TYPE_0_ = 0;
        protected static final int TYPE_1_ = 1;
        
        // protected constructors ----------------------------------------
        
        /**
        * Constructor. All data is supplied afterwards through the set 
        * methods.
        */
        protected AlgorithmName()
        {
        }
        
        // protected methods ---------------------------------------------
        
        /**
        * Sets the information for accessing the algorithmic names
        * @param rangestart starting code point that lies within this name group
        * @param rangeend end code point that lies within this name group
        * @param type algorithm type. There's 2 kinds of algorithmic type. First 
        *        which uses code point as part of its name and the other uses 
        *        variant postfix strings
        * @param variant algorithmic variant; it is handed to Utility.hex for 
        *        TYPE_0_ and is the number of factors for TYPE_1_
        * @return true if values are valid, false if nothing was stored
        */ 
        protected boolean setInfo(int rangestart, int rangeend, byte type,
                                byte variant)
        {
            if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend 
                && rangeend <= UCharacter.MAX_VALUE && 
                (type == TYPE_0_ || type == TYPE_1_)) {
                m_rangestart_ = rangestart;
                m_rangeend_ = rangeend;
                m_type_ = type;
                m_variant_ = variant;
                return true;
            }
            return false;
        }
        
        /**
        * Sets the factor data
        * @param factor array of factors, its length has to equal the variant 
        *        set in setInfo
        * @return true if factors are valid, false if the array is null or 
        *         has the wrong length
        */
        protected boolean setFactor(char factor[])
        {
            // null is rejected like any other invalid argument instead of 
            // failing with a NullPointerException
            if (factor != null && factor.length == m_variant_) {
                m_factor_ = factor;
                return true;
            }
            return false;
        }
        
        /**
        * Sets the name prefix
        * @param prefix non-empty prefix shared by all names in the range
        * @return true if prefix is set
        */
        protected boolean setPrefix(String prefix)
        {
            if (prefix != null && prefix.length() > 0) {
                m_prefix_ = prefix;
                return true;
            }
            return false;
        }
        
        /**
        * Sets the variant factorized name data 
        * @param string variant factorized name data
        * @return true if values are set, which is always the case
        */
        protected boolean setFactorString(byte string[])
        {
            // factor and variant string can be empty for things like 
            // Hangul code points
            m_factorstring_ = string;
            return true;
        }
      
        /**
        * Checks if code point lies in the range of this Algorithm object
        * @param ch code point 
        * @return true if range start <= ch <= range end
        */
        protected boolean contains(int ch)
        {
            return m_rangestart_ <= ch && ch <= m_rangeend_;
        }
        
        /**
        * Appends algorithm name of code point into StringBuffer.
        * Note this method does not check for validity of code point in Algorithm,
        * result is undefined if code point does not belong in Algorithm.
        * @param ch code point
        * @param str StringBuffer to append to
        */
        protected void appendName(int ch, StringBuffer str)
        {
            str.append(m_prefix_);
            switch (m_type_) 
            {
                case TYPE_0_: 
                    // prefix followed by hex digits indicating variants
                    Utility.hex(ch, m_variant_, str);
                    break;
                case TYPE_1_: 
                    // prefix followed by factorized-elements
                    int indexes[] = new int[m_variant_];
                    factorize(ch - m_rangestart_, indexes);

                    // joining up the factorized strings 
                    String s[] = getFactorString(indexes);
                    if (s != null) {
                        for (int i = 0; i < s.length; i ++) {
                            str.append(s[i]);
                        }
                    }
                    break;
            }
        }
        
        /**
        * Gets the character for the argument algorithmic name
        * @param name name to look up, compared as is against the prefix
        * @return the algorithmic char or -1 otherwise, including when name 
        *         is null
        */
        protected int getAlgorithmChar(String name)
        {
            if (name == null || !name.startsWith(m_prefix_)) {
                return -1;
            }
            int prefixlen = m_prefix_.length();
                
            switch (m_type_) 
            {
                case TYPE_0_ : 
                    try
                    {
                        int result = Integer.parseInt(
                                              name.substring(prefixlen), 16);
                        // does it fit into the range?
                        if (contains(result)) {
                            return result;
                        }
                    }
                    catch (NumberFormatException e)
                    {
                        // the text after the prefix is not a hex number, 
                        // hence the name does not belong to this range
                    }
                    break;
                case TYPE_1_ : 
                    // repetitive suffix name comparison done here.
                    // the index array is allocated once and refilled for 
                    // every candidate, factorize overwrites every element
                    int indexes[] = new int[m_variant_];
                    for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
                    {
                        // offset is the character code - start
                        factorize(ch - m_rangestart_, indexes);
                        if (compareFactorString(indexes, name, prefixlen)) {
                            return ch;
                        }
                    }
                    break;
            }

            return -1;
        }
        
        // private data members ------------------------------------------
        
        /**
        * Algorithmic data information
        */
        private int m_rangestart_;
        private int m_rangeend_;
        private byte m_type_;
        private byte m_variant_;
        private char m_factor_[];
        private String m_prefix_;
        private byte m_factorstring_[];
        
        // private methods -----------------------------------------------
        
        /**
        * Splits the offset of a code point within the range into one index 
        * per factor using modulo arithmetic, last factor first.
        * @param offset code point minus range start
        * @param indexes array of size variant that receives the indexes, 
        *        every element is overwritten
        */
        private void factorize(int offset, int indexes[])
        {
            // NOTE(review): the factor is masked to 8 bits here while 
            // getFactorString and compareFactorString use the unmasked 
            // value, the results only agree for factors below 256 - confirm 
            // against the data reader
            for (int i = indexes.length - 1; i > 0; i --) 
            {
                int factor = m_factor_[i] & 0x00FF;
                indexes[i] = offset % factor;
                offset /= factor;
            }
                      
            // we don't need to calculate the last modulus because 
            // start <= code <= end guarantees here that 
            // code <= factors[0]
            indexes[0] = offset;
        }
                
        /**
        * Gets the indexth string in each of the argument factor block
        * @param index array with each index corresponding to each factor block
        * @return array of indexth factor string in factor block, null if 
        *         index is null or does not have one entry per factor
        */
        private String[] getFactorString(int index[])
        {
            int size = m_factor_.length;
            if (index == null || index.length != size) {
                return null;
            }
                
            String result[] = new String[size];
            StringBuffer str = new StringBuffer();
            int count = 0;
            int last = size - 1;
            for (int i = 0; i <= last; i ++) {
                int factor = m_factor_[i];
                // move to the wanted string of this block and read it
                count = UCharacterUtil.skipNullTermByteSubString(
                                          m_factorstring_, count, index[i]);
                count = UCharacterUtil.getNullTermByteSubString(
                                          str, m_factorstring_, count);
                if (i != last) {
                    // skip the remaining strings of this block
                    count = UCharacterUtil.skipNullTermByteSubString(
                                                   m_factorstring_, count, 
                                                   factor - index[i] - 1);
                }
                result[i] = str.toString();
                str.setLength(0);
            }
            return result;
        }
        
        /**
        * Compares the indexth string in each of the argument factor block with
        * the argument string
        * @param index array with each index corresponding to each factor block
        * @param str string to compare with
        * @param offset of str to start comparison
        * @return true if string matches up to and including its last char
        */
        private boolean compareFactorString(int index[], String str, 
                                            int offset)
        {
            int size = m_factor_.length;
            if (index == null || index.length != size) {
                return false;
            }
                
            int count = 0;
            int strcount = offset;
            int last = size - 1;
            for (int i = 0; i <= last; i ++)
            {
                int factor = m_factor_[i];
                count = UCharacterUtil.skipNullTermByteSubString(
                                          m_factorstring_, count, index[i]);
                strcount = UCharacterUtil.compareNullTermByteSubString(str, 
                                          m_factorstring_, strcount, count);
                if (strcount < 0) {
                    return false;
                }
                  
                if (i != last) {
                    // count still points at the compared string, hence it 
                    // is skipped together with the rest of the block
                    count = UCharacterUtil.skipNullTermByteSubString(
                                  m_factorstring_, count, factor - index[i]);
                }
            }
            return strcount == str.length();
        }
    }
    
    // protected data members --------------------------------------------
    
    /**
     * Number of groups; assigned by setGroupCountSize and used as the 
     * exclusive upper bound for group indexes
     */
    protected int m_groupcount_ = 0;
    /**
     * Size of one group entry in the group information array, in number of 
     * char; assigned by setGroupCountSize
     */
    protected int m_groupsize_ = 0;
    /**
    * Number of lines per group, one line for each code point of the group
    * 1 << GROUP_SHIFT_
    */
    protected static final int LINES_PER_GROUP_ = 1 << 5;
    
    // protected constructor ---------------------------------------------
    
    /**
    * <p>Protected constructor for use in UCharacter.</p>
    * Opens the name data file as a class resource, lets 
    * UCharacterNameReader fill this object and closes the stream again, 
    * also when reading fails.
    * @exception IOException thrown when the data file can not be found or 
    *            data reading fails
    */
    protected UCharacterName() throws IOException
    {
        InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
        if (i == null) {
            // getResourceAsStream signals a missing resource with null
            throw new IOException("Unable to find data file " 
                                  + NAME_FILE_NAME_);
        }
        BufferedInputStream b = new BufferedInputStream(i, 
                                                        NAME_BUFFER_SIZE_);
        try {
            UCharacterNameReader reader = new UCharacterNameReader(b);
            reader.read(this);
        }
        finally {
            // closing the buffered stream also closes the resource stream
            b.close();
        }
    }
      
    // protected methods -------------------------------------------------
     
    /**
    * Retrieve the name of a Unicode code point.
    * Depending on <code>choice</code>, the name returned is the "modern" 
    * name, the name that was defined in Unicode version 1.0 or the extended 
    * name.
    * The algorithmic names are tried first. Only if they give no name the 
    * extended name or the group name is looked up.
    * The name contains only "invariant" characters
    * like A-Z, 0-9, space, and '-'.
    *
    * @param ch the code point for which to get the name.
    * @param choice Selector for which name to get.
    * @return the name, or null if ch lies outside of UCharacter.MIN_VALUE to
    *         UCharacter.MAX_VALUE, if choice is not below 
    *         U_CHAR_NAME_CHOICE_COUNT or if the lookup used returns null
    */
    protected String getName(int ch, int choice)
    {
        boolean validcodepoint = ch >= UCharacter.MIN_VALUE 
                                 && ch <= UCharacter.MAX_VALUE;
        if (!validcodepoint 
            || choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
            return null;
        }
        
        String algname = getAlgName(ch, choice);
        if (algname != null && algname.length() > 0) {
            return algname;
        }
          
        // no algorithmic name, getting normal character name
        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
            return getExtendedName(ch);
        }
        return getGroupName(ch, choice);
    }
      
    /**
    * Find a character by its name and return its code point value.
    * The lookup order is extended names, algorithmic names (skipped for 
    * Unicode 1.0 names) and finally the group names. Case is mapped with 
    * Locale.ENGLISH, hence the result does not depend on the default 
    * locale.
    * @param choice selector to indicate if argument name is a Unicode 1.0 
    *        or the most current version 
    * @param name character name
    * @return code point, or -1 if the arguments are illegal or the name is 
    *         not found
    */
    protected int getCharFromName(int choice, String name)
    {
        // checks for illegal arguments
        if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT || 
            name == null || name.length() == 0) {
            return -1;
        }
        
        // try extended names first, any result from -1 upwards is final
        int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), 
                                     choice);
        if (result >= -1) {
            return result;
        }
        
        String upperCaseName = name.toUpperCase(Locale.ENGLISH);
        
        // try algorithmic names next, if fails then try group names
        if (choice != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME 
            && m_algorithm_ != null) {
            for (int count = m_algorithm_.length - 1; count >= 0; count --) {
                result = m_algorithm_[count].getAlgorithmChar(upperCaseName); 
                if (result >= 0) {
                    return result;
                }
            }
        }
            
        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
            // extended lookup accepts the modern name and the 1.0 name
            result = getGroupChar(upperCaseName, 
                                  UCharacterNameChoice.U_UNICODE_CHAR_NAME);
            if (result == -1) {
                result = getGroupChar(upperCaseName, 
                              UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
            }
        }
        else {
            result = getGroupChar(upperCaseName, choice);
        }
        return result;
    }
    
    /**
    * Sets the token data. Nothing is stored unless both arrays are non-null
    * and non-empty.
    * @param token array of tokens
    * @param tokenstring array of string values of the tokens
    * @return false if there is a data error
    */
    protected boolean setToken(char token[], byte tokenstring[])
    {
        if (token == null || tokenstring == null) {
            return false;
        }
        if (token.length == 0 || tokenstring.length == 0) {
            return false;
        }
        m_tokentable_ = token;
        m_tokenstring_ = tokenstring;
        return true;
    }
        
    /**
    * Set the algorithm name information array. A null or empty array is 
    * rejected and leaves the current value untouched.
    * @param alg algorithm information array
    * @return true if the array has been stored
    */
    protected boolean setAlgorithm(AlgorithmName alg[])
    {
        boolean valid = alg != null && alg.length != 0;
        if (valid) {
            m_algorithm_ = alg;
        }
        return valid;
    }
    
    /**
    * Sets the number of group and size of each group in number of char.
    * Both values have to be positive, otherwise nothing is stored.
    * @param count number of groups
    * @param size size of group in char
    * @return true if group size is set correctly
    */
    protected boolean setGroupCountSize(int count, int size)
    {
        if (count > 0 && size > 0) {
            m_groupcount_ = count;
            m_groupsize_ = size;
            return true;
        }
        return false;
    }
      
    /**
    * Sets the group name data. Nothing is stored unless both arrays are 
    * non-null and non-empty.
    * @param group index information array
    * @param groupstring name information array
    * @return false if there is a data error
    */
    protected boolean setGroup(char group[], byte groupstring[])
    {
        if (group == null || groupstring == null) {
            return false;
        }
        if (group.length == 0 || groupstring.length == 0) {
            return false;
        }
        m_groupinfo_ = group;
        m_groupstring_ = groupstring;
        return true;
    }
    
    /**
    * Reads a block of compressed lengths of 32 strings and expands them into 
    * offsets and lengths for each string. Lengths are stored with a 
    * variable-width encoding in consecutive nibbles:
    * If a nibble<0xc, then it is the length itself (0 = empty string).
    * If a nibble>=0xc, then it forms a length value with the following 
    * nibble.
    * Both nibbles of every byte read are processed, so one entry more than 
    * 32 can be written. Hence the offsets and lengths arrays must be at 
    * least 33 (one more) long.
    * @param index of group string object in array
    * @param offsets array to store the value of the string offsets
    * @param lengths array to store the value of the string length
    * @return next index of the data string immediately after the lengths 
    *         in terms of byte address
    */
    protected int getGroupLengths(int index, char offsets[], char lengths[]) 
    {
        // 0xffff marks that no first nibble of a two nibble length is held
        char pending = 0xffff;
        int entry = index * m_groupsize_;
        // byte count offset of the group strings
        int stringoffset = UCharacterUtil.toInt(
                                 m_groupinfo_[entry + OFFSET_HIGH_OFFSET_], 
                                 m_groupinfo_[entry + OFFSET_LOW_OFFSET_]);
            
        offsets[0] = 0;
        
        // all 32 lengths must be read to get the offset of the first group 
        // string
        int i = 0;
        while (i < LINES_PER_GROUP_) {
            byte b = m_groupstring_[stringoffset];
              
            // high nibble first, then low nibble
            for (int shift = 4; shift >= 0; shift -= 4) {
                int nibble = (b >> shift) & 0x0F;   
                if (pending == 0xffff && nibble > SINGLE_NIBBLE_MAX_) {
                    // first half of a two nibble length
                    pending = (char)((nibble - 12) << 4);
                }
                else {
                    if (pending == 0xffff) {
                        lengths[i] = (char)nibble;
                    }
                    else {
                        lengths[i] = (char)((pending | nibble) + 12);
                    }
                    
                    if (i < LINES_PER_GROUP_) {
                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
                    }
                    
                    pending = 0xffff;
                    i ++;
                }
            }
            stringoffset ++;
        }
        return stringoffset;
    }
    
    /**
    * Gets the name of the argument group index.
    * The bytes of the group string are expanded through the token table: a 
    * byte is either a letter itself, an index to a token word or the lead 
    * byte of a double-byte token. Bytes are treated as unsigned values.
    * @param index of the group name string in byte count
    * @param length of the group name string
    * @param choice of Unicode 1.0 name or the most current name
    * @return name of the group, null if the name is empty
    */
    protected String getGroupName(int index, int length, int choice) 
    {
        if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
            // move past the first ';' separated field
            int oldindex = index;
            index += UCharacterUtil.skipByteSubString(m_groupstring_, 
                                               index, length, (byte)';');   
            length -= (index - oldindex);
        }
        
        StringBuffer s = new StringBuffer();
        for (int i = 0; i < length;) {
            // unsigned byte value, a plain byte would turn negative from 
            // 0x80 onwards and break the comparison and the indexing below
            int b = m_groupstring_[index + i] & 0x00ff;
            i ++;
              
            if (b >= m_tokentable_.length) {
                if (b == ';') {
                    break;
                }
                // implicit letter, the cast is needed since appending a 
                // number would write its decimal digits
                s.append((char)b); 
            }
            else {
                char token = m_tokentable_[b];
                if (token == 0xFFFE) {
                    // this is a lead byte for a double-byte token
                    token = m_tokentable_[b << 8 | 
                                      (m_groupstring_[index + i] & 0x00ff)];
                    i ++;
                }
                if (token == 0xFFFF) {
                    if (b == ';') {
                        // skip the semicolon if we are seeking extended 
                        // names and there was no 2.0 name but there
                        // is a 1.0 name.
                        if (s.length() == 0 && choice == 
                               UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
                            continue;
                        }
                        break;
                    }
                    s.append((char)b); // explicit letter
                }
                else { // write token word
                    UCharacterUtil.getNullTermByteSubString(s, 
                                                     m_tokenstring_, token);
                }
            }
        }

        if (s.length() == 0) {
            return null;
        }
        return s.toString();
    }
    
    /**
    * Retrieves the extended name.
    * The most current name is used if there is one. Otherwise control 
    * characters fall back to their Unicode 1.0 name and everything else 
    * to the result of getExtendedOr10Name.
    * @param ch code point
    * @return extended name of the code point
    */
    protected String getExtendedName(int ch) 
    {    
        String name = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
        if (name != null) {
            return name;
        }
        if (getType(ch) == UCharacterCategory.CONTROL) {
            name = getName(ch, UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
            if (name != null) {
                return name;
            }
        }
        return getExtendedOr10Name(ch);
    }
    
    /**
     * Gets the group index for the codepoint, or the group before it.
     * The groups are searched by bisection on their MSB values.
     * @param codepoint
     * @return group index containing codepoint or the group before it.
     */
    protected int getGroup(int codepoint)
    {
        int msb  = getCodepointMSB(codepoint);
        int low  = 0;
        int high = m_groupcount_;
        // binary search for the group of names that contains the one for 
        // code: low always stays on a candidate, high is exclusive
        while (low < high - 1) {
            int middle = (low + high) >> 1;
            if (msb >= getGroupMSB(middle)) {
                low = middle;
            }
            else {
                high = middle;
            }
        }
        return low;
    }
    
    /**
     * Gets the extended and 1.0 name when the most current unicode names
     * fail.
     * Control characters use their Unicode 1.0 name if they have one. In
     * all other cases the name is built as '<' + type name + '-' + the code 
     * point in upper case hex digits padded to at least 4 digits + '>'.
     * @param ch codepoint
     * @return name of codepoint extended or 1.0
     */
    protected String getExtendedOr10Name(int ch)
    {
        if (getType(ch) == UCharacterCategory.CONTROL) {
            String oldname = getName(ch, 
                               UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
            if (oldname != null) {
                return oldname;
            }
        }
        
        int type = getType(ch);
        String typename;
        if (type < UCharacterCategory.TYPE_NAMES_.length) {
            typename = UCharacterCategory.TYPE_NAMES_[type];
        }
        else {
            // Return unknown if the table of names above is not up to 
            // date.
            typename = UCharacterCategory.UNKNOWN_TYPE_NAME_;
        }
        
        StringBuffer extended = new StringBuffer(typename);
        extended.insert(0, '<');
        extended.append('-');
        String hex = Integer.toHexString(ch).toUpperCase();
        for (int digits = hex.length(); digits < 4; digits ++) {
            extended.append('0');
        }
        extended.append(hex);
        extended.append('>');
        return extended.toString();
    }
    
    // these are all UCharacterNameIterator use methods -------------------
    
    /**
     * Gets the MSB from the group index, which is the first char of the 
     * group entry in the group information array.
     * @param gindex group index
     * @return the MSB of the group if gindex is below the group count, -1 
     *         otherwise
     */
    protected int getGroupMSB(int gindex)
    {
        int msb = -1;
        if (gindex < m_groupcount_) {
            msb = m_groupinfo_[gindex * m_groupsize_];
        }
        return msb;
    }
    
    /**
     * Gets the MSB of the codepoint, that is the codepoint without its 
     * lowest GROUP_SHIFT_ bits.
     * @param codepoint 
     * @return the MSB of the codepoint
     */
    protected int getCodepointMSB(int codepoint)
    {
        final int msb = codepoint >> GROUP_SHIFT_;
        return msb;
    }
    
    /**
     * Gets the maximum codepoint + 1 of the group
     * @param msb MSB value of the group, see getCodepointMSB
     * @return limit codepoint of the group
     */
    protected int getGroupLimit(int msb)
    {
        final int first = msb << GROUP_SHIFT_;
        return first + LINES_PER_GROUP_;
    }
    
    /**
     * Gets the minimum codepoint of the group
     * @param msb MSB value of the group, see getCodepointMSB
     * @return minimum codepoint of the group
     */
    protected int getGroupMin(int msb)
    {
        final int first = msb << GROUP_SHIFT_;
        return first;
    }
    
    /**
     * Gets the offset of a codepoint within its group, that is the bits of 
     * the codepoint selected by GROUP_MASK_.
     * @param codepoint 
     * @return offset within the group
     */
    protected int getGroupOffset(int codepoint)
    {
        final int offset = codepoint & GROUP_MASK_;
        return offset;
    }

	/**
     * Gets the minimum codepoint of a group by clearing the bits selected 
     * by GROUP_MASK_.
     * @param codepoint
     * @return minimum codepoint in the group which codepoint belongs to
     */
    protected int getGroupMinFromCodepoint(int codepoint)
    {
        final int clearmask = ~GROUP_MASK_;
        return codepoint & clearmask;
    }
    
    /**
     * Get the number of algorithmic ranges
     * @return length of the algorithm information array
     */
    protected int getAlgorithmLength()
    {
        final AlgorithmName ranges[] = m_algorithm_;
        return ranges.length;
    }
        
    /**
     * Gets the start of the range
     * @param index algorithm index
     * @return first code point of the algorithmic range at index
     */
    protected int getAlgorithmStart(int index)
    {
        final AlgorithmName range = m_algorithm_[index];
        return range.m_rangestart_;
    }
        
    /**
     * Gets the end of the range
     * @param index algorithm index
     * @return last code point of the algorithmic range at index
     */
    protected int getAlgorithmEnd(int index)
    {
        final AlgorithmName range = m_algorithm_[index];
        return range.m_rangeend_;
    }
    
    /**
     * Gets the Algorithmic name of the codepoint.
     * The codepoint is not checked against the range, see 
     * AlgorithmName.appendName.
     * @param index algorithmic range index
     * @param codepoint 
     * @return algorithmic name of codepoint
     */
    protected String getAlgorithmName(int index, int codepoint) 
    {
        StringBuffer name = new StringBuffer();
        AlgorithmName range = m_algorithm_[index];
        range.appendName(codepoint, name);
        return name.toString();
    }
    
        
    // private data members ----------------------------------------------
    
    /**
    * Data used in unames.dat. 
    * The token arrays are assigned by setToken, the group arrays by setGroup 
    * and the algorithm array by setAlgorithm. Each of them stays null until 
    * its setter has accepted a value.
    */
    private char m_tokentable_[];
    private byte m_tokenstring_[];
    private char m_groupinfo_[];
    private byte m_groupstring_[];
    private AlgorithmName m_algorithm_[];
      
    /**
    * Group use. 
    * Work arrays that getGroupChar hands to getGroupLengths. They are one 
    * element longer than the number of lines per group since 
    * getGroupLengths may write one more entry.
    */
    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
      	 
    /**
    * Default name of the name datafile, used as class resource path by the
    * constructor
    */
    private static final String NAME_FILE_NAME_ = 
                                           "/com/ibm/icu/impl/data/unames.dat";
    /**
    * Shift count to retrieve group information
    */
    private static final int GROUP_SHIFT_ = 5;
    /**
    * Mask to retrieve the offset for a particular character within a group
    */
    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
    /**
    * Default buffer size of datafile, handed to the BufferedInputStream in 
    * the constructor
    */
    private static final int NAME_BUFFER_SIZE_ = 100000;
      
    /**
    * Position of offsethigh in group information array, relative to the 
    * start of a group entry
    */
    private static final int OFFSET_HIGH_OFFSET_ = 1;
      
    /**
    * Position of offsetlow in group information array, relative to the 
    * start of a group entry
    */
    private static final int OFFSET_LOW_OFFSET_ = 2;
    /**
    * Double nibble indicator, any nibble > this number has to be combined
    * with its following nibble
    */
    private static final int SINGLE_NIBBLE_MAX_ = 11;
     
      
    // private methods ---------------------------------------------------
      
    /**
    * Gets the algorithmic name for the argument character.
    * The ranges are searched from the last to the first and the first range 
    * that contains the character supplies the name.
    * @param ch character to determine name for
    * @param choice name choice
    * @return the algorithmic name or null if not found. null is also 
    *         returned for Unicode 1.0 names and when no algorithm 
    *         information has been set
    */
    private String getAlgName(int ch, int choice) 
    {
        // Do not write algorithmic Unicode 1.0 names because Unihan names are 
        // the same as the modern ones, extension A was only introduced with 
        // Unicode 3.0, and the Hangul syllable block was moved and changed 
        // around Unicode 1.1.5.
        // m_algorithm_ stays null when setAlgorithm never accepted an array,
        // getCharFromName guards against this in the same way
        if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME 
            || m_algorithm_ == null) {
            return null;
        }
        
        for (int index = m_algorithm_.length - 1; index >= 0; index --) {
            if (m_algorithm_[index].contains(ch)) {
                // the buffer is only needed once a range matches
                StringBuffer s = new StringBuffer();
                m_algorithm_[index].appendName(ch, s);
                return s.toString();
            }
        }
        return null;
    }
      
    /**
    * Searches every group for a line that matches the argument name.
    * The groups are visited in table order; for each one the line lengths 
    * are loaded into the shared scratch arrays before the lines are 
    * compared, which is why this method is synchronized.
    * @param name of the character
    * @param choice name choice selector, passed on to the line comparison
    * @return character with the tokenized argument name or -1 if character
    *         is not found
    */
    private synchronized int getGroupChar(String name, int choice) 
    {
        int group = 0;
        while (group < m_groupcount_) {
            // load the line lengths of this group; the value returned is 
            // used as the start index of the group's lines
            int stringstart = getGroupLengths(group, m_groupoffsets_, 
                                              m_grouplengths_);
            int line = getGroupChar(stringstart, m_grouplengths_, name, 
                                    choice);
            if (line != -1) {
                // first entry of the group supplies the upper bits, the 
                // matching line number the lower ones
                int msb = m_groupinfo_[group * m_groupsize_];
                return (msb << GROUP_SHIFT_) | line;
            }
            group ++;
        }
        return -1;
    }
      
    /**
    * Compares the lines of one group against the argument name and returns 
    * the position of the line that matches.
    * Each line is a sequence of bytes. A byte either stands for itself or 
    * selects a token through the token table; a token table entry of 0xFFFE 
    * means the byte is the lead byte of a double-byte token and an entry of 
    * 0xFFFF means the byte stands for itself.
    * The bytes are treated as unsigned values in the range 0 to 255, so 
    * that values of 0x80 and above are neither mistaken for small token 
    * indexes nor turned into negative array indexes.
    * @param index index where the set of names reside in the group block
    * @param length list of lengths of the strings
    * @param name character name to search for
    * @param choice of either 1.0 or the most current unicode name
    * @return relative character in the group which matches name, otherwise if   
    *         not found, -1 will be returned
    */
    private int getGroupChar(int index, char length[], String name, 
                             int choice)
    { 
        int namelen = name.length();
        
        // NOTE(review): LINES_PER_GROUP_ + 1 entries are visited, which is 
        // the size of the scratch arrays declared in this class; confirm 
        // whether the last entry can ever describe a real line.
        for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
            int nindex = 0;
            int len = length[result];
              
            if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
                // move past the leading field of the line up to ';' so that 
                // the comparison starts at the field that follows it, and 
                // shorten the remaining length by the amount moved
                int oldindex = index;
                index += UCharacterUtil.skipByteSubString(m_groupstring_, 
                                                     index, len, (byte)';');
                len -= (index - oldindex);
            }
                
            // number of tokens is > the length of the name
            // compare each letter directly, and a token word per token;
            // nindex turns -1 as soon as a mismatch is detected
            int count = 0;
            while (count < len && nindex != -1 && nindex < namelen) {
                // mask to the unsigned byte value, a plain byte would be 
                // sign extended for values of 0x80 and above
                int b = m_groupstring_[index + count] & 0xFF;
                count ++;
                   
                if (b >= m_tokentable_.length) {
                    // no token table entry, the byte is a literal character
                    if (name.charAt(nindex ++) != b) {
                        nindex = -1;
                    }
                }
                else {
                    char token = m_tokentable_[b];
                    if (token == 0xFFFE) {
                        // this is a lead byte for a double-byte token
                        token = m_tokentable_[b << 8 | 
                                   (m_groupstring_[index + count] & 0x00ff)];
                        count ++;
                    }
                    if (token == 0xFFFF) {
                        // explicitly marked as a literal character
                        if (name.charAt(nindex ++) != b) {
                            nindex = -1;
                        }
                    }
                    else {
                        // compare token with name, the result becomes the 
                        // next position in name
                        nindex = UCharacterUtil.compareNullTermByteSubString(
                                        name, m_tokenstring_, nindex, token);
                    }
                }
            }

            // a match needs the whole name consumed and the line either 
            // used up or stopped right in front of a field separator
            if (namelen == nindex && 
                (count == len || m_groupstring_[index + count] == ';')) {
                return result;
            }
                
            index += len;
        }
        return -1;
    }
       
    /**
    * Binary search for the group whose first entry equals the most 
    * significant bits of the argument code point, that is the code point 
    * shifted right by GROUP_SHIFT_.
    * When such a group exists, the value assembled from its offset high and 
    * offset low entries is returned.
    * @param ch the code point to look for
    * @return group string set index in datatable otherwise -1 is returned if 
    *         group string set is not found
    */
    private int getGroupStringIndex(int ch)
    {
        int msb = ch >> GROUP_SHIFT_;
        int low = 0;
        int high = m_groupcount_;
            
        // narrow the range from low, inclusive, to high, exclusive, until a 
        // single candidate group is left at low
        while (low < high - 1) {
            int middle = (low + high) >> 1;
            if (msb < m_groupinfo_[middle * m_groupsize_]) {
                high = middle;
            }
            else {
                low = middle;
            }
        }

        // the candidate only counts if it is an exact match
        int entry = low * m_groupsize_;
        if (msb != m_groupinfo_[entry]) {
            return -1;
        }
        return UCharacterUtil.toInt(m_groupinfo_[entry + OFFSET_HIGH_OFFSET_], 
                                    m_groupinfo_[entry + OFFSET_LOW_OFFSET_]);
    }
    
    /**
    * Gets the name that the group data holds for the argument character.
    * The line lengths of the group are loaded into the shared scratch 
    * arrays before the name is read, which is why this method is 
    * synchronized.
    * @param ch character to get the group name 
    * @param choice name choice selector to choose a unicode 1.0 or newer name
    * @return the result of reading the name from the group data, or null if 
    *         the group located for ch does not match the most significant 
    *         bits of ch
    */
    private synchronized String getGroupName(int ch, int choice) 
    {            
        int msb = getCodepointMSB(ch);
        int group = getGroup(ch);

        if (msb != m_groupinfo_[group * m_groupsize_]) {
            // not an exact match, the group does not cover this code point
            return null;
        }
        
        int stringstart = getGroupLengths(group, m_groupoffsets_, 
                                          m_grouplengths_);
        // position of the character's line within its group
        int line = ch & GROUP_MASK_;
        return getGroupName(stringstart + m_groupoffsets_[line], 
                            m_grouplengths_[line], choice);
    }
    
    /**
    * Gets the extended type of a character.
    * Non characters are reported as NON_CHARACTER_ and surrogates are split 
    * into LEAD_SURROGATE_ and TRAIL_SURROGATE_; every other character keeps 
    * the type returned by UCharacter.getType.
    * @param ch character to be tested
    * @return extended type it is associated with
    */
    private int getType(int ch)
    {
        if (UCharacter.isNonCharacter(ch)) {
            // not a character, reported with its own extended category
            return UCharacterCategory.NON_CHARACTER_;
        }
        
        int type = UCharacter.getType(ch);
        if (type != UCharacterCategory.SURROGATE) {
            return type;
        }
        
        // surrogates up to the lead surrogate maximum are leads, the 
        // remaining ones are trails
        return ch <= UTF16.LEAD_SURROGATE_MAX_VALUE 
               ? UCharacterCategory.LEAD_SURROGATE_ 
               : UCharacterCategory.TRAIL_SURROGATE_;
    }
    
    /**
    * Getting the character with extended name of the form 
    * &lt;category-hex&gt;, where category is one of the names in 
    * UCharacterCategory.TYPE_NAMES_ and hex is the code point written in 
    * hexadecimal after the last '-' of the name.
    * The name is only accepted if the hexadecimal value lies within the 
    * Unicode code point range 0 to 0x10FFFF and the extended type of that 
    * code point is the position of the category in TYPE_NAMES_.
    * An empty name can not name any character and yields -1.
    * @param name of the character to be found
    * @param choice name choice
    * @return character associated with the name, -1 if such character is not
    *                   found and -2 if we should continue with the search.
    */
    private int getExtendedChar(String name, int choice)
    {
        if (name.length() == 0) {
            // nothing to look at, and no character has an empty name
            return -1;
        }
        if (name.charAt(0) != '<') {
            // not an extended name, let the caller try the other lookups
            return -2;
        }
        if (choice != UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
            return -1;
        }
        
        int endIndex = name.length() - 1;
        if (name.charAt(endIndex) != '>') {
            return -1;
        }
        int startIndex = name.lastIndexOf('-');
        if (startIndex < 0) {
            // no separator between category and code point
            return -1;
        }
        // We've got a category.
        startIndex ++;
        
        int result;
        try {
            result = Integer.parseInt(name.substring(startIndex, endIndex), 
                                      16);
        }
        catch (NumberFormatException e) {
            return -1;
        }
        if (result < 0 || result > 0x10FFFF) {
            // parses as a number but lies outside of the code point range
            return -1;
        }
        
        // Now validate the category name. We could use a binary search, or 
        // a trie, if we really wanted to.
        String type = name.substring(1, startIndex - 1);
        int length = UCharacterCategory.TYPE_NAMES_.length;
        for (int i = 0; i < length; ++ i) {
            if (type.equals(UCharacterCategory.TYPE_NAMES_[i])) {
                if (getType(result) == i) {
                    return result;
                }
                // the category exists but is not the one of this code point
                break;
            }
        }
        return -1;
    }
}
