| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2009, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| package com.ibm.icu.impl; |
| |
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import java.util.MissingResourceException;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
| |
/**
 * Internal class to manage character names.
 * Since the data for names is stored in an array of char, indexes used in
 * this class refer by default to a 2-byte count, unless otherwise stated.
 * Where an index refers to a byte count, the index is halved and, depending
 * on whether the index is even or odd, the MSB or LSB of the char at the
 * halved index is returned. For indexes into an array of int, the index is
 * multiplied by 2, and the char at the multiplied index together with its
 * following char is returned as an int.
 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class.
 * Note: 0 - 0x1F are control characters without names in Unicode 3.0.
 * @author Syn Wee Quek
 * @since nov0700
 */
| |
| public final class UCharacterName |
| { |
| // public data members ---------------------------------------------- |
| |
/**
 * Public singleton instance, created once when this class is initialized.
 */
public static final UCharacterName INSTANCE;

static {
    try {
        INSTANCE = new UCharacterName();
    } catch (IOException e) {
        ///CLOVER:OFF
        // MissingResourceException has no cause-taking constructor, so the
        // underlying I/O failure is attached explicitly rather than lost.
        MissingResourceException missing = new MissingResourceException(
            "Could not construct UCharacterName. Missing unames.icu", "", "");
        missing.initCause(e);
        throw missing;
        ///CLOVER:ON
    }
}

/**
 * Number of lines per group
 * 1 << GROUP_SHIFT_
 */
public static final int LINES_PER_GROUP_ = 1 << 5;
/**
 * Number of groups; used as the exclusive upper bound when searching
 * and validating group indexes.
 */
public int m_groupcount_ = 0;
| |
| // public methods --------------------------------------------------- |
| |
| /** |
| * Retrieve the name of a Unicode code point. |
| * Depending on <code>choice</code>, the character name written into the |
| * buffer is the "modern" name or the name that was defined in Unicode |
| * version 1.0. |
| * The name contains only "invariant" characters |
| * like A-Z, 0-9, space, and '-'. |
| * |
| * @param ch the code point for which to get the name. |
| * @param choice Selector for which name to get. |
| * @return if code point is above 0x1fff, null is returned |
| */ |
| public String getName(int ch, int choice) |
| { |
| if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || |
| choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) { |
| return null; |
| } |
| |
| String result = null; |
| |
| result = getAlgName(ch, choice); |
| |
| // getting normal character name |
| if (result == null || result.length() == 0) { |
| if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { |
| result = getExtendedName(ch); |
| } else { |
| result = getGroupName(ch, choice); |
| } |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Find a character by its name and return its code point value |
| * @param choice selector to indicate if argument name is a Unicode 1.0 |
| * or the most current version |
| * @param name the name to search for |
| * @return code point |
| */ |
| public int getCharFromName(int choice, String name) |
| { |
| // checks for illegal arguments |
| if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || |
| name == null || name.length() == 0) { |
| return -1; |
| } |
| |
| // try extended names first |
| int result = getExtendedChar(name.toLowerCase(), choice); |
| if (result >= -1) { |
| return result; |
| } |
| |
| String upperCaseName = name.toUpperCase(); |
| // try algorithmic names first, if fails then try group names |
| // int result = getAlgorithmChar(choice, uppercasename); |
| |
| if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || |
| choice == UCharacterNameChoice.EXTENDED_CHAR_NAME |
| ) { |
| int count = 0; |
| if (m_algorithm_ != null) { |
| count = m_algorithm_.length; |
| } |
| for (count --; count >= 0; count --) { |
| result = m_algorithm_[count].getChar(upperCaseName); |
| if (result >= 0) { |
| return result; |
| } |
| } |
| } |
| |
| if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { |
| result = getGroupChar(upperCaseName, |
| UCharacterNameChoice.UNICODE_CHAR_NAME); |
| if (result == -1) { |
| result = getGroupChar(upperCaseName, |
| UCharacterNameChoice.UNICODE_10_CHAR_NAME); |
| } |
| if (result == -1) { |
| result = getGroupChar(upperCaseName, |
| UCharacterNameChoice.CHAR_NAME_ALIAS); |
| } |
| } |
| else { |
| result = getGroupChar(upperCaseName, choice); |
| } |
| return result; |
| } |
| |
| // these are all UCharacterNameIterator use methods ------------------- |
| |
| /** |
| * Reads a block of compressed lengths of 32 strings and expands them into |
| * offsets and lengths for each string. Lengths are stored with a |
| * variable-width encoding in consecutive nibbles: |
| * If a nibble<0xc, then it is the length itself (0 = empty string). |
| * If a nibble>=0xc, then it forms a length value with the following |
| * nibble. |
| * The offsets and lengths arrays must be at least 33 (one more) long |
| * because there is no check here at the end if the last nibble is still |
| * used. |
| * @param index of group string object in array |
| * @param offsets array to store the value of the string offsets |
| * @param lengths array to store the value of the string length |
| * @return next index of the data string immediately after the lengths |
| * in terms of byte address |
| */ |
| public int getGroupLengths(int index, char offsets[], char lengths[]) |
| { |
| char length = 0xffff; |
| byte b = 0, |
| n = 0; |
| int shift; |
| index = index * m_groupsize_; // byte count offsets of group strings |
| int stringoffset = UCharacterUtility.toInt( |
| m_groupinfo_[index + OFFSET_HIGH_OFFSET_], |
| m_groupinfo_[index + OFFSET_LOW_OFFSET_]); |
| |
| offsets[0] = 0; |
| |
| // all 32 lengths must be read to get the offset of the first group |
| // string |
| for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) { |
| b = m_groupstring_[stringoffset]; |
| shift = 4; |
| |
| while (shift >= 0) { |
| // getting nibble |
| n = (byte)((b >> shift) & 0x0F); |
| if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) { |
| length = (char)((n - 12) << 4); |
| } |
| else { |
| if (length != 0xffff) { |
| lengths[i] = (char)((length | n) + 12); |
| } |
| else { |
| lengths[i] = (char)n; |
| } |
| |
| if (i < LINES_PER_GROUP_) { |
| offsets[i + 1] = (char)(offsets[i] + lengths[i]); |
| } |
| |
| length = 0xffff; |
| i ++; |
| } |
| |
| shift -= 4; |
| } |
| } |
| return stringoffset; |
| } |
| |
| /** |
| * Gets the name of the argument group index. |
| * UnicodeData.txt uses ';' as a field separator, so no field can contain |
| * ';' as part of its contents. In unames.icu, it is marked as |
| * token[';'] == -1 only if the semicolon is used in the data file - which |
| * is iff we have Unicode 1.0 names or ISO comments or aliases. |
| * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases |
| * although we know that it will never be part of a name. |
| * Equivalent to ICU4C's expandName. |
| * @param index of the group name string in byte count |
| * @param length of the group name string |
| * @param choice of Unicode 1.0 name or the most current name |
| * @return name of the group |
| */ |
| public String getGroupName(int index, int length, int choice) |
| { |
| if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && |
| choice != UCharacterNameChoice.EXTENDED_CHAR_NAME |
| ) { |
| if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) { |
| /* |
| * skip the modern name if it is not requested _and_ |
| * if the semicolon byte value is a character, not a token number |
| */ |
| int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; |
| do { |
| int oldindex = index; |
| index += UCharacterUtility.skipByteSubString(m_groupstring_, |
| index, length, (byte)';'); |
| length -= (index - oldindex); |
| } while(--fieldIndex>0); |
| } |
| else { |
| // the semicolon byte is a token number, therefore only modern |
| // names are stored in unames.dat and there is no such |
| // requested alternate name here |
| length = 0; |
| } |
| } |
| |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); |
| byte b; |
| char token; |
| for (int i = 0; i < length;) { |
| b = m_groupstring_[index + i]; |
| i ++; |
| |
| if (b >= m_tokentable_.length) { |
| if (b == ';') { |
| break; |
| } |
| m_utilStringBuffer_.append(b); // implicit letter |
| } |
| else { |
| token = m_tokentable_[b & 0x00ff]; |
| if (token == 0xFFFE) { |
| // this is a lead byte for a double-byte token |
| token = m_tokentable_[b << 8 | |
| (m_groupstring_[index + i] & 0x00ff)]; |
| i ++; |
| } |
| if (token == 0xFFFF) { |
| if (b == ';') { |
| // skip the semicolon if we are seeking extended |
| // names and there was no 2.0 name but there |
| // is a 1.0 name. |
| if (m_utilStringBuffer_.length() == 0 && choice == |
| UCharacterNameChoice.EXTENDED_CHAR_NAME) { |
| continue; |
| } |
| break; |
| } |
| // explicit letter |
| m_utilStringBuffer_.append((char)(b & 0x00ff)); |
| } |
| else { // write token word |
| UCharacterUtility.getNullTermByteSubString( |
| m_utilStringBuffer_, m_tokenstring_, token); |
| } |
| } |
| } |
| |
| if (m_utilStringBuffer_.length() > 0) { |
| return m_utilStringBuffer_.toString(); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Retrieves the extended name |
| */ |
| public String getExtendedName(int ch) |
| { |
| String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); |
| if (result == null) { |
| if (getType(ch) == UCharacterCategory.CONTROL) { |
| result = getName(ch, |
| UCharacterNameChoice.UNICODE_10_CHAR_NAME); |
| } |
| if (result == null) { |
| result = getExtendedOr10Name(ch); |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Gets the group index for the codepoint, or the group before it. |
| * @param codepoint The codepoint index. |
| * @return group index containing codepoint or the group before it. |
| */ |
| public int getGroup(int codepoint) |
| { |
| int endGroup = m_groupcount_; |
| int msb = getCodepointMSB(codepoint); |
| int result = 0; |
| // binary search for the group of names that contains the one for |
| // code |
| // find the group that contains codepoint, or the highest before it |
| while (result < endGroup - 1) { |
| int gindex = (result + endGroup) >> 1; |
| if (msb < getGroupMSB(gindex)) { |
| endGroup = gindex; |
| } |
| else { |
| result = gindex; |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Gets the extended and 1.0 name when the most current unicode names |
| * fail |
| * @param ch codepoint |
| * @return name of codepoint extended or 1.0 |
| */ |
| public String getExtendedOr10Name(int ch) |
| { |
| String result = null; |
| if (getType(ch) == UCharacterCategory.CONTROL) { |
| result = getName(ch, |
| UCharacterNameChoice.UNICODE_10_CHAR_NAME); |
| } |
| if (result == null) { |
| int type = getType(ch); |
| // Return unknown if the table of names above is not up to |
| // date. |
| if (type >= TYPE_NAMES_.length) { |
| result = UNKNOWN_TYPE_NAME_; |
| } |
| else { |
| result = TYPE_NAMES_[type]; |
| } |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); |
| m_utilStringBuffer_.append('<'); |
| m_utilStringBuffer_.append(result); |
| m_utilStringBuffer_.append('-'); |
| String chStr = Integer.toHexString(ch).toUpperCase(); |
| int zeros = 4 - chStr.length(); |
| while (zeros > 0) { |
| m_utilStringBuffer_.append('0'); |
| zeros --; |
| } |
| m_utilStringBuffer_.append(chStr); |
| m_utilStringBuffer_.append('>'); |
| result = m_utilStringBuffer_.toString(); |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Gets the MSB from the group index |
| * @param gindex group index |
| * @return the MSB of the group if gindex is valid, -1 otherwise |
| */ |
| public int getGroupMSB(int gindex) |
| { |
| if (gindex >= m_groupcount_) { |
| return -1; |
| } |
| return m_groupinfo_[gindex * m_groupsize_]; |
| } |
| |
| /** |
| * Gets the MSB of the codepoint |
| * @param codepoint The codepoint value. |
| * @return the MSB of the codepoint |
| */ |
| public static int getCodepointMSB(int codepoint) |
| { |
| return codepoint >> GROUP_SHIFT_; |
| } |
| |
| /** |
| * Gets the maximum codepoint + 1 of the group |
| * @param msb most significant byte of the group |
| * @return limit codepoint of the group |
| */ |
| public static int getGroupLimit(int msb) |
| { |
| return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_; |
| } |
| |
| /** |
| * Gets the minimum codepoint of the group |
| * @param msb most significant byte of the group |
| * @return minimum codepoint of the group |
| */ |
| public static int getGroupMin(int msb) |
| { |
| return msb << GROUP_SHIFT_; |
| } |
| |
| /** |
| * Gets the offset to a group |
| * @param codepoint The codepoint value. |
| * @return offset to a group |
| */ |
| public static int getGroupOffset(int codepoint) |
| { |
| return codepoint & GROUP_MASK_; |
| } |
| |
| /** |
| * Gets the minimum codepoint of a group |
| * @param codepoint The codepoint value. |
| * @return minimum codepoint in the group which codepoint belongs to |
| */ |
| ///CLOVER:OFF |
| public static int getGroupMinFromCodepoint(int codepoint) |
| { |
| return codepoint & ~GROUP_MASK_; |
| } |
| ///CLOVER:ON |
| |
| /** |
| * Get the Algorithm range length |
| * @return Algorithm range length |
| */ |
| public int getAlgorithmLength() |
| { |
| return m_algorithm_.length; |
| } |
| |
| /** |
| * Gets the start of the range |
| * @param index algorithm index |
| * @return algorithm range start |
| */ |
| public int getAlgorithmStart(int index) |
| { |
| return m_algorithm_[index].m_rangestart_; |
| } |
| |
| /** |
| * Gets the end of the range |
| * @param index algorithm index |
| * @return algorithm range end |
| */ |
| public int getAlgorithmEnd(int index) |
| { |
| return m_algorithm_[index].m_rangeend_; |
| } |
| |
| /** |
| * Gets the Algorithmic name of the codepoint |
| * @param index algorithmic range index |
| * @param codepoint The codepoint value. |
| * @return algorithmic name of codepoint |
| */ |
| public String getAlgorithmName(int index, int codepoint) |
| { |
| String result = null; |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); |
| m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); |
| result = m_utilStringBuffer_.toString(); |
| } |
| return result; |
| } |
| |
| /** |
| * Gets the group name of the character |
| * @param ch character to get the group name |
| * @param choice name choice selector to choose a unicode 1.0 or newer name |
| */ |
| public synchronized String getGroupName(int ch, int choice) |
| { |
| // gets the msb |
| int msb = getCodepointMSB(ch); |
| int group = getGroup(ch); |
| |
| // return this if it is an exact match |
| if (msb == m_groupinfo_[group * m_groupsize_]) { |
| int index = getGroupLengths(group, m_groupoffsets_, |
| m_grouplengths_); |
| int offset = ch & GROUP_MASK_; |
| return getGroupName(index + m_groupoffsets_[offset], |
| m_grouplengths_[offset], choice); |
| } |
| |
| return null; |
| } |
| |
| // these are transliterator use methods --------------------------------- |
| |
| /** |
| * Gets the maximum length of any codepoint name. |
| * Equivalent to uprv_getMaxCharNameLength. |
| * @return the maximum length of any codepoint name |
| */ |
| public int getMaxCharNameLength() |
| { |
| if (initNameSetsLengths()) { |
| return m_maxNameLength_; |
| } |
| else { |
| return 0; |
| } |
| } |
| |
| /** |
| * Gets the maximum length of any iso comments. |
| * Equivalent to uprv_getMaxISOCommentLength. |
| * @return the maximum length of any codepoint name |
| */ |
| ///CLOVER:OFF |
| public int getMaxISOCommentLength() |
| { |
| if (initNameSetsLengths()) { |
| return m_maxISOCommentLength_; |
| } |
| else { |
| return 0; |
| } |
| } |
| ///CLOVER:ON |
| |
| /** |
| * Fills set with characters that are used in Unicode character names. |
| * Equivalent to uprv_getCharNameCharacters. |
| * @param set USet to receive characters. Existing contents are deleted. |
| */ |
| public void getCharNameCharacters(UnicodeSet set) |
| { |
| convert(m_nameSet_, set); |
| } |
| |
| /** |
| * Fills set with characters that are used in Unicode character names. |
| * Equivalent to uprv_getISOCommentCharacters. |
| * @param set USet to receive characters. Existing contents are deleted. |
| */ |
| ///CLOVER:OFF |
| public void getISOCommentCharacters(UnicodeSet set) |
| { |
| convert(m_ISOCommentSet_, set); |
| } |
| ///CLOVER:ON |
| |
| // package private inner class -------------------------------------- |
| |
| /** |
| * Algorithmic name class |
| */ |
| static final class AlgorithmName |
| { |
| // package private data members ---------------------------------- |
| |
| /** |
| * Constant type value of the different AlgorithmName |
| */ |
| static final int TYPE_0_ = 0; |
| static final int TYPE_1_ = 1; |
| |
| // package private constructors ---------------------------------- |
| |
| /** |
| * Constructor |
| */ |
| AlgorithmName() |
| { |
| } |
| |
| // package private methods --------------------------------------- |
| |
| /** |
| * Sets the information for accessing the algorithmic names |
| * @param rangestart starting code point that lies within this name group |
| * @param rangeend end code point that lies within this name group |
| * @param type algorithm type. There's 2 kinds of algorithmic type. First |
| * which uses code point as part of its name and the other uses |
| * variant postfix strings |
| * @param variant algorithmic variant |
| * @return true if values are valid |
| */ |
| boolean setInfo(int rangestart, int rangeend, byte type, byte variant) |
| { |
| if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend |
| && rangeend <= UCharacter.MAX_VALUE && |
| (type == TYPE_0_ || type == TYPE_1_)) { |
| m_rangestart_ = rangestart; |
| m_rangeend_ = rangeend; |
| m_type_ = type; |
| m_variant_ = variant; |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Sets the factor data |
| * @param factor Array of factor |
| * @return true if factors are valid |
| */ |
| boolean setFactor(char factor[]) |
| { |
| if (factor.length == m_variant_) { |
| m_factor_ = factor; |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Sets the name prefix |
| * @param prefix |
| * @return true if prefix is set |
| */ |
| boolean setPrefix(String prefix) |
| { |
| if (prefix != null && prefix.length() > 0) { |
| m_prefix_ = prefix; |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Sets the variant factorized name data |
| * @param string variant factorized name data |
| * @return true if values are set |
| */ |
| boolean setFactorString(byte string[]) |
| { |
| // factor and variant string can be empty for things like |
| // hanggul code points |
| m_factorstring_ = string; |
| return true; |
| } |
| |
| /** |
| * Checks if code point lies in Algorithm object at index |
| * @param ch code point |
| */ |
| boolean contains(int ch) |
| { |
| return m_rangestart_ <= ch && ch <= m_rangeend_; |
| } |
| |
| /** |
| * Appends algorithm name of code point into StringBuffer. |
| * Note this method does not check for validity of code point in Algorithm, |
| * result is undefined if code point does not belong in Algorithm. |
| * @param ch code point |
| * @param str StringBuffer to append to |
| */ |
| void appendName(int ch, StringBuffer str) |
| { |
| str.append(m_prefix_); |
| switch (m_type_) |
| { |
| case TYPE_0_: |
| // prefix followed by hex digits indicating variants |
| str.append(Utility.hex(ch,m_variant_)); |
| break; |
| case TYPE_1_: |
| // prefix followed by factorized-elements |
| int offset = ch - m_rangestart_; |
| int indexes[] = m_utilIntBuffer_; |
| int factor; |
| |
| // write elements according to the factors |
| // the factorized elements are determined by modulo |
| // arithmetic |
| synchronized (m_utilIntBuffer_) { |
| for (int i = m_variant_ - 1; i > 0; i --) |
| { |
| factor = m_factor_[i] & 0x00FF; |
| indexes[i] = offset % factor; |
| offset /= factor; |
| } |
| |
| // we don't need to calculate the last modulus because |
| // start <= code <= end guarantees here that |
| // code <= factors[0] |
| indexes[0] = offset; |
| |
| // joining up the factorized strings |
| str.append(getFactorString(indexes, m_variant_)); |
| } |
| break; |
| } |
| } |
| |
| /** |
| * Gets the character for the argument algorithmic name |
| * @return the algorithmic char or -1 otherwise. |
| */ |
| int getChar(String name) |
| { |
| int prefixlen = m_prefix_.length(); |
| if (name.length() < prefixlen || |
| !m_prefix_.equals(name.substring(0, prefixlen))) { |
| return -1; |
| } |
| |
| switch (m_type_) |
| { |
| case TYPE_0_ : |
| try |
| { |
| int result = Integer.parseInt(name.substring(prefixlen), |
| 16); |
| // does it fit into the range? |
| if (m_rangestart_ <= result && result <= m_rangeend_) { |
| return result; |
| } |
| } |
| catch (NumberFormatException e) |
| { |
| return -1; |
| } |
| break; |
| case TYPE_1_ : |
| // repetitative suffix name comparison done here |
| // offset is the character code - start |
| for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++) |
| { |
| int offset = ch - m_rangestart_; |
| int indexes[] = m_utilIntBuffer_; |
| int factor; |
| |
| // write elements according to the factors |
| // the factorized elements are determined by modulo |
| // arithmetic |
| synchronized (m_utilIntBuffer_) { |
| for (int i = m_variant_ - 1; i > 0; i --) |
| { |
| factor = m_factor_[i] & 0x00FF; |
| indexes[i] = offset % factor; |
| offset /= factor; |
| } |
| |
| // we don't need to calculate the last modulus |
| // because start <= code <= end guarantees here that |
| // code <= factors[0] |
| indexes[0] = offset; |
| |
| // joining up the factorized strings |
| if (compareFactorString(indexes, m_variant_, name, |
| prefixlen)) { |
| return ch; |
| } |
| } |
| } |
| } |
| |
| return -1; |
| } |
| |
| /** |
| * Adds all chars in the set of algorithmic names into the set. |
| * Equivalent to part of calcAlgNameSetsLengths. |
| * @param set int set to add the chars of the algorithm names into |
| * @param maxlength maximum length to compare to |
| * @return the length that is either maxlength of the length of this |
| * algorithm name if it is longer than maxlength |
| */ |
| int add(int set[], int maxlength) |
| { |
| // prefix length |
| int length = UCharacterName.add(set, m_prefix_); |
| switch (m_type_) { |
| case TYPE_0_ : { |
| // name = prefix + (range->variant times) hex-digits |
| // prefix |
| length += m_variant_; |
| /* synwee to check |
| * addString(set, (const char *)(range + 1)) |
| + range->variant;*/ |
| break; |
| } |
| case TYPE_1_ : { |
| // name = prefix factorized-elements |
| // get the set and maximum factor suffix length for each |
| // factor |
| for (int i = m_variant_ - 1; i > 0; i --) |
| { |
| int maxfactorlength = 0; |
| int count = 0; |
| for (int factor = m_factor_[i]; factor > 0; -- factor) { |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, |
| m_utilStringBuffer_.length()); |
| count |
| = UCharacterUtility.getNullTermByteSubString( |
| m_utilStringBuffer_, |
| m_factorstring_, count); |
| UCharacterName.add(set, m_utilStringBuffer_); |
| if (m_utilStringBuffer_.length() |
| > maxfactorlength) |
| { |
| maxfactorlength |
| = m_utilStringBuffer_.length(); |
| } |
| } |
| } |
| length += maxfactorlength; |
| } |
| } |
| } |
| if (length > maxlength) { |
| return length; |
| } |
| return maxlength; |
| } |
| |
| // private data members ------------------------------------------ |
| |
| /** |
| * Algorithmic data information |
| */ |
| private int m_rangestart_; |
| private int m_rangeend_; |
| private byte m_type_; |
| private byte m_variant_; |
| private char m_factor_[]; |
| private String m_prefix_; |
| private byte m_factorstring_[]; |
| /** |
| * Utility StringBuffer |
| */ |
| private StringBuffer m_utilStringBuffer_ = new StringBuffer(); |
| /** |
| * Utility int buffer |
| */ |
| private int m_utilIntBuffer_[] = new int[256]; |
| |
| // private methods ----------------------------------------------- |
| |
| /** |
| * Gets the indexth string in each of the argument factor block |
| * @param index array with each index corresponding to each factor block |
| * @param length length of the array index |
| * @return the combined string of the array of indexth factor string in |
| * factor block |
| */ |
| private String getFactorString(int index[], int length) |
| { |
| int size = m_factor_.length; |
| if (index == null || length != size) { |
| return null; |
| } |
| |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); |
| int count = 0; |
| int factor; |
| size --; |
| for (int i = 0; i <= size; i ++) { |
| factor = m_factor_[i]; |
| count = UCharacterUtility.skipNullTermByteSubString( |
| m_factorstring_, count, index[i]); |
| count = UCharacterUtility.getNullTermByteSubString( |
| m_utilStringBuffer_, m_factorstring_, |
| count); |
| if (i != size) { |
| count = UCharacterUtility.skipNullTermByteSubString( |
| m_factorstring_, count, |
| factor - index[i] - 1); |
| } |
| } |
| return m_utilStringBuffer_.toString(); |
| } |
| } |
| |
| /** |
| * Compares the indexth string in each of the argument factor block with |
| * the argument string |
| * @param index array with each index corresponding to each factor block |
| * @param length index array length |
| * @param str string to compare with |
| * @param offset of str to start comparison |
| * @return true if string matches |
| */ |
| private boolean compareFactorString(int index[], int length, String str, |
| int offset) |
| { |
| int size = m_factor_.length; |
| if (index == null || length != size) |
| return false; |
| |
| int count = 0; |
| int strcount = offset; |
| int factor; |
| size --; |
| for (int i = 0; i <= size; i ++) |
| { |
| factor = m_factor_[i]; |
| count = UCharacterUtility.skipNullTermByteSubString( |
| m_factorstring_, count, index[i]); |
| strcount = UCharacterUtility.compareNullTermByteSubString(str, |
| m_factorstring_, strcount, count); |
| if (strcount < 0) { |
| return false; |
| } |
| |
| if (i != size) { |
| count = UCharacterUtility.skipNullTermByteSubString( |
| m_factorstring_, count, factor - index[i]); |
| } |
| } |
| if (strcount != str.length()) { |
| return false; |
| } |
| return true; |
| } |
| } |
| |
| // package private data members -------------------------------------- |
| |
| /** |
| * Size of each groups |
| */ |
| int m_groupsize_ = 0; |
| |
| // package private methods -------------------------------------------- |
| |
| /** |
| * Sets the token data |
| * @param token array of tokens |
| * @param tokenstring array of string values of the tokens |
| * @return false if there is a data error |
| */ |
| boolean setToken(char token[], byte tokenstring[]) |
| { |
| if (token != null && tokenstring != null && token.length > 0 && |
| tokenstring.length > 0) { |
| m_tokentable_ = token; |
| m_tokenstring_ = tokenstring; |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Set the algorithm name information array |
| * @param alg Algorithm information array |
| * @return true if the group string offset has been set correctly |
| */ |
| boolean setAlgorithm(AlgorithmName alg[]) |
| { |
| if (alg != null && alg.length != 0) { |
| m_algorithm_ = alg; |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Sets the number of group and size of each group in number of char |
| * @param count number of groups |
| * @param size size of group in char |
| * @return true if group size is set correctly |
| */ |
| boolean setGroupCountSize(int count, int size) |
| { |
| if (count <= 0 || size <= 0) { |
| return false; |
| } |
| m_groupcount_ = count; |
| m_groupsize_ = size; |
| return true; |
| } |
| |
| /** |
| * Sets the group name data |
| * @param group index information array |
| * @param groupstring name information array |
| * @return false if there is a data error |
| */ |
| boolean setGroup(char group[], byte groupstring[]) |
| { |
| if (group != null && groupstring != null && group.length > 0 && |
| groupstring.length > 0) { |
| m_groupinfo_ = group; |
| m_groupstring_ = groupstring; |
| return true; |
| } |
| return false; |
| } |
| |
| // private data members ---------------------------------------------- |
| |
| /** |
| * Data used in unames.icu |
| */ |
| private char m_tokentable_[]; |
| private byte m_tokenstring_[]; |
| private char m_groupinfo_[]; |
| private byte m_groupstring_[]; |
| private AlgorithmName m_algorithm_[]; |
| |
| /** |
| * Group use. Note - access must be synchronized. |
| */ |
| private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1]; |
| private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1]; |
| |
| /** |
| * Default name of the name datafile |
| */ |
| private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu"; |
| /** |
| * Shift count to retrieve group information |
| */ |
| private static final int GROUP_SHIFT_ = 5; |
| /** |
| * Mask to retrieve the offset for a particular character within a group |
| */ |
| private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1; |
| /** |
| * Default buffer size of datafile |
| */ |
| private static final int NAME_BUFFER_SIZE_ = 100000; |
| |
| /** |
| * Position of offsethigh in group information array |
| */ |
| private static final int OFFSET_HIGH_OFFSET_ = 1; |
| |
| /** |
| * Position of offsetlow in group information array |
| */ |
| private static final int OFFSET_LOW_OFFSET_ = 2; |
| /** |
| * Double nibble indicator, any nibble > this number has to be combined |
| * with its following nibble |
| */ |
| private static final int SINGLE_NIBBLE_MAX_ = 11; |
| |
| /* |
| * Maximum length of character names (regular & 1.0). |
| */ |
| //private static int MAX_NAME_LENGTH_ = 0; |
| /* |
| * Maximum length of ISO comments. |
| */ |
| //private static int MAX_ISO_COMMENT_LENGTH_ = 0; |
| |
| /** |
| * Set of chars used in character names (regular & 1.0). |
| * Chars are platform-dependent (can be EBCDIC). |
| */ |
| private int m_nameSet_[] = new int[8]; |
| /** |
| * Set of chars used in ISO comments. (regular & 1.0). |
| * Chars are platform-dependent (can be EBCDIC). |
| */ |
| private int m_ISOCommentSet_[] = new int[8]; |
| /** |
| * Utility StringBuffer |
| */ |
| private StringBuffer m_utilStringBuffer_ = new StringBuffer(); |
| /** |
| * Utility int buffer |
| */ |
| private int m_utilIntBuffer_[] = new int[2]; |
| /** |
| * Maximum ISO comment length |
| */ |
| private int m_maxISOCommentLength_; |
| /** |
| * Maximum name length |
| */ |
| private int m_maxNameLength_; |
| /** |
| * Type names used for extended names |
| */ |
| private static final String TYPE_NAMES_[] = {"unassigned", |
| "uppercase letter", |
| "lowercase letter", |
| "titlecase letter", |
| "modifier letter", |
| "other letter", |
| "non spacing mark", |
| "enclosing mark", |
| "combining spacing mark", |
| "decimal digit number", |
| "letter number", |
| "other number", |
| "space separator", |
| "line separator", |
| "paragraph separator", |
| "control", |
| "format", |
| "private use area", |
| "surrogate", |
| "dash punctuation", |
| "start punctuation", |
| "end punctuation", |
| "connector punctuation", |
| "other punctuation", |
| "math symbol", |
| "currency symbol", |
| "modifier symbol", |
| "other symbol", |
| "initial punctuation", |
| "final punctuation", |
| "noncharacter", |
| "lead surrogate", |
| "trail surrogate"}; |
| /** |
| * Unknown type name |
| */ |
| private static final String UNKNOWN_TYPE_NAME_ = "unknown"; |
| /** |
| * Not a character type |
| */ |
| private static final int NON_CHARACTER_ |
| = UCharacterCategory.CHAR_CATEGORY_COUNT; |
| /** |
| * Lead surrogate type |
| */ |
| private static final int LEAD_SURROGATE_ |
| = UCharacterCategory.CHAR_CATEGORY_COUNT + 1; |
| /** |
| * Trail surrogate type |
| */ |
| private static final int TRAIL_SURROGATE_ |
| = UCharacterCategory.CHAR_CATEGORY_COUNT + 2; |
| /** |
| * Extended category count |
| */ |
| static final int EXTENDED_CATEGORY_ |
| = UCharacterCategory.CHAR_CATEGORY_COUNT + 3; |
| |
| // private constructor ------------------------------------------------ |
| |
| /** |
| * <p>Protected constructor for use in UCharacter.</p> |
| * @exception IOException thrown when data reading fails |
| */ |
| private UCharacterName() throws IOException |
| { |
| InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_); |
| BufferedInputStream b = new BufferedInputStream(is, NAME_BUFFER_SIZE_); |
| UCharacterNameReader reader = new UCharacterNameReader(b); |
| reader.read(this); |
| b.close(); |
| } |
| |
| // private methods --------------------------------------------------- |
| |
| /** |
| * Gets the algorithmic name for the argument character |
| * @param ch character to determine name for |
| * @param choice name choice |
| * @return the algorithmic name or null if not found |
| */ |
| private String getAlgName(int ch, int choice) |
| { |
| /* Only the normative character name can be algorithmic. */ |
| if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || |
| choice == UCharacterNameChoice.EXTENDED_CHAR_NAME |
| ) { |
| // index in terms integer index |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); |
| |
| for (int index = m_algorithm_.length - 1; index >= 0; index --) |
| { |
| if (m_algorithm_[index].contains(ch)) { |
| m_algorithm_[index].appendName(ch, m_utilStringBuffer_); |
| return m_utilStringBuffer_.toString(); |
| } |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Getting the character with the tokenized argument name |
| * @param name of the character |
| * @return character with the tokenized argument name or -1 if character |
| * is not found |
| */ |
| private synchronized int getGroupChar(String name, int choice) |
| { |
| for (int i = 0; i < m_groupcount_; i ++) { |
| // populating the data set of grouptable |
| |
| int startgpstrindex = getGroupLengths(i, m_groupoffsets_, |
| m_grouplengths_); |
| |
| // shift out to function |
| int result = getGroupChar(startgpstrindex, m_grouplengths_, name, |
| choice); |
| if (result != -1) { |
| return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_) |
| | result; |
| } |
| } |
| return -1; |
| } |
| |
| /** |
| * Compares and retrieve character if name is found within the argument |
| * group |
| * @param index index where the set of names reside in the group block |
| * @param length list of lengths of the strings |
| * @param name character name to search for |
| * @param choice of either 1.0 or the most current unicode name |
| * @return relative character in the group which matches name, otherwise if |
| * not found, -1 will be returned |
| */ |
| private int getGroupChar(int index, char length[], String name, |
| int choice) |
| { |
| byte b = 0; |
| char token; |
| int len; |
| int namelen = name.length(); |
| int nindex; |
| int count; |
| |
| for (int result = 0; result <= LINES_PER_GROUP_; result ++) { |
| nindex = 0; |
| len = length[result]; |
| |
| if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && |
| choice != UCharacterNameChoice.EXTENDED_CHAR_NAME |
| ) { |
| /* |
| * skip the modern name if it is not requested _and_ |
| * if the semicolon byte value is a character, not a token number |
| */ |
| int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; |
| do { |
| int oldindex = index; |
| index += UCharacterUtility.skipByteSubString(m_groupstring_, |
| index, len, (byte)';'); |
| len -= (index - oldindex); |
| } while(--fieldIndex>0); |
| } |
| |
| // number of tokens is > the length of the name |
| // write each letter directly, and write a token word per token |
| for (count = 0; count < len && nindex != -1 && nindex < namelen; |
| ) { |
| b = m_groupstring_[index + count]; |
| count ++; |
| |
| if (b >= m_tokentable_.length) { |
| if (name.charAt(nindex ++) != (b & 0xFF)) { |
| nindex = -1; |
| } |
| } |
| else { |
| token = m_tokentable_[b & 0xFF]; |
| if (token == 0xFFFE) { |
| // this is a lead byte for a double-byte token |
| token = m_tokentable_[b << 8 | |
| (m_groupstring_[index + count] & 0x00ff)]; |
| count ++; |
| } |
| if (token == 0xFFFF) { |
| if (name.charAt(nindex ++) != (b & 0xFF)) { |
| nindex = -1; |
| } |
| } |
| else { |
| // compare token with name |
| nindex = UCharacterUtility.compareNullTermByteSubString( |
| name, m_tokenstring_, nindex, token); |
| } |
| } |
| } |
| |
| if (namelen == nindex && |
| (count == len || m_groupstring_[index + count] == ';')) { |
| return result; |
| } |
| |
| index += len; |
| } |
| return -1; |
| } |
| |
| /** |
| * Gets the character extended type |
| * @param ch character to be tested |
| * @return extended type it is associated with |
| */ |
| private static int getType(int ch) |
| { |
| if (UCharacterUtility.isNonCharacter(ch)) { |
| // not a character we return a invalid category count |
| return NON_CHARACTER_; |
| } |
| int result = UCharacter.getType(ch); |
| if (result == UCharacterCategory.SURROGATE) { |
| if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { |
| result = LEAD_SURROGATE_; |
| } |
| else { |
| result = TRAIL_SURROGATE_; |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Getting the character with extended name of the form <....>. |
| * @param name of the character to be found |
| * @param choice name choice |
| * @return character associated with the name, -1 if such character is not |
| * found and -2 if we should continue with the search. |
| */ |
| private static int getExtendedChar(String name, int choice) |
| { |
| if (name.charAt(0) == '<') { |
| if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { |
| int endIndex = name.length() - 1; |
| if (name.charAt(endIndex) == '>') { |
| int startIndex = name.lastIndexOf('-'); |
| if (startIndex >= 0) { // We've got a category. |
| startIndex ++; |
| int result = -1; |
| try { |
| result = Integer.parseInt( |
| name.substring(startIndex, endIndex), |
| 16); |
| } |
| catch (NumberFormatException e) { |
| return -1; |
| } |
| // Now validate the category name. We could use a |
| // binary search, or a trie, if we really wanted to. |
| String type = name.substring(1, startIndex - 1); |
| int length = TYPE_NAMES_.length; |
| for (int i = 0; i < length; ++ i) { |
| if (type.compareTo(TYPE_NAMES_[i]) == 0) { |
| if (getType(result) == i) { |
| return result; |
| } |
| break; |
| } |
| } |
| } |
| } |
| } |
| return -1; |
| } |
| return -2; |
| } |
| |
| // sets of name characters, maximum name lengths ----------------------- |
| |
| /** |
| * Adds a codepoint into a set of ints. |
| * Equivalent to SET_ADD. |
| * @param set set to add to |
| * @param ch 16 bit char to add |
| */ |
| private static void add(int set[], char ch) |
| { |
| set[ch >>> 5] |= 1 << (ch & 0x1f); |
| } |
| |
| /** |
| * Checks if a codepoint is a part of a set of ints. |
| * Equivalent to SET_CONTAINS. |
| * @param set set to check in |
| * @param ch 16 bit char to check |
| * @return true if codepoint is part of the set, false otherwise |
| */ |
| private static boolean contains(int set[], char ch) |
| { |
| return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0; |
| } |
| |
| /** |
| * Adds all characters of the argument str and gets the length |
| * Equivalent to calcStringSetLength. |
| * @param set set to add all chars of str to |
| * @param str string to add |
| */ |
| private static int add(int set[], String str) |
| { |
| int result = str.length(); |
| |
| for (int i = result - 1; i >= 0; i --) { |
| add(set, str.charAt(i)); |
| } |
| return result; |
| } |
| |
| /** |
| * Adds all characters of the argument str and gets the length |
| * Equivalent to calcStringSetLength. |
| * @param set set to add all chars of str to |
| * @param str string to add |
| */ |
| private static int add(int set[], StringBuffer str) |
| { |
| int result = str.length(); |
| |
| for (int i = result - 1; i >= 0; i --) { |
| add(set, str.charAt(i)); |
| } |
| return result; |
| } |
| |
| /** |
| * Adds all algorithmic names into the name set. |
| * Equivalent to part of calcAlgNameSetsLengths. |
| * @param maxlength length to compare to |
| * @return the maximum length of any possible algorithmic name if it is > |
| * maxlength, otherwise maxlength is returned. |
| */ |
| private int addAlgorithmName(int maxlength) |
| { |
| int result = 0; |
| for (int i = m_algorithm_.length - 1; i >= 0; i --) { |
| result = m_algorithm_[i].add(m_nameSet_, maxlength); |
| if (result > maxlength) { |
| maxlength = result; |
| } |
| } |
| return maxlength; |
| } |
| |
| /** |
| * Adds all extended names into the name set. |
| * Equivalent to part of calcExtNameSetsLengths. |
| * @param maxlength length to compare to |
| * @return the maxlength of any possible extended name. |
| */ |
| private int addExtendedName(int maxlength) |
| { |
| for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) { |
| // for each category, count the length of the category name |
| // plus 9 = |
| // 2 for <> |
| // 1 for - |
| // 6 for most hex digits per code point |
| int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]); |
| if (length > maxlength) { |
| maxlength = length; |
| } |
| } |
| return maxlength; |
| } |
| |
| /** |
| * Adds names of a group to the argument set. |
| * Equivalent to calcNameSetLength. |
| * @param offset of the group name string in byte count |
| * @param length of the group name string |
| * @param tokenlength array to store the length of each token |
| * @param set to add to |
| * @return the length of the name string and the length of the group |
| * string parsed |
| */ |
| private int[] addGroupName(int offset, int length, byte tokenlength[], |
| int set[]) |
| { |
| int resultnlength = 0; |
| int resultplength = 0; |
| while (resultplength < length) { |
| char b = (char)(m_groupstring_[offset + resultplength] & 0xff); |
| resultplength ++; |
| if (b == ';') { |
| break; |
| } |
| |
| if (b >= m_tokentable_.length) { |
| add(set, b); // implicit letter |
| resultnlength ++; |
| } |
| else { |
| char token = m_tokentable_[b & 0x00ff]; |
| if (token == 0xFFFE) { |
| // this is a lead byte for a double-byte token |
| b = (char)(b << 8 | (m_groupstring_[offset + resultplength] |
| & 0x00ff)); |
| token = m_tokentable_[b]; |
| resultplength ++; |
| } |
| if (token == 0xFFFF) { |
| add(set, b); |
| resultnlength ++; |
| } |
| else { |
| // count token word |
| // use cached token length |
| byte tlength = tokenlength[b]; |
| if (tlength == 0) { |
| synchronized (m_utilStringBuffer_) { |
| m_utilStringBuffer_.delete(0, |
| m_utilStringBuffer_.length()); |
| UCharacterUtility.getNullTermByteSubString( |
| m_utilStringBuffer_, m_tokenstring_, |
| token); |
| tlength = (byte)add(set, m_utilStringBuffer_); |
| } |
| tokenlength[b] = tlength; |
| } |
| resultnlength += tlength; |
| } |
| } |
| } |
| m_utilIntBuffer_[0] = resultnlength; |
| m_utilIntBuffer_[1] = resultplength; |
| return m_utilIntBuffer_; |
| } |
| |
| /** |
| * Adds names of all group to the argument set. |
| * Sets the data member m_max*Length_. |
| * Method called only once. |
| * Equivalent to calcGroupNameSetsLength. |
| * @param maxlength length to compare to |
| */ |
| private void addGroupName(int maxlength) |
| { |
| int maxisolength = 0; |
| char offsets[] = new char[LINES_PER_GROUP_ + 2]; |
| char lengths[] = new char[LINES_PER_GROUP_ + 2]; |
| byte tokenlengths[] = new byte[m_tokentable_.length]; |
| |
| // enumerate all groups |
| // for (int i = m_groupcount_ - 1; i >= 0; i --) { |
| for (int i = 0; i < m_groupcount_ ; i ++) { |
| int offset = getGroupLengths(i, offsets, lengths); |
| // enumerate all lines in each group |
| // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0; |
| // linenumber --) { |
| for (int linenumber = 0; linenumber < LINES_PER_GROUP_; |
| linenumber ++) { |
| int lineoffset = offset + offsets[linenumber]; |
| int length = lengths[linenumber]; |
| if (length == 0) { |
| continue; |
| } |
| |
| // read regular name |
| int parsed[] = addGroupName(lineoffset, length, tokenlengths, |
| m_nameSet_); |
| if (parsed[0] > maxlength) { |
| // 0 for name length |
| maxlength = parsed[0]; |
| } |
| lineoffset += parsed[1]; |
| if (parsed[1] >= length) { |
| // 1 for parsed group string length |
| continue; |
| } |
| length -= parsed[1]; |
| // read Unicode 1.0 name |
| parsed = addGroupName(lineoffset, length, tokenlengths, |
| m_nameSet_); |
| if (parsed[0] > maxlength) { |
| // 0 for name length |
| maxlength = parsed[0]; |
| } |
| lineoffset += parsed[1]; |
| if (parsed[1] >= length) { |
| // 1 for parsed group string length |
| continue; |
| } |
| length -= parsed[1]; |
| // read ISO comment |
| parsed = addGroupName(lineoffset, length, tokenlengths, |
| m_ISOCommentSet_); |
| if (parsed[1] > maxisolength) { |
| maxisolength = length; |
| } |
| } |
| } |
| |
| // set gMax... - name length last for threading |
| m_maxISOCommentLength_ = maxisolength; |
| m_maxNameLength_ = maxlength; |
| } |
| |
| /** |
| * Sets up the name sets and the calculation of the maximum lengths. |
| * Equivalent to calcNameSetsLengths. |
| */ |
| private boolean initNameSetsLengths() |
| { |
| if (m_maxNameLength_ > 0) { |
| return true; |
| } |
| |
| String extra = "0123456789ABCDEF<>-"; |
| // set hex digits, used in various names, and <>-, used in extended |
| // names |
| for (int i = extra.length() - 1; i >= 0; i --) { |
| add(m_nameSet_, extra.charAt(i)); |
| } |
| |
| // set sets and lengths from algorithmic names |
| m_maxNameLength_ = addAlgorithmName(0); |
| // set sets and lengths from extended names |
| m_maxNameLength_ = addExtendedName(m_maxNameLength_); |
| // set sets and lengths from group names, set global maximum values |
| addGroupName(m_maxNameLength_); |
| return true; |
| } |
| |
| /** |
| * Converts the char set cset into a Unicode set uset. |
| * Equivalent to charSetToUSet. |
| * @param set Set of 256 bit flags corresponding to a set of chars. |
| * @param uset USet to receive characters. Existing contents are deleted. |
| */ |
| private void convert(int set[], UnicodeSet uset) |
| { |
| uset.clear(); |
| if (!initNameSetsLengths()) { |
| return; |
| } |
| |
| // build a char string with all chars that are used in character names |
| for (char c = 255; c > 0; c --) { |
| if (contains(set, c)) { |
| uset.add(c); |
| } |
| } |
| } |
| } |