blob: 1b4d159ac57fe062f5ebe4f7bdf9dd7224271cb7 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
* $Date: 2001/03/23 19:51:38 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
package com.ibm.text;
/**
* Internal class to manage character names.
* <a href=UCharacterNameDB.html>UCharacterNameDB</a> provides the data
* required and UCharacterName parses it into meaningful results before
* returning value.
* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
* in an array of char, by default indexes used in this class is refering to
* a 2 byte count, unless otherwise stated. Cases where the index is refering
* to a byte count, the index is halved and depending on whether the index is
* even or odd, the MSB or LSB of the result char at the halved index is
* returned. For indexes to an array of int, the index is multiplied by 2,
* result char at the multiplied index and its following char is returned as an
* int.
* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
* Note : 0 - 0x1F are control characters without names in Unicode 3.0
* For information on parsing of the binary data in
* <a href=UCharacterNameDB.html>UCharacterNameDB</a> is located at
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
* ReadMe</a>
* @author Syn Wee Quek
* @since nov0700
*/
final class UCharacterName
{
// private variable =============================================
/**
* Database storing the sets of character name
*/
private static final UCharacterNameDB NAME_DB_;
// block to initialise name database and unicode 1.0 data indicator
static
{
try
{
NAME_DB_ = new UCharacterNameDB();
}
catch (Exception e)
{
throw new RuntimeException(e.getMessage());
}
}
// protected method =============================================
/**
* Retrieve the name of a Unicode code point.
* Depending on <code>choice</code>, the character name written into the
* buffer is the "modern" name or the name that was defined in Unicode
* version 1.0.
* The name contains only "invariant" characters
* like A-Z, 0-9, space, and '-'.
*
* @param ch the code point for which to get the name.
* @param choice Selector for which name to get.
* @return if code point is above 0x1fff, null is returned
*/
protected static String getName(int ch, int choice)
{
if (ch < 0 || ch > 0x1ffff ||
choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
return null;
}
String result = "";
// Do not write algorithmic Unicode 1.0 names because Unihan names are
// the same as the modern ones, extension A was only introduced with
// Unicode 3.0, and the Hangul syllable block was moved and changed around
// Unicode 1.1.5.
if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
// try getting algorithmic name first
result = getAlgName(ch);
}
// getting normal character name
if (result == null || result.length() == 0) {
result = NAME_DB_.getGroupName(ch, choice);
}
return result;
}
/**
* Find a character by its name and return its code point value
* @param character name
* @param choice selector to indicate if argument name is a Unicode 1.0
* or the most current version
* @return code point
*/
protected static int getCharFromName(int choice, String name)
{
// checks for illegal arguments
if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT ||
name == null || name.length() == 0) {
return -1;
}
// try algorithmic names first, if fails then try group names
int result = getAlgorithmChar(choice, name);
if (result >= 0) {
return result;
}
return getGroupChar(name, choice);
}
// private method =============================================
/**
* Gets the algorithmic name for the argument character
* @param ch character to determine name for
* @return the algorithmic name or null if not found
*/
private static String getAlgName(int ch)
{
// index in terms integer index
StringBuffer s = new StringBuffer();
int index = NAME_DB_.getAlgorithmIndex(ch);
if (index >= 0) {
NAME_DB_.appendAlgorithmName(index, ch, s);
return s.toString();
}
return null;
}
/**
* Gets the character for the argument algorithmic name
* @param choice of either 1.0 or the most current unicode name
* @return the algorithmic char or -1 otherwise.
*/
private static int getAlgorithmChar(int choice, String name)
{
// 1.0 has no algorithmic names
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
return -1;
}
int result;
for (int count = NAME_DB_.countAlgorithm() - 1; count >= 0; count --) {
result = NAME_DB_.getAlgorithmChar(count, name);
if (result >= 0) {
return result;
}
}
return -1;
}
/**
* Getting the character with the tokenized argument name
* @param name of the character
* @return character with the tokenized argument name or -1 if character is
* not found
*/
private static int getGroupChar(String name, int choice)
{
int groupcount = NAME_DB_.countGroup();
int result = 0;
for (int i = 0; i < groupcount; i ++) {
result = NAME_DB_.getGroupChar(i, name, choice);
if (result != -1) {
return result;
}
}
return -1;
}
}