src/com/ibm/text/UCharacterName.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source:
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
 * $Date: 2001/03/23 19:51:38 $
 * $Revision: 1.3 $
 *
 *******************************************************************************
 */
 package com.ibm.text;

 /**
 * Internal class to manage character names.
 * <a href=UCharacterNameDB.html>UCharacterNameDB</a> provides the data
 * required and UCharacterName parses it into meaningful results before
 * returning value.
 * Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
 * in an array of char, by default indexes used in this class is refering to
 * a 2 byte count, unless otherwise stated. Cases where the index is refering
 * to a byte count, the index is halved and depending on whether the index is
 * even or odd, the MSB or LSB of the result char at the halved index is
 * returned. For indexes to an array of int, the index is multiplied by 2,
 * result char at the multiplied index and its following char is returned as an
 * int.
 * <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
 * For information on parsing of the binary data in
 * <a href=UCharacterNameDB.html>UCharacterNameDB</a> is located at
 * <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
 * ReadMe</a>
 * @author Syn Wee Quek
 * @since nov0700
 */

 final class UCharacterName
 {
   // private variable =============================================

   /**
   * Database storing the sets of character name
   */
   private static final UCharacterNameDB NAME_DB_;

   // block to initialise name database and unicode 1.0 data indicator
   static
   {
     try
     {
       NAME_DB_ = new UCharacterNameDB();
     }
     catch (Exception e)
     {
       throw new RuntimeException(e.getMessage());
     }
   }

   // protected method =============================================

   /**
   * Retrieve the name of a Unicode code point.
   * Depending on <code>choice</code>, the character name written into the
   * buffer is the "modern" name or the name that was defined in Unicode
   * version 1.0.
   * The name contains only "invariant" characters
   * like A-Z, 0-9, space, and '-'.
   *
   * @param ch the code point for which to get the name.
   * @param choice Selector for which name to get.
   * @return if code point is above 0x1fff, null is returned
   */
   protected static String getName(int ch, int choice)
   {
     if (ch < 0 || ch > 0x1ffff ||
         choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
       return null;
     }

     String result = "";

     // Do not write algorithmic Unicode 1.0 names because Unihan names are
     // the same as the modern ones, extension A was only introduced with
     // Unicode 3.0, and the Hangul syllable block was moved and changed around
     // Unicode 1.1.5.
     if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
       // try getting algorithmic name first
       result = getAlgName(ch);
     }

     // getting normal character name
     if (result == null || result.length() == 0) {
       result = NAME_DB_.getGroupName(ch, choice);
     }

     return result;
   }

   /**
   * Find a character by its name and return its code point value
   * @param character name
   * @param choice selector to indicate if argument name is a Unicode 1.0
   *        or the most current version
   * @return code point
   */
   protected static int getCharFromName(int choice, String name)
   {
     // checks for illegal arguments
     if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT ||
         name == null || name.length() == 0) {
         return -1;
     }

     // try algorithmic names first, if fails then try group names
     int result = getAlgorithmChar(choice, name);
     if (result >= 0) {
       return result;
     }
     return getGroupChar(name, choice);
   }

   // private method =============================================

   /**
   * Gets the algorithmic name for the argument character
   * @param ch character to determine name for
   * @return the algorithmic name or null if not found
   */
   private static String getAlgName(int ch)
   {
     // index in terms integer index
     StringBuffer s = new StringBuffer();

     int index = NAME_DB_.getAlgorithmIndex(ch);
     if (index >= 0) {
       NAME_DB_.appendAlgorithmName(index, ch, s);
       return s.toString();
     }
     return null;
   }

   /**
   * Gets the character for the argument algorithmic name
   * @param choice of either 1.0 or the most current unicode name
   * @return the algorithmic char or -1 otherwise.
   */
   private static int getAlgorithmChar(int choice, String name)
   {
     // 1.0 has no algorithmic names
     if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
       return -1;
     }
     int result;
     for (int count = NAME_DB_.countAlgorithm() - 1; count >= 0; count --) {
       result = NAME_DB_.getAlgorithmChar(count, name);
       if (result >= 0) {
         return result;
       }
     }
     return -1;
   }

   /**
   * Getting the character with the tokenized argument name
   * @param name of the character
   * @return character with the tokenized argument name or -1 if character is
   *         not found
   */
   private static int getGroupChar(String name, int choice)
   {
     int groupcount = NAME_DB_.countGroup();
     int result = 0;

     for (int i = 0; i < groupcount; i ++) {
       result = NAME_DB_.getGroupChar(i, name, choice);
       if (result != -1) {
         return result;
       }
     }
     return -1;
   }
 }
	/**
	*******************************************************************************
	* Copyright (C) 1996-2001, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source:
	* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
	* $Date: 2001/03/23 19:51:38 $
	* $Revision: 1.3 $
	*
	*******************************************************************************
	*/
	package com.ibm.text;

	/**
	* Internal class to manage character names.
	* <a href=UCharacterNameDB.html>UCharacterNameDB</a> provides the data
	* required and UCharacterName parses it into meaningful results before
	* returning value.
	* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
	* in an array of char, by default indexes used in this class is refering to
	* a 2 byte count, unless otherwise stated. Cases where the index is refering
	* to a byte count, the index is halved and depending on whether the index is
	* even or odd, the MSB or LSB of the result char at the halved index is
	* returned. For indexes to an array of int, the index is multiplied by 2,
	* result char at the multiplied index and its following char is returned as an
	* int.
	* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
	* Note : 0 - 0x1F are control characters without names in Unicode 3.0
	* For information on parsing of the binary data in
	* <a href=UCharacterNameDB.html>UCharacterNameDB</a> is located at
	* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
	* ReadMe</a>
	* @author Syn Wee Quek
	* @since nov0700
	*/

	final class UCharacterName
	{
	// private variable =============================================

	/**
	* Database storing the sets of character name
	*/
	private static final UCharacterNameDB NAME_DB_;

	// block to initialise name database and unicode 1.0 data indicator
	static
	{
	try
	{
	NAME_DB_ = new UCharacterNameDB();
	}
	catch (Exception e)
	{
	throw new RuntimeException(e.getMessage());
	}
	}

	// protected method =============================================

	/**
	* Retrieve the name of a Unicode code point.
	* Depending on <code>choice</code>, the character name written into the
	* buffer is the "modern" name or the name that was defined in Unicode
	* version 1.0.
	* The name contains only "invariant" characters
	* like A-Z, 0-9, space, and '-'.
	*
	* @param ch the code point for which to get the name.
	* @param choice Selector for which name to get.
	* @return if code point is above 0x1fff, null is returned
	*/
	protected static String getName(int ch, int choice)
	{
	if (ch < 0 \|\| ch > 0x1ffff \|\|
	choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
	return null;
	}

	String result = "";

	// Do not write algorithmic Unicode 1.0 names because Unihan names are
	// the same as the modern ones, extension A was only introduced with
	// Unicode 3.0, and the Hangul syllable block was moved and changed around
	// Unicode 1.1.5.
	if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
	// try getting algorithmic name first
	result = getAlgName(ch);
	}

	// getting normal character name
	if (result == null \|\| result.length() == 0) {
	result = NAME_DB_.getGroupName(ch, choice);
	}

	return result;
	}

	/**
	* Find a character by its name and return its code point value
	* @param character name
	* @param choice selector to indicate if argument name is a Unicode 1.0
	* or the most current version
	* @return code point
	*/
	protected static int getCharFromName(int choice, String name)
	{
	// checks for illegal arguments
	if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT \|\|
	name == null \|\| name.length() == 0) {
	return -1;
	}

	// try algorithmic names first, if fails then try group names
	int result = getAlgorithmChar(choice, name);
	if (result >= 0) {
	return result;
	}
	return getGroupChar(name, choice);
	}

	// private method =============================================

	/**
	* Gets the algorithmic name for the argument character
	* @param ch character to determine name for
	* @return the algorithmic name or null if not found
	*/
	private static String getAlgName(int ch)
	{
	// index in terms integer index
	StringBuffer s = new StringBuffer();

	int index = NAME_DB_.getAlgorithmIndex(ch);
	if (index >= 0) {
	NAME_DB_.appendAlgorithmName(index, ch, s);
	return s.toString();
	}
	return null;
	}

	/**
	* Gets the character for the argument algorithmic name
	* @param choice of either 1.0 or the most current unicode name
	* @return the algorithmic char or -1 otherwise.
	*/
	private static int getAlgorithmChar(int choice, String name)
	{
	// 1.0 has no algorithmic names
	if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
	return -1;
	}
	int result;
	for (int count = NAME_DB_.countAlgorithm() - 1; count >= 0; count --) {
	result = NAME_DB_.getAlgorithmChar(count, name);
	if (result >= 0) {
	return result;
	}
	}
	return -1;
	}

	/**
	* Getting the character with the tokenized argument name
	* @param name of the character
	* @return character with the tokenized argument name or -1 if character is
	* not found
	*/
	private static int getGroupChar(String name, int choice)
	{
	int groupcount = NAME_DB_.countGroup();
	int result = 0;

	for (int i = 0; i < groupcount; i ++) {
	result = NAME_DB_.getGroupChar(i, name, choice);
	if (result != -1) {
	return result;
	}
	}
	return -1;
	}
	}