src/com/ibm/tools/normalizer/FCDBuilder.java - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/tools/normalizer/Attic/FCDBuilder.java,v $
 * $Date: 2001/03/28 00:01:13 $
 * $Revision: 1.4 $
 *
 ******************************************************************************
 */

 package com.ibm.tools.normalizer;

 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.Writer;
 import com.ibm.util.CharTrie;
 import com.ibm.text.UCharacter;
 import com.ibm.text.UTF16;
 import com.ibm.text.Normalizer;

 /**
 * Class to generate modified checkFCD data for collation.
 * Data generated is used only in internal ICU collation.
 * FCD is the set of strings such that, for each string, simply decomposing
 * any composites (including singleton composites) without canonical
 * reordering yields a string that is already in canonical order (NFD).
 * FCD is not a normalization form, since there's no uniqueness.
 */
 public class FCDBuilder
 {
   // public methods ----------------------------------------------------

   /**
   * Default constructor; performs no setup. The output file path is supplied
   * when one of the build methods is called.
   */
   public FCDBuilder()
   {
   }

   /**
   * Building method.
   * Each unicode character will be used to generate data, output to the default
   * file path
   */
   public void build()
   {
     build(DEFAULT_OUTPUT_PATH_);
   }

   /**
   * Building method.
   * Computes the FCD check value of every code point from
   * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE, stores the values in a
   * CharTrie, checks that the trie returns the same value for every code
   * point, and writes the string form of the trie to the output file.
   * A mismatch found by the check is reported on standard output (first
   * mismatch only) and does not stop the file from being written.
   * Any exception raised while writing is printed as a stack trace and is
   * not rethrown.
   * @param output file path
   */
   public void build(String output)
   {
     char result[] = new char[UCharacter.MAX_VALUE + 1];
     for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
       result[ch] = getFCD(ch);
     }

     CharTrie trie = new CharTrie(result);

     // testing, checking trie values against the table the trie was built
     // from; result[ch] already holds getFCD(ch), so there is no need to
     // decompose every code point a second time
     for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
       if (trie.getValue(ch) != result[ch])
       {
         System.out.println("error at 0x" + Integer.toHexString(ch) + " " +
                            result[ch]);
         break;
       }
     }

     BufferedWriter w = null;
     try
     {
       w = new BufferedWriter(new FileWriter(output));
       w.write(trie.toString());
     }
     catch(Exception e)
     {
       e.printStackTrace();
     }
     finally
     {
       // close in finally so the file handle is released even when the
       // write above fails; close() also flushes the buffered data
       if (w != null) {
         try
         {
           w.close();
         }
         catch(Exception e)
         {
           e.printStackTrace();
         }
       }
     }
   }

   /**
   * Main method. Builds the data and writes it to the default output path;
   * command line arguments are ignored.
   */
   public static void main(String arg[])
   {
     FCDBuilder fcdb = new FCDBuilder();
     fcdb.build();
   }

   // private methods -----------------------------------------------------

   /**
   * Retrieves the FCDcheck value of the argument codepoint.
   * f(ch) = (combining class of first codepoint in (NFD of ch)) << 8 |
   *         (combining class of last codepoint in (NFD of ch)) & 0xFF
   * @param ch character to get FCD from
   * @return the two combining classes packed into one char, lead class in
   *         the high byte and trail class in the low byte
   */
   private char getFCD(int ch)
   {
     String cstr = UCharacter.toString(ch);
     // NOTE(review): (false, 0) is presumably "canonical decomposition, no
     // options" - confirm against the Normalizer API
     String nfd = Normalizer.decompose(cstr, false, 0);
     int lastindex = UTF16.countCodePoint(nfd) - 1;
     int firstch = UTF16.charAtCodePointOffset(nfd, 0);
     int lastch = UTF16.charAtCodePointOffset(nfd, lastindex);
     return (char)((UCharacter.getCombiningClass(firstch) << LEAD_CC_SHIFT_) |
                   (UCharacter.getCombiningClass(lastch) & LAST_BYTE_MASK_));
   }

   // private data members ------------------------------------------------

   /**
   * Output file path
   */
   private static final String DEFAULT_OUTPUT_PATH_ = "fcdcheck.txt";

   /**
   * Lead combining class shift
   */
   private static final int LEAD_CC_SHIFT_ = 8;

   /**
   * Last byte mask
   */
   private static final int LAST_BYTE_MASK_ = 0xFF;
 }
	/*
	******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/tools/normalizer/Attic/FCDBuilder.java,v $
	* $Date: 2001/03/28 00:01:13 $
	* $Revision: 1.4 $
	*
	******************************************************************************
	*/

	package com.ibm.tools.normalizer;

	import java.io.BufferedWriter;
	import java.io.FileWriter;
	import java.io.Writer;
	import com.ibm.util.CharTrie;
	import com.ibm.text.UCharacter;
	import com.ibm.text.UTF16;
	import com.ibm.text.Normalizer;

	/**
	* Class to generate modified checkFCD data for collation.
	* Data generated is used only in internal ICU collation.
	* FCD is the set of strings such that, for each string, simply decomposing
	* any composites (including singleton composites) without canonical
	* reordering yields a string that is already in canonical order (NFD).
	* FCD is not a normalization form, since there's no uniqueness.
	*/
	public class FCDBuilder
	{
	// public methods ----------------------------------------------------

	/**
	* constructor with default output file path
	*/
	public FCDBuilder()
	{
	}

	/**
	* Building method.
	* Each unicode character will be used to generate data, output to the default
	* file path
	*/
	public void build()
	{
	build(DEFAULT_OUTPUT_PATH_);
	}

	/**
	* Building method.
	* Each unicode character will be used to generate data.
	* @param output file path
	*/
	public void build(String output)
	{
	char result[] = new char[UCharacter.MAX_VALUE + 1];

	String cstr,
	nfd;
	for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
	result[ch] = getFCD(ch);
	}

	CharTrie trie = new CharTrie(result);

	// testing, checking trie values
	for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
	if (trie.getValue(ch) != getFCD(ch))
	{
	System.out.println("error at 0x" + Integer.toHexString(ch) + " " +
	getFCD(ch));
	break;
	}
	}

	try
	{
	FileWriter f = new FileWriter(output);
	BufferedWriter w = new BufferedWriter(f);
	String s = trie.toString();
	w.write(s);
	w.close();
	}
	catch(Exception e)
	{
	e.printStackTrace();
	}
	}

	/**
	* Main method
	*/
	public static void main(String arg[])
	{
	FCDBuilder fcdb = new FCDBuilder();
	fcdb.build();
	}

	// private methods -----------------------------------------------------

	/**
	* Retrieved the FCDcheck value of the argument codepoint.
	* f(ch) = combining class of
	* (first codepoint in (NFD of ch)) \| (last code point in (NFD of ch))
	* @param ch character to get FCD from
	*/
	private char getFCD(int ch)
	{
	String cstr = UCharacter.toString(ch),
	nfd = Normalizer.decompose(cstr, false, 0);
	int lastindex = UTF16.countCodePoint(nfd) - 1;
	int firstch = UTF16.charAtCodePointOffset(nfd, 0);
	int lastch = UTF16.charAtCodePointOffset(nfd, lastindex);
	return (char)((UCharacter.getCombiningClass(firstch) << LEAD_CC_SHIFT_) \|
	(UCharacter.getCombiningClass(lastch) & LAST_BYTE_MASK_));
	}

	// private data members ------------------------------------------------

	/**
	* Output file path
	*/
	private final String DEFAULT_OUTPUT_PATH_ = "fcdcheck.txt";

	/**
	* Lead combining class shift
	*/
	private final int LEAD_CC_SHIFT_ = 8;

	/**
	* Last byte mask
	*/
	private final int LAST_BYTE_MASK_ = 0xFF;
	}