src/com/ibm/tools/normalizer/FCDBuilder.java - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/tools/normalizer/Attic/FCDBuilder.java,v $
 * $Date: 2001/03/08 03:05:47 $
 * $Revision: 1.3 $
 *
 ******************************************************************************
 */

 package com.ibm.tools.normalizer;

 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.Writer;
 import com.ibm.util.ByteTrie;
 import com.ibm.text.UCharacter;
 import com.ibm.text.UTF16;
 import com.ibm.text.Normalizer;

 /**
 * Class to generate modified checkFCD data for collation.
 * Data generated is used only in internal ICU collation.
 * FCD ("Fast C or D") is the set of strings such that, for each string, if
 * you simply decomposed any composites (including singleton composites)
 * without canonical reordering, the result would be canonically ordered.
 * FCD is not a normalization form, since there's no uniqueness.
 * <p>
 * For every code point this tool records the combining class of the last
 * code point of its decomposition, packs the table into a {@link ByteTrie}
 * and writes the trie's string form to a text file.
 */
 public class FCDBuilder
 {
   // public methods ----------------------------------------------------

   /**
   * Constructor. The builder keeps no state; the output file is chosen by
   * the build method that is called.
   */
   public FCDBuilder()
   {
   }

   /**
   * Building method.
   * Each unicode character will be used to generate data, output to the
   * default file path.
   */
   public void build()
   {
     build(DEFAULT_OUTPUT_PATH_);
   }

   /**
   * Building method.
   * Each unicode character will be used to generate data. The data is
   * compacted into a trie, the trie is checked against the raw table, and
   * its string form is written to the argument file.
   * A mismatch found by the check is reported on standard output; a failure
   * while writing is reported by printing the stack trace. Neither is
   * propagated to the caller.
   * @param output path of the file the generated data is written to
   */
   public void build(String output)
   {
     byte result[] = new byte[UCharacter.MAX_VALUE + 1];
     for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
       result[ch] = getFCD(ch);
     }

     ByteTrie trie = new ByteTrie(result);

     // testing, checking trie values against the table the trie was built
     // from; this avoids decomposing every code point a second time
     for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
       if (trie.getValue(ch) != result[ch])
       {
         // only the first mismatch is reported
         System.out.println("error at 0x" + Integer.toHexString(ch) + " " +
                            result[ch]);
         break;
       }
     }

     try
     {
       Writer w = new BufferedWriter(new FileWriter(output));
       try
       {
         w.write(trie.toString());
       }
       finally
       {
         // always release the file handle, even when writing fails
         w.close();
       }
     }
     catch(Exception e)
     {
       e.printStackTrace();
     }
   }

   /**
   * Main method.
   * Generates the data into the default output file; arguments are ignored.
   * @param arg command line arguments, unused
   */
   public static void main(String arg[])
   {
     FCDBuilder fcdb = new FCDBuilder();
     fcdb.build();
   }

   // private methods -----------------------------------------------------

   /**
   * Retrieves the FCDcheck value of the argument codepoint.
   * f(ch) = combining class of (last code point in (NFD of ch))
   * @param ch code point to compute the value for
   * @return combining class of the last code point in the decomposition
   */
   private static byte getFCD(int ch)
   {
     String cstr = UCharacter.toString(ch);
     String nfd = Normalizer.decompose(cstr, false, 0);
     // index by code point, not by char, so that a trailing supplementary
     // character is read as a whole
     int lastindex = UTF16.countCodePoint(nfd) - 1;
     int lastch = UTF16.charAtCodePointOffset(nfd, lastindex);
     return UCharacter.getCombiningClass(lastch);
   }

   // private data members ------------------------------------------------

   /**
   * Default output file path
   */
   private static final String DEFAULT_OUTPUT_PATH_ = "fcdcheck.txt";
 }
	/*
	******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/tools/normalizer/Attic/FCDBuilder.java,v $
	* $Date: 2001/03/08 03:05:47 $
	* $Revision: 1.3 $
	*
	******************************************************************************
	*/

	package com.ibm.tools.normalizer;

	import java.io.BufferedWriter;
	import java.io.FileWriter;
	import java.io.Writer;
	import com.ibm.util.ByteTrie;
	import com.ibm.text.UCharacter;
	import com.ibm.text.UTF16;
	import com.ibm.text.Normalizer;

	/**
	* Class to generate modified checkFCD data for collation.
	* Data generated is used only in internal ICU collation.
	* FCD ("Fast C or D") is the set of strings such that, for each string, if
	* you simply decomposed any composites (including singleton composites)
	* without canonical reordering, the result would be canonically ordered.
	* FCD is not a normalization form, since there's no uniqueness.
	* <p>
	* For every code point this tool records the combining class of the last
	* code point of its decomposition, packs the table into a {@link ByteTrie}
	* and writes the trie's string form to a text file.
	*/
	public class FCDBuilder
	{
		// public methods ----------------------------------------------------

		/**
		* Constructor. The builder keeps no state; the output file is chosen by
		* the build method that is called.
		*/
		public FCDBuilder()
		{
		}

		/**
		* Building method.
		* Each unicode character will be used to generate data, output to the
		* default file path.
		*/
		public void build()
		{
			build(DEFAULT_OUTPUT_PATH_);
		}

		/**
		* Building method.
		* Each unicode character will be used to generate data. The data is
		* compacted into a trie, the trie is checked against the raw table, and
		* its string form is written to the argument file.
		* A mismatch found by the check is reported on standard output; a failure
		* while writing is reported by printing the stack trace. Neither is
		* propagated to the caller.
		* @param output path of the file the generated data is written to
		*/
		public void build(String output)
		{
			byte result[] = new byte[UCharacter.MAX_VALUE + 1];
			for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
				result[ch] = getFCD(ch);
			}

			ByteTrie trie = new ByteTrie(result);

			// testing, checking trie values against the table the trie was built
			// from; this avoids decomposing every code point a second time
			for (int ch = UCharacter.MIN_VALUE; ch <= UCharacter.MAX_VALUE; ch ++) {
				if (trie.getValue(ch) != result[ch])
				{
					// only the first mismatch is reported
					System.out.println("error at 0x" + Integer.toHexString(ch) + " " +
						result[ch]);
					break;
				}
			}

			try
			{
				Writer w = new BufferedWriter(new FileWriter(output));
				try
				{
					w.write(trie.toString());
				}
				finally
				{
					// always release the file handle, even when writing fails
					w.close();
				}
			}
			catch(Exception e)
			{
				e.printStackTrace();
			}
		}

		/**
		* Main method.
		* Generates the data into the default output file; arguments are ignored.
		* @param arg command line arguments, unused
		*/
		public static void main(String arg[])
		{
			FCDBuilder fcdb = new FCDBuilder();
			fcdb.build();
		}

		// private methods -----------------------------------------------------

		/**
		* Retrieves the FCDcheck value of the argument codepoint.
		* f(ch) = combining class of (last code point in (NFD of ch))
		* @param ch code point to compute the value for
		* @return combining class of the last code point in the decomposition
		*/
		private static byte getFCD(int ch)
		{
			String cstr = UCharacter.toString(ch);
			String nfd = Normalizer.decompose(cstr, false, 0);
			// index by code point, not by char, so that a trailing supplementary
			// character is read as a whole
			int lastindex = UTF16.countCodePoint(nfd) - 1;
			int lastch = UTF16.charAtCodePointOffset(nfd, lastindex);
			return UCharacter.getCombiningClass(lastch);
		}

		// private data members ------------------------------------------------

		/**
		* Default output file path
		*/
		private static final String DEFAULT_OUTPUT_PATH_ = "fcdcheck.txt";
	}