// unicodetools/com/ibm/text/UCD/IANANames.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
 * $Date: 2002/10/05 01:28:58 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */

 package com.ibm.text.UCD;

 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.lang.UCharacter;


 import java.util.*;
 import java.text.NumberFormat;
 import java.io.*;

 /**
  * Index of the charset names and aliases listed in the IANA
  * <code>character-sets.txt</code> registry file.
  *
  * <p>Constructing an instance parses the file once and records, for every
  * "Name:" or "Alias:" entry, the primary name it was listed under.
  * {@link #testSensitivity()} uses that index to report aliases that become
  * indistinguishable once punctuation, case and some zeros are ignored.
  */
 public class IANANames implements UCD_Types {
     /** Set to true to trace every alias on System.out as it is added. */
     private static final boolean DEBUG = false;

     /** Alias (or primary name) => primary name of the entry it appeared under. */
     private Map aliasToBase = new TreeMap();
     /** Alias => trailing comment text from its line; only present when there was a comment. */
     private Map aliasToComment = new TreeMap();
     /** Alias => full text of the registry line that defined it. */
     private Map aliasToLine = new TreeMap();

     /**
      * Loads the registry and prints a diagnostic report to System.out:
      * names longer than 40 characters, names containing parentheses,
      * aliases whose skeletons (see {@link #removeNonAlphanumeric}) coincide,
      * the maximum alias length, and the set of characters that were dropped
      * while building skeletons.
      *
      * @throws IOException if the registry file cannot be read
      */
     public static void testSensitivity() throws IOException {
         IANANames iNames = new IANANames();
         Map skeletonToAlias = new HashMap();
         UnicodeSet removed = new UnicodeSet();
         int maxLength = 0;

         for (Iterator it = iNames.getIterator(); it.hasNext();) {
             String alias = (String) it.next();
             if (maxLength < alias.length()) {
                 maxLength = alias.length();
             }
             if (alias.length() > 40) {
                 System.out.println("Name >40: " + alias);
             }
             if (alias.indexOf(')') >= 0 || alias.indexOf('(') >= 0) {
                 System.out.println("Illegal tag: " + alias);
             }

             String skeleton = removeNonAlphanumeric(alias, removed);
             String other = (String) skeletonToAlias.get(skeleton);
             if (other == null) {
                 // first alias seen with this skeleton; later ones are compared against it
                 skeletonToAlias.put(skeleton, alias);
                 continue;
             }

             String base = iNames.getBase(alias);
             String otherBase = iNames.getBase(other);
             if (!base.equals(otherBase)) {
                 // same skeleton but different charsets: a genuine clash
                 System.out.println("Collision between: " + alias + " (" + base + ") and "
                     + other + " (" + otherBase + ")");
             } else {
                 System.out.println("Alias Variant: " + alias + " and " + other + " (" + base + ")");
             }
         }
         System.out.println("Max Length: " + maxLength);

         System.out.println("Characters removed: ");
         UnicodeSetIterator usi = new UnicodeSetIterator(removed);
         while (usi.next()) {
             char c = (char) usi.codepoint; // safe, can't be supplementary
             // The "0x" prefix promises hexadecimal, so format the code point as hex
             // rather than relying on the default decimal int-to-String conversion.
             String hex = Integer.toHexString(usi.codepoint).toUpperCase(Locale.ENGLISH);
             System.out.println("0x" + hex + "\t'" + c + "'\t" + UCharacter.getName(usi.codepoint));
         }
     }

     /**
      * Parses the IANA registry file found under BASE_DIR (not declared in this
      * class; presumably inherited from UCD_Types) in the "IANA" subdirectory.
      *
      * <p>Everything up to and including the first line starting with
      * "-------------" is treated as preamble and skipped. After that, blank
      * lines, "Source:" lines, "MIBenum:" lines and lines indented by eight
      * spaces are ignored; "Name:" and "Alias:" lines are recorded; a line
      * equal to "REFERENCES" ends parsing; anything else is reported on
      * System.out as an unknown line.
      *
      * @throws IOException if the file cannot be opened or read
      * @throws IllegalArgumentException if a "Name:"/"Alias:" line has no ": " separator
      */
     public IANANames() throws IOException {
         // File.separator instead of a hard-coded backslash: identical on Windows,
         // and resolves to a real subdirectory on other platforms.
         BufferedReader in = Utility.openReadFile(
             BASE_DIR + "IANA" + File.separator + "character-sets.txt", Utility.LATIN1);
         try {
             boolean atStart = true;
             String lastName = "";
             int counter = 0; // 1-based line number, used only in diagnostics
             while (true) {
                 String line = in.readLine();
                 if (line == null) {
                     break;
                 }
                 counter++;
                 if (atStart) {
                     if (line.startsWith("-------------")) {
                         atStart = false;
                     }
                     continue;
                 }
                 if (line.trim().length() == 0) {
                     continue;
                 }

                 if (line.startsWith("Name:") || line.startsWith("Alias:")) {
                     lastName = add(line, lastName, counter);
                 } else if (line.startsWith("Source:") || line.startsWith("MIBenum:")
                         || line.startsWith("        ")) {
                     continue;
                 } else if (line.equals("REFERENCES")) {
                     break;
                 } else {
                     System.out.println("Unknown Line: " + line);
                 }
             }
         } finally {
             in.close();
         }
     }

     /**
      * Records one "Name:" or "Alias:" line.
      *
      * <p>The text after the first ": " is split at its first space: the part
      * before is the alias, the remainder (if any) is kept as a comment. A
      * "Name:" line makes its alias the new base name. An alias of "None" is
      * not stored. A repeated alias is reported on System.out and then
      * overwrites the earlier mapping.
      *
      * @param line     the raw registry line
      * @param baseName the base name in effect before this line
      * @param counter  line number of {@code line}, for diagnostics
      * @return the base name in effect after this line
      * @throws IllegalArgumentException if {@code line} contains no ": "
      */
     private String add(String line, String baseName, int counter) {
         // extract the alias, doing a little validity check
         int pos = line.indexOf(": ");
         if (pos < 0) {
             throw new IllegalArgumentException("Bad line: " + counter + " '" + line + "'");
         }
         String alias = line.substring(pos + 2).trim();

         // anything after the first space is commentary, not part of the alias
         String comment = null;
         pos = alias.indexOf(' ');
         if (pos >= 0) {
             comment = alias.substring(pos).trim();
             alias = alias.substring(0, pos);
         }

         // reset the baseName if we are a name
         if (line.startsWith("Name:")) {
             baseName = alias;
         }

         // "None" is a placeholder and is never stored as an alias
         if (alias.equals("None")) {
             return baseName;
         }

         if (DEBUG) {
             if (baseName.equals(alias)) {
                 System.out.println();
             }
             System.out.println("Adding " + alias + "\t=> " + baseName
                 + (comment != null ? "\t(" + comment + ")" : ""));
         }

         // check if it is stored already
         String oldbaseName = (String) aliasToBase.get(alias);
         if (oldbaseName != null) {
             System.out.println("Duplicate alias (" + alias + ", " + oldbaseName + ", " + baseName + "): "
                 + counter + " '" + line + "'");
         }
         aliasToBase.put(alias, baseName);
         if (comment != null) {
             aliasToComment.put(alias, comment);
         }
         // Store the defining line itself; storing the comment here would merely
         // duplicate aliasToComment (and insert nulls for uncommented aliases).
         aliasToLine.put(alias, line);
         return baseName;
     }

     /**
      * Returns an iterator over every stored alias (primary names included),
      * in the key order of the underlying TreeMap.
      */
     public Iterator getIterator() {
         return aliasToBase.keySet().iterator();
     }

     /**
      * Returns the base name for this alias, or null if the alias is unknown.
      */
     public String getBase(String alias) {
         return (String) aliasToBase.get(alias);
     }

     /**
      * Builds a comparison skeleton for a charset name.
      *
      * <p>The name is upper-cased with the English locale; only 'A'-'Z' and
      * digits are kept. A '0' is kept only when the skeleton built so far is
      * empty or ends in a digit; otherwise it is dropped like punctuation.
      * Every dropped character is added to {@code removed}, and the first time
      * a given character is dropped a message is printed to System.out.
      *
      * @param s       the name to reduce
      * @param removed accumulates the characters that were dropped; updated in place
      * @return the skeleton string
      */
     public static String removeNonAlphanumeric(String s, UnicodeSet removed) {
         s = s.toUpperCase(Locale.ENGLISH); // can't have Turkish!
         StringBuffer result = new StringBuffer();
         for (int i = 0; i < s.length(); ++i) {
             char c = s.charAt(i);
             boolean keep;
             if (c == '0') {
                 // an empty skeleton counts as "ends in a digit", so a leading zero is kept
                 char cLast = result.length() > 0 ? result.charAt(result.length() - 1) : '0';
                 keep = '0' <= cLast && cLast <= '9';
             } else {
                 keep = ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
             }

             if (keep) {
                 result.append(c);
             } else {
                 recordRemoval(c, s, result, removed);
             }
         }
         return result.toString();
     }

     /** Adds c to removed, announcing it on System.out the first time it is seen. */
     private static void recordRemoval(char c, String s, StringBuffer result, UnicodeSet removed) {
         if (!removed.contains(c)) {
             System.out.println("Removed '" + c + "' from " + s + " => " + result);
             removed.add(c);
         }
     }
 }
	/**
	*******************************************************************************
	* Copyright (C) 1996-2001, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
	* $Date: 2002/10/05 01:28:58 $
	* $Revision: 1.2 $
	*
	*******************************************************************************
	*/

	package com.ibm.text.UCD;

	import com.ibm.text.utility.*;
	import com.ibm.icu.text.UnicodeSet;
	import com.ibm.icu.text.UnicodeSetIterator;
	import com.ibm.icu.lang.UCharacter;


	import java.util.*;
	import java.text.NumberFormat;
	import java.io.*;

	public class IANANames implements UCD_Types {
	private Map aliasToBase = new TreeMap();
	private Map aliasToComment = new TreeMap();
	private Map aliasToLine = new TreeMap();

	public static void testSensitivity() throws IOException {
	IANANames iNames = new IANANames();
	Map m = new HashMap();
	Iterator it = iNames.getIterator();
	UnicodeSet removed = new UnicodeSet();
	int maxLength = 0;
	while (it.hasNext()) {
	String alias = (String) it.next();
	if (maxLength < alias.length()) maxLength = alias.length();
	if (alias.length() > 40) System.out.println("Name >40: " + alias);
	if (alias.indexOf(')') >= 0 \|\| alias.indexOf('(') >= 0) System.out.println("Illegal tag: " + alias);
	String skeleton = removeNonAlphanumeric(alias, removed);
	String other = (String) m.get(skeleton);
	if (other != null) {
	String base = iNames.getBase(alias);
	String otherBase = iNames.getBase(other);
	if (!base.equals(otherBase)) {
	System.out.println("Collision between: " + alias + " (" + base + ") and "
	+ other + " (" + otherBase + ")");
	} else {
	System.out.println("Alias Variant: " + alias + " and " + other + " (" + base + ")");
	}
	} else {
	m.put(skeleton, alias);
	}
	}
	System.out.println("Max Length: " + maxLength);

	System.out.println("Characters removed: ");
	UnicodeSetIterator usi = new UnicodeSetIterator(removed);
	while (usi.next()) {
	char c = (char) usi.codepoint; // safe, can't be supplementary
	System.out.println("0x" + usi.codepoint + "\t'" + c + "'\t" + UCharacter.getName(usi.codepoint));
	}
	}

	public IANANames() throws IOException {
	BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1);
	try {
	boolean atStart = true;
	String lastName = "";
	int counter = 0;
	while (true) {
	String line = in.readLine();
	if (line == null) break;
	counter++;
	if (atStart) {
	if (line.startsWith("-------------")) atStart = false;
	continue;
	}
	if (line.trim().length() == 0) continue;

	if (line.startsWith("Name:") \|\| line.startsWith("Alias:")) {
	lastName = add(line, lastName, counter);
	} else if (line.startsWith("Source:") \|\| line.startsWith("MIBenum:")
	\|\| line.startsWith(" ")) {
	continue;
	} else if (line.equals("REFERENCES")) {
	break;
	} else {
	System.out.println("Unknown Line: " + line);
	}
	}
	} finally {
	in.close();
	}
	}

	private String add(String line, String baseName, int counter) {
	// extract the alias, doing a little validity check
	int pos = line.indexOf(": ");
	if (pos < 0) throw new IllegalArgumentException("Bad line: " + counter + " '" + line + "'");
	String alias = line.substring(pos+2).trim();

	// get comment
	String comment = null;
	pos = alias.indexOf(' ');
	if (pos >= 0) {
	comment = alias.substring(pos).trim();
	alias = alias.substring(0, pos);
	}

	// reset the baseName if we are a name
	if (line.startsWith("Name:")) {
	baseName = alias;
	}

	// store
	if (!alias.equals("None")) {
	if (false) {
	if (baseName.equals(alias)) System.out.println();
	System.out.println("Adding " + alias + "\t=> " + baseName + (comment != null ? "\t(" + comment + ")" : ""));
	}
	// check if it is stored already
	String oldbaseName = (String) aliasToBase.get(alias);
	if (oldbaseName != null) {
	System.out.println("Duplicate alias (" + alias + ", " + oldbaseName + ", " + baseName + "): "
	+ counter + " '" + line + "'");
	}
	aliasToBase.put(alias, baseName);
	if (comment != null) aliasToComment.put(alias, comment);
	aliasToLine.put(alias, comment);
	}
	return baseName;
	}

	public Iterator getIterator() {
	return aliasToBase.keySet().iterator();
	}

	/**
	* Returns the name for this alias, or "" if there is none
	*/
	public String getBase(String alias) {
	return (String) aliasToBase.get(alias);
	}

	public static String removeNonAlphanumeric(String s, UnicodeSet removed) {
	s = s.toUpperCase(Locale.ENGLISH); // can't have Turkish!
	StringBuffer result = new StringBuffer();
	boolean removedZero = false;
	for (int i = 0; i < s.length(); ++i) {
	char c = s.charAt(i);
	if (c == '0') {
	char cLast = result.length() > 0 ? result.charAt(result.length() - 1) : '0';
	if ('0' <= cLast && cLast <= '9') {
	result.append(c);
	} else {
	if (!removed.contains(c)) {
	System.out.println("Removed '" + c + "' from " + s + " => " + result);
	removed.add(c);
	}
	removedZero = true;
	}
	} else if (('A' <= c && c <= 'Z') \|\| ('0' <= c && c <= '9')) {
	result.append(c);
	} else {
	if (!removed.contains(c)) {
	System.out.println("Removed '" + c + "' from " + s + " => " + result);
	removed.add(c);
	}
	}
	}
	//if (removedZero) System.out.println("Removed 0 from " + s + " => " + result);
	return result.toString();
	}
	}