| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2001, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $ |
| * $Date: 2002/10/05 01:28:58 $ |
| * $Revision: 1.2 $ |
| * |
| ******************************************************************************* |
| */ |
| |
| package com.ibm.text.UCD; |
| |
| import com.ibm.text.utility.*; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.text.UnicodeSetIterator; |
| import com.ibm.icu.lang.UCharacter; |
| |
| |
| import java.util.*; |
| import java.text.NumberFormat; |
| import java.io.*; |
| |
| public class IANANames implements UCD_Types { |
| private Map aliasToBase = new TreeMap(); |
| private Map aliasToComment = new TreeMap(); |
| private Map aliasToLine = new TreeMap(); |
| |
| public static void testSensitivity() throws IOException { |
| IANANames iNames = new IANANames(); |
| Map m = new HashMap(); |
| Iterator it = iNames.getIterator(); |
| UnicodeSet removed = new UnicodeSet(); |
| int maxLength = 0; |
| while (it.hasNext()) { |
| String alias = (String) it.next(); |
| if (maxLength < alias.length()) maxLength = alias.length(); |
| if (alias.length() > 40) System.out.println("Name >40: " + alias); |
| if (alias.indexOf(')') >= 0 || alias.indexOf('(') >= 0) System.out.println("Illegal tag: " + alias); |
| String skeleton = removeNonAlphanumeric(alias, removed); |
| String other = (String) m.get(skeleton); |
| if (other != null) { |
| String base = iNames.getBase(alias); |
| String otherBase = iNames.getBase(other); |
| if (!base.equals(otherBase)) { |
| System.out.println("Collision between: " + alias + " (" + base + ") and " |
| + other + " (" + otherBase + ")"); |
| } else { |
| System.out.println("Alias Variant: " + alias + " and " + other + " (" + base + ")"); |
| } |
| } else { |
| m.put(skeleton, alias); |
| } |
| } |
| System.out.println("Max Length: " + maxLength); |
| |
| System.out.println("Characters removed: "); |
| UnicodeSetIterator usi = new UnicodeSetIterator(removed); |
| while (usi.next()) { |
| char c = (char) usi.codepoint; // safe, can't be supplementary |
| System.out.println("0x" + usi.codepoint + "\t'" + c + "'\t" + UCharacter.getName(usi.codepoint)); |
| } |
| } |
| |
| public IANANames() throws IOException { |
| BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1); |
| try { |
| boolean atStart = true; |
| String lastName = ""; |
| int counter = 0; |
| while (true) { |
| String line = in.readLine(); |
| if (line == null) break; |
| counter++; |
| if (atStart) { |
| if (line.startsWith("-------------")) atStart = false; |
| continue; |
| } |
| if (line.trim().length() == 0) continue; |
| |
| if (line.startsWith("Name:") || line.startsWith("Alias:")) { |
| lastName = add(line, lastName, counter); |
| } else if (line.startsWith("Source:") || line.startsWith("MIBenum:") |
| || line.startsWith(" ")) { |
| continue; |
| } else if (line.equals("REFERENCES")) { |
| break; |
| } else { |
| System.out.println("Unknown Line: " + line); |
| } |
| } |
| } finally { |
| in.close(); |
| } |
| } |
| |
| private String add(String line, String baseName, int counter) { |
| // extract the alias, doing a little validity check |
| int pos = line.indexOf(": "); |
| if (pos < 0) throw new IllegalArgumentException("Bad line: " + counter + " '" + line + "'"); |
| String alias = line.substring(pos+2).trim(); |
| |
| // get comment |
| String comment = null; |
| pos = alias.indexOf(' '); |
| if (pos >= 0) { |
| comment = alias.substring(pos).trim(); |
| alias = alias.substring(0, pos); |
| } |
| |
| // reset the baseName if we are a name |
| if (line.startsWith("Name:")) { |
| baseName = alias; |
| } |
| |
| // store |
| if (!alias.equals("None")) { |
| if (false) { |
| if (baseName.equals(alias)) System.out.println(); |
| System.out.println("Adding " + alias + "\t=> " + baseName + (comment != null ? "\t(" + comment + ")" : "")); |
| } |
| // check if it is stored already |
| String oldbaseName = (String) aliasToBase.get(alias); |
| if (oldbaseName != null) { |
| System.out.println("Duplicate alias (" + alias + ", " + oldbaseName + ", " + baseName + "): " |
| + counter + " '" + line + "'"); |
| } |
| aliasToBase.put(alias, baseName); |
| if (comment != null) aliasToComment.put(alias, comment); |
| aliasToLine.put(alias, comment); |
| } |
| return baseName; |
| } |
| |
| public Iterator getIterator() { |
| return aliasToBase.keySet().iterator(); |
| } |
| |
| /** |
| * Returns the name for this alias, or "" if there is none |
| */ |
| public String getBase(String alias) { |
| return (String) aliasToBase.get(alias); |
| } |
| |
| public static String removeNonAlphanumeric(String s, UnicodeSet removed) { |
| s = s.toUpperCase(Locale.ENGLISH); // can't have Turkish! |
| StringBuffer result = new StringBuffer(); |
| boolean removedZero = false; |
| for (int i = 0; i < s.length(); ++i) { |
| char c = s.charAt(i); |
| if (c == '0') { |
| char cLast = result.length() > 0 ? result.charAt(result.length() - 1) : '0'; |
| if ('0' <= cLast && cLast <= '9') { |
| result.append(c); |
| } else { |
| if (!removed.contains(c)) { |
| System.out.println("Removed '" + c + "' from " + s + " => " + result); |
| removed.add(c); |
| } |
| removedZero = true; |
| } |
| } else if (('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) { |
| result.append(c); |
| } else { |
| if (!removed.contains(c)) { |
| System.out.println("Removed '" + c + "' from " + s + " => " + result); |
| removed.add(c); |
| } |
| } |
| } |
| //if (removedZero) System.out.println("Removed 0 from " + s + " => " + result); |
| return result.toString(); |
| } |
| } |