| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2001, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $ |
| * $Date: 2006/04/05 22:12:44 $ |
| * $Revision: 1.18 $ |
| * |
| ******************************************************************************* |
| */ |
| |
| package com.ibm.text.UCD; |
| |
| import com.ibm.text.utility.*; |
| |
| import java.util.*; |
| import java.text.NumberFormat; |
| import java.io.*; |
| |
| |
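| /** Simple program that merges the UCD data files into a single binary data file |
| * (the XML writer in this class is currently commented out). Not yet fully documented. |
| * @author Mark Davis |
| */ |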
| |
| public final class ConvertUCD implements UCD_Types { |
| public static final boolean SHOW = false; |
| public static final boolean DEBUG = false; |
| static final boolean SHOW_SAMPLE = false; |
| |
| |
| int major; |
| int minor; |
| int update; |
| |
| String version; |
| |
| // varies by version |
| /* |
| public static final String BASE_DIR11 = DATA_DIR + "\\Versions\\"; |
| public static final String BASE_DIR20 = DATA_DIR + "\\Versions\\"; |
| public static final String BASE_DIR21 = DATA_DIR + "\\Versions\\"; |
| public static final String BASE_DIR30 = DATA_DIR + "\\Update 3.0.1\\"; |
| public static final String BASE_DIR31 = DATA_DIR + "\\3.1-Update\\"; |
| */ |
| |
| //public static final String blocksnamePlain = "Blocks.txt"; |
| //public static final String blocksname31 = "Blocks-4d2.beta"; |
| |
| /** First item is file name, rest are field names (skipping character). |
| * "OMIT" is special -- means don't record |
| */ |
| |
| static String[][] labelList = { |
| // Labels for the incoming files. Labels MUST match field order in file. |
| // IMPORTANT - defaults of form y-=x must occur after x is encountered! |
| // The one exception is "st", which is handled specially. |
| // So file order is important. |
| //* |
| // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A; ; ; ;N ;LATIN CAPITAL LETTER N J; ; ;01CC;01CB |
| // n gc cc bc dm dd dv nv bm on cm, uc lc tc |
| {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| //{"ExtraProperties", "xp"}, |
| {"PropList", "binary"}, |
| |
| //{"ExtraProperties", "xp"}, |
| |
| {"EastAsianWidth", "ea", "OMIT"}, |
| {"LineBreak", "lb", "OMIT"}, |
| {"SpecialCasing", "*sl", "*st", "*su", "sc"}, |
| {"CompositionExclusions", "ce"}, |
| {"CaseFolding", "OMIT", "*fc"}, |
| {"ArabicShaping", "OMIT", "jt", "jg"}, |
| {"BidiMirroring", "*bg"}, |
| {"Scripts", "sn"}, |
| //{"Jamo", "jn"}, |
| //{"Scripts-1d4", "RANGE", "sn"}, |
| //{"Age", "*sn"}, |
| //*/ |
| /* |
| //*/ |
| }; |
| static HashMap isHex = new HashMap(); |
| static HashMap defaults = new HashMap(); |
| |
| static { |
| for (int j = 0; j < labelList.length; ++j) { |
| String[] labels = labelList[j]; |
| |
| for (int i = 1; i < labels.length; ++i) { |
| boolean hex = false; |
| String def = null; |
| //char appendChar = '\u0000'; |
| |
| // pull off "*": hex interpretation |
| if (labels[i].charAt(0) == '*') { // HEX value |
| hex = true; |
| labels[i] = labels[i].substring(1); |
| } |
| |
| /* |
| // pull off "$": append duplicates |
| if (labels[i].charAt(0) == '$') { // HEX value |
| appendChar = labels[i].charAt(1); |
| labels[i] = labels[i].substring(2); |
| } |
| |
| // pull off default values |
| int pos = labels[i].indexOf('-'); |
| if (pos >= 0) { |
| def = labels[i].substring(pos+1); |
| labels[i] = labels[i].substring(0,pos); |
| } |
| */ |
| // store results |
| // we do this after all processing, so that the label is clean!! |
| |
| if (hex) isHex.put(labels[i], ""); |
| //if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar)); |
| defaults.put(labels[i], def); |
| } |
| } |
| } |
| /* |
| static String[][] labelList31 = { |
| // Labels for the incoming files. Labels MUST match field order in file. |
| // IMPORTANT - defaults of form y-=x must occur after x is encountered! |
| // The one exception is "st", which is handled specially. |
| // So file order is important. |
| //* |
| // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A; ; ; ;N ;LATIN CAPITAL LETTER N J; ; ;01CC;01CB |
| // n gc cc bc dm dd dv nv bm on cm, uc lc tc |
| {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| {"PropList-3.1.0d5.beta", "binary"}, |
| |
| {"ExtraProperties", "xp"}, |
| |
| {"EastAsianWidth-4d7.beta", "ea", "OMIT"}, |
| {"LineBreak-6d6.beta", "lb", "OMIT"}, |
| {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"}, |
| {"CompositionExclusions-3d6.beta", "ce"}, |
| {"CaseFolding-3d4.beta", "OMIT", "*fc"}, |
| {"ArabicShaping", "OMIT", "jt", "jg"}, |
| {"BidiMirroring", "*bg"}, |
| {"Scripts-3.1.0d4.beta", "sn"}, |
| //{"Scripts-1d4", "RANGE", "sn"}, |
| //{"Age", "*sn"}, |
| //*/ |
| /* |
| {"Jamo", "jn"}, |
| // |
| }; |
| /* |
| {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| {"ExtraProperties", "xp"}, |
| |
| {"EastAsianWidth-4d7.beta", "ea", "OMIT"}, |
| {"LineBreak-6d6.beta", "lb", "OMIT"}, |
| {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"}, |
| {"CompositionExclusions-3d6.beta", "ce"}, |
| {"CaseFolding-3d4.beta", "OMIT", "*fc"}, |
| {"PropList-3.1.0d2.beta", "PROP", "OMIT"}, |
| {"ArabicShaping", "OMIT", "jt", "jg"}, |
| {"BidiMirroring", "*bg"}, |
| {"Scripts-1d4", "sn"}, |
| //{"Scripts-1d4", "RANGE", "sn"}, |
| //{"Age", "*sn"}, |
| //*/ |
| /* |
| {"Jamo", "jn"}, |
| // |
| |
| //"NamesList-3.1.0d1.beta" |
| |
| static String[][] labelList30 = { |
| // Labels for the incoming files. Labels MUST match field order in file. |
| // IMPORTANT - defaults of form y-=x must occur after x is encountered! |
| // The one exception is "st", which is handled specially. |
| // So file order is important. |
| //* |
| {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| {"CompositionExclusions", "ce"}, |
| {"EastAsianWidth", "ea", "OMIT"}, |
| {"LineBreak", "lb", "OMIT"}, |
| {"SpecialCasing", "*sl", "*st", "*su", "sc"}, |
| {"CaseFolding", "OMIT", "*fc"}, |
| {"ArabicShaping", "OMIT", "jt", "jg"}, |
| {"BidiMirroring", "*bg"}, |
| /* |
| {"Jamo", "jn"}, |
| {"PropList.alpha", "RANGE", "OMIT"}, |
| // |
| }; |
| |
| static String[][] labelList11 = { |
| {"UnicodeData-1.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| }; |
| |
| static String[][] labelList20 = { |
| {"UnicodeData-2.0", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| }; |
| |
| static String[][] labelList21 = { |
| {"UnicodeData-2.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"}, |
| }; |
| */ |
| |
| // handles |
| public static final String blocksname = "Blocks"; |
| //public static final String[][] labelList; |
| public static final boolean NEWPROPS = true; |
| |
| /* |
| static { |
| switch (major*10 + minor) { |
| case 31: |
| blocksname = blocksname31; |
| labelList = labelList31; |
| break; |
| case 30: |
| blocksname = blocksnamePlain; |
| labelList = labelList30; |
| break; |
| case 21: |
| blocksname = blocksnamePlain; |
| labelList = labelList21; |
| break; |
| case 20: |
| blocksname = blocksnamePlain; |
| labelList = labelList20; |
| break; |
| default: |
| blocksname = blocksnamePlain; |
| labelList = labelList11; |
| break; |
| } |
| } |
| |
| */ |
| static final String dataFilePrefix = "UCD_Data"; |
| |
| |
| // MAIN!! |
| |
| public static void main (String[] args) throws Exception { |
| System.out.println("Building binary version of UCD"); |
| |
| log = new PrintWriter(new BufferedWriter( |
| new OutputStreamWriter( |
| new FileOutputStream(GEN_DIR + "UCD-log.txt"), |
| "UTF8"), |
| 32*1024)); |
| log.write("\uFEFF"); // BOM |
| |
| try { |
| for (int i = 0; i < args.length; ++i) { |
| String version = args[i]; |
| if (version.length() == 0) version = UCD.latestVersion; |
| |
| new ConvertUCD().toJava(version); |
| } |
| } finally { |
| log.close(); |
| } |
| } |
| |
| /* |
| static void toXML() throws Exception { |
| // Blocks is special |
| // Unihan is special |
| // collect all the other .txt files in the directory |
| if (false) readBlocks(); |
| if (true) for (int i = 0; i < labelList.length; ++i) { |
| readSemi(labelList[i]); |
| } else { |
| readSemi(labelList[0]); // TESTING ONLY |
| } |
| writeXML(); |
| } |
| */ |
| |
| void toJava(String version) throws Exception { |
| this.version = version; |
| String[] parts = new String[3]; |
| Utility.split(version, '.', parts); |
| major = Integer.parseInt(parts[0]); |
| minor = Integer.parseInt(parts[1]); |
| update = Integer.parseInt(parts[2]); |
| System.out.println("Building " + version); |
| // Blocks is special |
| // Unihan is special |
| // collect all the other .txt files in the directory |
| if (false) readBlocks(); |
| if (true) for (int i = 0; i < labelList.length; ++i) { |
| readSemi(labelList[i]); |
| } else { |
| readSemi(labelList[0]); // TESTING ONLY |
| } |
| |
| Iterator it = charData.keySet().iterator(); |
| while (it.hasNext()) { |
| Object key = it.next(); |
| UData value = (UData) charData.get(key); |
| value.compact(); |
| } |
| |
| /* |
| UData ud; |
| ud = getEntry(0x5e); |
| System.out.println("SPOT-CHECK: 5e: " + ud); |
| |
| ud = getEntry(0x130); |
| System.out.println("SPOT-CHECK: 130: " + ud); |
| |
| ud = getEntry(0x1f6); |
| System.out.println("SPOT-CHECK: 1f6: " + ud); |
| |
| ud = getEntry(0x2A6D6); |
| System.out.println("SPOT-CHECK: 2A6D6: " + ud); |
| |
| ud = getEntry(0xFFFF); |
| System.out.println("SPOT-CHECK: FFFF: " + ud); |
| */ |
| |
| writeJavaData(); |
| } |
| |
| static PrintWriter log; |
| //static String directory = BASE_DIR; |
| //static Map appendDuplicates = new HashMap(); |
| |
| /** First item in labels is file name, rest are field names (skipping character). |
| * "OMIT" is special -- means don't record |
| */ |
| |
| |
| List blockData = new LinkedList(); |
| |
| void readBlocks() throws Exception { |
| System.out.println("Reading 'Blocks'"); |
| BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1); |
| String line = ""; |
| try { |
| String[] parts = new String[20]; |
| for (int lineNumber = 1; ; ++lineNumber) { |
| line = input.readLine(); |
| if (line == null) break; |
| if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'"); |
| |
| //String original = line; |
| String comment = ""; |
| int commentPos = line.indexOf('#'); |
| if (commentPos >= 0) { |
| comment = line.substring(commentPos+1).trim(); |
| line = line.substring(0, commentPos); |
| } |
| line = line.trim(); |
| if (line.length() == 0) continue; |
| |
| int count = Utility.split(line,';',parts); |
| if (count != 3) throw new ChainException("Bad count in Blocks", null); |
| blockData.add(new String[] {Utility.fromHex(parts[0]), Utility.fromHex(parts[1]), parts[2].trim()}); |
| } |
| |
| } catch (Exception e) { |
| System.out.println("Exception at: " + line); |
| throw e; |
| } finally { |
| input.close(); |
| } |
| } |
| |
| Set properties = new TreeSet(); |
| |
| void readSemi(String[] labels) throws Exception { |
| System.out.println(); |
| System.out.println("Reading '" + labels[0] + "'"); |
| if (major < 3 || (major == 3 && minor < 1)) { |
| if (labels[0] == "PropList") { |
| System.out.println("SKIPPING old format of Proplist for " + version); |
| return; |
| } |
| } |
| String tempVersion = version; |
| if (version.equals(UCD.latestVersion)) tempVersion = ""; |
| BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, Utility.LATIN1); |
| if (input == null) { |
| System.out.println("COULDN'T OPEN: " + labels[0]); |
| return; |
| } |
| boolean showedSemi = false; |
| boolean showedShort = false; |
| String line = ""; |
| |
| try { |
| String[] parts = new String[20]; |
| for (int lineNumber = 1; ; ++lineNumber) { |
| try { |
| line = input.readLine(); |
| if (line == null) break; |
| if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'"); |
| |
| String original = line; |
| String comment = ""; |
| int commentPos = line.indexOf('#'); |
| if (commentPos >= 0) { |
| comment = line.substring(commentPos+1).trim(); |
| line = line.substring(0, commentPos); |
| } |
| line = line.trim(); |
| if (line.length() == 0) continue; |
| |
| int count = Utility.split(line,';',parts); |
| |
| if (false && parts[0].equals("2801")) { |
| System.out.println("debug?"); |
| } |
| |
| // fix malformed or simple lists. |
| |
| if (count != labels.length) { |
| if (count == labels.length + 1 && parts[count-1].equals("")) { |
| if (!showedSemi) System.out.println("Extra semicolon in: " + original); |
| showedSemi = true; |
| } else if (count == 1) { // fix simple list |
| ++count; |
| parts[1] = "Y"; |
| } else if (count < labels.length) { |
| if (!showedShort) System.out.println("Line shorter than labels: " + original); |
| showedShort = true; |
| for (int i = count; i < labels.length; ++i) { |
| parts[i] = ""; |
| } |
| } else { |
| throw new ChainException("wrong count: {0}", |
| new Object[] {new Integer(line), new Integer(count)}); |
| } |
| } |
| |
| // store char |
| // first field is always character OR range. May be UTF-32 |
| int cpTop; |
| int cpStart; |
| int ddot = parts[0].indexOf("."); |
| if (ddot >= 0) { |
| cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0); |
| cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0); |
| // System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop)); |
| } else { |
| cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0); |
| cpTop = cpStart; |
| if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0); |
| } |
| |
| // properties first |
| if (labels[1].equals("PROP")) { |
| String prop = parts[2].trim(); |
| // FIX!! |
| boolean skipLetters = false; |
| if (prop.equals("Alphabetic")) { |
| prop = "Other_Alphabetic"; |
| skipLetters = true; |
| } |
| // END FIX!! |
| properties.add(prop); |
| if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted |
| int end = UTF32.char32At(Utility.fromHex(parts[1]),0); |
| if (end == 0) end = cpStart; |
| |
| for (int j = cpStart; j <= end; ++j) { |
| if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue; |
| if (skipLetters && getEntry(cpStart).isLetter()) continue; |
| appendCharProperties(j, prop); |
| } |
| } |
| } else { // not range! |
| String val = ""; |
| String lastVal; |
| |
| for (int i = 1; i < labels.length; ++i) { |
| String key = labels[i]; |
| lastVal = val; |
| if (isHex.get(key) != null) { |
| val = Utility.fromHex(parts[i]); |
| } else { |
| val = parts[i].trim(); |
| } |
| if (key.equals("OMIT")) continue; // do after val, so lastVal is correct |
| if (key.equals("RANGE")) continue; // do after val, so lastVal is correct |
| if (val.equals("")) continue; // skip empty values, they mean default |
| |
| for (int cps = cpStart; cps <= cpTop; ++cps) { |
| if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue; // skip condensed ranges |
| |
| if (key.equals("binary")) { |
| appendCharProperties(cps, val); |
| } else if (key.equals("fc")) { |
| UData data = getEntry(cps); |
| String type = parts[i-1].trim(); |
| if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) { |
| data.fullCaseFolding = val; |
| //System.out.println("*<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val)); |
| } |
| if (type.equals("S") || type.equals("C") || type.equals("L")) { |
| data.simpleCaseFolding = val; |
| //System.out.println("<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val)); |
| } |
| if (type.equals("I")) { |
| data.simpleCaseFolding = val; |
| setBinaryProperty(cps, CaseFoldTurkishI); |
| if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " |
| + Utility.hex(cps) + ": " + Utility.hex(val)); |
| } |
| } else if (labels[0].equals("SpecialCasing") // special handling for special casing |
| && labels[4].equals("sc") |
| && parts[4].trim().length() > 0) { |
| if (i < 4) { |
| if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", " |
| + Utility.hex(key) + ":" + Utility.hex(val)); |
| addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val); |
| } |
| } else { |
| /*if (key.equals("sn")) { // SKIP UNDEFINED!! |
| UData data = getEntryIfExists(cps); |
| if (data == null || data.generalCategory == Cn) continue; |
| } |
| */ |
| addCharData(cps, key, val); |
| } |
| } |
| } |
| } |
| } catch (Exception e) { |
| System.err.println("*Exception at: " + line + ", " + e.getMessage()); |
| //System.err.println(e.getMessage()); |
| } |
| } |
| } catch (Exception e) { |
| System.out.println("Exception at: " + line + ", " + e.getMessage()); |
| throw e; |
| } finally { |
| input.close(); |
| } |
| //printValues("JOINING_TYPE", jtSet); |
| //printValues("JOINING_GROUP", jgSet); |
| } |
| |
| static void printValues(String title, Set s) { |
| Iterator it = s.iterator(); |
| System.out.println("public static String[] " + title + " = {"); |
| while (it.hasNext()) { |
| String value = (String) it.next(); |
| System.out.println(" \"" + value + "\","); |
| } |
| System.out.println("};"); |
| it = s.iterator(); |
| System.out.println("public static byte "); |
| int count = 0; |
| while (it.hasNext()) { |
| String value = (String) it.next(); |
| System.out.println(" " + value.replace(' ', '-').toUpperCase() + " = " + (count++) + ","); |
| } |
| System.out.println(" LIMIT_" + title + " = " + count); |
| System.out.println(";"); |
| } |
| |
| Map charData = new TreeMap(); |
| |
| /* |
| static void writeXML() throws IOException { |
| System.out.println("Writing 'UCD-Main.xml'"); |
| BufferedWriter output = new BufferedWriter( |
| new OutputStreamWriter( |
| new FileOutputStream(UCD.BIN_DIR + "UCD_Data.xml"), |
| "UTF8"), |
| 32*1024); |
| |
| try { |
| // write header |
| |
| output.write("<?xml version='1.0' encoding='utf-8'?>\r\n"); |
| output.write("<UnicodeCharacterDatabase>\r\n"); |
| output.write(" <!-- IMPORTANT: see UCD-Notes.html for information on the format. This file CANNOT be read correctly without that information. -->\r\n"); |
| output.write(" <unicode version='" + major + "' minor='" + minor + "' update='" + update + "'/>\r\n"); |
| output.write(" <fileVersion status='DRAFT' date='" + new Date() + "'/>\r\n"); |
| |
| // write blocks |
| |
| Iterator it = blockData.iterator(); |
| while (it.hasNext()) { |
| String[] block = (String[]) it.next(); |
| output.write(" <block start='" + Utility.quoteXML(block[0]) |
| + "' end='" + Utility.quoteXML(block[1]) |
| + "' name='" + Utility.quoteXML(block[2]) |
| + "'/>\r\n" ); |
| } |
| |
| // write char data |
| |
| it = charData.keySet().iterator(); |
| while (it.hasNext()) { |
| Integer cc = (Integer) it.next(); |
| output.write(" <e c='" + Utility.quoteXML(cc.intValue()) + "'"); |
| /* |
| UData data = (UData) charData.get(cc); |
| Iterator dataIt = data.keySet().iterator(); |
| while (dataIt.hasNext()) { |
| String label = (String) dataIt.next(); |
| if (label.equals("c")) continue; // already wrote it. |
| if (label.equals("fc")) { |
| String fc = getResolved(data, "fc"); |
| String lc = getResolved(data, "lc"); |
| if (!fc.equals(lc) && !lc.equals(cc)) log.println("FC " + fc.length() + ": " + toString(cc)); |
| } |
| String value = Utility.quoteXML((String) data.get(label)); |
| output.write(" " + label + "='" + value + "'"); |
| } |
| *//* |
| output.write("/>\r\n"); |
| } |
| |
| // write footer |
| |
| output.write("</UnicodeCharacterDatabase>\r\n"); |
| } finally { |
| output.close(); |
| } |
| } |
| */ |
| |
| void writeJavaData() throws IOException { |
| Iterator it = charData.keySet().iterator(); |
| int codePoint = -1; |
| System.out.println("Writing " + dataFilePrefix + version); |
| DataOutputStream dataOut = new DataOutputStream( |
| new BufferedOutputStream( |
| new FileOutputStream(UCD.BIN_DIR + dataFilePrefix + version + ".bin"), |
| 128*1024)); |
| |
| // write header |
| dataOut.writeByte(BINARY_FORMAT); |
| dataOut.writeByte(major); |
| dataOut.writeByte(minor); |
| dataOut.writeByte(update); |
| long millis = System.currentTimeMillis(); |
| dataOut.writeLong(millis); |
| dataOut.writeInt(charData.size()); |
| System.out.println("Data Size: " + NumberFormat.getInstance().format(charData.size())); |
| int count = 0; |
| |
| // write records |
| try { |
| // write char data |
| |
| while (it.hasNext()) { |
| Object cc = (Object) it.next(); |
| //codePoint = UTF32.char32At(cc,0); |
| if (DEBUG) System.out.println(Utility.hex(cc)); |
| |
| UData uData = (UData) charData.get(cc); |
| if (false && uData.name == null) { |
| System.out.println("Warning: NULL name\r\n" + uData); |
| System.out.println(); |
| } |
| if (false && uData.codePoint == 0x2801) { |
| System.out.println("SPOT-CHECK: " + uData); |
| } |
| uData.writeBytes(dataOut); |
| count++; |
| if (DEBUG) System.out.println("Setting2"); |
| } |
| System.out.println("Wrote Data " + count); |
| } catch (Exception e) { |
| throw new ChainException("Bad data write {0}", new Object [] {Utility.hex(codePoint)}, e); |
| } finally { |
| dataOut.close(); |
| } |
| } |
| |
| //static String[] xsSplit = new String[40]; |
| |
| // Cache a little bit for speed |
| int getEntryCodePoint = -1; |
| UData getEntryUData = null; |
| |
| UData getEntryIfExists(int cp) { |
| if (cp == getEntryCodePoint) return getEntryUData; |
| Integer cc = new Integer(cp); |
| UData charEntry = (UData) charData.get(cc); |
| if (charEntry == null) return null; |
| getEntryCodePoint = cp; |
| getEntryUData = charEntry; |
| return charEntry; |
| } |
| |
| /* Get entry in table for cc |
| */ |
| UData getEntry(int cp) { |
| if (cp == getEntryCodePoint) return getEntryUData; |
| Integer cc = new Integer(cp); |
| UData charEntry = (UData) charData.get(cc); |
| if (charEntry == null) { |
| charEntry = new UData(cp); |
| charData.put(cc, charEntry); |
| //charEntry.put("c", cc); |
| } |
| getEntryCodePoint = cp; |
| getEntryUData = charEntry; |
| return charEntry; |
| } |
| /** Adds the character data. Signals duplicates with an exception |
| */ |
| |
| void setBinaryProperty(int cp, int binProp) { |
| UData charEntry = getEntry(cp); |
| charEntry.binaryProperties |= (1L << binProp); |
| } |
| |
| void appendCharProperties(int cp, String key) { |
| int ind; |
| //if (true || NEWPROPS) { |
| ind = Utility.lookup(key, UCD_Names.BP, true); |
| /*} else { |
| ind = Utility.lookup(key, UCD_Names.BP_OLD); |
| } |
| */ |
| //charEntry.binaryProperties |= (1 << ind); |
| setBinaryProperty(cp, ind); |
| } |
| |
| Set jtSet = new TreeSet(); |
| Set jgSet = new TreeSet(); |
| |
| /** Adds the character data. Signals duplicates with an exception |
| */ |
| void addCharData(int cp, String key, String value) { |
| //if (cp < 10) System.out.println("A: " + Utility.hex(cp) + ", " + key + ", " + Utility.quoteJavaString(value)); |
| UData charEntry = getEntry(cp); |
| //if (cp < 10) System.out.println(" " + charEntry); |
| |
| if (SHOW_SAMPLE && cp == 0x221) { |
| System.out.println("Sample: " + cp + ", " + key + ", " + value); |
| System.out.println(charEntry); |
| } |
| |
| if (key.equals("bm")) { |
| if (value.equals("Y")) charEntry.binaryProperties |= 1; |
| } else if (key.equals("ce")) { |
| charEntry.binaryProperties |= 2; |
| } else if (key.equals("on")) { |
| if (charEntry.name.charAt(0) == '<') { |
| charEntry.name = '<' + value + '>'; |
| } |
| } else if (key.equals("dm")) { |
| charEntry.decompositionType = CANONICAL; |
| if (value.charAt(0) == '<') { |
| int pos = value.indexOf('>'); |
| String dType = value.substring(1,pos); |
| if (major < 2) if (dType.charAt(0) == '+') dType = dType.substring(1); |
| value = value.substring(pos+1); |
| setField(charEntry, "dt", dType); |
| } |
| // FIX OLD |
| if (major < 2) { |
| int oldStyle = value.indexOf('<'); |
| if (oldStyle > 0) { |
| value = value.substring(0,oldStyle); |
| } |
| oldStyle = value.indexOf('{'); |
| if (oldStyle > 0) { |
| value = value.substring(0,oldStyle); |
| } |
| } |
| setField(charEntry, key, Utility.fromHex(value)); |
| |
| // fix the numeric fields to be more sensible |
| } else if (key.equals("dd")) { |
| if (charEntry.numericType < UCD_Types.DECIMAL) { |
| charEntry.numericType = UCD_Types.DECIMAL; |
| } |
| setField(charEntry, "nv", value); |
| } else if (key.equals("dv")) { |
| if (charEntry.numericType < UCD_Types.DIGIT) { |
| charEntry.numericType = UCD_Types.DIGIT; |
| } |
| setField(charEntry, "nv", value); |
| } else if (key.equals("nv")) { |
| if (charEntry.numericType < UCD_Types.NUMERIC) { |
| charEntry.numericType = UCD_Types.NUMERIC; |
| } |
| setField(charEntry, "nv", value); |
| /*} else if (key.equals("jt")) { |
| jtSet.add(value); |
| } else if (key.equals("jg")) { |
| jgSet.add(value); |
| */ |
| } else { |
| setField(charEntry, key, value); |
| } |
| if (SHOW_SAMPLE && cp == 0x221) { |
| System.out.println("Sample Result:"); |
| System.out.println(charEntry); |
| } |
| |
| } |
| |
| public void setField(UData uData, String fieldName, String fieldValue) { |
| try { |
| if (fieldName.equals("n")) { |
| uData.name = fieldValue; |
| } else if (fieldName.equals("dm")) { |
| uData.decompositionMapping = fieldValue; |
| } else if (fieldName.equals("bg")) { |
| uData.bidiMirror = fieldValue; |
| } else if (fieldName.equals("uc")) { |
| uData.simpleUppercase = fieldValue; |
| } else if (fieldName.equals("lc")) { |
| uData.simpleLowercase = fieldValue; |
| } else if (fieldName.equals("tc")) { |
| uData.simpleTitlecase = fieldValue; |
| |
| } else if (fieldName.equals("su")) { |
| uData.fullUppercase = fieldValue; |
| } else if (fieldName.equals("sl")) { |
| if (DEBUG) System.out.println("Setting full lowercase to " + Utility.hex(fieldValue) + uData); |
| uData.fullLowercase = fieldValue; |
| } else if (fieldName.equals("st")) { |
| uData.fullTitlecase = fieldValue; |
| |
| } else if (fieldName.equals("sc")) { |
| if (uData.specialCasing.length() > 0) { |
| uData.specialCasing += ";"; |
| } |
| uData.specialCasing += fieldValue; |
| |
| } else if (fieldName.equals("xp")) { |
| uData.binaryProperties |= 1L << Utility.lookup(fieldValue, UCD_Names.BP, true); |
| //UCD_Names.BP_OLD |
| |
| } else if (fieldName.equals("gc")) { |
| uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true); |
| // if (major >= 5 && uData.script == Unknown_Script |
| // && uData.generalCategory != Cn |
| // && uData.generalCategory != Cs |
| // && uData.generalCategory != Co) { |
| // uData.script = COMMON_SCRIPT; |
| // System.out.println("Resetting to Common Script: " + Utility.hex(uData.codePoint)); |
| // } |
| } else if (fieldName.equals("bc")) { |
| uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true); |
| } else if (fieldName.equals("dt")) { |
| if (major < 2) { |
| if (fieldValue.equals("no-break")) fieldValue = "noBreak"; |
| else if (fieldValue.equals("circled")) fieldValue = "circle"; |
| else if (fieldValue.equals("sup")) fieldValue = "super"; |
| else if (fieldValue.equals("break")) fieldValue = "compat"; |
| else if (fieldValue.equals("font variant")) fieldValue = "font"; |
| else if (fieldValue.equals("no-join")) fieldValue = "compat"; |
| else if (fieldValue.equals("join")) fieldValue = "compat"; |
| } |
| uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true); |
| } else if (fieldName.equals("nt")) { |
| uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true); |
| |
| } else if (fieldName.equals("ea")) { |
| uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true); |
| } else if (fieldName.equals("lb")) { |
| uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true); |
| |
| } else if (fieldName.equals("sn")) { |
| uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true); |
| |
| } else if (fieldName.equals("jt")) { |
| uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true); |
| } else if (fieldName.equals("jg")) { |
| byte temp = (byte)Utility.find(fieldValue, UCD_Names.OLD_JOINING_GROUP, true); |
| if (temp != -1) uData.joiningGroup = temp; |
| else uData.joiningGroup = Utility.lookup(fieldValue, UCD_Names.JOINING_GROUP, true); |
| |
| } else if (fieldName.equals("nv")) { |
| if (major < 2) { |
| if (fieldValue.equals("-")) return; |
| } |
| uData.numericValue = Utility.doubleFrom(fieldValue); |
| } else if (fieldName.equals("cc")) { |
| uData.combiningClass = (byte)Utility.intFrom(fieldValue); |
| if (uData.combiningClass == 9 && major >= 5) { |
| System.out.println("setting Grapheme_Link " + Utility.hex(uData.codePoint) + "\t" + uData.name); |
| uData.binaryProperties |= (1<<GraphemeLink); |
| System.out.println(uData); |
| } |
| } else if (fieldName.equals("bp")) { |
| uData.binaryProperties = (byte)Utility.longFrom(fieldValue); |
| // if (major >= 5 && (uData.binaryProperties & 1<<Noncharacter_Code_Point) != 0) { |
| // uData.script = Unknown_Script; |
| // } |
| System.out.println("Resetting: " + uData); |
| } else { |
| throw new IllegalArgumentException("Unknown fieldName"); |
| } |
| } catch (Exception e) { |
| throw new ChainException( |
| "Bad field name= \"{0}\", value= \"{1}\"", new Object[] {fieldName, fieldValue}, e); |
| } |
| } |
| |
| } |