// unicodetools/com/ibm/text/UCD/UCD.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
 * $Date: 2006/11/27 23:15:21 $
 * $Revision: 1.42 $
 *
 *******************************************************************************
 */

 package com.ibm.text.UCD;

 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.BitSet;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import java.io.IOException;
 import java.io.DataInputStream;
 import java.io.BufferedInputStream;
 import java.io.FileInputStream;
 import java.io.BufferedReader;

 import com.ibm.text.utility.*;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;

 public final class UCD implements UCD_Types {

     // Code point 0x20AC; presumably used for spot-check debugging elsewhere
     // in the file - its uses are not visible in this section. TODO confirm.
     private static int SPOT_CHECK = 0x20AC;

     // Compile-time debug switch (off).
     static final boolean DEBUG = false;

     /**
      * Used for the default version: make(String) substitutes this value
      * when it is given a null or empty version string.
      */
     public static final String latestVersion = "5.0.0";
     // NOTE(review): presumably the release preceding latestVersion; it is not
     // referenced in the visible part of the file - confirm its purpose.
     public static final String lastVersion = "4.1.0";

     /**
      * Returns the shared instance for the default (latest) version.
      * Delegates to {@link #make(String)} with an empty version string,
      * which that method resolves to {@link #latestVersion}.
      */
     public static UCD make() {
         final String defaultVersionMarker = "";
         return make(defaultVersionMarker);
     }

     /**
      * Returns the shared instance for a specific version, loading it from
      * file the first time that version is requested and caching it afterwards.
      *
      * @param version dotted version string such as "3.1.1"; null or empty
      *                selects {@link #latestVersion}
      * @return the cached or newly loaded instance
      * @throws IllegalArgumentException if the version contains no '.'
      */
     public static UCD make(String version) {
         final String wanted =
             (version == null || version.length() == 0) ? latestVersion : version;
         if (wanted.indexOf('.') < 0) {
             throw new IllegalArgumentException("Version must be of form 3.1.1");
         }

         UCD cached = (UCD) versionCache.get(wanted);
         if (cached != null) {
             return cached;
         }

         // First request for this version: load it, then remember it.
         UCD loaded = new UCD();
         loaded.fillFromFile(wanted);
         versionCache.put(wanted, loaded);
         return loaded;
     }

     /**
      * Returns the version string held by this instance.
      */
     public String getVersion() {
         return this.version;
     }

     /**
      * Returns the date recorded for this data (per the original comment,
      * the date the data was parsed).
      */
     public long getDate() {
         return this.date;
     }

     /**
      * Is the code point allocated? A code point counts as allocated when its
      * general category is not Cn, or when it is a noncharacter.
      */
     public boolean isAllocated(int codePoint) {
         return getCategory(codePoint) != Cn || isNoncharacter(codePoint);
     }

     /**
      * Is the code point a noncharacter in this version of the data?
      * Covers code points whose low 16 bits are FFFE or FFFF, and the range
      * FDD0..FDEF. Both rules are gated on compositeVersion
      * (NOTE(review): compositeVersion looks like a packed version number
      * such as 0x30100 for 3.1.0 - confirm against its definition).
      */
     public boolean isNoncharacter(int codePoint) {
         final boolean lowBitsAreFFFEorFFFF = (codePoint & 0xFFFE) == 0xFFFE;
         if (lowBitsAreFFFEorFFFF) {
             // Below version 0x20000 only the BMP pair qualifies.
             final boolean supplementaryBeforeV2 =
                 compositeVersion < 0x20000 && codePoint > 0xFFFF;
             return !supplementaryBeforeV2;
         }
         return codePoint >= 0xFDD0
             && codePoint <= 0xFDEF
             && compositeVersion >= 0x30100;
     }

     /**
      * Is the code point assigned to a character (or surrogate)?
      * True when its general category is anything other than Cn.
      */
     public boolean isAssigned(int codePoint) {
         final byte category = getCategory(codePoint);
         return category != Cn;
     }

     /**
      * Is the code point a private-use character? This is a fast range
      * check that does not consult the per-character data.
      */
     public boolean isPUA(int codePoint) {
         final boolean inBmpRange = codePoint >= 0xE000 && codePoint < 0xF900;
         if (inBmpRange) {
             return true;
         }
         // The supplementary ranges only apply from version 0x20000 on.
         if (compositeVersion < 0x20000) {
             return false;
         }
         final boolean inPlane15 = codePoint >= 0xF0000 && codePoint < 0xFFFFE;
         final boolean inPlane16 = codePoint >= 0x100000 && codePoint < 0x10FFFE;
         return inPlane15 || inPlane16;
     }

     /**
      * Many ranges are elided in the UCD; all but the first code point of
      * such a range have no entry of their own in the internal data.
      * Returns true when the raw lookup finds an entry for this code point.
      */
     public boolean isRepresented(int codePoint) {
         final Object rawEntry = getRaw(codePoint);
         return rawEntry != null;
     }

     /**
      * Returns the full textual form of the data for the code point
      * (an XML rendering, according to the original comment).
      */
     public String toString(int codePoint) {
         final String rendered = get(codePoint, true).toString(this, FULL);
         return rendered;
     }

     /**
      * Get the character name of a code point, in the NORMAL style.
      */
     public String getName(int codePoint) {
         final String normalName = getName(codePoint, NORMAL);
         return normalName;
     }

     /**
      * Get the character names for the code points in a string, in the
      * NORMAL style, joined the same way as {@link #getName(String, byte)}.
      */
     public String getName(String s) {
         final String normalNames = getName(s, NORMAL);
         return normalNames;
     }

     /**
      * Get the character name in the requested style: the short name when
      * style is SHORT, otherwise the regular name.
      */
     public String getName(int codePoint, byte style) {
         final boolean wantShort = (style == SHORT);
         return wantShort
             ? get(codePoint, true).shortName
             : get(codePoint, true).name;
     }

     /**
      * Get the character names for the code points in a string, separated by ", "
      */
     public String getName(String s, byte style) {
         final String separator = ", ";
         return getName(s, style, separator);
     }

     /**
      * Get the character names for the code points in a string, joined by
      * the given separator.
      *
      * @param s         text whose code points are named
      * @param style     name style passed through to getName(int, byte)
      * @param separator text inserted between consecutive names
      */
     public String getName(String s, byte style, String separator) {
         if (s.length() == 1) {
             return getName(s.charAt(0), style); // optimize BMP
         }
         StringBuffer names = new StringBuffer();
         int index = 0;
         while (index < s.length()) {
             final int codePoint = UTF16.charAt(s, index);
             if (index > 0) {
                 names.append(separator);
             }
             names.append(getName(codePoint, style));
             index += UTF16.getCharCount(codePoint);
         }
         return names.toString();
     }

     /**
      * Get the code in U+ notation: "U+" followed by the hex form produced
      * by Utility.hex.
      */
     public static String getCode(int codePoint) {
         final String hexDigits = Utility.hex(codePoint);
         return "U+" + hexDigits;
     }

     /**
      * Get the codes, in U+ notation, of the code points in a string,
      * separated by ", ".
      */
     public static String getCode(String s) {
         if (s.length() == 1) {
             return getCode(s.charAt(0)); // fast path
         }
         StringBuffer codes = new StringBuffer();
         int index = 0;
         while (index < s.length()) {
             final int codePoint = UTF32.char32At(s, index);
             if (index > 0) {
                 codes.append(", ");
             }
             codes.append(getCode(codePoint));
             index += UTF32.count16(codePoint);
         }
         return codes.toString();
     }

     /**
      * Get the name and number (U+xxxx NAME) for a code point, without any
      * transliterated rendering of the character.
      */
     public String getCodeAndName(int codePoint, byte type) {
         final String codeAndName = getCodeAndName(codePoint, type, null);
         return codeAndName;
     }

     /**
      * Get the name and number (U+xxxx NAME) for a code point. When a
      * transliterator is supplied, its rendering of the character is placed
      * between the code and the name as " ( ... ) ".
      *
      * @param charTrans transliterator applied to the character, or null for none
      */
     public String getCodeAndName(int codePoint, byte type, Transliterator charTrans) {
         StringBuffer text = new StringBuffer(getCode(codePoint));
         if (charTrans == null) {
             text.append(" ");
         } else {
             text.append(" ( ")
                 .append(charTrans.transliterate(UTF16.valueOf(codePoint)))
                 .append(" ) ");
         }
         text.append(getName(codePoint, type));
         return text.toString();
     }

     /**
      * Get the name and number (U+xxxx NAME) for the code points in a string,
      * separated by ", ", without any transliterated rendering.
      */
     public String getCodeAndName(String s, byte type) {
         final String codesAndNames = getCodeAndName(s, type, null);
         return codesAndNames;
     }

     /**
      * Get the name and number (U+xxxx NAME) for the code points in a string,
      * separated by ", ". A null or empty string yields the literal "NULL".
      *
      * @param charTrans transliterator passed through for each code point, or null
      */
     public String getCodeAndName(String s, byte type, Transliterator charTrans) {
         if (s == null || s.length() == 0) {
             return "NULL";
         }
         if (s.length() == 1) {
             return getCodeAndName(s.charAt(0), type, charTrans); // fast path
         }
         StringBuffer entries = new StringBuffer();
         int index = 0;
         while (index < s.length()) {
             final int codePoint = UTF32.char32At(s, index);
             if (index > 0) {
                 entries.append(", ");
             }
             entries.append(getCodeAndName(codePoint, type, charTrans));
             index += UTF32.count16(codePoint);
         }
         return entries.toString();
     }

     /**
      * Get the name and number (U+xxxx NAME) for a code point, using the
      * NORMAL name style.
      */
     public String getCodeAndName(int codePoint) {
         final String codeAndName = getCodeAndName(codePoint, NORMAL);
         return codeAndName;
     }

     /**
      * Get the name and number (U+xxxx NAME) for the code points in a string,
      * using the NORMAL name style.
      */
     public String getCodeAndName(String s) {
         final String codesAndNames = getCodeAndName(s, NORMAL);
         return codesAndNames;
     }

     /**
      * Get the general category stored for the code point.
      */
     public byte getCategory(int codePoint) {
         final byte category = get(codePoint, false).generalCategory;
         return category;
     }

     // Pseudo-categories returned by getModCat() and named by
     // getModCatID_fromIndex(). Each stands for a whole group of general
     // categories ("S&", "P&", "Z&", "N&", "M&", "L&", "C&"); FAKENC ("NC")
     // marks noncharacters.
     // (FAKE_SEPERATOR is misspelled; the name is kept because other code in
     // this file refers to it.)
     private static final byte FAKE_SYMBOL = 57; // fake category for comparison
     private static final byte FAKE_PUNCTUATION = 58; // fake category for comparison
     private static final byte FAKE_SEPERATOR = 59; // fake category for comparison
     private static final byte FAKE_NUMBER = 60; // fake category for comparison
     private static final byte FAKE_MARK = 61; // fake category for comparison
     private static final byte FAKE_LETTER = 62; // fake category for comparison
     private static final byte FAKE_OTHER = 63; // fake category for comparison
     private static final byte FAKENC = 31; // fake category for comparison

     /**
      * Get the general category of a code point, modified for comparison:
      * unassigned noncharacters become FAKENC, and any category whose bit
      * (1 &lt;&lt; category) is set in collapseBits is replaced by the fake
      * category of its group (letter, mark, number, separator, other,
      * punctuation, symbol).
      *
      * @param cp           the code point
      * @param collapseBits bit mask of categories to collapse; the special
      *                     value -1 additionally folds the mark, number,
      *                     separator, punctuation and symbol groups into
      *                     FAKE_LETTER
      * @return the real category, or one of the FAKE_* / FAKENC values
      */
     public byte getModCat(int cp, int collapseBits) {
         byte cat = getCategory(cp);
         if (cat == UNASSIGNED && isNoncharacter(cp)) {
             return FAKENC;
         }

         final boolean collapseRequested = ((1 << cat) & collapseBits) != 0;
         if (!collapseRequested) {
             return cat;
         }

         // Replace the category by the fake category of its group.
         switch (cat) {
             case UNASSIGNED:
             case FAKENC:
             case CONTROL:
             case FORMAT:
             case UNUSED_CATEGORY:
             case PRIVATE_USE:
             case SURROGATE:
                 cat = FAKE_OTHER;
                 break;

             case UPPERCASE_LETTER:
             case LOWERCASE_LETTER:
             case TITLECASE_LETTER:
             case MODIFIER_LETTER:
             case OTHER_LETTER:
                 cat = FAKE_LETTER;
                 break;

             case NON_SPACING_MARK:
             case ENCLOSING_MARK:
             case COMBINING_SPACING_MARK:
                 cat = FAKE_MARK;
                 break;

             case DECIMAL_DIGIT_NUMBER:
             case LETTER_NUMBER:
             case OTHER_NUMBER:
                 cat = FAKE_NUMBER;
                 break;

             case SPACE_SEPARATOR:
             case LINE_SEPARATOR:
             case PARAGRAPH_SEPARATOR:
                 cat = FAKE_SEPERATOR;
                 break;

             case DASH_PUNCTUATION:
             case START_PUNCTUATION:
             case END_PUNCTUATION:
             case CONNECTOR_PUNCTUATION:
             case OTHER_PUNCTUATION:
             case INITIAL_PUNCTUATION:
             case FINAL_PUNCTUATION:
                 cat = FAKE_PUNCTUATION;
                 break;

             case MATH_SYMBOL:
             case CURRENCY_SYMBOL:
             case MODIFIER_SYMBOL:
             case OTHER_SYMBOL:
                 cat = FAKE_SYMBOL;
                 break;
         }

         // With every bit set, all groups except "other" end up as letters.
         if (collapseBits == -1
             && (cat == FAKE_MARK
                 || cat == FAKE_NUMBER
                 || cat == FAKE_SEPERATOR
                 || cat == FAKE_PUNCTUATION
                 || cat == FAKE_SYMBOL)) {
             cat = FAKE_LETTER;
         }
         return cat;
     }

     /**
      * Get the identifier for a value returned by getModCat: the group
      * abbreviations "S&amp;", "P&amp;", "Z&amp;", "N&amp;", "M&amp;",
      * "L&amp;", "C&amp;" or "NC" for the fake categories, and the regular
      * category ID for everything else.
      */
     public String getModCatID_fromIndex(byte cat) {
         final String id;
         switch (cat) {
             case FAKE_SYMBOL:      id = "S&"; break;
             case FAKE_PUNCTUATION: id = "P&"; break;
             case FAKE_SEPERATOR:   id = "Z&"; break;
             case FAKE_NUMBER:      id = "N&"; break;
             case FAKE_MARK:        id = "M&"; break;
             case FAKE_LETTER:      id = "L&"; break;
             case FAKE_OTHER:       id = "C&"; break;
             case FAKENC:           id = "NC"; break;
             default:               id = getCategoryID_fromIndex(cat); break;
         }
         return id;
     }

     /**
      * Get the main category of a general category, as a mask.
      *
      * @param cat a general category value
      * @return the mask constant of the group the category belongs to
      * @throws IllegalArgumentException if cat is not one of the handled categories
      */
     public static int mainCategoryMask(byte cat) {
         switch (cat) {
             case Lu:
             case Ll:
             case Lt:
             case Lm:
             case Lo:
                 return LETTER_MASK;
             case Mn:
             case Me:
             case Mc:
                 return MARK_MASK;
             case Nd:
             case Nl:
             case No:
                 return NUMBER_MASK;
             case Zs:
             case Zl:
             case Zp:
                 return SEPARATOR_MASK;
             case Cc:
             case Cf:
             case Cs:
             case Co:
                 return CONTROL_MASK;
             case Pc:
             case Pd:
             case Ps:
             case Pe:
             case Po:
             case Pi:
             case Pf:
                 return PUNCTUATION_MASK;
             case Sm:
             case Sc:
             case Sk:
             case So:
                 return SYMBOL_MASK;
             case Cn:
                 return UNASSIGNED_MASK;
             default:
                 throw new IllegalArgumentException ("Illegal General Category " + cat);
         }
     }

     /**
      * Get the combining class, a number between zero and 255. The stored
      * value is masked to its low eight bits and returned as a short to
      * avoid the signed-byte problem in Java.
      */
     public short getCombiningClass(int codePoint) {
         final int unsignedClass = get(codePoint, false).combiningClass & 0xFF;
         return (short) unsignedClass;
     }

     /**
      * Does this combining class actually occur in this version of the data?
      * The byte is treated as unsigned (0..255) when indexing the set.
      */
     public boolean isCombiningClassUsed(byte value) {
         final int unsignedValue = value & 0xFF;
         return combiningClassSet.get(unsignedValue);
     }

     // Default bidi classes for unassigned code points; built lazily by
     // getBidiClass() on the first lookup of an unassigned code point.
     static UnicodeSet BIDI_R_SET, BIDI_AL_SET, BIDI_BN_SET;

     /**
      * Get the bidi class.
      * Assigned code points (general category other than Cn) return the value
      * stored in the data. Unassigned code points get a default: BIDI_BN,
      * BIDI_R or BIDI_AL when they fall in the corresponding lazily built
      * set, and BIDI_L otherwise.
      */
     public byte getBidiClass(int codePoint) {
         if (getCategory(codePoint) != Cn) return get(codePoint, false).bidiClass;

         // The default sets are built once, by the first call that gets here.
         // NOTE(review): the sets are static, yet they are built from this
         // instance's blockData and compositeVersion, so whichever instance
         // arrives first decides them for every version - confirm intended.
         // NOTE(review): BIDI_R_SET is assigned before BIDI_BN_SET exists; if
         // anything below throws in between, later calls skip this branch and
         // hit a null BIDI_BN_SET - confirm this cannot happen in practice.
         if (BIDI_R_SET == null) { // build it

             BIDI_R_SET = new UnicodeSet();
             BIDI_AL_SET = new UnicodeSet();

             // Seed the sets from named blocks (getSet presumably adds the
             // block's code points to the given set - TODO confirm).
             blockData.getSet("Hebrew",BIDI_R_SET);
             blockData.getSet("Cypriot_Syllabary",BIDI_R_SET);

             blockData.getSet("Arabic",BIDI_AL_SET);
             blockData.getSet("Syriac",BIDI_AL_SET);
             blockData.getSet("Thaana",BIDI_AL_SET);
             blockData.getSet("Arabic_Presentation_Forms-A",BIDI_AL_SET);
             blockData.getSet("Arabic_Presentation_Forms-B",BIDI_AL_SET);
             /*
             int blockId = 0;
             BlockData blockData = new BlockData();
             UnicodeSet s = blockData.get
             while (getBlockData(blockId++, blockData)) {
                 if (blockData.name.equals("Hebrew")
                  || blockData.name.equals("Cypriot_Syllabary")
                 ) {
                     System.out.println("R:  Adding " + blockData.name + ": "
                         + Utility.hex(blockData.start)
                         + ".." + Utility.hex(blockData.end));
                     BIDI_R_SET.add(blockData.start, blockData.end);
                 } else if (blockData.name.equals("Arabic")
                  || blockData.name.equals("Syriac")
                  || blockData.name.equals("Thaana")
                  || blockData.name.equals("Arabic_Presentation_Forms-A")
                  || blockData.name.equals("Arabic_Presentation_Forms-B")
                 ) {
                     System.out.println("AL: Adding " + blockData.name + ": "
                         + Utility.hex(blockData.start)
                         + ".." + Utility.hex(blockData.end));
                     BIDI_AL_SET.add(blockData.start, blockData.end);
                 } else {
                     if (false) System.out.println("SKIPPING: " + blockData.name + ": "
                         + Utility.hex(blockData.start)
                         + ".." + Utility.hex(blockData.end));
                 }
             }
             */

             System.out.println("BIDI_R_SET: " + BIDI_R_SET);
             System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);

             // Extend R with fixed ranges that the block lookups did not
             // already cover; only the missing part is reported and added.
             UnicodeSet BIDI_R_Delta = new UnicodeSet(0xFB1D, 0xFB4F).add(0x10800, 0x10FFF).add(0x07C0,0x8FF);
             BIDI_R_Delta.removeAll(BIDI_R_SET);
             System.out.println("R: Adding " + BIDI_R_Delta);
             BIDI_R_SET.addAll(BIDI_R_Delta);

             // Same for AL with the range 0750..077F.
             UnicodeSet BIDI_AL_Delta = new UnicodeSet(0x0750, 0x077F);
             BIDI_AL_Delta.removeAll(BIDI_AL_SET);
             System.out.println("AL: Adding " + BIDI_AL_Delta);
             BIDI_AL_SET.addAll(BIDI_AL_Delta);

             // Noncharacters (with Utility.BOM taken out of that set first)
             // are excluded from both R and AL.
             UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point, this).getSet();
             noncharacters.remove(Utility.BOM);

             System.out.println("Removing Noncharacters/BOM  " + noncharacters);
             BIDI_R_SET.removeAll(noncharacters);
             BIDI_AL_SET.removeAll(noncharacters);

             // From composite version 0x40001 on, noncharacters and the
             // DefaultIgnorable set default to BN; earlier the BN set is empty.
             BIDI_BN_SET = new UnicodeSet();
             if (compositeVersion >= 0x40001) {
                 BIDI_BN_SET.addAll(noncharacters);
                 UnicodeSet DefaultIg = DerivedProperty.make(DefaultIgnorable, this).getSet();
                 System.out.println("DefaultIg: " + DefaultIg);
                 BIDI_BN_SET.addAll(DefaultIg);
             }

             System.out.println("BIDI_R_SET: " + BIDI_R_SET);
             System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
             System.out.println("BIDI_BN_SET: " + BIDI_BN_SET);

             // Sanity check: a code point must not default to both R and AL.
             if (BIDI_R_SET.containsSome(BIDI_AL_SET)) {
                 throw new ChainException("BIDI values for Cf characters overlap!!", null);
             }

         }

         // Lookup order gives BN priority over R, and R over AL.
         if (BIDI_BN_SET.contains(codePoint)) {
             return BIDI_BN;
         }
         if (BIDI_R_SET.contains(codePoint)) {
             return BIDI_R;
         }
         if (BIDI_AL_SET.contains(codePoint)) {
             return BIDI_AL;
         }
         return BIDI_L;
     }

     /**
      * Get the RAW decomposition mapping, exactly as stored for the code
      * point. Must be applied recursively to obtain the full mapping!
      */
     public String getDecompositionMapping(int codePoint) {
         final String rawMapping = get(codePoint, true).decompositionMapping;
         return rawMapping;
     }

     /**
      * Get the BIDI mirroring value stored for the code point.
      */
     public String getBidiMirror(int codePoint) {
         final String mirror = get(codePoint, true).bidiMirror;
         return mirror;
     }

     /**
      * Get the RAW decomposition type: the &lt;...&gt; field in the UCD data.
      */
     public byte getDecompositionType(int codePoint) {
         final byte rawType = get(codePoint, false).decompositionType;
         return rawType;
     }

     // Numeric overrides read from the Unihan file, keyed by code point and
     // holding HanException values. Null until populateHanExceptions() runs,
     // which getNumericValue()/getNumericType() trigger on first use.
     IntMap hanExceptions = null;

     /**
      * Numeric value and numeric type recorded for one Han character by
      * populateHanExceptions(); consulted before the regular character data
      * in getNumericValue() and getNumericType().
      */
     static class HanException {
         double numericValue;
         byte numericType;
     }

     /**
      * Reads the Unihan file for this version and collects the values of one
      * property into a map from code point to value string.
      * <p>
      * Lines shorter than six characters and lines starting with '#' are
      * skipped. Every other line is expected to hold three tab-separated
      * fields: a code whose first two characters are skipped before the rest
      * is parsed as hex (presumably a "U+" prefix), the property name, and
      * the value.
      *
      * @param propertyName property to collect, compared ignoring case
      * @return map of code point to trimmed property value
      * @throws ChainException wrapping any failure while opening, reading or
      *         parsing the file
      */
     public UnicodeMap getHanValue(String propertyName) {
         UnicodeMap result = new UnicodeMap();
         BufferedReader in = null;
         try {
             in = Utility.openUnicodeFile("Unihan", version, true, Utility.UTF8);
             int lineCounter = 0;
             while (true) {
                 Utility.dot(++lineCounter);

                 String line = in.readLine();
                 if (line == null) break;
                 if (line.length() < 6) continue;
                 if (line.charAt(0) == '#') continue;
                 line = line.trim();

                 int tabPos = line.indexOf('\t');
                 int tabPos2 = line.indexOf('\t', tabPos+1);

                 String property = line.substring(tabPos+1, tabPos2).trim();
                 if (!property.equalsIgnoreCase(propertyName)) continue;

                 String scode = line.substring(2, tabPos).trim();
                 int code = Integer.parseInt(scode, 16);
                 String propertyValue = line.substring(tabPos2+1).trim();
                 result.put(code, propertyValue);
             }
             in.close();
             in = null; // closed cleanly; nothing left for the cleanup below
         } catch (Exception e) {
             throw new ChainException("Han File Processing Exception", null, e);
         } finally {
             // Release the reader when a failure skipped the regular close.
             if (in != null) {
                 try {
                     in.close();
                 } catch (IOException ignored) {
                     // best effort: the original failure is already propagating
                 }
             }
             Utility.fixDot();
         }
         return result;
     }


     /**
      * Fills hanExceptions with the numeric values found in the Unihan file
      * for this version.
      * <p>
      * Only properties whose name ends in "Numeric" are used, and of those
      * only kAccountingNumeric, kOtherNumeric and kPrimaryNumeric are
      * accepted; all three are recorded with numeric type NUMERIC. Code
      * points 0x5793 and 0x4EAC are skipped deliberately. A second numeric
      * entry for the same code point, or an unknown "...Numeric" property,
      * aborts the load.
      *
      * @throws ChainException wrapping any failure while opening, reading or
      *         parsing the file
      */
     void populateHanExceptions() {
         hanExceptions = new IntMap();
         BufferedReader in = null;
         try {
             in = Utility.openUnicodeFile("Unihan", version, true, Utility.UTF8);
             int lineCounter = 0;
             while (true) {
                 Utility.dot(++lineCounter);

                 String line = in.readLine();
                 if (line == null) break;
                 if (line.length() < 6) continue;
                 if (line.charAt(0) == '#') continue;
                 line = line.trim();

                 int tabPos = line.indexOf('\t');
                 int tabPos2 = line.indexOf('\t', tabPos+1);

                 String property = line.substring(tabPos+1, tabPos2).trim();
                 if (!property.endsWith("Numeric")) continue;

                 // Drop thousands separators; if text follows the number,
                 // report the line and keep only the part before the space.
                 String propertyValue = line.substring(tabPos2+1).trim();
                 propertyValue = Utility.replace(propertyValue, ",", "");
                 int hack = propertyValue.indexOf(' ');
                 if (hack >= 0) {
                     Utility.fixDot();
                     System.out.println("BAD NUMBER: " + line);
                     propertyValue = propertyValue.substring(0,hack);
                 }

                 // The code field starts after two leading characters.
                 String scode = line.substring(2, tabPos).trim();
                 int code = Integer.parseInt(scode, 16);

                 if (code == 0x5793 || code == 0x4EAC) continue; // two exceptions!!

                 //kAccountingNumeric
                 //kOtherNumeric
                 //kPrimaryNumeric

                 HanException except = (HanException) hanExceptions.get(code);
                 if (except != null) throw new Exception("Duplicate Numeric Value for " + line);
                 except = new HanException();
                 hanExceptions.put(code, except);
                 except.numericValue = Double.parseDouble(propertyValue);
                 except.numericType = property.equals("kAccountingNumeric") ? NUMERIC
                 : property.equals("kOtherNumeric") ? NUMERIC
                 : property.equals("kPrimaryNumeric") ? NUMERIC
                 : NONE;
                 if (except.numericType == NONE) throw new Exception("Unknown Numeric Type for " + line);
             }
             in.close();
             in = null; // closed cleanly; nothing left for the cleanup below
         } catch (Exception e) {
             throw new ChainException("Han File Processing Exception", null, e);
         } finally {
             // Release the reader when a failure skipped the regular close.
             if (in != null) {
                 try {
                     in.close();
                 } catch (IOException ignored) {
                     // best effort: the original failure is already propagating
                 }
             }
             Utility.fixDot();
             System.out.println("****Size: " + hanExceptions.size());
         }
     }

     /**
      * Get the numeric value of a code point. A value loaded from the Unihan
      * file takes precedence over the one in the regular character data; the
      * Unihan values are loaded on first use.
      */
     public double getNumericValue(int codePoint) {
         if (hanExceptions == null) {
             populateHanExceptions();
         }
         final HanException override = (HanException) hanExceptions.get(codePoint);
         if (override == null) {
             return get(codePoint, false).numericValue;
         }
         return override.numericValue;
     }

     /**
      * Get the numeric type of a code point. A type loaded from the Unihan
      * file takes precedence over the one in the regular character data; the
      * Unihan values are loaded on first use.
      */
     public byte getNumericType(int codePoint) {
         if (hanExceptions == null) {
             populateHanExceptions();
         }
         final HanException override = (HanException) hanExceptions.get(codePoint);
         if (override == null) {
             return get(codePoint, false).numericType;
         }
         return override.numericType;
     }

     /**
      * Get a case mapping of a code point with no special-casing condition
      * (an empty condition string).
      */
     public String getCase(int codePoint, byte simpleVsFull, byte caseType) {
         final String noCondition = "";
         return getCase(codePoint, simpleVsFull, caseType, noCondition);
     }

     /**
      * Get a case mapping of a string with no special-casing condition
      * (an empty condition string).
      */
     public String getCase(String s, byte simpleVsFull, byte caseType) {
         final String noCondition = "";
         return getCase(s, simpleVsFull, caseType, noCondition);
     }

     /**
      * Get one case mapping of a code point.
      *
      * @param codePoint    the code point to map
      * @param simpleVsFull SIMPLE or FULL
      * @param caseType     a value in the range LOWER..FOLD
      * @param condition    special-casing condition; the full mapping of a
      *                     character that has special-casing text is only used
      *                     when this is non-empty and occurs in that text
      * @return the requested mapping string from the character data
      * @throws IllegalArgumentException if simpleVsFull or caseType is out of range
      */
     public String getCase(int codePoint, byte simpleVsFull, byte caseType, String condition) {
         UData udata = get(codePoint, true);
         if (caseType < LOWER || caseType > FOLD
           || (simpleVsFull != SIMPLE && simpleVsFull != FULL)) {
             throw new IllegalArgumentException("simpleVsFull or caseType out of bounds");
         }
         if (caseType < FOLD) {
             // Lower/upper/title: a conditional special casing only applies
             // when the caller's condition matches; otherwise fall back to the
             // simple mapping.
             if (simpleVsFull == FULL && udata.specialCasing.length() != 0) {
                 if (condition.length() == 0
                 || udata.specialCasing.indexOf(condition) < 0) {
                     simpleVsFull = SIMPLE;
                 }
             }
         } else {
             // special case. For these characters alone, use "I" as option meaning collapse to "i"
             //if (codePoint == 0x0131 || codePoint == 0x0130) { // special case turkish i
             // NOTE(review): the comment above speaks of "I" as an option, yet
             // the test below looks at udata.specialCasing rather than at the
             // condition argument - confirm which one is intended.
             if (getBinaryProperty(codePoint, CaseFoldTurkishI)) {
                 if (!udata.specialCasing.equals("I")) simpleVsFull = SIMPLE;
                 else simpleVsFull = FULL;
             }
         }

         // Dispatch on the sum of the two selectors; the case labels below
         // are the sums of the corresponding constants.
         switch (caseType + simpleVsFull) {
             case SIMPLE + UPPER: return udata.simpleUppercase;
             case SIMPLE + LOWER: return udata.simpleLowercase;
             case SIMPLE + TITLE: return udata.simpleTitlecase;
             case SIMPLE + FOLD: return udata.simpleCaseFolding;
             case FULL + UPPER: return udata.fullUppercase;
             case FULL + LOWER: return udata.fullLowercase;
             case FULL + TITLE: return udata.fullTitlecase;
             case FULL + FOLD: return udata.fullCaseFolding;
         }
         throw new IllegalArgumentException("getCase: " + caseType + ", " + simpleVsFull);
     }

     // U+00AD; one of the characters getCase(String, ...) passes over when
     // deciding the case of the next character during titlecasing.
     static final char SHY = '\u00AD';

     // U+2019; passed over in the same way as SHY during titlecasing.
     static final char APOSTROPHE = '\u2019';

     /**
      * Get a case mapping of a string by mapping each code point in turn and
      * concatenating the results.
      * <p>
      * For TITLE, the first cased letter is titlecased and the following
      * code points are lowercased until a code point that is neither cased
      * nor ignored switches back to TITLE. Ignored for this purpose are SHY,
      * the ASCII apostrophe, U+2019, code points of category Mn, Me, Cf or
      * Lm, and default-ignorable code points.
      *
      * @param s            the text to map
      * @param simpleVsFull SIMPLE or FULL
      * @param caseType     a value in the range LOWER..FOLD
      * @param condition    special-casing condition applied to every code point
      * @return the mapped string
      */
     public String getCase(String s, byte simpleVsFull, byte caseType, String condition) {
         // Fast path for a single code point. The condition is forwarded so
         // that a one-code-point string is mapped exactly like a longer one;
         // calling the three-argument overload here would silently drop it.
         if (UTF32.length32(s) == 1) {
             return getCase(UTF32.char32At(s, 0), simpleVsFull, caseType, condition);
         }
         StringBuffer result = new StringBuffer();
         int cp;
         byte currentCaseType = caseType;
         UCDProperty defaultIgnorable = DerivedProperty.make(DerivedProperty.DefaultIgnorable, this);

         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
             cp = UTF32.char32At(s, i);
             String mappedVersion = getCase(cp, simpleVsFull, currentCaseType, condition);
             result.append(mappedVersion);
             if (caseType == TITLE) {    // set the case type for the next character

                 // certain characters are ignored
                 if (cp == SHY || cp == '\'' || cp == APOSTROPHE) continue;
                 byte cat = getCategory(cp);
                 if (cat == Mn || cat == Me || cat == Cf || cat == Lm) continue;
                 if (defaultIgnorable.hasValue(cp)) continue;
                 // if DefaultIgnorable is not supported, then
                 // check for (Cf + Cc + Cs) - White_Space
                 // if (cat == Cs && cp != 0x85 && (cp < 9 || cp > 0xD)) continue;

                 // if letter is cased, change next to lowercase, otherwise revert to TITLE
                 if (cat == Lu || cat == Ll || cat == Lt
                   || getBinaryProperty(cp, Other_Lowercase) // skip if not supported
                   || getBinaryProperty(cp, Other_Uppercase) // skip if not supported
                 ) {
                     currentCaseType = LOWER;
                 } else {
                     currentCaseType = TITLE;
                 }
             }
         }
         return result.toString();
     }

     /*
     public String getSimpleLowercase(int codePoint) {
         return get(codePoint, true).simpleLowercase;
     }

     public String getSimpleUppercase(int codePoint) {
         return get(codePoint, true).simpleUppercase;
     }

     public String getSimpleTitlecase(int codePoint) {
         return get(codePoint, true).simpleTitlecase;
     }

     public String getSimpleCaseFolding(int codePoint) {
         return get(codePoint, true).simpleCaseFolding;
     }

     public String getFullLowercase(int codePoint) {
         return get(codePoint, true).fullLowercase;
     }

     public String getFullUppercase(int codePoint) {
         return get(codePoint, true).fullUppercase;
     }

     public String getFullTitlecase(int codePoint) {
         return get(codePoint, true).fullTitlecase;
     }

     public String getFullCaseFolding(int codePoint) {
         return get(codePoint, true).simpleCaseFolding;
     }

     public String getLowercase(int codePoint, boolean full) {
         if (full) return getFullLowercase(codePoint);
         return getSimpleLowercase(codePoint);
     }

     public String getUppercase(int codePoint, boolean full) {
         if (full) return getFullUppercase(codePoint);
         return getSimpleLowercase(codePoint);
     }

     public String getTitlecase(int codePoint, boolean full) {
         if (full) return getFullTitlecase(codePoint);
         return getSimpleTitlecase(codePoint);
     }

     public String getCaseFolding(int codePoint, boolean full) {
         if (full) return getFullCaseFolding(codePoint);
         return getSimpleCaseFolding(codePoint);
     }

     public String getLowercase(String s, boolean full) {
         if (s.length() == 1) return getLowercase(s.charAt(0), true);
         StringBuffer result = new StringBuffer();
         int cp;
         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
             cp = UTF32.char32At(s, i);
             if (i > 0) result.append(", ");
             result.append(getLowercase(cp, true));
         }
         return result.toString();
     }

     public String getUppercase(String s, boolean full) {
         if (s.length() == 1) return getUppercase(s.charAt(0), true);
         StringBuffer result = new StringBuffer();
         int cp;
         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
             cp = UTF32.char32At(s, i);
             if (i > 0) result.append(", ");
             result.append(getUppercase(cp, true));
         }
         return result.toString();
     }

     public String getTitlecase(String s, boolean full) {
         if (s.length() == 1) return getTitlecase(s.charAt(0), true);
         StringBuffer result = new StringBuffer();
         int cp;
         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
             cp = UTF32.char32At(s, i);
             if (i > 0) result.append(", ");
             result.append(getTitlecase(cp, true));
         }
         return result.toString();
     }

     public String getCaseFolding(String s, boolean full) {
         if (s.length() == 1) return getCaseFolding(s.charAt(0), true);
         StringBuffer result = new StringBuffer();
         int cp;
         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
             cp = UTF32.char32At(s, i);
             if (i > 0) result.append(", ");
             result.append(getCaseFolding(cp, true));
         }
         return result.toString();
     }
     */

     /**
      * Get the special-casing text stored for the code point.
      */
     public String getSpecialCase(int codePoint) {
         final String specialCasing = get(codePoint, true).specialCasing;
         return specialCasing;
     }

     /**
      * Get the East Asian width value stored for the code point.
      */
     public byte getEastAsianWidth(int codePoint) {
 //      if (0x30000 <= codepoint && codepoint <= 0x3FFFD) return EAW;
         final byte width = get(codePoint, false).eastAsianWidth;
         return width;
     }

     /**
      * Get the line break value stored for the code point.
      */
     public byte getLineBreak(int codePoint) {
         final byte lineBreak = get(codePoint, false).lineBreak;
         return lineBreak;
     }

     /**
      * Get the script value stored for the code point.
      */
     public byte getScript(int codePoint) {
         final byte script = get(codePoint, false).script;
         return script;
     }


     /**
      * Get the script of a string: the script of the last code point whose
      * script is not INHERITED_SCRIPT. Returns COMMON_SCRIPT for a null or
      * empty string, or when every code point is inherited.
      */
     public byte getScript(String s) {
         byte lastScript = COMMON_SCRIPT;
         if (s == null || s.length() == 0) {
             return lastScript;
         }
         int index = 0;
         while (index < s.length()) {
             final int codePoint = UTF32.char32At(s, index);
             index += UTF32.count16(codePoint);
             final byte script = getScript(codePoint);
             if (script != INHERITED_SCRIPT) {
                 lastScript = script;
             }
         }
         return lastScript;
     }


     /**
      * Get the age value stored for the code point.
      */
     public byte getAge(int codePoint) {
         final byte age = get(codePoint, false).age;
         return age;
     }

     /**
      * Get the joining type stored for the code point.
      */
     public byte getJoiningType(int codePoint) {
         final byte joiningType = get(codePoint, false).joiningType;
         return joiningType;
     }

     /**
      * Get the joining group stored for the code point.
      */
     public byte getJoiningGroup(int codePoint) {
         final byte joiningGroup = get(codePoint, false).joiningGroup;
         return joiningGroup;
     }

     /**
      * Get all binary properties of the code point as one bit set, with one
      * bit per property (see {@link #getBinaryProperty(int, int)}).
      */
     public long getBinaryProperties(int codePoint) {
         final long propertyBits = get(codePoint, false).binaryProperties;
         return propertyBits;
     }

     /**
      * Test one binary property of the code point.
      *
      * @param bit index of the property's bit within the binary property set
      * @return true when that bit is set for the code point
      */
     public boolean getBinaryProperty(int codePoint, int bit) {
         final long propertyMask = 1L << bit;
         return (get(codePoint, false).binaryProperties & propertyMask) != 0;
     }

     // ENUM Mask Utilities

     /**
      * Get the general category of the code point as a single-bit mask
      * (1 shifted left by the category value).
      */
     public int getCategoryMask(int codePoint) {
         final int categoryIndex = get(codePoint, false).generalCategory;
         return 1 << categoryIndex;
     }

     /**
      * Get the stored bidi class of the code point as a single-bit mask
      * (1 shifted left by the class value). This reads the stored value
      * directly and does not apply the defaults of getBidiClass().
      */
     public int getBidiClassMask(int codePoint) {
         final int bidiIndex = get(codePoint, false).bidiClass;
         return 1 << bidiIndex;
     }

     /**
      * Get the numeric type of the code point, as returned by
      * getNumericType(), as a single-bit mask.
      */
     public int getNumericTypeMask(int codePoint) {
         final int typeIndex = getNumericType(codePoint);
         return 1 << typeIndex;
     }

     /**
      * Get the raw decomposition type of the code point as a single-bit mask
      * (1 shifted left by the type value).
      */
     public int getDecompositionTypeMask(int codePoint) {
         final int typeIndex = get(codePoint, false).decompositionType;
         return 1 << typeIndex;
     }

     /**
      * Get the East Asian width of the code point as a single-bit mask
      * (1 shifted left by the width value).
      */
     public int getEastAsianWidthMask(int codePoint) {
         final int widthIndex = get(codePoint, false).eastAsianWidth;
         return 1 << widthIndex;
     }

     /**
      * Returns a one-bit mask whose set bit is the line-break value of the
      * code point.
      * NOTE(review): an int shift uses only the low five bits of the distance,
      * so values of 32 or more would alias lower bits - confirm the value range.
      *
      * @param codePoint code point to look up
      * @return 1 shifted left by the lineBreak value
      */
     public int getLineBreakMask(int codePoint) {
         final UData record = get(codePoint, false);
         return 1 << record.lineBreak;
     }

     /**
      * Returns a one-bit mask whose set bit is the script value of the code
      * point.
      * NOTE(review): an int shift uses only the low five bits of the distance,
      * so values of 32 or more would alias lower bits - confirm the value range.
      *
      * @param codePoint code point to look up
      * @return 1 shifted left by the script value
      */
     public int getScriptMask(int codePoint) {
         final UData record = get(codePoint, false);
         return 1 << record.script;
     }

     /**
      * Returns a one-bit mask whose set bit is the age value of the code point.
      * NOTE(review): an int shift uses only the low five bits of the distance,
      * so values of 32 or more would alias lower bits - confirm the value range.
      *
      * @param codePoint code point to look up
      * @return 1 shifted left by the age value
      */
     public int getAgeMask(int codePoint) {
         final UData record = get(codePoint, false);
         return 1 << record.age;
     }

     /**
      * Returns a one-bit mask whose set bit is the joining-type value of the
      * code point.
      * NOTE(review): an int shift uses only the low five bits of the distance,
      * so values of 32 or more would alias lower bits - confirm the value range.
      *
      * @param codePoint code point to look up
      * @return 1 shifted left by the joiningType value
      */
     public int getJoiningTypeMask(int codePoint) {
         final UData record = get(codePoint, false);
         return 1 << record.joiningType;
     }

     /**
      * Returns a one-bit mask whose set bit is the joining-group value of the
      * code point.
      * NOTE(review): an int shift uses only the low five bits of the distance,
      * so values of 32 or more would alias lower bits - confirm the value range.
      *
      * @param codePoint code point to look up
      * @return 1 shifted left by the joiningGroup value
      */
     public int getJoiningGroupMask(int codePoint) {
         final UData record = get(codePoint, false);
         return 1 << record.joiningGroup;
     }


     // VERSIONS WITH NAMES

     /**
      * Returns the general category name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the category value is out of table range
      */
     public String getCategoryID(int codePoint) {
         final byte category = getCategory(codePoint);
         return getCategoryID_fromIndex(category);
     }

     /**
      * Returns the general category name for a value, using the NORMAL style.
      *
      * @param prop general category value
      * @return the name, or null if prop is out of table range
      */
     public static String getCategoryID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getCategoryID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the general category name for a value in the requested style.
      *
      * @param prop general category value
      * @param style EXTRA_ALIAS yields "digit" for DECIMAL_DIGIT_NUMBER; LONG
      *        selects the long-name table; every other style (and EXTRA_ALIAS
      *        for other values) selects the short-name table
      * @return the name, or null if prop is negative or not below the length
      *         of the short-name table
      */
     public static String getCategoryID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.GENERAL_CATEGORY.length) {
             return null;
         }
         if (style == EXTRA_ALIAS && prop == DECIMAL_DIGIT_NUMBER) {
             return "digit";
         }
         if (style != LONG) {
             return UCD_Names.GENERAL_CATEGORY[prop];
         }
         return UCD_Names.LONG_GENERAL_CATEGORY[prop];
     }


     /**
      * Returns the combining class name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the combining class is out of table range
      */
     public String getCombiningClassID(int codePoint) {
         final byte defaultStyle = NORMAL;
         return getCombiningClassID(codePoint, defaultStyle);
     }

     /**
      * Returns the combining class name of a code point in the given style.
      *
      * @param codePoint code point to look up
      * @param style name style; SHORT selects short names, anything else long names
      * @return the name, or null if the combining class is out of table range
      */
     public String getCombiningClassID(int codePoint, byte style) {
         final short combiningClass = getCombiningClass(codePoint);
         return getCombiningClassID_fromIndex(combiningClass, style);
     }

     /**
      * Returns the combining class name for a value, using the NORMAL style.
      *
      * @param cc combining class value
      * @return the name, or null if cc is out of table range
      */
     public static String getCombiningClassID_fromIndex(short cc) {
         final byte defaultStyle = NORMAL;
         return getCombiningClassID_fromIndex(cc, defaultStyle);
     }

     /**
      * Returns the combining class name for a value in the requested style,
      * read from the UCD_Names tables.
      *
      * @param index combining class value
      * @param style SHORT selects UCD_Names.COMBINING_CLASS; any other style
      *        selects UCD_Names.LONG_COMBINING_CLASS
      * @return the name, or null if index is negative or not below the length
      *         of UCD_Names.COMBINING_CLASS
      */
     static String getCombiningClassID_fromIndex (short index, byte style) {
         final boolean outOfRange = index < 0 || index >= UCD_Names.COMBINING_CLASS.length;
         if (outOfRange) {
             return null;
         }
         if (style == SHORT) {
             return UCD_Names.COMBINING_CLASS[index];
         }
         return UCD_Names.LONG_COMBINING_CLASS[index];
     }


     /**
      * Returns the bidi class name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the bidi class value is out of table range
      */
     public String getBidiClassID(int codePoint) {
         final byte bidiClass = getBidiClass(codePoint);
         return getBidiClassID_fromIndex(bidiClass);
     }

     /**
      * Returns the bidi class name for a value, using the NORMAL style.
      *
      * @param prop bidi class value
      * @return the name, or null if prop is out of table range
      */
     public static String getBidiClassID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getBidiClassID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the bidi class name for a value in the requested style.
      *
      * @param prop bidi class value
      * @param style SHORT selects UCD_Names.BIDI_CLASS; any other style
      *        selects UCD_Names.LONG_BIDI_CLASS
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.BIDI_CLASS
      */
     public static String getBidiClassID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.BIDI_CLASS.length) {
             return null;
         }
         if (style == SHORT) {
             return UCD_Names.BIDI_CLASS[prop];
         }
         return UCD_Names.LONG_BIDI_CLASS[prop];
     }

     /**
      * Returns the decomposition type name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getDecompositionTypeID(int codePoint) {
         final byte decompositionType = getDecompositionType(codePoint);
         return getDecompositionTypeID_fromIndex(decompositionType);
     }

     /**
      * Returns the decomposition type name for a value, using the NORMAL style.
      *
      * @param prop decomposition type value
      * @return the name, or null if prop is out of table range
      */
     public static String getDecompositionTypeID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getDecompositionTypeID_fromIndex(prop, defaultStyle);
     }
     /**
      * Returns the decomposition type name for a value in the requested style.
      * The range check uses the long-name table length for both styles.
      *
      * @param prop decomposition type value
      * @param style SHORT selects UCD_Names.DECOMPOSITION_TYPE; any other
      *        style selects UCD_Names.LONG_DECOMPOSITION_TYPE
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.LONG_DECOMPOSITION_TYPE
      */
     public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.LONG_DECOMPOSITION_TYPE.length) {
             return null;
         }
         if (style == SHORT) {
             return UCD_Names.DECOMPOSITION_TYPE[prop];
         }
         return UCD_Names.LONG_DECOMPOSITION_TYPE[prop];
     }

     /**
      * Returns the numeric type name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getNumericTypeID(int codePoint) {
         final byte numericType = getNumericType(codePoint);
         return getNumericTypeID_fromIndex(numericType);
     }

     /**
      * Returns the numeric type name for a value, using the NORMAL style.
      *
      * @param prop numeric type value
      * @return the name, or null if prop is out of table range
      */
     public static String getNumericTypeID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getNumericTypeID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the numeric type name for a value in the requested style.
      * The range check uses the long-name table length for both styles.
      *
      * @param prop numeric type value
      * @param style SHORT selects UCD_Names.NUMERIC_TYPE; any other style
      *        selects UCD_Names.LONG_NUMERIC_TYPE
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.LONG_NUMERIC_TYPE
      */
     public static String getNumericTypeID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.LONG_NUMERIC_TYPE.length) {
             return null;
         }
         if (style == SHORT) {
             return UCD_Names.NUMERIC_TYPE[prop];
         }
         return UCD_Names.LONG_NUMERIC_TYPE[prop];
     }

     /**
      * Returns the East Asian width name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getEastAsianWidthID(int codePoint) {
         final byte width = getEastAsianWidth(codePoint);
         return getEastAsianWidthID_fromIndex(width);
     }

     /**
      * Returns the East Asian width name for a value, using the NORMAL style.
      *
      * @param prop East Asian width value
      * @return the name, or null if prop is out of table range
      */
     public static String getEastAsianWidthID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getEastAsianWidthID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the East Asian width name for a value in the requested style.
      * The range check uses the long-name table length for both styles.
      *
      * @param prop East Asian width value
      * @param style LONG selects UCD_Names.LONG_EAST_ASIAN_WIDTH; any other
      *        style selects UCD_Names.EAST_ASIAN_WIDTH
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.LONG_EAST_ASIAN_WIDTH
      */
     public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.LONG_EAST_ASIAN_WIDTH.length) {
             return null;
         }
         if (style == LONG) {
             return UCD_Names.LONG_EAST_ASIAN_WIDTH[prop];
         }
         return UCD_Names.EAST_ASIAN_WIDTH[prop];
     }

     /**
      * Returns the line-break name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getLineBreakID(int codePoint) {
         final byte lineBreak = getLineBreak(codePoint);
         return getLineBreakID_fromIndex(lineBreak);
     }

     /**
      * Returns the line-break name for a value, using the NORMAL style.
      *
      * @param prop line-break value
      * @return the name, or null if prop is out of table range
      */
     public static String getLineBreakID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getLineBreakID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the line-break name for a value in the requested style.
      *
      * @param prop line-break value
      * @param style LONG selects UCD_Names.LONG_LINE_BREAK; any other style
      *        selects UCD_Names.LINE_BREAK
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.LINE_BREAK
      */
     public static String getLineBreakID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.LINE_BREAK.length) {
             return null;
         }
         if (style == LONG) {
             return UCD_Names.LONG_LINE_BREAK[prop];
         }
         return UCD_Names.LINE_BREAK[prop];
     }

     /**
      * Returns the joining-type name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getJoiningTypeID(int codePoint) {
         final byte joiningType = getJoiningType(codePoint);
         return getJoiningTypeID_fromIndex(joiningType);
     }

     /**
      * Returns the joining-type name for a value, using the NORMAL style.
      *
      * @param prop joining-type value
      * @return the name, or null if prop is out of table range
      */
     public static String getJoiningTypeID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getJoiningTypeID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the joining-type name for a value in the requested style.
      *
      * @param prop joining-type value
      * @param style LONG selects UCD_Names.LONG_JOINING_TYPE; any other style
      *        selects UCD_Names.JOINING_TYPE
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.JOINING_TYPE
      */
     public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.JOINING_TYPE.length) {
             return null;
         }
         if (style == LONG) {
             return UCD_Names.LONG_JOINING_TYPE[prop];
         }
         return UCD_Names.JOINING_TYPE[prop];
     }

     /**
      * Returns the joining-group name of a code point.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getJoiningGroupID(int codePoint) {
         final byte joiningGroup = getJoiningGroup(codePoint);
         return getJoiningGroupID_fromIndex(joiningGroup);
     }

     /**
      * Returns the joining-group name for a value, passing the NORMAL style.
      *
      * @param prop joining-group value
      * @return the name, or null if prop is out of table range
      */
     public static String getJoiningGroupID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getJoiningGroupID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the joining-group name for a value. Only one name table exists,
      * so the style argument is ignored.
      *
      * @param prop joining-group value
      * @param style unused
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.JOINING_GROUP
      */
     public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
         final boolean inRange = prop >= 0 && prop < UCD_Names.JOINING_GROUP.length;
         if (!inRange) {
             return null;
         }
         return UCD_Names.JOINING_GROUP[prop];
     }

     /**
      * Returns the script name of a code point in the NORMAL style.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getScriptID(int codePoint) {
         final byte script = getScript(codePoint);
         return getScriptID_fromIndex(script);
     }

     /**
      * Returns the script name for a value, using the NORMAL style.
      *
      * @param prop script value
      * @return the name, or null if prop is out of table range
      */
     public static String getScriptID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getScriptID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the script name for a value in the requested style.
      *
      * @param prop script value
      * @param length name style: EXTRA_ALIAS yields "Qaac" for COPTIC; SHORT
      *        selects UCD_Names.SCRIPT; every other case selects
      *        UCD_Names.LONG_SCRIPT
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.SCRIPT
      */
     public static String getScriptID_fromIndex(byte prop, byte length) {
         if (prop < 0 || prop >= UCD_Names.SCRIPT.length) {
             return null;
         }
         if (length == EXTRA_ALIAS && prop == COPTIC) {
             return "Qaac";
         }
         if (length == SHORT) {
             return UCD_Names.SCRIPT[prop];
         }
         return UCD_Names.LONG_SCRIPT[prop];
     }

     /**
      * Returns the age name of a code point.
      *
      * @param codePoint code point to look up
      * @return the name, or null if the value is out of table range
      */
     public String getAgeID(int codePoint) {
         final byte age = getAge(codePoint);
         return getAgeID_fromIndex(age);
     }

     /**
      * Returns the age name for a value, passing the NORMAL style.
      *
      * @param prop age value
      * @return the name, or null if prop is out of table range
      */
     public static String getAgeID_fromIndex(byte prop) {
         final byte defaultStyle = NORMAL;
         return getAgeID_fromIndex(prop, defaultStyle);
     }

     /**
      * Returns the age name for a value. There is no short form, so the style
      * argument is ignored.
      *
      * @param prop age value
      * @param style unused
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.AGE
      */
     public static String getAgeID_fromIndex(byte prop, byte style) {
         final boolean inRange = prop >= 0 && prop < UCD_Names.AGE.length;
         if (!inRange) {
             return null;
         }
         return UCD_Names.AGE[prop];
     }

     /**
      * Returns the yes/no name describing whether a binary property bit is set
      * for a code point.
      *
      * @param codePoint code point to look up
      * @param bit index of the binary property bit
      * @return UCD_Names.YN_TABLE[1] if the bit is set, otherwise UCD_Names.YN_TABLE[0]
      */
     public String getBinaryPropertiesID(int codePoint, byte bit) {
         final int slot = getBinaryProperty(codePoint, bit) ? 1 : 0;
         return UCD_Names.YN_TABLE[slot];
     }

     /**
      * Returns the name of a binary property, using the NORMAL style.
      *
      * @param bit index of the binary property
      * @return the name, or null if bit is out of table range
      */
     public static String getBinaryPropertiesID_fromIndex(byte bit) {
         final byte defaultStyle = NORMAL;
         return getBinaryPropertiesID_fromIndex(bit, defaultStyle);
     }

     /**
      * Returns the name of a binary property in the requested style.
      *
      * @param bit index of the binary property
      * @param style SHORT selects UCD_Names.SHORT_BP; any other style selects
      *        UCD_Names.BP
      * @return the name, or null if bit is negative or not below the length
      *         of UCD_Names.BP
      */
     public static String getBinaryPropertiesID_fromIndex(byte bit, byte style) {
         if (bit < 0 || bit >= UCD_Names.BP.length) {
             return null;
         }
         if (style == SHORT) {
             return UCD_Names.SHORT_BP[bit];
         }
         return UCD_Names.BP[bit];
     }

     /**
      * Maps a code point to the representative of the range it belongs to,
      * or to itself when it is not inside a collapsed range.
      * The first code point of each range always maps to itself.
      *
      * @param ch code point to map
      * @param rCompositeVersion version packed as 0xMMmmuu (major, minor,
      *        update); it gates the ranges that depend on the version
      * @return the range representative (for example 0x3400, 0x4E00, 0xAC00,
      *         0xE000, 0x20000, or 0xFFFF for noncharacters), or ch itself
      */
     public static int mapToRepresentative(int ch, int rCompositeVersion) {
         if (ch > 0xFFFD) {
             // Last two code points of every plane are noncharacters.
             if ((ch & 0xFFFE) == 0xFFFE) return 0xFFFF;
             // CJK Ideograph Extension B
             if (0x20000 < ch && ch <= 0x2A6D6) return 0x20000;
             // Planes 15 and 16 (private use) share the BMP private-use representative.
             if (ch >= 0xF0000 && rCompositeVersion >= 0x20000) return 0xE000;
             return ch;
         }

         // CJK Ideograph Extension A
         if (0x3400 < ch && ch <= 0x4DB5) return 0x3400;
         // CJK Ideograph
         if (0x4E00 < ch && ch <= 0x9FA5) return 0x4E00;
         // CJK Ideograph additions, only from composite version 0x40100 on
         if (0x9FA5 < ch && ch <= 0x9FBB && rCompositeVersion >= 0x40100) return 0x4E00;
         // Hangul Syllable
         if (0xAC00 < ch && ch <= 0xD7A3) return 0xAC00;
         // Non Private Use High Surrogate
         if (0xD800 < ch && ch <= 0xDB7F) return 0xD800;
         // Private Use High Surrogate
         if (0xDB80 < ch && ch <= 0xDBFF) return 0xDB80;
         // Low Surrogate
         if (0xDC00 < ch && ch <= 0xDFFF) return 0xDC00;
         // Private Use
         if (0xE000 < ch && ch <= 0xF8FF) return 0xE000;
         // CJK Compatibility Ideograph, collapsed only before composite version 0x20105
         if (0xF900 < ch && ch <= 0xFA2D && rCompositeVersion < 0x20105) return 0xF900;
         // Noncharacter block U+FDD0..U+FDEF
         if (0xFDD0 <= ch && ch <= 0xFDEF) return 0xFFFF;
         return ch;
     }

     /**
      * Tests whether a code point may start an identifier: its general
      * category is Lu, Ll, Lt, Lm, Lo or Nl, or it has the Other_ID_Start
      * binary property.
      *
      * @param cp code point to test
      * @return true if cp qualifies as an identifier start
      */
     public boolean isIdentifierStart(int cp) {
         /*
         if (extended) {
             if (cp == 0x0E33 || cp == 0x0EB3 || cp == 0xFF9E || cp == 0xFF9F) return false;
             if (cp == 0x037A || cp >= 0xFC5E && cp <= 0xFC63 || cp == 0xFDFA || cp == 0xFDFB) return false;
             if (cp >= 0xFE70 && cp <= 0xFE7E && (cp & 1) == 0) return false;
         }
         */
         final byte category = getCategory(cp);
         final boolean letterOrLetterNumber =
             category == Lu || category == Ll || category == Lt
             || category == Lm || category == Lo || category == Nl;
         // the property lookup only happens when the category test fails
         return letterOrLetterNumber || getBinaryProperty(cp, Other_ID_Start);
     }

     /**
      * Tests whether a code point may continue an identifier (format
      * characters are not considered): it is an identifier start, or its
      * general category is Mn, Mc, Nd or Pc, or it has the Other_ID_Continue
      * binary property.
      *
      * @param cp code point to test
      * @return true if cp qualifies as an identifier continuation
      */
     public boolean isIdentifierContinue_NO_Cf(int cp) {
         // isIdentifierStart already accepts Other_ID_Start, so that property
         // does not need to be tested again below.
         if (isIdentifierStart(cp)) return true;
         /*
         if (extended) {
             if (cp == 0x00B7) return true;
             if (cp == 0x0E33 || cp == 0x0EB3 || cp == 0xFF9E || cp == 0xFF9F) return true;
         }
         */
         byte cat = getCategory(cp);
         if (cat == Mn || cat == Mc || cat == Nd || cat == Pc) return true;
         return getBinaryProperty(cp, Other_ID_Continue);
     }

     /**
      * Tests whether a string is an identifier: non-empty, first code point
      * accepted by isIdentifierStart, every later code point accepted by
      * isIdentifierContinue_NO_Cf.
      *
      * @param s string to test; must not be null
      * @return true if s is a well-formed identifier
      */
     public boolean isIdentifier(String s) {
         if (s.length() == 0) {
             return false; // at least one code point is required
         }
         int index = 0;
         while (index < s.length()) {
             final int cp = UTF32.char32At(s, index);
             final boolean accepted = (index == 0)
                 ? isIdentifierStart(cp)
                 : isIdentifierContinue_NO_Cf(cp);
             if (!accepted) {
                 return false;
             }
             index += UTF32.count16(cp);
         }
         return true;
     }
     /*
 Middle Dot. Because most Catalan legacy data will be encoded in Latin-1, U+00B7 MIDDLE DOT needs to be
 allowed in <identifier_extend>.

 In particular, the following four characters should be in <identifier_extend> and not <identifier_start>:
 0E33 THAI CHARACTER SARA AM
 0EB3 LAO VOWEL SIGN AM
 FF9E HALFWIDTH KATAKANA VOICED SOUND MARK
 FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
 Irregularly decomposing characters. U+037A GREEK YPOGEGRAMMENI and certain Arabic presentation
 forms have irregular compatibility decompositions, and need to be excluded from both <identifier_start>
 and <identifier_extend>. It is recommended that all Arabic presentation forms be excluded from identifiers
 in any event, although only a few of them are required to be excluded for normalization
 to guarantee identifier closure.
 */

     // *******************
     // PRIVATES
     // *******************

     // Cache of singletons.
     // NOTE(review): presumably keyed by version string and filled by make(version) - confirm.
     private static Map versionCache = new HashMap();

     // One past the largest legal code point (get() rejects anything above 0x10FFFF).
     private static final int LIMIT_CODE_POINT = 0x110000;
     // Shared placeholder row of 1024 null entries; rows of 'data' point here
     // until add() replaces them with a private row.
     private static final UData[] ALL_NULLS = new UData[1024];

     // main data: two-level table indexed by [codePoint >> 10][codePoint & 0x3FF]
     private UData[][] data = new UData[LIMIT_CODE_POINT>>10][];

     // extras
     // combining class values seen while loading (set in fillFromFile2)
     private BitSet combiningClassSet = new BitSet(256);
     // version string, data file name, date, format byte and record count,
     // all filled in by fillFromFile2
     private String version;
     private String file;
     private long date = -1;
     private byte format = -1;
     //private byte major = -1;
     //private byte minor = -1;
     //private byte update = -1;
     // version packed as (major << 16) | (minor << 8) | update
     private int compositeVersion = -1;
     private int size = -1;

     // cache last UData
     // NOTE(review): the only visible use of these in get() is commented out.
     private int lastCode = Integer.MIN_VALUE;
     private UData lastResult = UData.UNASSIGNED;
     private boolean lastCodeFixed = false;

     /**
      * Private constructor: points every row of the data table at the shared
      * ALL_NULLS placeholder row.
      */
     private UCD() {
         int row = data.length;
         while (--row >= 0) {
             data[row] = ALL_NULLS;
         }
     }

     /**
      * Stores a record in the two-level data table under its own code point,
      * allocating a private 1024-entry row the first time a row is written.
      *
      * @param uData record to store; its codePoint field selects the slot
      */
     private void add(UData uData) {
         final int cp = uData.codePoint;
         final int rowIndex = cp >> 10;
         UData[] row = data[rowIndex];
         if (row == ALL_NULLS) {
             // never write into the shared placeholder row
             row = new UData[1024];
             data[rowIndex] = row;
         }
         row[cp & 0x3FF] = uData;
     }

     /**
      * Reports whether the name of a code point is treated as computable.
      * True for the three explicit ranges tested first, for any code point
      * whose range representative is one of the listed range starts, and
      * otherwise for code points that have no stored record.
      *
      * @param codePoint code point to test
      * @return true if the name is treated as computable
      */
     public boolean hasComputableName(int codePoint) {
         final boolean compatIdeograph = 0xF900 <= codePoint && codePoint <= 0xFA2D;
         final boolean braille = 0x2800 <= codePoint && codePoint <= 0x28FF;
         final boolean compatSupplement = 0x2F800 <= codePoint && codePoint <= 0x2FA1D;
         if (compatIdeograph || braille || compatSupplement) {
             return true;
         }

         switch (mapToRepresentative(codePoint, compositeVersion)) {
           case 0x2800: // braille
           case 0xF900: // compat ideos
           case 0x2F800: // compat ideos
           case 0x3400: // CJK Ideograph Extension A
           case 0x4E00: // CJK Ideograph
           case 0x20000: // Extension B
           case 0xAC00: // Hangul Syllable
           case 0xE000: // Private Use
           case 0xF0000: // Private Use
           case 0x100000: // Private Use
           case 0xD800: // Surrogate
           case 0xDB80: // Private Use
           case 0xDC00: // Private Use
           case 0xFFFF: // Noncharacter
             return true;
           default:
             // not in a collapsed range: computable only when nothing is stored
             return getRaw(codePoint) == null;
         }
     }

     /**
      * Reads the stored record for a code point straight from the two-level
      * table, without range mapping or fix-ups.
      *
      * @param codePoint code point used to index the table
      * @return the stored record, or null if none is stored
      */
     private UData getRaw(int codePoint) {
         final UData[] row = data[codePoint >> 10];
         return row[codePoint & 0x3FF];
     }

     /**
      * Looks up the data record for a code point, synthesizing values for
      * code points that belong to a collapsed range.
      * <p>The code point is mapped to its range representative with
      * mapToRepresentative; the record stored for that representative (or the
      * shared UData.UNASSIGNED record when none is stored) is then patched in
      * place and returned. The returned object is therefore shared and
      * mutable: a later call that resolves to the same record overwrites its
      * codePoint and possibly other fields.
      *
      * @param codePoint code point in the range 0..0x10FFFF
      * @param fixStrings if true, also fill in name, shortName and, for
      *        remapped code points, the string-valued mappings
      * @return the shared record, now describing codePoint
      * @throws IllegalArgumentException if codePoint is outside 0..0x10FFFF
      */
     UData get(int codePoint, boolean fixStrings) {
         /*if (codePoint == 0xF901) {
             System.out.println(version + ", " + Integer.toString(compositeVersion, 16));
             System.out.println("debug: ");
         }
         */
         if (codePoint < 0 || codePoint > 0x10FFFF) {
             throw new IllegalArgumentException("Illegal Code Point: " + Utility.hex(codePoint));
         }
         //if (codePoint == lastCode && fixStrings <= lastCodeFixed) return lastResult;
         /*
         // we play some funny tricks for performance
         // if cp is not represented, it is either in a elided block or missing.
         // elided blocks are either CONTINUE or FFFF

         byte cat;
         if (!ucdData.isRepresented(cp)) {
             int rep = UCD.mapToRepresentative(cp);
             if (rep == 0xFFFF) cat = Cn;
             else if (rep != cp) return CONTINUE;
             else if (!ucdData.isRepresented(rep)) cat = Cn;
             else cat = ucdData.getCategory(rep);
         } else {
             cat = ucdData.getCategory(cp);
         }
         */

         UData result = null;

         // do range stuff
         String constructedName = null;
         int rangeStart = mapToRepresentative(codePoint, compositeVersion);
         boolean isHangul = false;
         boolean isRemapped = false;
         switch (rangeStart) {
           case 0xF900:
             // Only versions before 2.1.5 build a compatibility-ideograph name here.
             if (compositeVersion < 0x020105) {
                 if (fixStrings) constructedName = "CJK COMPATIBILITY IDEOGRAPH-" + Utility.hex(codePoint, 4);
                 break;
             }
             //isRemapped = true;
             break;
             // NOTE: despite the old "FALL THROUGH" remark that stood here, both
             // paths above leave the switch; nothing falls into the next case.
           //default:
             /*
             result = getRaw(codePoint);
             if (result == null) {
                 result = UData.UNASSIGNED;
                 result.name = null; // clean this up, since we reuse UNASSIGNED
                 result.shortName = null;
                 if (fixStrings) {
                     result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
                 }
             }
             if (fixStrings) {
                 if (result.name == null) {
                     result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
                     // System.out.println("Warning: fixing name for " + result.name);
                 }
                 if (result.shortName == null) {
                     result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
                 }
             }
             */
             //break;
           case 0x3400: // CJK Ideograph Extension A
           case 0x4E00: // CJK Ideograph
           case 0x20000: // Extension B
             if (fixStrings) constructedName = "CJK UNIFIED IDEOGRAPH-" + Utility.hex(codePoint, 4);
             isRemapped = true;
             break;
           case 0xAC00: // Hangul Syllable
             isHangul = true;
             if (fixStrings) {
                 constructedName = "HANGUL SYLLABLE " + getHangulName(codePoint);
             }
             isRemapped = true;
             break;
           case   0xE000: // Private Use
           case  0xF0000: // Private Use
           case 0x100000: // Private Use
             if (fixStrings) constructedName = "<private-use-" + Utility.hex(codePoint, 4) + ">";
             isRemapped = true;
             break;
           case 0xD800: // Surrogate
           case 0xDB80: // Private Use
           case 0xDC00: // Private Use
             if (fixStrings) constructedName = "<surrogate-" + Utility.hex(codePoint, 4) + ">";
             isRemapped = true;
             break;
           case 0xFFFF: // Noncharacter
             if (fixStrings) constructedName = "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
             isRemapped = true;
             break;
         }
         // Fetch the record of the representative, not of codePoint itself.
         result = getRaw(rangeStart);
         if (result == null) {
             // Nothing stored: reuse the single shared UNASSIGNED record.
             result = UData.UNASSIGNED;
             isRemapped = true;
             result.name = null; // clean this up, since we reuse UNASSIGNED
             result.shortName = null;
             result.decompositionType = NONE;
             if (fixStrings) {
                 constructedName = "<reserved-" + Utility.hex(codePoint, 4) + ">";
                 //result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
             }
             //return result;
         }

         // Patch the shared record so that it describes the requested code point.
         result.codePoint = codePoint;
         if (fixStrings) {
             if (result.name == null || isRemapped) result.name = constructedName;
             if (result.shortName == null) result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
             if (isRemapped) {
                 // Remapped code points map to themselves in every string-valued mapping.
                 result.decompositionMapping = result.bidiMirror
                 = result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
                 = result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
                 = UTF32.valueOf32(codePoint);
             }
         }
         if (isHangul) {
             // Hangul syllables get a computed decomposition and line-break class.
             if (fixStrings) result.decompositionMapping = getHangulDecompositionPair(codePoint);
             if (isLV(codePoint)) result.lineBreak = LB_H2; else result.lineBreak = LB_H3;
             result.decompositionType = CANONICAL;
         }
         return result;
     }

     // Neither Mapped nor Composite CJK: [\u3400-\u4DB5\u4E00-\u9FA5\U00020000-\U0002A6D6]

     /**
      * Tests whether a code point lies in either of the half-open ranges
      * [CJK_A_BASE, CJK_A_LIMIT) or [CJK_B_BASE, CJK_B_LIMIT).
      *
      * @param bigChar code point to test
      * @return true if bigChar is in one of the two ranges
      */
     public static final boolean isCJK_AB(int bigChar) {
         if (CJK_A_BASE <= bigChar && bigChar < CJK_A_LIMIT) {
             return true;
         }
         return CJK_B_BASE <= bigChar && bigChar < CJK_B_LIMIT;
     }

     /**
      * Tests whether a code point lies in the half-open range
      * [CJK_BASE, CJK_LIMIT) or is one of the listed compatibility-block
      * characters that do not decompose.
      *
      * @param cp code point to test
      * @return true if cp is in the range or in the exception list
      */
     public static boolean isCJK_BASE(int cp) {
         if (CJK_BASE <= cp && cp < CJK_LIMIT) {
             return true;
         }
         switch (cp) {
           // compat characters that don't decompose.
           case 0xFA0E:
           case 0xFA0F:
           case 0xFA11:
           case 0xFA13:
           case 0xFA14:
           case 0xFA1F:
           case 0xFA21:
           case 0xFA23:
           case 0xFA24:
           case 0xFA27:
           case 0xFA28:
           case 0xFA29:
           case 0xFA2E:
           case 0xFA2F:
             return true;
           default:
             return false;
         }
     }

     // Hangul constants

     /**
      * Constants for the arithmetic Hangul syllable / jamo mapping.
      * SBase, LBase, VBase and TBase are the first code points used for
      * syllables, leading consonants, vowels and trailing consonants
      * (TBase is one below the first real trailing consonant, so that a
      * trailing index of 0 means "no trailing consonant").
      * The *Count values are the sizes of those sets, the *Limit values are
      * exclusive upper bounds (base + count), and the *LimitFull values are
      * the wider exclusive bounds used by getHangulSyllableType.
      */
     public static final int
         SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
         LCount = 19, VCount = 21, TCount = 28,
         NCount = VCount * TCount,   // 588
         SCount = LCount * NCount,   // 11172
         LLimit = LBase + LCount,    // 1113
         VLimit = VBase + VCount,    // 1176
         TLimit = TBase + TCount,    // 11C3
         LLimitFull = 0x1160,
         VLimitFull = TBase,
         TLimitFull = 0x11FF,
         SLimit = SBase + SCount;    // D7A4

     /**
      * Builds the name suffix of a Hangul syllable by concatenating the
      * entries of the leading, vowel and trailing jamo name tables.
      *
      * @param s Hangul syllable code point, SBase <= s < SBase + SCount
      * @return the concatenated jamo names
      * @throws IllegalArgumentException if s is not in the syllable range
      */
     private static String getHangulName(int s) {
         final int offset = s - SBase;
         if (offset < 0 || offset >= SCount) {
             throw new IllegalArgumentException("Not a Hangul Syllable: " + s);
         }
         final int lead = offset / NCount;
         final int vowel = (offset % NCount) / TCount;
         final int trail = offset % TCount;
         return UCD_Names.JAMO_L_TABLE[lead] + UCD_Names.JAMO_V_TABLE[vowel] + UCD_Names.JAMO_T_TABLE[trail];
     }

     // Shared two-char scratch buffer. It is static and unsynchronized, so
     // any code that writes to it is not safe to run concurrently.
     private static final char[] pair = new char[2];

     /**
      * Tests whether a Hangul syllable has trailing index 0, i.e. consists of
      * a leading consonant and a vowel only.
      *
      * @param s Hangul syllable code point
      * @return true if (s - SBase) is a multiple of TCount
      * @throws IllegalArgumentException if s is not in the syllable range
      */
     static boolean isDoubleHangul(int s) {
         final int offset = s - SBase;
         final boolean inRange = 0 <= offset && offset < SCount;
         if (!inRange) {
             throw new IllegalArgumentException("Not a Hangul Syllable: " + s);
         }
         return offset % TCount == 0;
     }

     /**
      * Returns the two-character decomposition of a Hangul syllable.
      * A syllable with a trailing consonant decomposes into the syllable
      * without it plus the trailing jamo; a syllable without one decomposes
      * into its leading and vowel jamo.
      * <p>The result is built in a local array rather than a shared static
      * buffer, so the method holds no mutable shared state.
      *
      * @param ch code point to decompose
      * @return the two-character decomposition, or the empty string if ch is
      *         not in the Hangul syllable range
      */
     static String getHangulDecompositionPair(int ch) {
         int SIndex = ch - SBase;
         if (0 > SIndex || SIndex >= SCount) {
             return "";
         }
         final char[] decomposition = new char[2];
         int TIndex = SIndex % TCount;
         if (TIndex != 0) { // triple
             decomposition[0] = (char)(SBase + SIndex - TIndex);
             decomposition[1] = (char)(TBase + TIndex);
         } else {
             decomposition[0] = (char)(LBase + SIndex / NCount);
             decomposition[1] = (char)(VBase + (SIndex % NCount) / TCount);
         }
         return String.valueOf(decomposition);
     }

     /**
      * Composes two code points into a Hangul syllable when possible:
      * leading + vowel jamo gives an LV syllable, and an LV syllable + a code
      * point in [TBase, TLimit) gives that syllable plus the trailing offset.
      * NOTE(review): the second rule accepts char2 == TBase, which returns
      * char1 unchanged - confirm that is intended.
      *
      * @param char1 first code point
      * @param char2 second code point
      * @return the composed syllable, or 0xFFFF if the pair does not compose
      */
     static int composeHangul(int char1, int char2) {
         final boolean leadThenVowel =
             LBase <= char1 && char1 < LLimit && VBase <= char2 && char2 < VLimit;
         if (leadThenVowel) {
             final int leadIndex = char1 - LBase;
             final int vowelIndex = char2 - VBase;
             return SBase + (leadIndex * VCount + vowelIndex) * TCount;
         }
         final boolean lvThenTrail =
             SBase <= char1 && char1 < SLimit
             && TBase <= char2 && char2 < TLimit
             && (char1 - SBase) % TCount == 0;
         if (lvThenTrail) {
             return char1 + (char2 - TBase);
         }
         return 0xFFFF; // no composition
     }

     /**
      * Tests whether a code point lies in the half-open Hangul syllable range
      * [SBase, SLimit).
      *
      * @param char1 code point to test
      * @return true if char1 is in the range
      */
     static public boolean isHangulSyllable(int char1) {
         final int offset = char1 - SBase;
         return 0 <= offset && char1 < SLimit;
     }

     /**
      * Tests whether a code point is a leading jamo or an LV syllable.
      *
      * @param char1 code point to test
      * @return true if isLeadingJamo(char1) or isLV(char1) holds
      */
     static boolean isLeadingJamoComposition(int char1) {
         if (isLeadingJamo(char1)) {
             return true;
         }
         return isLV(char1);
     }

     /**
      * Tests whether a code point is a Hangul syllable in [SBase, SLimit)
      * whose offset from SBase is a multiple of TCount, i.e. one with no
      * trailing consonant.
      *
      * @param char1 code point to test
      * @return true if char1 is such a syllable
      */
     static boolean isLV(int char1) {
         if (char1 < SBase || char1 >= SLimit) {
             return false;
         }
         return (char1 - SBase) % TCount == 0;
     }

     /**
      * Tests whether a code point lies in the half-open range [VBase, VLimit).
      *
      * @param cp code point to test
      * @return true if cp is in the range
      */
     static boolean isVowelJamo(int cp) {
         final boolean belowRange = cp < VBase;
         return !belowRange && cp < VLimit;
     }

     /**
      * Tests whether a code point lies in the half-open range [TBase, TLimit).
      *
      * @param cp code point to test
      * @return true if cp is in the range
      */
     static boolean isTrailingJamo(int cp) {
         final boolean belowRange = cp < TBase;
         return !belowRange && cp < TLimit;
     }

     /**
      * Tests whether a code point lies in the half-open range [LBase, LLimit).
      *
      * @param cp code point to test
      * @return true if cp is in the range
      */
     static boolean isLeadingJamo(int cp) {
         final boolean belowRange = cp < LBase;
         return !belowRange && cp < LLimit;
     }

     /**
      * Tests whether a code point lies in [VBase, VLimit) or [TBase, TLimit).
      *
      * @param cp code point to test
      * @return true if cp is in either range
      */
     static boolean isNonLeadJamo(int cp) {
         if (VBase <= cp && cp < VLimit) {
             return true;
         }
         return TBase <= cp && cp < TLimit;
     }

     /**
      * Classifies a code point by Hangul syllable type.
      * Unassigned code points are NA; otherwise the jamo ranges bounded by
      * LLimitFull, VLimitFull and TLimitFull give L, V and T, syllables give
      * LV or LVT, and everything else is NA.
      *
      * @param cp code point to classify
      * @return one of NA, L, V, T, LV, LVT
      */
     byte getHangulSyllableType(int cp) {
         if (!isAssigned(cp)) {
             return NA;
         }
         if (LBase <= cp && cp < LLimitFull) {
             return L;
         } else if (LLimitFull <= cp && cp < VLimitFull) {
             return V;
         } else if (VLimitFull <= cp && cp < TLimitFull) {
             return T;
         } else if (isLV(cp)) {
             return LV;
         } else if (isHangulSyllable(cp)) {
             return LVT;
         }
         return NA;
     }

     /**
      * Returns the Hangul syllable type name for a value in the requested style.
      *
      * @param prop Hangul syllable type value
      * @param style LONG selects UCD_Names.LONG_HANGUL_SYLLABLE_TYPE; any
      *        other style selects UCD_Names.HANGUL_SYLLABLE_TYPE
      * @return the name, or null if prop is negative or not below the length
      *         of UCD_Names.HANGUL_SYLLABLE_TYPE
      */
     static String getHangulSyllableTypeID_fromIndex(byte prop, byte style) {
         if (prop < 0 || prop >= UCD_Names.HANGUL_SYLLABLE_TYPE.length) {
             return null;
         }
         if (style == LONG) {
             return UCD_Names.LONG_HANGUL_SYLLABLE_TYPE[prop];
         }
         return UCD_Names.HANGUL_SYLLABLE_TYPE[prop];
     }

     /**
      * Returns the Hangul syllable type name of a code point in the given style.
      *
      * @param char1 code point to classify
      * @param style name style passed on to getHangulSyllableTypeID_fromIndex
      * @return the name, or null if the type value is out of table range
      */
     String getHangulSyllableTypeID(int char1, byte style) {
         final byte syllableType = getHangulSyllableType(char1);
         return getHangulSyllableTypeID_fromIndex(syllableType, style);
     }

     /**
      * Loads the binary data file for a version. If the first attempt fails
      * with a ChainException, the file is rebuilt by running ConvertUCD.main
      * for that version and the load is attempted once more.
      *
      * @param version version string identifying the data file
      * @throws ChainException if rebuilding fails, or if the second load fails
      */
     private void fillFromFile(String version) {
         try {
             fillFromFile2(version);
             return;
         } catch (ChainException e) {
             // first load failed: fall through and try to rebuild the file
         }
         try {
             ConvertUCD.main(new String[]{version});
         } catch (Exception e2) {
             throw new ChainException("Can't build data file for {0}", new Object[]{version}, e2);
         }
         fillFromFile2(version);
     }

     /**
      * Reads the binary data file BIN_DIR + "UCD_Data" + version + ".bin"
      * into this instance.
      * <p>File layout as read here: one format byte, three version bytes
      * (major, minor, update), a long date, an int record count, then that
      * many UData records. Each record is stored with add(), and its
      * combining class is noted in combiningClassSet.
      *
      * @param version version string; must equal the version found in the file
      * @throws ChainException if the format byte or version does not match,
      *         or if the file cannot be read
      */
     private void fillFromFile2(String version) {
         DataInputStream dataIn = null;
         String fileName = BIN_DIR + "UCD_Data" + version + ".bin";
         int uDataFileCount = 0;
         try {
             dataIn = new DataInputStream(
                 new BufferedInputStream(
                     new FileInputStream(fileName),
                     128*1024));
             // header
             format = dataIn.readByte();
             byte major = dataIn.readByte();
             byte minor = dataIn.readByte();
             byte update = dataIn.readByte();
             // pack the version as 0xMMmmuu for cheap numeric comparisons
             compositeVersion = (major << 16) | (minor << 8) | update;

             String foundVersion = major + "." + minor + "." + update;
             if (format != BINARY_FORMAT || !version.equals(foundVersion)) {
                 throw new ChainException("Illegal data file format for {0}: {1}, {2}",
                     new Object[]{version, new Byte(format), foundVersion});
             }
             date = dataIn.readLong();
             size = uDataFileCount = dataIn.readInt();

             // ensures the joining-type override below is reported only once
             boolean didJoiningHack = false;
             System.out.println("Loading UCD " + foundVersion);


             // records
             for (int i = 0; i < uDataFileCount; ++i) {
                 UData uData = new UData();
                 uData.readBytes(dataIn);

                 //T = Mc + (Cf - ZWNJ - ZWJ)
                 int cp = uData.codePoint;
                 byte old = uData.joiningType;
                 byte cat = uData.generalCategory;
                 // Override: from version 4.1.0 on, category Me is forced to joining type T.
                 if (cat == Me) {
                 	if (compositeVersion >= 0x40100) {
                 		uData.joiningType = JT_T;
                 	}
                 }
                 //if (cp == 0x200D) {
                   //  uData.joiningType = JT_C;
                 //} else
                 /*
                 if (cp != 0x200D && cp != 0x200C && (cat == Mn || cat == Cf)) {
                     uData.joiningType = JT_T;
                 }
                 */
                 if (!didJoiningHack && uData.joiningType != old) {
                     System.out.println("HACK " + foundVersion + ": Setting "
                         + UCD_Names.LONG_JOINING_TYPE[uData.joiningType]
                         + ": " + Utility.hex(cp) + " " + uData.name);
                     didJoiningHack = true;
                 }

                 combiningClassSet.set(uData.combiningClass & 0xFF);
                 // NOTE(review): looks like leftover diagnostic output for U+E000.
                 if (cp == 0xE000) {
                 	System.out.println("Check: " + uData.script);
                 }
                 add(uData);
             }
             /*
             if (update == -1) {
                 throw new ChainException("Data File truncated for ",
                     new Object[]{version}, e);
             }
             if (size != fileSize) {
                 throw new ChainException("Counts do not match: file {0}, records {1}",
                     new Object[]{new Integer(fileSize), new Integer(size)});
             }
             */
             // everything is ok!
             this.version = version;
             this.file = fileName;
             //+ " " + new File(fileName).lastModified();
         } catch (IOException e) {
             throw new ChainException("Can't read data file for {0}", new Object[]{version}, e);
         } finally {
             if (dataIn != null) {
                 try {
                     dataIn.close();
                 } catch (IOException e) {} // a failure to close is deliberately ignored
             }
         }
     }

     // Code point to block name map; stays null until loadBlocks() has run.
     UnicodeMap blockData;
     /**
      * Returns the block name recorded for a code point, loading the block
      * data on first use.
      *
      * @param codePoint code point to look up
      * @return the value stored for codePoint in the block map
      */
     public String getBlock(int codePoint) {
         if (blockData == null) {
             loadBlocks();
         }
         final Object name = blockData.getValue(codePoint);
         return (String) name;
     }
     /**
      * Returns the available block names in a newly created list.
      *
      * @return the list produced by getBlockNames(null)
      */
     public List getBlockNames() {
         final List noTarget = null;
         return getBlockNames(noTarget);
     }
     /**
      * Collects the available block names, loading the block data on first use.
      *
      * @param result list handed to the block map to receive the names; a new
      *        ArrayList is created when null
      * @return whatever the block map's getAvailableValues returns, cast to List
      */
     public List getBlockNames(List result) {
         final List target = (result != null) ? result : new ArrayList();
         if (blockData == null) {
             loadBlocks();
         }
         return (List) blockData.getAvailableValues(target);
     }
     /**
      * Returns the set of code points mapped to a block name, loading the
      * block data on first use.
      *
      * @param value block name to look for
      * @param result set handed to the block map to receive the code points;
      *        a new UnicodeSet is created when null
      * @return whatever the block map's getSet returns
      */
     public UnicodeSet getBlockSet(String value, UnicodeSet result) {
         final UnicodeSet target = (result != null) ? result : new UnicodeSet();
         if (blockData == null) {
             loadBlocks();
         }
         return blockData.getSet(value, target);
     }

     // Matcher for one data line of the form "start..end; name" (or
     // "start; end; name"): group 1 = start, group 2 = end (both upper-case hex),
     // group 3 = the rest of the line. It is a single shared, stateful
     // Matcher that loadBlocks() resets for every line.
     static final Matcher blockPattern = Pattern.compile("([0-9A-F]+)\\s*(?:[.][.]|[;])\\s*([0-9A-F]+)\\s*[;](.*)").matcher("");
     /**
      * Reads the "Blocks" data file for this version and builds the
      * code point to block name map.
      * <p>The map is built in a local variable and assigned to blockData
      * only after the whole file has been read, so a failed load leaves
      * blockData null and the next caller retries instead of using a
      * partially filled map. Spaces in block names are replaced by
      * underscores, and unmapped code points report "No_Block".
      *
      * @throws IllegalArgumentException if a line does not match blockPattern,
      *         or if the file cannot be read (the IOException is attached as
      *         the cause)
      */
     private void loadBlocks() {
         final UnicodeMap loaded = new UnicodeMap();

         try {
             BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
             try {
               for (int i = 1; ; ++i) {
                     // 0000..007F; Basic Latin
                     String line = Utility.readDataLine(in);
                     if (line == null) break;
                     if (line.length() == 0) continue;
                     if (!blockPattern.reset(line).matches()) {
                       throw new IllegalArgumentException("Bad line: " + line);
                     }

                     try {
                       int start = Integer.parseInt(blockPattern.group(1), 16);
                       int end = Integer.parseInt(blockPattern.group(2), 16);
                       String name = blockPattern.group(3).trim().replace(' ', '_');
                       loaded.putAll(start,end, name);
                     } catch (RuntimeException e) {
                       // report which line failed, then let the error propagate
                       System.err.println("Failed on line " + i + "\t" + line);
                       throw e;
                     }
                 }
                 loaded.setMissing("No_Block");
             } finally {
                 in.close();
             }
         } catch (IOException e) {
             // initCause keeps the underlying I/O failure without changing the
             // exception type that callers see
             IllegalArgumentException failure = new IllegalArgumentException("Can't read block file");
             failure.initCause(e);
             throw failure;
         }

         // publish only a completely loaded map
         blockData = loaded;
     }

     /*
     public static class BlockData {
         public int start;
         public int end;
         public String name;
     }

     public String NOBLOCK = Utility.getUnskeleton("no block", true);
     private BlockData lastBlock;

     public String getBlock(int codePoint) {
         if (blocks == null) loadBlocks();
         if (codePoint >= lastBlock.start && codePoint <= lastBlock.end) return lastBlock.name;
         Iterator it = blocks.iterator();
         while (it.hasNext()) {
             lastBlock = (BlockData) it.next();
             if (codePoint < lastBlock.start) continue;
             if (codePoint > lastBlock.end) break;
             return lastBlock.name;
         }
         return NOBLOCK;
     }

     public Collection getBlockNames(Collection result) {
         if (result == null) result = new ArrayList();
         if (blocks == null) loadBlocks();
         Iterator it = blocks.iterator();
         while (it.hasNext()) {
             BlockData data = (BlockData) it.next();
             UnicodeProperty.addUnique(data.name, result);
         }
         UnicodeProperty.addUnique(NOBLOCK, result);
         return result;
     }

     public boolean getBlockData(int blockId, BlockData output) {
         if (blocks == null) loadBlocks();
         BlockData temp;
         try {
             temp = (BlockData) blocks.get(blockId);
         } catch (IndexOutOfBoundsException e) {
             return false;
         }
         output.name = temp.name;
         output.start = temp.start;
         output.end = temp.end;
         return true;
     }

     private List blocks = null;

     private void loadBlocks() {
         blocks = new ArrayList();
         try {
             BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
             try {
                 while (true) {
                     // 0000..007F; Basic Latin
                     String line = Utility.readDataLine(in);
                     if (line == null) break;
                     if (line.length() == 0) continue;
                     int pos1 = line.indexOf('.');
                     int pos2 = line.indexOf(';', pos1);

                     lastBlock = new BlockData();
                     lastBlock.start = Integer.parseInt(line.substring(0, pos1), 16);
                     lastBlock.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
                     lastBlock.name = line.substring(pos2+1).trim().replace(' ', '_');
                     blocks.add(lastBlock);
                 }
             } finally {
                 in.close();
             }
         } catch (IOException e) {
             throw new IllegalArgumentException("Can't read block file");
         }
     }
     */
     /**
      * Returns the current value of the {@code compositeVersion} field.
      *
      * @return the value of {@code compositeVersion}
      */
     public int getCompositeVersion() {
         return this.compositeVersion;
     }

     /**
      * Replaces the value of the {@code compositeVersion} field.
      *
      * @param i the new value to store in {@code compositeVersion}
      */
     public void setCompositeVersion(int i) {
         this.compositeVersion = i;
     }

 }