// blob: fae6843479a3a45e68fcb9751e7a8cef1f778cc1 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2006/04/05 22:12:44 $
* $Revision: 1.33 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
/**
 * Numeric codes, limits and bit masks used to label Unicode character
 * property values.
 *
 * <p>Every field declared in an interface is implicitly
 * {@code public static final}, so those modifiers are not repeated on the
 * individual declarations below.
 */
public interface UCD_Types {

    /** Bumped if the binary format of UCD changes; forces a rebuild. */
    byte BINARY_FORMAT = 17;

    // Directory locations. Every path ends with a backslash so that a file
    // name can be appended directly.
    String BASE_DIR = "C:\\DATA\\";
    String UCD_DIR = BASE_DIR + "UCD\\";
    String BIN_DIR = BASE_DIR + "BIN\\";
    String GEN_DIR = BASE_DIR + "GEN\\";

    char DOTTED_CIRCLE = '\u25CC';

    // CJK code point ranges. Each *_LIMIT is written as "last value + 1".
    int CJK_BASE = 0x4E00;
    int CJK_LIMIT = 0x9FFF + 1;
    int CJK_COMPAT_USED_BASE = 0xFA0E;
    int CJK_COMPAT_USED_LIMIT = 0xFA2F + 1;
    int CJK_A_BASE = 0x3400;
    int CJK_A_LIMIT = 0x4DBF + 1;
    int CJK_B_BASE = 0x20000;
    int CJK_B_LIMIT = 0x2A6DF + 1;

    // Unicode Property Types (bit flags; DERIVED_ALL combines the two DERIVED_* bits).
    byte NOT_DERIVED = 1;
    byte DERIVED_CORE = 2;
    byte DERIVED_NORMALIZATION = 4;
    byte DERIVED_ALL = DERIVED_CORE | DERIVED_NORMALIZATION;
    byte ALL = (byte) -1;

    // Kinds of property.
    byte NUMERIC_PROP = 0;
    byte STRING_PROP = 1;
    byte MISC_PROP = 2;
    byte CATALOG_PROP = 3;
    byte ENUMERATED_PROP = 4;
    byte BINARY_PROP = 5;
    byte FLATTENED_BINARY_PROP = 6;
    byte UNKNOWN_PROP = 7;

    /*
     * Numbered field list retained from the original source (the original
     * descriptions were cut off mid-sentence and are abbreviated here):
     *
     *  0  Code value in 4-digit hexadecimal format.
     *  1  Unicode 2.1 character name.
     *  2  General category.
     *  3  Canonical combining class.
     *  4  Bidirectional category.
     *  5  Character decomposition.
     *  6  Decimal digit value (numeric field).
     *  7  Digit value (numeric field).
     *  8  Numeric value (numeric field).
     *  9  Whether the character has been identified as a "mirrored" character.
     * 10  Unicode 1.0 name (the old name as published in Unicode 1.0).
     * 11  10646 comment field (informative).
     * 12  Upper case equivalent mapping.
     * 13  Lower case equivalent mapping (similar to 12; informative).
     * 14  Title case equivalent mapping (similar to 12; informative).
     */

    // for IDs
    byte NUMBER = -2;
    byte SHORT = -1;
    byte NORMAL = 0;
    byte LONG = 1;
    byte BOTH = 2;
    byte EXTRA_ALIAS = 3;

    // Binary ENUM Grouping: each group base is a multiple of NEXT_ENUM (0x100).
    int CATEGORY = 0;
    int COMBINING_CLASS = 0x100;
    int BIDI_CLASS = 0x200;
    int DECOMPOSITION_TYPE = 0x300;
    int NUMERIC_TYPE = 0x400;
    int EAST_ASIAN_WIDTH = 0x500;
    int LINE_BREAK = 0x600;
    int JOINING_TYPE = 0x700;
    int JOINING_GROUP = 0x800;
    int BINARY_PROPERTIES = 0x900;
    int SCRIPT = 0xA00;
    int AGE = 0xB00;
    int HANGUL_SYLLABLE_TYPE = 0xC00;
    int DERIVED = 0xD00;
    int LIMIT_ENUM = DERIVED + 0x100;
    int NEXT_ENUM = 0x100;

    int LIMIT_COMBINING_CLASS = 256;

    // General category codes (labelled "getCategory" in the original source).
    byte UNASSIGNED = 0;
    byte UPPERCASE_LETTER = 1;
    byte LOWERCASE_LETTER = 2;
    byte TITLECASE_LETTER = 3;
    byte MODIFIER_LETTER = 4;
    byte OTHER_LETTER = 5;
    byte NON_SPACING_MARK = 6;
    byte ENCLOSING_MARK = 7;
    byte COMBINING_SPACING_MARK = 8;
    byte DECIMAL_DIGIT_NUMBER = 9;
    byte LETTER_NUMBER = 10;
    byte OTHER_NUMBER = 11;
    byte SPACE_SEPARATOR = 12;
    byte LINE_SEPARATOR = 13;
    byte PARAGRAPH_SEPARATOR = 14;
    byte CONTROL = 15;
    byte FORMAT = 16;
    byte UNUSED_CATEGORY = 17;
    byte PRIVATE_USE = 18;
    byte SURROGATE = 19;
    byte DASH_PUNCTUATION = 20;
    byte START_PUNCTUATION = 21;
    byte END_PUNCTUATION = 22;
    byte CONNECTOR_PUNCTUATION = 23;
    byte OTHER_PUNCTUATION = 24;
    byte MATH_SYMBOL = 25;
    byte CURRENCY_SYMBOL = 26;
    byte MODIFIER_SYMBOL = 27;
    byte OTHER_SYMBOL = 28;
    byte INITIAL_PUNCTUATION = 29;
    byte FINAL_PUNCTUATION = 30;
    byte LIMIT_CATEGORY = FINAL_PUNCTUATION + 1;

    // Unicode abbreviations: two-letter aliases for the category codes above.
    byte Lu = UPPERCASE_LETTER;
    byte Ll = LOWERCASE_LETTER;
    byte Lt = TITLECASE_LETTER;
    byte Lm = MODIFIER_LETTER;
    byte Lo = OTHER_LETTER;
    byte Mn = NON_SPACING_MARK;
    byte Me = ENCLOSING_MARK;
    byte Mc = COMBINING_SPACING_MARK;
    byte Nd = DECIMAL_DIGIT_NUMBER;
    byte Nl = LETTER_NUMBER;
    byte No = OTHER_NUMBER;
    byte Zs = SPACE_SEPARATOR;
    byte Zl = LINE_SEPARATOR;
    byte Zp = PARAGRAPH_SEPARATOR;
    byte Cc = CONTROL;
    byte Cf = FORMAT;
    byte Cs = SURROGATE;
    byte Co = PRIVATE_USE;
    byte Cn = UNASSIGNED;
    byte Pc = CONNECTOR_PUNCTUATION;
    byte Pd = DASH_PUNCTUATION;
    byte Ps = START_PUNCTUATION;
    byte Pe = END_PUNCTUATION;
    byte Po = OTHER_PUNCTUATION;
    byte Pi = INITIAL_PUNCTUATION;
    byte Pf = FINAL_PUNCTUATION;
    byte Sm = MATH_SYMBOL;
    byte Sc = CURRENCY_SYMBOL;
    byte Sk = MODIFIER_SYMBOL;
    byte So = OTHER_SYMBOL;

    // Category bit masks: bit n of a mask stands for the category whose code is n.
    int LETTER_MASK = (1 << Lu) | (1 << Ll) | (1 << Lt) | (1 << Lm) | (1 << Lo);
    int CASED_LETTER_MASK = (1 << Lu) | (1 << Ll) | (1 << Lt);
    int MARK_MASK = (1 << Mn) | (1 << Me) | (1 << Mc);
    int NUMBER_MASK = (1 << Nd) | (1 << Nl) | (1 << No);
    int SEPARATOR_MASK = (1 << Zs) | (1 << Zl) | (1 << Zp);
    int CONTROL_MASK = (1 << Cc) | (1 << Cf) | (1 << Cs) | (1 << Co);
    int PUNCTUATION_MASK =
            (1 << Pc) | (1 << Pd) | (1 << Ps) | (1 << Pe) | (1 << Po) | (1 << Pi) | (1 << Pf);
    int SYMBOL_MASK = (1 << Sm) | (1 << Sc) | (1 << Sk) | (1 << So);
    int UNASSIGNED_MASK = (1 << Cn);
    int BASE_MASK = LETTER_MASK | NUMBER_MASK | PUNCTUATION_MASK | SYMBOL_MASK | (1 << Mc);
    int NONSPACING_MARK_MASK = (1 << Mn) | (1 << Me);

    // Binary Properties
    byte BidiMirrored = 0;
    byte CompositionExclusion = 1;
    byte White_space = 2;
    byte Non_break = 3;
    byte Bidi_Control = 4;
    byte Join_Control = 5;
    byte Dash = 6;
    byte Hyphen = 7;
    byte Quotation_Mark = 8;
    byte Terminal_Punctuation = 9;
    byte Math_Property = 10;
    byte Hex_Digit = 11;
    byte ASCII_Hex_Digit = 12;
    byte Other_Alphabetic = 13;
    byte Ideographic = 14;
    byte Diacritic = 15;
    byte Extender = 16;
    byte Other_Lowercase = 17;
    byte Other_Uppercase = 18;
    byte Noncharacter_Code_Point = 19;
    byte CaseFoldTurkishI = 20;
    byte Other_GraphemeExtend = 21;
    byte GraphemeLink = 22;
    byte IDS_BinaryOperator = 23;
    byte IDS_TrinaryOperator = 24;
    byte Radical = 25;
    byte UnifiedIdeograph = 26;
    byte Other_Default_Ignorable_Code_Point = 27;
    byte Deprecated = 28;
    byte Soft_Dotted = 29;
    byte Logical_Order_Exception = 30;
    byte Other_ID_Start = 31;
    byte Sentence_Terminal = 32;
    byte Variation_Selector = 33;
    byte Other_ID_Continue = 34;
    byte Pattern_White_Space = 35;
    byte Pattern_Syntax = 36;
    byte LIMIT_BINARY_PROPERTIES = 37;

    /*
     * Disabled per-property masks, kept from the original source:
     *
     * static final int
     * BidiMirroredMask = 1<<BidiMirrored,
     * CompositionExclusionMask = 1<<CompositionExclusion,
     * AlphabeticMask = 1<<Other_Alphabetic,
     * Bidi_ControlMask = 1<<Bidi_Control,
     * DashMask = 1<<Dash,
     * DiacriticMask = 1<<Diacritic,
     * ExtenderMask = 1<<Extender,
     * Hex_DigitMask = 1<<Hex_Digit,
     * HyphenMask = 1<<Hyphen,
     * IdeographicMask = 1<<Ideographic,
     * Join_ControlMask = 1<<Join_Control,
     * Math_PropertyMask = 1<<Math_Property,
     * Non_breakMask = 1<<Non_break,
     * Noncharacter_Code_PointMask = 1<<Noncharacter_Code_Point,
     * Other_LowercaseMask = 1<<Other_Lowercase,
     * Other_UppercaseMask = 1<<Other_Uppercase,
     * Quotation_MarkMask = 1<<Quotation_Mark,
     * Terminal_PunctuationMask = 1<<Terminal_Punctuation,
     * White_spaceMask = 1<<White_space;
     */

    // line break
    byte LB_XX = 0;
    byte LB_OP = 1;
    byte LB_CL = 2;
    byte LB_QU = 3;
    byte LB_GL = 4;
    byte LB_NS = 5;
    byte LB_EX = 6;
    byte LB_SY = 7;
    byte LB_IS = 8;
    byte LB_PR = 9;
    byte LB_PO = 10;
    byte LB_NU = 11;
    byte LB_AL = 12;
    byte LB_ID = 13;
    byte LB_IN = 14;
    byte LB_HY = 15;
    byte LB_CM = 16;
    byte LB_BB = 17;
    byte LB_BA = 18;
    byte LB_SP = 19;
    byte LB_BK = 20;
    byte LB_CR = 21;
    byte LB_LF = 22;
    byte LB_CB = 23;
    byte LB_SA = 24;
    byte LB_AI = 25;
    byte LB_B2 = 26;
    byte LB_SG = 27;
    byte LB_ZW = 28;
    byte LB_NL = 29;
    byte LB_WJ = 30;
    // The original source also carries commented-out definitions
    // LB_JL = 29, LB_JV = 30, LB_JT = 31; the active values follow.
    byte LB_JL = 31;
    byte LB_JV = 32;
    byte LB_JT = 33;
    byte LB_H2 = 34;
    byte LB_H3 = 35;
    byte LIMIT_LINE_BREAK = 36;
    byte LB_LIMIT = LIMIT_LINE_BREAK; // alias of LIMIT_LINE_BREAK

    // east asian width
    byte EAN = 0;
    byte EAA = 1;
    byte EAH = 2;
    byte EAW = 3;
    byte EAF = 4;
    byte EANa = 5;
    byte LIMIT_EAST_ASIAN_WIDTH = 6;

    // bidi class
    /** Left-Right; most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs). */
    byte BIDI_L = 0;
    /** Right-Left; Arabic, Hebrew, and punctuation specific to those scripts. */
    byte BIDI_R = 1;
    /** European Number. */
    byte BIDI_EN = 2;
    /** European Number Separator. */
    byte BIDI_ES = 3;
    /** European Number Terminator. */
    byte BIDI_ET = 4;
    /** Arabic Number. */
    byte BIDI_AN = 5;
    /** Common Number Separator. */
    byte BIDI_CS = 6;
    /** Block Separator. */
    byte BIDI_B = 7;
    /** Segment Separator. */
    byte BIDI_S = 8;
    /** Whitespace. */
    byte BIDI_WS = 9;
    /** Other Neutrals; all other characters: punctuation, symbols. */
    byte BIDI_ON = 10;
    byte LIMIT_BIDI_2 = 11;
    byte BIDI_UNUSED = 11; // same value as LIMIT_BIDI_2
    byte BIDI_BN = 12;
    byte BIDI_NSM = 13;
    byte BIDI_AL = 14;
    byte BIDI_LRO = 15;
    byte BIDI_RLO = 16;
    byte BIDI_LRE = 17;
    byte BIDI_RLE = 18;
    byte BIDI_PDF = 19;
    byte LIMIT_BIDI_CLASS = 20;

    // decompositionType
    byte NONE = 0;
    byte CANONICAL = 1;
    byte COMPATIBILITY = 2;
    /** Otherwise unspecified compatibility character (same value as COMPATIBILITY). */
    byte COMPAT_UNSPECIFIED = 2;
    /** A font variant (e.g. a blackletter form). */
    byte COMPAT_FONT = 3;
    /** A no-break version of a space or hyphen. */
    byte COMPAT_NOBREAK = 4;
    /** An initial presentation form (Arabic). */
    byte COMPAT_INITIAL = 5;
    /** A medial presentation form (Arabic). */
    byte COMPAT_MEDIAL = 6;
    /** A final presentation form (Arabic). */
    byte COMPAT_FINAL = 7;
    /** An isolated presentation form (Arabic). */
    byte COMPAT_ISOLATED = 8;
    /** An encircled form. */
    byte COMPAT_CIRCLE = 9;
    /** A superscript form. */
    byte COMPAT_SUPER = 10;
    /** A subscript form. */
    byte COMPAT_SUB = 11;
    /** A vertical layout presentation form. */
    byte COMPAT_VERTICAL = 12;
    /** A wide (or zenkaku) compatibility character. */
    byte COMPAT_WIDE = 13;
    /** A narrow (or hankaku) compatibility character. */
    byte COMPAT_NARROW = 14;
    /** A small variant form (CNS compatibility). */
    byte COMPAT_SMALL = 15;
    /** A CJK squared font variant. */
    byte COMPAT_SQUARE = 16;
    /** A vulgar fraction form. */
    byte COMPAT_FRACTION = 17;
    byte LIMIT_DECOMPOSITION_TYPE = 18;

    // mirrored type
    byte NO = 0;
    byte YES = 1;
    byte LIMIT_MIRRORED = 2;

    // for QuickCheck
    byte QNO = 0;
    byte QMAYBE = 1;
    byte QYES = 2;

    // case type (UNCASED and FOLD share the value 3)
    byte LOWER = 0;
    byte TITLE = 1;
    byte UPPER = 2;
    byte UNCASED = 3;
    byte FOLD = 3;
    byte LIMIT_CASE = 4;

    byte SIMPLE = 0;
    byte FULL = 8;

    // normalization type
    byte UNNORMALIZED = 0;
    byte C = 1;
    byte KC = 2;
    byte D = 3;
    byte KD = 4;
    byte FORM_LIMIT = 5;

    // numericType
    byte NUMERIC_NONE = 0;
    byte NUMERIC = 1;
    byte DIGIT = 2;
    byte DECIMAL = 3;
    byte LIMIT_NUMERIC_TYPE = 4;
    // HAN_PRIMARY = 4, HAN_ACCOUNTING = 5, HAN_OTHER = 6,
    // WARNING, reset to 7 if all properties desired!!

    // Hangul syllable type codes.
    byte NA = 0;
    byte L = 1;
    byte V = 2;
    byte T = 3;
    byte LV = 4;
    byte LVT = 5;
    byte HANGUL_SYLLABLE_TYPE_LIMIT = 6;

    // SCRIPT CODE
    byte COMMON_SCRIPT = 0;
    byte LATIN_SCRIPT = 1;
    byte GREEK_SCRIPT = 2;
    byte CYRILLIC_SCRIPT = 3;
    byte ARMENIAN_SCRIPT = 4;
    byte HEBREW_SCRIPT = 5;
    byte ARABIC_SCRIPT = 6;
    byte SYRIAC_SCRIPT = 7;
    byte THAANA_SCRIPT = 8;
    byte DEVANAGARI_SCRIPT = 9;
    byte BENGALI_SCRIPT = 10;
    byte GURMUKHI_SCRIPT = 11;
    byte GUJARATI_SCRIPT = 12;
    byte ORIYA_SCRIPT = 13;
    byte TAMIL_SCRIPT = 14;
    byte TELUGU_SCRIPT = 15;
    byte KANNADA_SCRIPT = 16;
    byte MALAYALAM_SCRIPT = 17;
    byte SINHALA_SCRIPT = 18;
    byte THAI_SCRIPT = 19;
    byte LAO_SCRIPT = 20;
    byte TIBETAN_SCRIPT = 21;
    byte MYANMAR_SCRIPT = 22;
    byte GEORGIAN_SCRIPT = 23;
    byte UNUSED_SCRIPT = 24;
    byte HANGUL_SCRIPT = 25;
    byte ETHIOPIC_SCRIPT = 26;
    byte CHEROKEE_SCRIPT = 27;
    byte ABORIGINAL_SCRIPT = 28;
    byte OGHAM_SCRIPT = 29;
    byte RUNIC_SCRIPT = 30;
    byte KHMER_SCRIPT = 31;
    byte MONGOLIAN_SCRIPT = 32;
    byte HIRAGANA_SCRIPT = 33;
    byte KATAKANA_SCRIPT = 34;
    byte BOPOMOFO_SCRIPT = 35;
    byte HAN_SCRIPT = 36;
    byte YI_SCRIPT = 37;
    byte OLD_ITALIC_SCRIPT = 38;
    byte GOTHIC_SCRIPT = 39;
    byte DESERET_SCRIPT = 40;
    byte INHERITED_SCRIPT = 41;
    byte TAGALOG_SCRIPT = 42;
    byte HANUNOO_SCRIPT = 43;
    byte BUHID_SCRIPT = 44;
    byte TAGBANWA_SCRIPT = 45;
    byte LIMBU = 46;
    byte TAI_LE = 47;
    byte LINEAR_B = 48;
    byte UGARITIC = 49;
    byte SHAVIAN = 50;
    byte OSMANYA = 51;
    byte CYPRIOT = 52;
    byte BRAILLE = 53;
    byte KATAKANA_OR_HIRAGANA = 54;
    byte BUGINESE = 55;
    byte COPTIC = 56;
    byte NEW_TAI_LUE = 57;
    byte GLAGOLITIC = 58;
    byte TIFINAGH = 59;
    byte SYLOTI_NAGRI = 60;
    byte OLD_PERSIAN = 61;
    byte KHAROSHTHI = 62;
    byte Balinese = 63;
    byte Cuneiform = 64;
    byte Phoenician = 65;
    byte Phags_Pa = 66;
    byte NKo = 67;
    byte Unknown_Script = 68;
    byte LIMIT_SCRIPT = 69;

    // Age codes; each code is also the index of its version string in AGE_VERSIONS.
    int UNKNOWN = 0;
    int AGE11 = 1;
    int AGE20 = 2;
    int AGE21 = 3;
    int AGE30 = 4;
    int AGE31 = 5;
    int AGE32 = 6;
    int AGE40 = 7;
    int AGE41 = 8;
    int AGE50 = 9;
    int LIMIT_AGE = 10;

    String[] AGE_VERSIONS = {
        "?",
        "1.1.0",
        "2.0.0",
        "2.1.2",
        "3.0.0",
        "3.1.0",
        "3.2.0",
        "4.0.0",
        "4.1.0",
        "5.0.0"
    };

    // Joining type codes.
    byte JT_C = 0;
    byte JT_D = 1;
    byte JT_R = 2;
    byte JT_U = 3;
    byte JT_L = 4;
    byte JT_T = 5;
    byte LIMIT_JOINING_TYPE = 6;

    // Joining group codes.
    byte NO_SHAPING = 0;
    byte AIN = 1;
    byte ALAPH = 2;
    byte ALEF = 3;
    byte BEH = 4;
    byte BETH = 5;
    byte DAL = 6;
    byte DALATH_RISH = 7;
    byte E = 8;
    byte FEH = 9;
    byte FINAL_SEMKATH = 10;
    byte GAF = 11;
    byte GAMAL = 12;
    byte HAH = 13;
    byte HAMZA_ON_HEH_GOAL = 14;
    byte HE = 15;
    byte HEH = 16;
    byte HEH_GOAL = 17;
    byte HETH = 18;
    byte KAF = 19;
    byte KAPH = 20;
    byte KNOTTED_HEH = 21;
    byte LAM = 22;
    byte LAMADH = 23;
    byte MEEM = 24;
    byte MIM = 25;
    byte NOON = 26;
    byte NUN = 27;
    byte PE = 28;
    byte QAF = 29;
    byte QAPH = 30;
    byte REH = 31;
    byte REVERSED_PE = 32;
    byte SAD = 33;
    byte SADHE = 34;
    byte SEEN = 35;
    byte SEMKATH = 36;
    byte SHIN = 37;
    byte SWASH_KAF = 38;
    byte TAH = 39;
    byte TAW = 40;
    byte TEH_MARBUTA = 41;
    byte TETH = 42;
    byte WAW = 43;
    byte SYRIAC_WAW = 44;
    byte YEH = 45;
    byte YEH_BARREE = 46;
    byte YEH_WITH_TAIL = 47;
    byte YUDH = 48;
    byte YUDH_HE = 49;
    byte ZAIN = 50;
    byte ZHAIN = 51;
    byte KHAPH = 52;
    byte FE = 53;
    byte LIMIT_JOINING_GROUP = 54;

    // Normalization form codes. With these values, NF_COMPOSITION_MASK (1) is
    // set in NFC and NFKC, and NF_COMPATIBILITY_MASK (2) is set in NFKD and NFKC.
    byte NFD = 0;
    byte NFC = 1;
    byte NFKD = 2;
    byte NFKC = 3;
    int NF_COMPATIBILITY_MASK = 2;
    int NF_COMPOSITION_MASK = 1;

    // DERIVED PROPERTY
    byte PropMath = 0;
    byte PropAlphabetic = 1;
    byte PropLowercase = 2;
    byte PropUppercase = 3;
    byte ID_Start = 4;
    byte ID_Continue_NO_Cf = 5;
    byte Mod_ID_Start = 6;
    byte Mod_ID_Continue_NO_Cf = 7;
    byte Missing_Uppercase = 8;
    byte Missing_Lowercase = 9;
    byte Missing_Mixedcase = 10;
    byte FC_NFKC_Closure = 11;
    byte FullCompExclusion = 12;
    byte FullCompInclusion = 13;
    byte QuickNFD = 14;
    byte QuickNFC = 15;
    byte QuickNFKD = 16;
    byte QuickNFKC = 17;
    byte ExpandsOnNFD = 18;
    byte ExpandsOnNFC = 19;
    byte ExpandsOnNFKD = 20;
    byte ExpandsOnNFKC = 21;
    byte GenNFD = 22;
    byte GenNFC = 23;
    byte GenNFKD = 24;
    byte GenNFKC = 25;
    byte DefaultIgnorable = 26;
    byte GraphemeExtend = 27;
    byte GraphemeBase = 28;
    byte FC_NFC_Closure = 29;
    byte Other_Case_Ignorable = 30;
    byte Case_Ignorable = 31;
    byte Type_i = 32;
    byte NFC_Leading = 33;
    byte NFC_TrailingNonZero = 34;
    byte NFC_TrailingZero = 35;
    byte NFC_Resulting = 36;
    byte NFD_UnsafeStart = 37;
    byte NFC_UnsafeStart = 38;
    byte NFKD_UnsafeStart = 39;
    byte NFKC_UnsafeStart = 40;
    byte NFD_Skippable = 41;
    byte NFC_Skippable = 42;
    byte NFKD_Skippable = 43;
    byte NFKC_Skippable = 44;
    byte Case_Sensitive = 45;
    byte DERIVED_PROPERTY_LIMIT = 46;
}