| /* |
| ***************************************************************************************** |
| * Copyright (C) 1997-1999, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ***************************************************************************************** |
| * |
| * File TXTBDAT.CPP |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 02/18/97 aliu Converted from OpenClass. |
| * Made static data members const where appropriate. |
| * 05/06/97 aliu Made kSI, kStop, and kSI_Stop into #defines to help out |
| * non-compliant compilers. |
| ***************************************************************************************** |
| */ |
| |
| #include "txtbdat.h" |
| |
| // ***************************************************************************** |
| // class TextBoundaryData |
| // ***************************************************************************** |
| |
| // The following definitions were removed and replaced by #defines because of compiler problems. |
| //const TextBoundaryData::Node TextBoundaryData::kSI = 0x80; |
| //const TextBoundaryData::Node TextBoundaryData::kStop = 0; |
| //const TextBoundaryData::Node TextBoundaryData::kSI_Stop = kSI + kStop; |
| |
| // Unicode characters that may need special mappings in a particular |
| // text boundary. |
| |
| // Control characters, space, and punctuation/symbols (U+0003..U+003F). |
| const UChar TextBoundaryData::ASCII_END_OF_TEXT = static_cast<UChar>(0x0003); |
| const UChar TextBoundaryData::ASCII_HORIZONTAL_TABULATION = static_cast<UChar>(0x0009); |
| const UChar TextBoundaryData::ASCII_LINEFEED = static_cast<UChar>(0x000A); |
| const UChar TextBoundaryData::ASCII_VERTICAL_TABULATION = static_cast<UChar>(0x000B); |
| const UChar TextBoundaryData::ASCII_FORM_FEED = static_cast<UChar>(0x000C); |
| const UChar TextBoundaryData::ASCII_CARRIAGE_RETURN = static_cast<UChar>(0x000D); |
| const UChar TextBoundaryData::ASCII_SPACE = static_cast<UChar>(0x0020); |
| const UChar TextBoundaryData::ASCII_EXCLAMATION_MARK = static_cast<UChar>(0x0021); |
| const UChar TextBoundaryData::ASCII_QUOTATION_MARK = static_cast<UChar>(0x0022); |
| const UChar TextBoundaryData::ASCII_NUMBER_SIGN = static_cast<UChar>(0x0023); |
| const UChar TextBoundaryData::ASCII_DOLLAR_SIGN = static_cast<UChar>(0x0024); |
| const UChar TextBoundaryData::ASCII_PERCENT = static_cast<UChar>(0x0025); |
| const UChar TextBoundaryData::ASCII_AMPERSAND = static_cast<UChar>(0x0026); |
| const UChar TextBoundaryData::ASCII_APOSTROPHE = static_cast<UChar>(0x0027); |
| const UChar TextBoundaryData::ASCII_COMMA = static_cast<UChar>(0x002C); |
| const UChar TextBoundaryData::ASCII_FULL_STOP = static_cast<UChar>(0x002E); |
| const UChar TextBoundaryData::ASCII_COLON = static_cast<UChar>(0x003A); |
| const UChar TextBoundaryData::ASCII_SEMICOLON = static_cast<UChar>(0x003B); |
| const UChar TextBoundaryData::ASCII_QUESTION_MARK = static_cast<UChar>(0x003F); |
| |
| // Values in the U+00A0..U+00B0 range; note that the first four keep the |
| // ASCII_ prefix even though their code points lie above 0x7F. |
| const UChar TextBoundaryData::ASCII_NONBREAKING_SPACE = static_cast<UChar>(0x00A0); |
| const UChar TextBoundaryData::ASCII_CENT_SIGN = static_cast<UChar>(0x00A2); |
| const UChar TextBoundaryData::ASCII_POUND_SIGN = static_cast<UChar>(0x00A3); |
| const UChar TextBoundaryData::ASCII_YEN_SIGN = static_cast<UChar>(0x00A5); |
| const UChar TextBoundaryData::LATIN1_SOFTHYPHEN = static_cast<UChar>(0x00AD); |
| const UChar TextBoundaryData::LATIN1_DEGREE_SIGN = static_cast<UChar>(0x00B0); |
| // Arabic percent sign and decimal separator. |
| const UChar TextBoundaryData::ARABIC_PERCENT_SIGN = static_cast<UChar>(0x066A); |
| const UChar TextBoundaryData::ARABIC_DECIMAL_SEPARATOR = static_cast<UChar>(0x066B); |
| |
| // Low and high code points of the Hangul choseong, jungseong, and |
| // jongseong ranges (three adjacent ranges spanning U+1100..U+11FF). |
| const UChar TextBoundaryData::HANGUL_CHOSEONG_LOW = static_cast<UChar>(0x1100); |
| const UChar TextBoundaryData::HANGUL_CHOSEONG_HIGH = static_cast<UChar>(0x115F); |
| const UChar TextBoundaryData::HANGUL_JUNGSEONG_LOW = static_cast<UChar>(0x1160); |
| const UChar TextBoundaryData::HANGUL_JUNGSEONG_HIGH = static_cast<UChar>(0x11A7); |
| const UChar TextBoundaryData::HANGUL_JONGSEONG_LOW = static_cast<UChar>(0x11A8); |
| const UChar TextBoundaryData::HANGUL_JONGSEONG_HIGH = static_cast<UChar>(0x11FF); |
| // Spaces, hyphens, separators, and per-mille/prime signs (U+2007..U+2034). |
| const UChar TextBoundaryData::FIGURE_SPACE = static_cast<UChar>(0x2007); |
| const UChar TextBoundaryData::NONBREAKING_HYPHEN = static_cast<UChar>(0x2011); |
| const UChar TextBoundaryData::PUNCTUATION_HYPHENATION_POINT = static_cast<UChar>(0x2027); |
| const UChar TextBoundaryData::PUNCTUATION_LINE_SEPARATOR = static_cast<UChar>(0x2028); |
| const UChar TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR = static_cast<UChar>(0x2029); |
| const UChar TextBoundaryData::PER_MILLE_SIGN = static_cast<UChar>(0x2030); |
| const UChar TextBoundaryData::PER_TEN_THOUSAND_SIGN = static_cast<UChar>(0x2031); |
| const UChar TextBoundaryData::PRIME = static_cast<UChar>(0x2032); |
| const UChar TextBoundaryData::DOUBLE_PRIME = static_cast<UChar>(0x2033); |
| const UChar TextBoundaryData::TRIPLE_PRIME = static_cast<UChar>(0x2034); |
| |
| // Temperature-unit symbols. |
| const UChar TextBoundaryData::DEGREE_CELSIUS = static_cast<UChar>(0x2103); |
| const UChar TextBoundaryData::DEGREE_FAHRENHEIT = static_cast<UChar>(0x2109); |
| // Ideographic punctuation and iteration mark (U+3001..U+3005). |
| const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_COMMA = static_cast<UChar>(0x3001); |
| const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_FULL_STOP = static_cast<UChar>(0x3002); |
| const UChar TextBoundaryData::IDEOGRAPHIC_ITERATION_MARK = static_cast<UChar>(0x3005); |
| |
| // Hiragana letters (small and regular forms) in the U+3041..U+3094 range. |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_A = static_cast<UChar>(0x3041); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_A = static_cast<UChar>(0x3042); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_I = static_cast<UChar>(0x3043); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_I = static_cast<UChar>(0x3044); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_U = static_cast<UChar>(0x3045); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_U = static_cast<UChar>(0x3046); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_E = static_cast<UChar>(0x3047); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_E = static_cast<UChar>(0x3048); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_O = static_cast<UChar>(0x3049); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_O = static_cast<UChar>(0x304A); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_DI = static_cast<UChar>(0x3062); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_TU = static_cast<UChar>(0x3063); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_TU = static_cast<UChar>(0x3064); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_MO = static_cast<UChar>(0x3082); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YA = static_cast<UChar>(0x3083); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_YA = static_cast<UChar>(0x3084); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YU = static_cast<UChar>(0x3085); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_YU = static_cast<UChar>(0x3086); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YO = static_cast<UChar>(0x3087); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_YO = static_cast<UChar>(0x3088); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_RO = static_cast<UChar>(0x308D); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_WA = static_cast<UChar>(0x308E); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_WA = static_cast<UChar>(0x308F); |
| const UChar TextBoundaryData::HIRAGANA_LETTER_VU = static_cast<UChar>(0x3094); |
| |
| // Sound marks and iteration marks that follow the Hiragana letters |
| // (U+3099..U+309E). |
| const UChar TextBoundaryData::COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK = static_cast<UChar>(0x3099); |
| const UChar TextBoundaryData::HIRAGANA_SEMIVOICED_SOUND_MARK = static_cast<UChar>(0x309C); |
| const UChar TextBoundaryData::HIRAGANA_ITERATION_MARK = static_cast<UChar>(0x309D); |
| const UChar TextBoundaryData::HIRAGANA_VOICED_ITERATION_MARK = static_cast<UChar>(0x309E); |
| // Katakana letters (small and regular forms) in the U+30A1..U+30FA range. |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_A = static_cast<UChar>(0x30A1); |
| const UChar TextBoundaryData::KATAKANA_LETTER_A = static_cast<UChar>(0x30A2); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_I = static_cast<UChar>(0x30A3); |
| const UChar TextBoundaryData::KATAKANA_LETTER_I = static_cast<UChar>(0x30A4); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_U = static_cast<UChar>(0x30A5); |
| const UChar TextBoundaryData::KATAKANA_LETTER_U = static_cast<UChar>(0x30A6); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_E = static_cast<UChar>(0x30A7); |
| const UChar TextBoundaryData::KATAKANA_LETTER_E = static_cast<UChar>(0x30A8); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_O = static_cast<UChar>(0x30A9); |
| const UChar TextBoundaryData::KATAKANA_LETTER_O = static_cast<UChar>(0x30AA); |
| const UChar TextBoundaryData::KATAKANA_LETTER_DI = static_cast<UChar>(0x30C2); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_TU = static_cast<UChar>(0x30C3); |
| const UChar TextBoundaryData::KATAKANA_LETTER_TU = static_cast<UChar>(0x30C4); |
| const UChar TextBoundaryData::KATAKANA_LETTER_MO = static_cast<UChar>(0x30E2); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YA = static_cast<UChar>(0x30E3); |
| const UChar TextBoundaryData::KATAKANA_LETTER_YA = static_cast<UChar>(0x30E4); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YU = static_cast<UChar>(0x30E5); |
| const UChar TextBoundaryData::KATAKANA_LETTER_YU = static_cast<UChar>(0x30E6); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YO = static_cast<UChar>(0x30E7); |
| const UChar TextBoundaryData::KATAKANA_LETTER_YO = static_cast<UChar>(0x30E8); |
| const UChar TextBoundaryData::KATAKANA_LETTER_RO = static_cast<UChar>(0x30ED); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_WA = static_cast<UChar>(0x30EE); |
| const UChar TextBoundaryData::KATAKANA_LETTER_WA = static_cast<UChar>(0x30EF); |
| const UChar TextBoundaryData::KATAKANA_LETTER_VU = static_cast<UChar>(0x30F4); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KA = static_cast<UChar>(0x30F5); |
| const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KE = static_cast<UChar>(0x30F6); |
| const UChar TextBoundaryData::KATAKANA_LETTER_VA = static_cast<UChar>(0x30F7); |
| const UChar TextBoundaryData::KATAKANA_LETTER_VO = static_cast<UChar>(0x30FA); |
| |
| // Prolonged sound mark and iteration marks that follow the Katakana |
| // letters (U+30FC..U+30FE). |
| const UChar TextBoundaryData::KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = static_cast<UChar>(0x30FC); |
| const UChar TextBoundaryData::KATAKANA_ITERATION_MARK = static_cast<UChar>(0x30FD); |
| const UChar TextBoundaryData::KATAKANA_VOICED_ITERATION_MARK = static_cast<UChar>(0x30FE); |
| // Low and high bounds of the Han range and of the Hangul syllable range. |
| const UChar TextBoundaryData::UNICODE_LOW_BOUND_HAN = static_cast<UChar>(0x4E00); |
| const UChar TextBoundaryData::UNICODE_HIGH_BOUND_HAN = static_cast<UChar>(0x9FA5); |
| const UChar TextBoundaryData::HANGUL_SYL_LOW = static_cast<UChar>(0xAC00); |
| const UChar TextBoundaryData::HANGUL_SYL_HIGH = static_cast<UChar>(0xD7A3); |
| |
| // CJK compatibility code points U+F900 and U+FA2D. |
| const UChar TextBoundaryData::CJK_COMPATIBILITY_F900 = static_cast<UChar>(0xF900); |
| const UChar TextBoundaryData::CJK_COMPATIBILITY_FA2D = static_cast<UChar>(0xFA2D); |
| |
| // Zero-width non-breaking space (U+FEFF) and fullwidth punctuation. |
| const UChar TextBoundaryData::UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE = static_cast<UChar>(0xFEFF); |
| const UChar TextBoundaryData::FULLWIDTH_EXCLAMATION_MARK = static_cast<UChar>(0xFF01); |
| const UChar TextBoundaryData::FULLWIDTH_FULL_STOP = static_cast<UChar>(0xFF0E); |
| const UChar TextBoundaryData::FULLWIDTH_QUESTION_MARK = static_cast<UChar>(0xFF1F); |
| |
| // SimpleTextBoundary's internal convention: the not-a-Unicode value 0xFFFF |
| // stands for the end of the string when looking up the proper state |
| // transition for the end of the string. |
| const UChar TextBoundaryData::END_OF_STRING = static_cast<UChar>(0xFFFF); |
| |
| //eof |