// blob: 75f5fd7f658b769ba1763e56152caa1ead10f956 [file] [log] [blame]
/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 05/23/00 aliu Creation.
**********************************************************************
*/
#ifndef TESTUTIL_H
#define TESTUTIL_H
#include "unicode/utypes.h"
class UnicodeString;
/**
 * Namespace-like holder for test helpers that classify UChar values by
 * block and script and that format text as hex.
 *
 * All members are static and the only constructor is private, so this
 * class is not meant to be instantiated.
 */
class TestUtility {
public:
    /**
     * Script codes.  These are the values produced by the getScript()
     * overloads and accepted by getNextLetter().
     */
    enum { // SCRIPT CODE
        COMMON_SCRIPT     = 0,
        LATIN_SCRIPT      = 1,
        GREEK_SCRIPT      = 2,
        CYRILLIC_SCRIPT   = 3,
        ARMENIAN_SCRIPT   = 4,
        HEBREW_SCRIPT     = 5,
        ARABIC_SCRIPT     = 6,
        SYRIAC_SCRIPT     = 7,
        THAANA_SCRIPT     = 8,
        DEVANAGARI_SCRIPT = 9,
        BENGALI_SCRIPT    = 10,
        GURMUKHI_SCRIPT   = 11,
        GUJARATI_SCRIPT   = 12,
        ORIYA_SCRIPT      = 13,
        TAMIL_SCRIPT      = 14,
        TELUGU_SCRIPT     = 15,
        KANNADA_SCRIPT    = 16,
        MALAYALAM_SCRIPT  = 17,
        SINHALA_SCRIPT    = 18,
        THAI_SCRIPT       = 19,
        LAO_SCRIPT        = 20,
        TIBETAN_SCRIPT    = 21,
        MYANMAR_SCRIPT    = 22,
        GEORGIAN_SCRIPT   = 23,
        JAMO_SCRIPT       = 24,
        HANGUL_SCRIPT     = 25,
        ETHIOPIC_SCRIPT   = 26,
        CHEROKEE_SCRIPT   = 27,
        ABORIGINAL_SCRIPT = 28,
        OGHAM_SCRIPT      = 29,
        RUNIC_SCRIPT      = 30,
        KHMER_SCRIPT      = 31,
        MONGOLIAN_SCRIPT  = 32,
        HIRAGANA_SCRIPT   = 33,
        KATAKANA_SCRIPT   = 34,
        BOPOMOFO_SCRIPT   = 35,
        HAN_SCRIPT        = 36,
        YI_SCRIPT         = 37
    };

    /**
     * Block codes.  getScript(int8_t) uses a block code as the index
     * into blockToScript.
     */
    enum { // block code
        RESERVED_BLOCK                        = 0,
        BASIC_LATIN                           = 1,
        LATIN_1_SUPPLEMENT                    = 2,
        LATIN_EXTENDED_A                      = 3,
        LATIN_EXTENDED_B                      = 4,
        IPA_EXTENSIONS                        = 5,
        SPACING_MODIFIER_LETTERS              = 6,
        COMBINING_DIACRITICAL_MARKS           = 7,
        GREEK                                 = 8,
        CYRILLIC                              = 9,
        ARMENIAN                              = 10,
        HEBREW                                = 11,
        ARABIC                                = 12,
        SYRIAC                                = 13,
        THAANA                                = 14,
        DEVANAGARI                            = 15,
        BENGALI                               = 16,
        GURMUKHI                              = 17,
        GUJARATI                              = 18,
        ORIYA                                 = 19,
        TAMIL                                 = 20,
        TELUGU                                = 21,
        KANNADA                               = 22,
        MALAYALAM                             = 23,
        SINHALA                               = 24,
        THAI                                  = 25,
        LAO                                   = 26,
        TIBETAN                               = 27,
        MYANMAR                               = 28,
        GEORGIAN                              = 29,
        HANGUL_JAMO                           = 30,
        ETHIOPIC                              = 31,
        CHEROKEE                              = 32,
        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
        OGHAM                                 = 34,
        RUNIC                                 = 35,
        KHMER                                 = 36,
        MONGOLIAN                             = 37,
        LATIN_EXTENDED_ADDITIONAL             = 38,
        GREEK_EXTENDED                        = 39,
        GENERAL_PUNCTUATION                   = 40,
        SUPERSCRIPTS_AND_SUBSCRIPTS           = 41,
        CURRENCY_SYMBOLS                      = 42,
        COMBINING_MARKS_FOR_SYMBOLS           = 43,
        LETTERLIKE_SYMBOLS                    = 44,
        NUMBER_FORMS                          = 45,
        ARROWS                                = 46,
        MATHEMATICAL_OPERATORS                = 47,
        MISCELLANEOUS_TECHNICAL               = 48,
        CONTROL_PICTURES                      = 49,
        OPTICAL_CHARACTER_RECOGNITION         = 50,
        ENCLOSED_ALPHANUMERICS                = 51,
        BOX_DRAWING                           = 52,
        BLOCK_ELEMENTS                        = 53,
        GEOMETRIC_SHAPES                      = 54,
        MISCELLANEOUS_SYMBOLS                 = 55,
        DINGBATS                              = 56,
        BRAILLE_PATTERNS                      = 57,
        CJK_RADICALS_SUPPLEMENT               = 58,
        KANGXI_RADICALS                       = 59,
        IDEOGRAPHIC_DESCRIPTION_CHARACTERS    = 60,
        CJK_SYMBOLS_AND_PUNCTUATION           = 61,
        HIRAGANA                              = 62,
        KATAKANA                              = 63,
        BOPOMOFO                              = 64,
        HANGUL_COMPATIBILITY_JAMO             = 65,
        KANBUN                                = 66,
        BOPOMOFO_EXTENDED                     = 67,
        ENCLOSED_CJK_LETTERS_AND_MONTHS       = 68,
        CJK_COMPATIBILITY                     = 69,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A    = 70,
        CJK_UNIFIED_IDEOGRAPHS                = 71,
        YI_SYLLABLES                          = 72,
        YI_RADICALS                           = 73,
        HANGUL_SYLLABLES                      = 74,
        HIGH_SURROGATES                       = 75,
        HIGH_PRIVATE_USE_SURROGATES           = 76,
        LOW_SURROGATES                        = 77,
        PRIVATE_USE                           = 78,
        CJK_COMPATIBILITY_IDEOGRAPHS          = 79,
        ALPHABETIC_PRESENTATION_FORMS         = 80,
        ARABIC_PRESENTATION_FORMS_A           = 81,
        COMBINING_HALF_MARKS                  = 82,
        CJK_COMPATIBILITY_FORMS               = 83,
        SMALL_FORM_VARIANTS                   = 84,
        ARABIC_PRESENTATION_FORMS_B           = 85,
        SPECIALS                              = 86,
        HALFWIDTH_AND_FULLWIDTH_FORMS         = 87
    };

    // --- Classification ------------------------------------------------

    /** Script code of a character; defined inline after the class. */
    static inline int8_t getScript(UChar c);

    /** Script code associated with a block code; defined inline after the class. */
    static inline int8_t getScript(int8_t block);

    /**
     * Block lookup for a character.  Defined outside this header;
     * presumably yields one of the block codes above -- confirm against
     * the implementation.
     */
    static int8_t getBlock(UChar c);

    /** Yields the next letter of the given script, or 0xFFFF once done. */
    static UChar getNextLetter(UChar c, int8_t script);

    // --- Hex formatting --------------------------------------------------
    // Implemented outside this header.  Judging by the names these render
    // a code unit / string as hexadecimal text, with `sep` presumably a
    // separator between units -- confirm against the implementation.

    static UnicodeString hex(UChar ch);
    static UnicodeString hex(const UnicodeString& s);
    static UnicodeString hex(const UnicodeString& s, UChar sep);

private:
    // Lookup table read by getScript(): indexed by block code, holds
    // script codes.  Defined outside this header.
    static const int8_t blockToScript[];

    // Defined outside this header; presumably the table behind getBlock().
    static const int8_t charToBlock[];

    TestUtility() {} // Private: this class is never instantiated
};
/**
 * Script code for a character.
 *
 * The character is first mapped to a block code with getBlock(); that
 * block code is then used as the index into blockToScript.
 */
inline int8_t
TestUtility::getScript(UChar c) {
    const int8_t blockCode = getBlock(c);
    return blockToScript[blockCode];
}
/**
 * Script code for a block code.
 *
 * Reads blockToScript at index `block`.  No range check is made here, so
 * the caller is responsible for passing an index that is valid for the
 * table.
 */
inline int8_t
TestUtility::getScript(int8_t block) {
    const int8_t scriptCode = blockToScript[block];
    return scriptCode;
}
#endif