| /* | 
 | ******************************************************************************* | 
 | *   Copyright (C) 2013, International Business Machines | 
 | *   Corporation and others.  All Rights Reserved. | 
 | ******************************************************************************* | 
 | *   file name:  uscript_props.cpp | 
 | *   encoding:   US-ASCII | 
 | *   tab size:   8 (not used) | 
 | *   indentation:4 | 
 | * | 
 | *   created on: 2013feb16 | 
 | *   created by: Markus W. Scherer | 
 | */ | 
 |  | 
 | #include "unicode/utypes.h" | 
 | #include "unicode/unistr.h" | 
 | #include "unicode/uscript.h" | 
 | #include "unicode/utf16.h" | 
 | #include "ustr_imp.h" | 
 |  | 
 | // Element count of an array, computed as total byte size divided by the size of | 
 | // element 0, and cast to int32_t. Only meaningful for a true array operand: | 
 | // for a pointer, sizeof(array) is the pointer size, not the array size. | 
 | #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | 
 |  | 
 | namespace { | 
 |  | 
 | // Script metadata (script properties). | 
 | // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt | 
 |  | 
 | // Layout of one packed int32_t properties word, as stored in SCRIPT_PROPS below. | 
 | // 0 = NOT_ENCODED, no sample character, default false script properties. | 
 | // Bits 20.. 0: sample character | 
 | //              (the accessors in this file extract it with the mask 0x1fffff) | 
 |  | 
 | // Bits 23..21: usage | 
 | // A 3-bit field: uscript_getUsage() reads it back as (props >> 21) & 7 and casts | 
 | // the result to UScriptUsage. Presumably these numbers mirror the UScriptUsage | 
 | // enum values -- confirm against unicode/uscript.h before changing any of them. | 
 | const int32_t UNKNOWN = 1 << 21; | 
 | const int32_t EXCLUSION = 2 << 21; | 
 | const int32_t LIMITED_USE = 3 << 21; | 
 | const int32_t ASPIRATIONAL = 4 << 21; | 
 | const int32_t RECOMMENDED = 5 << 21; | 
 |  | 
 | // Bits 31..24: Single-bit flags | 
 | // Only bits 24..26 are assigned here. Each flag backs one public predicate: | 
 | // RTL -> uscript_isRightToLeft(), LB_LETTERS -> uscript_breaksBetweenLetters(), | 
 | // CASED -> uscript_isCased(). | 
 | const int32_t RTL = 1 << 24; | 
 | const int32_t LB_LETTERS = 1 << 25; | 
 | const int32_t CASED = 1 << 26; | 
 |  | 
 | // Packed script properties, one int32_t per script. getScriptProps() indexes this | 
 | // array directly with the numeric UScriptCode value, so the row order is significant. | 
 | // Each nonzero row ORs together a sample code point, one usage value and any of the | 
 | // RTL / LB_LETTERS / CASED flags; a plain 0 row carries no sample character, usage 0 | 
 | // and no flags. The trailing comment on a nonzero row is the script's four-letter code. | 
 | // NOTE(review): the rows between the Begin/End markers are pasted tool output, so | 
 | // hand edits there would presumably be lost the next time the output is pasted in. | 
 | const int32_t SCRIPT_PROPS[] = { | 
 |     // Begin copy-paste output from | 
 |     // tools/trunk/unicode/py/parsescriptmetadata.py | 
 |     0x0040 | UNKNOWN,  // Zyyy | 
 |     0x0308 | UNKNOWN,  // Zinh | 
 |     0x0628 | RECOMMENDED | RTL,  // Arab | 
 |     0x0531 | RECOMMENDED | CASED,  // Armn | 
 |     0x0995 | RECOMMENDED,  // Beng | 
 |     0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo | 
 |     0x13C4 | LIMITED_USE,  // Cher | 
 |     0x03E2 | EXCLUSION | CASED,  // Copt | 
 |     0x042F | RECOMMENDED | CASED,  // Cyrl | 
 |     0x10414 | EXCLUSION | CASED,  // Dsrt | 
 |     0x0905 | RECOMMENDED,  // Deva | 
 |     0x12A0 | RECOMMENDED,  // Ethi | 
 |     0x10D3 | RECOMMENDED,  // Geor | 
 |     0x10330 | EXCLUSION,  // Goth | 
 |     0x03A9 | RECOMMENDED | CASED,  // Grek | 
 |     0x0A95 | RECOMMENDED,  // Gujr | 
 |     0x0A15 | RECOMMENDED,  // Guru | 
 |     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani | 
 |     0xAC00 | RECOMMENDED,  // Hang | 
 |     0x05D0 | RECOMMENDED | RTL,  // Hebr | 
 |     0x304B | RECOMMENDED | LB_LETTERS,  // Hira | 
 |     0x0C95 | RECOMMENDED,  // Knda | 
 |     0x30AB | RECOMMENDED | LB_LETTERS,  // Kana | 
 |     0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr | 
 |     0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo | 
 |     0x004C | RECOMMENDED | CASED,  // Latn | 
 |     0x0D15 | RECOMMENDED,  // Mlym | 
 |     0x1826 | ASPIRATIONAL,  // Mong | 
 |     0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr | 
 |     0x168F | EXCLUSION,  // Ogam | 
 |     0x10300 | EXCLUSION,  // Ital | 
 |     0x0B15 | RECOMMENDED,  // Orya | 
 |     0x16A0 | EXCLUSION,  // Runr | 
 |     0x0D85 | RECOMMENDED,  // Sinh | 
 |     0x0710 | LIMITED_USE | RTL,  // Syrc | 
 |     0x0B95 | RECOMMENDED,  // Taml | 
 |     0x0C15 | RECOMMENDED,  // Telu | 
 |     0x078C | RECOMMENDED | RTL,  // Thaa | 
 |     0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai | 
 |     0x0F40 | RECOMMENDED,  // Tibt | 
 |     0x14C0 | ASPIRATIONAL,  // Cans | 
 |     0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii | 
 |     0x1703 | EXCLUSION,  // Tglg | 
 |     0x1723 | EXCLUSION,  // Hano | 
 |     0x1743 | EXCLUSION,  // Buhd | 
 |     0x1763 | EXCLUSION,  // Tagb | 
 |     0x2800 | UNKNOWN,  // Brai | 
 |     0x10800 | EXCLUSION | RTL,  // Cprt | 
 |     0x1900 | LIMITED_USE,  // Limb | 
 |     0x10000 | EXCLUSION,  // Linb | 
 |     0x10480 | EXCLUSION,  // Osma | 
 |     0x10450 | EXCLUSION,  // Shaw | 
 |     0x1950 | LIMITED_USE | LB_LETTERS,  // Tale | 
 |     0x10380 | EXCLUSION,  // Ugar | 
 |     0, | 
 |     0x1A00 | EXCLUSION,  // Bugi | 
 |     0x2C00 | EXCLUSION | CASED,  // Glag | 
 |     0x10A00 | EXCLUSION | RTL,  // Khar | 
 |     0xA800 | LIMITED_USE,  // Sylo | 
 |     0x1980 | LIMITED_USE | LB_LETTERS,  // Talu | 
 |     0x2D30 | ASPIRATIONAL,  // Tfng | 
 |     0x103A0 | EXCLUSION,  // Xpeo | 
 |     0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali | 
 |     0x1BC0 | LIMITED_USE,  // Batk | 
 |     0, | 
 |     0x11005 | EXCLUSION,  // Brah | 
 |     0xAA00 | LIMITED_USE,  // Cham | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0x13153 | EXCLUSION,  // Egyp | 
 |     0, | 
 |     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans | 
 |     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0xA984 | LIMITED_USE | LB_LETTERS,  // Java | 
 |     0xA90A | LIMITED_USE,  // Kali | 
 |     0, | 
 |     0, | 
 |     0x1C00 | LIMITED_USE,  // Lepc | 
 |     0, | 
 |     0x0840 | LIMITED_USE | RTL,  // Mand | 
 |     0, | 
 |     0x10980 | EXCLUSION | RTL,  // Mero | 
 |     0x07CA | LIMITED_USE | RTL,  // Nkoo | 
 |     0x10C00 | EXCLUSION | RTL,  // Orkh | 
 |     0, | 
 |     0xA840 | EXCLUSION,  // Phag | 
 |     0x10900 | EXCLUSION | RTL,  // Phnx | 
 |     0x16F00 | ASPIRATIONAL,  // Plrd | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0xA549 | LIMITED_USE,  // Vaii | 
 |     0, | 
 |     0x12000 | EXCLUSION,  // Xsux | 
 |     0, | 
 |     0xFDD0 | UNKNOWN,  // Zzzz | 
 |     0x102A0 | EXCLUSION,  // Cari | 
 |     0x304B | RECOMMENDED | LB_LETTERS,  // Jpan | 
 |     0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana | 
 |     0x10280 | EXCLUSION,  // Lyci | 
 |     0x10920 | EXCLUSION | RTL,  // Lydi | 
 |     0x1C5A | LIMITED_USE,  // Olck | 
 |     0xA930 | EXCLUSION,  // Rjng | 
 |     0xA882 | LIMITED_USE,  // Saur | 
 |     0, | 
 |     0x1B83 | LIMITED_USE,  // Sund | 
 |     0, | 
 |     0xABC0 | LIMITED_USE,  // Mtei | 
 |     0x10840 | EXCLUSION | RTL,  // Armi | 
 |     0x10B00 | EXCLUSION | RTL,  // Avst | 
 |     0x11103 | LIMITED_USE,  // Cakm | 
 |     0xAC00 | RECOMMENDED,  // Kore | 
 |     0x11083 | EXCLUSION,  // Kthi | 
 |     0, | 
 |     0x10B60 | EXCLUSION | RTL,  // Phli | 
 |     0, | 
 |     0, | 
 |     0x10B40 | EXCLUSION | RTL,  // Prti | 
 |     0x0800 | EXCLUSION | RTL,  // Samr | 
 |     0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt | 
 |     0, | 
 |     0, | 
 |     0xA6A0 | LIMITED_USE,  // Bamu | 
 |     0xA4D0 | LIMITED_USE,  // Lisu | 
 |     0, | 
 |     0x10A60 | EXCLUSION | RTL,  // Sarb | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0x109A0 | EXCLUSION | RTL,  // Merc | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0x11183 | EXCLUSION,  // Shrd | 
 |     0x110D0 | EXCLUSION,  // Sora | 
 |     0x11680 | EXCLUSION,  // Takr | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     0, | 
 |     // End copy-paste from parsescriptmetadata.py | 
 | }; | 
 |  | 
 | // Looks up the packed properties word for a script. | 
 | // Returns the SCRIPT_PROPS entry at index `script`, or 0 when `script` is negative | 
 | // or at/after the end of the table (0 is also what an all-default row contains). | 
 | int32_t getScriptProps(UScriptCode script) { | 
 |     // Reject out-of-table codes up front so the array access below is always in bounds. | 
 |     if (script < 0 || LENGTHOF(SCRIPT_PROPS) <= script) { | 
 |         return 0; | 
 |     } | 
 |     return SCRIPT_PROPS[script]; | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | // Copies the sample character of `script` into `dest` as UTF-16. | 
 | //   script      script whose sample character is wanted | 
 | //   dest        output buffer; may be NULL only when capacity is 0 | 
 | //   capacity    number of UChars available in dest; must not be negative | 
 | //   pErrorCode  in/out error code; nothing is done if it already indicates failure, | 
 | //               and it is set to U_ILLEGAL_ARGUMENT_ERROR for a bad dest/capacity pair | 
 | // Returns 0 on the early-exit paths; otherwise returns whatever u_terminateUChars() | 
 | // yields for the sample length (0 for no sample character, else 1 or 2 UTF-16 units). | 
 | U_CAPI int32_t U_EXPORT2 | 
 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { | 
 |     if(U_FAILURE(*pErrorCode)) { | 
 |         return 0; | 
 |     } | 
 |     // Acceptable buffers: an empty one (dest is then irrelevant) or a non-NULL dest | 
 |     // with a positive capacity. | 
 |     const bool bufferOk = capacity == 0 || (capacity > 0 && dest != NULL); | 
 |     if(!bufferOk) { | 
 |         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
 |         return 0; | 
 |     } | 
 |     // The low 21 bits of the properties word hold the sample code point. | 
 |     const int32_t SAMPLE_CHAR_MASK = 0x1fffff; | 
 |     int32_t codePoint = getScriptProps(script) & SAMPLE_CHAR_MASK; | 
 |     const int32_t length = (codePoint == 0) ? 0 : U16_LENGTH(codePoint); | 
 |     // Write only when there is a sample character and all of its code units fit; | 
 |     // the length is still passed on below when they do not. | 
 |     if(length > 0 && length <= capacity) { | 
 |         int32_t writeIndex = 0; | 
 |         U16_APPEND_UNSAFE(dest, writeIndex, codePoint); | 
 |     } | 
 |     return u_terminateUChars(dest, capacity, length, pErrorCode); | 
 | } | 
 |  | 
 | // Returns the sample character of `script` as a UnicodeString. | 
 | // The result is empty when the script's properties word has no sample character | 
 | // (including scripts that getScriptProps() maps to 0). | 
 | U_COMMON_API icu::UnicodeString U_EXPORT2 | 
 | uscript_getSampleUnicodeString(UScriptCode script) { | 
 |     // The low 21 bits of the properties word hold the sample code point. | 
 |     int32_t codePoint = getScriptProps(script) & 0x1fffff; | 
 |     icu::UnicodeString result; | 
 |     if(codePoint == 0) { | 
 |         return result; | 
 |     } | 
 |     result.append(codePoint); | 
 |     return result; | 
 | } | 
 |  | 
 | // Returns the usage category of `script`: the 3-bit field at bits 23..21 of its | 
 | // properties word, converted to UScriptUsage. Scripts without data yield value 0. | 
 | U_CAPI UScriptUsage U_EXPORT2 | 
 | uscript_getUsage(UScriptCode script) { | 
 |     const int32_t USAGE_SHIFT = 21; | 
 |     const int32_t USAGE_MASK = 7; | 
 |     const int32_t usageBits = (getScriptProps(script) >> USAGE_SHIFT) & USAGE_MASK; | 
 |     return static_cast<UScriptUsage>(usageBits); | 
 | } | 
 |  | 
 | // Reports whether the RTL flag is set in the properties word of `script`. | 
 | // Scripts without data have no flags set, so the result is false for them. | 
 | U_CAPI UBool U_EXPORT2 | 
 | uscript_isRightToLeft(UScriptCode script) { | 
 |     const int32_t rtlBit = getScriptProps(script) & RTL; | 
 |     return rtlBit != 0; | 
 | } | 
 |  | 
 | // Reports whether the LB_LETTERS flag is set in the properties word of `script`. | 
 | // Scripts without data have no flags set, so the result is false for them. | 
 | U_CAPI UBool U_EXPORT2 | 
 | uscript_breaksBetweenLetters(UScriptCode script) { | 
 |     const int32_t lbLettersBit = getScriptProps(script) & LB_LETTERS; | 
 |     return lbLettersBit != 0; | 
 | } | 
 |  | 
 | // Reports whether the CASED flag is set in the properties word of `script`. | 
 | // Scripts without data have no flags set, so the result is false for them. | 
 | U_CAPI UBool U_EXPORT2 | 
 | uscript_isCased(UScriptCode script) { | 
 |     const int32_t casedBit = getScriptProps(script) & CASED; | 
 |     return casedBit != 0; | 
 | } | 