| /* |
| ********************************************************************** |
| * Copyright (C) 2001, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * file name: ucmpe32.h |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2001aug03 |
| * created by: Vladimir Weinstein |
| * |
| * This is basically a rip-off of trie developed by Markus for |
| * normalization data, but using a reduced ucmp interface |
| * Interface is implemented as much as required by the collation |
| * framework. |
| * This table is slow on data addition, but should support surrogates |
| * nicely. |
| */ |
| |
/**
 * NOTE: This array is specifically implemented to support surrogates
 * in the collation framework. Its interface is minimal and its usage model
 * is far from flexible. Use at your own risk outside of collation.
 * Risk is also present in the collation framework, but there is hardly
 * anything you can do about it, save reimplementing the framework.
 */
| |
| #ifndef UCMPE32_H |
| #define UCMPE32_H |
| |
/*
 * Version constant of this trie implementation.
 * NOTE(review): where it is written or checked is not visible in this
 * header - presumably stored in / validated against flattened data;
 * confirm in ucmpe32.c.
 */
#define ICU_UCMPE32_VERSION 0x01260000
/*
 * Initial stage 2 size.
 * NOTE(review): presumably the number of int32_t entries first allocated
 * for CompactEIntArray.stage2 - confirm in ucmpe32.c.
 */
#define INIT_UCMPE32_STAGE2_SIZE 60000
| |
| #include "unicode/utypes.h" |
| |
| #include "filestrm.h" |
| #include "umemstrm.h" |
| |
| |
/* trie constants */
enum {
    /*
     * Number of low code point bits that select an entry inside a stage 2
     * block; the remaining high bits index stage 1 (see ucmpe32_get).
     *
     * must be <=10:
     * above 10, a lead surrogate's block is smaller than a stage 2 block
     */
    _UCMPE32_TRIE_SHIFT=5,

    /* entries per stage 2 block, and the mask that extracts the in-block offset */
    _UCMPE32_STAGE_2_BLOCK_COUNT=1<<_UCMPE32_TRIE_SHIFT,
    _UCMPE32_STAGE_2_MASK=_UCMPE32_STAGE_2_BLOCK_COUNT-1,

    /* number of stage 1 entries covering the 16-bit range 0..0xffff */
    _UCMPE32_STAGE_1_BMP_COUNT=(1<<(16-_UCMPE32_TRIE_SHIFT)),

    /* a lead surrogate spans 1<<10 code points, i.e. this many stage 2 blocks */
    _UCMPE32_SURROGATE_BLOCK_BITS=10-_UCMPE32_TRIE_SHIFT,
    _UCMPE32_SURROGATE_BLOCK_COUNT=(1<<_UCMPE32_SURROGATE_BLOCK_BITS),

    _UCMPE32_EXTRA_SHIFT=16,            /* 16 bits for the index to UChars and other extra data */
    _UCMPE32_EXTRA_INDEX_TOP=0xfc00,    /* start of surrogate specials after shift */

    _UCMPE32_EXTRA_SURROGATE_MASK=0x3ff,
    _UCMPE32_EXTRA_SURROGATE_TOP=0x3f0  /* hangul etc. */
};

/*
 * Upper bound on the number of stage 1 entries: one per stage 2 block over
 * the whole code point range 0..0x10ffff.
 * this may be >0xffff and may not work as an enum
 */
#define _UCMPE32_STAGE_1_MAX_COUNT (0x110000>>_UCMPE32_TRIE_SHIFT)
| |
| typedef struct CompactEIntArray{ |
| uint32_t fStructSize; |
| UBool fCompact; |
| UBool fAlias; |
| UBool fBogus; |
| UBool fInitPhase; |
| |
| uint16_t *stage1; |
| int32_t *stage2; |
| int32_t stage1Top; |
| int32_t stage2Top; |
| int32_t stage2DefaultTop; |
| int32_t fDefaultValue; |
| int32_t fSurrogateValue; |
| int32_t fLeadSurrogateValue; |
| int32_t stage2Size; |
| } CompactEIntArray; |
| |
| |
| /** |
| * Construct an empty CompactEIntArray. |
| * |
| * @param defaultValue the default value for all characters not explicitly in the array |
| */ |
| U_CAPI CompactEIntArray* U_EXPORT2 |
| ucmpe32_open(int32_t defaultValue, int32_t surrogateValue, int32_t leadSurrogateValue, UErrorCode *status); |
| |
| /** |
| * Opens a compacted read-only array from |
| * a block in memory. |
| */ |
| U_CAPI CompactEIntArray* U_EXPORT2 |
| ucmpe32_openFromData( const uint8_t **source, UErrorCode *status); |
| |
| /** |
| * Clones an array. It can be either compacted or expanded |
| */ |
| U_CAPI CompactEIntArray* U_EXPORT2 |
| ucmpe32_clone(CompactEIntArray* orig, UErrorCode *status); |
| |
| /** |
| * Free up any allocated memory associated with this compact array. |
| * The memory that is uprv_free'd depends on how the array was initialized |
| * or opened. |
| * |
| * @param array The compact array to close |
| */ |
| U_CAPI void U_EXPORT2 ucmpe32_close(CompactEIntArray* array); |
| |
/**
 * Get the mapped value of a Unicode character.
 *
 * The high bits of index (index >> _UCMPE32_TRIE_SHIFT) select a stage 1
 * entry, which is an offset into stage 2; the low bits
 * (index & _UCMPE32_STAGE_2_MASK) are added to that offset.
 * No range checking is performed.
 *
 * This is a macro: both arguments are evaluated more than once, so do not
 * pass expressions with side effects. Arguments are fully parenthesized, so
 * expressions such as `&array` or `a | b` expand correctly.
 *
 * @param this_obj pointer to the CompactEIntArray to look up in
 * @param index the character to get the mapped value with
 * @return the mapped value of the given character
 */
#define ucmpe32_get(this_obj, index) \
    ((this_obj)->stage2[((this_obj)->stage1[((index) >> _UCMPE32_TRIE_SHIFT)]) + \
                        ((index) & _UCMPE32_STAGE_2_MASK)])
| U_CAPI int32_t U_EXPORT2 |
| ucmpe32_get32(CompactEIntArray* this_obj, UChar32 code); |
| |
/**
 * Get the mapped value of a confirmed surrogate. First value already comes
 * from the trie and is combined with the following value in order to get
 * the value. THIS CAN BE ONLY USED ON A COMPACTED TRIE. You will get wrong
 * results if you try it on the expanded one.
 * NO ERROR CHECKING IS PERFORMED! PREPARE YOUR DATA CAREFULLY!
 *
 * The lookup index passed to ucmpe32_get is
 * (leadValue32 & 0xffc00) | (trail & 0x3ff).
 * This is a macro: its arguments may be evaluated more than once, so do not
 * pass expressions with side effects.
 *
 * @param this_obj pointer to the CompactEIntArray to look up in
 * @param leadValue32 the mapping of the leading surrogate.
 * @param trail the trailing surrogate
 * @return the mapped value of the given character
 */
#define ucmpe32_getSurrogate(this_obj, leadValue32, trail) \
    ucmpe32_get((this_obj), (((leadValue32) & 0xffc00) | ((trail) & 0x3ff)))
| |
| /** |
| * This is a slow function that takes lead and trail surrogate and gets |
| * the mapping regardless of the compaction status. |
| */ |
| U_CAPI int32_t U_EXPORT2 |
| ucmpe32_getSurrogateEx(CompactEIntArray *array, UChar lead, UChar trail); |
| |
| /** |
| * Set a new value for a Unicode character. |
| * Do not set if the array is compacted - nothing will happen. |
| * @param character the character to set the mapped value with |
| * @param value the new mapped value |
| */ |
| U_CAPI void U_EXPORT2 ucmpe32_set32(CompactEIntArray *array, |
| UChar32 character, |
| int32_t value); |
| /** |
| * Sets an inclusive range of characters to the same value |
| * @param start start of the range |
| * @param end end of the range |
| * @param value value to be set |
| */ |
| U_CAPI void U_EXPORT2 |
| ucmpe32_setRange32(CompactEIntArray* this_obj, UChar32 start, UChar32 end, int32_t value); |
| |
/**
 * alias for compatibility: forwards to ucmpe32_set32(), casting the
 * character argument to UChar32.
 */
#define ucmpe32_set(array, character, value) \
    ucmpe32_set32((array), (UChar32)(character), (value))
| |
| |
| /** |
| * Set a new value for a surrogate character. |
| * Do not set if the array is compacted - nothing will happen. |
| * Set automatically expands the array if it is compacted. |
| * Alternatively you can put the surrogate code point together |
| * yourself and use set32. |
| * @param lead leading surrogate unit |
| * @param trail trailing surrogate unit |
| * @param value the new mapped value |
| */ |
| U_CAPI void U_EXPORT2 |
| ucmpe32_setSurrogate(CompactEIntArray* this_obj, UChar lead, |
| UChar trail, int32_t value); |
| /** |
| * compacts the array. |
| * This folds the surrogates and compacts the array. |
| * no setting will succeed after the array is compacted. |
| * Array have to be compacted in order to be flattened. |
| */ |
| U_CAPI void U_EXPORT2 |
| ucmpe32_compact(CompactEIntArray* this_object); |
| |
| /** |
| * Flattens the array to an memory stream. |
| * Array has to be compacted beforehand. |
| * @param MS memory stream to flatten to |
| * @return number of bytes written. |
| */ |
| U_CAPI uint32_t U_EXPORT2 |
| ucmpe32_flattenMem(const CompactEIntArray* this_object, UMemoryStream *MS); |
| |
| #endif |
| |