| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2012-2015, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| * collationbasedatabuilder.h |
| * |
| * created on: 2012aug11 |
| * created by: Markus W. Scherer |
| */ |
| |
| #ifndef __COLLATIONBASEDATABUILDER_H__ |
| #define __COLLATIONBASEDATABUILDER_H__ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION |
| |
| #include "unicode/uniset.h" |
| #include "unicode/unistr.h" |
| #include "unicode/uscript.h" |
| #include "collation.h" |
| #include "collationdata.h" |
| #include "collationdatabuilder.h" |
| #include "normalizer2impl.h" |
| #include "utrie2.h" |
| #include "uvectr32.h" |
| #include "uvectr64.h" |
| #include "uvector.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| /** |
| * Low-level base CollationData builder. |
| * |
| * Publicly derives from CollationDataBuilder and is exported with U_I18N_API. |
| * Only setNumericPrimary() has its body in this header (it assigns its |
| * argument to numericPrimary); every other member function is declared here |
| * and defined elsewhere, so its exact behavior cannot be read off this file. |
| * |
| * Notes for readers: |
| * - isCompressibleLeadByte(), encodeCEs() and build() are declared virtual |
| *   without an override specifier. NOTE(review): they presumably override |
| *   CollationDataBuilder virtuals -- confirm against collationdatabuilder.h. |
| * - diffTwoBytePrimaries() and diffThreeBytePrimaries() are static, so they |
| *   can be called without a builder instance. |
| * - writeRootElementsRange() is private. NOTE(review): presumably a helper |
| *   for buildRootElementsTable() -- confirm in the implementation. |
| * - The single-argument constructor is not declared explicit. |
| * |
| * Private state: the compressibleBytes flag table has 256 entries |
| * (NOTE(review): presumably one per possible lead byte value), followed by |
| * numericPrimary, firstHanPrimary, lastHanPrimary, hanStep, the rootElements |
| * vector (UVector64), and the scriptsIndex/scriptStarts arrays, both sized |
| * USCRIPT_CODE_LIMIT + 16, with scriptStartsLength declared alongside them. |
| */ |
| class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { |
| public: |
| CollationBaseDataBuilder(UErrorCode &errorCode); |
| |
| virtual ~CollationBaseDataBuilder(); |
| |
| void init(UErrorCode &errorCode); |
| |
| /** |
| * Sets the Han ranges as ranges of offset CE32s. |
| * Note: Unihan extension A sorts after the other BMP ranges. |
| * See http://www.unicode.org/reports/tr10/#Implicit_Weights |
| * |
| * @param ranges array of ranges of [:Unified_Ideograph:] in collation order, |
| * as (start, end) code point pairs |
| * @param length number of code points (not pairs); NOTE(review): presumably |
| * the number of entries in ranges, i.e. twice the number of |
| * (start, end) pairs -- confirm against the implementation |
| * @param errorCode in/out error code |
| */ |
| void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode); |
| |
| void setNumericPrimary(uint32_t np) { numericPrimary = np; } |
| |
| virtual UBool isCompressibleLeadByte(uint32_t b) const; |
| |
| void setCompressibleLeadByte(uint32_t b); |
| |
| static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); |
| static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); |
| |
| virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); |
| |
| void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); |
| void addRootElement(int64_t ce, UErrorCode &errorCode); |
| |
| void addScriptStart(int32_t script, uint32_t p); |
| |
| virtual void build(CollationData &data, UErrorCode &errorCode); |
| |
| void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); |
| |
| private: |
| int32_t writeRootElementsRange( |
| uint32_t prevPrimary, uint32_t p, int32_t i, |
| UVector32 &table, UErrorCode &errorCode); |
| |
| // Flags for which primary-weight lead bytes are compressible. |
| UBool compressibleBytes[256]; |
| uint32_t numericPrimary; |
| uint32_t firstHanPrimary; |
| uint32_t lastHanPrimary; |
| int32_t hanStep; |
| UVector64 rootElements; |
| uint16_t scriptsIndex[USCRIPT_CODE_LIMIT + 16]; // need exactly this many |
| uint16_t scriptStarts[USCRIPT_CODE_LIMIT + 16]; // should be safely more than needed |
| int32_t scriptStartsLength; |
| }; |
| |
| U_NAMESPACE_END |
| |
| #endif // !UCONFIG_NO_COLLATION |
| #endif // __COLLATIONBASEDATABUILDER_H__ |