blob: c526a4f14f2df823aa2e45e340936c0b8a241ae3 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationkeys.h
*
* created on: 2012sep02
* created by: Markus W. Scherer
*/
#ifndef __COLLATIONKEYS_H__
#define __COLLATIONKEYS_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/bytestream.h"
#include "unicode/ucol.h"
#include "charstr.h"
#include "collation.h"
U_NAMESPACE_BEGIN
class CollationIterator;
struct CollationDataReader;
struct CollationSettings;
class SortKeyByteSink : public ByteSink {
public:
SortKeyByteSink(char *dest, int32_t destCapacity)
: buffer_(dest), capacity_(destCapacity),
appended_(0), ignore_(0) {}
virtual ~SortKeyByteSink();
void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
virtual void Append(const char *bytes, int32_t n);
void Append(uint32_t b) {
if (ignore_ > 0) {
--ignore_;
} else {
if (appended_ < capacity_ || Resize(1, appended_)) {
buffer_[appended_] = (char)b;
}
++appended_;
}
}
virtual char *GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char *scratch, int32_t scratch_capacity,
int32_t *result_capacity);
int32_t NumberOfBytesAppended() const { return appended_; }
/**
* @return how many bytes can be appended (including ignored ones)
* without reallocation
*/
int32_t GetRemainingCapacity() const {
// Either ignore_ or appended_ should be 0.
return ignore_ + capacity_ - appended_;
}
UBool Overflowed() const { return appended_ > capacity_; }
/** @return false if memory allocation failed */
UBool IsOk() const { return buffer_ != NULL; }
protected:
virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
void SetNotOk() {
buffer_ = NULL;
capacity_ = 0;
}
char *buffer_;
int32_t capacity_;
int32_t appended_;
int32_t ignore_;
private:
SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
};
class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
public:
class LevelCallback : public UMemory {
public:
virtual ~LevelCallback();
/**
* @param level The next level about to be written to the ByteSink.
* @return true if the level is to be written
* (the base class implementation always returns true)
*/
virtual UBool needToWrite(Collation::Level level);
};
/**
* Writes the sort key bytes for minLevel up to the iterator data's strength.
* Optionally writes the case level.
* Stops writing levels when callback.needToWrite(level) returns false.
* Separates levels with the LEVEL_SEPARATOR_BYTE
* but does not write a TERMINATOR_BYTE.
*/
static void writeSortKeyUpToQuaternary(CollationIterator &iter,
const UBool *compressibleBytes,
const CollationSettings &settings,
SortKeyByteSink &sink,
Collation::Level minLevel, LevelCallback &callback,
UBool preflight, UErrorCode &errorCode);
private:
friend struct CollationDataReader;
CollationKeys(); // no instantiation
// Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
// Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
// Case level, upperFirst: Compress up to 13 common weights as 3..15.
static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
// Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
// Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
// Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
// Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
static const uint32_t QUAT_COMMON_LOW = 0x1c;
static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
// Primary weights shifted to quaternary level must be encoded with
// a lead byte below the common-weight compression range.
static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_COLLATION
#endif // __COLLATIONKEYS_H__