| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ****************************************************************************** |
| * Copyright (C) 2001-2016, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ****************************************************************************** |
| * |
| * File ucoleitr.cpp |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 02/15/2001 synwee Modified all methods to process its own function |
| * instead of calling the equivalent c++ api (coleitr.h) |
| * 2012-2014 markus Rewritten in C++ again. |
| ******************************************************************************/ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION |
| |
| #include "unicode/coleitr.h" |
| #include "unicode/tblcoll.h" |
| #include "unicode/ucoleitr.h" |
| #include "unicode/ustring.h" |
| #include "unicode/sortkey.h" |
| #include "unicode/uobject.h" |
| #include "cmemory.h" |
| #include "usrchimp.h" |
| |
| U_NAMESPACE_USE |
| |
// Not referenced by any code visible in this file.
#define BUFFER_LENGTH 100

// Number of entries in the array embedded in RCEBuffer.
#define DEFAULT_BUFFER_SIZE 16

// Number of additional entries allocated each time a CE buffer has to grow.
#define BUFFER_GROW 8

// Copies `count` elements from src to dst with uprv_memcpy;
// the element size is taken from src[0].
#define ARRAY_COPY(dst, src, count) \
    uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof((src)[0]))

// Requests room for `count` objects of `type` from uprv_malloc and casts the
// result to `type *`. Callers in this file check the result against NULL.
#define NEW_ARRAY(type, count) \
    (type *) uprv_malloc((size_t)(count) * sizeof(type))

// Hands `array` back to uprv_free.
#define DELETE_ARRAY(array) \
    uprv_free((void *) (array))
| |
// One raw 32-bit collation element plus the pair of text offsets stored with it.
struct RCEI {
    uint32_t ce;    // raw collation element value
    int32_t  low;   // lower text offset recorded for this element
    int32_t  high;  // upper text offset recorded for this element
};
| |
| U_NAMESPACE_BEGIN |
| |
| struct RCEBuffer |
| { |
| RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; |
| RCEI *buffer; |
| int32_t bufferIndex; |
| int32_t bufferSize; |
| |
| RCEBuffer(); |
| ~RCEBuffer(); |
| |
| UBool isEmpty() const; |
| void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode); |
| const RCEI *get(); |
| }; |
| |
| RCEBuffer::RCEBuffer() |
| { |
| buffer = defaultBuffer; |
| bufferIndex = 0; |
| bufferSize = UPRV_LENGTHOF(defaultBuffer); |
| } |
| |
| RCEBuffer::~RCEBuffer() |
| { |
| if (buffer != defaultBuffer) { |
| DELETE_ARRAY(buffer); |
| } |
| } |
| |
| UBool RCEBuffer::isEmpty() const |
| { |
| return bufferIndex <= 0; |
| } |
| |
| void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) |
| { |
| if (U_FAILURE(errorCode)) { |
| return; |
| } |
| if (bufferIndex >= bufferSize) { |
| RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); |
| if (newBuffer == NULL) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| ARRAY_COPY(newBuffer, buffer, bufferSize); |
| |
| if (buffer != defaultBuffer) { |
| DELETE_ARRAY(buffer); |
| } |
| |
| buffer = newBuffer; |
| bufferSize += BUFFER_GROW; |
| } |
| |
| buffer[bufferIndex].ce = ce; |
| buffer[bufferIndex].low = ixLow; |
| buffer[bufferIndex].high = ixHigh; |
| |
| bufferIndex += 1; |
| } |
| |
| const RCEI *RCEBuffer::get() |
| { |
| if (bufferIndex > 0) { |
| return &buffer[--bufferIndex]; |
| } |
| |
| return NULL; |
| } |
| |
| PCEBuffer::PCEBuffer() |
| { |
| buffer = defaultBuffer; |
| bufferIndex = 0; |
| bufferSize = UPRV_LENGTHOF(defaultBuffer); |
| } |
| |
| PCEBuffer::~PCEBuffer() |
| { |
| if (buffer != defaultBuffer) { |
| DELETE_ARRAY(buffer); |
| } |
| } |
| |
| void PCEBuffer::reset() |
| { |
| bufferIndex = 0; |
| } |
| |
| UBool PCEBuffer::isEmpty() const |
| { |
| return bufferIndex <= 0; |
| } |
| |
| void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) |
| { |
| if (U_FAILURE(errorCode)) { |
| return; |
| } |
| if (bufferIndex >= bufferSize) { |
| PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); |
| if (newBuffer == NULL) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| ARRAY_COPY(newBuffer, buffer, bufferSize); |
| |
| if (buffer != defaultBuffer) { |
| DELETE_ARRAY(buffer); |
| } |
| |
| buffer = newBuffer; |
| bufferSize += BUFFER_GROW; |
| } |
| |
| buffer[bufferIndex].ce = ce; |
| buffer[bufferIndex].low = ixLow; |
| buffer[bufferIndex].high = ixHigh; |
| |
| bufferIndex += 1; |
| } |
| |
| const PCEI *PCEBuffer::get() |
| { |
| if (bufferIndex > 0) { |
| return &buffer[--bufferIndex]; |
| } |
| |
| return NULL; |
| } |
| |
| UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); } |
| |
| UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); } |
| |
| void UCollationPCE::init(UCollationElements *elems) { |
| init(CollationElementIterator::fromUCollationElements(elems)); |
| } |
| |
| void UCollationPCE::init(CollationElementIterator *iter) |
| { |
| cei = iter; |
| init(*iter->rbc_); |
| } |
| |
| void UCollationPCE::init(const Collator &coll) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| |
| strength = coll.getAttribute(UCOL_STRENGTH, status); |
| toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; |
| isShifted = FALSE; |
| variableTop = coll.getVariableTop(status); |
| } |
| |
| UCollationPCE::~UCollationPCE() |
| { |
| // nothing to do |
| } |
| |
| uint64_t UCollationPCE::processCE(uint32_t ce) |
| { |
| uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; |
| |
| // This is clean, but somewhat slow... |
| // We could apply the mask to ce and then |
| // just get all three orders... |
| switch(strength) { |
| default: |
| tertiary = ucol_tertiaryOrder(ce); |
| U_FALLTHROUGH; |
| |
| case UCOL_SECONDARY: |
| secondary = ucol_secondaryOrder(ce); |
| U_FALLTHROUGH; |
| |
| case UCOL_PRIMARY: |
| primary = ucol_primaryOrder(ce); |
| } |
| |
| // **** This should probably handle continuations too. **** |
| // **** That means that we need 24 bits for the primary **** |
| // **** instead of the 16 that we're currently using. **** |
| // **** So we can lay out the 64 bits as: 24.12.12.16. **** |
| // **** Another complication with continuations is that **** |
| // **** the *second* CE is marked as a continuation, so **** |
| // **** we always have to peek ahead to know how long **** |
| // **** the primary is... **** |
| if ((toShift && variableTop > ce && primary != 0) |
| || (isShifted && primary == 0)) { |
| |
| if (primary == 0) { |
| return UCOL_IGNORABLE; |
| } |
| |
| if (strength >= UCOL_QUATERNARY) { |
| quaternary = primary; |
| } |
| |
| primary = secondary = tertiary = 0; |
| isShifted = TRUE; |
| } else { |
| if (strength >= UCOL_QUATERNARY) { |
| quaternary = 0xFFFF; |
| } |
| |
| isShifted = FALSE; |
| } |
| |
| return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; |
| } |
| |
| U_NAMESPACE_END |
| |
| /* public methods ---------------------------------------------------- */ |
| |
| U_CAPI UCollationElements* U_EXPORT2 |
| ucol_openElements(const UCollator *coll, |
| const UChar *text, |
| int32_t textLength, |
| UErrorCode *status) |
| { |
| if (U_FAILURE(*status)) { |
| return NULL; |
| } |
| if (coll == NULL || (text == NULL && textLength != 0)) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); |
| if (rbc == NULL) { |
| *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator |
| return NULL; |
| } |
| |
| UnicodeString s((UBool)(textLength < 0), text, textLength); |
| CollationElementIterator *cei = rbc->createCollationElementIterator(s); |
| if (cei == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| |
| return cei->toUCollationElements(); |
| } |
| |
| |
| U_CAPI void U_EXPORT2 |
| ucol_closeElements(UCollationElements *elems) |
| { |
| delete CollationElementIterator::fromUCollationElements(elems); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| ucol_reset(UCollationElements *elems) |
| { |
| CollationElementIterator::fromUCollationElements(elems)->reset(); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_next(UCollationElements *elems, |
| UErrorCode *status) |
| { |
| if (U_FAILURE(*status)) { |
| return UCOL_NULLORDER; |
| } |
| |
| return CollationElementIterator::fromUCollationElements(elems)->next(*status); |
| } |
| |
| U_NAMESPACE_BEGIN |
| |
| int64_t |
| UCollationPCE::nextProcessed( |
| int32_t *ixLow, |
| int32_t *ixHigh, |
| UErrorCode *status) |
| { |
| int64_t result = UCOL_IGNORABLE; |
| uint32_t low = 0, high = 0; |
| |
| if (U_FAILURE(*status)) { |
| return UCOL_PROCESSED_NULLORDER; |
| } |
| |
| pceBuffer.reset(); |
| |
| do { |
| low = cei->getOffset(); |
| int32_t ce = cei->next(*status); |
| high = cei->getOffset(); |
| |
| if (ce == UCOL_NULLORDER) { |
| result = UCOL_PROCESSED_NULLORDER; |
| break; |
| } |
| |
| result = processCE((uint32_t)ce); |
| } while (result == UCOL_IGNORABLE); |
| |
| if (ixLow != NULL) { |
| *ixLow = low; |
| } |
| |
| if (ixHigh != NULL) { |
| *ixHigh = high; |
| } |
| |
| return result; |
| } |
| |
| U_NAMESPACE_END |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_previous(UCollationElements *elems, |
| UErrorCode *status) |
| { |
| if(U_FAILURE(*status)) { |
| return UCOL_NULLORDER; |
| } |
| return CollationElementIterator::fromUCollationElements(elems)->previous(*status); |
| } |
| |
| U_NAMESPACE_BEGIN |
| |
| int64_t |
| UCollationPCE::previousProcessed( |
| int32_t *ixLow, |
| int32_t *ixHigh, |
| UErrorCode *status) |
| { |
| int64_t result = UCOL_IGNORABLE; |
| int32_t low = 0, high = 0; |
| |
| if (U_FAILURE(*status)) { |
| return UCOL_PROCESSED_NULLORDER; |
| } |
| |
| // pceBuffer.reset(); |
| |
| while (pceBuffer.isEmpty()) { |
| // buffer raw CEs up to non-ignorable primary |
| RCEBuffer rceb; |
| int32_t ce; |
| |
| // **** do we need to reset rceb, or will it always be empty at this point **** |
| do { |
| high = cei->getOffset(); |
| ce = cei->previous(*status); |
| low = cei->getOffset(); |
| |
| if (ce == UCOL_NULLORDER) { |
| if (!rceb.isEmpty()) { |
| break; |
| } |
| |
| goto finish; |
| } |
| |
| rceb.put((uint32_t)ce, low, high, *status); |
| } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce))); |
| |
| // process the raw CEs |
| while (U_SUCCESS(*status) && !rceb.isEmpty()) { |
| const RCEI *rcei = rceb.get(); |
| |
| result = processCE(rcei->ce); |
| |
| if (result != UCOL_IGNORABLE) { |
| pceBuffer.put(result, rcei->low, rcei->high, *status); |
| } |
| } |
| if (U_FAILURE(*status)) { |
| return UCOL_PROCESSED_NULLORDER; |
| } |
| } |
| |
| finish: |
| if (pceBuffer.isEmpty()) { |
| // **** Is -1 the right value for ixLow, ixHigh? **** |
| if (ixLow != NULL) { |
| *ixLow = -1; |
| } |
| |
| if (ixHigh != NULL) { |
| *ixHigh = -1 |
| ; |
| } |
| return UCOL_PROCESSED_NULLORDER; |
| } |
| |
| const PCEI *pcei = pceBuffer.get(); |
| |
| if (ixLow != NULL) { |
| *ixLow = pcei->low; |
| } |
| |
| if (ixHigh != NULL) { |
| *ixHigh = pcei->high; |
| } |
| |
| return pcei->ce; |
| } |
| |
| U_NAMESPACE_END |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_getMaxExpansion(const UCollationElements *elems, |
| int32_t order) |
| { |
| return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order); |
| |
| // TODO: The old code masked the order according to strength and then did a binary search. |
| // However this was probably at least partially broken because of the following comment. |
| // Still, it might have found a match when this version may not. |
| |
| // FIXME: with a masked search, there might be more than one hit, |
| // so we need to look forward and backward from the match to find all |
| // of the hits... |
| } |
| |
| U_CAPI void U_EXPORT2 |
| ucol_setText( UCollationElements *elems, |
| const UChar *text, |
| int32_t textLength, |
| UErrorCode *status) |
| { |
| if (U_FAILURE(*status)) { |
| return; |
| } |
| |
| if ((text == NULL && textLength != 0)) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| UnicodeString s((UBool)(textLength < 0), text, textLength); |
| return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_getOffset(const UCollationElements *elems) |
| { |
| return CollationElementIterator::fromUCollationElements(elems)->getOffset(); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| ucol_setOffset(UCollationElements *elems, |
| int32_t offset, |
| UErrorCode *status) |
| { |
| if (U_FAILURE(*status)) { |
| return; |
| } |
| |
| CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_primaryOrder (int32_t order) |
| { |
| return (order >> 16) & 0xffff; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_secondaryOrder (int32_t order) |
| { |
| return (order >> 8) & 0xff; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_tertiaryOrder (int32_t order) |
| { |
| return order & 0xff; |
| } |
| |
| #endif /* #if !UCONFIG_NO_COLLATION */ |