| // © 2016 and later: Unicode, Inc. and others. | 
 | // License & terms of use: http://www.unicode.org/copyright.html | 
 | /* | 
 | ******************************************************************************* | 
 | * Copyright (C) 2013-2014, International Business Machines | 
 | * Corporation and others.  All Rights Reserved. | 
 | ******************************************************************************* | 
 | * collationrootelements.cpp | 
 | * | 
 | * created on: 2013mar05 | 
 | * created by: Markus W. Scherer | 
 | */ | 
 |  | 
 | #include "unicode/utypes.h" | 
 |  | 
 | #if !UCONFIG_NO_COLLATION | 
 |  | 
 | #include "collation.h" | 
 | #include "collationrootelements.h" | 
 | #include "uassert.h" | 
 |  | 
 | U_NAMESPACE_BEGIN | 
 |  | 
 | int64_t | 
 | CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { | 
 |     if(p == 0) { return 0; } | 
 |     U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); | 
 |     int32_t index = findP(p); | 
 |     uint32_t q = elements[index]; | 
 |     uint32_t secTer; | 
 |     if(p == (q & 0xffffff00)) { | 
 |         // p == elements[index] is a root primary. Find the CE before it. | 
 |         // We must not be in a primary range. | 
 |         U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | 
 |         secTer = elements[index - 1]; | 
 |         if((secTer & SEC_TER_DELTA_FLAG) == 0) { | 
 |             // Primary CE just before p. | 
 |             p = secTer & 0xffffff00; | 
 |             secTer = Collation::COMMON_SEC_AND_TER_CE; | 
 |         } else { | 
 |             // secTer = last secondary & tertiary for the previous primary | 
 |             index -= 2; | 
 |             for(;;) { | 
 |                 p = elements[index]; | 
 |                 if((p & SEC_TER_DELTA_FLAG) == 0) { | 
 |                     p &= 0xffffff00; | 
 |                     break; | 
 |                 } | 
 |                 --index; | 
 |             } | 
 |         } | 
 |     } else { | 
 |         // p > elements[index] which is the previous primary. | 
 |         // Find the last secondary & tertiary weights for it. | 
 |         p = q & 0xffffff00; | 
 |         secTer = Collation::COMMON_SEC_AND_TER_CE; | 
 |         for(;;) { | 
 |             q = elements[++index]; | 
 |             if((q & SEC_TER_DELTA_FLAG) == 0) { | 
 |                 // We must not be in a primary range. | 
 |                 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | 
 |                 break; | 
 |             } | 
 |             secTer = q; | 
 |         } | 
 |     } | 
 |     return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); | 
 | } | 
 |  | 
 | int64_t | 
 | CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { | 
 |     if(p == 0) { return 0; } | 
 |     int32_t index = findP(p); | 
 |     if(p != (elements[index] & 0xffffff00)) { | 
 |         for(;;) { | 
 |             p = elements[++index]; | 
 |             if((p & SEC_TER_DELTA_FLAG) == 0) { | 
 |                 // First primary after p. We must not be in a primary range. | 
 |                 U_ASSERT((p & PRIMARY_STEP_MASK) == 0); | 
 |                 break; | 
 |             } | 
 |         } | 
 |     } | 
 |     // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. | 
 |     return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { | 
 |     int32_t index = findPrimary(p); | 
 |     int32_t step; | 
 |     uint32_t q = elements[index]; | 
 |     if(p == (q & 0xffffff00)) { | 
 |         // Found p itself. Return the previous primary. | 
 |         // See if p is at the end of a previous range. | 
 |         step = (int32_t)q & PRIMARY_STEP_MASK; | 
 |         if(step == 0) { | 
 |             // p is not at the end of a range. Look for the previous primary. | 
 |             do { | 
 |                 p = elements[--index]; | 
 |             } while((p & SEC_TER_DELTA_FLAG) != 0); | 
 |             return p & 0xffffff00; | 
 |         } | 
 |     } else { | 
 |         // p is in a range, and not at the start. | 
 |         uint32_t nextElement = elements[index + 1]; | 
 |         U_ASSERT(isEndOfPrimaryRange(nextElement)); | 
 |         step = (int32_t)nextElement & PRIMARY_STEP_MASK; | 
 |     } | 
 |     // Return the previous range primary. | 
 |     if((p & 0xffff) == 0) { | 
 |         return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); | 
 |     } else { | 
 |         return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); | 
 |     } | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { | 
 |     int32_t index; | 
 |     uint32_t previousSec, sec; | 
 |     if(p == 0) { | 
 |         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | 
 |         // Gap at the beginning of the secondary CE range. | 
 |         previousSec = 0; | 
 |         sec = elements[index] >> 16; | 
 |     } else { | 
 |         index = findPrimary(p) + 1; | 
 |         previousSec = Collation::BEFORE_WEIGHT16; | 
 |         sec = getFirstSecTerForPrimary(index) >> 16; | 
 |     } | 
 |     U_ASSERT(s >= sec); | 
 |     while(s > sec) { | 
 |         previousSec = sec; | 
 |         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | 
 |         sec = elements[index++] >> 16; | 
 |     } | 
 |     U_ASSERT(sec == s); | 
 |     return previousSec; | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { | 
 |     U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); | 
 |     int32_t index; | 
 |     uint32_t previousTer, secTer; | 
 |     if(p == 0) { | 
 |         if(s == 0) { | 
 |             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | 
 |             // Gap at the beginning of the tertiary CE range. | 
 |             previousTer = 0; | 
 |         } else { | 
 |             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | 
 |             previousTer = Collation::BEFORE_WEIGHT16; | 
 |         } | 
 |         secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | 
 |     } else { | 
 |         index = findPrimary(p) + 1; | 
 |         previousTer = Collation::BEFORE_WEIGHT16; | 
 |         secTer = getFirstSecTerForPrimary(index); | 
 |     } | 
 |     uint32_t st = (s << 16) | t; | 
 |     while(st > secTer) { | 
 |         if((secTer >> 16) == s) { previousTer = secTer; } | 
 |         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | 
 |         secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; | 
 |     } | 
 |     U_ASSERT(secTer == st); | 
 |     return previousTer & 0xffff; | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { | 
 |     U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); | 
 |     uint32_t q = elements[++index]; | 
 |     int32_t step; | 
 |     if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { | 
 |         // Return the next primary in this range. | 
 |         if((p & 0xffff) == 0) { | 
 |             return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); | 
 |         } else { | 
 |             return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); | 
 |         } | 
 |     } else { | 
 |         // Return the next primary in the list. | 
 |         while((q & SEC_TER_DELTA_FLAG) != 0) { | 
 |             q = elements[++index]; | 
 |         } | 
 |         U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | 
 |         return q; | 
 |     } | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { | 
 |     uint32_t secTer; | 
 |     uint32_t secLimit; | 
 |     if(index == 0) { | 
 |         // primary = 0 | 
 |         U_ASSERT(s != 0); | 
 |         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | 
 |         secTer = elements[index]; | 
 |         // Gap at the end of the secondary CE range. | 
 |         secLimit = 0x10000; | 
 |     } else { | 
 |         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | 
 |         secTer = getFirstSecTerForPrimary(index + 1); | 
 |         // If this is an explicit sec/ter unit, then it will be read once more. | 
 |         // Gap for secondaries of primary CEs. | 
 |         secLimit = getSecondaryBoundary(); | 
 |     } | 
 |     for(;;) { | 
 |         uint32_t sec = secTer >> 16; | 
 |         if(sec > s) { return sec; } | 
 |         secTer = elements[++index]; | 
 |         if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } | 
 |     } | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { | 
 |     uint32_t secTer; | 
 |     uint32_t terLimit; | 
 |     if(index == 0) { | 
 |         // primary = 0 | 
 |         if(s == 0) { | 
 |             U_ASSERT(t != 0); | 
 |             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | 
 |             // Gap at the end of the tertiary CE range. | 
 |             terLimit = 0x4000; | 
 |         } else { | 
 |             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | 
 |             // Gap for tertiaries of primary/secondary CEs. | 
 |             terLimit = getTertiaryBoundary(); | 
 |         } | 
 |         secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | 
 |     } else { | 
 |         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | 
 |         secTer = getFirstSecTerForPrimary(index + 1); | 
 |         // If this is an explicit sec/ter unit, then it will be read once more. | 
 |         terLimit = getTertiaryBoundary(); | 
 |     } | 
 |     uint32_t st = (s << 16) | t; | 
 |     for(;;) { | 
 |         if(secTer > st) { | 
 |             U_ASSERT((secTer >> 16) == s); | 
 |             return secTer & 0xffff; | 
 |         } | 
 |         secTer = elements[++index]; | 
 |         // No tertiary greater than t for this primary+secondary. | 
 |         if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } | 
 |         secTer &= ~SEC_TER_DELTA_FLAG; | 
 |     } | 
 | } | 
 |  | 
 | uint32_t | 
 | CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { | 
 |     uint32_t secTer = elements[index]; | 
 |     if((secTer & SEC_TER_DELTA_FLAG) == 0) { | 
 |         // No sec/ter delta. | 
 |         return Collation::COMMON_SEC_AND_TER_CE; | 
 |     } | 
 |     secTer &= ~SEC_TER_DELTA_FLAG; | 
 |     if(secTer > Collation::COMMON_SEC_AND_TER_CE) { | 
 |         // Implied sec/ter. | 
 |         return Collation::COMMON_SEC_AND_TER_CE; | 
 |     } | 
 |     // Explicit sec/ter below common/common. | 
 |     return secTer; | 
 | } | 
 |  | 
 | int32_t | 
 | CollationRootElements::findPrimary(uint32_t p) const { | 
 |     // Requirement: p must occur as a root primary. | 
 |     U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary | 
 |     int32_t index = findP(p); | 
 |     // If p is in a range, then we just assume that p is an actual primary in this range. | 
 |     // (Too cumbersome/expensive to check.) | 
 |     // Otherwise, it must be an exact match. | 
 |     U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); | 
 |     return index; | 
 | } | 
 |  | 
 | int32_t | 
 | CollationRootElements::findP(uint32_t p) const { | 
 |     // p need not occur as a root primary. | 
 |     // For example, it might be a reordering group boundary. | 
 |     U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); | 
 |     // modified binary search | 
 |     int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; | 
 |     U_ASSERT(p >= elements[start]); | 
 |     int32_t limit = length - 1; | 
 |     U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); | 
 |     U_ASSERT(p < elements[limit]); | 
 |     while((start + 1) < limit) { | 
 |         // Invariant: elements[start] and elements[limit] are primaries, | 
 |         // and elements[start]<=p<=elements[limit]. | 
 |         int32_t i = (start + limit) / 2; | 
 |         uint32_t q = elements[i]; | 
 |         if((q & SEC_TER_DELTA_FLAG) != 0) { | 
 |             // Find the next primary. | 
 |             int32_t j = i + 1; | 
 |             for(;;) { | 
 |                 if(j == limit) { break; } | 
 |                 q = elements[j]; | 
 |                 if((q & SEC_TER_DELTA_FLAG) == 0) { | 
 |                     i = j; | 
 |                     break; | 
 |                 } | 
 |                 ++j; | 
 |             } | 
 |             if((q & SEC_TER_DELTA_FLAG) != 0) { | 
 |                 // Find the preceding primary. | 
 |                 j = i - 1; | 
 |                 for(;;) { | 
 |                     if(j == start) { break; } | 
 |                     q = elements[j]; | 
 |                     if((q & SEC_TER_DELTA_FLAG) == 0) { | 
 |                         i = j; | 
 |                         break; | 
 |                     } | 
 |                     --j; | 
 |                 } | 
 |                 if((q & SEC_TER_DELTA_FLAG) != 0) { | 
 |                     // No primary between start and limit. | 
 |                     break; | 
 |                 } | 
 |             } | 
 |         } | 
 |         if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary. | 
 |             limit = i; | 
 |         } else { | 
 |             start = i; | 
 |         } | 
 |     } | 
 |     return start; | 
 | } | 
 |  | 
 | U_NAMESPACE_END | 
 |  | 
 | #endif  // !UCONFIG_NO_COLLATION |