| /* |
| ********************************************************************** |
| * Copyright (C) 1999-2000 IBM and others. All rights reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 03/22/2000 helena Creation. |
| ********************************************************************** |
| */ |
| |
| #include <memory.h> |
| #include "unicode/coleitr.h" |
| #include "unicode/schriter.h" |
| #include "strsrch.h" |
| /** |
| * <code>StringSearch</code> is a <code>SearchIterator</code> that provides |
| * language-sensitive text searching based on the comparison rules defined |
| * in a {@link RuleBasedCollator} object. |
| * Instances of <code>StringSearch</code> function as iterators |
| * maintain a current position and scan over text returning the index of |
| * characters where the pattern occurs and the length of each match. |
| * <p> |
| * <code>StringSearch</code> uses a version of the fast Boyer-Moore search |
| * algorithm that has been adapted to work with the large character set of |
| * Unicode. See "Efficient Text Searching in Java", to be published in |
| * <i>Java Report</i> in February, 1999, for further information on the algorithm. |
| * <p> |
| * Consult the <code>SearchIterator</code> documentation for information on |
| * and examples of how to use instances of this class to implement text |
| * searching. <code>SearchIterator</code> provides all of the necessary |
| * API; this class only provides constructors and internal implementation |
| * methods. |
| * |
| * @see SearchIterator |
| * @see RuleBasedCollator |
| * |
| * @author Laura Werner |
| * @version 1.0 |
| */ |
| |
| char StringSearch::fgClassID = 0; // Value is irrelevant // class id |
| /* to be removed */ |
| void StringSearch::dumpTables() { |
| int i; |
| for (i = 0; i < 256; i++) { |
| if (shiftTable[i] != minLen) { |
| // debug("shift[" + Integer.toString(i,16) + "] = " + shiftTable[i]); |
| } |
| } |
| for (i = 0; i < 256; i++) { |
| if (backShiftTable[i] != minLen) { |
| // debug("backShift[" + Integer.toString(i,16) + "] = " + backShiftTable[i]); |
| } |
| } |
| } |
| |
| StringSearch::StringSearch(const UnicodeString& pat, |
| CharacterIterator* target, |
| RuleBasedCollator* coll, |
| BreakIterator* breaker, |
| UErrorCode& status) : |
| SearchIterator(target, breaker), |
| strength(coll->getStrength()), |
| valueList(NULL), |
| valueListLen(0), |
| pattern(pat), |
| normLen(0), // num. of collation elements in pattern. |
| minLen(0), // Min of composed, decomposed versions |
| maxLen(0), // Max |
| it(NULL) |
| |
| { |
| if (U_FAILURE(status)) return; |
| collator = (RuleBasedCollator*)(coll->clone()); |
| iter = collator->createCollationElementIterator(*target); |
| it = collator->createCollationElementIterator(pat); |
| |
| initialize(status); // Initialize the Boyer-Moore tables |
| } |
| |
| /** |
| * Construct a <code>StringSearch</code> object using a specific collator. |
| * <p> |
| * @param pattern The text for which this object will search. |
| * |
| * @param target The text in which to search for the pattern. |
| * |
| * @param collator A <code>RuleBasedCollator</code> object which defines the |
| * language-sensitive comparison rules used to determine |
| * whether text in the pattern and target matches. |
| */ |
| StringSearch::StringSearch(const UnicodeString& pat, |
| CharacterIterator* target, |
| RuleBasedCollator* collator, |
| UErrorCode& status) : |
| SearchIterator(), |
| strength(collator->getStrength()), |
| valueList(NULL), |
| valueListLen(0), |
| pattern(pat), |
| normLen(0), // num. of collation elements in pattern. |
| minLen(0), // Min of composed, decomposed versions |
| maxLen(0), // Max |
| it(NULL) |
| { |
| if (U_FAILURE(status)) return; |
| this->adoptTarget(target); |
| this->collator = (RuleBasedCollator*)(collator->clone()); |
| this->iter = collator->createCollationElementIterator(*target); |
| this->it = collator->createCollationElementIterator(pat); |
| initialize(status); |
| } |
| |
| /** |
| * Construct a <code>StringSearch</code> object using the collator and |
| * character boundary detection rules for a given locale |
| * <p> |
| * @param pattern The text for which this object will search. |
| * |
| * @param target The text in which to search for the pattern. |
| * |
| * @param loc The locale whose collation and break-detection rules |
| * should be used. |
| * |
| * @exception ClassCastException thrown if the collator for the specified |
| * locale is not a RuleBasedCollator. |
| */ |
| StringSearch::StringSearch(const StringSearch& that) : |
| SearchIterator(that), |
| iter(NULL), |
| collator(that.collator), |
| strength(that.strength), |
| valueList(NULL), |
| valueListLen(that.valueListLen), |
| normLen(that.normLen), // num. of collation elements in pattern. |
| minLen(that.minLen), // Min of composed, decomposed versions |
| maxLen(that.maxLen), |
| it(NULL) |
| { |
| valueList = new int32_t[valueListLen]; |
| memcpy(valueList, that.valueList, valueListLen*sizeof(int32_t)); |
| iter = that.collator->createCollationElementIterator(that.getTarget()); |
| it = that.collator->createCollationElementIterator(that.pattern); |
| } |
| |
| StringSearch::StringSearch(const UnicodeString& pat, |
| CharacterIterator* target, |
| const Locale& loc, |
| UErrorCode& status) : |
| SearchIterator(), |
| valueList(NULL), |
| valueListLen(0), |
| pattern(pat), |
| normLen(0), // num. of collation elements in pattern. |
| minLen(0), // Min of composed, decomposed versions |
| maxLen(0) // Max |
| { |
| if (U_FAILURE(status)) return; |
| this->adoptTarget(target); |
| collator = (RuleBasedCollator*)Collator::createInstance(loc, status); |
| iter = collator->createCollationElementIterator(*target); |
| it = collator->createCollationElementIterator(pat); |
| |
| strength = collator->getStrength(); |
| |
| initialize(status); |
| } |
| |
| UBool |
| StringSearch::operator==(const SearchIterator& that) const |
| { |
| if (that.getDynamicClassID() != getDynamicClassID()) |
| return FALSE; |
| if (!SearchIterator::operator==(that)) |
| return FALSE; |
| const StringSearch& that2 = (const StringSearch&)that; |
| if (*that2.iter != *iter) return FALSE; |
| else if (*that2.collator != *collator) return FALSE; |
| else if (that2.strength != strength) return FALSE; |
| else if (that2.valueListLen != valueListLen) return FALSE; |
| else if (memcmp(that2.valueList, valueList, valueListLen*sizeof(int32_t)) != 0) return FALSE; |
| else if (that2.pattern != pattern) return FALSE; |
| else if (that2.normLen != normLen) return FALSE; |
| else if (that2.minLen != minLen) return FALSE; |
| else if (that2.maxLen != maxLen) return FALSE; |
| else return TRUE; |
| } |
| |
| SearchIterator* |
| StringSearch::clone(void) const |
| { |
| return new StringSearch(*this); |
| } |
| |
| /** |
| * Construct a <code>StringSearch</code> object using the collator for the default |
| * locale |
| * <p> |
| * @param pattern The text for which this object will search. |
| * |
| * @param target The text in which to search for the pattern. |
| * |
| * @param collator A <code>RuleBasedCollator</code> object which defines the |
| * language-sensitive comparison rules used to determine |
| * whether text in the pattern and target matches. |
| */ |
| StringSearch::StringSearch(const UnicodeString& pat, |
| const UnicodeString& newText, |
| UErrorCode& status) : |
| SearchIterator(), |
| valueList(NULL), |
| valueListLen(0), |
| pattern(pat), |
| normLen(0), // num. of collation elements in pattern. |
| minLen(0), // Min of composed, decomposed versions |
| maxLen(0) // Max |
| { |
| StringCharacterIterator *s = new StringCharacterIterator(newText); |
| collator = (RuleBasedCollator*)Collator::createInstance(Locale::getDefault(), status); |
| strength = collator->getStrength(); |
| iter = collator->createCollationElementIterator(newText); |
| it = collator->createCollationElementIterator(pat); |
| this->adoptTarget(s); |
| initialize(status); |
| } |
| |
| StringSearch::~StringSearch(void) |
| { |
| if (valueList != NULL) { |
| delete [] valueList; |
| valueList = 0; |
| } |
| if (iter != NULL) { |
| delete iter; |
| iter = 0; |
| } |
| if (collator != NULL) { |
| delete collator; |
| collator = 0; |
| } |
| if (it != NULL) { |
| delete it; |
| it = 0; |
| } |
| } |
| //------------------------------------------------------------------- |
| // Getters and Setters |
| //------------------------------------------------------------------- |
| |
| /** |
| * Sets this object's strength property. The strength determines the |
| * minimum level of difference considered significant during a |
| * search. Generally, {@link Collator#TERTIARY} and |
| * {@link Collator#IDENTICAL} indicate that all differences are |
| * considered significant, {@link Collator#SECONDARY} indicates |
| * that upper/lower case distinctions should be ignored, and |
| * {@link Collator#PRIMARY} indicates that both case and accents |
| * should be ignored. However, the exact meanings of these constants |
| * are determined by individual Collator objects. |
| * <p> |
| * @see Collator#PRIMARY |
| * @see Collator#SECONDARY |
| * @see Collator#TERTIARY |
| * @see Collator#IDENTICAL |
| */ |
| void StringSearch::setStrength(Collator::ECollationStrength newStrength, UErrorCode& status) { |
| if (U_FAILURE(status)) |
| { |
| return; |
| } |
| strength = newStrength; |
| |
| // Due to a bug (?) in CollationElementIterator, we must set the |
| // collator's strength as well, since the iterator is going to |
| // mask out the portions of the collation element that are not |
| // relevant for the collator's current strength setting |
| // Note that this makes it impossible to share a Collator among |
| // multiple StringSearch objects if you adjust Strength settings. |
| collator->setStrength(strength); |
| initialize(status); |
| } |
| |
| |
| /** |
| * Returns this object's strength property, which indicates what level |
| * of differences are considered significant during a search. |
| * <p> |
| * @see #setStrength |
| */ |
| Collator::ECollationStrength StringSearch::getStrength() const |
| { |
| return strength; |
| } |
| |
| /** |
| * Set the collator to be used for this string search. Also changes |
| * the search strength to match that of the new collator. |
| * <p> |
| * This method causes internal data such as Boyer-Moore shift tables |
| * to be recalculated, but the iterator's position is unchanged. |
| * <p> |
| * @see #getCollator |
| */ |
| void StringSearch::setCollator(const RuleBasedCollator *coll, UErrorCode& status) |
| { |
| delete iter; |
| delete collator; |
| collator = (RuleBasedCollator*)coll->clone(); |
| strength = collator->getStrength(); |
| // Also need to recompute the pattern and get a new target iterator |
| iter = collator->createCollationElementIterator(getTarget()); |
| initialize(status); |
| } |
| |
| /** |
| * Return the RuleBasedCollator being used for this string search. |
| */ |
| const RuleBasedCollator& StringSearch::getCollator(void) const |
| { |
| return *collator; |
| } |
| |
| /** |
| * Set the pattern for which to search. |
| * This method causes internal data such as Boyer-Moore shift tables |
| * to be recalculated, but the iterator's position is unchanged. |
| */ |
| void StringSearch::setPattern(const UnicodeString& pat, UErrorCode& status) |
| { |
| pattern = pat; |
| initialize(status); |
| } |
| |
| /** |
| * Returns the pattern for which this object is searching. |
| */ |
| const UnicodeString& StringSearch::getPattern() const |
| { |
| return pattern; |
| } |
| |
| /** |
| * Set the target text which should be searched and resets the |
| * iterator's position to point before the start of the new text. |
| * This method is useful if you want to re-use an iterator to |
| * search for the same pattern within a different body of text. |
| */ |
| void StringSearch::adoptTarget(CharacterIterator* target) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| SearchIterator::adoptTarget(target); |
| |
| // fix me: Skipped the error code |
| // Since we're caching a CollationElementIterator, recreate it |
| iter->setText(*target, status); |
| } |
| void StringSearch::setTarget(const UnicodeString& newText) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| SearchIterator::setTarget(newText); |
| // Since we're caching a CollationElementIterator, recreate it |
| iter->setText(newText, status); |
| } |
| |
| void StringSearch::reset(void) |
| { |
| SearchIterator::reset(); |
| iter->reset(); |
| }//------------------------------------------------------------------- |
| // Privates |
| //------------------------------------------------------------------- |
| |
| /** |
| * Search forward for matching text, starting at a given location. |
| * Clients should not call this method directly; instead they should call |
| * {@link SearchIterator#next}. |
| * <p> |
| * If a match is found, this method returns the index at which the match |
| * starts and calls {@link SearchIterator#setMatchLength} |
| * with the number of characters in the target |
| * text that make up the match. If no match is found, the method returns |
| * <code>DONE</code> and does not call <tt>setMatchLength</tt>. |
| * <p> |
| * @param start The index in the target text at which the search starts. |
| * |
| * @return The index at which the matched text in the target starts, or DONE |
| * if no match was found. |
| * <p> |
| * @see SearchIterator#next |
| * @see SearchIterator#DONE |
| */ |
| int32_t StringSearch::handleNext(int32_t start, UErrorCode& status) |
| { |
| if (U_FAILURE(status)) |
| { |
| return SearchIterator::DONE; |
| } |
| const CharacterIterator& target = getTarget(); |
| |
| int mask = getMask(strength); |
| |
| #if 0 |
| int done = CollationElementIterator::NULLORDER & mask; |
| if (DEBUG) { |
| debug("-------------------------handleNext-----------------------------------"); |
| debug(""); |
| debug("strength=" + strength + ", mask=" + Integer.toString(mask,16) |
| + ", done=" + Integer.toString(done,16)); |
| debug("decomp=" + collator.getDecomposition()); |
| |
| debug("target.begin=" + getTarget().getBeginIndex()); |
| debug("target.end=" + getTarget().getEndIndex()); |
| debug("start = " + start); |
| } |
| #endif |
| int32_t index = start + minLen; |
| int32_t matchEnd = 0; |
| |
| while (index <= target.endIndex()) |
| { |
| int32_t patIndex = normLen; |
| int32_t tval = 0, pval = 0; |
| UBool getP = TRUE; |
| |
| iter->setOffset(index, status); |
| matchEnd = index; |
| |
| //if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index); |
| |
| while ((patIndex > 0 || getP == false) && iter->getOffset() > start) |
| { |
| #if 0 |
| if (DEBUG) { |
| debug(" inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); |
| debug(" getP=" + getP); |
| } |
| #endif |
| |
| // Get the previous character in both the pattern and the target |
| tval = iter->previous(status) & mask; |
| if (U_FAILURE(status)) |
| { |
| return SearchIterator::DONE; |
| } |
| |
| if (getP) pval = valueList[--patIndex]; |
| getP = TRUE; |
| |
| // (DEBUG) debug(" pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16)); |
| |
| if (tval == 0) { // skip tval, use same pval |
| // (DEBUG) debug(" tval is ignorable"); |
| getP = FALSE; |
| } |
| else if (pval != tval) { // Mismatch, skip ahead |
| // (DEBUG) debug(" mismatch: skippping " + getShift(tval, patIndex)); |
| |
| index += getShift(tval, patIndex); |
| break; |
| } |
| else if (patIndex == 0) { |
| // The values matched, and we're at the beginning of the pattern, |
| // which means we matched the whole thing. |
| start = iter->getOffset(); |
| setMatchLength(matchEnd - start); |
| // if (DEBUG) debug("Found match at index "+ start ); |
| return start; |
| } |
| } |
| #if 0 |
| if (DEBUG) debug(" end of inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); |
| if (DEBUG) debug(" getP=" + getP); |
| #endif |
| if (iter->getOffset() <= start) { |
| // We hit the beginning of the text being searched, which is |
| // possible if it contains lots of ignorable characters. |
| // Advance one character and try again. |
| // if (DEBUG) debug("hit beginning of target; advance by one"); |
| index++; |
| } |
| } |
| // if (DEBUG) debug("Fell off end of outer loop; returning DONE"); |
| return SearchIterator::DONE; |
| } |
| |
| /** |
| * Search backward for matching text ,starting at a given location. |
| * Clients should not call this method directly; instead they should call |
| * <code>SearchIterator.previous()</code>, which this method overrides. |
| * <p> |
| * If a match is found, this method returns the index at which the match |
| * starts and calls {@link SearchIterator#setMatchLength} |
| * with the number of characters in the target |
| * text that make up the match. If no match is found, the method returns |
| * <code>DONE</code> and does not call <tt>setMatchLength</tt>. |
| * <p> |
| * @param start The index in the target text at which the search starts. |
| * |
| * @return The index at which the matched text in the target starts, or DONE |
| * if no match was found. |
| * <p> |
| * @see SearchIterator#previous |
| * @see SearchIterator#DONE |
| */ |
| int32_t StringSearch::handlePrev(int32_t start, UErrorCode& status) |
| { |
| if (U_FAILURE(status)) |
| { |
| return SearchIterator::DONE; |
| } |
| int patLen = normLen; |
| int index = start - minLen; |
| |
| int mask = getMask(strength); |
| int done = CollationElementIterator::NULLORDER & mask; |
| #if 0 |
| if (DEBUG) { |
| debug("-------------------------handlePrev-----------------------------------"); |
| debug(""); |
| debug("strength=" + strength + ", mask=" + Integer.toString(mask,16) |
| + ", done=" + Integer.toString(done,16)); |
| debug("decomp=" + collator.getDecomposition()); |
| |
| debug("target.begin=" + getTarget().getBeginIndex()); |
| debug("target.end=" + getTarget().getEndIndex()); |
| } |
| #endif |
| |
| while (index >= 0) { |
| int patIndex = 0; |
| int tval = 0, pval = 0; |
| UBool getP = TRUE; |
| |
| iter->setOffset(index, status); |
| if (U_FAILURE(status)) |
| { |
| return SearchIterator::DONE; |
| } |
| |
| |
| // if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index); |
| |
| while ((patIndex < patLen || !getP) && iter->getOffset() < start) |
| { |
| /* if (DEBUG) { |
| debug(" inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); |
| } |
| */ |
| tval = iter->next(status) & mask; |
| if (U_FAILURE(status)) |
| { |
| return SearchIterator::DONE; |
| } |
| if (getP) pval = valueList[patIndex++]; |
| getP = TRUE; |
| |
| //if (DEBUG) debug(" pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16)); |
| |
| if (tval == done) { |
| // if (DEBUG) debug(" end of target; no match"); |
| return DONE; |
| } |
| else if (tval == 0) { |
| // if (DEBUG) debug(" tval is ignorable"); |
| getP = false; |
| } |
| else if (pval != tval) { |
| // We didn't match this pattern. Skip ahead |
| // if (DEBUG) debug(" mismatch: skippping " + getBackShift(tval, patIndex)); |
| |
| int shift = getBackShift(tval, patIndex); |
| index -= shift; |
| break; |
| } |
| else if (patIndex == patLen) { |
| // The elements matched and we're at the end of the pattern, |
| // which means we matched the whole thing. |
| setMatchLength(iter->getOffset() - index); |
| return index; |
| } |
| } |
| if (iter->getOffset() >= start) { |
| // We hit the end of the text being searched, which is |
| // possible if it contains lots of ignorable characters. |
| // Back up one character and try again. |
| // if (DEBUG) debug("hit end of target; back by one"); |
| index--; |
| } |
| } |
| return SearchIterator::DONE; |
| } |
| |
| /** |
| * Return a bitmask that will select only the portions of a collation |
| * element that are significant at the given strength level. |
| */ |
| int32_t StringSearch::getMask(Collator::ECollationStrength strength) |
| { |
| switch (strength) { |
| case Collator::PRIMARY: |
| return 0xFFFF0000; |
| case Collator::SECONDARY: |
| return 0xFFFFFF00; |
| default: |
| return 0xFFFFFFFF; |
| } |
| } |
| |
| |
| void StringSearch::initialize(UErrorCode& status) { |
| /* |
| if (DEBUG) { |
| debug("-------------------------initialize-----------------------------------"); |
| debug("pattern=" + pattern); |
| } |
| */ |
| it->setText(pattern, status); |
| if (U_FAILURE(status)) { |
| delete it; |
| return; |
| } |
| |
| int mask = getMask(strength); |
| |
| // See how many non-ignorable collation keys are in the text |
| normLen = 0; |
| int32_t elem; |
| while ((elem = it->next(status)) != CollationElementIterator::NULLORDER) |
| { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| if ((elem & mask) != 0) { |
| normLen++; |
| } |
| } |
| |
| if (valueList != NULL) { |
| delete [] valueList; |
| } |
| |
| // Save them all |
| valueList = new int32_t[normLen]; |
| int expandLen = 0; |
| it->reset(); |
| |
| int32_t i; |
| for (i = 0; i < normLen; i++) |
| { |
| elem = it->next(status); |
| if (U_FAILURE(status)) { |
| return; |
| } |
| |
| if ((elem & mask) != 0) { |
| valueList[i] = elem & mask; |
| |
| } |
| // Keep track of whether there are any expanding-character |
| // sequences that can result in one of the characters that's in |
| // the pattern. If there are, we have to reduce the shift |
| // distances calculated below to account for it. |
| expandLen += it->getMaxExpansion(elem) - 1; |
| } |
| |
| // |
| // We need to remember the size of the composed and decomposed |
| // versions of the string. Standard Boyer-Moore shift calculations |
| // can be wrong by an amount up to that difference, since a small |
| // small number of characters in the pattern can map to a larger |
| // number in the text being searched, or vice-versa. |
| // |
| int uniLen = pattern.length(); |
| maxLen = uprv_max(normLen, uniLen); |
| minLen = uprv_min(normLen, uniLen) - expandLen; |
| |
| |
| /* |
| if (DEBUG) debug("normLen=" + normLen + ", expandLen=" + expandLen |
| + ", maxLen=" + maxLen + ", minLen=" + minLen); |
| */ |
| // Now initialize the shift tables |
| // |
| // NOTE: This is the most conservative way to build them. If we had a way |
| // of knowing that there were no expanding/contracting chars in the rules, |
| // we could get rid of the "- 1" in the shiftTable calculations. |
| // But all of the default collators have at least one expansion or |
| // contraction, so it probably doesn't matter anyway. |
| // |
| for (i = 0; i < 256; i++) { |
| shiftTable[i] = backShiftTable[i] = minLen; |
| } |
| |
| for (i = 0; i < normLen-1; i++) { |
| shiftTable[hash(valueList[i])] = uprv_max(minLen - i - 1, 1); |
| } |
| shiftTable[hash(valueList[normLen-1])] = 1; |
| |
| for (i = normLen - 1; i > 0; i--) { |
| backShiftTable[hash(valueList[i])] = i; |
| } |
| backShiftTable[hash(valueList[0])] = 1; |
| |
| /* dumpTables(); */ |
| } |
| |
| /** |
| * Method used by StringSearch to determine how far to the right to |
| * shift the pattern during a Boyer-Moore search. |
| * |
| * @param curValue The current value in the target text |
| * @param curIndex The index in the pattern at which we failed to match |
| * curValue in the target text. |
| */ |
| int32_t StringSearch::getShift( int32_t curValue, int32_t curIndex ) const |
| { |
| int32_t shiftAmt = shiftTable[hash(curValue)]; |
| |
| if (minLen != maxLen) { |
| int adjust = normLen - curIndex; |
| if (shiftAmt > adjust + 1) { |
| // if (DEBUG) debug("getShift: adjusting by " + adjust); |
| shiftAmt -= adjust; |
| } |
| } |
| return shiftAmt; |
| } |
| |
| /** |
| * Method used by StringSearch to determine how far to the left to |
| * shift the pattern during a reverse Boyer-Moore search. |
| * |
| * @param curValue The current value in the target text |
| * @param curIndex The index in the pattern at which we failed to match |
| * curValue in the target text. |
| */ |
| int32_t StringSearch::getBackShift( int32_t curValue, int32_t curIndex ) const |
| { |
| int shiftAmt = backShiftTable[hash(curValue)]; |
| |
| if (minLen != maxLen) { |
| int adjust = normLen - (minLen - curIndex); |
| if (shiftAmt > adjust + 1) { |
| // if (DEBUG) debug("getBackShift: adjusting by " + adjust); |
| shiftAmt -= adjust; |
| } |
| } |
| return shiftAmt; |
| } |
| |
| /** |
| * Hash a collation element from its full size (32 bits) down into a |
| * value that can be used as an index into the shift tables. Right |
| * now we do a modulus by the size of the hash table. |
| * |
| * TODO: At some point I should experiment to see whether a slightly |
| * more complicated hash function gives us a better distribution |
| * on multilingual text. I doubt it will have much effect on |
| * performance, though. |
| */ |
| int32_t StringSearch::hash(int32_t order) |
| { |
| return CollationElementIterator::primaryOrder(order) % 256; |
| } |
| |
| |
| |
| |