|  | /* | 
|  | ********************************************************************** | 
|  | *   Copyright (C) 2001 IBM and others. All rights reserved. | 
|  | ********************************************************************** | 
|  | *   Date        Name        Description | 
|  | *  08/13/2001   synwee      Creation. | 
|  | ********************************************************************** | 
|  | */ | 
|  | #ifndef USRCHIMP_H | 
|  | #define USRCHIMP_H | 
|  |  | 
|  | #include "unicode/utypes.h" | 
|  | #include "unicode/ucol.h" | 
|  | #include "unicode/ucoleitr.h" | 
|  | #include "unicode/ubrk.h" | 
|  |  | 
|  | #define INITIAL_ARRAY_SIZE_       256 | 
|  | #define MAX_TABLE_SIZE_           257 | 
|  |  | 
|  | struct USearch { | 
|  | // The text pointer is stored here because the collation element iterator does not have a getText API. | 
|  | const UChar              *text; | 
|  | int32_t             textLength; // exact length of text | 
|  | UBool               isOverlap; | 
|  | UBool               isCanonicalMatch; | 
|  | UBreakIterator     *breakIter; | 
|  | // USEARCH_DONE is the default value of matchedIndex. | 
|  | // If we are not at the start of the text or the end of the text | 
|  | // (depending on the iteration direction) and matchedIndex is USEARCH_DONE, | 
|  | // it means that we cannot find any more matches in that particular direction. | 
|  | UTextOffset         matchedIndex; | 
|  | int32_t             matchedLength; | 
|  | UBool               isForwardSearching; | 
|  | UBool               reset; | 
|  | }; | 
|  |  | 
|  | struct UPattern { | 
|  | const UChar              *text; | 
|  | int32_t             textLength; // exact length of text | 
|  | // CE length; required for backwards CE (collation element) comparison | 
|  | int32_t             CELength; | 
|  | uint32_t           *CE; | 
|  | uint32_t            CEBuffer[INITIAL_ARRAY_SIZE_]; | 
|  | UBool               hasPrefixAccents; | 
|  | UBool               hasSuffixAccents; | 
|  | int16_t             defaultShiftSize; | 
|  | int16_t             shift[MAX_TABLE_SIZE_]; | 
|  | int16_t             backShift[MAX_TABLE_SIZE_]; | 
|  | }; | 
|  |  | 
|  | struct UStringSearch { | 
|  | struct USearch            *search; | 
|  | struct UPattern            pattern; | 
|  | const  UCollator          *collator; | 
|  | // Positions within the collation element iterator are used to determine | 
|  | // whether we are at the start of the text. | 
|  | UCollationElements *textIter; | 
|  | // Utility collation element iterator, used throughout the program for | 
|  | // temporary iteration. | 
|  | UCollationElements *utilIter; | 
|  | UBool               ownCollator; | 
|  | UBool               toNormalize; | 
|  | UCollationStrength  strength; | 
|  | uint32_t            ceMask; | 
|  | uint32_t            variableTop; | 
|  | UBool               toShift; | 
|  | UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; | 
|  | UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; | 
|  | }; | 
|  |  | 
|  | /** | 
|  | * Exact matches without checking the ends for extra accents. | 
|  | * The match after the position within the collation element iterator is to be | 
|  | * found. | 
|  | * After a match is found, the offset in the collation element iterator will be | 
|  | * shifted to the start of the match. | 
|  | * Implementation notes: | 
|  | * - Tertiary strength: we can't use the collator->tertiaryMask, because that | 
|  | *   is a preprocessed mask that takes case options into account. Since we are | 
|  | *   only concerned with exact matches, we don't need that. | 
|  | * - Alternate handling: since only the 16 most significant bits are used, we | 
|  | *   can safely do a compare without masking. If the ce is a variable, we mask | 
|  | *   and get only the primary values. No shifting to quaternary is required, | 
|  | *   since all primary values less than variableTop will need to be masked off | 
|  | *   anyway. | 
|  | * - If the end character is composite and the pattern ce does not match the | 
|  | *   text ce, we skip it until we find a match in the end composite character | 
|  | *   or until the character has been passed. This is so that we can match the | 
|  | *   pattern "a" with the text "\u00e6". | 
|  | * @param strsrch string search data | 
|  | * @param status error status if any | 
|  | * @return TRUE if an exact match is found, FALSE otherwise | 
|  | */ | 
|  | U_CFUNC | 
|  | UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); | 
|  |  | 
|  | /** | 
|  | * Canonical matches (going by the name, the canonical-match counterpart of | 
|  | * usearch_handleNextExact). | 
|  | * According to the definition, matches found here will include the whole span | 
|  | * of beginning and ending accents if it overlaps that region. | 
|  | * @param strsrch string search data | 
|  | * @param status error status if any | 
|  | * @return TRUE if a canonical match is found, FALSE otherwise | 
|  | */ | 
|  | U_CFUNC | 
|  | UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); | 
|  |  | 
|  | /** | 
|  | * Gets the previous match. | 
|  | * The comments on usearch_handleNextExact apply here as well. | 
|  | * @param strsrch string search data | 
|  | * @param status error status if any | 
|  | * @return UBool; presumably TRUE if an exact match is found, FALSE otherwise, | 
|  | *         as documented for usearch_handleNextExact | 
|  | */ | 
|  | U_CFUNC | 
|  | UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); | 
|  |  | 
|  | /** | 
|  | * Canonical matches (going by the name, the canonical-match counterpart of | 
|  | * usearch_handlePreviousExact). | 
|  | * According to the definition, matches found here will include the whole span | 
|  | * of beginning and ending accents if it overlaps that region. | 
|  | * @param strsrch string search data | 
|  | * @param status error status if any | 
|  | * @return TRUE if a canonical match is found, FALSE otherwise | 
|  | */ | 
|  | U_CFUNC | 
|  | UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, | 
|  | UErrorCode    *status); | 
|  |  | 
|  | #endif | 
|  |  | 
|  |  |