/* source/i18n/usrchimp.h - external/github.com/unicode-org/icu - Git at Google */

 /*
 **********************************************************************
 *   Copyright (C) 2001 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  08/13/2001   synwee      Creation.
 **********************************************************************
 */
 #ifndef USRCHIMP_H
 #define USRCHIMP_H

 #include "unicode/utypes.h"
 #include "unicode/ucol.h"
 #include "unicode/ucoleitr.h"
 #include "unicode/ubrk.h"

 // Element count of the fixed-size buffers declared in this header:
 // UPattern.CEBuffer, UStringSearch.canonicalPrefixAccents and
 // UStringSearch.canonicalSuffixAccents.
 #define INITIAL_ARRAY_SIZE_       256
 // Element count of the UPattern.shift and UPattern.backShift tables.
 #define MAX_TABLE_SIZE_           257

 /**
  * State attached to the text: its buffer and length, boolean option flags,
  * the break iterator, and the matched index and length.
  */
 struct USearch {
     // Kept here because the collation element iterator has no getText API.
     const UChar *text;
     // Exact length of text.
     int32_t textLength;
     UBool isOverlap;
     UBool isCanonicalMatch;
     UBreakIterator *breakIter;
     // USEARCH_DONE is the default value.
     // If matchedIndex is USEARCH_DONE and we are not at the start or the end
     // of the text (depending on the iteration direction), no more matches
     // can be found in that particular direction.
     UTextOffset matchedIndex;
     int32_t matchedLength;
     UBool isForwardSearching;
     UBool reset;
 };

 /**
  * Pattern data: the pattern text and its length, the CE array with its
  * built-in buffer, the accent flags, and the shift tables.
  */
 struct UPattern {
     const UChar *text;
     // Exact length of text.
     int32_t textLength;
     // Length required for backwards ce comparison.
     int32_t CELength;
     uint32_t *CE;
     uint32_t CEBuffer[INITIAL_ARRAY_SIZE_];
     UBool hasPrefixAccents;
     UBool hasSuffixAccents;
     int16_t defaultShiftSize;
     int16_t shift[MAX_TABLE_SIZE_];
     int16_t backShift[MAX_TABLE_SIZE_];
 };

 /**
  * String search data: points to the USearch state, embeds the UPattern, and
  * holds the collator, two collation element iterators and the
  * collation-related settings declared below.
  */
 struct UStringSearch {
     struct USearch *search;
     struct UPattern pattern;
     const UCollator *collator;
     // The position within this collation element iterator is used to
     // determine whether we are at the start of the text.
     UCollationElements *textIter;
     // Utility collation element iterator, used throughout the program for
     // temporary iteration.
     UCollationElements *utilIter;
     UBool ownCollator;
     UBool toNormalize;
     UCollationStrength strength;
     uint32_t ceMask;
     uint32_t variableTop;
     UBool toShift;
     UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
     UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
 };

 /**
  * Searches for the next exact match, without checking the ends of the match
  * for extra accents.
  * The match looked for is the one after the current position within the
  * collation element iterator. Once a match is found, the offset in the
  * collation element iterator is shifted to the start of the match.
  *
  * Implementation notes:
  * - Tertiary strength: collator->tertiaryMask cannot be used, because it is
  *   a preprocessed mask that takes case options into account. Only exact
  *   matches matter here, so that mask is not needed.
  * - Alternate handling: only the 16 most significant bits are used, so the
  *   comparison can safely be done without masking. If the ce is a variable,
  *   it is masked so that only the primary value remains; no shifting to
  *   quaternary is required, since all primary values less than variableTop
  *   have to be masked off anyway.
  * - If the end character is composite and the pattern ce does not match the
  *   text ce, it is skipped until a match is found in the end composite
  *   character or the character has been passed. This allows the pattern "a"
  *   to match the text "\u00e6".
  *
  * @param strsrch string search data
  * @param status error status if any
  * @return TRUE if an exact match is found, FALSE otherwise
  */
 U_CFUNC UBool usearch_handleNextExact(UStringSearch *strsrch,
                                       UErrorCode    *status);

 /**
  * Searches for the next canonical match.
  * By definition, a match found here includes the whole span of beginning and
  * ending accents if it overlaps that region.
  *
  * @param strsrch string search data
  * @param status error status if any
  * @return TRUE if a canonical match is found, FALSE otherwise
  */
 U_CFUNC UBool usearch_handleNextCanonical(UStringSearch *strsrch,
                                           UErrorCode    *status);

 /**
  * Searches for the previous exact match.
  * The remarks on usearch_handleNextExact apply here as well.
  *
  * @param strsrch string search data
  * @param status error status if any
  * @return TRUE if an exact match is found, FALSE otherwise
  */
 U_CFUNC UBool usearch_handlePreviousExact(UStringSearch *strsrch,
                                           UErrorCode    *status);

 /**
  * Searches for the previous canonical match.
  * By definition, a match found here includes the whole span of beginning and
  * ending accents if it overlaps that region.
  *
  * @param strsrch string search data
  * @param status error status if any
  * @return TRUE if a canonical match is found, FALSE otherwise
  */
 U_CFUNC UBool
 usearch_handlePreviousCanonical(UStringSearch *strsrch, UErrorCode *status);

 #endif
	/*
	**********************************************************************
	* Copyright (C) 2001 IBM and others. All rights reserved.
	**********************************************************************
	* Date Name Description
	* 08/13/2001 synwee Creation.
	**********************************************************************
	*/
	#ifndef USRCHIMP_H
	#define USRCHIMP_H

	#include "unicode/utypes.h"
	#include "unicode/ucol.h"
	#include "unicode/ucoleitr.h"
	#include "unicode/ubrk.h"

	// Element count of the fixed-size buffers declared in this header:
	// UPattern.CEBuffer, UStringSearch.canonicalPrefixAccents and
	// UStringSearch.canonicalSuffixAccents.
	#define INITIAL_ARRAY_SIZE_ 256
	// Element count of the UPattern.shift and UPattern.backShift tables.
	#define MAX_TABLE_SIZE_ 257

	/**
	 * State attached to the text: its buffer and length, boolean option flags,
	 * the break iterator, and the matched index and length.
	 */
	struct USearch {
		// Kept here because the collation element iterator has no getText API.
		const UChar *text;
		// Exact length of text.
		int32_t textLength;
		UBool isOverlap;
		UBool isCanonicalMatch;
		UBreakIterator *breakIter;
		// USEARCH_DONE is the default value.
		// If matchedIndex is USEARCH_DONE and we are not at the start or the
		// end of the text (depending on the iteration direction), no more
		// matches can be found in that particular direction.
		UTextOffset matchedIndex;
		int32_t matchedLength;
		UBool isForwardSearching;
		UBool reset;
	};

	/**
	 * Pattern data: the pattern text and its length, the CE array with its
	 * built-in buffer, the accent flags, and the shift tables.
	 */
	struct UPattern {
		const UChar *text;
		// Exact length of text.
		int32_t textLength;
		// Length required for backwards ce comparison.
		int32_t CELength;
		uint32_t *CE;
		uint32_t CEBuffer[INITIAL_ARRAY_SIZE_];
		UBool hasPrefixAccents;
		UBool hasSuffixAccents;
		int16_t defaultShiftSize;
		int16_t shift[MAX_TABLE_SIZE_];
		int16_t backShift[MAX_TABLE_SIZE_];
	};

	/**
	 * String search data: points to the USearch state, embeds the UPattern, and
	 * holds the collator, two collation element iterators and the
	 * collation-related settings declared below.
	 */
	struct UStringSearch {
		struct USearch *search;
		struct UPattern pattern;
		const UCollator *collator;
		// The position within this collation element iterator is used to
		// determine whether we are at the start of the text.
		UCollationElements *textIter;
		// Utility collation element iterator, used throughout the program for
		// temporary iteration.
		UCollationElements *utilIter;
		UBool ownCollator;
		UBool toNormalize;
		UCollationStrength strength;
		uint32_t ceMask;
		uint32_t variableTop;
		UBool toShift;
		UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
		UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
	};

	/**
	 * Searches for the next exact match, without checking the ends of the match
	 * for extra accents.
	 * The match looked for is the one after the current position within the
	 * collation element iterator. Once a match is found, the offset in the
	 * collation element iterator is shifted to the start of the match.
	 *
	 * Implementation notes:
	 * - Tertiary strength: collator->tertiaryMask cannot be used, because it is
	 *   a preprocessed mask that takes case options into account. Only exact
	 *   matches matter here, so that mask is not needed.
	 * - Alternate handling: only the 16 most significant bits are used, so the
	 *   comparison can safely be done without masking. If the ce is a variable,
	 *   it is masked so that only the primary value remains; no shifting to
	 *   quaternary is required, since all primary values less than variableTop
	 *   have to be masked off anyway.
	 * - If the end character is composite and the pattern ce does not match the
	 *   text ce, it is skipped until a match is found in the end composite
	 *   character or the character has been passed. This allows the pattern "a"
	 *   to match the text "\u00e6".
	 *
	 * @param strsrch string search data
	 * @param status error status if any
	 * @return TRUE if an exact match is found, FALSE otherwise
	 */
	U_CFUNC
	UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);

	/**
	 * Searches for the next canonical match.
	 * By definition, a match found here includes the whole span of beginning and
	 * ending accents if it overlaps that region.
	 *
	 * @param strsrch string search data
	 * @param status error status if any
	 * @return TRUE if a canonical match is found, FALSE otherwise
	 */
	U_CFUNC
	UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);

	/**
	 * Searches for the previous exact match.
	 * The remarks on usearch_handleNextExact apply here as well.
	 *
	 * @param strsrch string search data
	 * @param status error status if any
	 * @return TRUE if an exact match is found, FALSE otherwise
	 */
	U_CFUNC
	UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);

	/**
	 * Searches for the previous canonical match.
	 * By definition, a match found here includes the whole span of beginning and
	 * ending accents if it overlaps that region.
	 *
	 * @param strsrch string search data
	 * @param status error status if any
	 * @return TRUE if a canonical match is found, FALSE otherwise
	 */
	U_CFUNC UBool
	usearch_handlePreviousCanonical(UStringSearch *strsrch, UErrorCode *status);

	#endif