source/i18n/strmatch.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 * Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   07/23/01    aliu        Creation.
 **********************************************************************
 */

 #include "strmatch.h"
 #include "rbt_data.h"
 #include "rbt_rule.h"

 U_NAMESPACE_BEGIN

 /**
  * Construct a matcher over a slice of a rule string.
  *
  * @param theString source text from which the pattern is extracted
  * @param start     start index of the slice handed to extractBetween()
  * @param limit     limit index of the slice handed to extractBetween()
  * @param isSeg     stored as isSegment; toPattern() parenthesizes the
  *                  pattern when this is set
  * @param theData   rule data consulted through lookup() for each pattern
  *                  character; stored in the member 'data'
  */
 StringMatcher::StringMatcher(const UnicodeString& theString,
                              int32_t start,
                              int32_t limit,
                              UBool isSeg,
                              const TransliterationRuleData& theData)
     : data(theData),
       isSegment(isSeg),
       matchStart(-1),   // negative == no match recorded yet
       matchLimit(-1)
 {
     // Copy the requested slice of the rule text into our own pattern.
     theString.extractBetween(start, limit, pattern);
 }

 /**
  * Copy constructor.  Duplicates the pattern, the segment flag and the
  * currently recorded match positions, and refers to the same rule data
  * object as the original.
  */
 StringMatcher::StringMatcher(const StringMatcher& o)
     : UnicodeMatcher(o),
       pattern(o.pattern),
       data(o.data),
       isSegment(o.isSegment),
       matchStart(o.matchStart),
       matchLimit(o.matchLimit)
 {
     // Nothing else to do; every member is set in the initializer list.
 }

 /**
  * Destructor.  The body is empty: no explicit cleanup is performed
  * here.
  */
 StringMatcher::~StringMatcher()
 {
 }

 /**
  * Implement UnicodeMatcher.
  *
  * @return a newly allocated copy of this matcher, produced with the
  *         copy constructor
  */
 UnicodeMatcher* StringMatcher::clone() const
 {
     return new StringMatcher(*this);
 }

 /**
  * Implement UnicodeMatcher.
  *
  * Walks the pattern one 16-bit unit at a time.  Each unit is first looked
  * up in the rule data: if a matcher is registered for it, matching is
  * delegated to that matcher; otherwise the unit is compared literally
  * against the text.
  *
  * Direction is chosen from the arguments: when limit < offset the pattern
  * is matched right-to-left (last pattern unit first, cursor decreasing);
  * otherwise it is matched left-to-right.
  *
  * @param text        the text being matched
  * @param offset      in: position at which to start; out: on U_MATCH,
  *                    the cursor position after the match.  Left untouched
  *                    for any other result.
  * @param limit       bound on the cursor.  In the forward direction a
  *                    literal only matches while cursor < limit; in the
  *                    reverse direction while cursor >= limit.
  * @param incremental forward direction only: if TRUE, running into limit
  *                    before the pattern is exhausted yields
  *                    U_PARTIAL_MATCH.  Always forwarded to nested
  *                    matchers.
  * @return U_MATCH on success, U_MISMATCH on a failed literal comparison,
  *         U_PARTIAL_MATCH as described above, or whatever non-U_MATCH
  *         value a nested matcher returned.
  */
 UMatchDegree StringMatcher::matches(const Replaceable& text,
                                     int32_t& offset,
                                     int32_t limit,
                                     UBool incremental) {
     int32_t pos = offset;

     if (limit < pos) {
         // ---- Reverse direction: consume the pattern back to front ----
         int32_t idx = pattern.length();
         while (--idx >= 0) {
             const UChar patChar = pattern.charAt(idx);
             UnicodeMatcher* nested = data.lookup(patChar);

             if (nested != 0) {
                 // Stand-in for another matcher: let it advance 'pos'.
                 const UMatchDegree degree =
                     nested->matches(text, pos, limit, incremental);
                 if (degree != U_MATCH) {
                     return degree;
                 }
                 continue;
             }

             // Plain literal: must be within bounds and equal.
             if (pos < limit || patChar != text.charAt(pos)) {
                 return U_MISMATCH;
             }
             --pos;
         }

         // Keep only the first reverse match seen since the last
         // resetMatch() -- that is the rightmost one.  The +1 converts
         // the backward-running cursor to a forward [start, limit) pair.
         if (matchStart < 0) {
             matchStart = pos + 1;
             matchLimit = offset + 1;
         }
     } else {
         // ---- Forward direction ----
         for (int32_t idx = 0; idx < pattern.length(); ++idx) {
             if (incremental && pos == limit) {
                 // Out of text, no mismatch so far, pattern not finished.
                 return U_PARTIAL_MATCH;
             }

             const UChar patChar = pattern.charAt(idx);
             UnicodeMatcher* nested = data.lookup(patChar);

             if (nested != 0) {
                 const UMatchDegree degree =
                     nested->matches(text, pos, limit, incremental);
                 if (degree != U_MATCH) {
                     return degree;
                 }
                 continue;
             }

             // The bounds test is redundant when incremental is TRUE
             // (handled above) but required otherwise.
             if (pos >= limit || patChar != text.charAt(pos)) {
                 return U_MISMATCH;
             }
             ++pos;
         }

         // A forward match always overwrites the recorded position.
         matchStart = offset;
         matchLimit = pos;
     }

     offset = pos;
     return U_MATCH;
 }

 /**
  * Implement UnicodeMatcher.
  *
  * Rebuilds a rule-syntax representation of this matcher in 'result'.
  * Pattern units that map to a matcher in the rule data are replaced by
  * that matcher's own pattern; all other units are appended as
  * characters via TransliterationRule::appendToRule().  If this matcher
  * is a segment the output is wrapped in parentheses.
  *
  * @param result            receives the pattern; any previous contents
  *                          are discarded
  * @param escapeUnprintable forwarded unchanged to appendToRule() and to
  *                          nested matchers' toPattern()
  * @return a reference to 'result'
  */
 UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                         UBool escapeUnprintable) const {
     result.truncate(0);
     UnicodeString str, quoteBuf;
     if (isSegment) {
         result.append((UChar)40); /*(*/
     }
     for (int32_t i=0; i<pattern.length(); ++i) {
         UChar keyChar = pattern.charAt(i);
         const UnicodeMatcher* m = data.lookup(keyChar);
         if (m == 0) {
             TransliterationRule::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
         } else {
             TransliterationRule::appendToRule(result, m->toPattern(str, escapeUnprintable),
                          TRUE, escapeUnprintable, quoteBuf);
         }
     }
     // Flush quoteBuf out to result *before* closing the segment.
     // appendToRule() may hold text back in quoteBuf; emitting the ')'
     // first would leave that pending text outside the parentheses
     // instead of inside them.
     // NOTE(review): relies on appendToRule(-1, TRUE, ...) writing out
     // quoteBuf and appending nothing else -- confirm in rbt_rule.cpp.
     TransliterationRule::appendToRule(result, -1,
                                       TRUE, escapeUnprintable, quoteBuf);
     if (isSegment) {
         result.append((UChar)41); /*)*/
     }
     return result;
 }

 /**
  * Implement UnicodeMatcher.
  *
  * Only the first code point of the pattern is examined.
  *
  * @param v index byte to test against
  * @return TRUE if the pattern is empty; otherwise, if the first code
  *         point maps to a matcher in the rule data, that matcher's
  *         answer; otherwise whether the low 8 bits of the first code
  *         point equal v
  */
 UBool StringMatcher::matchesIndexValue(uint8_t v) const {
     // An empty pattern is compatible with every index value.
     if (pattern.length() == 0) {
         return TRUE;
     }

     UChar32 first = pattern.char32At(0);
     const UnicodeMatcher *nested = data.lookup(first);
     if (nested != 0) {
         return nested->matchesIndexValue(v);
     }
     return (first & 0xFF) == v;
 }

 /**
  * Discard any recorded match position by setting both matchStart and
  * matchLimit back to -1.  Call this before starting a new set of
  * matches with this segment; the reverse branch of matches() only
  * records a position while matchStart is negative.
  */
 void StringMatcher::resetMatch() {
     matchStart = -1;
     matchLimit = -1;
 }

 /**
  * Accessor for the recorded start offset, in the match text, of the
  * <em>rightmost</em> match.  The value is -1 after construction or
  * resetMatch().  This method may get moved up into the UnicodeMatcher
  * if it turns out to be useful to generalize this.
  *
  * @return the current value of matchStart
  */
 int32_t StringMatcher::getMatchStart() const
 {
     return matchStart;
 }

 /**
  * Accessor for the recorded limit offset, in the match text, of the
  * <em>rightmost</em> match.  The value is -1 after construction or
  * resetMatch().  This method may get moved up into the UnicodeMatcher
  * if it turns out to be useful to generalize this.
  *
  * @return the current value of matchLimit
  */
 int32_t StringMatcher::getMatchLimit() const
 {
     return matchLimit;
 }

 U_NAMESPACE_END

 //eof
	/*
	* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
	**********************************************************************
	* Date Name Description
	* 07/23/01 aliu Creation.
	**********************************************************************
	*/

	#include "strmatch.h"
	#include "rbt_data.h"
	#include "rbt_rule.h"

	U_NAMESPACE_BEGIN

	/**
	 * Construct a matcher over a slice of a rule string.
	 *
	 * @param theString source text from which the pattern is extracted
	 * @param start     start index of the slice handed to extractBetween()
	 * @param limit     limit index of the slice handed to extractBetween()
	 * @param isSeg     stored as isSegment; toPattern() parenthesizes the
	 *                  pattern when this is set
	 * @param theData   rule data consulted through lookup() for each
	 *                  pattern character; stored in the member 'data'
	 */
	StringMatcher::StringMatcher(const UnicodeString& theString,
	                             int32_t start,
	                             int32_t limit,
	                             UBool isSeg,
	                             const TransliterationRuleData& theData)
	    : data(theData),
	      isSegment(isSeg),
	      matchStart(-1),   // negative == no match recorded yet
	      matchLimit(-1)
	{
	    // Copy the requested slice of the rule text into our own pattern.
	    theString.extractBetween(start, limit, pattern);
	}

	/**
	 * Copy constructor.  Duplicates the pattern, the segment flag and the
	 * currently recorded match positions, and refers to the same rule
	 * data object as the original.
	 */
	StringMatcher::StringMatcher(const StringMatcher& o)
	    : UnicodeMatcher(o),
	      pattern(o.pattern),
	      data(o.data),
	      isSegment(o.isSegment),
	      matchStart(o.matchStart),
	      matchLimit(o.matchLimit)
	{
	    // Nothing else to do; every member is set in the initializer list.
	}

	/**
	 * Destructor.  The body is empty: no explicit cleanup is performed
	 * here.
	 */
	StringMatcher::~StringMatcher()
	{
	}

	/**
	 * Implement UnicodeMatcher.
	 *
	 * @return a newly allocated copy of this matcher, produced with the
	 *         copy constructor
	 */
	UnicodeMatcher* StringMatcher::clone() const
	{
	    return new StringMatcher(*this);
	}

	/**
	 * Implement UnicodeMatcher.
	 *
	 * Walks the pattern one 16-bit unit at a time.  Each unit is first
	 * looked up in the rule data: if a matcher is registered for it,
	 * matching is delegated to that matcher; otherwise the unit is compared
	 * literally against the text.
	 *
	 * Direction is chosen from the arguments: when limit < offset the
	 * pattern is matched right-to-left (last pattern unit first, cursor
	 * decreasing); otherwise it is matched left-to-right.
	 *
	 * @param text        the text being matched
	 * @param offset      in: position at which to start; out: on U_MATCH,
	 *                    the cursor position after the match.  Left
	 *                    untouched for any other result.
	 * @param limit       bound on the cursor.  In the forward direction a
	 *                    literal only matches while cursor < limit; in the
	 *                    reverse direction while cursor >= limit.
	 * @param incremental forward direction only: if TRUE, running into
	 *                    limit before the pattern is exhausted yields
	 *                    U_PARTIAL_MATCH.  Always forwarded to nested
	 *                    matchers.
	 * @return U_MATCH on success, U_MISMATCH on a failed literal
	 *         comparison, U_PARTIAL_MATCH as described above, or whatever
	 *         non-U_MATCH value a nested matcher returned.
	 */
	UMatchDegree StringMatcher::matches(const Replaceable& text,
	                                    int32_t& offset,
	                                    int32_t limit,
	                                    UBool incremental) {
	    int32_t pos = offset;

	    if (limit < pos) {
	        // ---- Reverse direction: consume the pattern back to front ----
	        int32_t idx = pattern.length();
	        while (--idx >= 0) {
	            const UChar patChar = pattern.charAt(idx);
	            UnicodeMatcher* nested = data.lookup(patChar);

	            if (nested != 0) {
	                // Stand-in for another matcher: let it advance 'pos'.
	                const UMatchDegree degree =
	                    nested->matches(text, pos, limit, incremental);
	                if (degree != U_MATCH) {
	                    return degree;
	                }
	                continue;
	            }

	            // Plain literal: must be within bounds and equal.
	            if (pos < limit || patChar != text.charAt(pos)) {
	                return U_MISMATCH;
	            }
	            --pos;
	        }

	        // Keep only the first reverse match seen since the last
	        // resetMatch() -- that is the rightmost one.  The +1 converts
	        // the backward-running cursor to a forward [start, limit) pair.
	        if (matchStart < 0) {
	            matchStart = pos + 1;
	            matchLimit = offset + 1;
	        }
	    } else {
	        // ---- Forward direction ----
	        for (int32_t idx = 0; idx < pattern.length(); ++idx) {
	            if (incremental && pos == limit) {
	                // Out of text, no mismatch so far, pattern not finished.
	                return U_PARTIAL_MATCH;
	            }

	            const UChar patChar = pattern.charAt(idx);
	            UnicodeMatcher* nested = data.lookup(patChar);

	            if (nested != 0) {
	                const UMatchDegree degree =
	                    nested->matches(text, pos, limit, incremental);
	                if (degree != U_MATCH) {
	                    return degree;
	                }
	                continue;
	            }

	            // The bounds test is redundant when incremental is TRUE
	            // (handled above) but required otherwise.
	            if (pos >= limit || patChar != text.charAt(pos)) {
	                return U_MISMATCH;
	            }
	            ++pos;
	        }

	        // A forward match always overwrites the recorded position.
	        matchStart = offset;
	        matchLimit = pos;
	    }

	    offset = pos;
	    return U_MATCH;
	}

	/**
	 * Implement UnicodeMatcher.
	 *
	 * Rebuilds a rule-syntax representation of this matcher in 'result'.
	 * Pattern units that map to a matcher in the rule data are replaced by
	 * that matcher's own pattern; all other units are appended as
	 * characters via TransliterationRule::appendToRule().  If this matcher
	 * is a segment the output is wrapped in parentheses.
	 *
	 * @param result            receives the pattern; any previous contents
	 *                          are discarded
	 * @param escapeUnprintable forwarded unchanged to appendToRule() and to
	 *                          nested matchers' toPattern()
	 * @return a reference to 'result'
	 */
	UnicodeString& StringMatcher::toPattern(UnicodeString& result,
	                                        UBool escapeUnprintable) const {
	    result.truncate(0);
	    UnicodeString str, quoteBuf;
	    if (isSegment) {
	        result.append((UChar)40); /*(*/
	    }
	    for (int32_t i=0; i<pattern.length(); ++i) {
	        UChar keyChar = pattern.charAt(i);
	        const UnicodeMatcher* m = data.lookup(keyChar);
	        if (m == 0) {
	            TransliterationRule::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
	        } else {
	            TransliterationRule::appendToRule(result, m->toPattern(str, escapeUnprintable),
	                                              TRUE, escapeUnprintable, quoteBuf);
	        }
	    }
	    // Flush quoteBuf out to result *before* closing the segment.
	    // appendToRule() may hold text back in quoteBuf; emitting the ')'
	    // first would leave that pending text outside the parentheses
	    // instead of inside them.
	    // NOTE(review): relies on appendToRule(-1, TRUE, ...) writing out
	    // quoteBuf and appending nothing else -- confirm in rbt_rule.cpp.
	    TransliterationRule::appendToRule(result, -1,
	                                      TRUE, escapeUnprintable, quoteBuf);
	    if (isSegment) {
	        result.append((UChar)41); /*)*/
	    }
	    return result;
	}

	/**
	 * Implement UnicodeMatcher.
	 *
	 * Only the first code point of the pattern is examined.
	 *
	 * @param v index byte to test against
	 * @return TRUE if the pattern is empty; otherwise, if the first code
	 *         point maps to a matcher in the rule data, that matcher's
	 *         answer; otherwise whether the low 8 bits of the first code
	 *         point equal v
	 */
	UBool StringMatcher::matchesIndexValue(uint8_t v) const {
	    // An empty pattern is compatible with every index value.
	    if (pattern.length() == 0) {
	        return TRUE;
	    }

	    UChar32 first = pattern.char32At(0);
	    const UnicodeMatcher *nested = data.lookup(first);
	    if (nested != 0) {
	        return nested->matchesIndexValue(v);
	    }
	    return (first & 0xFF) == v;
	}

	/**
	 * Discard any recorded match position by setting both matchStart and
	 * matchLimit back to -1.  Call this before starting a new set of
	 * matches with this segment; the reverse branch of matches() only
	 * records a position while matchStart is negative.
	 */
	void StringMatcher::resetMatch() {
	    matchStart = -1;
	    matchLimit = -1;
	}

	/**
	 * Accessor for the recorded start offset, in the match text, of the
	 * <em>rightmost</em> match.  The value is -1 after construction or
	 * resetMatch().  This method may get moved up into the UnicodeMatcher
	 * if it turns out to be useful to generalize this.
	 *
	 * @return the current value of matchStart
	 */
	int32_t StringMatcher::getMatchStart() const
	{
	    return matchStart;
	}

	/**
	 * Accessor for the recorded limit offset, in the match text, of the
	 * <em>rightmost</em> match.  The value is -1 after construction or
	 * resetMatch().  This method may get moved up into the UnicodeMatcher
	 * if it turns out to be useful to generalize this.
	 *
	 * @return the current value of matchLimit
	 */
	int32_t StringMatcher::getMatchLimit() const
	{
	    return matchLimit;
	}

	U_NAMESPACE_END

	//eof