|  | /* | 
|  | * Copyright © {1999}, International Business Machines Corporation and others. All Rights Reserved. | 
|  | ********************************************************************** | 
|  | *   Date        Name        Description | 
|  | *   11/17/99    aliu        Creation. | 
|  | ********************************************************************** | 
|  | */ | 
|  | #ifndef RBT_SET_H | 
|  | #define RBT_SET_H | 
|  |  | 
|  | #include "uvector.h" | 
|  | #include "unicode/utrans.h" | 
|  |  | 
|  | class Replaceable; | 
|  | class TransliterationRule; | 
|  | class TransliterationRuleData; | 
|  | class UnicodeFilter; | 
|  | class UnicodeString; | 
|  |  | 
|  | /** | 
|  | * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes | 
|  | * the transliteration in one direction from one set of characters or short | 
|  | * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to | 
|  | * two such sets, one for the forward direction, and one for the reverse. | 
|  | * | 
|  | * <p>A <code>TransliterationRuleSet</code> has one important operation, that of | 
|  | * finding a matching rule at a given point in the text.  This is accomplished | 
|  | * by the <code>findMatch()</code> method. | 
|  | * | 
|  | * @author Alan Liu | 
|  | */ | 
|  | class TransliterationRuleSet { | 
|  | /** | 
|  | * Vector of rules, in the order added.  This is only used while the rule | 
|  | * set is getting built.  After that, freeze() reorders and indexes the | 
|  | * rules into rules[].  However, the vector is kept until destruction. | 
|  | */ | 
|  | UVector* ruleVector; | 
|  |  | 
|  | /** | 
|  | * Length of the longest preceding context.  Reported to callers by | 
|  | * getMaximumContextLength(). | 
|  | */ | 
|  | int32_t maxContextLength; | 
|  |  | 
|  | /** | 
|  | * Sorted and indexed table of rules.  This is created by freeze() from | 
|  | * the rules in ruleVector. | 
|  | */ | 
|  | TransliterationRule** rules; | 
|  |  | 
|  | /** | 
|  | * Index table.  For text having a first character c, compute x = c&0xFF. | 
|  | * Now use rules[index[x]..index[x+1]-1].  This index table is created by | 
|  | * freeze(). | 
|  | * | 
|  | * x ranges over 0..255, so the table needs 257 entries: 256 bucket | 
|  | * starts plus one extra entry so that index[x+1] is still valid when | 
|  | * x is 255. | 
|  | */ | 
|  | int32_t index[257]; | 
|  |  | 
|  | public: | 
|  |  | 
|  | /** | 
|  | * Construct a new empty rule set. | 
|  | */ | 
|  | TransliterationRuleSet(); | 
|  |  | 
|  | /** | 
|  | * Copy constructor. | 
|  | * | 
|  | * NOTE(review): no copy assignment operator is declared in this class, | 
|  | * so the compiler-generated one applies.  Because the class holds raw | 
|  | * pointer members (ruleVector, rules), confirm whether assignment is | 
|  | * meant to be supported. | 
|  | */ | 
|  | TransliterationRuleSet(const TransliterationRuleSet&); | 
|  |  | 
|  | /** | 
|  | * Destructor. | 
|  | */ | 
|  | virtual ~TransliterationRuleSet(); | 
|  |  | 
|  | /** | 
|  | * Return the maximum context length. | 
|  | * @return the length of the longest preceding context. | 
|  | */ | 
|  | virtual int32_t getMaximumContextLength(void) const; | 
|  |  | 
|  | /** | 
|  | * Add a rule to this set.  Rules are added in order, and order is | 
|  | * significant.  The last call to this method must be followed by | 
|  | * a call to <code>freeze()</code> before the rule set is used. | 
|  | * | 
|  | * @param adoptedRule the rule to add.  NOTE(review): the parameter name | 
|  | * suggests that this set takes ownership of the rule; confirm against | 
|  | * the implementation. | 
|  | * @param status error code, passed by reference.  Presumably set to a | 
|  | * failure value if the rule cannot be added; confirm against the | 
|  | * implementation. | 
|  | */ | 
|  | virtual void addRule(TransliterationRule* adoptedRule, | 
|  | UErrorCode& status); | 
|  |  | 
|  | /** | 
|  | * Check this for masked rules and index it to optimize performance. | 
|  | * The sequence of operations is: (1) add rules to a set using | 
|  | * <code>addRule()</code>; (2) freeze the set using | 
|  | * <code>freeze()</code>; (3) use the rule set.  If | 
|  | * <code>addRule()</code> is called after calling this method, it | 
|  | * invalidates this object, and this method must be called again. | 
|  | * That is, <code>freeze()</code> may be called multiple times, | 
|  | * although for optimal performance it shouldn't be. | 
|  | * | 
|  | * @param data the transliteration rule data that accompanies these | 
|  | * rules.  NOTE(review): how it is consulted while freezing is not | 
|  | * visible from this header. | 
|  | * @param status error code, passed by reference.  Presumably set to a | 
|  | * failure value if the check or the indexing fails; confirm against | 
|  | * the implementation. | 
|  | */ | 
|  | virtual void freeze(const TransliterationRuleData& data, | 
|  | UErrorCode& status); | 
|  |  | 
|  | /** | 
|  | * Attempt to find a matching rule at the specified point in the text. | 
|  | * @param text the text, both translated and untranslated | 
|  | * @param pos the position object identifying where in | 
|  | * <code>text</code> the match is attempted.  NOTE(review): earlier | 
|  | * documentation described separate start, limit and cursor arguments; | 
|  | * presumably <code>pos</code> now carries those indices.  Confirm | 
|  | * against the definition of UTransPosition. | 
|  | * @param data a dictionary mapping variables to the sets they | 
|  | * represent | 
|  | * @param filter the filter.  Any character for which | 
|  | * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be | 
|  | * altered by this transliterator.  If <tt>filter</tt> is | 
|  | * <tt>null</tt> then no filtering is applied. | 
|  | * @return the matching rule, or null if none found. | 
|  | */ | 
|  | virtual TransliterationRule* findMatch(const Replaceable& text, | 
|  | const UTransPosition& pos, | 
|  | const TransliterationRuleData& data, | 
|  | const UnicodeFilter* filter) const; | 
|  |  | 
|  | /** | 
|  | * Attempt to find a matching rule at the specified point in the text. | 
|  | * Unlike <code>findMatch()</code>, this method does an incremental match. | 
|  | * An incremental match requires that there be no partial matches that might | 
|  | * pre-empt the full match that is found.  If there are partial matches, | 
|  | * then null is returned.  A non-null result indicates that a full match has | 
|  | * been found, and that it cannot be pre-empted by a partial match | 
|  | * regardless of what additional text is added to the translation buffer. | 
|  | * @param text the text, both translated and untranslated | 
|  | * @param pos the position object identifying where in | 
|  | * <code>text</code> the match is attempted.  NOTE(review): earlier | 
|  | * documentation described separate start, limit and cursor arguments; | 
|  | * presumably <code>pos</code> now carries those indices.  Confirm | 
|  | * against the definition of UTransPosition. | 
|  | * @param data a dictionary mapping variables to the sets they | 
|  | * represent | 
|  | * @param isPartial output parameter.  Set to true if a partial match | 
|  | * is returned. | 
|  | * @param filter the filter.  Any character for which | 
|  | * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be | 
|  | * altered by this transliterator.  If <tt>filter</tt> is | 
|  | * <tt>null</tt> then no filtering is applied. | 
|  | * @return the matching rule, or null if none found, or if the text buffer | 
|  | * does not have enough text yet to unambiguously match a rule. | 
|  | */ | 
|  | virtual TransliterationRule* findIncrementalMatch(const Replaceable& text, | 
|  | const UTransPosition& pos, | 
|  | const TransliterationRuleData& data, | 
|  | UBool& isPartial, | 
|  | const UnicodeFilter* filter) const; | 
|  | }; | 
|  | #endif |