/******************************************************************************
 * COPYRIGHT:
 *  (C) Copyright Taligent, Inc., 1996
 *  (C) Copyright IBM Corp. 1996-1999
 *  Licensed Material - Program-Property of IBM - All Rights Reserved.
 *  US Government Users Restricted Rights - Use, duplication, or disclosure
 *  restricted by GSA ADP Schedule Contact with IBM Corp.
 *
 ******************************************************************************
 */
//=============================================================================
//
// File mergecol.h
//
//
//
// Created by: Helena Shih
//
// Modification History:
//
//  Date        Name        Description
//  3/5/97      mark        Cleaned up fixEntry().  Added constants BYTEPOWER
//                          and BYTEMASK to replace BYTESIZE.
//  6/17/97     helena      In getPattern, added the queue-up list for entries
//                          with the same extension chars.
//  8/18/97     helena      Added internal API documentation.
//  8/13/98     erm         Synched up with 1.2 version of MergeCollation.java
// 04/23/99     stephen     Removed EDecompositionMode, merged with
//                          Normalizer::EMode
//=============================================================================

 | #ifndef MERGECOL_H | 
 | #define MERGECOL_H | 
 |  | 
 | #include "unistr.h" | 
 | #include "ptnentry.h" | 
 | #include "tables.h" | 
 | #include "coll.h" | 
 | #include "normlzr.h" | 
 |  | 
 |  | 
 | /** | 
 |  * Utility class for normalizing and merging patterns for collation. | 
 |  * Patterns are strings of the form <entry>*, where <entry> has the | 
 |  * form: | 
 |  * <pre> | 
 |  * <pattern> := <entry>* | 
 |  * <entry> := <separator><chars>{"/"<extension>} | 
 |  * <separator> := "=", ",", ";", "<", "&" | 
 |  * <chars>, and <extension> are both arbitrary strings. | 
 |  * </pre> | 
 |  * <P>Unquoted whitespaces are ignored. | 
 |  * 'xxx' can be used to quote characters. | 
 |  * <P> | 
 |  * One difference from Collation is that & is used to reset to a current | 
 |  * point. Or, in other words, it introduces a new sequence which is to | 
 |  * be added to the old. | 
 |  * <P> | 
 |  * That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR | 
 |  * "a < b < d & b < c" | 
 |  * XXX: make '' be a single quote. | 
 |  * @see        PatternEntry | 
 |  * @version    1.4 1/7/97 | 
 |  * @author     Mark Davis, Helena Shih | 
 |  */ | 
 |  | 
 | class MergeCollation  | 
 | { | 
 | public: | 
 |  | 
 |     /** | 
 |      * Creates a merged collation table from a pattern string. | 
 |      * @param pattern the pattern string. | 
 |      * @param status the error code status.  If the input pattern is incorrect, | 
 |      * this will be set to INVALID_FORMAT_ERROR. | 
 |      */ | 
 |   MergeCollation( const   UnicodeString&  pattern, | 
 |           Normalizer::EMode decompMode, | 
 |           UErrorCode&      success); | 
 |   /** | 
 |      * Copy constructor. | 
 |      */ | 
 |   MergeCollation( const   MergeCollation& other); | 
 |  | 
 |   /** | 
 |      * Destructor. | 
 |      */ | 
 |   ~MergeCollation(); | 
 |  | 
 |   /** Assignment operator | 
 |      */ | 
 |   const   MergeCollation&     operator=(const MergeCollation& other); | 
 |   /** | 
 |      * Recovers current pattern from this merged collation object. | 
 |      * @param pattern the result buffer. | 
 |      * @return the recovered result. | 
 |      */ | 
 |   UnicodeString& getPattern(UnicodeString& pattern) const; | 
 |  | 
 |   /** | 
 |      * Recovers current pattern with white spaces. | 
 |      * @param pattern the result buffer. | 
 |      * @param withWhiteSpace puts spacing around the entries, and \n | 
 |      * before & and < | 
 |      * @return the recovered result. | 
 |      */ | 
 |   UnicodeString& getPattern(UnicodeString& pattern, bool_t withWhiteSpace) const; | 
 |  | 
 |   /** | 
 |      * Emits the pattern for collation builder. | 
 |      * @param pattern the result buffer. | 
 |      * @return Emits the string in the format understable to the collation | 
 |      * builder. | 
 |      */ | 
 |   UnicodeString& emitPattern(UnicodeString& pattern) const; | 
 |  | 
 |   /** | 
 |      * Emits the pattern for collation builder. | 
 |      * @param pattern the result buffer. | 
 |      * @param withWhiteSpace puts spacing around the entries, and \n | 
 |      * before & and < | 
 |      * @return Emits the string in the format understable to the collation | 
 |      * builder. | 
 |      */ | 
 |   UnicodeString& emitPattern(UnicodeString& pattern, bool_t withWhiteSpace) const; | 
 |  | 
 |   /** | 
 |      * Sets the pattern. | 
 |      * @param pattern string. | 
 |      * @param status the error code status, it will be set to INVALID_FORMAT_ERROR | 
 |      * if the pattern is incorrect. | 
 |      */ | 
 |   void setPattern(const   UnicodeString&  pattern, | 
 |           Normalizer::EMode decompMode, | 
 |           UErrorCode&      status); | 
 |  | 
 |   /** | 
 |      * Adds a pattern to the current merge collation object. | 
 |      * @param pattern the new pattern to be added. | 
 |      * @param status the error code status, it will be set to INVALID_FORMAT_ERROR | 
 |      * if the pattern is incorrect. | 
 |      */ | 
 |   void addPattern(const   UnicodeString&  pattern, | 
 |           Normalizer::EMode decompMode, | 
 |           UErrorCode&      status); | 
 |  | 
 |   /** | 
 |      * Gets count of separate entries in the merge collation object. | 
 |      * @return the number of pattern entries | 
 |      */ | 
 |   int32_t getCount(void) const; | 
 |  | 
 |   /** | 
 |      * Gets the specified pattern entry out of the merge collation object. | 
 |      * @param index the offset of the desired pattern entry | 
 |      * @return the requested pattern entry | 
 |      */ | 
 |   const PatternEntry* getItemAt(UTextOffset index) const; | 
 |  | 
 | private: | 
 |  | 
 |     //============================================================ | 
 |     // privates | 
 |     //============================================================ | 
 |  | 
 |   VectorOfPointersToPatternEntry* patterns; // a vector of PatternEntries | 
 |   static  const   int32_t         BITARRAYSIZE; | 
 |   static  const   uint8_t         BITARRAYMASK; | 
 |   static  const   int32_t         BYTEPOWER; | 
 |   static  const   int32_t         BYTEMASK; | 
 |  | 
 |   PatternEntry*   lastEntry; | 
 |   PatternEntry*   saveEntry; | 
 |   uint8_t*        statusArray; | 
 |  | 
 |  | 
 |     /** | 
 |      * Finds the last pattern entry before the specified offset that does not have  | 
 |      * extension chars. | 
 |      * @param i the offset. | 
 |      * @return the pattern entry. | 
 |      */ | 
 |   const PatternEntry* findLastWithNoExtension(int32_t i) const; | 
 |  | 
 |   /**  | 
 |      * Fixes the new pattern entry in the merge collation table. | 
 |      * If the strength is RESET, then just change the lastEntry to | 
 |      * be the current. (If the current is not in patterns, signal an error). | 
 |      * If not, then remove the current entry, and add it after lastEntry | 
 |      * (which is usually at the end).  Strength indicates the text order | 
 |      * weight for an entry. | 
 |      * @param newEntry the new pattern entry | 
 |      * @param status the error code status, it will be set to INVALID_FORMAT_ERROR | 
 |      * if the strength is RESET and a previous entry can't be found. | 
 |      */ | 
 |   void fixEntry(  PatternEntry*   newEntry, | 
 |           UErrorCode&      status); | 
 |  | 
 |   /** | 
 |      * Finds the offset of the specified entry that was previously installed in the | 
 |      * merge collation object. | 
 |      * @param lastEntry the entry that was previously installed. | 
 |      * @param excess the extra characters  | 
 |      * @param status the error code status, it will be set to INVALID_FORMAT_ERROR | 
 |      * if the strength is RESET and a previous entry can't be found. | 
 |      * @return the offset of the found entry | 
 |      */ | 
 |   int32_t findLastEntry(  const PatternEntry* lastEntry,  | 
 |               UnicodeString&  excess, | 
 |               UErrorCode&      success) const; | 
 | }; | 
 |  | 
 | inline UnicodeString& MergeCollation::getPattern(UnicodeString& result) const | 
 | { | 
 |   return getPattern(result, TRUE); | 
 | } | 
 |  | 
 | inline UnicodeString& MergeCollation::emitPattern(UnicodeString& result) const | 
 | { | 
 |   return emitPattern(result, TRUE); | 
 | } | 
 |  | 
 |  | 
#endif // MERGECOL_H