blob: fa821c078b4f5bf2927695a9cc4020d459ca9607 [file] [log] [blame]
/*
**********************************************************************
* Copyright (C) 1999 IBM Corp. All rights reserved.
**********************************************************************
* Date Name Description
* 11/11/99 rgillam Complete port from Java.
**********************************************************************
*/
#ifndef RBBI_TBL_H
#define RBBI_TBL_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "filestrm.h"
U_CDECL_BEGIN
/*
 * Opaque declaration of CompactByteArray, so that this header can declare the
 * charCategoryTable pointer member without needing the full struct definition.
 * The typedef is skipped when UCMP8_H is already defined -- presumably that is
 * the include guard of the header providing the real definition (confirm).
 */
#ifndef UCMP8_H
typedef struct _CompactByteArray CompactByteArray;
#endif
U_CDECL_END
U_NAMESPACE_BEGIN
/*
 * Forward declarations of the two iterator classes that
 * RuleBasedBreakIteratorTables names as friends further down in this header.
 */
class RuleBasedBreakIterator;
class DictionaryBasedBreakIterator;
/**
* This class contains the internal static tables that are used by the
* RuleBasedBreakIterator. Once created, these tables are immutable,
* so they can be shared among all break iterators using a particular
* set of rules. This class uses a reference-counting scheme to
* manage the sharing.
*
* Nothing in this class is public: every member is either private or
* protected. The only outside users are the two friend classes declared at
* the bottom (RuleBasedBreakIterator and DictionaryBasedBreakIterator);
* subclasses additionally see the protected members.
*
* @author Richard Gillam
*/
class RuleBasedBreakIteratorTables {
private:
/**
* The number of RuleBasedBreakIterators using this object.
* Modified only by addReference() and removeReference(). Note that the
* counter is a signed 16-bit integer, so it cannot represent more than
* 32767 simultaneous references.
*/
int16_t refCount;
protected:
/**
* Whether or not we own the storage for the tables (the tables may be
* stored in a memory-mapped file)
*/
UBool ownTables;
private:
/**
* The textual description that was used to create these tables.
* Returned by getRules().
*/
UnicodeString description;
/**
* A table that indexes from character values to character category numbers
*/
CompactByteArray* charCategoryTable;
/**
* The table of state transitions used for forward iteration
*/
int16_t* stateTable;
/**
* The table of state transitions used to sync up the iterator with the
* text in backwards and random-access iteration
*/
int16_t* backwardsStateTable;
/**
* A list of flags indicating which states in the state table are accepting
* ("end") states
*/
int8_t* endStates;
/**
* A list of flags indicating which states in the state table are
* lookahead states (states which turn lookahead on and off)
*/
int8_t* lookaheadStates;
/**
* The number of character categories (and, thus, the number of columns in
* the state tables)
*/
int32_t numCategories;
//=======================================================================
// constructors
//=======================================================================
/*
* Both constructors below are protected, so a tables object can only be
* created by a subclass or by one of the friend classes. Their definitions
* are not in this header.
*/
protected:
/**
* Default constructor. Takes no tables; how the members are initialized
* is determined by the out-of-line definition.
*/
RuleBasedBreakIteratorTables();
/**
* Creates a tables object from a UDataMemory handle.
* NOTE(review): presumably the tables are then read out of that data
* (see ownTables and fMemory) -- confirm against the implementation.
* @param memory The data handle to build the tables from.
*/
RuleBasedBreakIteratorTables(UDataMemory* memory);
/**
* A UDataMemory handle associated with these tables.
* NOTE(review): presumably the handle given to the UDataMemory*
* constructor, retained for later release -- confirm against the
* implementation.
*/
UDataMemory *fMemory;
private:
/**
* The copy constructor is declared private and is a no-op.
* THIS CLASS MAY NOT BE COPIED.
*/
RuleBasedBreakIteratorTables(const RuleBasedBreakIteratorTables& that);
//=======================================================================
// boilerplate
//=======================================================================
protected:
/**
* Destructor. Protected and virtual: outside code cannot delete a tables
* object directly; removeReference() performs the deletion ("delete this")
* when the reference count drops to zero or below.
*/
virtual ~RuleBasedBreakIteratorTables();
private:
/*
* Everything from here to the next access specifier is private and
* therefore usable only by this class and its friend classes.
*/
/**
* The assignment operator is declared private and is a no-op.
* THIS CLASS MAY NOT BE COPIED.
*/
RuleBasedBreakIteratorTables& operator=(const RuleBasedBreakIteratorTables& that);
/**
* Equality operator. Returns TRUE if both tables objects are of the
* same class, have the same behavior, and iterate over the same text.
* NOTE(review): "iterate over the same text" looks like wording carried
* over from the iterator classes; no text member is declared in this
* class -- confirm the actual comparison in the implementation.
* @param that The tables object to compare against.
*/
virtual UBool operator==(const RuleBasedBreakIteratorTables& that) const;
/**
* Not-equal operator. If operator== returns TRUE, this returns FALSE,
* and vice versa. Defined inline after the class as the negation of
* operator==.
* @param that The tables object to compare against.
*/
UBool operator!=(const RuleBasedBreakIteratorTables& that) const;
/**
* Compute a hash code for these tables
* @return A hash code
*/
virtual int32_t hashCode(void) const;
/**
* Returns the description used to create these tables
* @return A reference to the description member; it refers to storage
* inside this object.
*/
const UnicodeString& getRules(void) const;
//=======================================================================
// reference counting
//=======================================================================
/**
* increments the reference count.
*/
void addReference(void);
/**
* decrements the reference count and deletes the object if it reaches zero
* (or goes below zero). The object must not be touched by the caller
* after the call that triggers the deletion.
*/
void removeReference(void);
protected:
//=======================================================================
// implementation
//=======================================================================
/*
* The lookups below are virtual and protected, so subclasses may override
* them; the friend iterator classes are the callers outside the hierarchy.
*/
/**
* Looks up a character's category (i.e., its category for breaking purposes,
* not its Unicode category)
* @param c The character to classify.
* @param bi A break iterator supplied by the caller; how it is used is not
* visible in this header.
* @return The category number for c.
*/
virtual int32_t lookupCategory(UChar c, BreakIterator* bi) const;
/**
* Given a current state and a character category, looks up the
* next state to transition to in the state table.
* @param state The current state.
* @param category The character category of the current character.
* @return The next state.
*/
virtual int32_t lookupState(int32_t state, int32_t category) const;
/**
* Given a current state and a character category, looks up the
* next state to transition to in the backwards state table.
* @param state The current state.
* @param category The character category of the current character.
* @return The next state.
*/
virtual int32_t lookupBackwardState(int32_t state, int32_t category) const;
/**
* Returns true if the specified state is an accepting state.
* @param state The state to test.
*/
virtual UBool isEndState(int32_t state) const;
/**
* Returns true if the specified state is a lookahead state.
* @param state The state to test.
*/
virtual UBool isLookaheadState(int32_t state) const;
/*
* The iterator classes are granted access to the private and protected
* members above.
*/
friend class RuleBasedBreakIterator;
friend class DictionaryBasedBreakIterator;
};
/**
 * Inequality test: the logical negation of whatever operator== reports
 * for the same argument.
 */
inline UBool
RuleBasedBreakIteratorTables::operator!=(const RuleBasedBreakIteratorTables& that) const {
    const UBool tablesMatch = this->operator==(that);
    return !tablesMatch;
}
/**
 * Accessor for the rule description stored in this tables object.
 * The returned reference points at the member itself, not at a copy.
 */
inline const UnicodeString&
RuleBasedBreakIteratorTables::getRules(void) const {
    const UnicodeString& rules = this->description;
    return rules;
}
inline void
RuleBasedBreakIteratorTables::addReference(void) {
++refCount;
}
/**
 * Drops one reference. When the count is no longer positive afterwards,
 * the object destroys itself; it must not be used after that point.
 */
inline void
RuleBasedBreakIteratorTables::removeReference(void) {
    this->refCount -= 1;
    if (this->refCount > 0) {
        // Still in use by at least one other holder.
        return;
    }
    delete this;
}
U_NAMESPACE_END
#endif