|  | /* | 
|  | ********************************************************************** | 
|  | *   Copyright (C) 1999-2000 IBM Corp. All rights reserved. | 
|  | ********************************************************************** | 
|  | *   Date        Name        Description | 
|  | *   12/1/99    rgillam     Complete port from Java. | 
|  | *   01/13/2000 helena      Added UErrorCode to ctors. | 
|  | ********************************************************************** | 
|  | */ | 
|  |  | 
|  | #include "ucmp8.h" | 
|  | #include "dbbi_tbl.h" | 
|  | #include "unicode/dbbi.h" | 
|  |  | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | //======================================================================= | 
|  | // constructor | 
|  | //======================================================================= | 
|  |  | 
|  | // Constructs the dictionary-based tables from a loaded data image. | 
|  | // | 
|  | //   tablesMemory        data handle; forwarded to the | 
|  | //                       RuleBasedBreakIteratorTables constructor and, when | 
|  | //                       non-null, read here to locate the category flags. | 
|  | //   dictionaryFilename  forwarded to the "dictionary" member's constructor. | 
|  | //   status              forwarded to the "dictionary" member's constructor; | 
|  | //                       if it reports failure afterwards, the category flags | 
|  | //                       are not loaded. | 
|  | // | 
|  | // On every path where the flags cannot be located (null tablesMemory, null | 
|  | // image, or failing status) categoryFlags is left as a null pointer. | 
|  | DictionaryBasedBreakIteratorTables::DictionaryBasedBreakIteratorTables( | 
|  |         UDataMemory* tablesMemory, | 
|  |         const char* dictionaryFilename, | 
|  |         UErrorCode &status) | 
|  | : RuleBasedBreakIteratorTables(tablesMemory), | 
|  |   dictionary(dictionaryFilename, status) | 
|  | { | 
|  |     // Give categoryFlags a defined value before any early exit.  Without | 
|  |     // this, each bail-out below would leave it indeterminate, while the | 
|  |     // destructor may delete [] it and lookupCategory() indexes through it. | 
|  |     categoryFlags = 0; | 
|  |  | 
|  |     if (tablesMemory == 0) { | 
|  |         return; | 
|  |     } | 
|  |  | 
|  |     const void* tablesImage = udata_getMemory(tablesMemory); | 
|  |     if (tablesImage == 0 || U_FAILURE(status)) { | 
|  |         return; | 
|  |     } | 
|  |  | 
|  |     // we know the offset into the memory image where the DBBI stuff | 
|  |     // starts is stored in element 8 of the array.  There should be | 
|  |     // a way for the RBBI constructor to give us this, but there | 
|  |     // isn't a good one. | 
|  |     const int32_t* tablesIdx = (const int32_t*)tablesImage; | 
|  |     const int8_t* dbbiImage = (const int8_t*)tablesImage + tablesIdx[8]; | 
|  |  | 
|  |     // The DBBI section is itself read as an int32_t array; its element 0 | 
|  |     // is used as the byte offset (from the start of the DBBI section) of | 
|  |     // the category flags. | 
|  |     const int32_t* dbbiIdx = (const int32_t*)dbbiImage; | 
|  |  | 
|  |     // The cast drops const because the member is assigned a non-const | 
|  |     // pointer; this constructor itself never writes through it. | 
|  |     categoryFlags = (int8_t*)(dbbiImage + dbbiIdx[0]); | 
|  | } | 
|  |  | 
|  | //======================================================================= | 
|  | // boilerplate | 
|  | //======================================================================= | 
|  |  | 
|  | /** | 
|  | * Destructor. | 
|  | * Releases the categoryFlags array with delete [], but only when the | 
|  | * ownTables flag is set; otherwise the pointer is left untouched. | 
|  | */ | 
|  | DictionaryBasedBreakIteratorTables::~DictionaryBasedBreakIteratorTables() { | 
|  |     if (!ownTables) { | 
|  |         // NOTE(review): presumably the flags then point into a data image | 
|  |         // that is released elsewhere -- confirm against the base class. | 
|  |         return; | 
|  |     } | 
|  |     delete [] categoryFlags; | 
|  | } | 
|  |  | 
|  | // Looks up the category of a character and records dictionary characters. | 
|  | // | 
|  | // The inherited lookupCategory() does the actual lookup.  This override | 
|  | // only adds bookkeeping: when the resulting category is flagged in | 
|  | // categoryFlags, the iterator's dictionary-character count is bumped. | 
|  | // The UBRK_IGNORE category is returned as-is and is never used to index | 
|  | // categoryFlags. | 
|  | // | 
|  | //   c   the character to classify. | 
|  | //   bi  the iterator doing the lookup; it is cast to | 
|  | //       DictionaryBasedBreakIterator* without any check, so it is assumed | 
|  | //       to really be one. | 
|  | // | 
|  | // Returns the category reported by the inherited implementation, unchanged. | 
|  | int32_t | 
|  | DictionaryBasedBreakIteratorTables::lookupCategory(UChar c, | 
|  |                                                    BreakIterator* bi) const { | 
|  |     const int32_t category = RuleBasedBreakIteratorTables::lookupCategory(c, bi); | 
|  |  | 
|  |     // Ignored characters are passed straight through. | 
|  |     if (category == RuleBasedBreakIterator::UBRK_IGNORE) { | 
|  |         return category; | 
|  |     } | 
|  |  | 
|  |     // A non-zero flag marks a category that is represented in the dictionary. | 
|  |     if (categoryFlags[category]) { | 
|  |         ((DictionaryBasedBreakIterator*)bi)->bumpDictionaryCharCount(); | 
|  |     } | 
|  |     return category; | 
|  | } | 
|  |  | 
|  | U_NAMESPACE_END | 
|  |  | 
|  | /* eof */ |