| /* |
| ***************************************************************************************** |
| * Copyright (C) 1997-1999, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ***************************************************************************************** |
| * |
| * File WDBKTBL.CPP |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 02/18/97 aliu Converted from OpenClass. Made statics const. |
| ***************************************************************************************** |
| */ |
| |
| // ***************************************************************************** |
| // This file was generated from the java source file WordBreakTable.java |
| // ***************************************************************************** |
| |
| #include "wdbktbl.h" |
| |
| // ***************************************************************************** |
| // class WordBreakTable |
| // |
// The word break table implements a state machine that maps the current
// state and the next character's category to the next transition state.
// It is used by BreakIterator for character, word, or sentence breaking.
// To better illustrate the use of transition tables, the following example
// shows a very simplified version of the word break table that deals with
// only the kNB (not a blank char) and kB (a blank char) character
// categories. The state machine of that word break table looks like this:
| // |
//   Diagram 1 : the state machine for kNB and kB
//
//                           kNB
//                           ----
//             kNB    +----+/    \
//             ------>|SI+1|     |
//            /       +----+<----/
//      +----+       kB |     kNB            +-------+
//   0->|stop|          V    ------------->  |SI_stop|
//      +----+\------>+----+/                +-------+
//                    |SI+2|<----\
//               kB   +----+     |
//                          \----/
//                            kB
//
//   Table 1 : flattened state table for Diagram 1
//   ---------------------------------------------
//   States    kB      kNB
//     0       stop    stop
//     1       SI+2    SI+1
//     2       SI+2    SI_stop
| // |
// In the table, SI+n shows where the characters will be "marked" and, if
// necessary, led to a different state (state n). For example, consider the
// string "This is a test.".
// Iterating through the string gives the following transitions:
// (stop)->'T'(SI+1)->'h'(SI+1)->'i'(SI+1)->'s'(SI+1)->' '(SI+2)->'i'(SI_stop)
// When (SI_stop) is reached, we know that we have found a word break right
// after ' '.
| // |
// The actual char, word and sentence break data is a lot more complicated
// than the above. The character types shown here are limited to kNB and kB
// for ease of demonstration. All the break tables are essentially flattened
// state tables of their original state machine diagrams.
| // |
| // ***************************************************************************** |
| |
| // ------------------------------------- |
| |
| WordBreakTable::WordBreakTable(int32_t cols, const WordBreakTable::Node data[], int32_t data_length) |
| : fData(data), fData_length(data_length), fCols(cols) |
| { |
| } |
| |
| // ------------------------------------- |
| |
| const WordBreakTable::Node WordBreakTable::kMark_mask = (WordBreakTable::Node)0x80; |
| |
| const WordBreakTable::Node WordBreakTable::kIndex_mask = (WordBreakTable::Node)0x7F; |
| |
| const WordBreakTable::Node WordBreakTable::kInitial_state = 1; |
| |
| const WordBreakTable::Node WordBreakTable::kEnd_state = 0; |
| |
| //eof |