blob: e8cb873fc877588548f068cef5c8eba793e5079c [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
package com.ibm.icu.text;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Iterator;
import java.util.HashMap;
import java.util.Map;
import java.util.Collection;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
// It builds the state transition table used by the RBBI runtime
// from the expression syntax tree generated by the rule scanner.
//
// This class is part of the RBBI implementation only.
// There is no user-visible public API here.
//
class RBBITableBuilder {
//
// RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
// one for each state.
static class RBBIStateDescriptor {
boolean fMarked;
int fAccepting;
int fLookAhead;
SortedSet fTagVals;
int fTagsIdx;
Set fPositions; // Set of parse tree positions associated
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
int[] fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates
RBBIStateDescriptor(int maxInputSymbol) {
fTagVals = new TreeSet();
fPositions = new HashSet();
fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized.
// It is indexed by input symbols, and will
// hold the next state number for each
// symbol.
}
}
private RBBIRuleBuilder fRB;
private int fRootIx; // The array index into RBBIRuleBuilder.fTreeRoots
// for the parse tree to operate on.
// Too bad Java can't do indirection more easily!
private List fDStates; // D states (Aho's terminology)
// Index is state number
// Contents are RBBIStateDescriptor pointers.
//-----------------------------------------------------------------------------
//
// Constructor for RBBITableBuilder.
//
// rootNode is an index into the array of root nodes that is held by
// the overall RBBIRuleBuilder.
//-----------------------------------------------------------------------------
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) {
fRootIx = rootNodeIx;
fRB = rb;
fDStates = new ArrayList();
}
//-----------------------------------------------------------------------------
//
// RBBITableBuilder::build - This is the main function for building the DFA state transtion
// table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void build() {
// If there were no rules, just return. This situation can easily arise
// for the reverse rules.
if (fRB.fTreeRoots[fRootIx]==null) {
return;
}
//
// Walk through the tree, replacing any references to $variables with a copy of the
// parse tree for the substition expression.
//
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
System.out.println("Parse tree after flattening variable references.");
fRB.fTreeRoots[fRootIx].printTree(true);
}
//
// If the rules contained any references to {bof}
// add a {bof} <cat> <former root of tree> to the
// tree. Means that all matches must start out with the
// {bof} fake character.
//
if (fRB.fSetBuilder.sawBOF()) {
RBBINode bofTop = new RBBINode(RBBINode.opCat);
RBBINode bofLeaf = new RBBINode(RBBINode.leafChar);
bofTop.fLeftChild = bofLeaf;
bofTop.fRightChild = fRB.fTreeRoots[fRootIx];
bofLeaf.fParent = bofTop;
bofLeaf.fVal = 2; // Reserved value for {bof}.
fRB.fTreeRoots[fRootIx] = bofTop;
}
//
// Add a unique right-end marker to the expression.
// Appears as a cat-node, left child being the original tree,
// right child being the end marker.
//
RBBINode cn = new RBBINode(RBBINode.opCat);
cn.fLeftChild = fRB.fTreeRoots[fRootIx];
fRB.fTreeRoots[fRootIx].fParent = cn;
cn.fRightChild = new RBBINode(RBBINode.endMark);
cn.fRightChild.fParent = cn;
fRB.fTreeRoots[fRootIx] = cn;
//
// Replace all references to UnicodeSets with the tree for the equivalent
// expression.
//
fRB.fTreeRoots[fRootIx].flattenSets();
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) {
System.out.println("Parse tree after flattening Unicode Set references.");
fRB.fTreeRoots[fRootIx].printTree(true);
}
//
// calculate the functions nullable, firstpos, lastpos and followpos on
// nodes in the parse tree.
// See the alogrithm description in Aho.
// Understanding how this works by looking at the code alone will be
// nearly impossible.
//
calcNullable(fRB.fTreeRoots[fRootIx]);
calcFirstPos(fRB.fTreeRoots[fRootIx]);
calcLastPos(fRB.fTreeRoots[fRootIx]);
calcFollowPos(fRB.fTreeRoots[fRootIx]);
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) {
System.out.print("\n");
printPosSets(fRB.fTreeRoots[fRootIx]);
}
//
// For "chained" rules, modify the followPos sets
//
if (fRB.fChainRules) {
calcChainedFollowPos(fRB.fTreeRoots[fRootIx]);
}
//
// BOF (start of input) test fixup.
//
if (fRB.fSetBuilder.sawBOF()) {
bofFixup();
}
//
// Build the DFA state transition tables.
//
buildStateTable();
flagAcceptingStates();
flagLookAheadStates();
flagTaggedStates();
//
// Update the global table of rule status {tag} values
// The rule builder has a global vector of status values that are common
// for all tables. Merge the ones from this table into the global set.
//
mergeRuleStatusVals();
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("states")>=0) {printStates();};
}
//-----------------------------------------------------------------------------
//
// calcNullable. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcNullable(RBBINode n) {
if (n == null) {
return;
}
if (n.fType == RBBINode.setRef ||
n.fType == RBBINode.endMark ) {
// These are non-empty leaf node types.
n.fNullable = false;
return;
}
if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
// Lookahead marker node. It's a leaf, so no recursion on children.
// It's nullable because it does not match any literal text from the input stream.
n.fNullable = true;
return;
}
// The node is not a leaf.
// Calculate nullable on its children.
calcNullable(n.fLeftChild);
calcNullable(n.fRightChild);
// Apply functions from table 3.40 in Aho
if (n.fType == RBBINode.opOr) {
n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
}
else if (n.fType == RBBINode.opCat) {
n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
}
else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) {
n.fNullable = true;
}
else {
n.fNullable = false;
}
}
//-----------------------------------------------------------------------------
//
// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcFirstPos(RBBINode n) {
if (n == null) {
return;
}
if (n.fType == RBBINode.leafChar ||
n.fType == RBBINode.endMark ||
n.fType == RBBINode.lookAhead ||
n.fType == RBBINode.tag) {
// These are non-empty leaf node types.
n.fFirstPosSet.add(n);
return;
}
// The node is not a leaf.
// Calculate firstPos on its children.
calcFirstPos(n.fLeftChild);
calcFirstPos(n.fRightChild);
// Apply functions from table 3.40 in Aho
if (n.fType == RBBINode.opOr) {
n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
}
else if (n.fType == RBBINode.opCat) {
n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
if (n.fLeftChild.fNullable) {
n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
}
}
else if (n.fType == RBBINode.opStar ||
n.fType == RBBINode.opQuestion ||
n.fType == RBBINode.opPlus) {
n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
}
}
//-----------------------------------------------------------------------------
//
// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcLastPos(RBBINode n) {
if (n == null) {
return;
}
if (n.fType == RBBINode.leafChar ||
n.fType == RBBINode.endMark ||
n.fType == RBBINode.lookAhead ||
n.fType == RBBINode.tag) {
// These are non-empty leaf node types.
n.fLastPosSet.add(n);
return;
}
// The node is not a leaf.
// Calculate lastPos on its children.
calcLastPos(n.fLeftChild);
calcLastPos(n.fRightChild);
// Apply functions from table 3.40 in Aho
if (n.fType == RBBINode.opOr) {
n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
}
else if (n.fType == RBBINode.opCat) {
n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
if (n.fRightChild.fNullable) {
n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
}
}
else if (n.fType == RBBINode.opStar ||
n.fType == RBBINode.opQuestion ||
n.fType == RBBINode.opPlus) {
n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
}
}
//-----------------------------------------------------------------------------
//
// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcFollowPos(RBBINode n) {
if (n == null ||
n.fType == RBBINode.leafChar ||
n.fType == RBBINode.endMark) {
return;
}
calcFollowPos(n.fLeftChild);
calcFollowPos(n.fRightChild);
// Aho rule #1
if (n.fType == RBBINode.opCat) {
RBBINode i; // is 'i' in Aho's description
Set LastPosOfLeftChild = n.fLeftChild.fLastPosSet;
Iterator ix = LastPosOfLeftChild.iterator();
while (ix.hasNext()) {
i = (RBBINode )ix.next();
i.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
}
}
// Aho rule #2
if (n.fType == RBBINode.opStar ||
n.fType == RBBINode.opPlus) {
RBBINode i; // again, n and i are the names from Aho's description.
Iterator ix = n.fLastPosSet.iterator();
while (ix.hasNext()) {
i = (RBBINode) ix.next();
i.fFollowPos.addAll(n.fFirstPosSet);
}
}
}
//-----------------------------------------------------------------------------
//
// calcChainedFollowPos. Modify the previously calculated followPos sets
// to implement rule chaining. NOT described by Aho
//
//-----------------------------------------------------------------------------
void calcChainedFollowPos(RBBINode tree) {
List endMarkerNodes = new ArrayList();
List leafNodes = new ArrayList();
// get a list of all endmarker nodes.
tree.findNodes(endMarkerNodes, RBBINode.endMark);
// get a list all leaf nodes
tree.findNodes(leafNodes, RBBINode.leafChar);
// Get all nodes that can be the start a match, which is FirstPosition()
// of the portion of the tree corresponding to user-written rules.
// See the tree description in bofFixup().
RBBINode userRuleRoot = tree;
if (fRB.fSetBuilder.sawBOF()) {
userRuleRoot = tree.fLeftChild.fRightChild;
}
Assert.assrt(userRuleRoot != null);
Set matchStartNodes = userRuleRoot.fFirstPosSet;
// Iteratate over all leaf nodes,
//
Iterator endNodeIx = leafNodes.iterator();
while (endNodeIx.hasNext()) {
RBBINode tNode = (RBBINode)endNodeIx.next();
RBBINode endNode = null;
// Identify leaf nodes that correspond to overall rule match positions.
// These include an endMarkerNode in their followPos sets.
Iterator i = endMarkerNodes.iterator();
while (i.hasNext()) {
RBBINode endMarkerNode = (RBBINode)i.next();
if (tNode.fFollowPos.contains(endMarkerNode)) {
endNode = tNode;
break;
}
}
if (endNode == null) {
// node wasn't an end node. Try again with the next.
continue;
}
// We've got a node that can end a match.
// Line Break Specific hack: If this node's val correspond to the $CM char class,
// don't chain from it.
// TODO: Add rule syntax for this behavior, get specifics out of here and
// into the rule file.
if (fRB.fLBCMNoChain) {
int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
if (c != -1) {
// c == -1 occurs with sets containing only the {eof} marker string.
int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
continue;
}
}
}
// Now iterate over the nodes that can start a match, looking for ones
// with the same char class as our ending node.
RBBINode startNode;
Iterator startNodeIx = matchStartNodes.iterator();
while (startNodeIx.hasNext()) {
startNode = (RBBINode )startNodeIx.next();
if (startNode.fType != RBBINode.leafChar) {
continue;
}
if (endNode.fVal == startNode.fVal) {
// The end val (character class) of one possible match is the
// same as the start of another.
// Add all nodes from the followPos of the start node to the
// followPos set of the end node, which will have the effect of
// letting matches transition from a match state at endNode
// to the second char of a match starting with startNode.
endNode.fFollowPos.addAll(startNode.fFollowPos);
}
}
}
}
//-----------------------------------------------------------------------------
//
// bofFixup. Fixup for state tables that include {bof} beginning of input testing.
// Do an swizzle similar to chaining, modifying the followPos set of
// the bofNode to include the followPos nodes from other {bot} nodes
// scattered through the tree.
//
// This function has much in common with calcChainedFollowPos().
//
//-----------------------------------------------------------------------------
void bofFixup() {
//
// The parse tree looks like this ...
// fTree root --. <cat>
// / \
// <cat> <#end node>
// / \
// <bofNode> rest
// of tree
//
// We will be adding things to the followPos set of the <bofNode>
//
RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
Assert.assrt(bofNode.fType == RBBINode.leafChar);
Assert.assrt(bofNode.fVal == 2);
// Get all nodes that can be the start a match of the user-written rules
// (excluding the fake bofNode)
// We want the nodes that can start a match in the
// part labeled "rest of tree"
//
Set matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
Iterator startNodeIt = matchStartNodes.iterator();
while (startNodeIt.hasNext()) {
RBBINode startNode = (RBBINode)startNodeIt.next();
if (startNode.fType != RBBINode.leafChar) {
continue;
}
if (startNode.fVal == bofNode.fVal) {
// We found a leaf node corresponding to a {bof} that was
// explicitly written into a rule.
// Add everything from the followPos set of this node to the
// followPos set of the fake bofNode at the start of the tree.
//
bofNode.fFollowPos.addAll(startNode.fFollowPos);
}
}
}
//-----------------------------------------------------------------------------
//
// buildStateTable() Determine the set of runtime DFA states and the
// transition tables for these states, by the algorithm
// of fig. 3.44 in Aho.
//
// Most of the comments are quotes of Aho's psuedo-code.
//
//-----------------------------------------------------------------------------
void buildStateTable() {
//
// Add a dummy state 0 - the stop state. Not from Aho.
int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
fDStates.add(failState);
// initially, the only unmarked state in Dstates is firstpos(root),
// where toot is the root of the syntax tree for (r)#;
RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
fDStates.add(initialState);
// while there is an unmarked state T in Dstates do begin
for (;;) {
RBBIStateDescriptor T = null;
int tx;
for (tx=1; tx<fDStates.size(); tx++) {
RBBIStateDescriptor temp = (RBBIStateDescriptor )fDStates.get(tx);
if (temp.fMarked == false) {
T = temp;
break;
}
}
if (T == null) {
break;
}
// mark T;
T.fMarked = true;
// for each input symbol a do begin
int a;
for (a = 1; a<=lastInputSymbol; a++) {
// let U be the set of positions that are in followpos(p)
// for some position p in T
// such that the symbol at position p is a;
Set U = null;
RBBINode p;
Iterator pit = T.fPositions.iterator();
while (pit.hasNext()) {
p = (RBBINode )pit.next();
if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) {
if (U == null) {
U = new HashSet();
}
U.addAll(p.fFollowPos);
}
}
// if U is not empty and not in DStates then
int ux = 0;
boolean UinDstates = false;
if (U != null) {
Assert.assrt(U.size() > 0);
int ix;
for (ix=0; ix<fDStates.size(); ix++) {
RBBIStateDescriptor temp2;
temp2 = (RBBIStateDescriptor )fDStates.get(ix);
if (U.equals(temp2.fPositions)) {
U = temp2.fPositions;
ux = ix;
UinDstates = true;
break;
}
}
// Add U as an unmarked state to Dstates
if (!UinDstates)
{
RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol);
newState.fPositions = U;
fDStates.add(newState);
ux = fDStates.size()-1;
}
// Dtran[T, a] := U;
T.fDtran[a] = ux;
}
}
}
}
//-----------------------------------------------------------------------------
//
// flagAcceptingStates Identify accepting states.
// First get a list of all of the end marker nodes.
// Then, for each state s,
// if s contains one of the end marker nodes in its list of tree positions then
// s is an accepting state.
//
//-----------------------------------------------------------------------------
void flagAcceptingStates() {
List endMarkerNodes = new ArrayList();
RBBINode endMarker;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
for (i=0; i<endMarkerNodes.size(); i++) {
endMarker = (RBBINode)endMarkerNodes.get(i);
for (n=0; n<fDStates.size(); n++) {
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
//if (sd.fPositions.indexOf(endMarker) >= 0) {
if (sd.fPositions.contains(endMarker)) {
// Any non-zero value for fAccepting means this is an accepting node.
// The value is what will be returned to the user as the break status.
// If no other value was specified, force it to -1.
if (sd.fAccepting==0) {
// State hasn't been marked as accepting yet. Do it now.
sd.fAccepting = endMarker.fVal;
if (sd.fAccepting == 0) {
sd.fAccepting = -1;
}
}
if (sd.fAccepting==-1 && endMarker.fVal != 0) {
// Both lookahead and non-lookahead accepting for this state.
// Favor the look-ahead. Expedient for line break.
// TODO: need a more elegant resolution for conflicting rules.
sd.fAccepting = endMarker.fVal;
}
// implicit else:
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field or this state also.
if (endMarker.fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
// Make value here an index to a side array in engine?
sd.fLookAhead = sd.fAccepting;
}
}
}
}
}
//-----------------------------------------------------------------------------
//
// flagLookAheadStates Very similar to flagAcceptingStates, above.
//
//-----------------------------------------------------------------------------
void flagLookAheadStates() {
List lookAheadNodes = new ArrayList();
RBBINode lookAheadNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
for (i=0; i<lookAheadNodes.size(); i++) {
lookAheadNode = (RBBINode )lookAheadNodes.get(i);
for (n=0; n<fDStates.size(); n++) {
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
if (sd.fPositions.contains(lookAheadNode)) {
sd.fLookAhead = lookAheadNode.fVal;
}
}
}
}
//-----------------------------------------------------------------------------
//
// flagTaggedStates
//
//-----------------------------------------------------------------------------
void flagTaggedStates() {
List tagNodes = new ArrayList();
RBBINode tagNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em)
tagNode = (RBBINode )tagNodes.get(i);
for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table)
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
if (sd.fPositions.contains(tagNode)) { // if s include the tag node t
sd.fTagVals.add(new Integer(tagNode.fVal));
}
}
}
}
//-----------------------------------------------------------------------------
//
// mergeRuleStatusVals
//
// Allocate positions in the global array of rule status {tag} values
//
// The RBBI runtime uses an array of {sets of status values} that can
// be returned for boundaries. Each accepting state that has non-zero
// status includes an index into this array. The format of the array
// is
// Num of status values in group 1
// status val
// status val
// ...
// Num of status vals in group 2
// status val
// status val
// ...
// etc.
//
//
//-----------------------------------------------------------------------------
void mergeRuleStatusVals() {
//
// The basic outline of what happens here is this...
//
// for each state in this state table
// if the status tag list for this state is in the global statuses list
// record where and
// continue with the next state
// else
// add the tag list for this state to the global list.
//
int i;
int n;
// Pre-load a single tag of {0} into the table.
// We will need this as a default, for rule sets with no explicit tagging,
// or with explicit tagging of {0}.
if (fRB.fRuleStatusVals.size() == 0) {
fRB.fRuleStatusVals.add(new Integer(1)); // Num of statuses in group
fRB.fRuleStatusVals.add(new Integer(0)); // and our single status of zero
SortedSet s0 = new TreeSet();
Integer izero = new Integer(0);
fRB.fStatusSets.put(s0, izero);
SortedSet s1 = new TreeSet();
s1.add(izero);
fRB.fStatusSets.put(s0, izero);
}
// For each state, check whether the state's status tag values are
// already entered into the status values array, and add them if not.
for (n=0; n<fDStates.size(); n++) {
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
Set statusVals = sd.fTagVals;
Integer arrayIndexI = (Integer)fRB.fStatusSets.get(statusVals);
if (arrayIndexI == null) {
// This is the first encounter of this set of status values.
// Add them to the statusSets map, This map associates
// the set of status values with an index in the runtime status
// values array.
arrayIndexI = new Integer(fRB.fRuleStatusVals.size());
fRB.fStatusSets.put(statusVals, arrayIndexI);
// Add the new set of status values to the vector of values that
// will eventually become the array used by the runtime engine.
fRB.fRuleStatusVals.add(new Integer(statusVals.size()));
Iterator it = statusVals.iterator();
while (it.hasNext()) {
fRB.fRuleStatusVals.add(it.next());
}
}
// Save the runtime array index back into the state descriptor.
sd.fTagsIdx = arrayIndexI.intValue();
}
}
//-----------------------------------------------------------------------------
//
// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
// for each node in the tree.
//
//-----------------------------------------------------------------------------
void printPosSets(RBBINode n) {
if (n==null) {
return;
}
RBBINode.printNode(n);
System.out.print(" Nullable: " + n.fNullable);
System.out.print(" firstpos: ");
printSet(n.fFirstPosSet);
System.out.print(" lastpos: ");
printSet(n.fLastPosSet);
System.out.print(" followpos: ");
printSet(n.fFollowPos);
printPosSets(n.fLeftChild);
printPosSets(n.fRightChild);
}
//-----------------------------------------------------------------------------
//
// getTableSize() Calculate the size in bytes of the runtime form of this
// state transition table.
//
// Note: Refer to common/rbbidata.h from ICU4C for the declarations
// of the structures being matched by this calculation.
//
//-----------------------------------------------------------------------------
int getTableSize() {
int size = 0;
int numRows;
int numCols;
int rowSize;
if (fRB.fTreeRoots[fRootIx] == null) {
return 0;
}
size = /*sizeof(RBBIStateTable) - 4 */ 16; // The header, with no rows to the table.
numRows = fDStates.size();
numCols = fRB.fSetBuilder.getNumCharCategories();
// Note The declaration of RBBIStateTableRow is for a table of two columns.
// Therefore we subtract two from numCols when determining
// how much storage to add to a row for the total columns.
// rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
rowSize = 8 + 2*numCols;
size += numRows * rowSize;
while (size % 8 > 0) { // Size must be multiple of 8 bytes in size.
size++;
}
return size;
}
//-----------------------------------------------------------------------------
//
// exportTable() export the state transition table in the ICU4C format.
//
// Most of the table is 16 bit shorts. This function exports
// the whole thing as an array of shorts.
//
// The size of the array must be rounded up to a multiple of
// 8 bytes.
//
// See struct RBBIStateTable in ICU4C, common/rbbidata.h
//
//-----------------------------------------------------------------------------
short [] exportTable() {
int state;
int col;
if (fRB.fTreeRoots[fRootIx] == null) {
return new short[0];
}
Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
fDStates.size() < 0x7fff);
int numStates = fDStates.size();
// Size of table size in shorts.
// the "4" is the size of struct RBBIStateTableRow, the row header part only.
int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();
int tableSize = getTableSize() / 2;
short [] table = new short[tableSize];
//
// Fill in the header fields.
// Annoying because they really want to be ints, not shorts.
//
// RBBIStateTable.fNumStates
table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16);
table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff);
// RBBIStateTable.fRowLen
table[RBBIDataWrapper.ROWLEN] = (short)(rowLen >>> 16);
table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff);
// RBBIStateTable.fFlags
int flags = 0;
if (fRB.fLookAheadHardBreak) {
flags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
}
if (fRB.fSetBuilder.sawBOF()) {
flags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
}
table[RBBIDataWrapper.FLAGS] = (short)(flags >>> 16);
table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff);
int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
for (state=0; state<numStates; state++) {
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(state);
int row = 8 + state*rowLen;
Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
table[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
table[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
table[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx;
for (col=0; col<numCharCategories; col++) {
table[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
}
}
return table;
}
//-----------------------------------------------------------------------------
//
// printSet Debug function. Print the contents of a set of Nodes
//
//-----------------------------------------------------------------------------
void printSet(Collection s) {
Iterator it = s.iterator();
while (it.hasNext()) {
RBBINode n = (RBBINode)it.next();
RBBINode.printInt(n.fSerialNum, 8);
}
System.out.println();
}
//-----------------------------------------------------------------------------
//
// printStates Debug Function. Dump the fully constructed state transition table.
//
//-----------------------------------------------------------------------------
void printStates() {
int c; // input "character"
int n; // state number
System.out.print("state | i n p u t s y m b o l s \n");
System.out.print(" | Acc LA Tag");
for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
RBBINode.printInt((int)c, 3);
}
System.out.print("\n");
System.out.print(" |---------------");
for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
System.out.print("---");
}
System.out.print("\n");
for (n=0; n<fDStates.size(); n++) {
RBBIStateDescriptor sd = (RBBIStateDescriptor)fDStates.get(n);
RBBINode.printInt(n, 5);
System.out.print(" | ");
RBBINode.printInt(sd.fAccepting, 3);
RBBINode.printInt(sd.fLookAhead, 4);
RBBINode.printInt(sd.fTagsIdx, 6);
System.out.print(" ");
for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
RBBINode.printInt(sd.fDtran[c], 3);
}
System.out.print("\n");
}
System.out.print("\n\n");
}
//-----------------------------------------------------------------------------
//
// printRuleStatusTable Debug Function. Dump the common rule status table
//
//-----------------------------------------------------------------------------
void printRuleStatusTable() {
int thisRecord = 0;
int nextRecord = 0;
int i;
List tbl = fRB.fRuleStatusVals;
System.out.print("index | tags \n");
System.out.print("-------------------\n");
while (nextRecord < tbl.size()) {
thisRecord = nextRecord;
nextRecord = thisRecord + ((Integer)tbl.get(thisRecord)).intValue() + 1;
RBBINode.printInt(thisRecord, 7);
for (i=thisRecord+1; i<nextRecord; i++) {
int val = ((Integer)tbl.get(i)).intValue();
RBBINode.printInt(val, 7);
}
System.out.print("\n");
}
System.out.print("\n\n");
}
}