// blob: bf7ec5ebf9729afd13eb6d19fd918dc990ff4dc9 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2002-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.tool.localeconverter;
import java.io.*;
/**
 * A Lex is a table-driven state machine used as a lexer.
 *
 * <p>The machine is described by a two-dimensional array of
 * {@link Transition}s. While a token is being recognised the current
 * state is zero or negative and the transitions for that state are
 * found at index {@code -state} of the table. A transition says
 * whether it can accept the next input character and, when asked to
 * act, returns the state the machine should move into; it may consume
 * an arbitrary amount of additional input while doing so.
 *
 * <p>States that are greater than zero are final states: reaching one
 * ends {@link #nextToken()}, which returns that state as the token.
 */
public final class Lex {
    /** State used by the stock {@link EOFTransition} to signal end of input. */
    public static final int END_OF_FILE = Integer.MAX_VALUE;

    /** The value {@code Reader.read()} returns at end of input. */
    private static final int EOF_CHAR = -1;

    private Transition[][] states;
    private PushbackReader input;
    private int state;
    /** Lazily created String view of {@link #dataBuffer}; null when stale. */
    private String data;
    private final StringBuffer dataBuffer = new StringBuffer();
    // The debug settings are recorded by debug(...) but nothing in this
    // class currently reads them.
    private boolean debugMessagesOn;
    private String debugTag;

    /**
     * Construct a new machine. NOTE: setInput must be
     * called before nextToken is called.
     *
     * @param states the transition table; row {@code i} holds the
     *               transitions for machine state {@code -i}
     */
    public Lex(final Transition[][] states) {
        this(states, null);
    }

    /**
     * Construct a new machine that reads from the given input.
     *
     * @param statesIn the transition table; row {@code i} holds the
     *                 transitions for machine state {@code -i}
     * @param inputIn  the reader tokens are read from
     */
    public Lex(final Transition[][] statesIn, final PushbackReader inputIn) {
        states = statesIn;
        input = inputIn;
    }

    /** Return the current state. */
    public int getState() {
        return state;
    }

    /**
     * Return the data resulting from the last call to nextToken.
     * The String is created on first use and cached until the next
     * call to nextToken.
     */
    public String getData() {
        if (data == null) {
            data = dataBuffer.toString();
        }
        return data;
    }

    /** Return the input reader used by this machine. */
    public PushbackReader getInput() {
        return input;
    }

    /** Set the input reader used by this machine. */
    public void setInput(PushbackReader input) {
        this.input = input;
    }

    /** Return the states used by this machine (the array itself, not a copy). */
    public Transition[][] getStates() {
        return states;
    }

    /** Replace the transition table used by this machine. */
    public void setStates(Transition[][] states) {
        this.states = states;
    }

    /**
     * Return true if the specified string equals the
     * string returned by getData(). This routine
     * may be faster than calling getData because
     * it does not create a string on the heap.
     *
     * @param other the string to compare with; {@code null} never matches
     */
    public boolean dataEquals(final String other) {
        if (data != null) {
            // The buffer has already been converted to a string,
            // so just compare the strings.
            return data.equals(other);
        }
        // Guard against null so both paths agree: String.equals(null) is false.
        return other != null && other.contentEquals(dataBuffer);
    }

    /**
     * Append the data returned from getData() to the
     * specified StringBuffer. This routine avoids
     * the creation of a String on the heap.
     *
     * @param buffer the buffer that receives the token text
     */
    public void appendDataTo(StringBuffer buffer) {
        if (data != null) {
            buffer.append(data);
        } else {
            // append(StringBuffer) copies the characters directly,
            // without an intermediate String.
            buffer.append(dataBuffer);
        }
    }

    /**
     * Return true if the data returned by getData()
     * starts with the specified string. This routine avoids
     * the creation of a String on the heap.
     *
     * @param s the prefix to look for; must not be null
     */
    public boolean dataStartsWith(String s) {
        final int prefixLength = s.length();
        if (dataBuffer.length() < prefixLength) {
            return false;
        }
        for (int i = 0; i < prefixLength; i++) {
            if (dataBuffer.charAt(i) != s.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Convert the contents of the data buffer to an integer
     * of the specified radix.
     *
     * <p>No validation is performed: a character that is not a digit
     * in the given radix contributes the {@code -1} returned by
     * {@code Character.digit}, and the result wraps on int overflow.
     *
     * @param radix the radix the buffered digits are written in
     * @return the accumulated value, 0 for an empty buffer
     */
    public int dataAsNumber(int radix) {
        int value = 0;
        final int len = dataBuffer.length();
        for (int i = 0; i < len; i++) {
            value = value * radix + Character.digit(dataBuffer.charAt(i), radix);
        }
        return value;
    }

    /**
     * Get the next token from the input stream. The
     * dataBuffer is cleared and the state is set to zero before
     * parsing begins. Parsing continues until a state
     * greater than 0 is reached or an exception is thrown.
     * For each character read, the state machine
     * walks through all the transitions, in order, for the current
     * state until it finds one that will accept the current
     * input character and then calls doAction on that transition.
     * A character that no transition accepts is skipped.
     *
     * @return the final (positive) state that was reached
     * @throws ParseException if the end of the input is reached and
     *         no transition of the current state accepts it
     * @throws IOException if reading fails or a transition throws
     * @throws IllegalStateException if no input reader has been set
     */
    public int nextToken() throws IOException {
        if (input == null) {
            throw new IllegalStateException("setInput must be called before nextToken");
        }
        state = 0;
        dataBuffer.setLength(0);
        data = null; // the cached string belongs to the previous token
        try {
            do {
                final int c = input.read();
                final Transition[] candidates = states[-state];
                boolean accepted = false;
                for (int i = 0; i < candidates.length; i++) {
                    if (candidates[i].accepts(c)) {
                        state = candidates[i].doAction(c, input, dataBuffer);
                        accepted = true;
                        break;
                    }
                }
                if (!accepted && c == EOF_CHAR) {
                    // Every further read would return EOF again, so
                    // continuing would loop forever.
                    throw new ParseException("Unexpected end of input");
                }
            } while (state <= 0);
        } finally {
            // Dump the cached data string even when a transition throws,
            // so getData() never reports a stale token.
            data = null;
        }
        return state;
    }

    /**
     * Get the next token and throw an exception if
     * the state machine is not in the specified state.
     *
     * @param neededState the final state the next token must produce
     * @throws ParseException if a different state is reached
     * @throws IOException if reading the token fails
     */
    public void accept(final int neededState) throws IOException {
        if (neededState != nextToken()) {
            throw new ParseException("Unexpected token - " + getData());
        }
    }

    /**
     * Get the next token and throw an exception if the
     * state machine is not in the specified state or the
     * value returned by getData() does not match the
     * specified value.
     *
     * @param neededState the final state the next token must produce
     * @param neededValue the text the next token must have
     * @throws ParseException if the state or the text does not match
     * @throws IOException if reading the token fails
     */
    public void accept(final int neededState, final String neededValue) throws IOException {
        accept(neededState);
        if (!dataEquals(neededValue)) {
            throw new ParseException("Unexpected token - " + getData());
        }
    }

    /**
     * Record whether debug messages are wanted and clear the debug tag.
     * The setting is only stored; this class emits no debug output.
     */
    public void debug(boolean debugMessagesOn) {
        this.debugMessagesOn = debugMessagesOn;
        debugTag = null;
    }

    /**
     * Record whether debug messages are wanted together with a tag.
     * The settings are only stored; this class emits no debug output.
     */
    public void debug(boolean debugMessagesOn, String tag) {
        this.debugMessagesOn = debugMessagesOn;
        this.debugTag = tag;
    }

    /**
     * The interface for state machine transitions.
     */
    public interface Transition {
        /**
         * Return true if the transition can accept the current input
         * character.
         * @param c the current input character, or -1 at end of input
         */
        public boolean accepts(int c);

        /**
         * Perform the transition.
         * @param c the current input character
         * @param input the current input stream, minus the current input character
         * @param buffer the current output buffer
         * @return the state the machine should be in next
         */
        public int doAction(int c, PushbackReader input, StringBuffer buffer) throws IOException;
    }

    /* constants for BaseTransitions */

    /** Don't copy the current character to the output */
    public static final byte IGNORE = 0x01;
    /** Append the current character to the output */
    public static final byte ACCUMULATE = 0x00;
    private static final byte BUFFER_MASK = 0x01;
    /** Remove the current character from the input stream */
    public static final byte CONSUME = 0x00;
    /** Return the current character to the input stream */
    public static final byte PUTBACK = 0x10;
    private static final byte INPUT_MASK = 0x10;

    /** The four combinations of one buffer action with one input action. */
    public static final byte
        ACCUMULATE_CONSUME = (byte)(ACCUMULATE | CONSUME),
        IGNORE_CONSUME = (byte)(IGNORE | CONSUME),
        ACCUMULATE_PUTBACK = (byte)(ACCUMULATE | PUTBACK),
        IGNORE_PUTBACK = (byte)(IGNORE | PUTBACK);

    /**
     * Base class for simple transition classes
     */
    public static abstract class BaseTransition implements Transition {
        private final boolean addToBuffer;
        private final boolean unreadInput;
        private final int next;

        /**
         * Construct a new transition. On execution, the
         * specified action is performed and the
         * specified state is returned.
         * @param action the actions to perform to the
         *      input and output buffers.
         * @param next the next state the machine should
         *      move into
         */
        public BaseTransition(byte action, int next) {
            this.addToBuffer = (action & BUFFER_MASK) == ACCUMULATE;
            this.unreadInput = (action & INPUT_MASK) == PUTBACK;
            this.next = next;
        }

        public abstract boolean accepts(int c);

        /**
         * Apply the configured buffer and input actions to the
         * character and return the configured next state. At end of
         * input (c == -1) there is no character, so nothing is
         * appended or pushed back.
         */
        public int doAction(final int c,
                            final PushbackReader input,
                            final StringBuffer buffer) throws IOException {
            // unread(-1) would push back '\uFFFF' and hide the end of
            // input from the next read; appending would add the same
            // bogus character to the token.
            if (c != EOF_CHAR) {
                if (addToBuffer) {
                    buffer.append((char) c);
                }
                if (unreadInput) {
                    input.unread(c);
                }
            }
            return next;
        }
    }

    /**
     * Accept end-of-file.
     */
    public static final class EOFTransition extends BaseTransition {
        /** Ignore the end-of-file marker and move to END_OF_FILE. */
        public EOFTransition() {
            this(IGNORE_CONSUME, END_OF_FILE);
        }

        /** Ignore the end-of-file marker and move to the given state. */
        public EOFTransition(int next) {
            this(IGNORE_CONSUME, next);
        }

        public EOFTransition(byte action, int next) {
            super(action, next);
        }

        public boolean accepts(int c) {
            return c == EOF_CHAR;
        }
    }

    /**
     * Accept anything.
     */
    public static final class DefaultTransition extends BaseTransition {
        public DefaultTransition(byte action, int nextState) {
            super(action, nextState);
        }

        public boolean accepts(int c) {
            return true;
        }
    }

    /**
     * Accept any characters in the specified string.
     */
    public static final class StringTransition extends BaseTransition {
        private final String chars;

        public StringTransition(String chars, byte action, int nextState) {
            super(action, nextState);
            this.chars = chars;
        }

        public boolean accepts(int c) {
            // Without the EOF check, -1 cast to char would match '\uFFFF'.
            return c != EOF_CHAR && chars.indexOf((char) c) != -1;
        }
    }

    /**
     * Accept only the specified character.
     */
    public static final class CharTransition extends BaseTransition {
        private final char c;

        public CharTransition(char c, byte action, int nextState) {
            super(action, nextState);
            this.c = c;
        }

        public boolean accepts(int c) {
            // Compare as ints so that end of input (-1) never matches.
            return this.c == c;
        }
    }

    /**
     * Accept anything, but throw the specified exception after
     * performing the specified action
     */
    public static final class ExceptionTransition extends BaseTransition {
        private final IOException e;

        /** Push the character back (IGNORE_PUTBACK), then throw {@code e}. */
        public ExceptionTransition(IOException e) {
            this(IGNORE_PUTBACK, e);
        }

        public ExceptionTransition(byte action, IOException e) {
            super(action, END_OF_FILE); //state is ignored
            this.e = e;
        }

        public boolean accepts(int c) {
            return true;
        }

        public final int doAction(final int c,
                                  final PushbackReader input,
                                  final StringBuffer buffer) throws IOException {
            super.doAction(c, input, buffer);
            throw e;
        }
    }

    /**
     * The base class for parse exceptions. Exceptions
     * resulting from parsing errors should be subclasses of this
     * class.
     */
    public static final class ParseException extends IOException {
        public final String reason;

        public ParseException() {
            this("unkown");
        }

        public ParseException(String reason) {
            super(reason); // make the reason visible through getMessage() as well
            this.reason = reason;
        }

        /** Return just the reason, without the class name prefix. */
        public String toString() {
            return reason;
        }
    }

    /**
     * Accept anything, execute as IGNORE_PUTBACK, and throw
     * a ParseException with the specified message
     */
    public static final class ParseExceptionTransition implements Transition {
        private final String reason;

        public ParseExceptionTransition(String reason) {
            this.reason = reason;
        }

        public boolean accepts(int c) {
            return true;
        }

        public final int doAction(final int c,
                                  final PushbackReader input,
                                  final StringBuffer buffer) throws IOException {
            // At end of input there is no character to return; pushing
            // back (char)-1 would make the next read see '\uFFFF'.
            if (c != EOF_CHAR) {
                input.unread((char) c);
            }
            throw new ParseException(reason);
        }
    }
}