001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.tool.localeconverter;
008:
009: import java.io.*;
010:
/**
 * A Lex is a table-driven state machine that splits a character stream
 * into tokens.
 * <p>
 * The machine is described by a table of {@link Transition}s. While a token
 * is being recognised the current state is zero or negative and its negation
 * selects the row of the table to consult. A transition decides whether it
 * accepts the character that was just read and, when executed, returns the
 * state the machine should move into; it may consume arbitrary amounts of
 * text from the input while doing so. States that are &gt; 0 are final
 * states and cause {@link #nextToken()} to return that value.
 */
public final class Lex {
    /** The value returned by the input reader's read() at end of input. */
    private static final int EOF_CHAR = -1;

    private Transition[][] states;
    private PushbackReader input;
    private int state;
    /** Cached String form of dataBuffer; null until getData() builds it. */
    private String data;
    private final StringBuffer dataBuffer = new StringBuffer();
    // Recorded by debug(...); nothing in this class reads them.
    private boolean debugMessagesOn;
    private String debugTag;

    /** The final state produced by the no-argument {@link EOFTransition}. */
    public static final int END_OF_FILE = Integer.MAX_VALUE;

    /**
     * Construct a new machine. NOTE: setInput must be
     * called before nextToken is called.
     *
     * @param states the transition table, one row per state
     */
    public Lex(final Transition[][] states) {
        this.states = states;
        //{{INIT_CONTROLS
        //}}
    }

    /**
     * Construct a new machine.
     *
     * @param statesIn the transition table, one row per state
     * @param inputIn the reader tokens are read from
     */
    public Lex(final Transition[][] statesIn, final PushbackReader inputIn) {
        states = statesIn;
        input = inputIn;
    }

    /** Return the current state. */
    public int getState() {
        return state;
    }

    /**
     * Return the data resulting from the last call to nextToken.
     * The string is built on first use and cached until the next call
     * to nextToken.
     */
    public String getData() {
        if (data == null) {
            data = dataBuffer.toString();
        }
        return data;
    }

    /** Return the input reader used by this machine. */
    public PushbackReader getInput() {
        return input;
    }

    /** Set the input reader used by this machine. */
    public void setInput(PushbackReader input) {
        this.input = input;
    }

    /** Return the states used by this machine (the table itself, not a copy). */
    public Transition[][] getStates() {
        return states;
    }

    /** Replace the transition table used by this machine. */
    public void setStates(Transition[][] states) {
        this.states = states;
    }

    /**
     * Return true if the specified string equals the
     * string returned by getData(). This routine
     * may be faster than calling getData because
     * it does not create a string on the heap.
     *
     * @param other the string to compare with; null never matches
     */
    public boolean dataEquals(final String other) {
        if (other == null) {
            // Same answer String.equals(null) gives on the cached path.
            return false;
        }
        if (data != null) {
            // The buffer has already been converted to a string,
            // so just compare the strings.
            return data.equals(other);
        }
        final int len = dataBuffer.length();
        if (other.length() != len) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (other.charAt(i) != dataBuffer.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Append the data returned from getData() to the
     * specified StringBuffer. This routine avoids
     * the creation of a String on the heap.
     */
    public void appendDataTo(StringBuffer buffer) {
        // Append the buffer itself; going through toString() would
        // allocate exactly the String this method promises to avoid.
        buffer.append(dataBuffer);
    }

    /**
     * Return true if the data returned by getData()
     * starts with the specified string. This routine avoids
     * the creation of a String on the heap.
     */
    public boolean dataStartsWith(String s) {
        final int sLength = s.length();
        if (dataBuffer.length() < sLength) {
            return false;
        }
        for (int i = 0; i < sLength; i++) {
            if (dataBuffer.charAt(i) != s.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Convert the contents of the data buffer to an integer
     * of the specified radix.
     * <p>
     * No validation is performed: Character.digit returns -1 for a
     * character that is not a digit in the radix and that value is folded
     * into the result as is, and the arithmetic is plain int arithmetic.
     *
     * @param radix the radix the buffer contents are written in
     * @return the accumulated value; 0 for an empty buffer
     */
    public int dataAsNumber(int radix) {
        int value = 0;
        final int len = dataBuffer.length();
        for (int i = 0; i < len; i++) {
            value = value * radix
                    + Character.digit(dataBuffer.charAt(i), radix);
        }
        return value;
    }

    /**
     * Get the next token from the input stream. The
     * dataBuffer is cleared and the state is set to zero before
     * parsing begins. Parsing continues until a state
     * greater than 0 is reached or an exception is thrown.
     * For each character read, the state machine
     * walks through all the transitions, in order, for the current
     * state until it finds one that will accept the current
     * input character and then calls doAction on that transition.
     * A character that no transition of the current state accepts is
     * dropped and the machine stays in the same state.
     *
     * @return the final (positive) state that ended the token
     * @throws ParseException if the end of the input is reached in a state
     *         that has no transition accepting it
     * @throws IOException if reading fails or a transition throws
     * @throws IllegalStateException if no input reader has been set
     */
    public int nextToken() throws IOException {
        if (input == null) {
            throw new IllegalStateException(
                    "setInput must be called before nextToken");
        }
        state = 0;
        dataBuffer.setLength(0);
        // Drop the cached string up front so that getData() and dataEquals()
        // never report the previous token if a transition throws below.
        data = null;

        do {
            final int c = input.read();
            final Transition[] row = states[-state];
            boolean accepted = false;
            for (int i = 0; i < row.length; i++) {
                if (row[i].accepts(c)) {
                    state = row[i].doAction(c, input, dataBuffer);
                    accepted = true;
                    break;
                }
            }
            if (!accepted && c == EOF_CHAR) {
                // Every further read would return end of input again and
                // leave the state unchanged, so the loop could never end.
                throw new ParseException("Unexpected end of input");
            }
        } while (state <= 0);
        return state;
    }

    /**
     * Get the next token and throw an exception if
     * the state machine is not in the specified state.
     *
     * @param neededState the final state the next token must produce
     * @throws ParseException if the next token ends in a different state
     * @throws IOException if nextToken fails
     */
    public void accept(final int neededState) throws IOException {
        if (neededState != nextToken()) {
            throw new ParseException("Unexpected token - " + getData());
        }
    }

    /**
     * Get the next token and throw an exception if the
     * state machine is not in the specified state or the
     * value returned by getData() does not match the
     * specified value.
     *
     * @param neededState the final state the next token must produce
     * @param neededValue the text the next token must consist of
     * @throws ParseException if the state or the text does not match
     * @throws IOException if nextToken fails
     */
    public void accept(final int neededState, final String neededValue)
            throws IOException {
        accept(neededState);
        if (!dataEquals(neededValue)) {
            throw new ParseException("Unexpected token - " + getData());
        }
    }

    /** Record the debug flag and clear the debug tag. */
    public void debug(boolean debugMessagesOn) {
        this.debugMessagesOn = debugMessagesOn;
        debugTag = null;
    }

    /** Record the debug flag and the debug tag. */
    public void debug(boolean debugMessagesOn, String tag) {
        this.debugMessagesOn = debugMessagesOn;
        this.debugTag = tag;
    }

    /**
     * The interface for state machine transitions.
     */
    public interface Transition {
        /**
         * Return true if the transition can accept the current input
         * character.
         * @param c the character just read, or -1 at end of input
         */
        boolean accepts(int c);

        /**
         * Perform the transition.
         * @param c the current input character
         * @param input the current input stream, minus the current input character
         * @param buffer the current output buffer
         * @return the state the machine should be in next
         */
        int doAction(int c, PushbackReader input, StringBuffer buffer)
                throws IOException;
    }

    /* constants for BaseTransitions */
    /** Don't copy the current character to the output */
    public static final byte IGNORE = 0x01;
    /** Append the current character to the output */
    public static final byte ACCUMULATE = 0x00;
    private static final byte BUFFER_MASK = 0x01;

    /** Remove the current character from the input stream */
    public static final byte CONSUME = 0x00;
    /** Return the current character to the input stream */
    public static final byte PUTBACK = 0x10;
    private static final byte INPUT_MASK = 0x10;

    /** Append the current character to the output and consume it. */
    public static final byte ACCUMULATE_CONSUME = (byte) (ACCUMULATE | CONSUME);
    /** Discard the current character. */
    public static final byte IGNORE_CONSUME = (byte) (IGNORE | CONSUME);
    /** Append the current character to the output and return it to the input. */
    public static final byte ACCUMULATE_PUTBACK = (byte) (ACCUMULATE | PUTBACK);
    /** Leave the output alone and return the current character to the input. */
    public static final byte IGNORE_PUTBACK = (byte) (IGNORE | PUTBACK);

    /**
     * Base class for simple transition classes
     */
    public abstract static class BaseTransition implements Transition {
        private final boolean addToBuffer;
        private final boolean unreadInput;
        private final int next;

        /**
         * Construct a new transition. On execution, the
         * specified action is performed and the
         * specified state is returned.
         * @param action the actions to perform to the
         *          input and output buffers.
         * @param next the next state the machine should
         *          move into
         */
        public BaseTransition(byte action, int next) {
            this.addToBuffer = (action & BUFFER_MASK) == ACCUMULATE;
            this.unreadInput = (action & INPUT_MASK) == PUTBACK;
            this.next = next;
        }

        public abstract boolean accepts(int c);

        /**
         * Apply the configured buffer and input actions to the character
         * and return the configured next state. End of input (-1) is never
         * appended or pushed back.
         */
        public int doAction(final int c, final PushbackReader input,
                final StringBuffer buffer) throws IOException {
            // End of input is not a character. Casting -1 to char yields
            // U+FFFF, which would end up in the token text or, through
            // unread, be handed back by the next read instead of -1.
            if (c != EOF_CHAR) {
                if (addToBuffer) {
                    buffer.append((char) c);
                }
                if (unreadInput) {
                    input.unread(c);
                }
            }
            return next;
        }
    }

    /**
     * Accept end-of-file.
     */
    public static final class EOFTransition extends BaseTransition {
        /** Ignore the end of input and move to END_OF_FILE. */
        public EOFTransition() {
            this(IGNORE_CONSUME, END_OF_FILE);
        }

        /** Ignore the end of input and move to the given state. */
        public EOFTransition(int next) {
            this(IGNORE_CONSUME, next);
        }

        public EOFTransition(byte action, int next) {
            super(action, next);
        }

        public boolean accepts(int c) {
            return c == EOF_CHAR;
        }
    }

    /**
     * Accept anything.
     */
    public static final class DefaultTransition extends BaseTransition {
        public DefaultTransition(byte action, int nextState) {
            super(action, nextState);
        }

        public boolean accepts(int c) {
            return true;
        }
    }

    /**
     * Accept any characters in the specified string.
     */
    public static final class StringTransition extends BaseTransition {
        private final String chars;

        public StringTransition(String chars, byte action, int nextState) {
            super(action, nextState);
            this.chars = chars;
        }

        public boolean accepts(int c) {
            // Reject end of input explicitly: (char) -1 is U+FFFF and must
            // not be looked up as if it were a character that was read.
            return c >= 0 && chars.indexOf((char) c) != -1;
        }
    }

    /**
     * Accept only the specified character.
     */
    public static final class CharTransition extends BaseTransition {
        private final char expected;

        public CharTransition(char c, byte action, int nextState) {
            super(action, nextState);
            this.expected = c;
        }

        public boolean accepts(int c) {
            // See StringTransition.accepts: end of input is not U+FFFF.
            return c >= 0 && expected == (char) c;
        }
    }

    /**
     * Accept anything, but throw the specified exception after
     * performing the specified action
     */
    public static final class ExceptionTransition extends BaseTransition {
        private final IOException exception;

        /** Execute as IGNORE_PUTBACK, then throw the given exception. */
        public ExceptionTransition(IOException e) {
            super(IGNORE_PUTBACK, END_OF_FILE); //state is ignored
            this.exception = e;
        }

        /** Execute the given action, then throw the given exception. */
        public ExceptionTransition(byte action, IOException e) {
            super(action, END_OF_FILE); //state is ignored
            this.exception = e;
        }

        public boolean accepts(int c) {
            return true;
        }

        public final int doAction(final int c,
                final PushbackReader input, final StringBuffer buffer)
                throws IOException {
            super.doAction(c, input, buffer);
            throw exception;
        }
    }

    /**
     * The base class for parse exceptions. Exceptions
     * resulting from parsing errors should be subclasses of this
     * class.
     */
    public static final class ParseException extends IOException {
        public final String reason;

        public ParseException() {
            // Spelling kept as is: the text is exposed through the
            // public reason field and toString().
            this("unkown");
        }

        public ParseException(String reason) {
            // Hand the reason to IOException too so getMessage() reports it.
            super(reason);
            this.reason = reason;
        }

        public String toString() {
            return reason;
        }
    }

    /**
     * Accept anything, execute as IGNORE_PUTBACK, and throw
     * a ParseException with the specified message
     */
    public static final class ParseExceptionTransition implements Transition {
        private final String reason;

        public ParseExceptionTransition(String reason) {
            this.reason = reason;
        }

        public boolean accepts(int c) {
            return true;
        }

        public final int doAction(final int c,
                final PushbackReader input, final StringBuffer buffer)
                throws IOException {
            // Only a real character is pushed back; pushing back end of
            // input would make the next read return U+FFFF instead of -1.
            if (c != EOF_CHAR) {
                input.unread((char) c);
            }
            throw new ParseException(reason);
        }
    }

    //{{DECLARE_CONTROLS
    //}}
}
|