001: package murlen.util.fscript;
002:
003: //The lexer - kind of - it started life as a re-implementation of
004: //StreamTokenizer - hence the peculiarities.
005:
006: import java.io.*;
007: import java.util.Hashtable;
008:
009: /**
010: * <b>Re-Implementation of StreamTokenizer for FScript</b>
011: * <p>
012: * <I>Copyright (C) 2000 murlen.</I></p>
013: * <p>
014: * This library is free software; you can redistribute it and/or
015: * modify it under the terms of the GNU Library General Public
016: * License as published by the Free Software Foundation; either
017: * version 2 of the License, or (at your option) any later version.</p>
018: * <p>
019: * This library is distributed in the hope that it will be useful,
020: * but WITHOUT ANY WARRANTY; without even the implied warranty of
021: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
022: * Library General Public License for more details.</p>
023: *
024: * <p>You should have received a copy of the GNU Library General Public
025: * License along with this library; if not, write to the Free
026: * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA </p>
027: *
028: * <p>This class is a re-implementation of Sun's StreamTokenizer class
029: * as it was causing problems (especially parsing -ve numbers).</p>
030: * @author murlen
031: * @author Joachim Van der Auwera
032: * @version 1.12
033: *
034: * changes by Joachim Van der Auwera
035: * 31.08.2001
036: * - simplified (speeded up) handling of comments (there was also an
037: * inconsistency in the newline handling inside and outside comments).
038: * - small mistake disallowed the letter 'A' in TT_WORD
039: *
040: * declares a string a, which is initialised to one double quote.
041: * 11.8.2002 (murlen) Changed to allow \n \r \" \t in strings
042: * 17.02.2003 (jvda) Also allow \\ in strings
043: * 23.11.2003 (jvda) major rework of nextT() for speed
044: * renumbered TT_xxx constants for faster switch statements
045: * 14.04.2004 (murlen) elsif added
046: */
047:
final class LexAnn {
    /** Reserved words mapped to their TT_* token codes (String key, Integer value). */
    private static final Hashtable wordToken = new Hashtable();

    /**
     * Minimum size of the token scratch buffer. The buffer is enlarged in
     * {@link #setString(char[])} whenever a longer line is supplied, so this
     * is no longer a hard limit on token length.
     */
    private static final int MAX_LINE_LENGTH = 1024;

    //general
    public static final int TT_WORD = 9000;
    public static final int TT_INTEGER = 9001;
    public static final int TT_DOUBLE = 9002;
    public static final int TT_EOF = 9003; //never set by this class
    public static final int TT_EOL = 9004;
    public static final int TT_STRING = 9005;
    public static final int TT_FUNC = 9006;
    public static final int TT_ARRAY = 9007;
    public static final int TT_NULL = 9008;

    //keywords
    public static final int TT_IF = 9009;
    public static final int TT_EIF = 9010;
    public static final int TT_ELSE = 9011;
    public static final int TT_THEN = 9012;
    public static final int TT_ELSIF = 9013;

    public static final int TT_DEFFUNC = 9014;
    public static final int TT_EDEFFUNC = 9015;
    public static final int TT_WHILE = 9016;
    public static final int TT_EWHILE = 9017;
    public static final int TT_DEFINT = 9018;
    public static final int TT_DEFSTRING = 9019;
    public static final int TT_DEFDOUBLE = 9020;
    public static final int TT_DEFOBJECT = 9021;
    public static final int TT_RETURN = 9022;

    //math opts
    public static final int TT_PLUS = 9023;
    public static final int TT_MINUS = 9024;
    public static final int TT_MULT = 9025;
    public static final int TT_DIV = 9026;
    public static final int TT_MOD = 9027;

    //logic
    public static final int TT_LAND = 9028;
    public static final int TT_LOR = 9029;
    public static final int TT_LEQ = 9030;
    public static final int TT_LNEQ = 9031;
    public static final int TT_LGR = 9032;
    public static final int TT_LLS = 9033;
    public static final int TT_LGRE = 9034;
    public static final int TT_LLSE = 9035;
    public static final int TT_NOT = 9036;

    //other
    public static final int TT_EQ = 9037;

    /** contains the current token type */
    int ttype;

    /** Current token value object */
    Object value;

    /** True when the next call to nextToken() must return the current token again. */
    private boolean pBack;

    /** cBuf: scratch buffer used to assemble token text; line: the line being tokenized. */
    private char cBuf[], line[];

    /** Number of characters in {@link #line}. */
    private int length;

    /**
     * Last character read from the line. 0 means "nothing read yet" (and is
     * skipped like whitespace); {@link #EOL} means the line is exhausted.
     */
    private int c = 0;

    /** Sentinel returned by getChar()/peekChar() once the end of the line is reached. */
    private static final int EOL = -1;

    /** Index in {@link #line} of the next character getChar() will return. */
    private int pos = 0;

    /**
     * String representation of the current token, in the form
     * {@code NAME:value}. NAME is the name of the public TT_* constant equal
     * to {@link #ttype}; if there is none, ttype is printed as a character.
     */
    public String toString() {
        String tokenName = "";
        java.lang.reflect.Field[] fields = getClass().getFields();

        // look up the human readable TT_* name via reflection
        for (int n = 0; n < fields.length; n++) {
            java.lang.reflect.Field f = fields[n];
            if (!f.getName().startsWith("TT")) {
                continue;
            }
            try {
                if (ttype == f.getInt(this)) {
                    tokenName = f.getName();
                }
            } catch (IllegalAccessException ignored) {
                // best effort only: an unreadable field simply cannot supply a name
            } catch (IllegalArgumentException ignored) {
                // best effort only: a non-int TT* field cannot match ttype
            }
        }

        if (!tokenName.equals("")) {
            return tokenName + ":" + value;
        }
        return String.valueOf((char) ttype) + ":" + value;
    }

    /**Constructor*/
    LexAnn() {
        // initial size only; setString() enlarges the buffer for longer lines
        cBuf = new char[MAX_LINE_LENGTH];
    }

    /**Convenience constructor which sets line as well*/
    LexAnn(char[] firstLine) {
        this();
        setString(firstLine);
    }

    /**
     * Sets the internal line buffer and restarts scanning at its first
     * character. A pending pushBack() is not cleared.
     * @param str - the string to use (must not be null)
     */
    void setString(char[] str) {
        length = str.length;
        line = str;
        pos = 0;
        c = 0;
        // A token never holds more characters than the line it came from
        // (escape sequences only shrink), so a buffer as long as the line
        // can never overflow.
        if (cBuf.length < length) {
            cBuf = new char[length];
        }
    }

    /**
     * return the next char in the buffer and advance, or EOL when the
     * line is exhausted
     */
    private int getChar() {
        if (pos < length) {
            return line[pos++];
        }
        return EOL;
    }

    /**
     * return the character at a current line pos (+offset)
     * without affecting internal counters. Offset 1 is the character the
     * next getChar() call would return. Positions outside the line yield EOL.
     */
    private int peekChar(int offset) {
        int n = pos + offset - 1;
        if (n < 0 || n >= length) {
            return EOL;
        }
        return line[n];
    }

    /**Read the next token, or re-deliver the current one after pushBack().
     * The token type is also stored in {@link #ttype} and its value (if any)
     * in {@link #value}.
     * @return int - the token type: a TT_* constant, or the character itself
     * for any character that has no TT_* code
     * @throws NumberFormatException if a numeric literal cannot be parsed
     * (for example more than one '.' or an integer out of int range)
     */
    int nextToken() throws IOException {
        if (pBack) {
            pBack = false;
            return ttype;
        }
        return nextT();
    }

    /**Causes next call to nextToken to return same value*/
    void pushBack() {
        pBack = true;
    }

    /**
     * get the line which is currently being parsed
     * @return a copy of the current line as a String; the empty string when
     * no line has been set yet
     */
    String getLine() {
        if (line == null) {
            return "";
        }
        return new String(line);
    }

    /**
     * Internal next token function. Scans from the current character
     * {@link #c}, skipping whitespace, until one token has been recognised.
     * On return {@link #c} holds the first character after the token.
     */
    private int nextT() {
        boolean getNext;
        value = null;
        ttype = 0;

        while (ttype == 0) {
            // word and number scanners stop ON the first character after the
            // token, so they must not advance again
            getNext = true;
            switch (c) {
                // start of line or whitespace
                case 0: case ' ': case '\t': case '\n': case '\r':
                    break;

                // end of line marker
                case EOL:
                    ttype = TT_EOL;
                    break;

                // comments
                case '#':
                    pos = length; // skip to end of line
                    ttype = TT_EOL;
                    break;

                // quoted strings
                case '"':
                    readString();
                    break;

                // Words
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                    readWord();
                    getNext = false;
                    break;

                // Numbers
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    readNumber();
                    getNext = false;
                    break;

                // others
                case '+':
                    ttype = TT_PLUS;
                    break;
                case '-':
                    ttype = TT_MINUS;
                    break;
                case '*':
                    ttype = TT_MULT;
                    break;
                case '/':
                    ttype = TT_DIV;
                    break;
                case '%':
                    ttype = TT_MOD;
                    break;
                case '>':
                    ttype = withEquals(TT_LGR, TT_LGRE);
                    break;
                case '<':
                    ttype = withEquals(TT_LLS, TT_LLSE);
                    break;
                case '=':
                    ttype = withEquals(TT_EQ, TT_LEQ);
                    break;
                case '!':
                    ttype = withEquals(TT_NOT, TT_LNEQ);
                    break;
                default:
                    if ((c == '|') && (peekChar(1) == '|')) {
                        getChar();
                        ttype = TT_LOR;
                    } else if ((c == '&') && (peekChar(1) == '&')) {
                        getChar();
                        ttype = TT_LAND;
                    } else {
                        // any other character is its own token type
                        ttype = c;
                    }
            }
            if (getNext) {
                c = getChar();
            }
        }
        return ttype;
    }

    /**
     * Chooses between a one-character operator and its two-character form
     * ending in '='; consumes the '=' when it is present.
     */
    private int withEquals(int plain, int followedByEquals) {
        if (peekChar(1) == '=') {
            getChar();
            return followedByEquals;
        }
        return plain;
    }

    /**
     * Scans a quoted string; {@link #c} is the opening quote on entry and the
     * closing quote (or EOL for an unterminated string) on exit.
     * Recognised escapes are \n \t \r \" and \\; for any other escape the
     * backslash is dropped and the following character is taken literally.
     */
    private void readString() {
        int cPos = 0;
        c = getChar();
        while ((c != EOL) && (c != '"')) {
            if (c == '\\') {
                int escaped = translateEscape(peekChar(1));
                if (escaped >= 0) {
                    cBuf[cPos++] = (char) escaped;
                    getChar(); // consume the escape letter
                }
            } else {
                cBuf[cPos++] = (char) c;
            }
            c = getChar();
        }
        value = new String(cBuf, 0, cPos);
        ttype = TT_STRING;
    }

    /** Maps the character after a backslash to the character it stands for, or -1 if unknown. */
    private static int translateEscape(int ch) {
        switch (ch) {
            case 'n':
                return '\n';
            case 't':
                return '\t';
            case 'r':
                return '\r';
            case '"':
                return '"';
            case '\\':
                return '\\';
            default:
                return -1;
        }
    }

    /** True for the characters allowed inside a word: letters, digits, '_' and '.'. */
    private static boolean isWordChar(int ch) {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
                || (ch >= '0' && ch <= '9') || ch == '_' || ch == '.';
    }

    /**
     * Scans a word starting at {@link #c} and classifies it: a reserved word
     * gets its keyword code, otherwise TT_FUNC if immediately followed by
     * '(', TT_ARRAY if immediately followed by '[', else TT_WORD.
     * Leaves {@link #c} on the first character after the word.
     */
    private void readWord() {
        int cPos = 0;
        while (isWordChar(c)) {
            cBuf[cPos++] = (char) c;
            c = getChar();
        }
        value = new String(cBuf, 0, cPos);
        Integer tt = (Integer) wordToken.get(value);
        if (tt != null) {
            ttype = tt.intValue();
        } else if (c == '(') {
            ttype = TT_FUNC;
        } else if (c == '[') {
            ttype = TT_ARRAY;
        } else {
            ttype = TT_WORD;
        }
    }

    /**
     * Scans a run of digits and dots starting at {@link #c}. The token is a
     * TT_DOUBLE if the run contains a '.', otherwise a TT_INTEGER.
     * Leaves {@link #c} on the first character after the number.
     * @throws NumberFormatException if the text is not a valid number
     */
    private void readNumber() {
        int cPos = 0;
        boolean isDouble = false;
        while ((c >= '0' && c <= '9') || c == '.') {
            if (c == '.') {
                isDouble = true;
            }
            cBuf[cPos++] = (char) c;
            c = getChar();
        }
        String str = new String(cBuf, 0, cPos);
        if (isDouble) {
            ttype = TT_DOUBLE;
            value = Double.valueOf(str);
        } else {
            ttype = TT_INTEGER;
            value = Integer.valueOf(str);
        }
    }

    /** Registers one reserved word with its token code. */
    private static void addWord(String word, int token) {
        // NOTE(review): Integer.valueOf(int) is preferable if the project
        // baseline is Java 5 or later - confirm before switching.
        wordToken.put(word, new Integer(token));
    }

    static {
        addWord("if", TT_IF);
        addWord("then", TT_THEN);
        addWord("endif", TT_EIF);
        addWord("else", TT_ELSE);
        addWord("elsif", TT_ELSIF);
        addWord("elseif", TT_ELSIF);
        addWord("while", TT_WHILE);
        addWord("endwhile", TT_EWHILE);
        addWord("func", TT_DEFFUNC);
        addWord("endfunc", TT_EDEFFUNC);
        addWord("return", TT_RETURN);
        addWord("int", TT_DEFINT);
        addWord("string", TT_DEFSTRING);
        addWord("double", TT_DEFDOUBLE);
        addWord("object", TT_DEFOBJECT);
        addWord("null", TT_NULL);
    }
}
|