001: package gnu.ecmascript;
002:
003: import gnu.mapping.*;
004: import gnu.expr.*;
005: import gnu.lists.Sequence;
006: import gnu.text.Char;
007:
/**
 * Reads EcmaScript tokens from an InPort.
 */
011:
012: public class Lexer extends gnu.text.Lexer {
013: private boolean prevWasCR = false;
014:
015: public Lexer(InPort port) {
016: super (port);
017: }
018:
019: public final static Char lparenToken = Char.make('(');
020: public final static Char rparenToken = Char.make(')');
021: public final static Char lbraceToken = Char.make('{');
022: public final static Char rbraceToken = Char.make('}');
023: public final static Char lbracketToken = Char.make('[');
024: public final static Char rbracketToken = Char.make(']');
025: public final static Char dotToken = Char.make('.');
026: public final static Char condToken = Char.make('?');
027: public final static Char commaToken = Char.make(',');
028: public final static Char colonToken = Char.make(':');
029: public final static Char equalToken = Char.make('=');
030: public final static Char tildeToken = Char.make('~');
031: public final static Char notToken = Char.make('!');
032: public final static Char semicolonToken = Char.make(';');
033: public final static Object eolToken = Char.make('\n');
034: public final static Object eofToken = Sequence.eofValue;
035: public final static Reserved elseToken = new Reserved("else",
036: Reserved.ELSE_TOKEN);
037: public final static Reserved newToken = new Reserved("new",
038: Reserved.NEW_TOKEN);
039:
040: static java.util.Hashtable reserved;
041:
042: static synchronized void initReserved() {
043: if (reserved == null) {
044: reserved = new java.util.Hashtable(20);
045: reserved.put("null", new QuoteExp(null));
046: reserved.put("true", new QuoteExp(java.lang.Boolean.TRUE));
047: reserved
048: .put("false", new QuoteExp(java.lang.Boolean.FALSE));
049:
050: reserved
051: .put("var", new Reserved("var", Reserved.VAR_TOKEN));
052: reserved.put("if", new Reserved("if", Reserved.IF_TOKEN));
053: reserved.put("while", new Reserved("while",
054: Reserved.WHILE_TOKEN));
055: reserved
056: .put("for", new Reserved("for", Reserved.FOR_TOKEN));
057: reserved.put("continue", new Reserved("continue",
058: Reserved.CONTINUE_TOKEN));
059: reserved.put("break", new Reserved("break",
060: Reserved.BREAK_TOKEN));
061: reserved.put("return", new Reserved("return",
062: Reserved.RETURN_TOKEN));
063: reserved.put("with", new Reserved("with",
064: Reserved.WITH_TOKEN));
065: reserved.put("function", new Reserved("function",
066: Reserved.FUNCTION_TOKEN));
067: reserved.put("this", new Reserved("this",
068: Reserved.THIS_TOKEN));
069: reserved.put("else", elseToken);
070: reserved.put("new", newToken);
071: }
072: }
073:
074: public static Object checkReserved(String name) {
075: if (reserved == null)
076: initReserved();
077: return reserved.get(name);
078: }
079:
080: public Double getNumericLiteral(int c) throws java.io.IOException {
081: int radix = 10;
082: if (c == '0') {
083: c = read();
084: if (c == 'x' || c == 'X') {
085: radix = 16;
086: c = read();
087: } else if (c == '.' || c == 'e' || c == 'E')
088: ;
089: else
090: radix = 8;
091: }
092: int i = port.pos;
093: if (c >= 0)
094: i--; // Reset to position before current char c.
095: port.pos = i;
096: long ival = Lexer.readDigitsInBuffer(port, radix);
097: boolean digit_seen = port.pos > i;
098: if (digit_seen && port.pos < port.limit) {
099: c = port.buffer[port.pos];
100: if (!Character.isLetterOrDigit((char) c) && c != '.') {
101: double dval;
102: if (ival >= 0)
103: dval = (double) ival;
104: else
105: // FIXME - do we want to use gnu.math??
106: dval = gnu.math.IntNum.valueOf(port.buffer, i,
107: port.pos - i, radix, false).doubleValue();
108: return new Double(dval);
109: }
110: }
111: if (radix != 10)
112: error("invalid character in non-decimal number");
113: StringBuffer str = new StringBuffer(20);
114: if (digit_seen)
115: str.append(port.buffer, i, port.pos - i);
116:
117: /* location of decimal point in str. */
118: int point_loc = -1;
119: int exp = 0;
120: boolean exp_seen = false;
121: for (;;) {
122: c = port.read();
123: if (Character.digit((char) c, radix) >= 0) {
124: digit_seen = true;
125: str.append((char) c);
126: continue;
127: }
128: switch (c) {
129: case '.':
130: if (point_loc >= 0)
131: error("duplicate '.' in number");
132: else {
133: point_loc = str.length();
134: str.append('.');
135: }
136: continue;
137: case 'e':
138: case 'E':
139: int next;
140: if (radix != 10
141: || !((next = port.peek()) == '+' || next == '-' || Character
142: .digit((char) next, 10) >= 0))
143: break;
144: if (!digit_seen)
145: error("mantissa with no digits");
146: exp = readOptionalExponent();
147: exp_seen = true;
148: c = read();
149: break;
150: }
151: break;
152: }
153:
154: if (c >= 0)
155: port.unread();
156:
157: if (exp != 0) {
158: str.append('e');
159: str.append(exp);
160: }
161: return new Double(str.toString());
162: }
163:
164: public String getStringLiteral(char quote)
165: throws java.io.IOException, gnu.text.SyntaxException {
166: int i = port.pos;
167: int start = i;
168: int limit = port.limit;
169: char[] buffer = port.buffer;
170: char c;
171: for (; i < limit; i++) {
172: c = buffer[i];
173: if (c == quote) {
174: port.pos = i + 1;
175: return new String(buffer, start, i - start);
176: }
177: if (c == '\\' || c == '\n' || c == '\r')
178: break;
179: }
180: port.pos = i;
181: StringBuffer sbuf = new StringBuffer();
182: sbuf.append(buffer, start, i - start);
183: for (;;) {
184: int ch = port.read();
185: if (ch == quote)
186: return sbuf.toString();
187: if (ch < 0)
188: eofError("unterminated string literal");
189: if (ch == '\n' || ch == '\r')
190: fatal("string literal not terminated before end of line");
191: if (ch == '\\') {
192: ch = port.read();
193: int val;
194: switch (ch) {
195: case -1:
196: eofError("eof following '\\' in string");
197: case '\n':
198: case '\r':
199: fatal("line terminator following '\\' in string");
200: case '\'':
201: case '\"':
202: case '\\':
203: break;
204: case 'b':
205: ch = '\b';
206: break;
207: case 't':
208: ch = '\t';
209: break;
210: case 'n':
211: ch = '\n';
212: break;
213: case 'f':
214: ch = '\f';
215: break;
216: case 'r':
217: ch = '\r';
218: break;
219: case 'x':
220: case 'u':
221: val = 0;
222: for (i = ch == 'x' ? 2 : 4; --i >= 0;) {
223: int d = port.read();
224: if (d < 0)
225: eofError("eof following '\\" + ((char) ch)
226: + "' in string");
227: d = Character.forDigit((char) d, 16);
228: if (d < 0) {
229: error("invalid char following '\\"
230: + ((char) ch) + "' in string");
231: val = '?';
232: break;
233: }
234: val = 16 * val + d;
235: }
236: ch = val;
237: break;
238: default:
239: if (ch < '0' || ch > '7')
240: break;
241: val = 0;
242: for (i = 3; --i >= 0;) {
243: int d = port.read();
244: if (d < 0)
245: eofError("eof in octal escape in string literal");
246: d = Character.forDigit((char) d, 8);
247: if (d < 0) {
248: port.unread_quick();
249: break;
250: }
251: val = 8 * val + d;
252: }
253: ch = val;
254: break;
255:
256: }
257: }
258: sbuf.append((char) ch);
259: }
260: }
261:
262: public String getIdentifier(int ch) throws java.io.IOException {
263: int i = port.pos;
264: int start = i - 1;
265: int limit = port.limit;
266: char[] buffer = port.buffer;
267: while (i < limit && Character.isJavaIdentifierPart(buffer[i]))
268: i++;
269: port.pos = i;
270: if (i < limit)
271: return new String(buffer, start, i - start);
272: StringBuffer sbuf = new StringBuffer();
273: sbuf.append(buffer, start, i - start);
274: for (;;) {
275: ch = port.read();
276: if (ch < 0)
277: break;
278: if (Character.isJavaIdentifierPart((char) ch))
279: sbuf.append((char) ch);
280: else {
281: port.unread_quick();
282: break;
283: }
284: }
285: return sbuf.toString();
286: }
287:
288: public Object maybeAssignment(Object token)
289: throws java.io.IOException, gnu.text.SyntaxException {
290: int ch = read();
291: if (ch == '=') {
292: error("assignment operation not implemented");
293: // return makeAssignmentOp(token);
294: }
295: if (ch >= 0)
296: port.unread_quick();
297: return token;
298: }
299:
300: /**
301: * Returns the next token.
302: * Returns: <dl>
303: * <dt>end-of-file<dd>Sequence.eofValue
304: * <dt>end-of-line>dd>eolToken
305: * <dt>reserved word<dd> ???
306: * <dt>identifier><dd>a java.lang.String
307: * <dt>punctuator<dd> ???
308: * </dl>
309: * Literals are returned a QuoteExp objects, Specifically:
310: * <dl>
311: * <dt>numeric literal<dd>a QuoteExp of a java.lang.Double value
312: * <dt>boolean literal<dd>a QuoteExp of java.lang.Boolean.TRUE or FALSE
313: * <dt>null literal<dd>a QuoteExp whose value is null
314: * <dt>string literal<dd>a QuoteExp whose value is a String
315: * </dl>
316: */
317:
318: public Object getToken() throws java.io.IOException,
319: gnu.text.SyntaxException {
320: int ch = read();
321: for (;;) {
322: if (ch < 0)
323: return eofToken;
324: if (!Character.isWhitespace((char) ch))
325: break;
326: if (ch == '\r') {
327: prevWasCR = true;
328: return eolToken;
329: }
330: if (ch == '\n' && !prevWasCR)
331: return eolToken;
332: prevWasCR = false;
333: ch = read();
334: }
335:
336: switch (ch) {
337: case '.':
338: ch = port.peek();
339: if (ch >= '0' && ch <= '9')
340: return new QuoteExp(getNumericLiteral('.'));
341: return dotToken;
342: case '0':
343: case '1':
344: case '2':
345: case '3':
346: case '4':
347: case '5':
348: case '6':
349: case '7':
350: case '8':
351: case '9':
352: return new QuoteExp(getNumericLiteral(ch));
353: case '\'':
354: case '\"':
355: return new QuoteExp(getStringLiteral((char) ch));
356: case '(':
357: return lparenToken;
358: case ')':
359: return rparenToken;
360: case '[':
361: return lbracketToken;
362: case ']':
363: return rbracketToken;
364: case '{':
365: return lbraceToken;
366: case '}':
367: return rbraceToken;
368: case '?':
369: return condToken;
370: case ':':
371: return colonToken;
372: case ';':
373: return semicolonToken;
374: case ',':
375: return commaToken;
376: case '=':
377: if (port.peek() == '=') {
378: port.skip_quick();
379: return Reserved.opEqual;
380: }
381: return equalToken;
382: case '!':
383: if (port.peek() == '=') {
384: port.skip_quick();
385: return Reserved.opNotEqual;
386: }
387: return notToken;
388: case '~':
389: return tildeToken;
390: case '*':
391: return maybeAssignment(Reserved.opTimes);
392: case '/':
393: return maybeAssignment(Reserved.opDivide);
394: case '^':
395: return maybeAssignment(Reserved.opBitXor);
396: case '%':
397: return maybeAssignment(Reserved.opRemainder);
398: case '+':
399: if (port.peek() == '+') {
400: port.skip_quick();
401: return maybeAssignment(Reserved.opPlusPlus);
402: }
403: return maybeAssignment(Reserved.opPlus);
404: case '-':
405: if (port.peek() == '-') {
406: port.skip_quick();
407: return maybeAssignment(Reserved.opMinusMinus);
408: }
409: return maybeAssignment(Reserved.opMinus);
410: case '&':
411: if (port.peek() == '&') {
412: port.skip_quick();
413: return maybeAssignment(Reserved.opBoolAnd);
414: }
415: return maybeAssignment(Reserved.opBitAnd);
416: case '|':
417: if (port.peek() == '|') {
418: port.skip_quick();
419: return maybeAssignment(Reserved.opBoolOr);
420: }
421: return maybeAssignment(Reserved.opBitOr);
422: case '>':
423: ch = port.peek();
424: switch (ch) {
425: case '>':
426: port.skip_quick();
427: if (port.peek() == '>') {
428: port.skip_quick();
429: return maybeAssignment(Reserved.opRshiftUnsigned);
430: }
431: return maybeAssignment(Reserved.opRshiftSigned);
432: case '=':
433: port.skip_quick();
434: return Reserved.opGreaterEqual;
435: default:
436: return Reserved.opGreater;
437: }
438: case '<':
439: ch = port.peek();
440: switch (ch) {
441: case '<':
442: port.skip_quick();
443: return maybeAssignment(Reserved.opLshift);
444: case '=':
445: port.skip_quick();
446: return Reserved.opLessEqual;
447: default:
448: return Reserved.opLess;
449: }
450: }
451: if (Character.isJavaIdentifierStart((char) ch)) {
452: String word = getIdentifier(ch).intern();
453: Object token = checkReserved(word);
454: if (token != null)
455: return token;
456: return word;
457: }
458: return Char.make((char) ch);
459: }
460:
461: public static Object getToken(InPort inp)
462: throws java.io.IOException, gnu.text.SyntaxException {
463: return new Lexer(inp).getToken();
464: }
465:
466: public static void main(String[] args) {
467: InPort inp = InPort.inDefault();
468: Lexer reader = new Lexer(inp);
469: for (;;) {
470: try {
471: Object token = reader.getToken();
472: OutPort out = OutPort.outDefault();
473: out.print("token:");
474: out.print(token);
475: out.println(" [class:" + token.getClass() + "]");
476: if (token == Sequence.eofValue)
477: break;
478: } catch (Exception ex) {
479: System.err.println("caught exception:" + ex);
480: return;
481: }
482: }
483: }
484: }
|