001: /*
002: * Javassist, a Java-bytecode translator toolkit.
003: * Copyright (C) 1999-2006 Shigeru Chiba. All Rights Reserved.
004: *
005: * The contents of this file are subject to the Mozilla Public License Version
006: * 1.1 (the "License"); you may not use this file except in compliance with
007: * the License. Alternatively, the contents of this file may be used under
008: * the terms of the GNU Lesser General Public License Version 2.1 or later.
009: *
010: * Software distributed under the License is distributed on an "AS IS" basis,
011: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
012: * for the specific language governing rights and limitations under the
013: * License.
014: */
015:
016: package javassist.compiler;
017:
/**
 * One token produced by {@link Lex}.
 *
 * <p>Instances are plain mutable records.  They are chained through
 * {@link #next} when the lexer buffers tokens for look-ahead.
 */
class Token {
    /** The following token in the look-ahead chain, or {@code null} if none. */
    public Token next;

    /** The identifier of this token as assigned by the lexer. */
    public int tokenId;

    /** Value of an integer or character constant. */
    public long longValue;

    /** Value of a floating-point constant. */
    public double doubleValue;

    /** Text of a string literal or an identifier. */
    public String textValue;
}
026:
027: public class Lex implements TokenId {
028: private int lastChar;
029: private StringBuffer textBuffer;
030: private Token currentToken;
031: private Token lookAheadTokens;
032:
033: private String input;
034: private int position, maxlen, lineNumber;
035:
036: /**
037: * Constructs a lexical analyzer.
038: */
039: public Lex(String s) {
040: lastChar = -1;
041: textBuffer = new StringBuffer();
042: currentToken = new Token();
043: lookAheadTokens = null;
044:
045: input = s;
046: position = 0;
047: maxlen = s.length();
048: lineNumber = 0;
049: }
050:
051: public int get() {
052: if (lookAheadTokens == null)
053: return get(currentToken);
054: else {
055: Token t;
056: currentToken = t = lookAheadTokens;
057: lookAheadTokens = lookAheadTokens.next;
058: return t.tokenId;
059: }
060: }
061:
062: /**
063: * Looks at the next token.
064: */
065: public int lookAhead() {
066: return lookAhead(0);
067: }
068:
069: public int lookAhead(int i) {
070: Token tk = lookAheadTokens;
071: if (tk == null) {
072: lookAheadTokens = tk = currentToken; // reuse an object!
073: tk.next = null;
074: get(tk);
075: }
076:
077: for (; i-- > 0; tk = tk.next)
078: if (tk.next == null) {
079: Token tk2;
080: tk.next = tk2 = new Token();
081: get(tk2);
082: }
083:
084: currentToken = tk;
085: return tk.tokenId;
086: }
087:
088: public String getString() {
089: return currentToken.textValue;
090: }
091:
092: public long getLong() {
093: return currentToken.longValue;
094: }
095:
096: public double getDouble() {
097: return currentToken.doubleValue;
098: }
099:
100: private int get(Token token) {
101: int t;
102: do {
103: t = readLine(token);
104: } while (t == '\n');
105: token.tokenId = t;
106: return t;
107: }
108:
109: private int readLine(Token token) {
110: int c = getNextNonWhiteChar();
111: if (c < 0)
112: return c;
113: else if (c == '\n') {
114: ++lineNumber;
115: return '\n';
116: } else if (c == '\'')
117: return readCharConst(token);
118: else if (c == '"')
119: return readStringL(token);
120: else if ('0' <= c && c <= '9')
121: return readNumber(c, token);
122: else if (c == '.') {
123: c = getc();
124: if ('0' <= c && c <= '9') {
125: StringBuffer tbuf = textBuffer;
126: tbuf.setLength(0);
127: tbuf.append('.');
128: return readDouble(tbuf, c, token);
129: } else {
130: ungetc(c);
131: return readSeparator('.');
132: }
133: } else if (Character.isJavaIdentifierStart((char) c))
134: return readIdentifier(c, token);
135: else
136: return readSeparator(c);
137: }
138:
139: private int getNextNonWhiteChar() {
140: int c;
141: do {
142: c = getc();
143: if (c == '/') {
144: c = getc();
145: if (c == '/')
146: do {
147: c = getc();
148: } while (c != '\n' && c != '\r' && c != -1);
149: else if (c == '*')
150: while (true) {
151: c = getc();
152: if (c == -1)
153: break;
154: else if (c == '*')
155: if ((c = getc()) == '/') {
156: c = ' ';
157: break;
158: } else
159: ungetc(c);
160: }
161: else {
162: ungetc(c);
163: c = '/';
164: }
165: }
166: } while (isBlank(c));
167: return c;
168: }
169:
170: private int readCharConst(Token token) {
171: int c;
172: int value = 0;
173: while ((c = getc()) != '\'')
174: if (c == '\\')
175: value = readEscapeChar();
176: else if (c < 0x20) {
177: if (c == '\n')
178: ++lineNumber;
179:
180: return BadToken;
181: } else
182: value = c;
183:
184: token.longValue = value;
185: return CharConstant;
186: }
187:
188: private int readEscapeChar() {
189: int c = getc();
190: if (c == 'n')
191: c = '\n';
192: else if (c == 't')
193: c = '\t';
194: else if (c == 'r')
195: c = '\r';
196: else if (c == 'f')
197: c = '\f';
198: else if (c == '\n')
199: ++lineNumber;
200:
201: return c;
202: }
203:
204: private int readStringL(Token token) {
205: int c;
206: StringBuffer tbuf = textBuffer;
207: tbuf.setLength(0);
208: for (;;) {
209: while ((c = getc()) != '"') {
210: if (c == '\\')
211: c = readEscapeChar();
212: else if (c == '\n' || c < 0) {
213: ++lineNumber;
214: return BadToken;
215: }
216:
217: tbuf.append((char) c);
218: }
219:
220: for (;;) {
221: c = getc();
222: if (c == '\n')
223: ++lineNumber;
224: else if (!isBlank(c))
225: break;
226: }
227:
228: if (c != '"') {
229: ungetc(c);
230: break;
231: }
232: }
233:
234: token.textValue = tbuf.toString();
235: return StringL;
236: }
237:
238: private int readNumber(int c, Token token) {
239: long value = 0;
240: int c2 = getc();
241: if (c == '0')
242: if (c2 == 'X' || c2 == 'x')
243: for (;;) {
244: c = getc();
245: if ('0' <= c && c <= '9')
246: value = value * 16 + (long) (c - '0');
247: else if ('A' <= c && c <= 'F')
248: value = value * 16 + (long) (c - 'A' + 10);
249: else if ('a' <= c && c <= 'f')
250: value = value * 16 + (long) (c - 'a' + 10);
251: else {
252: token.longValue = value;
253: if (c == 'L' || c == 'l')
254: return LongConstant;
255: else {
256: ungetc(c);
257: return IntConstant;
258: }
259: }
260: }
261: else if ('0' <= c2 && c2 <= '7') {
262: value = c2 - '0';
263: for (;;) {
264: c = getc();
265: if ('0' <= c && c <= '7')
266: value = value * 8 + (long) (c - '0');
267: else {
268: token.longValue = value;
269: if (c == 'L' || c == 'l')
270: return LongConstant;
271: else {
272: ungetc(c);
273: return IntConstant;
274: }
275: }
276: }
277: }
278:
279: value = c - '0';
280: while ('0' <= c2 && c2 <= '9') {
281: value = value * 10 + c2 - '0';
282: c2 = getc();
283: }
284:
285: token.longValue = value;
286: if (c2 == 'F' || c2 == 'f') {
287: token.doubleValue = (double) value;
288: return FloatConstant;
289: } else if (c2 == 'E' || c2 == 'e' || c2 == 'D' || c2 == 'd'
290: || c2 == '.') {
291: StringBuffer tbuf = textBuffer;
292: tbuf.setLength(0);
293: tbuf.append(value);
294: return readDouble(tbuf, c2, token);
295: } else if (c2 == 'L' || c2 == 'l')
296: return LongConstant;
297: else {
298: ungetc(c2);
299: return IntConstant;
300: }
301: }
302:
303: private int readDouble(StringBuffer sbuf, int c, Token token) {
304: if (c != 'E' && c != 'e' && c != 'D' && c != 'd') {
305: sbuf.append((char) c);
306: for (;;) {
307: c = getc();
308: if ('0' <= c && c <= '9')
309: sbuf.append((char) c);
310: else
311: break;
312: }
313: }
314:
315: if (c == 'E' || c == 'e') {
316: sbuf.append((char) c);
317: c = getc();
318: if (c == '+' || c == '-') {
319: sbuf.append((char) c);
320: c = getc();
321: }
322:
323: while ('0' <= c && c <= '9') {
324: sbuf.append((char) c);
325: c = getc();
326: }
327: }
328:
329: try {
330: token.doubleValue = Double.parseDouble(sbuf.toString());
331: } catch (NumberFormatException e) {
332: return BadToken;
333: }
334:
335: if (c == 'F' || c == 'f')
336: return FloatConstant;
337: else {
338: if (c != 'D' && c != 'd')
339: ungetc(c);
340:
341: return DoubleConstant;
342: }
343: }
344:
345: // !"#$%&'( )*+,-./0 12345678 9:;<=>?
346: private static final int[] equalOps = { NEQ, 0, 0, 0, MOD_E, AND_E,
347: 0, 0, 0, MUL_E, PLUS_E, 0, MINUS_E, 0, DIV_E, 0, 0, 0, 0,
348: 0, 0, 0, 0, 0, 0, 0, 0, LE, EQ, GE, 0 };
349:
350: private int readSeparator(int c) {
351: int c2, c3;
352: if ('!' <= c && c <= '?') {
353: int t = equalOps[c - '!'];
354: if (t == 0)
355: return c;
356: else {
357: c2 = getc();
358: if (c == c2)
359: switch (c) {
360: case '=':
361: return EQ;
362: case '+':
363: return PLUSPLUS;
364: case '-':
365: return MINUSMINUS;
366: case '&':
367: return ANDAND;
368: case '<':
369: c3 = getc();
370: if (c3 == '=')
371: return LSHIFT_E;
372: else {
373: ungetc(c3);
374: return LSHIFT;
375: }
376: case '>':
377: c3 = getc();
378: if (c3 == '=')
379: return RSHIFT_E;
380: else if (c3 == '>') {
381: c3 = getc();
382: if (c3 == '=')
383: return ARSHIFT_E;
384: else {
385: ungetc(c3);
386: return ARSHIFT;
387: }
388: } else {
389: ungetc(c3);
390: return RSHIFT;
391: }
392: default:
393: break;
394: }
395: else if (c2 == '=')
396: return t;
397: }
398: } else if (c == '^') {
399: c2 = getc();
400: if (c2 == '=')
401: return EXOR_E;
402: } else if (c == '|') {
403: c2 = getc();
404: if (c2 == '=')
405: return OR_E;
406: else if (c2 == '|')
407: return OROR;
408: } else
409: return c;
410:
411: ungetc(c2);
412: return c;
413: }
414:
415: private int readIdentifier(int c, Token token) {
416: StringBuffer tbuf = textBuffer;
417: tbuf.setLength(0);
418:
419: do {
420: tbuf.append((char) c);
421: c = getc();
422: } while (Character.isJavaIdentifierPart((char) c));
423:
424: ungetc(c);
425:
426: String name = tbuf.toString();
427: int t = ktable.lookup(name);
428: if (t >= 0)
429: return t;
430: else {
431: /* tbuf.toString() is executed quickly since it does not
432: * need memory copy. Using a hand-written extensible
433: * byte-array class instead of StringBuffer is not a good idea
434: * for execution speed. Converting a byte array to a String
435: * object is very slow. Using an extensible char array
436: * might be OK.
437: */
438: token.textValue = name;
439: return Identifier;
440: }
441: }
442:
443: private static final KeywordTable ktable = new KeywordTable();
444:
445: static {
446: ktable.append("abstract", ABSTRACT);
447: ktable.append("boolean", BOOLEAN);
448: ktable.append("break", BREAK);
449: ktable.append("byte", BYTE);
450: ktable.append("case", CASE);
451: ktable.append("catch", CATCH);
452: ktable.append("char", CHAR);
453: ktable.append("class", CLASS);
454: ktable.append("const", CONST);
455: ktable.append("continue", CONTINUE);
456: ktable.append("default", DEFAULT);
457: ktable.append("do", DO);
458: ktable.append("double", DOUBLE);
459: ktable.append("else", ELSE);
460: ktable.append("extends", EXTENDS);
461: ktable.append("false", FALSE);
462: ktable.append("final", FINAL);
463: ktable.append("finally", FINALLY);
464: ktable.append("float", FLOAT);
465: ktable.append("for", FOR);
466: ktable.append("goto", GOTO);
467: ktable.append("if", IF);
468: ktable.append("implements", IMPLEMENTS);
469: ktable.append("import", IMPORT);
470: ktable.append("instanceof", INSTANCEOF);
471: ktable.append("int", INT);
472: ktable.append("interface", INTERFACE);
473: ktable.append("long", LONG);
474: ktable.append("native", NATIVE);
475: ktable.append("new", NEW);
476: ktable.append("null", NULL);
477: ktable.append("package", PACKAGE);
478: ktable.append("private", PRIVATE);
479: ktable.append("protected", PROTECTED);
480: ktable.append("public", PUBLIC);
481: ktable.append("return", RETURN);
482: ktable.append("short", SHORT);
483: ktable.append("static", STATIC);
484: ktable.append("strictfp", STRICT);
485: ktable.append("super", SUPER);
486: ktable.append("switch", SWITCH);
487: ktable.append("synchronized", SYNCHRONIZED);
488: ktable.append("this", THIS);
489: ktable.append("throw", THROW);
490: ktable.append("throws", THROWS);
491: ktable.append("transient", TRANSIENT);
492: ktable.append("true", TRUE);
493: ktable.append("try", TRY);
494: ktable.append("void", VOID);
495: ktable.append("volatile", VOLATILE);
496: ktable.append("while", WHILE);
497: }
498:
499: private static boolean isBlank(int c) {
500: return c == ' ' || c == '\t' || c == '\f' || c == '\r'
501: || c == '\n';
502: }
503:
504: private static boolean isDigit(int c) {
505: return '0' <= c && c <= '9';
506: }
507:
508: private void ungetc(int c) {
509: lastChar = c;
510: }
511:
512: public String getTextAround() {
513: int begin = position - 10;
514: if (begin < 0)
515: begin = 0;
516:
517: int end = position + 10;
518: if (end > maxlen)
519: end = maxlen;
520:
521: return input.substring(begin, end);
522: }
523:
524: private int getc() {
525: if (lastChar < 0)
526: if (position < maxlen)
527: return input.charAt(position++);
528: else
529: return -1;
530: else {
531: int c = lastChar;
532: lastChar = -1;
533: return c;
534: }
535: }
536: }
|