0001: /*
0002: * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
0003: *
0004: * This file is part of Resin(R) Open Source
0005: *
0006: * Each copy or derived work must preserve the copyright notice and this
0007: * notice unmodified.
0008: *
0009: * Resin Open Source is free software; you can redistribute it and/or modify
0010: * it under the terms of the GNU General Public License as published by
0011: * the Free Software Foundation; either version 2 of the License, or
0012: * (at your option) any later version.
0013: *
0014: * Resin Open Source is distributed in the hope that it will be useful,
0015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
0017: * of NON-INFRINGEMENT. See the GNU General Public License for more
0018: * details.
0019: *
0020: * You should have received a copy of the GNU General Public License
0021: * along with Resin Open Source; if not, write to the
0022: * Free SoftwareFoundation, Inc.
0023: * 59 Temple Place, Suite 330
0024: * Boston, MA 02111-1307 USA
0025: *
0026: * @author Scott Ferguson
0027: */
0028:
0029: package com.caucho.es.parser;
0030:
0031: import com.caucho.es.*;
0032: import com.caucho.java.LineMap;
0033: import com.caucho.util.CharBuffer;
0034: import com.caucho.util.L10N;
0035: import com.caucho.vfs.ReadStream;
0036:
0037: import java.io.CharConversionException;
0038: import java.io.IOException;
0039: import java.util.ArrayList;
0040: import java.util.HashMap;
0041: import java.util.regex.Pattern;
0042:
0043: /**
0044: * JavaScript lexer.
0045: */
0046: class Lexer {
0047: private static final L10N L = new L10N(Lexer.class);
0048:
0049: final static int ERROR = -3;
0050: final static int START = -2;
0051: final static int EOF = -1;
0052:
0053: final static int RESERVED = 256;
0054: final static int LITERAL = RESERVED + 1;
0055: final static int REGEXP = LITERAL + 1;
0056: final static int IDENTIFIER = REGEXP + 1;
0057: final static int THIS = IDENTIFIER + 1;
0058:
0059: final static int HASH_DEF = THIS + 1;
0060: final static int HASH_REF = HASH_DEF + 1;
0061:
0062: final static int BIN_OP = HASH_REF + 1;
0063: final static int UNARY_OP = BIN_OP + 1;
0064: final static int BANDU_OP = UNARY_OP + 1;
0065:
0066: final static int RSHIFT = BANDU_OP + 1;
0067: final static int URSHIFT = RSHIFT + 1;
0068: final static int LSHIFT = URSHIFT + 1;
0069: final static int BITAND = LSHIFT + 1;
0070: final static int BITOR = BITAND + 1;
0071:
0072: final static int GEQ = BITOR + 1;
0073: final static int LEQ = GEQ + 1;
0074: final static int EQ = LEQ + 1;
0075: final static int NEQ = EQ + 1;
0076:
0077: final static int STRICT_EQ = NEQ + 1;
0078: final static int STRICT_NEQ = STRICT_EQ + 1;
0079:
0080: final static int AND = STRICT_NEQ + 1;
0081: final static int OR = AND + 1;
0082:
0083: final static int ASSIGN_OP = OR + 1;
0084:
0085: final static int PREFIX = ASSIGN_OP + 1;
0086: final static int POSTFIX = PREFIX + 1;
0087: final static int DELETE = POSTFIX + 1;
0088: final static int VOID = DELETE + 1;
0089: final static int TYPEOF = VOID + 1;
0090:
0091: final static int IF = TYPEOF + 1;
0092: final static int ELSE = IF + 1;
0093:
0094: final static int SWITCH = ELSE + 1;
0095: final static int CASE = SWITCH + 1;
0096: final static int DEFAULT = CASE + 1;
0097:
0098: final static int WHILE = DEFAULT + 1;
0099: final static int DO = WHILE + 1;
0100: final static int FOR = DO + 1;
0101: final static int IN = FOR + 1;
0102: final static int BREAK = IN + 1;
0103: final static int CONTINUE = BREAK + 1;
0104:
0105: final static int FUNCTION = CONTINUE + 1;
0106: final static int CONSTRUCTOR = FUNCTION;
0107: final static int RETURN = CONSTRUCTOR + 1;
0108:
0109: final static int NEW = RETURN + 1;
0110: final static int VAR = NEW + 1;
0111: final static int WITH = VAR + 1;
0112:
0113: final static int NULL = WITH + 1;
0114: final static int UNDEFINED = NULL + 1;
0115: final static int TRUE = UNDEFINED + 1;
0116: final static int FALSE = TRUE + 1;
0117: final static int EVAL = FALSE + 1;
0118:
0119: final static int CLASS = EVAL + 1;
0120: final static int EXTENDS = CLASS + 1;
0121:
0122: final static int SYNCHRONIZED = EXTENDS + 1;
0123:
0124: final static int TRY = SYNCHRONIZED + 1;
0125: final static int CATCH = TRY + 1;
0126: final static int FINALLY = CATCH + 1;
0127: final static int THROW = FINALLY + 1;
0128:
0129: final static int IMPORT = THROW + 1;
0130: final static int STATIC = IMPORT + 1;
0131:
0132: final static int LAST_LEXEME = STATIC;
0133:
// Shared operator table, filled by opsPut(): operator spelling (CharBuffer)
// mapped to its Op descriptor.
static HashMap ops;
// Shared reserved-word table, filled by resPut(): word (CharBuffer) mapped to
// its lexeme code (Integer).
static HashMap reserved;

Global resin;
// Source stream handed to the constructor.
ReadStream is;
// Both start at -1; pushMacro() treats peek >= 0 as "a character is pending".
// NOTE(review): presumably one- and two-character pushback slots -- confirm
// against read()/ungetc().
int peek = -1;
int peek2 = -1;

// Macros suspended by pushMacro() while a newer macro is active.
ArrayList macros = new ArrayList();

// Text of the macro currently installed by pushMacro(); null when none.
CharBuffer macroText;
// Reset to 0 whenever a macro is pushed.
// NOTE(review): presumably the read offset into macroText -- confirm.
int macroIndex;
// Value of 'line' at the moment the current macro was pushed.
int macroOldLine;

// Flags passed to Pattern.compile() when lex() validates a regexp literal.
int _flags;

int state;
int lbrace;
int stringClose;
boolean isRegexp;

// Optional map used by getFilename()/getLine() to translate positions.
LineMap lineMap;
// Current file name, and the file names recorded for the previous lexeme and
// for the start of the lexeme being scanned.
String filename;
String lastFilename;
String beginFilename;

// Line of the previous lexeme, start line/column of the lexeme being scanned,
// and the current line/column. newline() increments line and zeroes lineCh.
int lastLine;
int beginLine;
int beginLineCh;
int line;
int lineCh;

Op op;
// Buffered lookahead lexeme; START means nothing is buffered.
int lexeme;
// Most recent lexeme handed out by peek()/next(); START means none.
int lastLexeme;
// Scratch buffer in which lexString() accumulates string/regexp text.
CharBuffer text;
// Text of the current source line; cleared by newline(), read by getToken().
CharBuffer lineText = new CharBuffer();
// Set once lex() has read -1 from the input.
boolean isEof = false;
// Value returned by getId().
ESId id;
// Value of the most recent LITERAL/REGEXP lexeme.
ESBase literal;
// Number parsed from a sharp variable (#n= or #n#).
int intValue;
// True if a newline was skipped while scanning for the current lexeme.
boolean hasLf;
// True when a '/' at this point starts a regexp literal rather than division.
boolean regexpOk;
String writeln;

// Scratch buffer used by lex() to collect '#' pragma names.
CharBuffer temp = new CharBuffer();
0180:
0181: Lexer(ReadStream is, String filename, int line, LineMap lineMap) {
0182: this .filename = filename;
0183: this .line = line;
0184: this .lastFilename = filename;
0185: this .lastLine = line;
0186: this .lineMap = lineMap;
0187: this .is = is;
0188: peek = -1;
0189: peek2 = -1;
0190: text = new CharBuffer();
0191: lexeme = START;
0192: lastLexeme = START;
0193: regexpOk = true;
0194: macroText = null;
0195: macroIndex = 0;
0196:
0197: // Initialize the operator table
0198: if (ops == null) {
0199: ops = new HashMap();
0200: opsPut(".", '.', '.', Parser.PREC_DOT, false);
0201: opsPut("++", '+', POSTFIX, Parser.PREC_DOT, false);
0202: opsPut("--", '-', POSTFIX, Parser.PREC_DOT, false);
0203:
0204: opsPut("@", '@', '@', Parser.PREC_DOT, false);
0205:
0206: opsPut("~", '~', UNARY_OP, Parser.PREC_UMINUS, false);
0207: opsPut("!", '!', UNARY_OP, Parser.PREC_UMINUS, false);
0208:
0209: opsPut("*", '*', BIN_OP, Parser.PREC_TIMES, false);
0210: opsPut("/", '/', BIN_OP, Parser.PREC_TIMES, false);
0211: opsPut("%", '%', BIN_OP, Parser.PREC_TIMES, false);
0212:
0213: opsPut("+", '+', BANDU_OP, Parser.PREC_PLUS, false);
0214: opsPut("-", '-', BANDU_OP, Parser.PREC_PLUS, false);
0215:
0216: opsPut(">>", RSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
0217: opsPut(">>>", URSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
0218: opsPut("<<", LSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
0219:
0220: opsPut(">", '>', BIN_OP, Parser.PREC_CMP, false);
0221: opsPut(">=", GEQ, BIN_OP, Parser.PREC_CMP, false);
0222: opsPut("<", '<', BIN_OP, Parser.PREC_CMP, false);
0223: opsPut("<=", LEQ, BIN_OP, Parser.PREC_CMP, false);
0224: opsPut("==", EQ, BIN_OP, Parser.PREC_CMP, false);
0225: opsPut("!=", NEQ, BIN_OP, Parser.PREC_CMP, false);
0226: opsPut("===", STRICT_EQ, BIN_OP, Parser.PREC_CMP, false);
0227: opsPut("!==", STRICT_NEQ, BIN_OP, Parser.PREC_CMP, false);
0228:
0229: opsPut("&", '&', BIN_OP, Parser.PREC_BITAND, false);
0230: opsPut("^", '^', BIN_OP, Parser.PREC_BITXOR, false);
0231: opsPut("|", '|', BIN_OP, Parser.PREC_BITOR, false);
0232:
0233: opsPut("&&", AND, BIN_OP, Parser.PREC_AND, false);
0234: opsPut("||", OR, BIN_OP, Parser.PREC_OR, false);
0235:
0236: opsPut("?", '?', '?', Parser.PREC_COND, false);
0237:
0238: opsPut("=", '=', '=', Parser.PREC_ASSIGN, true);
0239: opsPut("*=", '*', '=', Parser.PREC_ASSIGN, true);
0240: opsPut("/=", '/', '=', Parser.PREC_ASSIGN, true);
0241: opsPut("%=", '%', '=', Parser.PREC_ASSIGN, true);
0242: opsPut("+=", '+', '=', Parser.PREC_ASSIGN, true);
0243: opsPut("-=", '-', '=', Parser.PREC_ASSIGN, true);
0244: opsPut(">>=", RSHIFT, '=', Parser.PREC_ASSIGN, true);
0245: opsPut(">>>=", URSHIFT, '=', Parser.PREC_ASSIGN, true);
0246: opsPut("<<=", LSHIFT, '=', Parser.PREC_ASSIGN, true);
0247: opsPut("&=", '&', '=', Parser.PREC_ASSIGN, true);
0248: opsPut("^=", '^', '=', Parser.PREC_ASSIGN, true);
0249: opsPut("|=", '|', '=', Parser.PREC_ASSIGN, true);
0250:
0251: opsPut(",", ',', ',', Parser.PREC_COMMA, false);
0252:
0253: reserved = new HashMap();
0254: resPut("new", NEW);
0255: resPut("var", VAR);
0256: resPut("delete", DELETE);
0257: resPut("void", VOID);
0258: resPut("typeof", TYPEOF);
0259:
0260: resPut("if", IF);
0261: resPut("else", ELSE);
0262: resPut("switch", SWITCH);
0263: resPut("case", CASE);
0264: resPut("default", DEFAULT);
0265:
0266: resPut("while", WHILE);
0267: resPut("do", DO);
0268: resPut("for", FOR);
0269: resPut("in", IN);
0270: resPut("break", BREAK);
0271: resPut("continue", CONTINUE);
0272:
0273: resPut("null", NULL);
0274: resPut("undefined", UNDEFINED);
0275: resPut("true", TRUE);
0276: resPut("false", FALSE);
0277: resPut("this", THIS);
0278: resPut("eval", EVAL);
0279:
0280: resPut("function", FUNCTION);
0281: //resPut("constructor", CONSTRUCTOR);
0282: resPut("return", RETURN);
0283:
0284: resPut("with", WITH);
0285:
0286: resPut("class", CLASS);
0287: resPut("extends", EXTENDS);
0288:
0289: resPut("synchronized", SYNCHRONIZED);
0290:
0291: resPut("try", TRY);
0292: resPut("catch", CATCH);
0293: resPut("finally", FINALLY);
0294: resPut("throw", THROW);
0295:
0296: resPut("import", IMPORT);
0297: resPut("static", STATIC);
0298:
0299: resPut("const", RESERVED);
0300: resPut("debugger", RESERVED);
0301: resPut("enum", RESERVED);
0302: resPut("export", RESERVED);
0303: resPut("super", RESERVED);
0304: /*
0305: resPut("boolean", RESERVED);
0306: resPut("byte", RESERVED);
0307: resPut("char", RESERVED);
0308: resPut("double", RESERVED);
0309: resPut("float", RESERVED);
0310: resPut("int", RESERVED);
0311: resPut("long", RESERVED);
0312: resPut("short", RESERVED);
0313: */
0314: resPut("public", RESERVED);
0315: resPut("private", RESERVED);
0316: resPut("protected", RESERVED);
0317: resPut("throws", RESERVED);
0318: }
0319: }
0320:
/**
 * Creates a lexer without a line map.
 *
 * @param is the source stream
 * @param filename the name reported for the source
 * @param line the line number of the first line of the stream
 */
Lexer(ReadStream is, String filename, int line) {
  this(is, filename, line, null);
}
0324:
/**
 * Creates a lexer with no file name, starting at line 1.
 *
 * @param is the source stream
 * @param lineMap map used to translate reported positions
 */
Lexer(ReadStream is, LineMap lineMap) {
  this(is, null, 1, lineMap);
}
0328:
0329: void setLineMap(LineMap lineMap) {
0330: this .lineMap = lineMap;
0331: }
0332:
0333: private void opsPut(String name, int code, int lex, int prec,
0334: boolean flag) {
0335: ops.put(new CharBuffer(name), new Op(code, lex, prec, flag));
0336: }
0337:
0338: private void resPut(String name, int code) {
0339: reserved.put(new CharBuffer(name), new Integer(code));
0340: }
0341:
0342: int peek() throws ESParseException {
0343: try {
0344: if (lexeme == START) {
0345: lexeme = lex();
0346: }
0347:
0348: lastLexeme = lexeme;
0349:
0350: return lexeme;
0351: } catch (ESParseException e) {
0352: throw e;
0353: } catch (Exception e) {
0354: e.printStackTrace();
0355: throw error(e.toString());
0356: }
0357: }
0358:
0359: int next() throws ESParseException {
0360: try {
0361: int value = lexeme;
0362:
0363: if (value == START) {
0364: value = lex();
0365: }
0366:
0367: lastLexeme = value;
0368: lexeme = START;
0369:
0370: lastFilename = beginFilename;
0371: lastLine = beginLine;
0372:
0373: return value;
0374: } catch (ESParseException e) {
0375: throw e;
0376: } catch (Exception e) {
0377: e.printStackTrace();
0378: throw error(e == null ? "" : e.toString());
0379: }
0380: }
0381:
0382: int prev() {
0383: if (lastLexeme == START)
0384: throw new RuntimeException();
0385:
0386: lexeme = lastLexeme;
0387:
0388: lastLexeme = START;
0389:
0390: return lexeme;
0391: }
0392:
0393: int last() {
0394: if (lastLexeme == START)
0395: throw new RuntimeException();
0396:
0397: return lastLexeme;
0398: }
0399:
0400: private int peekCh() throws ESParseException {
0401: try {
0402: int ch = read();
0403: ungetc(ch);
0404: return (ch);
0405: } catch (Exception e) {
0406: return -1;
0407: }
0408: }
0409:
0410: /**
0411: * Returns the next lexeme
0412: */
0413: private int lex() throws ESParseException {
0414: lastFilename = beginFilename;
0415: lastLine = beginLine;
0416:
0417: hasLf = false;
0418:
0419: while (true) {
0420: beginFilename = filename;
0421: beginLine = line;
0422: beginLineCh = lineCh;
0423:
0424: int ch = read();
0425:
0426: switch (ch) {
0427: case -1:
0428: isEof = true;
0429: return EOF;
0430:
0431: case ' ':
0432: case '\t':
0433: case '\f':
0434: case 0x0b: /* vertical tab */
0435: break;
0436:
0437: case '\n':
0438: newline();
0439: hasLf = true;
0440: break;
0441:
0442: case '+':
0443: case '-':
0444: case '*':
0445: case '!':
0446: case ',':
0447: case '^':
0448: case '<':
0449: case '>':
0450: case '&':
0451: case '|':
0452: case '=':
0453: case '~':
0454: case '?':
0455: regexpOk = true; // exception ++/--
0456: return lexOp(ch);
0457:
0458: case ')':
0459: case ']':
0460: regexpOk = false;
0461: return ch;
0462:
0463: case ':':
0464: case ';':
0465: case '(':
0466: case '[':
0467: case '{':
0468: case '}':
0469: regexpOk = true;
0470: return ch;
0471:
0472: case '.': {
0473: int ch2 = read();
0474:
0475: if (ch2 >= '0' && ch2 <= '9') {
0476: regexpOk = false;
0477: return lexFloat(0, ch2);
0478: } else {
0479: regexpOk = true;
0480: ungetc(ch2);
0481: return lexOp(ch);
0482: }
0483: }
0484:
0485: case '/': {
0486: int ch2 = read();
0487:
0488: if (ch2 == '/') {
0489: for (ch2 = read(); ch2 > 0 && ch2 != '\n'; ch2 = read()) {
0490: }
0491:
0492: ungetc(ch2);
0493: break;
0494: } else if (ch2 == '*') {
0495: boolean seenStar = false;
0496: for (ch2 = read(); ch2 > 0
0497: && (!seenStar || ch2 != '/'); ch2 = read()) {
0498: if (ch2 == '/') {
0499: ch2 = read();
0500: if (ch2 == '*')
0501: throw error(L.l("comments can't nest"));
0502: }
0503:
0504: seenStar = ch2 == '*';
0505:
0506: if (ch2 == '\n') {
0507: newline();
0508: hasLf = true;
0509: }
0510: }
0511: break;
0512: } else if (regexpOk) {
0513: regexpOk = false;
0514:
0515: ungetc(ch2);
0516: lexString('/', null, true, false);
0517:
0518: readRegexpFlags();
0519: try {
0520: Pattern regexp = Pattern.compile(literal
0521: .toString(), _flags);
0522: // checking for errors
0523: } catch (Exception e) {
0524: // e.printStackTrace();
0525: throw error(String.valueOf(e));
0526: }
0527:
0528: return REGEXP;
0529: } else {
0530: ungetc(ch2);
0531: return lexOp(ch);
0532: }
0533: }
0534:
0535: case '0':
0536: case '1':
0537: case '2':
0538: case '3':
0539: case '4':
0540: case '5':
0541: case '6':
0542: case '7':
0543: case '8':
0544: case '9':
0545: regexpOk = false;
0546: return lexNumber(ch);
0547:
0548: case '"':
0549: case '\'':
0550: regexpOk = false;
0551: return lexString((char) ch, null, false, false);
0552:
0553: case '@': {
0554: int ch2 = read();
0555:
0556: switch (ch2) {
0557: case '"':
0558: CharBuffer macro = new CharBuffer();
0559: macro.append('(');
0560: interpolate(macro, '"', null, "\"", "\"", false,
0561: false);
0562: macro.append(')');
0563: pushMacro(macro);
0564: break;
0565:
0566: case '\'':
0567: macro = new CharBuffer();
0568: macro.append('(');
0569: interpolate(macro, '\'', null, "\'", "\'", false,
0570: false);
0571: macro.append(')');
0572: pushMacro(macro);
0573: break;
0574:
0575: case '@':
0576: if ((ch2 = read()) < 0)
0577: throw error(L.l("unexpected end of file"));
0578: switch (ch2) {
0579: case '{':
0580: ch2 = '}';
0581: break;
0582: case '<':
0583: ch2 = '>';
0584: break;
0585: case '(':
0586: ch2 = ')';
0587: break;
0588: case '[':
0589: ch2 = ']';
0590: break;
0591: }
0592:
0593: return lexString((char) ch2, null, true, false);
0594:
0595: case '<':
0596: if ((ch2 = read()) != '<')
0597: throw error(L.l("illegal character at `@'"));
0598: if (scanMultiline())
0599: return LITERAL;
0600: break;
0601:
0602: case '/':
0603: macro = new CharBuffer();
0604: macro.append("new RegExp(");
0605: interpolate(macro, '/', null, "@@/", "/", true,
0606: false);
0607: macro.append(",");
0608: macro.append(readRegexpFlags());
0609: macro.append(")");
0610: pushMacro(macro);
0611: break;
0612:
0613: default:
0614: return lexOp('@');
0615: }
0616: break;
0617: }
0618:
0619: case '%': {
0620: int ch2 = read();
0621:
0622: regexpOk = true;
0623: ungetc(ch2);
0624: return lexOp(ch);
0625: }
0626:
0627: case '#': {
0628: int ch2 = read();
0629: if (line == 1 && lineCh == 2 && ch2 == '!') {
0630: for (; ch2 > 0 && ch2 != '\n'; ch2 = read()) {
0631: }
0632:
0633: ungetc(ch2);
0634: break;
0635: }
0636:
0637: if (ch2 >= 'a' && ch2 <= 'z' || ch2 >= 'A'
0638: && ch2 <= 'Z') {
0639: temp.clear();
0640: for (; ch2 >= 'a' && ch2 <= 'z' || ch2 >= 'A'
0641: && ch2 <= 'Z'; ch2 = read()) {
0642: temp.append((char) ch2);
0643: }
0644:
0645: if (temp.toString().equals("line"))
0646: scanLine(ch2);
0647: else if (temp.toString().equals("file"))
0648: scanFile(ch2);
0649: else
0650: throw error(L.l("expected pragma at `{0}'",
0651: temp));
0652:
0653: break;
0654: }
0655:
0656: if (ch2 < '0' || ch2 > '9')
0657: throw error(L.l("expected digit at {0}",
0658: badChar(ch2)));
0659: intValue = 0;
0660:
0661: for (; ch2 >= '0' && ch2 <= '9'; ch2 = read())
0662: intValue = 10 * intValue + ch2 - '0';
0663:
0664: if (ch2 == '=')
0665: return HASH_DEF;
0666: else if (ch2 == '#')
0667: return HASH_REF;
0668: else
0669: throw error(L.l("expected sharp variable at {0}",
0670: badChar(ch)));
0671: }
0672:
0673: default:
0674: if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
0675: || ch == '_' || ch == '$') {
0676: regexpOk = false;
0677: return lexId(ch);
0678: } else {
0679: throw error(L.l("illegal character at {0}",
0680: badChar(ch)));
0681: }
0682: }
0683: }
0684: }
0685:
0686: /**
0687: * Returns the text object for the lexeme.
0688: */
0689: CharBuffer getText() {
0690: return text;
0691: }
0692:
0693: boolean isEof() {
0694: return isEof;
0695: }
0696:
0697: /**
0698: * Used for error messages.
0699: */
0700: String getToken() {
0701: return lineText.substring(beginLineCh, lineCh);
0702: }
0703:
0704: /**
0705: * Returns the Id
0706: */
0707: ESId getId() {
0708: return id;
0709: }
0710:
0711: /**
0712: * Returns true if seen linefeed since the last.
0713: */
0714: boolean seenLineFeed() {
0715: return hasLf;
0716: }
0717:
0718: ESParseException error(String text) {
0719: return new ESParseException(filename, beginLine, beginLineCh,
0720: line, lineCh, text);
0721: }
0722:
0723: private String hex(int value) {
0724: CharBuffer cb = new CharBuffer();
0725:
0726: for (int b = 3; b >= 0; b--) {
0727: int v = (value >> (4 * b)) & 0xf;
0728: if (v < 10)
0729: cb.append((char) (v + '0'));
0730: else
0731: cb.append((char) (v - 10 + 'a'));
0732: }
0733:
0734: return cb.toString();
0735: }
0736:
0737: private String badChar(int ch) {
0738: if (ch >= 0x20 && ch <= 0x7f)
0739: return "`" + (char) ch + "'";
0740: else if (ch == '\n')
0741: return L.l("end of line");
0742: else if (ch == -1)
0743: return L.l("end of file");
0744: else
0745: return "`" + (char) ch + "' (\\u" + hex(ch) + ")";
0746: }
0747:
0748: String getFilename() {
0749: if (lineMap != null) {
0750: LineMap.Line map = lineMap.getLine(line);
0751: if (map != null)
0752: return map.getSourceFilename();
0753: }
0754:
0755: return filename;
0756: }
0757:
0758: long getLastModified() {
0759: if (is.getPath() == null)
0760: return 0;
0761: else
0762: return is.getPath().getLastModified();
0763: }
0764:
0765: int getLine() {
0766: if (lineMap != null) {
0767: LineMap.Line map = lineMap.getLine(line);
0768: if (map != null) {
0769: return map.getSourceLine(line);
0770: }
0771: }
0772:
0773: return line;
0774: }
0775:
0776: String getLastFilename() {
0777: if (lineMap != null) {
0778: LineMap.Line map = lineMap.getLine(lastLine);
0779: if (map != null)
0780: return map.getSourceFilename();
0781: }
0782:
0783: return lastFilename;
0784: }
0785:
0786: int getLastLine() {
0787: if (lineMap != null) {
0788: LineMap.Line map = lineMap.getLine(lastLine);
0789: if (map != null) {
0790: return map.getSourceLine(lastLine);
0791: }
0792: }
0793:
0794: return lastLine;
0795: }
0796:
0797: private void pushMacro(CharBuffer cb) throws ESParseException {
0798: if (peek >= 0)
0799: cb.append((char) read()); // Because of peek
0800: if (peek >= 0)
0801: cb.append((char) read()); // Because of peek
0802: if (macroText != null)
0803: macros.add(new Macro(macroText, macroIndex, macroOldLine));
0804: macroText = cb;
0805: macroIndex = 0;
0806: macroOldLine = line;
0807: }
0808:
0809: /**
0810: * Update variables to handle a newline.
0811: */
0812: private void newline() {
0813: line++;
0814: lineCh = 0;
0815: lineText.clear();
0816: }
0817:
0818: /**
0819: * Handles all the goodies for a floating point number after the
0820: * dot or 'e'
0821: */
0822: private int lexFloat(double value, int ch) throws ESParseException {
0823: int expt = 0;
0824:
0825: for (; ch >= '0' && ch <= '9'; ch = read()) {
0826: value = 10 * value + ch - '0';
0827: expt--;
0828: }
0829:
0830: if (ch == 'e' || ch == 'E') {
0831: ch = read();
0832:
0833: int sign = 1;
0834: if (ch == '-') {
0835: sign = -1;
0836: ch = read();
0837: } else if (ch == '+') {
0838: ch = read();
0839: }
0840:
0841: if (ch < '0' || ch > '9')
0842: throw error(L
0843: .l("expected exponent at {0}", badChar(ch)));
0844:
0845: int userExpt = 0;
0846: for (; ch >= '0' && ch <= '9'; ch = read()) {
0847: userExpt = 10 * userExpt + ch - '0';
0848: }
0849:
0850: expt += sign * userExpt;
0851: }
0852:
0853: ungetc(ch);
0854: if (expt >= 0)
0855: literal = ESNumber.create(value * Math.pow(10, expt));
0856: else
0857: literal = ESNumber.create(value / Math.pow(10, -expt));
0858: return LITERAL;
0859: }
0860:
0861: /**
0862: * Lexeme for a number
0863: */
0864: private int lexNumber(int ch) throws ESParseException {
0865: int radix = 10;
0866: double value = 0;
0867: boolean hasChar = true;
0868:
0869: if (ch == '0') {
0870: ch = read();
0871: if (ch >= '0' && ch <= '9')
0872: radix = 8;
0873: else if (ch == 'x' || ch == 'X') {
0874: hasChar = false;
0875: radix = 16;
0876: ch = read();
0877: }
0878: }
0879:
0880: for (; ch >= 0; ch = read()) {
0881: if (ch >= '0' && ch <= '9') {
0882: value = radix * value + ch - '0';
0883: hasChar = true;
0884:
0885: if (radix == 8 && ch >= '8')
0886: throw error(L.l("expected octal digit at {0}",
0887: badChar(ch)));
0888: } else if (radix == 16 && ch >= 'a' && ch <= 'f') {
0889: hasChar = true;
0890: value = radix * value + ch - 'a' + 10;
0891: } else if (radix == 16 && ch >= 'A' && ch <= 'F') {
0892: hasChar = true;
0893: value = radix * value + ch - 'A' + 10;
0894: } else
0895: break;
0896: }
0897:
0898: if (!hasChar)
0899: throw error(L.l("expected hex digit at {0}", badChar(ch)));
0900:
0901: if (radix == 10 && ch == '.') {
0902: ch = read();
0903:
0904: if (ch >= '0' && ch <= '9')
0905: return lexFloat(value, ch);
0906: else {
0907: ungetc(ch);
0908: literal = ESNumber.create(value);
0909: return LITERAL;
0910: }
0911: } else if (radix == 10 && (ch == 'e' || ch == 'E'))
0912: return lexFloat(value, ch);
0913: else {
0914: ungetc(ch);
0915: literal = ESNumber.create(value);
0916: return LITERAL;
0917: }
0918: }
0919:
0920: /**
0921: * Returns the number for a hex digit.
0922: */
0923: private int hexDigit(int ch) throws ESParseException {
0924: if (ch >= '0' && ch <= '9')
0925: return ch - '0';
0926: else if (ch >= 'a' && ch <= 'f')
0927: return ch - 'a' + 10;
0928: else if (ch >= 'A' && ch <= 'F')
0929: return ch - 'A' + 10;
0930: else
0931: throw error(L.l("expected hex digit at {0}", badChar(ch)));
0932: }
0933:
0934: /**
0935: * Lexeme for a string.
0936: */
0937: private int lexString(char endCh, String endTail, boolean isRegexp,
0938: boolean isMultiline) throws ESParseException {
0939: text.setLength(0);
0940:
0941: int ch = read();
0942: for (; ch >= 0; ch = read()) {
0943: if (ch == '\n') {
0944: if (isMultiline) {
0945: } else if (isRegexp)
0946: throw error(L
0947: .l("unexpected end of line in regular expression"));
0948: else
0949: throw error(L.l("unexpected end of line in string"));
0950: newline();
0951: }
0952:
0953: if (ch != endCh) {
0954: } else if (endTail == null) {
0955: literal = ESString.create(text.toString());
0956: return LITERAL;
0957: } else if (!text.endsWith(endTail)) {
0958: } else if (text.length() == endTail.length()) {
0959: literal = ESString.create("");
0960: return LITERAL;
0961: } else {
0962: char tailCh = text.charAt(text.length()
0963: - endTail.length() - 1);
0964:
0965: if (tailCh == '\n') {
0966: text
0967: .setLength(text.length() - endTail.length()
0968: - 1);
0969: literal = ESString.create(text.toString());
0970: return LITERAL;
0971: }
0972: }
0973:
0974: if (ch == '\\') {
0975: ch = read();
0976: switch (ch) {
0977: case -1:
0978: if (isRegexp)
0979: throw error(L
0980: .l("unexpected end of file in regular expression"));
0981: else
0982: throw error(L
0983: .l("unexpected end of file in string"));
0984:
0985: case '\n':
0986: if (isRegexp)
0987: throw error(L
0988: .l("unexpected end of line in regular expression"));
0989: else
0990: throw error(L
0991: .l("unexpected end of line in string"));
0992:
0993: case 'b':
0994: if (isRegexp)
0995: text.append("\\b");
0996: else
0997: text.append('\b');
0998: break;
0999:
1000: case 'e':
1001: text.append((char) 0x1b);
1002: break;
1003:
1004: case 'f':
1005: text.append('\f');
1006: break;
1007:
1008: case 'n':
1009: text.append('\n');
1010: break;
1011:
1012: case 'r':
1013: text.append('\r');
1014: break;
1015:
1016: case 't':
1017: text.append('\t');
1018: break;
1019:
1020: case 'v':
1021: text.append((char) 0xb);
1022: break;
1023:
1024: case 'c': {
1025: ch = read();
1026: if (ch >= 'a' && ch <= 'z')
1027: text.append((char) (ch - 'a' + 1));
1028: else if (ch >= 'A' && ch <= 'Z')
1029: text.append((char) (ch - 'A' + 1));
1030: else if (ch - '@' >= 0 && ch - '@' < ' ')
1031: text.append((char) (ch - '@'));
1032: else
1033: throw error(L.l(
1034: "expected control character at {0}",
1035: badChar(ch)));
1036: }
1037: break;
1038:
1039: case 'o': {
1040: int value = 0;
1041: while ((ch = read()) >= '0' && ch <= '8') {
1042: value = 8 * value + ch - '0';
1043: }
1044: ungetc(ch);
1045: text.append((char) value);
1046: }
1047: break;
1048:
1049: case 'x': {
1050: int value = 16 * hexDigit(read());
1051: value += hexDigit(read());
1052: text.append((char) value);
1053: }
1054: break;
1055:
1056: case 'u': {
1057: int value = 4096 * hexDigit(read());
1058: value += 256 * hexDigit(read());
1059: value += 16 * hexDigit(read());
1060: value += hexDigit(read());
1061: text.append((char) value);
1062: }
1063: break;
1064:
1065: case '0':
1066: case '1':
1067: case '2':
1068: case '3':
1069: case '4':
1070: case '5':
1071: case '6':
1072: case '7': {
1073: int value = ch - '0';
1074:
1075: if (ch != '0' && isRegexp) {
1076: text.append('\\');
1077: text.append((char) ch);
1078: break;
1079: }
1080:
1081: if ((ch = read()) >= '0' && ch <= '7') {
1082: value = 8 * value + ch - '0';
1083:
1084: if (value >= 040) {
1085: } else if ((ch = read()) >= '0' && ch <= '7')
1086: value = 8 * value + ch - '0';
1087: else
1088: ungetc(ch);
1089: } else
1090: ungetc(ch);
1091: text.append((char) value);
1092: }
1093: break;
1094:
1095: default:
1096: if (isRegexp)
1097: text.append('\\');
1098: text.append((char) ch);
1099: break;
1100: }
1101: } else {
1102: text.append((char) ch);
1103: }
1104: }
1105:
1106: if (ch != -1) {
1107: } else if (isRegexp)
1108: throw error(L
1109: .l("unexpected end of file in regular expression"));
1110: else
1111: throw error(L.l("unexpected end of file in string"));
1112:
1113: literal = ESString.create(text.toString());
1114:
1115: return LITERAL;
1116: }
1117:
1118: private void scanMacroStatement(CharBuffer macro, int end,
1119: boolean isRegexp, boolean multiline)
1120: throws ESParseException {
1121: int ch;
1122:
1123: while ((ch = read()) >= 0 && ch != end) {
1124: macro.append((char) ch);
1125:
1126: switch (ch) {
1127: case '\\':
1128: ch = read();
1129: macro.append((char) ch);
1130: break;
1131:
1132: case '\'':
1133: case '"':
1134: int testch = ch;
1135:
1136: while ((ch = read()) >= 0) {
1137: if (ch == '\\') {
1138: macro.append((char) ch);
1139: ch = read();
1140: } else if (ch == testch) {
1141: macro.append((char) ch);
1142: break;
1143: } else if (ch == '\n') {
1144: if (!multiline)
1145: throw error("unexpected end of line in "
1146: + (isRegexp ? "regular expression"
1147: : "string"));
1148: newline();
1149: }
1150:
1151: macro.append((char) ch);
1152: }
1153: break;
1154:
1155: case '(':
1156: scanMacroStatement(macro, ')', isRegexp, multiline);
1157: macro.append(')');
1158: break;
1159:
1160: case '{':
1161: scanMacroStatement(macro, '}', isRegexp, multiline);
1162: macro.append('}');
1163: break;
1164:
1165: case '\n':
1166: if (!multiline)
1167: throw error("unexpected end of line in "
1168: + (isRegexp ? "regular expression"
1169: : "string"));
1170: newline();
1171: break;
1172:
1173: default:
1174: break;
1175: }
1176: }
1177: }
1178:
1179: private void interpolate(CharBuffer macro, int tail,
1180: String matchText, String beginStr, String endStr,
1181: boolean isRegexp, boolean multiline)
1182: throws ESParseException {
1183: int ch = read();
1184: int ch1;
1185:
1186: macro.append(beginStr);
1187: int start = macro.length();
1188: loop: for (; ch >= 0; ch = read()) {
1189: switch (ch) {
1190: case '\\':
1191: macro.append((char) ch);
1192: ch = read();
1193: if (ch != -1)
1194: macro.append((char) ch);
1195: break;
1196:
1197: case '$':
1198: if ((ch = read()) == -1)
1199: break;
1200:
1201: if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
1202: || ch == '_' || ch == '$') {
1203: macro.append(endStr);
1204: macro.append("+(");
1205: macro.append((char) ch);
1206:
1207: while ((ch = read()) >= 0
1208: && (ch >= 'a' && ch <= 'z')
1209: || (ch >= 'A' && ch <= 'Z')
1210: || (ch >= '0' && ch <= '9') || ch == '_'
1211: || ch == '$') {
1212: macro.append((char) ch);
1213: }
1214: ungetc(ch);
1215: macro.append(")+");
1216: macro.append(beginStr);
1217: } else if (ch == '{') {
1218: macro.append(endStr);
1219: macro.append("+(");
1220: scanMacroStatement(macro, '}', isRegexp, multiline);
1221: macro.append(")+");
1222: macro.append(beginStr);
1223: } else if (ch == '(') {
1224: macro.append(endStr);
1225: macro.append("+(");
1226: scanMacroStatement(macro, ')', isRegexp, multiline);
1227: macro.append(")+");
1228: macro.append(beginStr);
1229: } else {
1230: ungetc(ch);
1231: macro.append('$');
1232: }
1233: break;
1234:
1235: default:
1236: if (ch == '\n') {
1237: newline();
1238: if (!multiline)
1239: throw error("unexpected end of line in "
1240: + (isRegexp ? "regular expression"
1241: : "string"));
1242: }
1243:
1244: if (ch != tail) {
1245: } else if (matchText == null) {
1246: break loop;
1247: } else if (!macro.endsWith(matchText)) {
1248: } else if (macro.length() - start == matchText.length()) {
1249: macro.setLength(start);
1250: break loop;
1251: } else if (macro.charAt(macro.length()
1252: - matchText.length() - 1) == '\n') {
1253: macro.setLength(macro.length() - matchText.length()
1254: - 1);
1255: break loop;
1256: }
1257:
1258: macro.append((char) ch);
1259:
1260: break;
1261: }
1262: }
1263:
1264: macro.append(endStr);
1265: }
1266:
1267: private boolean scanMultiline() throws ESParseException {
1268: int ch;
1269: CharBuffer end = new CharBuffer();
1270: boolean interpolate = true;
1271: boolean endNewline = true;
1272:
1273: if ((ch = read()) >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
1274: || ch == '_' || ch == '$') {
1275: for (; ch >= 0 && ch >= 'a' && ch <= 'z' || ch >= 'A'
1276: && ch <= 'Z' || ch == '_' || ch == '$' || ch >= '0'
1277: && ch <= '9'; ch = read()) {
1278: end.append((char) ch);
1279: }
1280: } else if (ch == '\'') {
1281: interpolate = false;
1282: for (ch = read(); ch >= 0 && ch != '\'' && ch != '\n'; ch = read()) {
1283: end.append((char) ch);
1284: }
1285:
1286: if (ch != '\'')
1287: throw error(L.l("multiline escape error at {0}",
1288: badChar(ch)));
1289: ch = read();
1290: } else if (ch == '`') {
1291: interpolate = false;
1292: for (ch = read(); ch >= 0 && ch != '`' && ch != '\n'; ch = read()) {
1293: end.append((char) ch);
1294: }
1295:
1296: if (ch != '`')
1297: throw error(L.l("multiline escape error at {0}",
1298: badChar(ch)));
1299: endNewline = false;
1300: } else if (ch == '\"') {
1301: for (ch = read(); ch >= 0 && ch != '\"' && ch != '\n'; ch = read()) {
1302: end.append((char) ch);
1303: }
1304:
1305: if (ch != '\"')
1306: throw error(L.l("multiline escape error at {0}",
1307: badChar(ch)));
1308: ch = read();
1309: }
1310:
1311: int oldLine = line;
1312: CharBuffer lineTail = null;
1313:
1314: if (endNewline) {
1315: lineTail = new CharBuffer();
1316: for (; ch >= 0 && ch != '\n'; ch = read()) {
1317: lineTail.append((char) ch);
1318: }
1319: if (ch == '\r') {
1320: lineTail.append((char) ch);
1321: ch = read();
1322: }
1323: if (ch == '\n') {
1324: newline();
1325: lineTail.append((char) ch);
1326: }
1327: }
1328:
1329: CharBuffer macro = null;
1330: String endString = end.toString();
1331: if (interpolate) {
1332: macro = new CharBuffer();
1333: macro.append('(');
1334: interpolate(macro, '\n', endString, "@<<`" + endString
1335: + "`", "\n" + endString + '\n', false, true);
1336: macro.append("+'\\n')");
1337: } else {
1338: if (endNewline) {
1339: lexString('\n', endString, false, true);
1340: text.append('\n');
1341: literal = ESString.create(text);
1342: } else {
1343: lexString('\n', endString, false, true);
1344: line -= 2;
1345: }
1346: }
1347:
1348: if (endNewline) {
1349: pushMacro(lineTail);
1350: line = oldLine;
1351: }
1352:
1353: if (interpolate) {
1354: pushMacro(macro);
1355: line++;
1356: return false;
1357: } else
1358: return true;
1359: }
1360:
1361: private int readRegexpFlags() throws ESParseException {
1362: int ch;
1363: while (true) {
1364: switch ((ch = read())) {
1365: case 'x':
1366: _flags |= Pattern.COMMENTS;
1367: break;
1368: case 'i':
1369: _flags |= Pattern.CASE_INSENSITIVE;
1370: break;
1371: case 'g':
1372: break;
1373: case 'm':
1374: _flags |= Pattern.MULTILINE;
1375: break;
1376: case 's':
1377: break;
1378: default:
1379: ungetc(ch);
1380: return _flags;
1381: }
1382: }
1383: }
1384:
1385: /**
1386: * Lexeme for an Id. Reserved words are looked up in a
1387: * HashMap.
1388: */
1389: private int lexId(int ch) throws ESParseException {
1390: text.setLength(0);
1391:
1392: text.append((char) ch);
1393:
1394: while (true) {
1395: ch = read();
1396:
1397: if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
1398: || ch == '_' || ch == '$' || ch >= '0' && ch <= '9') {
1399: text.append((char) ch);
1400: } else {
1401: ungetc(ch);
1402:
1403: break;
1404: }
1405: }
1406:
1407: Integer value = (Integer) reserved.get(text);
1408:
1409: if (value == null) {
1410: id = ESId.intern(text.toString());
1411: return IDENTIFIER;
1412: } else {
1413: int intValue = value.intValue();
1414:
1415: switch (intValue) {
1416: case NULL:
1417: literal = ESBase.esNull;
1418: return LITERAL;
1419:
1420: case UNDEFINED:
1421: literal = ESBase.esUndefined;
1422: return LITERAL;
1423:
1424: case FALSE:
1425: literal = ESBoolean.create(false);
1426: return LITERAL;
1427:
1428: case TRUE:
1429: literal = ESBoolean.create(true);
1430: return LITERAL;
1431:
1432: default:
1433: return value.intValue();
1434: }
1435: }
1436: }
1437:
1438: /**
1439: * Lexeme for an operation
1440: */
1441: private int lexOp(int ch) throws ESParseException {
1442: text.setLength(0);
1443: text.append((char) ch);
1444:
1445: loop: while ((ch = read()) >= 0) {
1446: switch (ch) {
1447: case '+':
1448: case '-':
1449: case '*':
1450: case '/':
1451: case '%':
1452: case '!':
1453: case '<':
1454: case '.':
1455: case '>':
1456: case '&':
1457: case '|':
1458: case '=':
1459: case '^':
1460: case '?':
1461: text.append((char) ch);
1462:
1463: op = (Op) ops.get(text);
1464: if (op == null) {
1465: text.setLength(text.length() - 1);
1466: ungetc(ch);
1467: break loop;
1468: }
1469: break;
1470:
1471: default:
1472: ungetc(ch);
1473: break loop;
1474: }
1475: }
1476:
1477: op = (Op) ops.get(text);
1478:
1479: // XXX: non-reachable
1480: if (op == null)
1481: throw error(L.l("expected operator at `{0}'", text
1482: .toString()));
1483:
1484: return op.lexeme;
1485: }
1486:
1487: /**
1488: * Return the operation for a lexeme. Binary operations like '*' will
1489: * return BIN_OP as the lexeme. Calling getOp() will get the actual
1490: * operation.
1491: */
1492: int getOp() {
1493: return op.op;
1494: }
1495:
1496: int getPrecedence() {
1497: return op.precedence;
1498: }
1499:
1500: boolean isRightAssoc() {
1501: return op.isRightAssoc;
1502: }
1503:
1504: ESBase getLiteral() {
1505: return literal;
1506: }
1507:
1508: int getFlags() {
1509: return _flags;
1510: }
1511:
1512: private void scanLine(int ch) throws ESParseException {
1513: for (; ch == ' ' || ch == '\t'; ch = read()) {
1514: }
1515:
1516: if (ch < '0' || ch > '9')
1517: throw error(L.l("expected digit at {0}", badChar(ch)));
1518:
1519: line = 0;
1520: for (; ch >= '0' && ch <= '9'; ch = read())
1521: line = 10 * line + ch - '0';
1522:
1523: for (; ch == ' ' || ch == '\t'; ch = read()) {
1524: }
1525:
1526: if (ch != '#')
1527: throw error(L.l("expected `#' at {0}", badChar(ch)));
1528: }
1529:
1530: private void scanFile(int ch) throws ESParseException {
1531: for (; ch == ' ' || ch == '\t'; ch = read()) {
1532: }
1533:
1534: temp.clear();
1535: for (; ch >= 0 && ch != ' ' && ch != '\t' && ch != '#'; ch = read())
1536: temp.append((char) ch);
1537:
1538: if (temp.length() == 0)
1539: throw error(L.l("expected filename at {0}", badChar(ch)));
1540: filename = temp.toString();
1541:
1542: for (; ch == ' ' || ch == '\t'; ch = read()) {
1543: }
1544:
1545: line = 0;
1546: for (; ch >= '0' && ch <= '9'; ch = read())
1547: line = 10 * line + ch - '0';
1548:
1549: if (line == 0)
1550: line = 1;
1551:
1552: for (; ch == ' ' || ch == '\t'; ch = read()) {
1553: }
1554:
1555: if (ch != '#')
1556: throw error(L.l("expected `#' at {0}", badChar(ch)));
1557: }
1558:
1559: /**
1560: * Reads the next character.
1561: */
1562: private int read() throws ESParseException {
1563: lineCh++;
1564: if (peek >= 0) {
1565: int ch = peek;
1566: peek = peek2;
1567: peek2 = -1;
1568: return ch;
1569: }
1570:
1571: while (macroText != null) {
1572: if (macroIndex < macroText.length()) {
1573: int ch = macroText.charAt(macroIndex++);
1574: lineText.append((char) ch);
1575: return ch;
1576: }
1577:
1578: line = macroOldLine;
1579:
1580: if (macros.size() == 0)
1581: macroText = null;
1582: else {
1583: Macro macro = (Macro) macros.remove(macros.size() - 1);
1584: macroText = macro.text;
1585: macroIndex = macro.index;
1586: macroOldLine = macro.oldLine;
1587: }
1588: }
1589:
1590: try {
1591: int ch = is.readChar();
1592:
1593: if (ch == '\r') {
1594: ch = is.readChar();
1595: if (ch != '\n') {
1596: if (ch == '\r')
1597: peek = '\n';
1598: else
1599: peek = ch;
1600: }
1601: ch = '\n';
1602: }
1603: lineText.append((char) ch);
1604:
1605: return ch;
1606: } catch (CharConversionException e1) {
1607: throw error(L.l("expected {0} encoded character", is
1608: .getEncoding()));
1609: } catch (IOException e1) {
1610: throw new ESParseException(e1);
1611: }
1612: }
1613:
1614: private void ungetc(int ch) {
1615: peek2 = peek;
1616: peek = ch;
1617: if (lineCh > 0)
1618: lineCh--;
1619:
1620: /*
1621: if (ch == '\n')
1622: line--;
1623: */
1624: }
1625:
1626: static class Op {
1627: int op;
1628: int lexeme;
1629: int precedence;
1630: boolean isRightAssoc;
1631:
1632: Op(int op, int lexeme, int precedence, boolean isRightAssoc) {
1633: this .op = op;
1634: this .lexeme = lexeme;
1635: this .precedence = precedence;
1636: this .isRightAssoc = isRightAssoc;
1637: }
1638: };
1639:
1640: class Macro {
1641: CharBuffer text;
1642: int index;
1643: int oldLine;
1644:
1645: void clear() {
1646: text.clear();
1647: index = 0;
1648: }
1649:
1650: Macro(CharBuffer cb, int index, int oldLine) {
1651: this.text = cb;
1652: this.index = index;
1653: this.oldLine = oldLine;
1654: }
1655: }
1656: }
|