0001: /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
0002: *
0003: * ***** BEGIN LICENSE BLOCK *****
0004: * Version: MPL 1.1/GPL 2.0
0005: *
0006: * The contents of this file are subject to the Mozilla Public License Version
0007: * 1.1 (the "License"); you may not use this file except in compliance with
0008: * the License. You may obtain a copy of the License at
0009: * http://www.mozilla.org/MPL/
0010: *
0011: * Software distributed under the License is distributed on an "AS IS" basis,
0012: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
0013: * for the specific language governing rights and limitations under the
0014: * License.
0015: *
0016: * The Original Code is Rhino code, released
0017: * May 6, 1999.
0018: *
0019: * The Initial Developer of the Original Code is
0020: * Netscape Communications Corporation.
0021: * Portions created by the Initial Developer are Copyright (C) 1997-1999
0022: * the Initial Developer. All Rights Reserved.
0023: *
0024: * Contributor(s):
0025: * Roger Lawrence
0026: * Mike McCabe
0027: * Igor Bukanov
0028: * Ethan Hugg
0029: * Bob Jervis
0030: * Terry Lucas
0031: * Milen Nankov
0032: *
0033: * Alternatively, the contents of this file may be used under the terms of
0034: * the GNU General Public License Version 2 or later (the "GPL"), in which
0035: * case the provisions of the GPL are applicable instead of those above. If
0036: * you wish to allow use of your version of this file only under the terms of
0037: * the GPL and not to allow others to use your version of this file under the
0038: * MPL, indicate your decision by deleting the provisions above and replacing
0039: * them with the notice and other provisions required by the GPL. If you do
0040: * not delete the provisions above, a recipient may use your version of this
0041: * file under either the MPL or the GPL.
0042: *
0043: * ***** END LICENSE BLOCK ***** */
0044:
0045: package org.mozilla.javascript;
0046:
0047: import java.io.*;
0048:
0049: /**
0050: * This class implements the JavaScript scanner.
0051: *
0052: * It is based on the C source files jsscan.c and jsscan.h
0053: * in the jsref package.
0054: *
0055: * @see org.mozilla.javascript.Parser
0056: *
0057: * @author Mike McCabe
0058: * @author Brendan Eich
0059: */
0060:
0061: class TokenStream {
0062: /*
0063: * For chars - because we need something out-of-range
0064: * to check. (And checking EOF by exception is annoying.)
0065: * Note distinction from EOF token type!
0066: */
0067: private final static int EOF_CHAR = -1;
0068:
/**
 * Creates a scanner that reads from exactly one of a reader or a string.
 *
 * @param parser       parser stored for later error and warning reporting
 * @param sourceReader character source, or null when scanning a string
 * @param sourceString complete source text, or null when scanning a reader
 * @param lineno       initial line number
 */
TokenStream(Parser parser, Reader sourceReader,
            String sourceString, int lineno) {
    this.parser = parser;
    this.lineno = lineno;

    if (sourceReader == null) {
        // String mode: the whole input is available up front.
        if (sourceString == null) {
            Kit.codeBug(); // neither source supplied
        }
        this.sourceString = sourceString;
        this.sourceEnd = sourceString.length();
    } else {
        // Reader mode: input is pulled through a buffer that starts empty.
        if (sourceString != null) {
            Kit.codeBug(); // both sources supplied
        }
        this.sourceReader = sourceReader;
        this.sourceBuffer = new char[512];
        this.sourceEnd = 0;
    }

    this.sourceCursor = 0;
}
0087:
0088: /* This function uses the cached op, string and number fields in
0089: * TokenStream; if getToken has been called since the passed token
0090: * was scanned, the op or string printed may be incorrect.
0091: */
0092: String tokenToString(int token) {
0093: if (Token.printTrees) {
0094: String name = Token.name(token);
0095:
0096: switch (token) {
0097: case Token.STRING:
0098: case Token.REGEXP:
0099: case Token.NAME:
0100: return name + " `" + this .string + "'";
0101:
0102: case Token.NUMBER:
0103: return "NUMBER " + this .number;
0104: }
0105:
0106: return name;
0107: }
0108: return "";
0109: }
0110:
0111: static boolean isKeyword(String s) {
0112: return Token.EOF != stringToKeyword(s);
0113: }
0114:
0115: private static int stringToKeyword(String name) {
0116: // #string_id_map#
0117: // The following assumes that Token.EOF == 0
0118: final int Id_break = Token.BREAK, Id_case = Token.CASE, Id_continue = Token.CONTINUE, Id_default = Token.DEFAULT, Id_delete = Token.DELPROP, Id_do = Token.DO, Id_else = Token.ELSE, Id_export = Token.EXPORT, Id_false = Token.FALSE, Id_for = Token.FOR, Id_function = Token.FUNCTION, Id_if = Token.IF, Id_in = Token.IN, Id_let = Token.LET, Id_new = Token.NEW, Id_null = Token.NULL, Id_return = Token.RETURN, Id_switch = Token.SWITCH, Id_this = Token.THIS, Id_true = Token.TRUE, Id_typeof = Token.TYPEOF, Id_var = Token.VAR, Id_void = Token.VOID, Id_while = Token.WHILE, Id_with = Token.WITH, Id_yield = Token.YIELD,
0119:
0120: // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
0121: Id_abstract = Token.RESERVED, Id_boolean = Token.RESERVED, Id_byte = Token.RESERVED, Id_catch = Token.CATCH, Id_char = Token.RESERVED, Id_class = Token.RESERVED, Id_const = Token.CONST, Id_debugger = Token.DEBUGGER, Id_double = Token.RESERVED, Id_enum = Token.RESERVED, Id_extends = Token.RESERVED, Id_final = Token.RESERVED, Id_finally = Token.FINALLY, Id_float = Token.RESERVED, Id_goto = Token.RESERVED, Id_implements = Token.RESERVED, Id_import = Token.IMPORT, Id_instanceof = Token.INSTANCEOF, Id_int = Token.RESERVED, Id_interface = Token.RESERVED, Id_long = Token.RESERVED, Id_native = Token.RESERVED, Id_package = Token.RESERVED, Id_private = Token.RESERVED, Id_protected = Token.RESERVED, Id_public = Token.RESERVED, Id_short = Token.RESERVED, Id_static = Token.RESERVED, Id_super = Token.RESERVED, Id_synchronized = Token.RESERVED, Id_throw = Token.THROW, Id_throws = Token.RESERVED, Id_transient = Token.RESERVED, Id_try = Token.TRY, Id_volatile = Token.RESERVED;
0122:
0123: int id;
0124: String s = name;
0125: // #generated# Last update: 2007-04-18 13:53:30 PDT
0126: L0: {
0127: id = 0;
0128: String X = null;
0129: int c;
0130: L: switch (s.length()) {
0131: case 2:
0132: c = s.charAt(1);
0133: if (c == 'f') {
0134: if (s.charAt(0) == 'i') {
0135: id = Id_if;
0136: break L0;
0137: }
0138: } else if (c == 'n') {
0139: if (s.charAt(0) == 'i') {
0140: id = Id_in;
0141: break L0;
0142: }
0143: } else if (c == 'o') {
0144: if (s.charAt(0) == 'd') {
0145: id = Id_do;
0146: break L0;
0147: }
0148: }
0149: break L;
0150: case 3:
0151: switch (s.charAt(0)) {
0152: case 'f':
0153: if (s.charAt(2) == 'r' && s.charAt(1) == 'o') {
0154: id = Id_for;
0155: break L0;
0156: }
0157: break L;
0158: case 'i':
0159: if (s.charAt(2) == 't' && s.charAt(1) == 'n') {
0160: id = Id_int;
0161: break L0;
0162: }
0163: break L;
0164: case 'l':
0165: if (s.charAt(2) == 't' && s.charAt(1) == 'e') {
0166: id = Id_let;
0167: break L0;
0168: }
0169: break L;
0170: case 'n':
0171: if (s.charAt(2) == 'w' && s.charAt(1) == 'e') {
0172: id = Id_new;
0173: break L0;
0174: }
0175: break L;
0176: case 't':
0177: if (s.charAt(2) == 'y' && s.charAt(1) == 'r') {
0178: id = Id_try;
0179: break L0;
0180: }
0181: break L;
0182: case 'v':
0183: if (s.charAt(2) == 'r' && s.charAt(1) == 'a') {
0184: id = Id_var;
0185: break L0;
0186: }
0187: break L;
0188: }
0189: break L;
0190: case 4:
0191: switch (s.charAt(0)) {
0192: case 'b':
0193: X = "byte";
0194: id = Id_byte;
0195: break L;
0196: case 'c':
0197: c = s.charAt(3);
0198: if (c == 'e') {
0199: if (s.charAt(2) == 's' && s.charAt(1) == 'a') {
0200: id = Id_case;
0201: break L0;
0202: }
0203: } else if (c == 'r') {
0204: if (s.charAt(2) == 'a' && s.charAt(1) == 'h') {
0205: id = Id_char;
0206: break L0;
0207: }
0208: }
0209: break L;
0210: case 'e':
0211: c = s.charAt(3);
0212: if (c == 'e') {
0213: if (s.charAt(2) == 's' && s.charAt(1) == 'l') {
0214: id = Id_else;
0215: break L0;
0216: }
0217: } else if (c == 'm') {
0218: if (s.charAt(2) == 'u' && s.charAt(1) == 'n') {
0219: id = Id_enum;
0220: break L0;
0221: }
0222: }
0223: break L;
0224: case 'g':
0225: X = "goto";
0226: id = Id_goto;
0227: break L;
0228: case 'l':
0229: X = "long";
0230: id = Id_long;
0231: break L;
0232: case 'n':
0233: X = "null";
0234: id = Id_null;
0235: break L;
0236: case 't':
0237: c = s.charAt(3);
0238: if (c == 'e') {
0239: if (s.charAt(2) == 'u' && s.charAt(1) == 'r') {
0240: id = Id_true;
0241: break L0;
0242: }
0243: } else if (c == 's') {
0244: if (s.charAt(2) == 'i' && s.charAt(1) == 'h') {
0245: id = Id_this ;
0246: break L0;
0247: }
0248: }
0249: break L;
0250: case 'v':
0251: X = "void";
0252: id = Id_void;
0253: break L;
0254: case 'w':
0255: X = "with";
0256: id = Id_with;
0257: break L;
0258: }
0259: break L;
0260: case 5:
0261: switch (s.charAt(2)) {
0262: case 'a':
0263: X = "class";
0264: id = Id_class;
0265: break L;
0266: case 'e':
0267: c = s.charAt(0);
0268: if (c == 'b') {
0269: X = "break";
0270: id = Id_break;
0271: } else if (c == 'y') {
0272: X = "yield";
0273: id = Id_yield;
0274: }
0275: break L;
0276: case 'i':
0277: X = "while";
0278: id = Id_while;
0279: break L;
0280: case 'l':
0281: X = "false";
0282: id = Id_false;
0283: break L;
0284: case 'n':
0285: c = s.charAt(0);
0286: if (c == 'c') {
0287: X = "const";
0288: id = Id_const;
0289: } else if (c == 'f') {
0290: X = "final";
0291: id = Id_final;
0292: }
0293: break L;
0294: case 'o':
0295: c = s.charAt(0);
0296: if (c == 'f') {
0297: X = "float";
0298: id = Id_float;
0299: } else if (c == 's') {
0300: X = "short";
0301: id = Id_short;
0302: }
0303: break L;
0304: case 'p':
0305: X = "super";
0306: id = Id_super ;
0307: break L;
0308: case 'r':
0309: X = "throw";
0310: id = Id_throw;
0311: break L;
0312: case 't':
0313: X = "catch";
0314: id = Id_catch;
0315: break L;
0316: }
0317: break L;
0318: case 6:
0319: switch (s.charAt(1)) {
0320: case 'a':
0321: X = "native";
0322: id = Id_native;
0323: break L;
0324: case 'e':
0325: c = s.charAt(0);
0326: if (c == 'd') {
0327: X = "delete";
0328: id = Id_delete;
0329: } else if (c == 'r') {
0330: X = "return";
0331: id = Id_return;
0332: }
0333: break L;
0334: case 'h':
0335: X = "throws";
0336: id = Id_throws;
0337: break L;
0338: case 'm':
0339: X = "import";
0340: id = Id_import;
0341: break L;
0342: case 'o':
0343: X = "double";
0344: id = Id_double;
0345: break L;
0346: case 't':
0347: X = "static";
0348: id = Id_static;
0349: break L;
0350: case 'u':
0351: X = "public";
0352: id = Id_public;
0353: break L;
0354: case 'w':
0355: X = "switch";
0356: id = Id_switch;
0357: break L;
0358: case 'x':
0359: X = "export";
0360: id = Id_export;
0361: break L;
0362: case 'y':
0363: X = "typeof";
0364: id = Id_typeof;
0365: break L;
0366: }
0367: break L;
0368: case 7:
0369: switch (s.charAt(1)) {
0370: case 'a':
0371: X = "package";
0372: id = Id_package;
0373: break L;
0374: case 'e':
0375: X = "default";
0376: id = Id_default;
0377: break L;
0378: case 'i':
0379: X = "finally";
0380: id = Id_finally;
0381: break L;
0382: case 'o':
0383: X = "boolean";
0384: id = Id_boolean;
0385: break L;
0386: case 'r':
0387: X = "private";
0388: id = Id_private;
0389: break L;
0390: case 'x':
0391: X = "extends";
0392: id = Id_extends;
0393: break L;
0394: }
0395: break L;
0396: case 8:
0397: switch (s.charAt(0)) {
0398: case 'a':
0399: X = "abstract";
0400: id = Id_abstract;
0401: break L;
0402: case 'c':
0403: X = "continue";
0404: id = Id_continue;
0405: break L;
0406: case 'd':
0407: X = "debugger";
0408: id = Id_debugger;
0409: break L;
0410: case 'f':
0411: X = "function";
0412: id = Id_function;
0413: break L;
0414: case 'v':
0415: X = "volatile";
0416: id = Id_volatile;
0417: break L;
0418: }
0419: break L;
0420: case 9:
0421: c = s.charAt(0);
0422: if (c == 'i') {
0423: X = "interface";
0424: id = Id_interface;
0425: } else if (c == 'p') {
0426: X = "protected";
0427: id = Id_protected;
0428: } else if (c == 't') {
0429: X = "transient";
0430: id = Id_transient;
0431: }
0432: break L;
0433: case 10:
0434: c = s.charAt(1);
0435: if (c == 'm') {
0436: X = "implements";
0437: id = Id_implements ;
0438: } else if (c == 'n') {
0439: X = "instanceof";
0440: id = Id_instanceof ;
0441: }
0442: break L;
0443: case 12:
0444: X = "synchronized";
0445: id = Id_synchronized;
0446: break L;
0447: }
0448: if (X != null && X != s && !X.equals(s))
0449: id = 0;
0450: }
0451: // #/generated#
0452: // #/string_id_map#
0453: if (id == 0) {
0454: return Token.EOF;
0455: }
0456: return id & 0xff;
0457: }
0458:
0459: final int getLineno() {
0460: return lineno;
0461: }
0462:
0463: final String getString() {
0464: return string;
0465: }
0466:
0467: final double getNumber() {
0468: return number;
0469: }
0470:
0471: final boolean eof() {
0472: return hitEOF;
0473: }
0474:
0475: final int getToken() throws IOException {
0476: int c;
0477:
0478: retry: for (;;) {
0479: // Eat whitespace, possibly sensitive to newlines.
0480: for (;;) {
0481: c = getChar();
0482: if (c == EOF_CHAR) {
0483: return Token.EOF;
0484: } else if (c == '\n') {
0485: dirtyLine = false;
0486: return Token.EOL;
0487: } else if (!isJSSpace(c)) {
0488: if (c != '-') {
0489: dirtyLine = true;
0490: }
0491: break;
0492: }
0493: }
0494:
0495: if (c == '@')
0496: return Token.XMLATTR;
0497:
0498: // identifier/keyword/instanceof?
0499: // watch out for starting with a <backslash>
0500: boolean identifierStart;
0501: boolean isUnicodeEscapeStart = false;
0502: if (c == '\\') {
0503: c = getChar();
0504: if (c == 'u') {
0505: identifierStart = true;
0506: isUnicodeEscapeStart = true;
0507: stringBufferTop = 0;
0508: } else {
0509: identifierStart = false;
0510: ungetChar(c);
0511: c = '\\';
0512: }
0513: } else {
0514: identifierStart = Character
0515: .isJavaIdentifierStart((char) c);
0516: if (identifierStart) {
0517: stringBufferTop = 0;
0518: addToString(c);
0519: }
0520: }
0521:
0522: if (identifierStart) {
0523: boolean containsEscape = isUnicodeEscapeStart;
0524: for (;;) {
0525: if (isUnicodeEscapeStart) {
0526: // strictly speaking we should probably push-back
0527: // all the bad characters if the <backslash>uXXXX
0528: // sequence is malformed. But since there isn't a
0529: // correct context(is there?) for a bad Unicode
0530: // escape sequence in an identifier, we can report
0531: // an error here.
0532: int escapeVal = 0;
0533: for (int i = 0; i != 4; ++i) {
0534: c = getChar();
0535: escapeVal = Kit.xDigitToInt(c, escapeVal);
0536: // Next check takes care about c < 0 and bad escape
0537: if (escapeVal < 0) {
0538: break;
0539: }
0540: }
0541: if (escapeVal < 0) {
0542: parser.addError("msg.invalid.escape");
0543: return Token.ERROR;
0544: }
0545: addToString(escapeVal);
0546: isUnicodeEscapeStart = false;
0547: } else {
0548: c = getChar();
0549: if (c == '\\') {
0550: c = getChar();
0551: if (c == 'u') {
0552: isUnicodeEscapeStart = true;
0553: containsEscape = true;
0554: } else {
0555: parser
0556: .addError("msg.illegal.character");
0557: return Token.ERROR;
0558: }
0559: } else {
0560: if (c == EOF_CHAR
0561: || !Character
0562: .isJavaIdentifierPart((char) c)) {
0563: break;
0564: }
0565: addToString(c);
0566: }
0567: }
0568: }
0569: ungetChar(c);
0570:
0571: String str = getStringFromBuffer();
0572: if (!containsEscape) {
0573: // OPT we shouldn't have to make a string (object!) to
0574: // check if it's a keyword.
0575:
0576: // Return the corresponding token if it's a keyword
0577: int result = stringToKeyword(str);
0578: if (result != Token.EOF) {
0579: if ((result == Token.LET || result == Token.YIELD)
0580: && parser.compilerEnv
0581: .getLanguageVersion() < Context.VERSION_1_7) {
0582: // LET and YIELD are tokens only in 1.7 and later
0583: result = Token.NAME;
0584: }
0585: if (result != Token.RESERVED) {
0586: return result;
0587: } else if (!parser.compilerEnv
0588: .isReservedKeywordAsIdentifier()) {
0589: return result;
0590: } else {
0591: // If implementation permits to use future reserved
0592: // keywords in violation with the EcmaScript,
0593: // treat it as name but issue warning
0594: parser.addWarning("msg.reserved.keyword",
0595: str);
0596: }
0597: }
0598: }
0599: this .string = (String) allStrings.intern(str);
0600: return Token.NAME;
0601: }
0602:
0603: // is it a number?
0604: if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
0605:
0606: stringBufferTop = 0;
0607: int base = 10;
0608:
0609: if (c == '0') {
0610: c = getChar();
0611: if (c == 'x' || c == 'X') {
0612: base = 16;
0613: c = getChar();
0614: } else if (isDigit(c)) {
0615: base = 8;
0616: } else {
0617: addToString('0');
0618: }
0619: }
0620:
0621: if (base == 16) {
0622: while (0 <= Kit.xDigitToInt(c, 0)) {
0623: addToString(c);
0624: c = getChar();
0625: }
0626: } else {
0627: while ('0' <= c && c <= '9') {
0628: /*
0629: * We permit 08 and 09 as decimal numbers, which
0630: * makes our behavior a superset of the ECMA
0631: * numeric grammar. We might not always be so
0632: * permissive, so we warn about it.
0633: */
0634: if (base == 8 && c >= '8') {
0635: parser.addWarning("msg.bad.octal.literal",
0636: c == '8' ? "8" : "9");
0637: base = 10;
0638: }
0639: addToString(c);
0640: c = getChar();
0641: }
0642: }
0643:
0644: boolean isInteger = true;
0645:
0646: if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
0647: isInteger = false;
0648: if (c == '.') {
0649: do {
0650: addToString(c);
0651: c = getChar();
0652: } while (isDigit(c));
0653: }
0654: if (c == 'e' || c == 'E') {
0655: addToString(c);
0656: c = getChar();
0657: if (c == '+' || c == '-') {
0658: addToString(c);
0659: c = getChar();
0660: }
0661: if (!isDigit(c)) {
0662: parser.addError("msg.missing.exponent");
0663: return Token.ERROR;
0664: }
0665: do {
0666: addToString(c);
0667: c = getChar();
0668: } while (isDigit(c));
0669: }
0670: }
0671: ungetChar(c);
0672: String numString = getStringFromBuffer();
0673:
0674: double dval;
0675: if (base == 10 && !isInteger) {
0676: try {
0677: // Use Java conversion to number from string...
0678: dval = Double.valueOf(numString).doubleValue();
0679: } catch (NumberFormatException ex) {
0680: parser.addError("msg.caught.nfe");
0681: return Token.ERROR;
0682: }
0683: } else {
0684: dval = ScriptRuntime.stringToNumber(numString, 0,
0685: base);
0686: }
0687:
0688: this .number = dval;
0689: return Token.NUMBER;
0690: }
0691:
0692: // is it a string?
0693: if (c == '"' || c == '\'') {
0694: // We attempt to accumulate a string the fast way, by
0695: // building it directly out of the reader. But if there
0696: // are any escaped characters in the string, we revert to
0697: // building it out of a StringBuffer.
0698:
0699: int quoteChar = c;
0700: stringBufferTop = 0;
0701:
0702: c = getChar();
0703: strLoop: while (c != quoteChar) {
0704: if (c == '\n' || c == EOF_CHAR) {
0705: ungetChar(c);
0706: parser.addError("msg.unterminated.string.lit");
0707: return Token.ERROR;
0708: }
0709:
0710: if (c == '\\') {
0711: // We've hit an escaped character
0712: int escapeVal;
0713:
0714: c = getChar();
0715: switch (c) {
0716: case 'b':
0717: c = '\b';
0718: break;
0719: case 'f':
0720: c = '\f';
0721: break;
0722: case 'n':
0723: c = '\n';
0724: break;
0725: case 'r':
0726: c = '\r';
0727: break;
0728: case 't':
0729: c = '\t';
0730: break;
0731:
0732: // \v a late addition to the ECMA spec,
0733: // it is not in Java, so use 0xb
0734: case 'v':
0735: c = 0xb;
0736: break;
0737:
0738: case 'u':
0739: // Get 4 hex digits; if the u escape is not
0740: // followed by 4 hex digits, use 'u' + the
0741: // literal character sequence that follows.
0742: int escapeStart = stringBufferTop;
0743: addToString('u');
0744: escapeVal = 0;
0745: for (int i = 0; i != 4; ++i) {
0746: c = getChar();
0747: escapeVal = Kit.xDigitToInt(c,
0748: escapeVal);
0749: if (escapeVal < 0) {
0750: continue strLoop;
0751: }
0752: addToString(c);
0753: }
0754: // prepare for replace of stored 'u' sequence
0755: // by escape value
0756: stringBufferTop = escapeStart;
0757: c = escapeVal;
0758: break;
0759: case 'x':
0760: // Get 2 hex digits, defaulting to 'x'+literal
0761: // sequence, as above.
0762: c = getChar();
0763: escapeVal = Kit.xDigitToInt(c, 0);
0764: if (escapeVal < 0) {
0765: addToString('x');
0766: continue strLoop;
0767: } else {
0768: int c1 = c;
0769: c = getChar();
0770: escapeVal = Kit.xDigitToInt(c,
0771: escapeVal);
0772: if (escapeVal < 0) {
0773: addToString('x');
0774: addToString(c1);
0775: continue strLoop;
0776: } else {
0777: // got 2 hex digits
0778: c = escapeVal;
0779: }
0780: }
0781: break;
0782:
0783: case '\n':
0784: // Remove line terminator after escape to follow
0785: // SpiderMonkey and C/C++
0786: c = getChar();
0787: continue strLoop;
0788:
0789: default:
0790: if ('0' <= c && c < '8') {
0791: int val = c - '0';
0792: c = getChar();
0793: if ('0' <= c && c < '8') {
0794: val = 8 * val + c - '0';
0795: c = getChar();
0796: if ('0' <= c && c < '8'
0797: && val <= 037) {
0798: // c is 3rd char of octal sequence only
0799: // if the resulting val <= 0377
0800: val = 8 * val + c - '0';
0801: c = getChar();
0802: }
0803: }
0804: ungetChar(c);
0805: c = val;
0806: }
0807: }
0808: }
0809: addToString(c);
0810: c = getChar();
0811: }
0812:
0813: String str = getStringFromBuffer();
0814: this .string = (String) allStrings.intern(str);
0815: return Token.STRING;
0816: }
0817:
0818: switch (c) {
0819: case ';':
0820: return Token.SEMI;
0821: case '[':
0822: return Token.LB;
0823: case ']':
0824: return Token.RB;
0825: case '{':
0826: return Token.LC;
0827: case '}':
0828: return Token.RC;
0829: case '(':
0830: return Token.LP;
0831: case ')':
0832: return Token.RP;
0833: case ',':
0834: return Token.COMMA;
0835: case '?':
0836: return Token.HOOK;
0837: case ':':
0838: if (matchChar(':')) {
0839: return Token.COLONCOLON;
0840: } else {
0841: return Token.COLON;
0842: }
0843: case '.':
0844: if (matchChar('.')) {
0845: return Token.DOTDOT;
0846: } else if (matchChar('(')) {
0847: return Token.DOTQUERY;
0848: } else {
0849: return Token.DOT;
0850: }
0851:
0852: case '|':
0853: if (matchChar('|')) {
0854: return Token.OR;
0855: } else if (matchChar('=')) {
0856: return Token.ASSIGN_BITOR;
0857: } else {
0858: return Token.BITOR;
0859: }
0860:
0861: case '^':
0862: if (matchChar('=')) {
0863: return Token.ASSIGN_BITXOR;
0864: } else {
0865: return Token.BITXOR;
0866: }
0867:
0868: case '&':
0869: if (matchChar('&')) {
0870: return Token.AND;
0871: } else if (matchChar('=')) {
0872: return Token.ASSIGN_BITAND;
0873: } else {
0874: return Token.BITAND;
0875: }
0876:
0877: case '=':
0878: if (matchChar('=')) {
0879: if (matchChar('='))
0880: return Token.SHEQ;
0881: else
0882: return Token.EQ;
0883: } else {
0884: return Token.ASSIGN;
0885: }
0886:
0887: case '!':
0888: if (matchChar('=')) {
0889: if (matchChar('='))
0890: return Token.SHNE;
0891: else
0892: return Token.NE;
0893: } else {
0894: return Token.NOT;
0895: }
0896:
0897: case '<':
0898: /* NB:treat HTML begin-comment as comment-till-eol */
0899: if (matchChar('!')) {
0900: if (matchChar('-')) {
0901: if (matchChar('-')) {
0902: skipLine();
0903: continue retry;
0904: }
0905: ungetChar('-');
0906: }
0907: ungetChar('!');
0908: }
0909: if (matchChar('<')) {
0910: if (matchChar('=')) {
0911: return Token.ASSIGN_LSH;
0912: } else {
0913: return Token.LSH;
0914: }
0915: } else {
0916: if (matchChar('=')) {
0917: return Token.LE;
0918: } else {
0919: return Token.LT;
0920: }
0921: }
0922:
0923: case '>':
0924: if (matchChar('>')) {
0925: if (matchChar('>')) {
0926: if (matchChar('=')) {
0927: return Token.ASSIGN_URSH;
0928: } else {
0929: return Token.URSH;
0930: }
0931: } else {
0932: if (matchChar('=')) {
0933: return Token.ASSIGN_RSH;
0934: } else {
0935: return Token.RSH;
0936: }
0937: }
0938: } else {
0939: if (matchChar('=')) {
0940: return Token.GE;
0941: } else {
0942: return Token.GT;
0943: }
0944: }
0945:
0946: case '*':
0947: if (matchChar('=')) {
0948: return Token.ASSIGN_MUL;
0949: } else {
0950: return Token.MUL;
0951: }
0952:
0953: case '/':
0954: // is it a // comment?
0955: if (matchChar('/')) {
0956: skipLine();
0957: continue retry;
0958: }
0959: if (matchChar('*')) {
0960: boolean lookForSlash = false;
0961: for (;;) {
0962: c = getChar();
0963: if (c == EOF_CHAR) {
0964: parser.addError("msg.unterminated.comment");
0965: return Token.ERROR;
0966: } else if (c == '*') {
0967: lookForSlash = true;
0968: } else if (c == '/') {
0969: if (lookForSlash) {
0970: continue retry;
0971: }
0972: } else {
0973: lookForSlash = false;
0974: }
0975: }
0976: }
0977:
0978: if (matchChar('=')) {
0979: return Token.ASSIGN_DIV;
0980: } else {
0981: return Token.DIV;
0982: }
0983:
0984: case '%':
0985: if (matchChar('=')) {
0986: return Token.ASSIGN_MOD;
0987: } else {
0988: return Token.MOD;
0989: }
0990:
0991: case '~':
0992: return Token.BITNOT;
0993:
0994: case '+':
0995: if (matchChar('=')) {
0996: return Token.ASSIGN_ADD;
0997: } else if (matchChar('+')) {
0998: return Token.INC;
0999: } else {
1000: return Token.ADD;
1001: }
1002:
1003: case '-':
1004: if (matchChar('=')) {
1005: c = Token.ASSIGN_SUB;
1006: } else if (matchChar('-')) {
1007: if (!dirtyLine) {
1008: // treat HTML end-comment after possible whitespace
1009: // after line start as comment-utill-eol
1010: if (matchChar('>')) {
1011: skipLine();
1012: continue retry;
1013: }
1014: }
1015: c = Token.DEC;
1016: } else {
1017: c = Token.SUB;
1018: }
1019: dirtyLine = true;
1020: return c;
1021:
1022: default:
1023: parser.addError("msg.illegal.character");
1024: return Token.ERROR;
1025: }
1026: }
1027: }
1028:
1029: private static boolean isAlpha(int c) {
1030: // Use 'Z' < 'a'
1031: if (c <= 'Z') {
1032: return 'A' <= c;
1033: } else {
1034: return 'a' <= c && c <= 'z';
1035: }
1036: }
1037:
1038: static boolean isDigit(int c) {
1039: return '0' <= c && c <= '9';
1040: }
1041:
1042: /* As defined in ECMA. jsscan.c uses C isspace() (which allows
1043: * \v, I think.) note that code in getChar() implicitly accepts
1044: * '\r' ==
1045: as well.
1046: */
1047: static boolean isJSSpace(int c) {
1048: if (c <= 127) {
1049: return c == 0x20 || c == 0x9 || c == 0xC || c == 0xB;
1050: } else {
1051: return c == 0xA0
1052: || Character.getType((char) c) == Character.SPACE_SEPARATOR;
1053: }
1054: }
1055:
1056: private static boolean isJSFormatChar(int c) {
1057: return c > 127
1058: && Character.getType((char) c) == Character.FORMAT;
1059: }
1060:
1061: /**
1062: * Parser calls the method when it gets / or /= in literal context.
1063: */
1064: void readRegExp(int startToken) throws IOException {
1065: stringBufferTop = 0;
1066: if (startToken == Token.ASSIGN_DIV) {
1067: // Miss-scanned /=
1068: addToString('=');
1069: } else {
1070: if (startToken != Token.DIV)
1071: Kit.codeBug();
1072: }
1073:
1074: int c;
1075: while ((c = getChar()) != '/') {
1076: if (c == '\n' || c == EOF_CHAR) {
1077: ungetChar(c);
1078: throw parser.reportError("msg.unterminated.re.lit");
1079: }
1080: if (c == '\\') {
1081: addToString(c);
1082: c = getChar();
1083: }
1084:
1085: addToString(c);
1086: }
1087: int reEnd = stringBufferTop;
1088:
1089: while (true) {
1090: if (matchChar('g'))
1091: addToString('g');
1092: else if (matchChar('i'))
1093: addToString('i');
1094: else if (matchChar('m'))
1095: addToString('m');
1096: else
1097: break;
1098: }
1099:
1100: if (isAlpha(peekChar())) {
1101: throw parser.reportError("msg.invalid.re.flag");
1102: }
1103:
1104: this .string = new String(stringBuffer, 0, reEnd);
1105: this .regExpFlags = new String(stringBuffer, reEnd,
1106: stringBufferTop - reEnd);
1107: }
1108:
1109: boolean isXMLAttribute() {
1110: return xmlIsAttribute;
1111: }
1112:
1113: int getFirstXMLToken() throws IOException {
1114: xmlOpenTagsCount = 0;
1115: xmlIsAttribute = false;
1116: xmlIsTagContent = false;
1117: ungetChar('<');
1118: return getNextXMLToken();
1119: }
1120:
1121: int getNextXMLToken() throws IOException {
1122: stringBufferTop = 0; // remember the XML
1123:
1124: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1125: if (xmlIsTagContent) {
1126: switch (c) {
1127: case '>':
1128: addToString(c);
1129: xmlIsTagContent = false;
1130: xmlIsAttribute = false;
1131: break;
1132: case '/':
1133: addToString(c);
1134: if (peekChar() == '>') {
1135: c = getChar();
1136: addToString(c);
1137: xmlIsTagContent = false;
1138: xmlOpenTagsCount--;
1139: }
1140: break;
1141: case '{':
1142: ungetChar(c);
1143: this .string = getStringFromBuffer();
1144: return Token.XML;
1145: case '\'':
1146: case '"':
1147: addToString(c);
1148: if (!readQuotedString(c))
1149: return Token.ERROR;
1150: break;
1151: case '=':
1152: addToString(c);
1153: xmlIsAttribute = true;
1154: break;
1155: case ' ':
1156: case '\t':
1157: case '\r':
1158: case '\n':
1159: addToString(c);
1160: break;
1161: default:
1162: addToString(c);
1163: xmlIsAttribute = false;
1164: break;
1165: }
1166:
1167: if (!xmlIsTagContent && xmlOpenTagsCount == 0) {
1168: this .string = getStringFromBuffer();
1169: return Token.XMLEND;
1170: }
1171: } else {
1172: switch (c) {
1173: case '<':
1174: addToString(c);
1175: c = peekChar();
1176: switch (c) {
1177: case '!':
1178: c = getChar(); // Skip !
1179: addToString(c);
1180: c = peekChar();
1181: switch (c) {
1182: case '-':
1183: c = getChar(); // Skip -
1184: addToString(c);
1185: c = getChar();
1186: if (c == '-') {
1187: addToString(c);
1188: if (!readXmlComment())
1189: return Token.ERROR;
1190: } else {
1191: // throw away the string in progress
1192: stringBufferTop = 0;
1193: this .string = null;
1194: parser.addError("msg.XML.bad.form");
1195: return Token.ERROR;
1196: }
1197: break;
1198: case '[':
1199: c = getChar(); // Skip [
1200: addToString(c);
1201: if (getChar() == 'C' && getChar() == 'D'
1202: && getChar() == 'A'
1203: && getChar() == 'T'
1204: && getChar() == 'A'
1205: && getChar() == '[') {
1206: addToString('C');
1207: addToString('D');
1208: addToString('A');
1209: addToString('T');
1210: addToString('A');
1211: addToString('[');
1212: if (!readCDATA())
1213: return Token.ERROR;
1214:
1215: } else {
1216: // throw away the string in progress
1217: stringBufferTop = 0;
1218: this .string = null;
1219: parser.addError("msg.XML.bad.form");
1220: return Token.ERROR;
1221: }
1222: break;
1223: default:
1224: if (!readEntity())
1225: return Token.ERROR;
1226: break;
1227: }
1228: break;
1229: case '?':
1230: c = getChar(); // Skip ?
1231: addToString(c);
1232: if (!readPI())
1233: return Token.ERROR;
1234: break;
1235: case '/':
1236: // End tag
1237: c = getChar(); // Skip /
1238: addToString(c);
1239: if (xmlOpenTagsCount == 0) {
1240: // throw away the string in progress
1241: stringBufferTop = 0;
1242: this .string = null;
1243: parser.addError("msg.XML.bad.form");
1244: return Token.ERROR;
1245: }
1246: xmlIsTagContent = true;
1247: xmlOpenTagsCount--;
1248: break;
1249: default:
1250: // Start tag
1251: xmlIsTagContent = true;
1252: xmlOpenTagsCount++;
1253: break;
1254: }
1255: break;
1256: case '{':
1257: ungetChar(c);
1258: this .string = getStringFromBuffer();
1259: return Token.XML;
1260: default:
1261: addToString(c);
1262: break;
1263: }
1264: }
1265: }
1266:
1267: stringBufferTop = 0; // throw away the string in progress
1268: this .string = null;
1269: parser.addError("msg.XML.bad.form");
1270: return Token.ERROR;
1271: }
1272:
1273: /**
1274: *
1275: */
1276: private boolean readQuotedString(int quote) throws IOException {
1277: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1278: addToString(c);
1279: if (c == quote)
1280: return true;
1281: }
1282:
1283: stringBufferTop = 0; // throw away the string in progress
1284: this .string = null;
1285: parser.addError("msg.XML.bad.form");
1286: return false;
1287: }
1288:
1289: /**
1290: *
1291: */
1292: private boolean readXmlComment() throws IOException {
1293: for (int c = getChar(); c != EOF_CHAR;) {
1294: addToString(c);
1295: if (c == '-' && peekChar() == '-') {
1296: c = getChar();
1297: addToString(c);
1298: if (peekChar() == '>') {
1299: c = getChar(); // Skip >
1300: addToString(c);
1301: return true;
1302: } else {
1303: continue;
1304: }
1305: }
1306: c = getChar();
1307: }
1308:
1309: stringBufferTop = 0; // throw away the string in progress
1310: this .string = null;
1311: parser.addError("msg.XML.bad.form");
1312: return false;
1313: }
1314:
1315: /**
1316: *
1317: */
1318: private boolean readCDATA() throws IOException {
1319: for (int c = getChar(); c != EOF_CHAR;) {
1320: addToString(c);
1321: if (c == ']' && peekChar() == ']') {
1322: c = getChar();
1323: addToString(c);
1324: if (peekChar() == '>') {
1325: c = getChar(); // Skip >
1326: addToString(c);
1327: return true;
1328: } else {
1329: continue;
1330: }
1331: }
1332: c = getChar();
1333: }
1334:
1335: stringBufferTop = 0; // throw away the string in progress
1336: this .string = null;
1337: parser.addError("msg.XML.bad.form");
1338: return false;
1339: }
1340:
1341: /**
1342: *
1343: */
1344: private boolean readEntity() throws IOException {
1345: int declTags = 1;
1346: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1347: addToString(c);
1348: switch (c) {
1349: case '<':
1350: declTags++;
1351: break;
1352: case '>':
1353: declTags--;
1354: if (declTags == 0)
1355: return true;
1356: break;
1357: }
1358: }
1359:
1360: stringBufferTop = 0; // throw away the string in progress
1361: this .string = null;
1362: parser.addError("msg.XML.bad.form");
1363: return false;
1364: }
1365:
1366: /**
1367: *
1368: */
1369: private boolean readPI() throws IOException {
1370: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1371: addToString(c);
1372: if (c == '?' && peekChar() == '>') {
1373: c = getChar(); // Skip >
1374: addToString(c);
1375: return true;
1376: }
1377: }
1378:
1379: stringBufferTop = 0; // throw away the string in progress
1380: this .string = null;
1381: parser.addError("msg.XML.bad.form");
1382: return false;
1383: }
1384:
1385: private String getStringFromBuffer() {
1386: return new String(stringBuffer, 0, stringBufferTop);
1387: }
1388:
1389: private void addToString(int c) {
1390: int N = stringBufferTop;
1391: if (N == stringBuffer.length) {
1392: char[] tmp = new char[stringBuffer.length * 2];
1393: System.arraycopy(stringBuffer, 0, tmp, 0, N);
1394: stringBuffer = tmp;
1395: }
1396: stringBuffer[N] = (char) c;
1397: stringBufferTop = N + 1;
1398: }
1399:
1400: private void ungetChar(int c) {
1401: // can not unread past across line boundary
1402: if (ungetCursor != 0 && ungetBuffer[ungetCursor - 1] == '\n')
1403: Kit.codeBug();
1404: ungetBuffer[ungetCursor++] = c;
1405: }
1406:
1407: private boolean matchChar(int test) throws IOException {
1408: int c = getChar();
1409: if (c == test) {
1410: return true;
1411: } else {
1412: ungetChar(c);
1413: return false;
1414: }
1415: }
1416:
1417: private int peekChar() throws IOException {
1418: int c = getChar();
1419: ungetChar(c);
1420: return c;
1421: }
1422:
1423: private int getChar() throws IOException {
1424: if (ungetCursor != 0) {
1425: return ungetBuffer[--ungetCursor];
1426: }
1427:
1428: for (;;) {
1429: int c;
1430: if (sourceString != null) {
1431: if (sourceCursor == sourceEnd) {
1432: hitEOF = true;
1433: return EOF_CHAR;
1434: }
1435: c = sourceString.charAt(sourceCursor++);
1436: } else {
1437: if (sourceCursor == sourceEnd) {
1438: if (!fillSourceBuffer()) {
1439: hitEOF = true;
1440: return EOF_CHAR;
1441: }
1442: }
1443: c = sourceBuffer[sourceCursor++];
1444: }
1445:
1446: if (lineEndChar >= 0) {
1447: if (lineEndChar == '\r' && c == '\n') {
1448: lineEndChar = '\n';
1449: continue;
1450: }
1451: lineEndChar = -1;
1452: lineStart = sourceCursor - 1;
1453: lineno++;
1454: }
1455:
1456: if (c <= 127) {
1457: if (c == '\n' || c == '\r') {
1458: lineEndChar = c;
1459: c = '\n';
1460: }
1461: } else {
1462: if (isJSFormatChar(c)) {
1463: continue;
1464: }
1465: if (ScriptRuntime.isJSLineTerminator(c)) {
1466: lineEndChar = c;
1467: c = '\n';
1468: }
1469: }
1470: return c;
1471: }
1472: }
1473:
1474: private void skipLine() throws IOException {
1475: // skip to end of line
1476: int c;
1477: while ((c = getChar()) != EOF_CHAR && c != '\n') {
1478: }
1479: ungetChar(c);
1480: }
1481:
1482: final int getOffset() {
1483: int n = sourceCursor - lineStart;
1484: if (lineEndChar >= 0) {
1485: --n;
1486: }
1487: return n;
1488: }
1489:
1490: final String getLine() {
1491: if (sourceString != null) {
1492: // String case
1493: int lineEnd = sourceCursor;
1494: if (lineEndChar >= 0) {
1495: --lineEnd;
1496: } else {
1497: for (; lineEnd != sourceEnd; ++lineEnd) {
1498: int c = sourceString.charAt(lineEnd);
1499: if (ScriptRuntime.isJSLineTerminator(c)) {
1500: break;
1501: }
1502: }
1503: }
1504: return sourceString.substring(lineStart, lineEnd);
1505: } else {
1506: // Reader case
1507: int lineLength = sourceCursor - lineStart;
1508: if (lineEndChar >= 0) {
1509: --lineLength;
1510: } else {
1511: // Read until the end of line
1512: for (;; ++lineLength) {
1513: int i = lineStart + lineLength;
1514: if (i == sourceEnd) {
1515: try {
1516: if (!fillSourceBuffer()) {
1517: break;
1518: }
1519: } catch (IOException ioe) {
1520: // ignore it, we're already displaying an error...
1521: break;
1522: }
1523: // i recalculuation as fillSourceBuffer can move saved
1524: // line buffer and change lineStart
1525: i = lineStart + lineLength;
1526: }
1527: int c = sourceBuffer[i];
1528: if (ScriptRuntime.isJSLineTerminator(c)) {
1529: break;
1530: }
1531: }
1532: }
1533: return new String(sourceBuffer, lineStart, lineLength);
1534: }
1535: }
1536:
1537: private boolean fillSourceBuffer() throws IOException {
1538: if (sourceString != null)
1539: Kit.codeBug();
1540: if (sourceEnd == sourceBuffer.length) {
1541: if (lineStart != 0) {
1542: System.arraycopy(sourceBuffer, lineStart, sourceBuffer,
1543: 0, sourceEnd - lineStart);
1544: sourceEnd -= lineStart;
1545: sourceCursor -= lineStart;
1546: lineStart = 0;
1547: } else {
1548: char[] tmp = new char[sourceBuffer.length * 2];
1549: System.arraycopy(sourceBuffer, 0, tmp, 0, sourceEnd);
1550: sourceBuffer = tmp;
1551: }
1552: }
1553: int n = sourceReader.read(sourceBuffer, sourceEnd,
1554: sourceBuffer.length - sourceEnd);
1555: if (n < 0) {
1556: return false;
1557: }
1558: sourceEnd += n;
1559: return true;
1560: }
1561:
1562: // stuff other than whitespace since start of line
1563: private boolean dirtyLine;
1564:
1565: String regExpFlags;
1566:
1567: // Set this to an inital non-null value so that the Parser has
1568: // something to retrieve even if an error has occured and no
1569: // string is found. Fosters one class of error, but saves lots of
1570: // code.
1571: private String string = "";
1572: private double number;
1573:
1574: private char[] stringBuffer = new char[128];
1575: private int stringBufferTop;
1576: private ObjToIntMap allStrings = new ObjToIntMap(50);
1577:
1578: // Room to backtrace from to < on failed match of the last - in <!--
1579: private final int[] ungetBuffer = new int[3];
1580: private int ungetCursor;
1581:
1582: private boolean hitEOF = false;
1583:
1584: private int lineStart = 0;
1585: private int lineno;
1586: private int lineEndChar = -1;
1587:
1588: private String sourceString;
1589: private Reader sourceReader;
1590: private char[] sourceBuffer;
1591: private int sourceEnd;
1592: private int sourceCursor;
1593:
1594: // for xml tokenizer
1595: private boolean xmlIsAttribute;
1596: private boolean xmlIsTagContent;
1597: private int xmlOpenTagsCount;
1598:
1599: private Parser parser;
1600: }
|