0001: /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
0002: *
0003: * ***** BEGIN LICENSE BLOCK *****
0004: * Version: MPL 1.1/GPL 2.0
0005: *
0006: * The contents of this file are subject to the Mozilla Public License Version
0007: * 1.1 (the "License"); you may not use this file except in compliance with
0008: * the License. You may obtain a copy of the License at
0009: * http://www.mozilla.org/MPL/
0010: *
0011: * Software distributed under the License is distributed on an "AS IS" basis,
0012: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
0013: * for the specific language governing rights and limitations under the
0014: * License.
0015: *
0016: * The Original Code is Rhino code, released
0017: * May 6, 1999.
0018: *
0019: * The Initial Developer of the Original Code is
0020: * Netscape Communications Corporation.
0021: * Portions created by the Initial Developer are Copyright (C) 1997-1999
0022: * the Initial Developer. All Rights Reserved.
0023: *
0024: * Contributor(s):
0025: * Roger Lawrence
0026: * Mike McCabe
0027: * Igor Bukanov
0028: * Ethan Hugg
0029: * Bob Jervis
0030: * Terry Lucas
0031: * Milen Nankov
0032: *
0033: * Alternatively, the contents of this file may be used under the terms of
0034: * the GNU General Public License Version 2 or later (the "GPL"), in which
0035: * case the provisions of the GPL are applicable instead of those above. If
0036: * you wish to allow use of your version of this file only under the terms of
0037: * the GPL and not to allow others to use your version of this file under the
0038: * MPL, indicate your decision by deleting the provisions above and replacing
0039: * them with the notice and other provisions required by the GPL. If you do
0040: * not delete the provisions above, a recipient may use your version of this
0041: * file under either the MPL or the GPL.
0042: *
0043: * ***** END LICENSE BLOCK ***** */
0044:
0045: package org.mozilla.javascript;
0046:
0047: import java.io.*;
0048: import org.netbeans.spi.lexer.LexerInput;
0049:
0050: /**
0051: * This class implements the JavaScript scanner.
0052: *
0053: * It is based on the C source files jsscan.c and jsscan.h
0054: * in the jsref package.
0055: *
0056: * @see org.mozilla.javascript.Parser
0057: *
0058: * @author Mike McCabe
0059: * @author Brendan Eich
0060: */
0061:
0062: // <netbeans>
0063: public// </netbeans>
0064: class TokenStream {
0065: /*
0066: * For chars - because we need something out-of-range
0067: * to check. (And checking EOF by exception is annoying.)
0068: * Note distinction from EOF token type!
0069: */
0070: private final static int EOF_CHAR = -1;
0071:
0072: // <netbeans>
0073: private LexerInput lexerInput;
0074:
0075: public void setInput(LexerInput lexerInput) {
0076: this .lexerInput = lexerInput;
0077: }
0078:
/**
 * Constructs a token stream suitable for syntax-highlighting lexing: it
 * returns whitespace and comment tokens, reads through a LexerInput, etc.
 *
 * @param parser       parser used for error and warning reporting
 * @param lexerInput   lexer input backing the stream
 * @param sourceReader source reader, or null when sourceString is given
 * @param sourceString source text, or null when sourceReader is given
 * @param lineno       initial line number
 */
public TokenStream(Parser parser, LexerInput lexerInput,
        Reader sourceReader, String sourceString, int lineno) {
    this(parser, sourceReader, sourceString, lineno);
    this.syntaxLexing = true;
    this.lexerInput = lexerInput;
}
0087:
0088: // </netbeans>
0089:
/**
 * Creates a token stream over exactly one source: either a reader or a
 * string. Supplying both, or neither, triggers Kit.codeBug().
 *
 * @param parser       parser used for error and warning reporting
 * @param sourceReader source reader, or null when sourceString is given
 * @param sourceString source text, or null when sourceReader is given
 * @param lineno       initial line number
 */
TokenStream(Parser parser, Reader sourceReader,
        String sourceString, int lineno) {
    this.parser = parser;
    this.lineno = lineno;
    if (sourceReader == null) {
        // String-backed source: the whole text is available up front.
        if (sourceString == null) {
            Kit.codeBug();
        }
        this.sourceString = sourceString;
        this.sourceEnd = sourceString.length();
    } else {
        // Reader-backed source: start with an empty 512-char buffer.
        if (sourceString != null) {
            Kit.codeBug();
        }
        this.sourceReader = sourceReader;
        this.sourceBuffer = new char[512];
        this.sourceEnd = 0;
    }
    this.sourceCursor = 0;
}
0108:
0109: /* This function uses the cached op, string and number fields in
0110: * TokenStream; if getToken has been called since the passed token
0111: * was scanned, the op or string printed may be incorrect.
0112: */
0113: String tokenToString(int token) {
0114: if (Token.printTrees) {
0115: String name = Token.name(token);
0116:
0117: switch (token) {
0118: case Token.STRING:
0119: case Token.REGEXP:
0120: case Token.NAME:
0121: return name + " `" + this .string + "'";
0122:
0123: case Token.NUMBER:
0124: return "NUMBER " + this .number;
0125: }
0126:
0127: return name;
0128: }
0129: return "";
0130: }
0131:
0132: static boolean isKeyword(String s) {
0133: return Token.EOF != stringToKeyword(s);
0134: }
0135:
0136: private static int stringToKeyword(String name) {
0137: // #string_id_map#
0138: // The following assumes that Token.EOF == 0
0139: final int Id_break = Token.BREAK, Id_case = Token.CASE, Id_continue = Token.CONTINUE, Id_default = Token.DEFAULT, Id_delete = Token.DELPROP, Id_do = Token.DO, Id_else = Token.ELSE, Id_export = Token.EXPORT, Id_false = Token.FALSE, Id_for = Token.FOR, Id_function = Token.FUNCTION, Id_if = Token.IF, Id_in = Token.IN, Id_new = Token.NEW, Id_null = Token.NULL, Id_return = Token.RETURN, Id_switch = Token.SWITCH, Id_this = Token.THIS, Id_true = Token.TRUE, Id_typeof = Token.TYPEOF, Id_var = Token.VAR, Id_void = Token.VOID, Id_while = Token.WHILE, Id_with = Token.WITH,
0140:
0141: // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
0142: Id_abstract = Token.RESERVED, Id_boolean = Token.RESERVED, Id_byte = Token.RESERVED, Id_catch = Token.CATCH, Id_char = Token.RESERVED, Id_class = Token.RESERVED, Id_const = Token.CONST, Id_debugger = Token.RESERVED, Id_double = Token.RESERVED, Id_enum = Token.RESERVED, Id_extends = Token.RESERVED, Id_final = Token.RESERVED, Id_finally = Token.FINALLY, Id_float = Token.RESERVED, Id_goto = Token.RESERVED, Id_implements = Token.RESERVED, Id_import = Token.IMPORT, Id_instanceof = Token.INSTANCEOF, Id_int = Token.RESERVED, Id_interface = Token.RESERVED, Id_long = Token.RESERVED, Id_native = Token.RESERVED, Id_package = Token.RESERVED, Id_private = Token.RESERVED, Id_protected = Token.RESERVED, Id_public = Token.RESERVED, Id_short = Token.RESERVED, Id_static = Token.RESERVED, Id_super = Token.RESERVED, Id_synchronized = Token.RESERVED, Id_throw = Token.THROW, Id_throws = Token.RESERVED, Id_transient = Token.RESERVED, Id_try = Token.TRY, Id_volatile = Token.RESERVED;
0143:
0144: int id;
0145: String s = name;
0146: // #generated# Last update: 2001-06-01 17:45:01 CEST
0147: L0: {
0148: id = 0;
0149: String X = null;
0150: int c;
0151: L: switch (s.length()) {
0152: case 2:
0153: c = s.charAt(1);
0154: if (c == 'f') {
0155: if (s.charAt(0) == 'i') {
0156: id = Id_if;
0157: break L0;
0158: }
0159: } else if (c == 'n') {
0160: if (s.charAt(0) == 'i') {
0161: id = Id_in;
0162: break L0;
0163: }
0164: } else if (c == 'o') {
0165: if (s.charAt(0) == 'd') {
0166: id = Id_do;
0167: break L0;
0168: }
0169: }
0170: break L;
0171: case 3:
0172: switch (s.charAt(0)) {
0173: case 'f':
0174: if (s.charAt(2) == 'r' && s.charAt(1) == 'o') {
0175: id = Id_for;
0176: break L0;
0177: }
0178: break L;
0179: case 'i':
0180: if (s.charAt(2) == 't' && s.charAt(1) == 'n') {
0181: id = Id_int;
0182: break L0;
0183: }
0184: break L;
0185: case 'n':
0186: if (s.charAt(2) == 'w' && s.charAt(1) == 'e') {
0187: id = Id_new;
0188: break L0;
0189: }
0190: break L;
0191: case 't':
0192: if (s.charAt(2) == 'y' && s.charAt(1) == 'r') {
0193: id = Id_try;
0194: break L0;
0195: }
0196: break L;
0197: case 'v':
0198: if (s.charAt(2) == 'r' && s.charAt(1) == 'a') {
0199: id = Id_var;
0200: break L0;
0201: }
0202: break L;
0203: }
0204: break L;
0205: case 4:
0206: switch (s.charAt(0)) {
0207: case 'b':
0208: X = "byte";
0209: id = Id_byte;
0210: break L;
0211: case 'c':
0212: c = s.charAt(3);
0213: if (c == 'e') {
0214: if (s.charAt(2) == 's' && s.charAt(1) == 'a') {
0215: id = Id_case;
0216: break L0;
0217: }
0218: } else if (c == 'r') {
0219: if (s.charAt(2) == 'a' && s.charAt(1) == 'h') {
0220: id = Id_char;
0221: break L0;
0222: }
0223: }
0224: break L;
0225: case 'e':
0226: c = s.charAt(3);
0227: if (c == 'e') {
0228: if (s.charAt(2) == 's' && s.charAt(1) == 'l') {
0229: id = Id_else;
0230: break L0;
0231: }
0232: } else if (c == 'm') {
0233: if (s.charAt(2) == 'u' && s.charAt(1) == 'n') {
0234: id = Id_enum;
0235: break L0;
0236: }
0237: }
0238: break L;
0239: case 'g':
0240: X = "goto";
0241: id = Id_goto;
0242: break L;
0243: case 'l':
0244: X = "long";
0245: id = Id_long;
0246: break L;
0247: case 'n':
0248: X = "null";
0249: id = Id_null;
0250: break L;
0251: case 't':
0252: c = s.charAt(3);
0253: if (c == 'e') {
0254: if (s.charAt(2) == 'u' && s.charAt(1) == 'r') {
0255: id = Id_true;
0256: break L0;
0257: }
0258: } else if (c == 's') {
0259: if (s.charAt(2) == 'i' && s.charAt(1) == 'h') {
0260: id = Id_this ;
0261: break L0;
0262: }
0263: }
0264: break L;
0265: case 'v':
0266: X = "void";
0267: id = Id_void;
0268: break L;
0269: case 'w':
0270: X = "with";
0271: id = Id_with;
0272: break L;
0273: }
0274: break L;
0275: case 5:
0276: switch (s.charAt(2)) {
0277: case 'a':
0278: X = "class";
0279: id = Id_class;
0280: break L;
0281: case 'e':
0282: X = "break";
0283: id = Id_break;
0284: break L;
0285: case 'i':
0286: X = "while";
0287: id = Id_while;
0288: break L;
0289: case 'l':
0290: X = "false";
0291: id = Id_false;
0292: break L;
0293: case 'n':
0294: c = s.charAt(0);
0295: if (c == 'c') {
0296: X = "const";
0297: id = Id_const;
0298: } else if (c == 'f') {
0299: X = "final";
0300: id = Id_final;
0301: }
0302: break L;
0303: case 'o':
0304: c = s.charAt(0);
0305: if (c == 'f') {
0306: X = "float";
0307: id = Id_float;
0308: } else if (c == 's') {
0309: X = "short";
0310: id = Id_short;
0311: }
0312: break L;
0313: case 'p':
0314: X = "super";
0315: id = Id_super ;
0316: break L;
0317: case 'r':
0318: X = "throw";
0319: id = Id_throw;
0320: break L;
0321: case 't':
0322: X = "catch";
0323: id = Id_catch;
0324: break L;
0325: }
0326: break L;
0327: case 6:
0328: switch (s.charAt(1)) {
0329: case 'a':
0330: X = "native";
0331: id = Id_native;
0332: break L;
0333: case 'e':
0334: c = s.charAt(0);
0335: if (c == 'd') {
0336: X = "delete";
0337: id = Id_delete;
0338: } else if (c == 'r') {
0339: X = "return";
0340: id = Id_return;
0341: }
0342: break L;
0343: case 'h':
0344: X = "throws";
0345: id = Id_throws;
0346: break L;
0347: case 'm':
0348: X = "import";
0349: id = Id_import;
0350: break L;
0351: case 'o':
0352: X = "double";
0353: id = Id_double;
0354: break L;
0355: case 't':
0356: X = "static";
0357: id = Id_static;
0358: break L;
0359: case 'u':
0360: X = "public";
0361: id = Id_public;
0362: break L;
0363: case 'w':
0364: X = "switch";
0365: id = Id_switch;
0366: break L;
0367: case 'x':
0368: X = "export";
0369: id = Id_export;
0370: break L;
0371: case 'y':
0372: X = "typeof";
0373: id = Id_typeof;
0374: break L;
0375: }
0376: break L;
0377: case 7:
0378: switch (s.charAt(1)) {
0379: case 'a':
0380: X = "package";
0381: id = Id_package;
0382: break L;
0383: case 'e':
0384: X = "default";
0385: id = Id_default;
0386: break L;
0387: case 'i':
0388: X = "finally";
0389: id = Id_finally;
0390: break L;
0391: case 'o':
0392: X = "boolean";
0393: id = Id_boolean;
0394: break L;
0395: case 'r':
0396: X = "private";
0397: id = Id_private;
0398: break L;
0399: case 'x':
0400: X = "extends";
0401: id = Id_extends;
0402: break L;
0403: }
0404: break L;
0405: case 8:
0406: switch (s.charAt(0)) {
0407: case 'a':
0408: X = "abstract";
0409: id = Id_abstract;
0410: break L;
0411: case 'c':
0412: X = "continue";
0413: id = Id_continue;
0414: break L;
0415: case 'd':
0416: X = "debugger";
0417: id = Id_debugger;
0418: break L;
0419: case 'f':
0420: X = "function";
0421: id = Id_function;
0422: break L;
0423: case 'v':
0424: X = "volatile";
0425: id = Id_volatile;
0426: break L;
0427: }
0428: break L;
0429: case 9:
0430: c = s.charAt(0);
0431: if (c == 'i') {
0432: X = "interface";
0433: id = Id_interface;
0434: } else if (c == 'p') {
0435: X = "protected";
0436: id = Id_protected;
0437: } else if (c == 't') {
0438: X = "transient";
0439: id = Id_transient;
0440: }
0441: break L;
0442: case 10:
0443: c = s.charAt(1);
0444: if (c == 'm') {
0445: X = "implements";
0446: id = Id_implements ;
0447: } else if (c == 'n') {
0448: X = "instanceof";
0449: id = Id_instanceof ;
0450: }
0451: break L;
0452: case 12:
0453: X = "synchronized";
0454: id = Id_synchronized;
0455: break L;
0456: }
0457: if (X != null && X != s && !X.equals(s))
0458: id = 0;
0459: }
0460: // #/generated#
0461: // #/string_id_map#
0462: if (id == 0) {
0463: return Token.EOF;
0464: }
0465: return id & 0xff;
0466: }
0467:
0468: // <netbeans>
0469: public// </netbeans>
0470: final int getLineno() {
0471: return lineno;
0472: }
0473:
0474: // <netbeans>
0475: public// </netbeans>
0476: final String getString() {
0477: return string;
0478: }
0479:
0480: // <netbeans>
0481: public// </netbeans>
0482: final double getNumber() {
0483: return number;
0484: }
0485:
0486: // <netbeans>
0487: public// </netbeans>
0488: final boolean eof() {
0489: return hitEOF;
0490: }
0491:
0492: // <netbeans>
0493: public int seenSpaces() {
0494: return seenSpaces;
0495: }
0496:
0497: private int seenSpaces;
0498:
0499: // </netbeans>
0500:
0501: // <netbeans>
0502: public int getToken() throws IOException {
0503: // Split strings and regexps into separate begin, literal, end tokens
0504: if (syntaxLexing && stringMode != NO_LITERAL) {
0505: switch (stringMode) {
0506: case IN_ERROR: {
0507: for (int i = 1, n = string.length(); i < n; i++) {
0508: int c = lexerInput.read();
0509: assert c == string.charAt(i) : string + i + ":"
0510: + (char) c;
0511: }
0512: stringMode = NO_LITERAL;
0513: return Token.ERROR;
0514: }
0515: case IN_STRING: {
0516: if (string.length() == 2) {
0517: int c = lexerInput.read();
0518: assert c == '"' || c == '\'' : (char) c;
0519: stringMode = NO_LITERAL;
0520: return Token.STRING_END;
0521: } else {
0522: for (int i = 1, n = string.length() - 1; i < n; i++) {
0523: int c = lexerInput.read();
0524: assert c == string.charAt(i) : string + i + ":"
0525: + (char) c;
0526: }
0527: stringMode = END_STRING;
0528: return Token.STRING;
0529: }
0530: }
0531: case END_STRING: {
0532: int c = lexerInput.read();
0533: assert c == '"' || c == '\'' : (char) c;
0534: stringMode = NO_LITERAL;
0535: return Token.STRING_END;
0536: }
0537: case IN_REGEXP: {
0538: if (string.length() == 2) {
0539: int c = lexerInput.read();
0540: assert c == '/' : (char) c;
0541: stringMode = NO_LITERAL;
0542: return Token.REGEXP_END;
0543: } else {
0544: int last = string.lastIndexOf('/');
0545: for (int i = 1; i < last; i++) {
0546: int c = lexerInput.read();
0547: assert c == string.charAt(i) : string + i + ":"
0548: + (char) c;
0549: }
0550: stringMode = END_REGEXP;
0551: return Token.REGEXP;
0552: }
0553: }
0554: case END_REGEXP: {
0555: int last = string.lastIndexOf('/');
0556: for (int i = last, n = string.length(); i < n; i++) {
0557: int c = lexerInput.read();
0558: assert c == string.charAt(i) : string + i + ":"
0559: + (char) c;
0560: }
0561: stringMode = NO_LITERAL;
0562: return Token.REGEXP_END;
0563: }
0564: }
0565: assert false : stringMode;
0566: }
0567:
0568: int token = privateGetToken();
0569: if (syntaxLexing) {
0570: if (token != Token.WHITESPACE && token != Token.EOL) {
0571: // Update divIsRegexp state
0572: // THIS IS NOT COMPLETE; there are other possible constructions with operators but covers 90+% of cases
0573: divIsRegexp = (token == Token.LP
0574: || token == Token.COMMA || token == Token.NOT
0575: || token == Token.ASSIGN || token == Token.SEMI || token == Token.COLON);
0576: }
0577: if (token == Token.STRING) {
0578: assert stringMode == NO_LITERAL;
0579: // TODO - can I just use string?
0580: //assert string == string && string.equals(string);
0581: string = lexerInput.readText().toString();
0582: int restLength = string.length() - 1;
0583: if (restLength > 0) {
0584: lexerInput.backup(restLength);
0585: stringMode = IN_STRING;
0586: } else {
0587: stringMode = NO_LITERAL;
0588: }
0589: token = Token.STRING_BEGIN;
0590: } else if (token == Token.REGEXP) {
0591: assert stringMode == NO_LITERAL;
0592: // TODO - can I just use string?
0593: //assert string == string && string.equals(string);
0594: string = lexerInput.readText().toString();
0595: int restLength = string.length() - 1;
0596: if (restLength > 0) {
0597: lexerInput.backup(restLength);
0598: stringMode = IN_REGEXP;
0599: } else {
0600: stringMode = NO_LITERAL;
0601: }
0602: token = Token.REGEXP_BEGIN;
0603: } else if (token == Token.REGEXP_ERROR) {
0604: assert stringMode == NO_LITERAL;
0605: string = lexerInput.readText().toString();
0606: int restLength = string.length() - 1;
0607: if (restLength > 0) {
0608: lexerInput.backup(restLength);
0609: stringMode = IN_ERROR;
0610: } else {
0611: stringMode = NO_LITERAL;
0612: }
0613: token = Token.REGEXP_BEGIN;
0614: } else if (token == Token.STRING_ERROR) {
0615: assert stringMode == NO_LITERAL;
0616: string = lexerInput.readText().toString();
0617: int restLength = string.length() - 1;
0618: if (restLength > 0) {
0619: lexerInput.backup(restLength);
0620: stringMode = IN_ERROR;
0621: } else {
0622: stringMode = NO_LITERAL;
0623: }
0624: token = Token.STRING_BEGIN;
0625: }
0626: }
0627:
0628: return token;
0629: }
0630:
0631: // </netbeans>
/**
 * Scans the next raw token from the source.
 *
 * In order, the scanner:
 * - resets seenSpaces and skips whitespace, counting it; returns Token.EOF
 *   at end of input and Token.EOL on a newline;
 * - in syntax-lexing mode, pushes the first non-space character back and
 *   returns Token.WHITESPACE if any whitespace was skipped;
 * - returns Token.XMLATTR for '@';
 * - scans identifiers (including \\uXXXX escapes), returning a keyword
 *   token, Token.UNDEFINED (syntax-lexing mode only) or Token.NAME with the
 *   text stored in this.string;
 * - scans decimal, hexadecimal and octal numbers into this.number;
 * - scans quoted strings with escapes into this.string, returning
 *   Token.STRING_ERROR for an unterminated literal;
 * - scans punctuators and comments. Comments (and the HTML comment markers
 *   "<!--" and, at line start, "-->") are skipped, except that in
 *   syntax-lexing mode line and block comments are returned as
 *   Token.LINE_COMMENT / Token.BLOCK_COMMENT, and a '/' in regexp position
 *   is scanned as Token.REGEXP / Token.REGEXP_ERROR.
 *
 * Scan errors are reported through parser.addError and yield Token.ERROR.
 *
 * @return the token type
 * @throws IOException if reading the underlying source fails
 */
0632: final int privateGetToken() throws IOException {
0633: int c;
0634: // <netbeans>
0635: seenSpaces = 0;
0636: // </netbeans>
0637: retry: for (;;) {
0638: // <netbeans>
0639: //int initialPos = getBufferOffset();
0640: // </netbeans>
0641: // Eat whitespace, possibly sensitive to newlines.
0642: for (;;) {
0643: c = getChar();
0644: if (c == EOF_CHAR) {
0645: return Token.EOF;
0646: } else if (c == '\n') {
0647: dirtyLine = false;
0648: // <netbeans>
0649: seenSpaces++;
0650: // TODO -- annotate whitespace block here
0651: // </netbeans>
0652: return Token.EOL;
0653: } else if (!isJSSpace(c)) {
// A leading '-' does not dirty the line yet: it may start an HTML
// end-comment "-->", which the '-' case below only recognizes while
// dirtyLine is still false.
0654: if (c != '-') {
0655: dirtyLine = true;
0656: }
0657: break;
0658: // <netbeans>
0659: } else {
0660: seenSpaces++;
0661: // </netbeans>
0662: }
0663: }
0664:
0665: // <netbeans>
0666: // Possibly return whitespace tokens
0667: if (syntaxLexing && seenSpaces > 0) {
0668: ungetChar(c);
0669: return Token.WHITESPACE;
0670: }
0671: // </netbeans>
0672:
0673: if (c == '@')
0674: return Token.XMLATTR;
0675:
0676: // identifier/keyword/instanceof?
0677: // watch out for starting with a <backslash>
0678: boolean identifierStart;
0679: boolean isUnicodeEscapeStart = false;
0680: if (c == '\\') {
0681: c = getChar();
0682: if (c == 'u') {
0683: identifierStart = true;
0684: isUnicodeEscapeStart = true;
0685: stringBufferTop = 0;
0686: } else {
0687: identifierStart = false;
0688: ungetChar(c);
0689: c = '\\';
0690: }
0691: } else {
0692: identifierStart = Character
0693: .isJavaIdentifierStart((char) c);
0694: if (identifierStart) {
0695: stringBufferTop = 0;
0696: addToString(c);
0697: }
0698: }
0699:
0700: if (identifierStart) {
0701: boolean containsEscape = isUnicodeEscapeStart;
0702: for (;;) {
0703: if (isUnicodeEscapeStart) {
0704: // strictly speaking we should probably push-back
0705: // all the bad characters if the <backslash>uXXXX
0706: // sequence is malformed. But since there isn't a
0707: // correct context(is there?) for a bad Unicode
0708: // escape sequence in an identifier, we can report
0709: // an error here.
0710: int escapeVal = 0;
0711: for (int i = 0; i != 4; ++i) {
0712: c = getChar();
0713: escapeVal = Kit.xDigitToInt(c, escapeVal);
0714: // Next check takes care of c < 0 and bad escape
0715: if (escapeVal < 0) {
0716: break;
0717: }
0718: }
0719: if (escapeVal < 0) {
0720: parser.addError("msg.invalid.escape");
0721: return Token.ERROR;
0722: }
0723: addToString(escapeVal);
0724: isUnicodeEscapeStart = false;
0725: } else {
0726: c = getChar();
0727: if (c == '\\') {
0728: c = getChar();
0729: if (c == 'u') {
0730: isUnicodeEscapeStart = true;
0731: containsEscape = true;
0732: } else {
0733: parser
0734: .addError("msg.illegal.character");
0735: return Token.ERROR;
0736: }
0737: } else {
0738: if (c == EOF_CHAR
0739: || !Character
0740: .isJavaIdentifierPart((char) c)) {
0741: break;
0742: }
0743: addToString(c);
0744: }
0745: }
0746: }
0747: ungetChar(c);
0748:
0749: String str = getStringFromBuffer();
// An identifier written with a Unicode escape is never treated as a keyword.
0750: if (!containsEscape) {
0751: // OPT we shouldn't have to make a string (object!) to
0752: // check if it's a keyword.
0753:
0754: // Return the corresponding token if it's a keyword
0755: int result = stringToKeyword(str);
0756: if (result != Token.EOF) {
0757: if (result != Token.RESERVED) {
0758: return result;
0759: } else if (!parser.compilerEnv
0760: .isReservedKeywordAsIdentifier()) {
0761: return result;
0762: } else {
0763: // If implementation permits to use future reserved
0764: // keywords in violation with the EcmaScript,
0765: // treat it as name but issue warning
0766: parser.addWarning("msg.reserved.keyword",
0767: str
0768: // <netbeans>
0769: , new Object[] { null, str }
0770: // </netbeans>
0771: );
0772: }
0773: }
0774: }
0775: // <netbeans>
0776: if (syntaxLexing && "undefined".equals(str)) {
0777: return Token.UNDEFINED;
0778: }
0779: // </netbeans>
0780: this .string = (String) allStrings.intern(str);
0781: return Token.NAME;
0782: }
0783:
0784: // is it a number?
0785: if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
0786:
0787: stringBufferTop = 0;
0788: int base = 10;
0789:
// A leading '0' selects hex ("0x"/"0X") or octal (followed by a digit);
// otherwise it is an ordinary decimal zero and is kept in the buffer.
0790: if (c == '0') {
0791: c = getChar();
0792: if (c == 'x' || c == 'X') {
0793: base = 16;
0794: c = getChar();
0795: } else if (isDigit(c)) {
0796: base = 8;
0797: } else {
0798: addToString('0');
0799: }
0800: }
0801:
0802: if (base == 16) {
0803: while (0 <= Kit.xDigitToInt(c, 0)) {
0804: addToString(c);
0805: c = getChar();
0806: }
0807: } else {
0808: while ('0' <= c && c <= '9') {
0809: /*
0810: * We permit 08 and 09 as decimal numbers, which
0811: * makes our behavior a superset of the ECMA
0812: * numeric grammar. We might not always be so
0813: * permissive, so we warn about it.
0814: */
0815: if (base == 8 && c >= '8') {
0816: parser.addWarning("msg.bad.octal.literal",
0817: c == '8' ? "8" : "9"
0818: // <netbeans>
0819: , null
0820: // </netbeans>
0821: );
0822: base = 10;
0823: }
0824: addToString(c);
0825: c = getChar();
0826: }
0827: }
0828:
0829: boolean isInteger = true;
0830:
0831: if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
0832: isInteger = false;
0833: if (c == '.') {
0834: do {
0835: addToString(c);
0836: c = getChar();
0837: } while (isDigit(c));
0838: }
0839: if (c == 'e' || c == 'E') {
0840: addToString(c);
0841: c = getChar();
0842: if (c == '+' || c == '-') {
0843: addToString(c);
0844: c = getChar();
0845: }
0846: if (!isDigit(c)) {
0847: parser.addError("msg.missing.exponent");
0848: return Token.ERROR;
0849: }
0850: do {
0851: addToString(c);
0852: c = getChar();
0853: } while (isDigit(c));
0854: }
0855: }
0856: ungetChar(c);
0857: String numString = getStringFromBuffer();
0858:
0859: double dval;
0860: if (base == 10 && !isInteger) {
0861: try {
0862: // Use Java conversion to number from string...
0863: dval = Double.valueOf(numString).doubleValue();
0864: } catch (NumberFormatException ex) {
0865: parser.addError("msg.caught.nfe");
0866: return Token.ERROR;
0867: }
0868: } else {
0869: dval = ScriptRuntime.stringToNumber(numString, 0,
0870: base);
0871: }
0872:
0873: this .number = dval;
0874: return Token.NUMBER;
0875: }
0876:
0877: // is it a string?
0878: if (c == '"' || c == '\'') {
0879: // We attempt to accumulate a string the fast way, by
0880: // building it directly out of the reader. But if there
0881: // are any escaped characters in the string, we revert to
0882: // building it out of a StringBuffer.
0883:
0884: int quoteChar = c;
0885: stringBufferTop = 0;
0886:
0887: c = getChar();
0888: strLoop: while (c != quoteChar) {
0889: if (c == '\n' || c == EOF_CHAR) {
0890: ungetChar(c);
0891: parser.addError("msg.unterminated.string.lit");
0892: return Token.STRING_ERROR;
0893: }
0894:
0895: if (c == '\\') {
0896: // We've hit an escaped character
0897: int escapeVal;
0898:
0899: c = getChar();
0900: switch (c) {
0901: case 'b':
0902: c = '\b';
0903: break;
0904: case 'f':
0905: c = '\f';
0906: break;
0907: case 'n':
0908: c = '\n';
0909: break;
0910: case 'r':
0911: c = '\r';
0912: break;
0913: case 't':
0914: c = '\t';
0915: break;
0916:
0917: // \v a late addition to the ECMA spec,
0918: // it is not in Java, so use 0xb
0919: case 'v':
0920: c = 0xb;
0921: break;
0922:
0923: case 'u':
0924: // Get 4 hex digits; if the u escape is not
0925: // followed by 4 hex digits, use 'u' + the
0926: // literal character sequence that follows.
0927: int escapeStart = stringBufferTop;
0928: addToString('u');
0929: escapeVal = 0;
0930: for (int i = 0; i != 4; ++i) {
0931: c = getChar();
0932: escapeVal = Kit.xDigitToInt(c,
0933: escapeVal);
0934: if (escapeVal < 0) {
0935: continue strLoop;
0936: }
0937: addToString(c);
0938: }
0939: // prepare for replace of stored 'u' sequence
0940: // by escape value
0941: stringBufferTop = escapeStart;
0942: c = escapeVal;
0943: break;
0944: case 'x':
0945: // Get 2 hex digits, defaulting to 'x'+literal
0946: // sequence, as above.
0947: c = getChar();
0948: escapeVal = Kit.xDigitToInt(c, 0);
0949: if (escapeVal < 0) {
0950: addToString('x');
0951: continue strLoop;
0952: } else {
0953: int c1 = c;
0954: c = getChar();
0955: escapeVal = Kit.xDigitToInt(c,
0956: escapeVal);
0957: if (escapeVal < 0) {
0958: addToString('x');
0959: addToString(c1);
0960: continue strLoop;
0961: } else {
0962: // got 2 hex digits
0963: c = escapeVal;
0964: }
0965: }
0966: break;
0967:
0968: case '\n':
0969: // Remove line terminator after escape to follow
0970: // SpiderMonkey and C/C++
0971: c = getChar();
0972: continue strLoop;
0973:
0974: default:
// Octal escape: one to three octal digits; a third digit is only
// consumed while the value stays within 0377. Any other escaped
// character falls through and is added as itself.
0975: if ('0' <= c && c < '8') {
0976: int val = c - '0';
0977: c = getChar();
0978: if ('0' <= c && c < '8') {
0979: val = 8 * val + c - '0';
0980: c = getChar();
0981: if ('0' <= c && c < '8'
0982: && val <= 037) {
0983: // c is 3rd char of octal sequence only
0984: // if the resulting val <= 0377
0985: val = 8 * val + c - '0';
0986: c = getChar();
0987: }
0988: }
0989: ungetChar(c);
0990: c = val;
0991: }
0992: }
0993: }
0994: addToString(c);
0995: c = getChar();
0996: }
0997:
0998: String str = getStringFromBuffer();
0999: this .string = (String) allStrings.intern(str);
1000: return Token.STRING;
1001: }
1002:
1003: switch (c) {
1004: case ';':
1005: return Token.SEMI;
1006: case '[':
1007: return Token.LB;
1008: case ']':
1009: return Token.RB;
1010: case '{':
1011: return Token.LC;
1012: case '}':
1013: return Token.RC;
1014: case '(':
1015: return Token.LP;
1016: case ')':
1017: return Token.RP;
1018: case ',':
1019: return Token.COMMA;
1020: case '?':
1021: return Token.HOOK;
1022: case ':':
1023: if (matchChar(':')) {
1024: return Token.COLONCOLON;
1025: } else {
1026: return Token.COLON;
1027: }
1028: case '.':
1029: if (matchChar('.')) {
1030: return Token.DOTDOT;
1031: } else if (matchChar('(')) {
1032: return Token.DOTQUERY;
1033: } else {
1034: return Token.DOT;
1035: }
1036:
1037: case '|':
1038: if (matchChar('|')) {
1039: return Token.OR;
1040: } else if (matchChar('=')) {
1041: return Token.ASSIGN_BITOR;
1042: } else {
1043: return Token.BITOR;
1044: }
1045:
1046: case '^':
1047: if (matchChar('=')) {
1048: return Token.ASSIGN_BITXOR;
1049: } else {
1050: return Token.BITXOR;
1051: }
1052:
1053: case '&':
1054: if (matchChar('&')) {
1055: return Token.AND;
1056: } else if (matchChar('=')) {
1057: return Token.ASSIGN_BITAND;
1058: } else {
1059: return Token.BITAND;
1060: }
1061:
1062: case '=':
1063: if (matchChar('=')) {
1064: if (matchChar('='))
1065: return Token.SHEQ;
1066: else
1067: return Token.EQ;
1068: } else {
1069: return Token.ASSIGN;
1070: }
1071:
1072: case '!':
1073: if (matchChar('=')) {
1074: if (matchChar('='))
1075: return Token.SHNE;
1076: else
1077: return Token.NE;
1078: } else {
1079: return Token.NOT;
1080: }
1081:
1082: case '<':
1083: /* NB:treat HTML begin-comment as comment-till-eol */
1084: if (matchChar('!')) {
1085: if (matchChar('-')) {
1086: if (matchChar('-')) {
1087: skipLine();
1088: continue retry;
1089: }
1090: ungetChar('-');
1091: }
1092: ungetChar('!');
1093: }
1094: if (matchChar('<')) {
1095: if (matchChar('=')) {
1096: return Token.ASSIGN_LSH;
1097: } else {
1098: return Token.LSH;
1099: }
1100: } else {
1101: if (matchChar('=')) {
1102: return Token.LE;
1103: } else {
1104: return Token.LT;
1105: }
1106: }
1107:
1108: case '>':
1109: if (matchChar('>')) {
1110: if (matchChar('>')) {
1111: if (matchChar('=')) {
1112: return Token.ASSIGN_URSH;
1113: } else {
1114: return Token.URSH;
1115: }
1116: } else {
1117: if (matchChar('=')) {
1118: return Token.ASSIGN_RSH;
1119: } else {
1120: return Token.RSH;
1121: }
1122: }
1123: } else {
1124: if (matchChar('=')) {
1125: return Token.GE;
1126: } else {
1127: return Token.GT;
1128: }
1129: }
1130:
1131: case '*':
1132: if (matchChar('=')) {
1133: return Token.ASSIGN_MUL;
1134: } else {
1135: return Token.MUL;
1136: }
1137:
1138: case '/':
1139: // is it a // comment?
1140: if (matchChar('/')) {
1141: skipLine();
1142: // <netbeans>
1143: // Rhino doesn't return comment tokens
1144: // ...but I will!
1145: //continue retry;
1146: if (syntaxLexing) {
1147: return Token.LINE_COMMENT;
1148: } else {
1149: continue retry;
1150: }
1151: // </netbeans>
1152: }
1153: if (matchChar('*')) {
// lookForSlash is true only when the previous character was '*', so the
// comment ends on the first "*/" pair.
1154: boolean lookForSlash = false;
1155: for (;;) {
1156: c = getChar();
1157: if (c == EOF_CHAR) {
1158: parser.addError("msg.unterminated.comment");
1159: return Token.ERROR;
1160: } else if (c == '*') {
1161: lookForSlash = true;
1162: } else if (c == '/') {
1163: if (lookForSlash) {
1164: // <netbeans>
1165: // Rhino doesn't return comment tokens
1166: // ...but I will!
1167: //continue retry;
1168: if (syntaxLexing) {
1169: return Token.BLOCK_COMMENT;
1170: } else {
1171: continue retry;
1172: }
1173: // </netbeans>
1174: }
1175: } else {
1176: lookForSlash = false;
1177: }
1178: }
1179: }
1180:
1181: // <netbeans>
1182: if (syntaxLexing && divIsRegexp) {
1183: try {
1184: readRegExp(Token.DIV);
// NOTE(review): Throwable is caught here, so an IOException or an Error
// raised by readRegExp is also turned into REGEXP_ERROR - confirm intended.
1185: } catch (Throwable t) {
1186: return Token.REGEXP_ERROR;
1187: }
1188: return Token.REGEXP;
1189: }
1190: // </netbeans>
1191:
1192: if (matchChar('=')) {
1193: return Token.ASSIGN_DIV;
1194: } else {
1195: return Token.DIV;
1196: }
1197:
1198: case '%':
1199: if (matchChar('=')) {
1200: return Token.ASSIGN_MOD;
1201: } else {
1202: return Token.MOD;
1203: }
1204:
1205: case '~':
1206: return Token.BITNOT;
1207:
1208: case '+':
1209: if (matchChar('=')) {
1210: return Token.ASSIGN_ADD;
1211: } else if (matchChar('+')) {
1212: return Token.INC;
1213: } else {
1214: return Token.ADD;
1215: }
1216:
1217: case '-':
1218: if (matchChar('=')) {
1219: c = Token.ASSIGN_SUB;
1220: } else if (matchChar('-')) {
1221: if (!dirtyLine) {
1222: // treat HTML end-comment after possible whitespace
1223: // after line start as comment-until-eol
1224: if (matchChar('>')) {
1225: skipLine();
1226: continue retry;
1227: }
1228: }
1229: c = Token.DEC;
1230: } else {
1231: c = Token.SUB;
1232: }
1233: dirtyLine = true;
1234: return c;
1235:
1236: default:
1237: parser.addError("msg.illegal.character");
1238: return Token.ERROR;
1239: }
1240: }
1241: }
1242:
1243: private static boolean isAlpha(int c) {
1244: // Use 'Z' < 'a'
1245: if (c <= 'Z') {
1246: return 'A' <= c;
1247: } else {
1248: return 'a' <= c && c <= 'z';
1249: }
1250: }
1251:
1252: static boolean isDigit(int c) {
1253: return '0' <= c && c <= '9';
1254: }
1255:
1256: /* As defined in ECMA. jsscan.c uses C isspace() (which allows
1257: * \v, I think.) note that code in getChar() implicitly accepts
1258: * '\r' ==
1259: as well.
1260: */
1261: static boolean isJSSpace(int c) {
1262: if (c <= 127) {
1263: return c == 0x20 || c == 0x9 || c == 0xC || c == 0xB;
1264: } else {
1265: return c == 0xA0
1266: || Character.getType((char) c) == Character.SPACE_SEPARATOR;
1267: }
1268: }
1269:
1270: private static boolean isJSFormatChar(int c) {
1271: return c > 127
1272: && Character.getType((char) c) == Character.FORMAT;
1273: }
1274:
1275: /**
1276: * Parser calls the method when it gets / or /= in literal context.
1277: */
1278: // <netbeans>
1279: public// </netbeans>
1280: void readRegExp(int startToken) throws IOException {
1281: stringBufferTop = 0;
1282: if (startToken == Token.ASSIGN_DIV) {
1283: // Miss-scanned /=
1284: addToString('=');
1285: } else {
1286: if (startToken != Token.DIV)
1287: Kit.codeBug();
1288: }
1289:
1290: int c;
1291: while ((c = getChar()) != '/') {
1292: if (c == '\n' || c == EOF_CHAR) {
1293: ungetChar(c);
1294: throw parser.reportError("msg.unterminated.re.lit");
1295: }
1296: if (c == '\\') {
1297: addToString(c);
1298: c = getChar();
1299: }
1300:
1301: addToString(c);
1302: }
1303: int reEnd = stringBufferTop;
1304:
1305: while (true) {
1306: if (matchChar('g'))
1307: addToString('g');
1308: else if (matchChar('i'))
1309: addToString('i');
1310: else if (matchChar('m'))
1311: addToString('m');
1312: else
1313: break;
1314: }
1315:
1316: if (isAlpha(peekChar())) {
1317: throw parser.reportError("msg.invalid.re.flag");
1318: }
1319:
1320: this .string = new String(stringBuffer, 0, reEnd);
1321: this .regExpFlags = new String(stringBuffer, reEnd,
1322: stringBufferTop - reEnd);
1323: }
1324:
1325: boolean isXMLAttribute() {
1326: return xmlIsAttribute;
1327: }
1328:
1329: int getFirstXMLToken() throws IOException {
1330: xmlOpenTagsCount = 0;
1331: xmlIsAttribute = false;
1332: xmlIsTagContent = false;
1333: ungetChar('<');
1334: return getNextXMLToken();
1335: }
1336:
/**
 * Scans the next chunk of an XML literal into the string buffer.
 *
 * <p>Returns {@code Token.XML} when an unconsumed <code>{</code> is
 * reached (the text gathered so far is stored in {@code string} and the
 * brace is pushed back), {@code Token.XMLEND} when a tag has just been
 * closed and no tags remain open, and {@code Token.ERROR} when the input
 * is malformed or ends prematurely. Errors found in this method are
 * reported with "msg.XML.bad.form" after the buffered text is discarded.
 *
 * @throws IOException if reading the underlying source fails
 */
int getNextXMLToken() throws IOException {
    stringBufferTop = 0; // remember the XML

    for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
        if (xmlIsTagContent) {
            // Inside a tag, i.e. between '<' and the matching '>'.
            switch (c) {
            case '>':
                addToString(c);
                xmlIsTagContent = false;
                xmlIsAttribute = false;
                break;
            case '/':
                addToString(c);
                // "/>" closes the tag that was just opened.
                if (peekChar() == '>') {
                    c = getChar();
                    addToString(c);
                    xmlIsTagContent = false;
                    xmlOpenTagsCount--;
                }
                break;
            case '{':
                // Hand the text so far back to the caller; the brace
                // stays in the input.
                ungetChar(c);
                this .string = getStringFromBuffer();
                return Token.XML;
            case '\'':
            case '"':
                addToString(c);
                if (!readQuotedString(c))
                    return Token.ERROR;
                break;
            case '=':
                addToString(c);
                xmlIsAttribute = true;
                break;
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                // White space leaves the attribute flag untouched.
                addToString(c);
                break;
            default:
                addToString(c);
                xmlIsAttribute = false;
                break;
            }

            // The outermost tag has been closed: the literal is complete.
            if (!xmlIsTagContent && xmlOpenTagsCount == 0) {
                this .string = getStringFromBuffer();
                return Token.XMLEND;
            }
        } else {
            // Between tags (element content).
            switch (c) {
            case '<':
                addToString(c);
                c = peekChar();
                switch (c) {
                case '!':
                    c = getChar(); // Skip !
                    addToString(c);
                    c = peekChar();
                    switch (c) {
                    case '-':
                        c = getChar(); // Skip -
                        addToString(c);
                        c = getChar();
                        if (c == '-') {
                            // "<!--": comment
                            addToString(c);
                            if (!readXmlComment())
                                return Token.ERROR;
                        } else {
                            // throw away the string in progress
                            stringBufferTop = 0;
                            this .string = null;
                            parser.addError("msg.XML.bad.form");
                            return Token.ERROR;
                        }
                        break;
                    case '[':
                        c = getChar(); // Skip [
                        addToString(c);
                        // Only "<![CDATA[" is accepted after "<![".
                        if (getChar() == 'C' && getChar() == 'D'
                                && getChar() == 'A'
                                && getChar() == 'T'
                                && getChar() == 'A'
                                && getChar() == '[') {
                            addToString('C');
                            addToString('D');
                            addToString('A');
                            addToString('T');
                            addToString('A');
                            addToString('[');
                            if (!readCDATA())
                                return Token.ERROR;

                        } else {
                            // throw away the string in progress
                            stringBufferTop = 0;
                            this .string = null;
                            parser.addError("msg.XML.bad.form");
                            return Token.ERROR;
                        }
                        break;
                    default:
                        // Any other "<!..." construct is read by readEntity().
                        if (!readEntity())
                            return Token.ERROR;
                        break;
                    }
                    break;
                case '?':
                    c = getChar(); // Skip ?
                    addToString(c);
                    if (!readPI())
                        return Token.ERROR;
                    break;
                case '/':
                    // End tag
                    c = getChar(); // Skip /
                    addToString(c);
                    if (xmlOpenTagsCount == 0) {
                        // End tag without a matching start tag.
                        // throw away the string in progress
                        stringBufferTop = 0;
                        this .string = null;
                        parser.addError("msg.XML.bad.form");
                        return Token.ERROR;
                    }
                    xmlIsTagContent = true;
                    xmlOpenTagsCount--;
                    break;
                default:
                    // Start tag
                    xmlIsTagContent = true;
                    xmlOpenTagsCount++;
                    break;
                }
                break;
            case '{':
                // Hand the text so far back to the caller; the brace
                // stays in the input.
                ungetChar(c);
                this .string = getStringFromBuffer();
                return Token.XML;
            default:
                addToString(c);
                break;
            }
        }
    }

    // End of input before the literal was complete.
    stringBufferTop = 0; // throw away the string in progress
    this .string = null;
    parser.addError("msg.XML.bad.form");
    return Token.ERROR;
}
1488:
1489: /**
1490: *
1491: */
1492: private boolean readQuotedString(int quote) throws IOException {
1493: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1494: addToString(c);
1495: if (c == quote)
1496: return true;
1497: }
1498:
1499: stringBufferTop = 0; // throw away the string in progress
1500: this .string = null;
1501: parser.addError("msg.XML.bad.form");
1502: return false;
1503: }
1504:
1505: /**
1506: *
1507: */
1508: private boolean readXmlComment() throws IOException {
1509: for (int c = getChar(); c != EOF_CHAR;) {
1510: addToString(c);
1511: if (c == '-' && peekChar() == '-') {
1512: c = getChar();
1513: addToString(c);
1514: if (peekChar() == '>') {
1515: c = getChar(); // Skip >
1516: addToString(c);
1517: return true;
1518: } else {
1519: continue;
1520: }
1521: }
1522: c = getChar();
1523: }
1524:
1525: stringBufferTop = 0; // throw away the string in progress
1526: this .string = null;
1527: parser.addError("msg.XML.bad.form");
1528: return false;
1529: }
1530:
1531: /**
1532: *
1533: */
1534: private boolean readCDATA() throws IOException {
1535: for (int c = getChar(); c != EOF_CHAR;) {
1536: addToString(c);
1537: if (c == ']' && peekChar() == ']') {
1538: c = getChar();
1539: addToString(c);
1540: if (peekChar() == '>') {
1541: c = getChar(); // Skip >
1542: addToString(c);
1543: return true;
1544: } else {
1545: continue;
1546: }
1547: }
1548: c = getChar();
1549: }
1550:
1551: stringBufferTop = 0; // throw away the string in progress
1552: this .string = null;
1553: parser.addError("msg.XML.bad.form");
1554: return false;
1555: }
1556:
1557: /**
1558: *
1559: */
1560: private boolean readEntity() throws IOException {
1561: int declTags = 1;
1562: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1563: addToString(c);
1564: switch (c) {
1565: case '<':
1566: declTags++;
1567: break;
1568: case '>':
1569: declTags--;
1570: if (declTags == 0)
1571: return true;
1572: break;
1573: }
1574: }
1575:
1576: stringBufferTop = 0; // throw away the string in progress
1577: this .string = null;
1578: parser.addError("msg.XML.bad.form");
1579: return false;
1580: }
1581:
1582: /**
1583: *
1584: */
1585: private boolean readPI() throws IOException {
1586: for (int c = getChar(); c != EOF_CHAR; c = getChar()) {
1587: addToString(c);
1588: if (c == '?' && peekChar() == '>') {
1589: c = getChar(); // Skip >
1590: addToString(c);
1591: return true;
1592: }
1593: }
1594:
1595: stringBufferTop = 0; // throw away the string in progress
1596: this .string = null;
1597: parser.addError("msg.XML.bad.form");
1598: return false;
1599: }
1600:
1601: private String getStringFromBuffer() {
1602: return new String(stringBuffer, 0, stringBufferTop);
1603: }
1604:
1605: private void addToString(int c) {
1606: int N = stringBufferTop;
1607: if (N == stringBuffer.length) {
1608: char[] tmp = new char[stringBuffer.length * 2];
1609: System.arraycopy(stringBuffer, 0, tmp, 0, N);
1610: stringBuffer = tmp;
1611: }
1612: stringBuffer[N] = (char) c;
1613: stringBufferTop = N + 1;
1614: }
1615:
1616: private void ungetChar(int c) {
1617: // <netbeans>
1618: if (lexerInput != null) {
1619: lexerInput.backup(1);
1620: return;
1621: }
1622: // </netbeans>
1623:
1624: // can not unread past across line boundary
1625: if (ungetCursor != 0 && ungetBuffer[ungetCursor - 1] == '\n')
1626: Kit.codeBug();
1627: ungetBuffer[ungetCursor++] = c;
1628: }
1629:
1630: private boolean matchChar(int test) throws IOException {
1631: int c = getChar();
1632: if (c == test) {
1633: return true;
1634: } else {
1635: ungetChar(c);
1636: return false;
1637: }
1638: }
1639:
1640: private int peekChar() throws IOException {
1641: int c = getChar();
1642: ungetChar(c);
1643: return c;
1644: }
1645:
/**
 * Reads the next input character.
 *
 * <p>With a lexer input attached the character comes straight from
 * {@code lexerInput.read()}. Otherwise pushed-back characters are
 * returned first, then characters from the source string or the source
 * buffer. On that path every line terminator is returned as
 * {@code '\n'} (a "\r\n" pair yields a single {@code '\n'}), format
 * characters are skipped, and {@code hitEOF} is set when the source is
 * exhausted.
 *
 * @return the character read, or {@code EOF_CHAR} at end of input
 * @throws IOException if reading the underlying source fails
 */
private int getChar() throws IOException {
    // <netbeans>
    if (lexerInput != null) {
        int c = lexerInput.read();
        //if (c == LexerInput.EOF) {
        //    return EOF_CHAR;
        //} else {
        //    return c;
        //}
        // The two end-of-input markers are expected to be the same value,
        // so no translation is done.
        assert LexerInput.EOF == EOF_CHAR;
        return c;
    }
    // </netbeans>

    // Pushed-back characters are returned most recent first.
    if (ungetCursor != 0) {
        return ungetBuffer[--ungetCursor];
    }

    for (;;) {
        int c;
        if (sourceString != null) {
            if (sourceCursor == sourceEnd) {
                hitEOF = true;
                return EOF_CHAR;
            }
            c = sourceString.charAt(sourceCursor++);
        } else {
            if (sourceCursor == sourceEnd) {
                if (!fillSourceBuffer()) {
                    hitEOF = true;
                    return EOF_CHAR;
                }
            }
            c = sourceBuffer[sourceCursor++];
        }

        // The previous character ended a line; the line bookkeeping is
        // updated only now, when the next character is read.
        if (lineEndChar >= 0) {
            if (lineEndChar == '\r' && c == '\n') {
                // Second half of "\r\n": drop it and keep waiting for the
                // first character of the next line.
                lineEndChar = '\n';
                continue;
            }
            lineEndChar = -1;
            lineStart = sourceCursor - 1;
            lineno++;
        }

        if (c <= 127) {
            if (c == '\n' || c == '\r') {
                // Remember which terminator was seen, report it as '\n'.
                lineEndChar = c;
                c = '\n';
            }
        } else {
            if (isJSFormatChar(c)) {
                // Format characters never reach the caller.
                continue;
            }
            if (ScriptRuntime.isJSLineTerminator(c)) {
                lineEndChar = c;
                c = '\n';
            }
        }
        return c;
    }
}
1709:
1710: private void skipLine() throws IOException {
1711: // skip to end of line
1712: int c;
1713: while ((c = getChar()) != EOF_CHAR && c != '\n') {
1714: }
1715: ungetChar(c);
1716: }
1717:
1718: final int getOffset() {
1719: // XXX This is not right for the new lexer approach
1720: // <netbeans>: Shouldn't this subtract the ungetCursor? </netbeans>
1721: int n = sourceCursor - lineStart;
1722: if (lineEndChar >= 0) {
1723: --n;
1724: }
1725: return n;
1726: }
1727:
1728: // <netbeans>
1729: public final int getBufferOffset() {
1730: int n = sourceCursor - ungetCursor;
1731: return n;
1732: }
1733:
1734: // </netbeans>
1735:
/**
 * Returns the text of the current source line, without its line
 * terminator. If the end of the line has not been read yet, the method
 * scans ahead for it (reading more input in the Reader case).
 *
 * <p>When a lexer input is attached the line is not available and the
 * placeholder "?" is returned.
 */
final String getLine() {
    // <netbeans>
    if (lexerInput != null) {
        // We should never be calling getLine() when syntax lexing, this
        // is used for parser error messages
        //Kit.codeBug();
        return "?";
    }
    // </netbeans>
    if (sourceString != null) {
        // String case
        int lineEnd = sourceCursor;
        if (lineEndChar >= 0) {
            // The terminator was the last character read: exclude it.
            --lineEnd;
        } else {
            // Scan forward to the terminator or the end of the source.
            for (; lineEnd != sourceEnd; ++lineEnd) {
                int c = sourceString.charAt(lineEnd);
                if (ScriptRuntime.isJSLineTerminator(c)) {
                    break;
                }
            }
        }
        return sourceString.substring(lineStart, lineEnd);
    } else {
        // Reader case
        int lineLength = sourceCursor - lineStart;
        if (lineEndChar >= 0) {
            // The terminator was the last character read: exclude it.
            --lineLength;
        } else {
            // Read until the end of line
            for (;; ++lineLength) {
                int i = lineStart + lineLength;
                if (i == sourceEnd) {
                    try {
                        if (!fillSourceBuffer()) {
                            // No more input: the line ends here.
                            break;
                        }
                    } catch (IOException ioe) {
                        // ignore it, we're already displaying an error...
                        break;
                    }
                    // i must be recalculated, as fillSourceBuffer can move
                    // the saved line within the buffer and change lineStart
                    i = lineStart + lineLength;
                }
                int c = sourceBuffer[i];
                if (ScriptRuntime.isJSLineTerminator(c)) {
                    break;
                }
            }
        }
        return new String(sourceBuffer, lineStart, lineLength);
    }
}
1790:
1791: private boolean fillSourceBuffer() throws IOException {
1792: // <netbeans>
1793: if (lexerInput != null) {
1794: return true;
1795: }
1796: // </netbeans>
1797:
1798: if (sourceString != null)
1799: Kit.codeBug();
1800: if (sourceEnd == sourceBuffer.length) {
1801: if (lineStart != 0) {
1802: System.arraycopy(sourceBuffer, lineStart, sourceBuffer,
1803: 0, sourceEnd - lineStart);
1804: sourceEnd -= lineStart;
1805: sourceCursor -= lineStart;
1806: lineStart = 0;
1807: } else {
1808: char[] tmp = new char[sourceBuffer.length * 2];
1809: System.arraycopy(sourceBuffer, 0, tmp, 0, sourceEnd);
1810: sourceBuffer = tmp;
1811: }
1812: }
1813: int n = sourceReader.read(sourceBuffer, sourceEnd,
1814: sourceBuffer.length - sourceEnd);
1815: if (n < 0) {
1816: return false;
1817: }
1818: sourceEnd += n;
1819: return true;
1820: }
1821:
1822: // <netbeans>
1823: /** Set whether comment tokens should be included as return values
1824: * from getToken() or not. In parsing mode, it should not. But when
1825: * doing lexical analysis as part of syntax highlighting for example,
1826: * it should.
1827: */
1828: public void setSyntaxLexing(boolean syntaxLexing) {
1829: this .syntaxLexing = syntaxLexing;
1830: }
1831:
// Syntax-lexing mode flag, set through setSyntaxLexing(); getToken()
// consults it, e.g. to scan a regexp literal itself after a '/'.
private boolean syntaxLexing;
1833:
1834: /** Restore state from the given Object which was earlier created via {@link toState} */
1835: public void fromState(Object object) {
1836: if (object == null) {
1837: return;
1838: }
1839: LexingState ls = (LexingState) object;
1840: ls.restore(this );
1841: }
1842:
1843: /** Record all state that needs to be restored to resume lexing from the same input position */
1844: public Object toState() {
1845: LexingState ls = new LexingState(this );
1846:
1847: return ls;
1848: }
1849:
1850: private static class LexingState {
1851: private static final int DIV_REGEXP = 1 << 0;
1852: private static final int DIRTY = 1 << 1;
1853: private static final int EOF = 1 << 2;
1854: private static final int XML_ATTR = 1 << 3;
1855: private static final int XML_TAG = 1 << 4;
1856: private static final int STRING_MODE_SHIFT = 5;
1857:
1858: private int flags;
1859: private String regExpFlags;
1860: private String bufferedString;
1861: private int xmlOpenTagsCount;
1862: private String string;
1863:
1864: LexingState(TokenStream stream) {
1865: this .regExpFlags = stream.regExpFlags;
1866: if (stream.stringBufferTop > 0) {
1867: this .bufferedString = new String(
1868: TokenStream.stringBuffer, 0,
1869: stream.stringBufferTop);
1870: }
1871: this .xmlOpenTagsCount = stream.xmlOpenTagsCount;
1872: this .string = stream.string;
1873: int state = 0;
1874: if (stream.dirtyLine) {
1875: state += DIRTY;
1876: }
1877: if (stream.divIsRegexp) {
1878: state += DIV_REGEXP;
1879: }
1880: if (stream.hitEOF) {
1881: state += EOF;
1882: }
1883: if (stream.xmlIsAttribute) {
1884: state += XML_ATTR;
1885: }
1886: if (stream.xmlIsTagContent) {
1887: state += XML_TAG;
1888: }
1889: if (stream.stringMode != 0) {
1890: state += (stream.stringMode << STRING_MODE_SHIFT);
1891: }
1892: this .flags = state;
1893: }
1894:
1895: public void restore(TokenStream stream) {
1896: stream.regExpFlags = this .regExpFlags;
1897: stream.dirtyLine = ((flags & DIRTY) != 0);
1898: stream.divIsRegexp = ((flags & DIV_REGEXP) != 0);
1899: stream.hitEOF = ((flags & EOF) != 0);
1900: stream.xmlIsAttribute = ((flags & XML_ATTR) != 0);
1901: stream.xmlIsTagContent = ((flags & XML_TAG) != 0);
1902: if (this .bufferedString != null) {
1903: assert TokenStream.stringBuffer.length >= this .bufferedString
1904: .length();
1905: char[] chars = this .bufferedString.toCharArray();
1906: System.arraycopy(chars, 0, TokenStream.stringBuffer, 0,
1907: chars.length);
1908: stream.stringBufferTop = bufferedString.length();
1909: } else {
1910: stream.stringBufferTop = 0;
1911: }
1912: stream.xmlOpenTagsCount = this .xmlOpenTagsCount;
1913: stream.stringMode = (short) (flags >> STRING_MODE_SHIFT);
1914: stream.string = this .string;
1915: }
1916:
1917: @Override
1918: public boolean equals(Object obj) {
1919: if (obj == null) {
1920: return false;
1921: }
1922: if (getClass() != obj.getClass()) {
1923: return false;
1924: }
1925: final TokenStream.LexingState other = (TokenStream.LexingState) obj;
1926: if (this .flags != other.flags) {
1927: return false;
1928: }
1929: if (this .regExpFlags != other.regExpFlags
1930: && (this .regExpFlags == null || !this .regExpFlags
1931: .equals(other.regExpFlags))) {
1932: return false;
1933: }
1934: if (this .bufferedString != other.bufferedString
1935: && (this .bufferedString == null || !this .bufferedString
1936: .equals(other.bufferedString))) {
1937: return false;
1938: }
1939: if (this .string != other.string
1940: && (this .string == null || !this .string
1941: .equals(other.string))) {
1942: return false;
1943: }
1944: if (this .xmlOpenTagsCount != other.xmlOpenTagsCount) {
1945: return false;
1946: }
1947: return true;
1948: }
1949:
1950: @Override
1951: public int hashCode() {
1952: int hash = 7;
1953: hash = 37
1954: * hash
1955: + (this .regExpFlags != null ? this .regExpFlags
1956: .hashCode() : 0);
1957: hash = 37
1958: * hash
1959: + (this .bufferedString != null ? this .bufferedString
1960: .hashCode()
1961: : 0);
1962: hash = 37 * hash + this .flags;
1963: hash = 37
1964: * hash
1965: + (this .string != null ? this .string.hashCode() : 0);
1966: hash = 37
1967: * hash
1968: + (this .regExpFlags != null ? this .regExpFlags
1969: .hashCode() : 0);
1970: hash = 37 * hash + this .xmlOpenTagsCount;
1971: return hash;
1972: }
1973:
1974: private String toStateString(int localState) {
1975: StringBuilder sb = new StringBuilder();
1976:
1977: if ((localState & DIV_REGEXP) != 0) {
1978: sb.append("divregexp|");
1979: }
1980:
1981: if ((localState & DIRTY) != 0) {
1982: sb.append("dirty|");
1983: }
1984:
1985: if ((localState & EOF) != 0) {
1986: sb.append("eof|");
1987: }
1988:
1989: if ((localState & XML_TAG) != 0) {
1990: sb.append("xmltag|");
1991: }
1992:
1993: if ((localState & XML_ATTR) != 0) {
1994: sb.append("xmlattr|");
1995: }
1996:
1997: sb
1998: .append("stringmode="
1999: + (localState >> STRING_MODE_SHIFT));
2000:
2001: String s = sb.toString();
2002:
2003: return s;
2004: }
2005:
2006: @Override
2007: public String toString() {
2008: return "LS(regexp=" + this .regExpFlags + ",string="
2009: + this .bufferedString + "xmlOpen="
2010: + this .xmlOpenTagsCount + ",flags="
2011: + toStateString(this .flags) + ",stringlit="
2012: + this .string + ")";
2013: }
2014: }
2015:
// Regular expressions in JavaScript are tricky to detect at the lexical
// level. Rhino usually detects them at parse time. However, that won't do
// here - given code like this:
//    foo(/ba))/)
// the lexer would treat the extra )'s as unbalanced right parens rather
// than as regexp literal contents. With quotes inside the literal it could
// also produce unterminated strings etc. -- all of which confuses features
// that are driven by the lexical tokens: indentation, bracket matching, etc.
// While this flag is set and syntax lexing is enabled, getToken() scans a
// regexp literal itself when it sees '/'.
private boolean divIsRegexp = true;

// I need to split strings and regexps up into begin, literal, end tokens
// (for bracket matching and completion purposes).
// Possible values of stringMode:
private static final int NO_LITERAL = 0;
private static final int IN_ERROR = 1;
private static final int IN_STRING = 2;
private static final int END_STRING = 3;
private static final int IN_REGEXP = 4;
private static final int END_REGEXP = 5;
private short stringMode = NO_LITERAL;
// </netbeans>

// stuff other than whitespace since start of line
private boolean dirtyLine;

// Flags of the regexp literal most recently scanned by readRegExp().
String regExpFlags;

// Set this to an initial non-null value so that the Parser has
// something to retrieve even if an error has occurred and no
// string is found. Fosters one class of error, but saves lots of
// code.
private String string = "";
private double number;

// Scratch buffer that addToString() appends to; stringBufferTop is the
// number of characters currently in use.
// NOTE(review): the buffer is static while stringBufferTop is per
// instance, so all TokenStream instances appear to share one buffer --
// confirm that streams are never used concurrently or interleaved.
private static char[] stringBuffer = new char[128];
private int stringBufferTop;
private ObjToIntMap allStrings = new ObjToIntMap(50);

// Room to backtrack to the < on a failed match of the last - in <!--
private final int[] ungetBuffer = new int[3];
// Number of pushed-back characters currently held in ungetBuffer.
private int ungetCursor;

// Set by getChar() once the end of the source has been reached.
private boolean hitEOF = false;

// Index (in the source string or source buffer) of the first character
// of the current line.
private int lineStart = 0;
private int lineno;
// Line terminator just read by getChar(), or -1 if the last character
// read did not end a line.
private int lineEndChar = -1;

// Input: either sourceString, or sourceReader together with sourceBuffer.
// sourceCursor is the index of the next character to read and sourceEnd
// the end of the available characters.
private String sourceString;
private Reader sourceReader;
private char[] sourceBuffer;
private int sourceEnd;
private int sourceCursor;

// for xml tokenizer
private boolean xmlIsAttribute;
private boolean xmlIsTagContent;
private int xmlOpenTagsCount;

private Parser parser;
2075: }
|