0001: /*
0002: * Janino - An embedded Java[TM] compiler
0003: *
0004: * Copyright (c) 2001-2007, Arno Unkrig
0005: * All rights reserved.
0006: *
0007: * Redistribution and use in source and binary forms, with or without
0008: * modification, are permitted provided that the following conditions
0009: * are met:
0010: *
0011: * 1. Redistributions of source code must retain the above copyright
0012: * notice, this list of conditions and the following disclaimer.
0013: * 2. Redistributions in binary form must reproduce the above
0014: * copyright notice, this list of conditions and the following
0015: * disclaimer in the documentation and/or other materials
0016: * provided with the distribution.
0017: * 3. The name of the author may not be used to endorse or promote
0018: * products derived from this software without specific prior
0019: * written permission.
0020: *
0021: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
0022: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0023: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0024: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
0025: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
0026: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
0027: * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0028: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
0029: * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
0030: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
0031: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0032: */
0033:
0034: package org.codehaus.janino;
0035:
0036: import java.io.*;
0037: import java.util.*;
0038:
0039: import org.codehaus.janino.util.LocatedException;
0040: import org.codehaus.janino.util.TeeReader;
0041:
/**
 * Splits up a character stream into tokens and returns them as
 * {@link Scanner.Token} objects.
 * <p>
 * The <code>optionalFileName</code> parameter passed to many
 * constructors should point to the file the character stream was read from;
 * it is used for reporting errors and for source level debugging.
 */
0049:
0050: public class Scanner {
0051:
0052: // Public Scanners that read from a file.
0053:
/**
 * Creates a scanner that tokenizes the named file, decoding its bytes with the platform
 * default encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param fileName path of the file to scan; also used as the file name in token locations
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated
 */
public Scanner(String fileName) throws ScanException, IOException {
    this(fileName, new FileInputStream(fileName));
}
0066:
/**
 * Creates a scanner that tokenizes the named file, decoding its bytes with the given
 * encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param fileName path of the file to scan; also used as the file name in token locations
 * @param encoding name of the character encoding; <code>null</code> selects the platform
 *                 default encoding
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated
 */
public Scanner(String fileName, String encoding) throws ScanException, IOException {
    this(fileName, new FileInputStream(fileName), encoding);
}
0081:
/**
 * Creates a scanner that tokenizes the given file, decoding its bytes with the platform
 * default encoding. The file's absolute path is used as the file name in token locations.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param file the file to scan
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated
 */
public Scanner(File file) throws ScanException, IOException {
    // A null encoding makes the delegate fall back to the platform default.
    this(file.getAbsolutePath(), new FileInputStream(file), null);
}
0096:
/**
 * Creates a scanner that tokenizes the given file, decoding its bytes with the given
 * encoding. The file's absolute path is used as the file name in token locations.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param file             the file to scan
 * @param optionalEncoding name of the character encoding; <code>null</code> selects the
 *                         platform default encoding
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated
 */
public Scanner(File file, String optionalEncoding) throws ScanException, IOException {
    this(file.getAbsolutePath(), new FileInputStream(file), optionalEncoding);
}
0111:
0112: // Public Scanners that read from an InputStream
0113:
/**
 * Creates a scanner that tokenizes the bytes of the given {@link InputStream}, decoded
 * with the platform default encoding. Line counting starts at 1, column counting at 0.
 * <p>
 * The <code>optionalFileName</code> is used as the file name in the locations of the
 * scanned tokens.
 *
 * @param optionalFileName file name to report, or <code>null</code>
 * @param is               the byte stream to scan
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if reading from the stream fails
 */
public Scanner(String optionalFileName, InputStream is) throws ScanException, IOException {
    this(optionalFileName, new InputStreamReader(is), (short) 1, (short) 0);
}
0127:
/**
 * Creates a scanner that tokenizes the bytes of the given {@link InputStream}, decoded
 * with <code>optionalEncoding</code> (<code>null</code> selects the platform default
 * encoding). Line counting starts at 1, column counting at 0.
 * <p>
 * The <code>optionalFileName</code> is used for reporting errors during compilation and
 * for source level debugging, and should name an existing file. If it is
 * <code>null</code> and the system property
 * <code>org.codehaus.janino.source_debugging.enable</code> is "true", the source is
 * copied into a temporary file (in <code>org.codehaus.janino.source_debugging.dir</code>
 * or the system's default temp dir) so that a debugger can display it.
 *
 * @param optionalFileName file name to report, or <code>null</code>
 * @param is               the byte stream to scan
 * @param optionalEncoding name of the character encoding, or <code>null</code>
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the encoding is unsupported or reading from the stream fails
 */
public Scanner(String optionalFileName, InputStream is, String optionalEncoding)
        throws ScanException, IOException {
    this(
        optionalFileName,
        optionalEncoding != null
            ? new InputStreamReader(is, optionalEncoding)
            : new InputStreamReader(is),
        (short) 1, // initialLineNumber
        (short) 0  // initialColumnNumber
    );
}
0151:
0152: // Public Scanners that read from a Reader.
0153:
/**
 * Creates a scanner that tokenizes the characters delivered by the given {@link Reader}.
 * Line counting starts at 1, column counting at 0.
 * <p>
 * The <code>optionalFileName</code> is used for reporting errors during compilation and
 * for source level debugging, and should name an existing file. If it is
 * <code>null</code> and the system property
 * <code>org.codehaus.janino.source_debugging.enable</code> is "true", the source is
 * copied into a temporary file (in <code>org.codehaus.janino.source_debugging.dir</code>
 * or the system's default temp dir) so that a debugger can display it.
 *
 * @param optionalFileName file name to report, or <code>null</code>
 * @param in               the character stream to scan
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if reading from the reader fails
 */
public Scanner(String optionalFileName, Reader in) throws ScanException, IOException {
    this(optionalFileName, in, (short) 1, (short) 0);
}
0174:
0175: /**
0176: * Creates a {@link Scanner} that counts lines and columns from non-default initial
0177: * values.
0178: */
0179: public Scanner(String optionalFileName, Reader in,
0180: short initialLineNumber, // "1" is a good idea
0181: short initialColumnNumber // "0" is a good idea
0182: ) throws ScanException, IOException {
0183:
0184: // Debugging on source code level is only possible if the code comes from
0185: // a "real" Java source file which the debugger can read. If this is not the
0186: // case, and we absolutely want source code level debugging, then we write
0187: // a verbatim copy of the source code into a temporary file in the system
0188: // temp directory.
0189: // This behavior is controlled by the two system properties
0190: // org.codehaus.janino.source_debugging.enable
0191: // org.codehaus.janino.source_debugging.dir
0192: // JANINO is designed to compile in memory to save the overhead of disk
0193: // I/O, so writing this file is only recommended for source code level
0194: // debugging purposes.
0195: if (optionalFileName == null
0196: && Boolean
0197: .getBoolean("org.codehaus.janino.source_debugging.enable")) {
0198: String dirName = System
0199: .getProperty("org.codehaus.janino.source_debugging.dir");
0200: File dir = dirName == null ? null : new File(dirName);
0201: File temporaryFile = File.createTempFile("janino", ".java",
0202: dir);
0203: temporaryFile.deleteOnExit();
0204: in = new TeeReader(in, // in
0205: new FileWriter(temporaryFile), // out
0206: true // closeWriterOnEOF
0207: );
0208: optionalFileName = temporaryFile.getAbsolutePath();
0209: }
0210:
0211: this .optionalFileName = optionalFileName;
0212: this .in = new UnicodeUnescapeReader(in);
0213: this .nextCharLineNumber = initialLineNumber;
0214: this .nextCharColumnNumber = initialColumnNumber;
0215:
0216: this .readNextChar();
0217: this .nextToken = this .internalRead();
0218: this .nextButOneToken = null;
0219: }
0220:
0221: /**
0222: * Return the file name optionally passed to the constructor.
0223: */
0224: public String getFileName() {
0225: return this .optionalFileName;
0226: }
0227:
0228: /**
0229: * Closes the character source (file, {@link InputStream}, {@link Reader}) associated
0230: * with this object. The results of future calls to {@link #peek()} and
0231: * {@link #read()} are undefined.
0232: * <p>
0233: * <b>This method is deprecated, because the concept described above is confusing. An
0234: * application should close the underlying {@link InputStream} or {@link Reader} itself.</b>
0235: *
0236: * @deprecated
0237: */
0238: public void close() throws IOException {
0239: this .in.close();
0240: }
0241:
0242: /**
0243: * Read the next token from the input.
0244: */
0245: public Token read() throws ScanException, IOException {
0246: Token res = this .nextToken;
0247: if (this .nextButOneToken != null) {
0248: this .nextToken = this .nextButOneToken;
0249: this .nextButOneToken = null;
0250: } else {
0251: this .nextToken = this .internalRead();
0252: }
0253: return res;
0254: }
0255:
0256: /**
0257: * Peek the next token, but don't remove it from the input.
0258: */
0259: public Token peek() {
0260: if (Scanner.DEBUG)
0261: System.err.println("peek() => \"" + this .nextToken + "\"");
0262: return this .nextToken;
0263: }
0264:
0265: /**
0266: * Peek the next but one token, neither remove the next nor the next but one token from the
0267: * input.
0268: * <p>
0269: * This makes parsing so much easier, e.g. for class literals like
0270: * <code>Map.class</code>.
0271: */
0272: public Token peekNextButOne() throws ScanException, IOException {
0273: if (this .nextButOneToken == null)
0274: this .nextButOneToken = this .internalRead();
0275: return this .nextButOneToken;
0276: }
0277:
0278: /**
0279: * Get the text of the doc comment (a.k.a. "JAVADOC comment") preceeding
0280: * the next token.
0281: * @return <code>null</code> if the next token is not preceeded by a doc comment
0282: */
0283: public String doc() {
0284: String s = this .docComment;
0285: this .docComment = null;
0286: return s;
0287: }
0288:
0289: /**
0290: * Returns the {@link Location} of the next token.
0291: */
0292: public Location location() {
0293: return this .nextToken.getLocation();
0294: }
0295:
0296: public abstract class Token {
0297: private/*final*/String optionalFileName;
0298: private/*final*/short lineNumber;
0299: private/*final*/short columnNumber;
0300: private Location location = null;
0301:
0302: private Token() {
0303: this .optionalFileName = Scanner.this .optionalFileName;
0304: this .lineNumber = Scanner.this .tokenLineNumber;
0305: this .columnNumber = Scanner.this .tokenColumnNumber;
0306: }
0307:
0308: public Location getLocation() {
0309: if (this .location == null)
0310: this .location = new Location(this .optionalFileName,
0311: this .lineNumber, this .columnNumber);
0312: return this .location;
0313: }
0314:
0315: public boolean isKeyword() {
0316: return false;
0317: }
0318:
0319: public boolean isKeyword(String k) {
0320: return false;
0321: }
0322:
0323: public boolean isKeyword(String[] ks) {
0324: return false;
0325: }
0326:
0327: public String getKeyword() throws ScanException {
0328: throw new ScanException("Not a keyword token");
0329: }
0330:
0331: public boolean isIdentifier() {
0332: return false;
0333: }
0334:
0335: public boolean isIdentifier(String id) {
0336: return false;
0337: }
0338:
0339: public String getIdentifier() throws ScanException {
0340: throw new ScanException("Not an identifier token");
0341: }
0342:
0343: public boolean isLiteral() {
0344: return false;
0345: }
0346:
0347: public Object getLiteralValue() throws ScanException {
0348: throw new ScanException("Not a literal token");
0349: }
0350:
0351: public boolean isOperator() {
0352: return false;
0353: }
0354:
0355: public boolean isOperator(String o) {
0356: return false;
0357: }
0358:
0359: public boolean isOperator(String[] os) {
0360: return false;
0361: }
0362:
0363: public String getOperator() throws ScanException {
0364: throw new ScanException("Not an operator token");
0365: }
0366:
0367: public boolean isEOF() {
0368: return false;
0369: }
0370: }
0371:
0372: public class KeywordToken extends Token {
0373: private final String keyword;
0374:
0375: /**
0376: * @param keyword Must be in interned string!
0377: */
0378: private KeywordToken(String keyword) {
0379: this .keyword = keyword;
0380: }
0381:
0382: public boolean isKeyword() {
0383: return true;
0384: }
0385:
0386: public boolean isKeyword(String k) {
0387: return this .keyword == k;
0388: }
0389:
0390: public boolean isKeyword(String[] ks) {
0391: for (int i = 0; i < ks.length; ++i) {
0392: if (this .keyword == ks[i])
0393: return true;
0394: }
0395: return false;
0396: }
0397:
0398: public String getKeyword() {
0399: return this .keyword;
0400: }
0401:
0402: public String toString() {
0403: return this .keyword;
0404: }
0405: }
0406:
0407: public class IdentifierToken extends Token {
0408: private final String identifier;
0409:
0410: private IdentifierToken(String identifier) {
0411: this .identifier = identifier;
0412: }
0413:
0414: public boolean isIdentifier() {
0415: return true;
0416: }
0417:
0418: public boolean isIdentifier(String id) {
0419: return this .identifier.equals(id);
0420: }
0421:
0422: public String getIdentifier() {
0423: return this .identifier;
0424: }
0425:
0426: public String toString() {
0427: return this .identifier;
0428: }
0429: }
0430:
0431: /**
0432: * This value represents the "magic" literal "2147483648" which is only
0433: * allowed in a negated context.
0434: */
0435: public static final Integer MAGIC_INTEGER = new Integer(
0436: Integer.MIN_VALUE);
0437:
0438: /**
0439: * This value represents the "magic" literal "9223372036854775808L" which is only
0440: * allowed in a negated context.
0441: */
0442: public static final Long MAGIC_LONG = new Long(Long.MIN_VALUE);
0443:
0444: /**
0445: * The type of the <code>value</code> parameter determines the type of the literal
0446: * token:
0447: * <table>
0448: * <tr><th>Type/value returned by {@link #getLiteralValue()}</th><th>Literal</th></tr>
0449: * <tr><td>{@link String}</td><td>STRING literal</td></tr>
0450: * <tr><td>{@link Character}</td><td>CHAR literal</td></tr>
0451: * <tr><td>{@link Integer}</td><td>INT literal</td></tr>
0452: * <tr><td>{@link Long}</td><td>LONG literal</td></tr>
0453: * <tr><td>{@link Float}</td><td>FLOAT literal</td></tr>
0454: * <tr><td>{@link Double}</td><td>DOUBLE literal</td></tr>
0455: * <tr><td>{@link Boolean}</td><td>BOOLEAN literal</td></tr>
0456: * <tr><td><code>null</code></td><td>NULL literal</td></tr>
0457: * </table>
0458: */
0459: public final class LiteralToken extends Token {
0460: private final Object value;
0461:
0462: public LiteralToken(Object value) {
0463: this .value = value;
0464: }
0465:
0466: // Implement {@link Literal}.
0467: public final boolean isLiteral() {
0468: return true;
0469: }
0470:
0471: public Object getLiteralValue() {
0472: return this .value;
0473: }
0474:
0475: public String toString() {
0476: return Scanner.literalValueToString(this .value);
0477: }
0478: }
0479:
0480: public static String literalValueToString(Object v) {
0481: if (v instanceof String) {
0482: StringBuffer sb = new StringBuffer();
0483: sb.append('"');
0484: String s = (String) v;
0485: for (int i = 0; i < s.length(); ++i) {
0486: char c = s.charAt(i);
0487:
0488: if (c == '"') {
0489: sb.append("\\\"");
0490: } else {
0491: Scanner.escapeCharacter(c, sb);
0492: }
0493: }
0494: sb.append('"');
0495: return sb.toString();
0496: }
0497: if (v instanceof Character) {
0498: char c = ((Character) v).charValue();
0499: if (c == '\'')
0500: return "'\\''";
0501: StringBuffer sb = new StringBuffer("'");
0502: Scanner.escapeCharacter(c, sb);
0503: return sb.append('\'').toString();
0504: }
0505: if (v instanceof Integer) {
0506: if (v == Scanner.MAGIC_INTEGER)
0507: return "2147483648";
0508: return v.toString();
0509: }
0510: if (v instanceof Long) {
0511: if (v == Scanner.MAGIC_LONG)
0512: return "9223372036854775808L";
0513: return v.toString() + 'L';
0514: }
0515: if (v instanceof Float) {
0516: return v.toString() + 'F';
0517: }
0518: if (v instanceof Double) {
0519: return v.toString() + 'D';
0520: }
0521: if (v instanceof Boolean) {
0522: return v.toString();
0523: }
0524: if (v == null) {
0525: return "null";
0526: }
0527: throw new RuntimeException("Unexpected value type \""
0528: + v.getClass().getName() + "\"");
0529: }
0530:
0531: public class OperatorToken extends Token {
0532: private final String operator;
0533:
0534: /**
0535: *
0536: * @param operator Must be an interned string!
0537: */
0538: private OperatorToken(String operator) {
0539: this .operator = operator;
0540: }
0541:
0542: public boolean isOperator() {
0543: return true;
0544: }
0545:
0546: public boolean isOperator(String o) {
0547: return this .operator == o;
0548: }
0549:
0550: public boolean isOperator(String[] os) {
0551: for (int i = 0; i < os.length; ++i) {
0552: if (this .operator == os[i])
0553: return true;
0554: }
0555: return false;
0556: }
0557:
0558: public String getOperator() {
0559: return this .operator;
0560: }
0561:
0562: public String toString() {
0563: return this .operator;
0564: }
0565: }
0566:
0567: public class EOFToken extends Token {
0568: public boolean isEOF() {
0569: return true;
0570: }
0571:
0572: public String toString() {
0573: return "End-Of-File";
0574: }
0575: }
0576:
0577: /**
0578: * Escape unprintable characters appropriately, i.e. as
0579: * backslash-letter or backslash-U-four-hex-digits.
0580: * <p>
0581: * Notice: Single and double quotes are not escaped!
0582: */
0583: private static void escapeCharacter(char c, StringBuffer sb) {
0584:
0585: // Backslash escape sequences.
0586: int idx = "\b\t\n\f\r\\".indexOf(c);
0587: if (idx != -1) {
0588: sb.append('\\').append("btnfr\\".charAt(idx));
0589: } else
0590:
0591: // Printable characters.
0592: if (c >= ' ' && c < 255 && c != 127) {
0593: sb.append(c);
0594: } else
0595:
0596: // Backslash-U escape sequences.
0597: {
0598: sb.append("\\u");
0599: String hs = Integer.toHexString(0xffff & c);
0600: for (int j = hs.length(); j < 4; ++j)
0601: sb.append('0');
0602: sb.append(hs);
0603: }
0604: }
0605:
0606: private Token internalRead() throws ScanException, IOException {
0607: if (this .docComment != null) {
0608: this .warning("MDC", "Misplaced doc comment", this .nextToken
0609: .getLocation());
0610: this .docComment = null;
0611: }
0612:
0613: // Skip whitespace and process comments.
0614: int state = 0;
0615: StringBuffer dcsb = null; // For doc comment
0616:
0617: PROCESS_COMMENTS: for (;;) {
0618: switch (state) {
0619:
0620: case 0: // Outside any comment
0621: if (this .nextChar == -1) {
0622: return new EOFToken();
0623: } else if (Character.isWhitespace((char) this .nextChar)) {
0624: ;
0625: } else if (this .nextChar == '/') {
0626: state = 1;
0627: } else {
0628: break PROCESS_COMMENTS;
0629: }
0630: break;
0631:
0632: case 1: // After "/"
0633: if (this .nextChar == -1) {
0634: return new OperatorToken("/");
0635: } else if (this .nextChar == '=') {
0636: this .readNextChar();
0637: return new OperatorToken("/=");
0638: } else if (this .nextChar == '/') {
0639: state = 2;
0640: } else if (this .nextChar == '*') {
0641: state = 3;
0642: } else {
0643: return new OperatorToken("/");
0644: }
0645: break;
0646:
0647: case 2: // After "//..."
0648: if (this .nextChar == -1) {
0649: return new EOFToken();
0650: } else if (this .nextChar == '\r'
0651: || this .nextChar == '\n') {
0652: state = 0;
0653: } else {
0654: ;
0655: }
0656: break;
0657:
0658: case 3: // After "/*"
0659: if (this .nextChar == -1) {
0660: throw new ScanException(
0661: "EOF in traditional comment");
0662: } else if (this .nextChar == '*') {
0663: state = 4;
0664: } else {
0665: state = 9;
0666: }
0667: break;
0668:
0669: case 4: // After "/**"
0670: if (this .nextChar == -1) {
0671: throw new ScanException("EOF in doc comment");
0672: } else if (this .nextChar == '/') {
0673: state = 0;
0674: } else {
0675: if (this .docComment != null)
0676: this .warning("MDC", "Multiple doc comments",
0677: new Location(this .optionalFileName,
0678: this .nextCharLineNumber,
0679: this .nextCharColumnNumber));
0680: dcsb = new StringBuffer();
0681: dcsb.append((char) this .nextChar);
0682: state = ((this .nextChar == '\r' || this .nextChar == '\n') ? 6
0683: : this .nextChar == '*' ? 8 : 5);
0684: }
0685: break;
0686:
0687: case 5: // After "/**..."
0688: if (this .nextChar == -1) {
0689: throw new ScanException("EOF in doc comment");
0690: } else if (this .nextChar == '*') {
0691: state = 8;
0692: } else if (this .nextChar == '\r'
0693: || this .nextChar == '\n') {
0694: dcsb.append((char) this .nextChar);
0695: state = 6;
0696: } else {
0697: dcsb.append((char) this .nextChar);
0698: }
0699: break;
0700:
0701: case 6: // After "/**...\n"
0702: if (this .nextChar == -1) {
0703: throw new ScanException("EOF in doc comment");
0704: } else if (this .nextChar == '*') {
0705: state = 7;
0706: } else if (this .nextChar == '\r'
0707: || this .nextChar == '\n') {
0708: dcsb.append((char) this .nextChar);
0709: } else if (this .nextChar == ' '
0710: || this .nextChar == '\t') {
0711: ;
0712: } else {
0713: dcsb.append((char) this .nextChar);
0714: state = 5;
0715: }
0716: break;
0717:
0718: case 7: // After "/**...\n *"
0719: if (this .nextChar == -1) {
0720: throw new ScanException("EOF in doc comment");
0721: } else if (this .nextChar == '*') {
0722: ;
0723: } else if (this .nextChar == '/') {
0724: this .docComment = dcsb.toString();
0725: state = 0;
0726: } else {
0727: dcsb.append((char) this .nextChar);
0728: state = 5;
0729: }
0730: break;
0731:
0732: case 8: // After "/**...*"
0733: if (this .nextChar == -1) {
0734: throw new ScanException("EOF in doc comment");
0735: } else if (this .nextChar == '/') {
0736: this .docComment = dcsb.toString();
0737: state = 0;
0738: } else if (this .nextChar == '*') {
0739: dcsb.append('*');
0740: } else {
0741: dcsb.append('*');
0742: dcsb.append((char) this .nextChar);
0743: state = 5;
0744: }
0745: break;
0746:
0747: case 9: // After "/*..."
0748: if (this .nextChar == -1) {
0749: throw new ScanException(
0750: "EOF in traditional comment");
0751: } else if (this .nextChar == '*') {
0752: state = 10;
0753: } else {
0754: ;
0755: }
0756: break;
0757:
0758: case 10: // After "/*...*"
0759: if (this .nextChar == -1) {
0760: throw new ScanException(
0761: "EOF in traditional comment");
0762: } else if (this .nextChar == '/') {
0763: state = 0;
0764: } else if (this .nextChar == '*') {
0765: ;
0766: } else {
0767: state = 9;
0768: }
0769: }
0770: this .readNextChar();
0771: }
0772:
0773: /*
0774: * Whitespace and comments are now skipped; "nextChar" is definitely
0775: * the first character of the token.
0776: */
0777: this .tokenLineNumber = this .nextCharLineNumber;
0778: this .tokenColumnNumber = this .nextCharColumnNumber;
0779:
0780: // Scan identifier.
0781: if (Character.isJavaIdentifierStart((char) this .nextChar)) {
0782: StringBuffer sb = new StringBuffer();
0783: sb.append((char) this .nextChar);
0784: for (;;) {
0785: this .readNextChar();
0786: if (this .nextChar == -1
0787: || !Character
0788: .isJavaIdentifierPart((char) this .nextChar))
0789: break;
0790: sb.append((char) this .nextChar);
0791: }
0792: String s = sb.toString();
0793: if (s.equals("true"))
0794: return new LiteralToken(Boolean.TRUE);
0795: if (s.equals("false"))
0796: return new LiteralToken(Boolean.FALSE);
0797: if (s.equals("null"))
0798: return new LiteralToken(null);
0799: {
0800: String v = (String) Scanner.JAVA_KEYWORDS.get(s);
0801: if (v != null)
0802: return new KeywordToken(v);
0803: }
0804: return new IdentifierToken(s);
0805: }
0806:
0807: // Scan numeric literal.
0808: if (Character.isDigit((char) this .nextChar)) {
0809: return this .scanNumericLiteral(0);
0810: }
0811:
0812: // A "." is special: Could either be a floating-point constant like ".001", or the "."
0813: // operator.
0814: if (this .nextChar == '.') {
0815: this .readNextChar();
0816: if (Character.isDigit((char) this .nextChar)) {
0817: return this .scanNumericLiteral(2);
0818: } else {
0819: return new OperatorToken(".");
0820: }
0821: }
0822:
0823: // Scan string literal.
0824: if (this .nextChar == '"') {
0825: StringBuffer sb = new StringBuffer("");
0826: this .readNextChar();
0827: if (this .nextChar == -1)
0828: throw new ScanException("EOF in string literal");
0829: if (this .nextChar == '\r' || this .nextChar == '\n')
0830: throw new ScanException("Line break in string literal");
0831: while (this .nextChar != '"') {
0832: sb.append(this .unescapeCharacterLiteral());
0833: }
0834: this .readNextChar();
0835: return new LiteralToken(sb.toString());
0836: }
0837:
0838: // Scan character literal.
0839: if (this .nextChar == '\'') {
0840: this .readNextChar();
0841: if (this .nextChar == '\'')
0842: throw new ScanException(
0843: "Single quote must be backslash-escaped in character literal");
0844: char lit = this .unescapeCharacterLiteral();
0845: if (this .nextChar != '\'')
0846: throw new ScanException("Closing single quote missing");
0847: this .readNextChar();
0848:
0849: return new LiteralToken(new Character(lit));
0850: }
0851:
0852: // Scan separator / operator.
0853: {
0854: String v = (String) Scanner.JAVA_OPERATORS.get(new String(
0855: new char[] { (char) this .nextChar }));
0856: if (v != null) {
0857: for (;;) {
0858: this .readNextChar();
0859: String v2 = (String) Scanner.JAVA_OPERATORS.get(v
0860: + (char) this .nextChar);
0861: if (v2 == null)
0862: return new OperatorToken(v);
0863: v = v2;
0864: }
0865: }
0866: }
0867:
0868: throw new ScanException("Invalid character input \""
0869: + (char) this .nextChar + "\" (character code "
0870: + this .nextChar + ")");
0871: }
0872:
0873: private Token scanNumericLiteral(int initialState)
0874: throws ScanException, IOException {
0875: StringBuffer sb = (initialState == 2) ? new StringBuffer("0.")
0876: : new StringBuffer();
0877: int state = initialState;
0878: for (;;) {
0879: switch (state) {
0880:
0881: case 0: // First character.
0882: if (this .nextChar == '0') {
0883: state = 6;
0884: } else
0885: /* if (Character.isDigit((char) this.nextChar)) */{
0886: sb.append((char) this .nextChar);
0887: state = 1;
0888: }
0889: break;
0890:
0891: case 1: // Decimal digits.
0892: if (Character.isDigit((char) this .nextChar)) {
0893: sb.append((char) this .nextChar);
0894: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
0895: this .readNextChar();
0896: return this .stringToLongLiteralToken(sb.toString(),
0897: 10);
0898: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0899: this .readNextChar();
0900: return this
0901: .stringToFloatLiteralToken(sb.toString());
0902: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0903: this .readNextChar();
0904: return this .stringToDoubleLiteralToken(sb
0905: .toString());
0906: } else if (this .nextChar == '.') {
0907: sb.append('.');
0908: state = 2;
0909: } else if (this .nextChar == 'E' || this .nextChar == 'e') {
0910: sb.append('E');
0911: state = 3;
0912: } else {
0913: return this .stringToIntegerLiteralToken(sb
0914: .toString(), 10);
0915: }
0916: break;
0917:
0918: case 2: // After decimal point.
0919: if (Character.isDigit((char) this .nextChar)) {
0920: sb.append((char) this .nextChar);
0921: } else if (this .nextChar == 'e' || this .nextChar == 'E') {
0922: sb.append('E');
0923: state = 3;
0924: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0925: this .readNextChar();
0926: return this
0927: .stringToFloatLiteralToken(sb.toString());
0928: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0929: this .readNextChar();
0930: return this .stringToDoubleLiteralToken(sb
0931: .toString());
0932: } else {
0933: return this .stringToDoubleLiteralToken(sb
0934: .toString());
0935: }
0936: break;
0937:
0938: case 3: // Read exponent.
0939: if (Character.isDigit((char) this .nextChar)) {
0940: sb.append((char) this .nextChar);
0941: state = 5;
0942: } else if (this .nextChar == '-' || this .nextChar == '+') {
0943: sb.append((char) this .nextChar);
0944: state = 4;
0945: } else {
0946: throw new ScanException(
0947: "Exponent missing after \"E\"");
0948: }
0949: break;
0950:
0951: case 4: // After exponent sign.
0952: if (Character.isDigit((char) this .nextChar)) {
0953: sb.append((char) this .nextChar);
0954: state = 5;
0955: } else {
0956: throw new ScanException(
0957: "Exponent missing after \"E\" and sign");
0958: }
0959: break;
0960:
0961: case 5: // After first exponent digit.
0962: if (Character.isDigit((char) this .nextChar)) {
0963: sb.append((char) this .nextChar);
0964: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0965: this .readNextChar();
0966: return this
0967: .stringToFloatLiteralToken(sb.toString());
0968: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0969: this .readNextChar();
0970: return this .stringToDoubleLiteralToken(sb
0971: .toString());
0972: } else {
0973: return this .stringToDoubleLiteralToken(sb
0974: .toString());
0975: }
0976: break;
0977:
0978: case 6: // After leading zero
0979: if ("01234567".indexOf(this .nextChar) != -1) {
0980: sb.append((char) this .nextChar);
0981: state = 7;
0982: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
0983: this .readNextChar();
0984: return this .stringToLongLiteralToken("0", 10);
0985: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0986: this .readNextChar();
0987: return this .stringToFloatLiteralToken("0");
0988: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0989: this .readNextChar();
0990: return this .stringToDoubleLiteralToken("0");
0991: } else if (this .nextChar == '.') {
0992: sb.append("0.");
0993: state = 2;
0994: } else if (this .nextChar == 'E' || this .nextChar == 'e') {
0995: sb.append('E');
0996: state = 3;
0997: } else if (this .nextChar == 'x' || this .nextChar == 'X') {
0998: state = 8;
0999: } else {
1000: return this .stringToIntegerLiteralToken("0", 10);
1001: }
1002: break;
1003:
1004: case 7: // In octal literal.
1005: if ("01234567".indexOf(this .nextChar) != -1) {
1006: sb.append((char) this .nextChar);
1007: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
1008: // Octal long literal.
1009: this .readNextChar();
1010: return this .stringToLongLiteralToken(sb.toString(),
1011: 8);
1012: } else {
1013: // Octal int literal
1014: return this .stringToIntegerLiteralToken(sb
1015: .toString(), 8);
1016: }
1017: break;
1018:
1019: case 8: // First hex digit
1020: if (Character.digit((char) this .nextChar, 16) != -1) {
1021: sb.append((char) this .nextChar);
1022: state = 9;
1023: } else {
1024: throw new ScanException(
1025: "Hex digit expected after \"0x\"");
1026: }
1027: break;
1028:
1029: case 9:
1030: if (Character.digit((char) this .nextChar, 16) != -1) {
1031: sb.append((char) this .nextChar);
1032: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
1033: // Hex long literal
1034: this .readNextChar();
1035: return this .stringToLongLiteralToken(sb.toString(),
1036: 16);
1037: } else {
1038: // Hex long literal
1039: return this .stringToIntegerLiteralToken(sb
1040: .toString(), 16);
1041: }
1042: break;
1043: }
1044: this .readNextChar();
1045: }
1046: }
1047:
1048: private LiteralToken stringToIntegerLiteralToken(final String s,
1049: int radix) throws ScanException {
1050: int x;
1051: switch (radix) {
1052:
1053: case 10:
1054: // Special case: Decimal literal 2^31 must only appear in "negated" context, i.e.
1055: // "-2147483648" is a valid long literal, but "2147483648" is not.
1056: if (s.equals("2147483648"))
1057: return new LiteralToken(Scanner.MAGIC_INTEGER);
1058: try {
1059: x = Integer.parseInt(s);
1060: } catch (NumberFormatException e) {
1061: throw new ScanException(
1062: "Value of decimal integer literal \"" + s
1063: + "\" is out of range");
1064: }
1065: break;
1066:
1067: case 8:
1068: // Cannot use "Integer.parseInt(s, 8)" because that parses SIGNED values.
1069: x = 0;
1070: for (int i = 0; i < s.length(); ++i) {
1071: if ((x & 0xe0000000) != 0)
1072: throw new ScanException(
1073: "Value of octal integer literal \"" + s
1074: + "\" is out of range");
1075: x = (x << 3) + Character.digit(s.charAt(i), 8);
1076: }
1077: break;
1078:
1079: case 16:
1080: // Cannot use "Integer.parseInt(s, 16)" because that parses SIGNED values.
1081: x = 0;
1082: for (int i = 0; i < s.length(); ++i) {
1083: if ((x & 0xf0000000) != 0)
1084: throw new ScanException(
1085: "Value of hexadecimal integer literal \""
1086: + s + "\" is out of range");
1087: x = (x << 4) + Character.digit(s.charAt(i), 16);
1088: }
1089: break;
1090:
1091: default:
1092: throw new RuntimeException("Illegal radix " + radix);
1093: }
1094: return new LiteralToken(new Integer(x));
1095: }
1096:
1097: private LiteralToken stringToLongLiteralToken(final String s,
1098: int radix) throws ScanException {
1099: long x;
1100: switch (radix) {
1101:
1102: case 10:
1103: // Special case: Decimal literal 2^63 must only appear in "negated" context, i.e.
1104: // "-9223372036854775808" is a valid long literal, but "9223372036854775808" is not.
1105: if (s.equals("9223372036854775808"))
1106: return new LiteralToken(Scanner.MAGIC_LONG);
1107:
1108: try {
1109: x = Long.parseLong(s);
1110: } catch (NumberFormatException e) {
1111: throw new ScanException(
1112: "Value of decimal long literal \"" + s
1113: + "\" is out of range");
1114: }
1115: break;
1116:
1117: case 8:
1118: // Cannot use "Long.parseLong(s, 8)" because that parses SIGNED values.
1119: x = 0L;
1120: for (int i = 0; i < s.length(); ++i) {
1121: if ((x & 0xe000000000000000L) != 0L)
1122: throw new ScanException(
1123: "Value of octal long literal \"" + s
1124: + "\" is out of range");
1125: x = (x << 3) + Character.digit(s.charAt(i), 8);
1126: }
1127: break;
1128:
1129: case 16:
1130: // Cannot use "Long.parseLong(s, 16)" because that parses SIGNED values.
1131: x = 0L;
1132: for (int i = 0; i < s.length(); ++i) {
1133: if ((x & 0xf000000000000000L) != 0L)
1134: throw new ScanException(
1135: "Value of hexadecimal long literal \"" + s
1136: + "\" is out of range");
1137: x = (x << 4) + (long) Character.digit(s.charAt(i), 16);
1138: }
1139: break;
1140:
1141: default:
1142: throw new RuntimeException("Illegal radix " + radix);
1143: }
1144: return new LiteralToken(new Long(x));
1145: }
1146:
1147: private LiteralToken stringToFloatLiteralToken(final String s)
1148: throws ScanException {
1149: float f;
1150: try {
1151: f = Float.parseFloat(s);
1152: } catch (NumberFormatException e) {
1153: throw new RuntimeException("SNO: parsing float literal \""
1154: + s + "\" throws a \"NumberFormatException\"");
1155: }
1156: if (Float.isInfinite(f))
1157: throw new ScanException("Value of float literal \"" + s
1158: + "\" is out of range");
1159: if (Float.isNaN(f))
1160: throw new RuntimeException("SNO: parsing float literal \""
1161: + s + "\" results is NaN");
1162:
1163: // Check for FLOAT underrun.
1164: if (f == 0.0F) {
1165: for (int i = 0; i < s.length(); ++i) {
1166: char c = s.charAt(i);
1167: if ("123456789".indexOf(c) != -1)
1168: throw new ScanException(
1169: "Literal \""
1170: + s
1171: + "\" is too small to be represented as a float");
1172: if ("0.".indexOf(c) == -1)
1173: break;
1174: }
1175: }
1176:
1177: return new LiteralToken(new Float(f));
1178: }
1179:
1180: private LiteralToken stringToDoubleLiteralToken(final String s)
1181: throws ScanException {
1182: double d;
1183: try {
1184: d = Double.parseDouble(s);
1185: } catch (NumberFormatException e) {
1186: throw new RuntimeException("SNO: parsing double literal \""
1187: + s + "\" throws a \"NumberFormatException\"");
1188: }
1189: if (Double.isInfinite(d))
1190: throw new ScanException("Value of double literal \"" + s
1191: + "\" is out of range");
1192: if (Double.isNaN(d))
1193: throw new RuntimeException("SNO: parsing double literal \""
1194: + s + "\" results is NaN");
1195:
1196: // Check for DOUBLE underrun.
1197: if (d == 0.0D) {
1198: for (int i = 0; i < s.length(); ++i) {
1199: char c = s.charAt(i);
1200: if ("123456789".indexOf(c) != -1)
1201: throw new ScanException(
1202: "Literal \""
1203: + s
1204: + "\" is too small to be represented as a double");
1205: if ("0.".indexOf(c) == -1)
1206: break;
1207: }
1208: }
1209:
1210: return new LiteralToken(new Double(d));
1211: }
1212:
1213: private char unescapeCharacterLiteral() throws ScanException,
1214: IOException {
1215: if (this .nextChar == -1)
1216: throw new ScanException("EOF in character literal");
1217:
1218: if (this .nextChar == '\r' || this .nextChar == '\n')
1219: throw new ScanException("Line break in literal not allowed");
1220:
1221: if (this .nextChar != '\\') {
1222: char res = (char) this .nextChar;
1223: this .readNextChar();
1224: return res;
1225: }
1226: this .readNextChar();
1227: int idx = "btnfr".indexOf(this .nextChar);
1228: if (idx != -1) {
1229: char res = "\b\t\n\f\r".charAt(idx);
1230: this .readNextChar();
1231: return res;
1232: }
1233: idx = "01234567".indexOf(this .nextChar);
1234: if (idx != -1) {
1235: int code = idx;
1236: this .readNextChar();
1237: idx = "01234567".indexOf(this .nextChar);
1238: if (idx == -1)
1239: return (char) code;
1240: code = 8 * code + idx;
1241: this .readNextChar();
1242: idx = "01234567".indexOf(this .nextChar);
1243: if (idx == -1)
1244: return (char) code;
1245: code = 8 * code + idx;
1246: if (code > 255)
1247: throw new ScanException("Invalid octal escape");
1248: this .readNextChar();
1249: return (char) code;
1250: }
1251:
1252: char res = (char) this .nextChar;
1253: this .readNextChar();
1254: return res;
1255: }
1256:
1257: // Read one character and store in "nextChar".
1258: private void readNextChar() throws IOException, ScanException {
1259: try {
1260: this .nextChar = this .in.read();
1261: } catch (UnicodeUnescapeException ex) {
1262: throw new ScanException(ex.getMessage(), ex);
1263: }
1264: if (this .nextChar == '\r') {
1265: ++this .nextCharLineNumber;
1266: this .nextCharColumnNumber = 0;
1267: this .crLfPending = true;
1268: } else if (this .nextChar == '\n') {
1269: if (this .crLfPending) {
1270: this .crLfPending = false;
1271: } else {
1272: ++this .nextCharLineNumber;
1273: this .nextCharColumnNumber = 0;
1274: }
1275: } else {
1276: ++this .nextCharColumnNumber;
1277: }
1278: //System.out.println("'" + (char) nextChar + "' = " + (int) nextChar);
1279: }
1280:
// NOTE(review): not referenced in this part of the file; presumably guards debug
// output elsewhere -- confirm before removing.
private static final boolean DEBUG = false;

// File name reported in the location of "ScanException"s; as the name says, may be
// "null".
private/*final*/String optionalFileName;
// The character source that "readNextChar()" reads from.
private/*final*/Reader in;
private int nextChar = -1; // Always valid (one character read-ahead); -1 means end-of-input.
// Set when a CR was read, so that the LF of a CR-LF pair is not counted as a second
// line break.
private boolean crLfPending = false;
// Line and column number of "nextChar", maintained by "readNextChar()".
private short nextCharLineNumber;
private short nextCharColumnNumber;

private Token nextToken; // Is always non-null (one token read-ahead).
private Token nextButOneToken; // Is only non-null after "peekNextButOne()".
private short tokenLineNumber; // Line number of "nextToken" (typically starting at one).
private short tokenColumnNumber; // Column number of first character of "nextToken" (1 if token is immediately preceded by a line break).
private String docComment = null; // The optional JAVADOC comment preceding the "nextToken".
1295:
// The Java keywords; each keyword is mapped to itself.
private static final Map JAVA_KEYWORDS = new HashMap();
static {
    final String[] keywords = {
        "abstract", "boolean", "break", "byte", "case",
        "catch", "char", "class", "const", "continue",
        "default", "do", "double", "else", "extends", "final",
        "finally", "float", "for", "goto", "if", "implements",
        "import", "instanceof", "int", "interface", "long",
        "native", "new", "package", "private", "protected",
        "public", "return", "short", "static", "strictfp",
        "super", "switch", "synchronized", "this", "throw",
        "throws", "transient", "try", "void", "volatile",
        "while",
    };
    for (int k = 0; k < keywords.length; ++k) {
        final String keyword = keywords[k];
        JAVA_KEYWORDS.put(keyword, keyword);
    }
}
// The Java separators and operators; each one is mapped to itself.
private static final Map JAVA_OPERATORS = new HashMap();
static {
    final String[] operators = {
        // Separators:
        "(", ")", "{", "}", "[", "]", ";", ",", ".",
        // Operators:
        "=", ">", "<", "!", "~", "?", ":", "==", "<=", ">=",
        "!=", "&&", "||", "++", "--", "+", "-", "*", "/", "&",
        "|", "^", "%", "<<", ">>", ">>>", "+=", "-=", "*=",
        "/=", "&=", "|=", "^=", "%=", "<<=", ">>=", ">>>=",
    };
    for (int k = 0; k < operators.length; ++k) {
        final String operator = operators[k];
        JAVA_OPERATORS.put(operator, operator);
    }
}
1326:
/**
 * An exception that reflects an error during scanning.
 * <p>
 * This is an inner (non-static) class: the location of the exception is taken from the
 * enclosing {@link Scanner} at construction time, i.e. its optional file name and the
 * line and column number of its read-ahead character.
 */
public class ScanException extends LocatedException {

    /**
     * @param message describes the scan error
     */
    public ScanException(String message) {
        super (message, new Location(Scanner.this .optionalFileName,
                Scanner.this .nextCharLineNumber,
                Scanner.this .nextCharColumnNumber));
    }

    /**
     * @param message describes the scan error
     * @param cause   the exception that led to this scan error
     */
    public ScanException(String message, Throwable cause) {
        super (message, new Location(Scanner.this .optionalFileName,
                Scanner.this .nextCharLineNumber,
                Scanner.this .nextCharColumnNumber), cause);
    }
}
1344:
/**
 * By default, warnings are discarded, but an application may install a
 * {@link WarningHandler}.
 * <p>
 * Notice that there is no <code>Scanner.setErrorHandler()</code> method, but scan errors
 * always throw a {@link ScanException}. The reason is that there is no reasonable
 * way to recover from scan errors and continue scanning, so there is no need to install
 * a custom scan error handler.
 *
 * @param optionalWarningHandler <code>null</code> to indicate that no warnings be issued
 */
public void setWarningHandler(WarningHandler optionalWarningHandler) {
    this .optionalWarningHandler = optionalWarningHandler;
}
1359:
// Used for elaborate warning handling: the handler that "warning()" forwards to.
// "null" (the default) means that warnings are discarded.
private WarningHandler optionalWarningHandler = null;
1362:
1363: /**
1364: * Issues a warning with the given message and location and returns. This is done through
1365: * a {@link WarningHandler} that was installed through
1366: * {@link #setWarningHandler(WarningHandler)}.
1367: * <p>
1368: * The <code>handle</code> argument qulifies the warning and is typically used by
1369: * the {@link WarningHandler} to suppress individual warnings.
1370: */
1371: private void warning(String handle, String message,
1372: Location optionalLocation) {
1373: if (this.optionalWarningHandler != null)
1374: this.optionalWarningHandler.handleWarning(handle, message,
1375: optionalLocation);
1376: }
1377: }
|