0001: /*
0002: * Janino - An embedded Java[TM] compiler
0003: *
0004: * Copyright (c) 2006, Arno Unkrig
0005: * All rights reserved.
0006: *
0007: * Redistribution and use in source and binary forms, with or without
0008: * modification, are permitted provided that the following conditions
0009: * are met:
0010: *
0011: * 1. Redistributions of source code must retain the above copyright
0012: * notice, this list of conditions and the following disclaimer.
0013: * 2. Redistributions in binary form must reproduce the above
0014: * copyright notice, this list of conditions and the following
0015: * disclaimer in the documentation and/or other materials
0016: * provided with the distribution.
0017: * 3. The name of the author may not be used to endorse or promote
0018: * products derived from this software without specific prior
0019: * written permission.
0020: *
0021: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
0022: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0023: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0024: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
0025: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
0026: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
0027: * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0028: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
0029: * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
0030: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
0031: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0032: */
0033:
0034: package org.codehaus.janino;
0035:
0036: import java.io.*;
0037: import java.util.*;
0038:
0039: import org.codehaus.janino.util.TeeReader;
0040:
/**
 * Splits up a character stream into tokens and returns them as
 * {@link Scanner.Token Token} objects.
 * <p>
 * The <code>optionalFileName</code> parameter passed to many
 * constructors should name the file the characters are read from; it is
 * used for reporting errors and for source level debugging.
 */
0048:
0049: public class Scanner {
0050:
0051: // Public Scanners that read from a file.
0052:
/**
 * Sets up a scanner that reads tokens from the named file, decoding it with
 * the platform default encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param fileName path of the file to scan; also used as the file name of the scanner
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated Leaves the input file open.
 */
public Scanner(String fileName) throws ScanException, IOException {
    // A null encoding selects the platform default encoding.
    this(fileName, new FileInputStream(fileName), (String) null);
}
0065:
/**
 * Sets up a scanner that reads tokens from the named file, decoding it with
 * the given encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param fileName path of the file to scan; also used as the file name of the scanner
 * @param encoding name of the encoding of the file; <code>null</code> means platform default encoding
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated Leaves the input file open.
 */
public Scanner(String fileName, String encoding) throws ScanException, IOException {
    this(fileName, new FileInputStream(fileName), encoding);
}
0080:
/**
 * Sets up a scanner that reads tokens from the given file, decoding it with
 * the platform default encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param file the file to scan; its absolute path becomes the file name of the scanner
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated Leaves the input file open.
 */
public Scanner(File file) throws ScanException, IOException {
    // A null encoding selects the platform default encoding.
    this(file, (String) null);
}
0095:
/**
 * Sets up a scanner that reads tokens from the given file, decoding it with
 * the given encoding.
 * <p>
 * <b>This constructor is deprecated because it leaves the input file open.</b>
 *
 * @param file             the file to scan; its absolute path becomes the file name of the scanner
 * @param optionalEncoding name of the encoding of the file; <code>null</code> means platform default encoding
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the file cannot be opened or read
 * @deprecated Leaves the input file open.
 */
public Scanner(File file, String optionalEncoding) throws ScanException, IOException {
    this(file.getAbsolutePath(), new FileInputStream(file), optionalEncoding);
}
0110:
0111: // Public Scanners that read from an InputStream
0112:
/**
 * Sets up a scanner that reads tokens from the given {@link InputStream},
 * decoding it with the platform default encoding.
 * <p>
 * The <code>optionalFileName</code> is treated exactly as described for
 * {@link #Scanner(String, InputStream, String)}.
 *
 * @param optionalFileName name of the file the stream was opened on, or <code>null</code>
 * @param is               the stream to read the characters from
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if reading from the stream fails
 */
public Scanner(String optionalFileName, InputStream is) throws ScanException, IOException {
    // A null encoding selects the platform default encoding.
    this(optionalFileName, is, (String) null);
}
0130:
/**
 * Sets up a scanner that reads tokens from the given {@link InputStream},
 * decoding it with the given <code>optionalEncoding</code>
 * (<code>null</code> means platform default encoding).
 * <p>
 * The <code>optionalFileName</code> is used for reporting errors during
 * compilation and for source level debugging, and should name an existing
 * file. If <code>null</code> is passed, and the system property
 * <code>org.codehaus.janino.source_debugging.enable</code> is set to "true",
 * then a temporary file in <code>org.codehaus.janino.source_debugging.dir</code>
 * or the system's default temp dir is created in order to make the source
 * code available to a debugger.
 *
 * @param optionalFileName name of the file the stream was opened on, or <code>null</code>
 * @param is               the stream to read the characters from
 * @param optionalEncoding name of the encoding of the stream, or <code>null</code>
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if the encoding is not supported or reading from the stream fails
 */
public Scanner(String optionalFileName, InputStream is, String optionalEncoding)
        throws ScanException, IOException {
    this(
        optionalFileName,
        optionalEncoding != null
            ? new InputStreamReader(is, optionalEncoding)
            : new InputStreamReader(is),
        (short) 1, // initialLineNumber
        (short) 0  // initialColumnNumber
    );
}
0154:
0155: // Public Scanners that read from a Reader.
0156:
/**
 * Sets up a scanner that reads tokens from the given {@link Reader},
 * counting lines from 1 and columns from 0.
 * <p>
 * The <code>optionalFileName</code> is used for reporting errors during
 * compilation and for source level debugging, and should name an existing
 * file. If <code>null</code> is passed, and the system property
 * <code>org.codehaus.janino.source_debugging.enable</code> is set to "true",
 * then a temporary file in <code>org.codehaus.janino.source_debugging.dir</code>
 * or the system's default temp dir is created in order to make the source
 * code available to a debugger.
 *
 * @param optionalFileName name of the file the reader was opened on, or <code>null</code>
 * @param in               the reader to read the characters from
 * @throws ScanException if the first token cannot be scanned
 * @throws IOException   if reading from the reader fails
 */
public Scanner(String optionalFileName, Reader in) throws ScanException, IOException {
    this(optionalFileName, in, (short) 1, (short) 0);
}
0177:
0178: /**
0179: * Creates a {@link Scanner} that counts lines and columns from non-default initial
0180: * values.
0181: */
0182: public Scanner(String optionalFileName, Reader in,
0183: short initialLineNumber, // "1" is a good idea
0184: short initialColumnNumber // "0" is a good idea
0185: ) throws ScanException, IOException {
0186:
0187: // Debugging on source code level is only possible if the code comes from
0188: // a "real" Java source file which the debugger can read. If this is not the
0189: // case, and we absolutely want source code level debugging, then we write
0190: // a verbatim copy of the source code into a temporary file in the system
0191: // temp directory.
0192: // This behavior is controlled by the two system properties
0193: // org.codehaus.janino.source_debugging.enable
0194: // org.codehaus.janino.source_debugging.dir
0195: // JANINO is designed to compile in memory to save the overhead of disk
0196: // I/O, so writing this file is only recommended for source code level
0197: // debugging purposes.
0198: if (optionalFileName == null
0199: && Boolean
0200: .getBoolean("org.codehaus.janino.source_debugging.enable")) {
0201: String dirName = System
0202: .getProperty("org.codehaus.janino.source_debugging.dir");
0203: File dir = dirName == null ? null : new File(dirName);
0204: File temporaryFile = File.createTempFile("janino", ".java",
0205: dir);
0206: temporaryFile.deleteOnExit();
0207: in = new TeeReader(in, // in
0208: new FileWriter(temporaryFile), // out
0209: true // closeWriterOnEOF
0210: );
0211: optionalFileName = temporaryFile.getAbsolutePath();
0212: }
0213:
0214: this .optionalFileName = optionalFileName;
0215: this .in = new UnicodeUnescapeReader(in);
0216: this .nextCharLineNumber = initialLineNumber;
0217: this .nextCharColumnNumber = initialColumnNumber;
0218:
0219: this .readNextChar();
0220: this .nextToken = this .internalRead();
0221: this .nextButOneToken = null;
0222: }
0223:
0224: /**
0225: * Return the file name optionally passed to the constructor.
0226: */
0227: public String getFileName() {
0228: return this .optionalFileName;
0229: }
0230:
0231: /**
0232: * Closes the character source (file, {@link InputStream}, {@link Reader}) associated
0233: * with this object. The results of future calls to {@link #peek()} and
0234: * {@link #read()} are undefined.
0235: * <p>
0236: * <b>This method is deprecated, because the concept described above is confusing. An
0237: * application should close the underlying {@link InputStream} or {@link Reader} itself.</b>
0238: *
0239: * @deprecated
0240: */
0241: public void close() throws IOException {
0242: this .in.close();
0243: }
0244:
0245: /**
0246: * Read the next token from the input.
0247: */
0248: public Token read() throws ScanException, IOException {
0249: Token res = this .nextToken;
0250: if (this .nextButOneToken != null) {
0251: this .nextToken = this .nextButOneToken;
0252: this .nextButOneToken = null;
0253: } else {
0254: this .nextToken = this .internalRead();
0255: }
0256: return res;
0257: }
0258:
0259: /**
0260: * Peek the next token, but don't remove it from the input.
0261: */
0262: public Token peek() {
0263: if (Scanner.DEBUG)
0264: System.err.println("peek() => \"" + this .nextToken + "\"");
0265: return this .nextToken;
0266: }
0267:
0268: /**
0269: * Peek the next but one token, neither remove the next nor the next but one token from the
0270: * input.
0271: * <p>
0272: * This makes parsing so much easier, e.g. for class literals like
0273: * <code>Map.class</code>.
0274: */
0275: public Token peekNextButOne() throws ScanException, IOException {
0276: if (this .nextButOneToken == null)
0277: this .nextButOneToken = this .internalRead();
0278: return this .nextButOneToken;
0279: }
0280:
0281: /**
0282: * Get the text of the doc comment (a.k.a. "JAVADOC comment") preceeding
0283: * the next token.
0284: * @return <code>null</code> if the next token is not preceeded by a doc comment
0285: */
0286: public String doc() {
0287: String s = this .docComment;
0288: this .docComment = null;
0289: return s;
0290: }
0291:
0292: /**
0293: * Returns the {@link Location} of the next token.
0294: */
0295: public Location location() {
0296: return this .nextToken.getLocation();
0297: }
0298:
0299: public abstract class Token {
0300: private/*final*/String optionalFileName;
0301: private/*final*/short lineNumber;
0302: private/*final*/short columnNumber;
0303: private Location location = null;
0304:
0305: private Token() {
0306: this .optionalFileName = Scanner.this .optionalFileName;
0307: this .lineNumber = Scanner.this .tokenLineNumber;
0308: this .columnNumber = Scanner.this .tokenColumnNumber;
0309: }
0310:
0311: public Location getLocation() {
0312: if (this .location == null)
0313: this .location = new Location(this .optionalFileName,
0314: this .lineNumber, this .columnNumber);
0315: return this .location;
0316: }
0317:
0318: public boolean isKeyword() {
0319: return false;
0320: }
0321:
0322: public boolean isKeyword(String k) {
0323: return false;
0324: }
0325:
0326: public boolean isKeyword(String[] ks) {
0327: return false;
0328: }
0329:
0330: public String getKeyword() throws ScanException {
0331: throw new ScanException("Not a keyword token");
0332: }
0333:
0334: public boolean isIdentifier() {
0335: return false;
0336: }
0337:
0338: public boolean isIdentifier(String id) {
0339: return false;
0340: }
0341:
0342: public String getIdentifier() throws ScanException {
0343: throw new ScanException("Not an identifier token");
0344: }
0345:
0346: public boolean isLiteral() {
0347: return false;
0348: }
0349:
0350: public Object getLiteralValue() throws ScanException {
0351: throw new ScanException("Not a literal token");
0352: }
0353:
0354: public boolean isOperator() {
0355: return false;
0356: }
0357:
0358: public boolean isOperator(String o) {
0359: return false;
0360: }
0361:
0362: public boolean isOperator(String[] os) {
0363: return false;
0364: }
0365:
0366: public String getOperator() throws ScanException {
0367: throw new ScanException("Not an operator token");
0368: }
0369:
0370: public boolean isEOF() {
0371: return false;
0372: }
0373: }
0374:
0375: public class KeywordToken extends Token {
0376: private final String keyword;
0377:
0378: /**
0379: * @param keyword Must be in interned string!
0380: */
0381: private KeywordToken(String keyword) {
0382: this .keyword = keyword;
0383: }
0384:
0385: public boolean isKeyword() {
0386: return true;
0387: }
0388:
0389: public boolean isKeyword(String k) {
0390: return this .keyword == k;
0391: }
0392:
0393: public boolean isKeyword(String[] ks) {
0394: for (int i = 0; i < ks.length; ++i) {
0395: if (this .keyword == ks[i])
0396: return true;
0397: }
0398: return false;
0399: }
0400:
0401: public String getKeyword() {
0402: return this .keyword;
0403: }
0404:
0405: public String toString() {
0406: return this .keyword;
0407: }
0408: }
0409:
0410: public class IdentifierToken extends Token {
0411: private final String identifier;
0412:
0413: private IdentifierToken(String identifier) {
0414: this .identifier = identifier;
0415: }
0416:
0417: public boolean isIdentifier() {
0418: return true;
0419: }
0420:
0421: public boolean isIdentifier(String id) {
0422: return this .identifier.equals(id);
0423: }
0424:
0425: public String getIdentifier() {
0426: return this .identifier;
0427: }
0428:
0429: public String toString() {
0430: return this .identifier;
0431: }
0432: }
0433:
/**
 * This value represents the "magic" literal "2147483648" which is only
 * allowed in a negated context.
 * <p>
 * The scanner hands out this very object as the value of such a literal, and
 * {@link #literalValueToString(Object)} recognizes it by object identity
 * (<code>==</code>), not by value. Hence it must remain a distinct object.
 */
public static final Integer MAGIC_INTEGER = new Integer(
        Integer.MIN_VALUE);

/**
 * This value represents the "magic" literal "9223372036854775808L" which is only
 * allowed in a negated context.
 * <p>
 * Like {@link #MAGIC_INTEGER}, it is recognized by object identity
 * (<code>==</code>) in {@link #literalValueToString(Object)}, not by value.
 */
public static final Long MAGIC_LONG = new Long(Long.MIN_VALUE);
0446:
0447: /**
0448: * The type of the <code>value</code> parameter determines the type of the literal
0449: * token:
0450: * <table>
0451: * <tr><th>Type/value returned by {@link #getLiteralValue()}</th><th>Literal</th></tr>
0452: * <tr><td>{@link String}</td><td>STRING literal</td></tr>
0453: * <tr><td>{@link Character}</td><td>CHAR literal</td></tr>
0454: * <tr><td>{@link Integer}</td><td>INT literal</td></tr>
0455: * <tr><td>{@link Long}</td><td>LONG literal</td></tr>
0456: * <tr><td>{@link Float}</td><td>FLOAT literal</td></tr>
0457: * <tr><td>{@link Double}</td><td>DOUBLE literal</td></tr>
0458: * <tr><td>{@link Boolean}</td><td>BOOLEAN literal</td></tr>
0459: * <tr><td><code>null</code></td><td>NULL literal</td></tr>
0460: * </table>
0461: */
0462: public final class LiteralToken extends Token {
0463: private final Object value;
0464:
0465: public LiteralToken(Object value) {
0466: this .value = value;
0467: }
0468:
0469: // Implement {@link Literal}.
0470: public final boolean isLiteral() {
0471: return true;
0472: }
0473:
0474: public Object getLiteralValue() {
0475: return this .value;
0476: }
0477:
0478: public String toString() {
0479: return Scanner.literalValueToString(this .value);
0480: }
0481: }
0482:
0483: public static String literalValueToString(Object v) {
0484: if (v instanceof String) {
0485: StringBuffer sb = new StringBuffer();
0486: sb.append('"');
0487: String s = (String) v;
0488: for (int i = 0; i < s.length(); ++i) {
0489: char c = s.charAt(i);
0490:
0491: if (c == '"') {
0492: sb.append("\\\"");
0493: } else {
0494: Scanner.escapeCharacter(c, sb);
0495: }
0496: }
0497: sb.append('"');
0498: return sb.toString();
0499: }
0500: if (v instanceof Character) {
0501: char c = ((Character) v).charValue();
0502: if (c == '\'')
0503: return "'\\''";
0504: StringBuffer sb = new StringBuffer("'");
0505: Scanner.escapeCharacter(c, sb);
0506: return sb.append('\'').toString();
0507: }
0508: if (v instanceof Integer) {
0509: if (v == Scanner.MAGIC_INTEGER)
0510: return "2147483648";
0511: return v.toString();
0512: }
0513: if (v instanceof Long) {
0514: if (v == Scanner.MAGIC_LONG)
0515: return "9223372036854775808L";
0516: return v.toString() + 'L';
0517: }
0518: if (v instanceof Float) {
0519: return v.toString() + 'F';
0520: }
0521: if (v instanceof Double) {
0522: return v.toString() + 'D';
0523: }
0524: if (v instanceof Boolean) {
0525: return v.toString();
0526: }
0527: if (v == null) {
0528: return "null";
0529: }
0530: throw new RuntimeException("Unexpected value type \""
0531: + v.getClass().getName() + "\"");
0532: }
0533:
0534: public class OperatorToken extends Token {
0535: private final String operator;
0536:
0537: /**
0538: *
0539: * @param operator Must be an interned string!
0540: */
0541: private OperatorToken(String operator) {
0542: this .operator = operator;
0543: }
0544:
0545: public boolean isOperator() {
0546: return true;
0547: }
0548:
0549: public boolean isOperator(String o) {
0550: return this .operator == o;
0551: }
0552:
0553: public boolean isOperator(String[] os) {
0554: for (int i = 0; i < os.length; ++i) {
0555: if (this .operator == os[i])
0556: return true;
0557: }
0558: return false;
0559: }
0560:
0561: public String getOperator() {
0562: return this .operator;
0563: }
0564:
0565: public String toString() {
0566: return this .operator;
0567: }
0568: }
0569:
0570: public class EOFToken extends Token {
0571: public boolean isEOF() {
0572: return true;
0573: }
0574:
0575: public String toString() {
0576: return "End-Of-File";
0577: }
0578: }
0579:
0580: /**
0581: * Escape unprintable characters appropriately, i.e. as
0582: * backslash-letter or backslash-U-four-hex-digits.
0583: * <p>
0584: * Notice: Single and double quotes are not escaped!
0585: */
0586: private static void escapeCharacter(char c, StringBuffer sb) {
0587:
0588: // Backslash escape sequences.
0589: int idx = "\b\t\n\f\r\\".indexOf(c);
0590: if (idx != -1) {
0591: sb.append('\\').append("btnfr\\".charAt(idx));
0592: } else
0593:
0594: // Printable characters.
0595: if (c >= ' ' && c < 255 && c != 127) {
0596: sb.append(c);
0597: } else
0598:
0599: // Backslash-U escape sequences.
0600: {
0601: sb.append("\\u");
0602: String hs = Integer.toHexString(0xffff & c);
0603: for (int j = hs.length(); j < 4; ++j)
0604: sb.append('0');
0605: sb.append(hs);
0606: }
0607: }
0608:
0609: private Token internalRead() throws ScanException, IOException {
0610: if (this .docComment != null) {
0611: this .warning("MDC", "Misplaced doc comment", this .nextToken
0612: .getLocation());
0613: this .docComment = null;
0614: }
0615:
0616: // Skip whitespace and process comments.
0617: int state = 0;
0618: StringBuffer dcsb = null; // For doc comment
0619:
0620: PROCESS_COMMENTS: for (;;) {
0621: switch (state) {
0622:
0623: case 0: // Outside any comment
0624: if (this .nextChar == -1) {
0625: return new EOFToken();
0626: } else if (Character.isWhitespace((char) this .nextChar)) {
0627: ;
0628: } else if (this .nextChar == '/') {
0629: state = 1;
0630: } else {
0631: break PROCESS_COMMENTS;
0632: }
0633: break;
0634:
0635: case 1: // After "/"
0636: if (this .nextChar == -1) {
0637: return new OperatorToken("/");
0638: } else if (this .nextChar == '=') {
0639: this .readNextChar();
0640: return new OperatorToken("/=");
0641: } else if (this .nextChar == '/') {
0642: state = 2;
0643: } else if (this .nextChar == '*') {
0644: state = 3;
0645: } else {
0646: return new OperatorToken("/");
0647: }
0648: break;
0649:
0650: case 2: // After "//..."
0651: if (this .nextChar == -1) {
0652: return new EOFToken();
0653: } else if (this .nextChar == '\r'
0654: || this .nextChar == '\n') {
0655: state = 0;
0656: } else {
0657: ;
0658: }
0659: break;
0660:
0661: case 3: // After "/*"
0662: if (this .nextChar == -1) {
0663: throw new ScanException(
0664: "EOF in traditional comment");
0665: } else if (this .nextChar == '*') {
0666: state = 4;
0667: } else {
0668: state = 9;
0669: }
0670: break;
0671:
0672: case 4: // After "/**"
0673: if (this .nextChar == -1) {
0674: throw new ScanException("EOF in doc comment");
0675: } else if (this .nextChar == '/') {
0676: state = 0;
0677: } else {
0678: if (this .docComment != null)
0679: this .warning("MDC", "Multiple doc comments",
0680: new Location(this .optionalFileName,
0681: this .nextCharLineNumber,
0682: this .nextCharColumnNumber));
0683: dcsb = new StringBuffer();
0684: dcsb.append((char) this .nextChar);
0685: state = ((this .nextChar == '\r' || this .nextChar == '\n') ? 6
0686: : this .nextChar == '*' ? 8 : 5);
0687: }
0688: break;
0689:
0690: case 5: // After "/**..."
0691: if (this .nextChar == -1) {
0692: throw new ScanException("EOF in doc comment");
0693: } else if (this .nextChar == '*') {
0694: state = 8;
0695: } else if (this .nextChar == '\r'
0696: || this .nextChar == '\n') {
0697: dcsb.append((char) this .nextChar);
0698: state = 6;
0699: } else {
0700: dcsb.append((char) this .nextChar);
0701: }
0702: break;
0703:
0704: case 6: // After "/**...\n"
0705: if (this .nextChar == -1) {
0706: throw new ScanException("EOF in doc comment");
0707: } else if (this .nextChar == '*') {
0708: state = 7;
0709: } else if (this .nextChar == '\r'
0710: || this .nextChar == '\n') {
0711: dcsb.append((char) this .nextChar);
0712: } else if (this .nextChar == ' '
0713: || this .nextChar == '\t') {
0714: ;
0715: } else {
0716: dcsb.append((char) this .nextChar);
0717: state = 5;
0718: }
0719: break;
0720:
0721: case 7: // After "/**...\n *"
0722: if (this .nextChar == -1) {
0723: throw new ScanException("EOF in doc comment");
0724: } else if (this .nextChar == '*') {
0725: ;
0726: } else if (this .nextChar == '/') {
0727: this .docComment = dcsb.toString();
0728: state = 0;
0729: } else {
0730: dcsb.append((char) this .nextChar);
0731: state = 5;
0732: }
0733: break;
0734:
0735: case 8: // After "/**...*"
0736: if (this .nextChar == -1) {
0737: throw new ScanException("EOF in doc comment");
0738: } else if (this .nextChar == '/') {
0739: this .docComment = dcsb.toString();
0740: state = 0;
0741: } else if (this .nextChar == '*') {
0742: dcsb.append('*');
0743: } else {
0744: dcsb.append('*');
0745: dcsb.append((char) this .nextChar);
0746: state = 5;
0747: }
0748: break;
0749:
0750: case 9: // After "/*..."
0751: if (this .nextChar == -1) {
0752: throw new ScanException(
0753: "EOF in traditional comment");
0754: } else if (this .nextChar == '*') {
0755: state = 10;
0756: } else {
0757: ;
0758: }
0759: break;
0760:
0761: case 10: // After "/*...*"
0762: if (this .nextChar == -1) {
0763: throw new ScanException(
0764: "EOF in traditional comment");
0765: } else if (this .nextChar == '/') {
0766: state = 0;
0767: } else if (this .nextChar == '*') {
0768: ;
0769: } else {
0770: state = 9;
0771: }
0772: }
0773: this .readNextChar();
0774: }
0775:
0776: /*
0777: * Whitespace and comments are now skipped; "nextChar" is definitely
0778: * the first character of the token.
0779: */
0780: this .tokenLineNumber = this .nextCharLineNumber;
0781: this .tokenColumnNumber = this .nextCharColumnNumber;
0782:
0783: // Scan identifier.
0784: if (Character.isJavaIdentifierStart((char) this .nextChar)) {
0785: StringBuffer sb = new StringBuffer();
0786: sb.append((char) this .nextChar);
0787: for (;;) {
0788: this .readNextChar();
0789: if (this .nextChar == -1
0790: || !Character
0791: .isJavaIdentifierPart((char) this .nextChar))
0792: break;
0793: sb.append((char) this .nextChar);
0794: }
0795: String s = sb.toString();
0796: if (s.equals("true"))
0797: return new LiteralToken(Boolean.TRUE);
0798: if (s.equals("false"))
0799: return new LiteralToken(Boolean.FALSE);
0800: if (s.equals("null"))
0801: return new LiteralToken(null);
0802: {
0803: String v = (String) Scanner.JAVA_KEYWORDS.get(s);
0804: if (v != null)
0805: return new KeywordToken(v);
0806: }
0807: return new IdentifierToken(s);
0808: }
0809:
0810: // Scan numeric literal.
0811: if (Character.isDigit((char) this .nextChar)) {
0812: return this .scanNumericLiteral(0);
0813: }
0814:
0815: // A "." is special: Could either be a floating-point constant like ".001", or the "."
0816: // operator.
0817: if (this .nextChar == '.') {
0818: this .readNextChar();
0819: if (Character.isDigit((char) this .nextChar)) {
0820: return this .scanNumericLiteral(2);
0821: } else {
0822: return new OperatorToken(".");
0823: }
0824: }
0825:
0826: // Scan string literal.
0827: if (this .nextChar == '"') {
0828: StringBuffer sb = new StringBuffer("");
0829: this .readNextChar();
0830: if (this .nextChar == -1)
0831: throw new ScanException("EOF in string literal");
0832: if (this .nextChar == '\r' || this .nextChar == '\n')
0833: throw new ScanException("Line break in string literal");
0834: while (this .nextChar != '"') {
0835: sb.append(this .unescapeCharacterLiteral());
0836: }
0837: this .readNextChar();
0838: return new LiteralToken(sb.toString());
0839: }
0840:
0841: // Scan character literal.
0842: if (this .nextChar == '\'') {
0843: this .readNextChar();
0844: char lit = this .unescapeCharacterLiteral();
0845: if (this .nextChar != '\'')
0846: throw new ScanException("Closing single quote missing");
0847: this .readNextChar();
0848:
0849: return new LiteralToken(new Character(lit));
0850: }
0851:
0852: // Scan separator / operator.
0853: {
0854: String v = (String) Scanner.JAVA_OPERATORS.get(new String(
0855: new char[] { (char) this .nextChar }));
0856: if (v != null) {
0857: for (;;) {
0858: this .readNextChar();
0859: String v2 = (String) Scanner.JAVA_OPERATORS.get(v
0860: + (char) this .nextChar);
0861: if (v2 == null)
0862: return new OperatorToken(v);
0863: v = v2;
0864: }
0865: }
0866: }
0867:
0868: throw new ScanException("Invalid character input \""
0869: + (char) this .nextChar + "\" (character code "
0870: + this .nextChar + ")");
0871: }
0872:
0873: private Token scanNumericLiteral(int initialState)
0874: throws ScanException, IOException {
0875: StringBuffer sb = (initialState == 2) ? new StringBuffer("0.")
0876: : new StringBuffer();
0877: int state = initialState;
0878: for (;;) {
0879: switch (state) {
0880:
0881: case 0: // First character.
0882: if (this .nextChar == '0') {
0883: state = 6;
0884: } else
0885: /* if (Character.isDigit((char) this.nextChar)) */{
0886: sb.append((char) this .nextChar);
0887: state = 1;
0888: }
0889: break;
0890:
0891: case 1: // Decimal digits.
0892: if (Character.isDigit((char) this .nextChar)) {
0893: sb.append((char) this .nextChar);
0894: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
0895: this .readNextChar();
0896: return this .stringToLongLiteralToken(sb.toString(),
0897: 10);
0898: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0899: this .readNextChar();
0900: return this
0901: .stringToFloatLiteralToken(sb.toString());
0902: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0903: this .readNextChar();
0904: return this .stringToDoubleLiteralToken(sb
0905: .toString());
0906: } else if (this .nextChar == '.') {
0907: sb.append('.');
0908: state = 2;
0909: } else if (this .nextChar == 'E' || this .nextChar == 'e') {
0910: sb.append('E');
0911: state = 3;
0912: } else {
0913: return this .stringToIntegerLiteralToken(sb
0914: .toString(), 10);
0915: }
0916: break;
0917:
0918: case 2: // After decimal point.
0919: if (Character.isDigit((char) this .nextChar)) {
0920: sb.append((char) this .nextChar);
0921: } else if (this .nextChar == 'e' || this .nextChar == 'E') {
0922: sb.append('E');
0923: state = 3;
0924: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0925: this .readNextChar();
0926: return this
0927: .stringToFloatLiteralToken(sb.toString());
0928: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0929: this .readNextChar();
0930: return this .stringToDoubleLiteralToken(sb
0931: .toString());
0932: } else {
0933: return this .stringToDoubleLiteralToken(sb
0934: .toString());
0935: }
0936: break;
0937:
0938: case 3: // Read exponent.
0939: if (Character.isDigit((char) this .nextChar)) {
0940: sb.append((char) this .nextChar);
0941: state = 5;
0942: } else if (this .nextChar == '-' || this .nextChar == '+') {
0943: sb.append((char) this .nextChar);
0944: state = 4;
0945: } else {
0946: throw new ScanException(
0947: "Exponent missing after \"E\"");
0948: }
0949: break;
0950:
0951: case 4: // After exponent sign.
0952: if (Character.isDigit((char) this .nextChar)) {
0953: sb.append((char) this .nextChar);
0954: state = 5;
0955: } else {
0956: throw new ScanException(
0957: "Exponent missing after \"E\" and sign");
0958: }
0959: break;
0960:
0961: case 5: // After first exponent digit.
0962: if (Character.isDigit((char) this .nextChar)) {
0963: sb.append((char) this .nextChar);
0964: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0965: this .readNextChar();
0966: return this
0967: .stringToFloatLiteralToken(sb.toString());
0968: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0969: this .readNextChar();
0970: return this .stringToDoubleLiteralToken(sb
0971: .toString());
0972: } else {
0973: return this .stringToDoubleLiteralToken(sb
0974: .toString());
0975: }
0976: break;
0977:
0978: case 6: // After leading zero
0979: if ("01234567".indexOf(this .nextChar) != -1) {
0980: sb.append((char) this .nextChar);
0981: state = 7;
0982: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
0983: this .readNextChar();
0984: return this .stringToLongLiteralToken("0", 10);
0985: } else if (this .nextChar == 'f' || this .nextChar == 'F') {
0986: this .readNextChar();
0987: return this .stringToFloatLiteralToken("0");
0988: } else if (this .nextChar == 'd' || this .nextChar == 'D') {
0989: this .readNextChar();
0990: return this .stringToDoubleLiteralToken("0");
0991: } else if (this .nextChar == '.') {
0992: sb.append("0.");
0993: state = 2;
0994: } else if (this .nextChar == 'E' || this .nextChar == 'e') {
0995: sb.append('E');
0996: state = 3;
0997: } else if (this .nextChar == 'x' || this .nextChar == 'X') {
0998: state = 8;
0999: } else {
1000: return this .stringToIntegerLiteralToken("0", 10);
1001: }
1002: break;
1003:
1004: case 7: // In octal literal.
1005: if ("01234567".indexOf(this .nextChar) != -1) {
1006: sb.append((char) this .nextChar);
1007: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
1008: // Octal long literal.
1009: this .readNextChar();
1010: return this .stringToLongLiteralToken(sb.toString(),
1011: 8);
1012: } else {
1013: // Octal int literal
1014: return this .stringToIntegerLiteralToken(sb
1015: .toString(), 8);
1016: }
1017: break;
1018:
1019: case 8: // First hex digit
1020: if (Character.digit((char) this .nextChar, 16) != -1) {
1021: sb.append((char) this .nextChar);
1022: state = 9;
1023: } else {
1024: throw new ScanException(
1025: "Hex digit expected after \"0x\"");
1026: }
1027: break;
1028:
1029: case 9:
1030: if (Character.digit((char) this .nextChar, 16) != -1) {
1031: sb.append((char) this .nextChar);
1032: } else if (this .nextChar == 'l' || this .nextChar == 'L') {
1033: // Hex long literal
1034: this .readNextChar();
1035: return this .stringToLongLiteralToken(sb.toString(),
1036: 16);
1037: } else {
1038: // Hex long literal
1039: return this .stringToIntegerLiteralToken(sb
1040: .toString(), 16);
1041: }
1042: break;
1043: }
1044: this .readNextChar();
1045: }
1046: }
1047:
1048: private LiteralToken stringToIntegerLiteralToken(final String s,
1049: int radix) throws ScanException {
1050: int x;
1051: switch (radix) {
1052:
1053: case 10:
1054: // Special case: Decimal literal 2^31 must only appear in "negated" context, i.e.
1055: // "-2147483648" is a valid long literal, but "2147483648" is not.
1056: if (s.equals("2147483648"))
1057: return new LiteralToken(Scanner.MAGIC_INTEGER);
1058: try {
1059: x = Integer.parseInt(s);
1060: } catch (NumberFormatException e) {
1061: throw new ScanException(
1062: "Value of decimal integer literal \"" + s
1063: + "\" is out of range");
1064: }
1065: break;
1066:
1067: case 8:
1068: // Cannot use "Integer.parseInt(s, 8)" because that parses SIGNED values.
1069: x = 0;
1070: for (int i = 0; i < s.length(); ++i) {
1071: if ((x & 0xe0000000) != 0)
1072: throw new ScanException(
1073: "Value of octal integer literal \"" + s
1074: + "\" is out of range");
1075: x = (x << 3) + Character.digit(s.charAt(i), 8);
1076: }
1077: break;
1078:
1079: case 16:
1080: // Cannot use "Integer.parseInt(s, 16)" because that parses SIGNED values.
1081: x = 0;
1082: for (int i = 0; i < s.length(); ++i) {
1083: if ((x & 0xf0000000) != 0)
1084: throw new ScanException(
1085: "Value of hexadecimal integer literal \""
1086: + s + "\" is out of range");
1087: x = (x << 4) + Character.digit(s.charAt(i), 16);
1088: }
1089: break;
1090:
1091: default:
1092: throw new RuntimeException("Illegal radix " + radix);
1093: }
1094: return new LiteralToken(new Integer(x));
1095: }
1096:
1097: private LiteralToken stringToLongLiteralToken(final String s,
1098: int radix) throws ScanException {
1099: long x;
1100: switch (radix) {
1101:
1102: case 10:
1103: // Special case: Decimal literal 2^63 must only appear in "negated" context, i.e.
1104: // "-9223372036854775808" is a valid long literal, but "9223372036854775808" is not.
1105: if (s.equals("9223372036854775808"))
1106: return new LiteralToken(Scanner.MAGIC_LONG);
1107:
1108: try {
1109: x = Long.parseLong(s);
1110: } catch (NumberFormatException e) {
1111: throw new ScanException(
1112: "Value of decimal long literal \"" + s
1113: + "\" is out of range");
1114: }
1115: break;
1116:
1117: case 8:
1118: // Cannot use "Long.parseLong(s, 8)" because that parses SIGNED values.
1119: x = 0L;
1120: for (int i = 0; i < s.length(); ++i) {
1121: if ((x & 0xe000000000000000L) != 0L)
1122: throw new ScanException(
1123: "Value of octal long literal \"" + s
1124: + "\" is out of range");
1125: x = (x << 3) + Character.digit(s.charAt(i), 8);
1126: }
1127: break;
1128:
1129: case 16:
1130: // Cannot use "Long.parseLong(s, 16)" because that parses SIGNED values.
1131: x = 0L;
1132: for (int i = 0; i < s.length(); ++i) {
1133: if ((x & 0xf000000000000000L) != 0L)
1134: throw new ScanException(
1135: "Value of hexadecimal long literal \"" + s
1136: + "\" is out of range");
1137: x = (x << 4) + (long) Character.digit(s.charAt(i), 16);
1138: }
1139: break;
1140:
1141: default:
1142: throw new RuntimeException("Illegal radix " + radix);
1143: }
1144: return new LiteralToken(new Long(x));
1145: }
1146:
1147: private LiteralToken stringToFloatLiteralToken(final String s)
1148: throws ScanException {
1149: float f;
1150: try {
1151: f = Float.parseFloat(s);
1152: } catch (NumberFormatException e) {
1153: throw new ScanException("Value of float literal \"" + s
1154: + "\" is out of range");
1155: }
1156:
1157: return new LiteralToken(new Float(f));
1158: }
1159:
1160: private LiteralToken stringToDoubleLiteralToken(final String s)
1161: throws ScanException {
1162: double d;
1163: try {
1164: d = Double.parseDouble(s);
1165: } catch (NumberFormatException e) {
1166: throw new ScanException("Value of double literal \"" + s
1167: + "\" is out of range");
1168: }
1169:
1170: return new LiteralToken(new Double(d));
1171: }
1172:
1173: private char unescapeCharacterLiteral() throws ScanException,
1174: IOException {
1175: if (this .nextChar == -1)
1176: throw new ScanException("EOF in character literal");
1177:
1178: if (this .nextChar != '\\') {
1179: char res = (char) this .nextChar;
1180: this .readNextChar();
1181: return res;
1182: }
1183: this .readNextChar();
1184: int idx = "btnfr".indexOf(this .nextChar);
1185: if (idx != -1) {
1186: char res = "\b\t\n\f\r".charAt(idx);
1187: this .readNextChar();
1188: return res;
1189: }
1190: idx = "01234567".indexOf(this .nextChar);
1191: if (idx != -1) {
1192: int code = idx;
1193: this .readNextChar();
1194: idx = "01234567".indexOf(this .nextChar);
1195: if (idx == -1)
1196: return (char) code;
1197: code = 8 * code + idx;
1198: this .readNextChar();
1199: idx = "01234567".indexOf(this .nextChar);
1200: if (idx == -1)
1201: return (char) code;
1202: code = 8 * code + idx;
1203: if (code > 255)
1204: throw new ScanException("Invalid octal escape");
1205: this .readNextChar();
1206: return (char) code;
1207: }
1208:
1209: char res = (char) this .nextChar;
1210: this .readNextChar();
1211: return res;
1212: }
1213:
1214: // Read one character and store in "nextChar".
1215: private void readNextChar() throws IOException, ScanException {
1216: try {
1217: this .nextChar = this .in.read();
1218: } catch (UnicodeUnescapeException ex) {
1219: throw new ScanException(ex.getMessage());
1220: }
1221: if (this .nextChar == '\r') {
1222: ++this .nextCharLineNumber;
1223: this .nextCharColumnNumber = 0;
1224: this .crLfPending = true;
1225: } else if (this .nextChar == '\n') {
1226: if (this .crLfPending) {
1227: this .crLfPending = false;
1228: } else {
1229: ++this .nextCharLineNumber;
1230: this .nextCharColumnNumber = 0;
1231: }
1232: } else {
1233: ++this .nextCharColumnNumber;
1234: }
1235: //System.out.println("'" + (char) nextChar + "' = " + (int) nextChar);
1236: }
1237:
1238: private static final boolean DEBUG = false; // Debug flag (no use is visible in this part of the file).
1239:
1240: private/*final*/String optionalFileName; // File name that "ScanException" puts into its "Location".
1241: private/*final*/Reader in; // Character source; "readNextChar()" reads from it.
1242: private int nextChar = -1; // Always valid (one character read-ahead); -1 means end of input.
1243: private boolean crLfPending = false; // Set when "readNextChar()" reads a '\r'; while set, the next '\n' does not advance the line number again.
1244: private short nextCharLineNumber; // Line number of "nextChar", maintained by "readNextChar()".
1245: private short nextCharColumnNumber; // Column number of "nextChar"; "readNextChar()" resets it to 0 at a line break.
1246:
1247: private Token nextToken; // Is always non-null (one token read-ahead).
1248: private Token nextButOneToken; // Is only non-null after "peekNextButOne()".
1249: private short tokenLineNumber; // Line number of "nextToken" (typically starting at one).
1250: private short tokenColumnNumber; // Column number of first character of "nextToken" (1 if token is immediately preceded by a line break).
1251: private String docComment = null; // The optional JAVADOC comment preceding the "nextToken".
1252:
/**
 * The Java keywords known to the scanner; each keyword is mapped to itself.
 */
private static final Map JAVA_KEYWORDS = new HashMap();
static {
    final String[] keywords = {
        "abstract", "boolean", "break", "byte",
        "case", "catch", "char", "class", "const", "continue",
        "default", "do", "double",
        "else", "extends",
        "final", "finally", "float", "for",
        "goto",
        "if", "implements", "import", "instanceof", "int", "interface",
        "long",
        "native", "new",
        "package", "private", "protected", "public",
        "return",
        "short", "static", "strictfp", "super", "switch", "synchronized",
        "this", "throw", "throws", "transient", "try",
        "void", "volatile",
        "while",
    };

    // Register the keywords in array order.
    int next = 0;
    while (next < keywords.length) {
        final String keyword = keywords[next++];
        Scanner.JAVA_KEYWORDS.put(keyword, keyword);
    }
}
/**
 * The Java separators and operators known to the scanner; each one is
 * mapped to itself.
 */
private static final Map JAVA_OPERATORS = new HashMap();
static {
    final String[] symbols = {
        // Separators:
        "(", ")", "{", "}", "[", "]", ";", ",", ".",
        // Operators:
        "=", ">", "<", "!", "~", "?", ":",
        "==", "<=", ">=", "!=", "&&", "||", "++", "--",
        "+", "-", "*", "/", "&", "|", "^", "%",
        "<<", ">>", ">>>",
        "+=", "-=", "*=", "/=", "&=", "|=", "^=", "%=",
        "<<=", ">>=", ">>>=",
    };

    // Register the symbols in array order.
    int next = 0;
    while (next < symbols.length) {
        final String symbol = symbols[next++];
        Scanner.JAVA_OPERATORS.put(symbol, symbol);
    }
}
1283:
1284: /**
1285: * An exception that reflects an error during parsing.
1286: */
1287: public class ScanException extends LocatedException {
1288: public ScanException(String message) {
1289: super (message, new Location(Scanner.this .optionalFileName,
1290: Scanner.this .nextCharLineNumber,
1291: Scanner.this .nextCharColumnNumber));
1292: }
1293: }
1294:
1295: public static class LocatedException extends Exception {
1296: LocatedException(String message, Location optionalLocation) {
1297: super (message);
1298: this .optionalLocation = optionalLocation;
1299: }
1300:
1301: /**
1302: * Returns the message specified at creation time, preceeded
1303: * with nicely formatted location information (if any).
1304: */
1305: public String getMessage() {
1306: return (this .optionalLocation == null) ? super .getMessage()
1307: : this .optionalLocation.toString() + ": "
1308: + super .getMessage();
1309: }
1310:
1311: /**
1312: * Returns the {@link Location} object specified at
1313: * construction time (may be <code>null</code>).
1314: */
1315: public Location getLocation() {
1316: return this .optionalLocation;
1317: }
1318:
1319: private final Location optionalLocation;
1320: }
1321:
1322: /**
1323: * By default, warnings are discarded, but an application my install a
1324: * {@link WarningHandler}.
1325: * <p>
1326: * Notice that there is no <code>Scanner.setErrorHandler()</code> method, but scan errors
1327: * always throw a {@link ScanException}. The reason being is that there is no reasonable
1328: * way to recover from scan errors and continue scanning, so there is no need to install
1329: * a custom scan error handler.
1330: *
1331: * @param optionalWarningHandler <code>null</code> to indicate that no warnings be issued
1332: */
1333: public void setWarningHandler(WarningHandler optionalWarningHandler) {
1334: this .optionalWarningHandler = optionalWarningHandler;
1335: }
1336:
1337: // Receives the warnings issued through "warning()"; null (the default) means that warnings are discarded.
1338: private WarningHandler optionalWarningHandler = null;
1339:
1340: /**
1341: * Issues a warning with the given message and location and returns. This is done through
1342: * a {@link WarningHandler} that was installed through
1343: * {@link #setWarningHandler(WarningHandler)}.
1344: * <p>
1345: * The <code>handle</code> argument qulifies the warning and is typically used by
1346: * the {@link WarningHandler} to suppress individual warnings.
1347: */
1348: private void warning(String handle, String message,
1349: Location optionalLocation) {
1350: if (this.optionalWarningHandler != null)
1351: this.optionalWarningHandler.handleWarning(handle, message,
1352: optionalLocation);
1353: }
1354: }
|