0001: /* ====================================================================
0002: * Tea - Copyright (c) 1997-2000 Walt Disney Internet Group
0003: * ====================================================================
0004: * The Tea Software License, Version 1.1
0005: *
0006: * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Walt Disney Internet Group (http://opensource.go.com/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact opensource@dig.com.
0031: *
0032: * 5. Products derived from this software may not be called "Tea",
0033: * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet",
0034: * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior
0035: * written permission of the Walt Disney Internet Group.
0036: *
0037: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0038: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0039: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0040: * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS
0041: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
0042: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
0043: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
0044: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
0045: * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0046: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0047: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * For more information about Tea, please see http://opensource.go.com/.
0051: */
0052:
0053: package com.go.tea.compiler;
0054:
0055: import java.io.*;
0056: import java.util.Vector;
0057: import java.util.Stack;
0058: import com.go.trove.io.SourceReader;
0059:
0060: /******************************************************************************
0061: * A Scanner breaks up a source file into its basic elements, called
0062: * {@link Token Tokens}. Add an {@link ErrorListener} to capture any syntax
0063: * errors detected by the Scanner.
0064: *
0065: * @author Brian S O'Neill
0066: * @version
0067: * <!--$$Revision:--> 45 <!-- $-->, <!--$$JustDate:--> 00/12/13 <!-- $-->
0068: */
0069: public class Scanner {
0070: private SourceReader mSource;
0071: private CompilationUnit mUnit;
0072:
0073: private boolean mEmitSpecial;
0074:
0075: /** StringBuffer for temporary use. */
0076: private StringBuffer mWord = new StringBuffer(20);
0077:
0078: /** The scanner supports any amount of lookahead. */
0079: private Stack mLookahead = new Stack();
0080:
0081: private Token mEOFToken;
0082:
0083: private Vector mListeners = new Vector(1);
0084: private int mErrorCount = 0;
0085:
0086: private MessageFormatter mFormatter;
0087:
/**
 * Creates a Scanner that reads from the given source and has no
 * associated compilation unit. Delegates to the two-argument constructor
 * with a null unit.
 *
 * @param in source of characters to tokenize
 */
public Scanner(SourceReader in) {
    this(in, null);
}
0091:
/**
 * Creates a Scanner that reads from the given source.
 *
 * @param in source of characters to tokenize
 * @param unit compilation unit attached to every error event this
 * Scanner dispatches; the single-argument constructor passes null
 */
public Scanner(SourceReader in, CompilationUnit unit) {
    this.mSource = in;
    this.mUnit = unit;
    // The formatter turns the message keys passed to error() into text.
    this.mFormatter = MessageFormatter.lookup(this);
}
0097:
0098: public void addErrorListener(ErrorListener listener) {
0099: mListeners.addElement(listener);
0100: }
0101:
0102: public void removeErrorListener(ErrorListener listener) {
0103: mListeners.removeElement(listener);
0104: }
0105:
0106: private void dispatchParseError(ErrorEvent e) {
0107: mErrorCount++;
0108:
0109: synchronized (mListeners) {
0110: for (int i = 0; i < mListeners.size(); i++) {
0111: ((ErrorListener) mListeners.elementAt(i))
0112: .compileError(e);
0113: }
0114: }
0115: }
0116:
0117: private void error(String str, SourceInfo info) {
0118: dispatchParseError(new ErrorEvent(this , mFormatter.format(str),
0119: info, mUnit));
0120: }
0121:
0122: private void error(String str) {
0123: error(str, new SourceInfo(mSource.getLineNumber(), mSource
0124: .getStartPosition(), mSource.getEndPosition()));
0125: }
0126:
0127: /**
0128: * Passing true causes Scanner to emit additional tokens that should not
0129: * be bassed into a Parser. These are {@link Token.COMMENT},
0130: * {@link Token.ENTER_CODE}, and {@link Token.ENTER_TEXT}. By default,
0131: * these special tokens are not emitted.
0132: */
0133: public void emitSpecialTokens(boolean enable) {
0134: mEmitSpecial = enable;
0135: }
0136:
0137: /**
0138: * Returns EOF as the last token.
0139: */
0140: public synchronized Token readToken() throws IOException {
0141: if (mLookahead.empty()) {
0142: return scanToken();
0143: } else {
0144: return (Token) mLookahead.pop();
0145: }
0146: }
0147:
0148: /**
0149: * Returns EOF as the last token.
0150: */
0151: public synchronized Token peekToken() throws IOException {
0152: if (mLookahead.empty()) {
0153: return (Token) mLookahead.push(scanToken());
0154: } else {
0155: return (Token) mLookahead.peek();
0156: }
0157: }
0158:
0159: public synchronized void unreadToken(Token token)
0160: throws IOException {
0161: mLookahead.push(token);
0162: }
0163:
0164: public void close() throws IOException {
0165: mSource.close();
0166: }
0167:
0168: public int getErrorCount() {
0169: return mErrorCount;
0170: }
0171:
0172: private Token scanToken() throws IOException {
0173: int c;
0174: int peek;
0175:
0176: int startPos;
0177:
0178: while ((c = mSource.read()) != -1) {
0179: switch (c) {
0180:
0181: case SourceReader.ENTER_TEXT:
0182: Token enter;
0183: if (mEmitSpecial) {
0184: enter = makeStringToken(Token.ENTER_TEXT, mSource
0185: .getEndTag());
0186: } else {
0187: enter = null;
0188: }
0189:
0190: Token t = scanText(c);
0191:
0192: if (mEmitSpecial) {
0193: if (t.getStringValue().length() > 0) {
0194: mLookahead.push(t);
0195: }
0196: return enter;
0197: }
0198:
0199: if (t.getStringValue().length() == 0) {
0200: continue;
0201: }
0202:
0203: return t;
0204:
0205: case SourceReader.ENTER_CODE:
0206: // Entering code while in code is illegal. Just let the parser
0207: // deal with it.
0208: return makeStringToken(Token.ENTER_CODE, mSource
0209: .getBeginTag());
0210:
0211: case '(':
0212: return makeToken(Token.LPAREN);
0213: case ')':
0214: return makeToken(Token.RPAREN);
0215:
0216: case '{':
0217: return makeToken(Token.LBRACE);
0218: case '}':
0219: return makeToken(Token.RBRACE);
0220:
0221: case '[':
0222: return makeToken(Token.LBRACK);
0223: case ']':
0224: return makeToken(Token.RBRACK);
0225:
0226: case ';':
0227: return makeToken(Token.SEMI);
0228:
0229: case ',':
0230: return makeToken(Token.COMMA);
0231:
0232: case '.':
0233: peek = mSource.peek();
0234:
0235: if (peek >= '0' && peek <= '9') {
0236: error("number.decimal.start");
0237: return scanNumber(c);
0238: } else if (peek == '.') {
0239: startPos = mSource.getStartPosition();
0240: // read the second '.'
0241: mSource.read();
0242:
0243: peek = mSource.peek();
0244: if (peek == '.') {
0245: // read the third '.'
0246: mSource.read();
0247: return makeToken(Token.ELLIPSIS, startPos);
0248: } else {
0249: return makeToken(Token.DOTDOT, startPos);
0250: }
0251: } else {
0252: return makeToken(Token.DOT);
0253: }
0254:
0255: case '#':
0256: peek = mSource.peek();
0257:
0258: if (peek == '#') {
0259: startPos = mSource.getStartPosition();
0260: mSource.read();
0261: return makeToken(Token.DOUBLE_HASH, startPos);
0262: } else {
0263: return makeToken(Token.HASH);
0264: }
0265:
0266: case '!':
0267: if (mSource.peek() == '=') {
0268: startPos = mSource.getStartPosition();
0269: mSource.read();
0270: return makeToken(Token.NE, startPos);
0271: } else {
0272: return makeStringToken(Token.UNKNOWN, String
0273: .valueOf((char) c));
0274: }
0275:
0276: case '<':
0277: if (mSource.peek() == '=') {
0278: startPos = mSource.getStartPosition();
0279: mSource.read();
0280: return makeToken(Token.LE, startPos);
0281: } else {
0282: return makeToken(Token.LT);
0283: }
0284:
0285: case '>':
0286: if (mSource.peek() == '=') {
0287: startPos = mSource.getStartPosition();
0288: mSource.read();
0289: return makeToken(Token.GE, startPos);
0290: } else {
0291: return makeToken(Token.GT);
0292: }
0293:
0294: case '=':
0295: if (mSource.peek() == '=') {
0296: startPos = mSource.getStartPosition();
0297: mSource.read();
0298: return makeToken(Token.EQ, startPos);
0299: } else {
0300: return makeToken(Token.ASSIGN);
0301: }
0302:
0303: case '&':
0304: return makeToken(Token.CONCAT);
0305:
0306: case '+':
0307: return makeToken(Token.PLUS);
0308:
0309: case '-':
0310: return makeToken(Token.MINUS);
0311:
0312: case '*':
0313: return makeToken(Token.MULT);
0314:
0315: case '%':
0316: return makeToken(Token.MOD);
0317:
0318: case '/':
0319: startPos = mSource.getStartPosition();
0320: peek = mSource.peek();
0321:
0322: if (peek == '*') {
0323: mSource.read();
0324: mSource.ignoreTags(true);
0325: t = scanMultiLineComment(startPos);
0326: mSource.ignoreTags(false);
0327: if (mEmitSpecial) {
0328: return t;
0329: } else {
0330: continue;
0331: }
0332: } else if (peek == '/') {
0333: mSource.read();
0334: t = scanOneLineComment(startPos);
0335: if (mEmitSpecial) {
0336: return t;
0337: } else {
0338: continue;
0339: }
0340: } else {
0341: return makeToken(Token.DIV);
0342: }
0343:
0344: case '\"':
0345: case '\'':
0346: mSource.ignoreTags(true);
0347: t = scanString(c);
0348: mSource.ignoreTags(false);
0349: return t;
0350:
0351: case '0':
0352: case '1':
0353: case '2':
0354: case '3':
0355: case '4':
0356: case '5':
0357: case '6':
0358: case '7':
0359: case '8':
0360: case '9':
0361: return scanNumber(c);
0362:
0363: case 'a':
0364: case 'b':
0365: case 'c':
0366: case 'd':
0367: case 'e':
0368: case 'f':
0369: case 'g':
0370: case 'h':
0371: case 'i':
0372: case 'j':
0373: case 'k':
0374: case 'l':
0375: case 'm':
0376: case 'n':
0377: case 'o':
0378: case 'p':
0379: case 'q':
0380: case 'r':
0381: case 's':
0382: case 't':
0383: case 'u':
0384: case 'v':
0385: case 'w':
0386: case 'x':
0387: case 'y':
0388: case 'z':
0389: case 'A':
0390: case 'B':
0391: case 'C':
0392: case 'D':
0393: case 'E':
0394: case 'F':
0395: case 'G':
0396: case 'H':
0397: case 'I':
0398: case 'J':
0399: case 'K':
0400: case 'L':
0401: case 'M':
0402: case 'N':
0403: case 'O':
0404: case 'P':
0405: case 'Q':
0406: case 'R':
0407: case 'S':
0408: case 'T':
0409: case 'U':
0410: case 'V':
0411: case 'W':
0412: case 'X':
0413: case 'Y':
0414: case 'Z':
0415: case '_':
0416: return scanIdentifier(c);
0417:
0418: case ' ':
0419: case '\0':
0420: case '\t':
0421: case '\r':
0422: case '\n':
0423: continue;
0424:
0425: default:
0426: if (Character.isWhitespace((char) c)) {
0427: continue;
0428: }
0429:
0430: if (Character.isLetter((char) c)) {
0431: return scanIdentifier(c);
0432: } else {
0433: return makeStringToken(Token.UNKNOWN, String
0434: .valueOf((char) c));
0435: }
0436: }
0437: }
0438:
0439: if (mEOFToken == null) {
0440: mEOFToken = makeToken(Token.EOF);
0441: }
0442:
0443: return mEOFToken;
0444: }
0445:
0446: // The ENTER_TEXT code has already been scanned when this is called.
0447: private Token scanText(int c) throws IOException {
0448: // Read first character in text so that source info does not include
0449: // tags.
0450: c = mSource.read();
0451:
0452: int startLine = mSource.getLineNumber();
0453: int startPos = mSource.getStartPosition();
0454: int endPos = mSource.getEndPosition();
0455: StringBuffer buf = new StringBuffer(256);
0456:
0457: while (c != -1) {
0458: if (c == SourceReader.ENTER_CODE) {
0459: if (mEmitSpecial) {
0460: mLookahead.push(makeStringToken(Token.ENTER_CODE,
0461: mSource.getBeginTag()));
0462: }
0463: break;
0464: } else if (c == SourceReader.ENTER_TEXT) {
0465: buf.append(mSource.getEndTag());
0466: } else {
0467: buf.append((char) c);
0468: }
0469:
0470: if (mSource.peek() < 0) {
0471: endPos = mSource.getEndPosition();
0472: }
0473:
0474: c = mSource.read();
0475: }
0476: ;
0477:
0478: if (c == -1) {
0479: // If the last token in the source file is text, trim all trailing
0480: // whitespace from it.
0481:
0482: int length = buf.length();
0483:
0484: int i;
0485: for (i = length - 1; i >= 0; i--) {
0486: if (buf.charAt(i) > ' ') {
0487: break;
0488: }
0489: }
0490:
0491: buf.setLength(i + 1);
0492: }
0493:
0494: String str;
0495: if (buf.length() == 0) {
0496: str = "";
0497: } else {
0498: str = new String(buf.toString());
0499: }
0500:
0501: return new StringToken(startLine, startPos, endPos,
0502: Token.STRING, str);
0503: }
0504:
0505: private Token scanString(int delimiter) throws IOException {
0506: int c;
0507: int startLine = mSource.getLineNumber();
0508: int startPos = mSource.getStartPosition();
0509: mWord.setLength(0);
0510:
0511: while ((c = mSource.read()) != -1) {
0512: if (c == delimiter) {
0513: break;
0514: }
0515:
0516: if (c == '\n' || c == '\r') {
0517: error("string.newline");
0518: break;
0519: }
0520:
0521: if (c == '\\') {
0522: int next = mSource.read();
0523: switch (next) {
0524: case '0':
0525: c = '\0';
0526: break;
0527: case 'b':
0528: c = '\b';
0529: break;
0530: case 't':
0531: c = '\t';
0532: break;
0533: case 'n':
0534: c = '\n';
0535: break;
0536: case 'f':
0537: c = '\f';
0538: break;
0539: case 'r':
0540: c = '\r';
0541: break;
0542: case '\\':
0543: c = '\\';
0544: break;
0545: case '\'':
0546: c = '\'';
0547: break;
0548: case '\"':
0549: c = '\"';
0550: break;
0551: default:
0552: error("escape.code");
0553: c = next;
0554: break;
0555: }
0556: }
0557:
0558: mWord.append((char) c);
0559: }
0560:
0561: if (c == -1) {
0562: error("string.eof");
0563: }
0564:
0565: Token t = new StringToken(startLine, startPos, mSource
0566: .getEndPosition(), Token.STRING, mWord.toString());
0567:
0568: return t;
0569: }
0570:
0571: // The first character has already been scanned when this is called.
0572: private Token scanNumber(int c) throws IOException {
0573: int startLine = mSource.getLineNumber();
0574: int startPos = mSource.getStartPosition();
0575: mWord.setLength(0);
0576:
0577: int errorPos = -1;
0578:
0579: // 0 is decimal int,
0580: // 1 is hex int,
0581: // 2 is decimal long,
0582: // 3 is hex long,
0583: // 4 is float,
0584: // 5 is double,
0585: // 6 is auto-double by decimal
0586: // 7 is auto-double by exponent ('e' or 'E')
0587: int type = 0;
0588:
0589: if (c == '0') {
0590: if (mSource.peek() == 'x' || mSource.peek() == 'X') {
0591: type = 1;
0592: mSource.read(); // absorb the 'x'
0593: c = mSource.read(); // get the first digit after the 'x'
0594: }
0595: }
0596:
0597: for (; c != -1; c = mSource.read()) {
0598: if (c == '.') {
0599: int peek = mSource.peek();
0600: if (peek == '.') {
0601: mSource.unread();
0602: break;
0603: } else {
0604: if (peek < '0' || peek > '9') {
0605: error("number.decimal.end");
0606: }
0607:
0608: mWord.append((char) c);
0609:
0610: if (type == 0) {
0611: type = 6;
0612: } else if (errorPos < 0) {
0613: errorPos = mSource.getStartPosition();
0614: }
0615:
0616: continue;
0617: }
0618: }
0619:
0620: if (c >= '0' && c <= '9') {
0621: mWord.append((char) c);
0622:
0623: if (type == 2 || type == 3 || type == 4 || type == 5) {
0624: if (errorPos < 0) {
0625: errorPos = mSource.getStartPosition();
0626: }
0627: }
0628:
0629: continue;
0630: }
0631:
0632: if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
0633: if (type == 1) {
0634: mWord.append((char) c);
0635: continue;
0636: }
0637:
0638: if (c == 'f' || c == 'F') {
0639: if (type == 0 || type == 6 || type == 7) {
0640: type = 4;
0641: continue;
0642: }
0643: } else if (c == 'd' || c == 'D') {
0644: if (type == 0 || type == 6 || type == 7) {
0645: type = 5;
0646: continue;
0647: }
0648: } else if (c == 'e' || c == 'E') {
0649: if (type == 0 || type == 6) {
0650: mWord.append((char) c);
0651: type = 7;
0652: int peek = mSource.peek();
0653: if (peek == '+' || peek == '-') {
0654: mWord.append((char) mSource.read());
0655: }
0656: continue;
0657: }
0658: }
0659:
0660: mWord.append((char) c);
0661:
0662: if (errorPos < 0) {
0663: errorPos = mSource.getStartPosition();
0664: }
0665:
0666: continue;
0667: }
0668:
0669: if (c == 'l' || c == 'L') {
0670: if (type == 0) {
0671: type = 2;
0672: } else if (type == 1) {
0673: type = 3;
0674: } else {
0675: mWord.append((char) c);
0676: if (errorPos < 0) {
0677: errorPos = mSource.getStartPosition();
0678: }
0679: }
0680:
0681: continue;
0682: }
0683:
0684: if (Character.isLetterOrDigit((char) c)) {
0685: mWord.append((char) c);
0686:
0687: if (errorPos < 0) {
0688: errorPos = mSource.getStartPosition();
0689: }
0690: } else {
0691: mSource.unread();
0692: break;
0693: }
0694: }
0695:
0696: String str = mWord.toString();
0697: int endPos = mSource.getEndPosition();
0698: Token token;
0699:
0700: if (errorPos >= 0) {
0701: token = new StringToken(startLine, startPos, endPos,
0702: errorPos, Token.NUMBER, str);
0703: } else {
0704: Number value;
0705: try {
0706: switch (type) {
0707: case 0:
0708: default:
0709: try {
0710: token = new IntToken(startLine, startPos,
0711: endPos, Integer.parseInt(str));
0712: } catch (NumberFormatException e) {
0713: token = new LongToken(startLine, startPos,
0714: endPos, Long.parseLong(str));
0715: }
0716: break;
0717: case 1:
0718: try {
0719: token = new IntToken(startLine, startPos,
0720: endPos, parseHexInt(str));
0721: } catch (NumberFormatException e) {
0722: token = new LongToken(startLine, startPos,
0723: endPos, parseHexLong(str));
0724: }
0725: break;
0726: case 2:
0727: token = new LongToken(startLine, startPos, endPos,
0728: Long.parseLong(str));
0729: break;
0730: case 3:
0731: token = new LongToken(startLine, startPos, endPos,
0732: parseHexLong(str));
0733: break;
0734: case 4:
0735: token = new FloatToken(startLine, startPos, endPos,
0736: Float.parseFloat(str));
0737: break;
0738: case 5:
0739: case 6:
0740: case 7:
0741: token = new DoubleToken(startLine, startPos,
0742: endPos, Double.parseDouble(str));
0743: break;
0744: }
0745: } catch (NumberFormatException e) {
0746: token = new IntToken(startLine, startPos, endPos, 0);
0747: error("number.range", token.getSourceInfo());
0748: }
0749: }
0750:
0751: return token;
0752: }
0753:
0754: private int parseHexInt(String str) {
0755: if (str.length() > 8) {
0756: // Strip off any leading zeros.
0757: while (str.charAt(0) == '0') {
0758: str = str.substring(1);
0759: }
0760: }
0761:
0762: try {
0763: return Integer.parseInt(str, 16);
0764: } catch (NumberFormatException e) {
0765: if (str.length() == 8) {
0766: return (int) Long.parseLong(str, 16);
0767: } else {
0768: throw e;
0769: }
0770: }
0771: }
0772:
0773: private long parseHexLong(String str) {
0774: if (str.length() > 16) {
0775: // Strip off any leading zeros.
0776: while (str.charAt(0) == '0') {
0777: str = str.substring(1);
0778: }
0779: }
0780:
0781: try {
0782: return Long.parseLong(str, 16);
0783: } catch (NumberFormatException e) {
0784: if (str.length() == 16) {
0785: long v1 = Long.parseLong(str.substring(0, 8), 16);
0786: long v2 = Long.parseLong(str.substring(8), 16);
0787: return v1 << 32 + v2 & 0xffffffffL;
0788: } else {
0789: throw e;
0790: }
0791: }
0792: }
0793:
0794: // The first character has already been scanned when this is called.
0795: private Token scanIdentifier(int c) throws IOException {
0796: int startLine = mSource.getLineNumber();
0797: int startPos = mSource.getStartPosition();
0798: int endPos = mSource.getEndPosition();
0799: mWord.setLength(0);
0800:
0801: mWord.append((char) c);
0802:
0803: loop: while ((c = mSource.peek()) != -1) {
0804: switch (c) {
0805: case 'a':
0806: case 'b':
0807: case 'c':
0808: case 'd':
0809: case 'e':
0810: case 'f':
0811: case 'g':
0812: case 'h':
0813: case 'i':
0814: case 'j':
0815: case 'k':
0816: case 'l':
0817: case 'm':
0818: case 'n':
0819: case 'o':
0820: case 'p':
0821: case 'q':
0822: case 'r':
0823: case 's':
0824: case 't':
0825: case 'u':
0826: case 'v':
0827: case 'w':
0828: case 'x':
0829: case 'y':
0830: case 'z':
0831: case 'A':
0832: case 'B':
0833: case 'C':
0834: case 'D':
0835: case 'E':
0836: case 'F':
0837: case 'G':
0838: case 'H':
0839: case 'I':
0840: case 'J':
0841: case 'K':
0842: case 'L':
0843: case 'M':
0844: case 'N':
0845: case 'O':
0846: case 'P':
0847: case 'Q':
0848: case 'R':
0849: case 'S':
0850: case 'T':
0851: case 'U':
0852: case 'V':
0853: case 'W':
0854: case 'X':
0855: case 'Y':
0856: case 'Z':
0857: case '_':
0858: case '0':
0859: case '1':
0860: case '2':
0861: case '3':
0862: case '4':
0863: case '5':
0864: case '6':
0865: case '7':
0866: case '8':
0867: case '9':
0868: mSource.read();
0869: endPos = mSource.getEndPosition();
0870: mWord.append((char) c);
0871: continue loop;
0872: }
0873:
0874: if (Character.isLetterOrDigit((char) c)) {
0875: mSource.read();
0876: endPos = mSource.getEndPosition();
0877: mWord.append((char) c);
0878: } else {
0879: break;
0880: }
0881: }
0882:
0883: int id = Token.findReservedWordID(mWord);
0884:
0885: Token t;
0886:
0887: if (id != Token.UNKNOWN) {
0888: t = new Token(startLine, startPos, endPos, id);
0889: } else {
0890: t = new StringToken(startLine, startPos, endPos,
0891: Token.IDENT, mWord.toString());
0892: }
0893:
0894: mWord.setLength(0);
0895: return t;
0896: }
0897:
0898: // The two leading slashes have already been scanned when this is
0899: // called.
0900: private Token scanOneLineComment(int startPos) throws IOException {
0901: int c;
0902: int startLine = mSource.getLineNumber();
0903: int endPos = mSource.getEndPosition();
0904: mWord.setLength(0);
0905: mWord.append('/').append('/');
0906:
0907: while ((c = mSource.peek()) != -1) {
0908: if (c == '\r' || c == '\n') {
0909: break;
0910: }
0911:
0912: mSource.read();
0913: mWord.append((char) c);
0914:
0915: endPos = mSource.getEndPosition();
0916: }
0917:
0918: return new StringToken(startLine, startPos, endPos,
0919: Token.COMMENT, mWord.toString());
0920: }
0921:
0922: // The leading slash and star has already been scanned when this is
0923: // called.
0924: private Token scanMultiLineComment(int startPos) throws IOException {
0925: int c;
0926: int startLine = mSource.getLineNumber();
0927: mWord.setLength(0);
0928: mWord.append('/').append('*');
0929:
0930: while ((c = mSource.read()) != -1) {
0931: mWord.append((char) c);
0932:
0933: if (c == '*') {
0934: if (mSource.peek() == '/') {
0935: mWord.append('/');
0936: mSource.read();
0937: break;
0938: }
0939: }
0940: }
0941:
0942: if (c == -1) {
0943: error("comment.eof");
0944: }
0945:
0946: return new StringToken(startLine, startPos, mSource
0947: .getEndPosition(), Token.COMMENT, mWord.toString());
0948: }
0949:
0950: private Token makeToken(int ID) {
0951: return new Token(mSource.getLineNumber(), mSource
0952: .getStartPosition(), mSource.getEndPosition(), ID);
0953: }
0954:
0955: private Token makeToken(int ID, int startPos) {
0956: return new Token(mSource.getLineNumber(), startPos, mSource
0957: .getEndPosition(), ID);
0958: }
0959:
0960: private Token makeStringToken(int ID, String str) {
0961: return new StringToken(mSource.getLineNumber(), mSource
0962: .getStartPosition(), mSource.getEndPosition(), ID, str);
0963: }
0964:
0965: /**
0966: * Simple test program
0967: */
0968: public static void main(String[] arg) throws Exception {
0969: Tester.test(arg);
0970: }
0971:
0972: /**************************************************************************
0973: *
0974: * @author Brian S O'Neill
0975: * @version
0976: * <!--$$Revision:--> 45 <!-- $-->, <!--$$JustDate:--> 00/12/13 <!-- $-->
0977: */
0978: private static class Tester {
0979: public static void test(String[] arg) throws Exception {
0980: Token token;
0981: Reader file;
0982: Scanner s;
0983:
0984: // First run, display tokens in program format
0985: file = new BufferedReader(new FileReader(arg[2]));
0986: s = new Scanner(new SourceReader(file, arg[0], arg[1]));
0987:
0988: while ((token = s.readToken()).getID() != Token.EOF) {
0989: int id = token.getID();
0990:
0991: if (id == Token.LBRACE) {
0992: System.out.println();
0993: }
0994:
0995: System.out.print(token + " ");
0996:
0997: if (id == Token.LBRACE || id == Token.RBRACE
0998: || id == Token.SEMI) {
0999: System.out.println();
1000:
1001: if (id == Token.RBRACE) {
1002: System.out.println();
1003: }
1004: }
1005: }
1006:
1007: System.out.println("\n\n*** Full Token Dump ***\n");
1008:
1009: // Second run, display detailed token information
1010: file = new FileReader(arg[2]);
1011: s = new Scanner(new SourceReader(file, arg[0], arg[1]));
1012: s.emitSpecialTokens(true);
1013:
1014: while ((token = s.readToken()).getID() != Token.EOF) {
1015: System.out.print(token.getCode() + ": ");
1016: System.out.print(token.getSourceInfo() + ": ");
1017:
1018: if (token.getID() == Token.NUMBER) {
1019: switch (token.getNumericType()) {
1020: case 1:
1021: System.out.print("int: ");
1022: break;
1023: case 2:
1024: System.out.print("long: ");
1025: break;
1026: case 3:
1027: System.out.print("float: ");
1028: break;
1029: case 4:
1030: System.out.print("double: ");
1031: break;
1032: default:
1033: System.out.print("BAD: ");
1034: break;
1035: }
1036: }
1037:
1038: String value = token.getStringValue();
1039: if (value != null) {
1040: System.out.println(value);
1041: } else {
1042: System.out.println(token.getImage());
1043: }
1044: }
1045: }
1046: }
1047: }
|