0001: /*
0002: * Copyright 1994-2004 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: */
0025:
0026: package sun.tools.java;
0027:
0028: import java.io.IOException;
0029: import java.io.InputStream;
0030: import java.util.Hashtable;
0031:
0032: /**
0033: * A Scanner for Java tokens. Errors are reported
0034: * to the environment object.<p>
0035: *
0036: * The scanner keeps track of the current token,
0037: * the value of the current token (if any), and the start
0038: * position of the current token.<p>
0039: *
0040: * The scan() method advances the scanner to the next
0041: * token in the input.<p>
0042: *
0043: * The match() method is used to quickly match opening
0044: * brackets (ie: '(', '{', or '[') with their closing
0045: * counter part. This is useful during error recovery.<p>
0046: *
0047: * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
0048: * this means that both the line number and the exact offset into
0049: * the file are encoded in each position value.<p>
0050: *
0051: * The compiler treats either "\n", "\r" or "\r\n" as the
0052: * end of a line.<p>
0053: *
0054: * WARNING: The contents of this source file are not part of any
0055: * supported API. Code that depends on them does so at its own risk:
0056: * they are subject to change or removal without notice.
0057: *
0058: * @author Arthur van Hoff
0059: * @version 1.60, 12/15/96
0060: */
0061:
0062: public class Scanner implements Constants {
0063: /**
0064: * The increment for each character.
0065: */
0066: public static final long OFFSETINC = 1;
0067:
0068: /**
0069: * The increment for each line.
0070: */
0071: public static final long LINEINC = 1L << WHEREOFFSETBITS;
0072:
0073: /**
0074: * End of input
0075: */
0076: public static final int EOF = -1;
0077:
0078: /**
0079: * Where errors are reported
0080: */
0081: public Environment env;
0082:
0083: /**
0084: * Input reader
0085: */
0086: protected ScannerInputReader in;
0087:
0088: /**
0089: * If true, present all comments as tokens.
0090: * Contents are not saved, but positions are recorded accurately,
0091: * so the comment can be recovered from the text.
0092: * Line terminations are also returned as comment tokens,
0093: * and may be distinguished by their start and end positions,
0094: * which are equal (meaning, these tokens contain no chars).
0095: */
0096: public boolean scanComments = false;
0097:
0098: /**
0099: * Current token
0100: */
0101: public int token;
0102:
0103: /**
0104: * The position of the current token
0105: */
0106: public long pos;
0107:
0108: /**
0109: * The position of the previous token
0110: */
0111: public long prevPos;
0112:
0113: /**
0114: * The current character
0115: */
0116: protected int ch;
0117:
0118: /*
0119: * Token values.
0120: */
0121: public char charValue;
0122: public int intValue;
0123: public long longValue;
0124: public float floatValue;
0125: public double doubleValue;
0126: public String stringValue;
0127: public Identifier idValue;
0128: public int radix; // Radix, when reading int or long
0129:
0130: /*
0131: * A doc comment preceding the most recent token
0132: */
0133: public String docComment;
0134:
0135: /*
0136: * A growable character buffer.
0137: */
0138: private int count;
0139: private char buffer[] = new char[1024];
0140:
0141: private void growBuffer() {
0142: char newBuffer[] = new char[buffer.length * 2];
0143: System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
0144: buffer = newBuffer;
0145: }
0146:
0147: // The following two methods have been hand-inlined in
0148: // scanDocComment. If you make changes here, you should
0149: // check to see if scanDocComment also needs modification.
0150: private void putc(int ch) {
0151: if (count == buffer.length) {
0152: growBuffer();
0153: }
0154: buffer[count++] = (char) ch;
0155: }
0156:
0157: private String bufferString() {
0158: return new String(buffer, 0, count);
0159: }
0160:
/**
 * Create a scanner that reads from the given input stream and
 * reports errors to the given environment. The first token is
 * scanned as part of construction (see useInputStream).
 */
public Scanner(Environment env, InputStream in) throws IOException {
    this.env = env;
    useInputStream(in);
}
0168:
0169: /**
0170: * Setup input from the given input stream,
0171: * and scan the first token from it.
0172: */
0173: protected void useInputStream(InputStream in) throws IOException {
0174: try {
0175: this .in = new ScannerInputReader(env, in);
0176: } catch (Exception e) {
0177: env.setCharacterEncoding(null);
0178: this .in = new ScannerInputReader(env, in);
0179: }
0180:
0181: ch = this .in.read();
0182: prevPos = this .in.pos;
0183:
0184: scan();
0185: }
0186:
/**
 * Create a scanner that is not yet attached to any input.
 * The subclass is expected to call useInputStream at the
 * right time.
 */
protected Scanner(Environment env) {
    this.env = env;
}
0194:
0195: /**
0196: * Define a keyword.
0197: */
0198: private static void defineKeyword(int val) {
0199: Identifier.lookup(opNames[val]).setType(val);
0200: }
0201:
0202: /**
0203: * Initialized keyword and token Hashtables
0204: */
0205: static {
0206: // Statement keywords
0207: defineKeyword(FOR);
0208: defineKeyword(IF);
0209: defineKeyword(ELSE);
0210: defineKeyword(WHILE);
0211: defineKeyword(DO);
0212: defineKeyword(SWITCH);
0213: defineKeyword(CASE);
0214: defineKeyword(DEFAULT);
0215: defineKeyword(BREAK);
0216: defineKeyword(CONTINUE);
0217: defineKeyword(RETURN);
0218: defineKeyword(TRY);
0219: defineKeyword(CATCH);
0220: defineKeyword(FINALLY);
0221: defineKeyword(THROW);
0222:
0223: // Type defineKeywords
0224: defineKeyword(BYTE);
0225: defineKeyword(CHAR);
0226: defineKeyword(SHORT);
0227: defineKeyword(INT);
0228: defineKeyword(LONG);
0229: defineKeyword(FLOAT);
0230: defineKeyword(DOUBLE);
0231: defineKeyword(VOID);
0232: defineKeyword(BOOLEAN);
0233:
0234: // Expression keywords
0235: defineKeyword(INSTANCEOF);
0236: defineKeyword(TRUE);
0237: defineKeyword(FALSE);
0238: defineKeyword(NEW);
0239: defineKeyword(THIS);
0240: defineKeyword(SUPER);
0241: defineKeyword(NULL);
0242:
0243: // Declaration keywords
0244: defineKeyword(IMPORT);
0245: defineKeyword(CLASS);
0246: defineKeyword(EXTENDS);
0247: defineKeyword(IMPLEMENTS);
0248: defineKeyword(INTERFACE);
0249: defineKeyword(PACKAGE);
0250: defineKeyword(THROWS);
0251:
0252: // Modifier keywords
0253: defineKeyword(PRIVATE);
0254: defineKeyword(PUBLIC);
0255: defineKeyword(PROTECTED);
0256: defineKeyword(STATIC);
0257: defineKeyword(TRANSIENT);
0258: defineKeyword(SYNCHRONIZED);
0259: defineKeyword(NATIVE);
0260: defineKeyword(ABSTRACT);
0261: defineKeyword(VOLATILE);
0262: defineKeyword(FINAL);
0263: defineKeyword(STRICTFP);
0264:
0265: // reserved keywords
0266: defineKeyword(CONST);
0267: defineKeyword(GOTO);
0268: }
0269:
0270: /**
0271: * Scan a comment. This method should be
0272: * called once the initial /, * and the next
0273: * character have been read.
0274: */
0275: private void skipComment() throws IOException {
0276: while (true) {
0277: switch (ch) {
0278: case EOF:
0279: env.error(pos, "eof.in.comment");
0280: return;
0281:
0282: case '*':
0283: if ((ch = in.read()) == '/') {
0284: ch = in.read();
0285: return;
0286: }
0287: break;
0288:
0289: default:
0290: ch = in.read();
0291: break;
0292: }
0293: }
0294: }
0295:
0296: /**
0297: * Scan a doc comment. This method should be called
0298: * once the initial /, * and * have been read. It gathers
0299: * the content of the comment (witout leading spaces and '*'s)
0300: * in the string buffer.
0301: */
0302: private String scanDocComment() throws IOException {
0303: // Note: this method has been hand-optimized to yield
0304: // better performance. This was done after it was noted
0305: // that javadoc spent a great deal of its time here.
0306: // This should also help the performance of the compiler
0307: // as well -- it scans the doc comments to find
0308: // @deprecated tags.
0309: //
0310: // The logic of the method has been completely rewritten
0311: // to avoid the use of flags that need to be looked at
0312: // for every character read. Members that are accessed
0313: // more than once have been stored in local variables.
0314: // The methods putc() and bufferString() have been
0315: // inlined by hand. Extra cases have been added to
0316: // switch statements to trick the compiler into generating
0317: // a tableswitch instead of a lookupswitch.
0318: //
0319: // This implementation aims to preserve the previous
0320: // behavior of this method.
0321:
0322: int c;
0323:
0324: // Put `in' in a local variable.
0325: final ScannerInputReader in = this .in;
0326:
0327: // We maintain the buffer locally rather than calling putc().
0328: char[] buffer = this .buffer;
0329: int count = 0;
0330:
0331: // We are called pointing at the second star of the doc
0332: // comment:
0333: //
0334: // Input: /** the rest of the comment ... */
0335: // ^
0336: //
0337: // We rely on this in the code below.
0338:
0339: // Consume any number of stars.
0340: while ((c = in.read()) == '*')
0341: ;
0342:
0343: // Is the comment of the form /**/, /***/, /****/, etc.?
0344: if (c == '/') {
0345: // Set ch and return
0346: ch = in.read();
0347: return "";
0348: }
0349:
0350: // Skip a newline on the first line of the comment.
0351: if (c == '\n') {
0352: c = in.read();
0353: }
0354:
0355: outerLoop:
0356: // The outerLoop processes the doc comment, looping once
0357: // for each line. For each line, it first strips off
0358: // whitespace, then it consumes any stars, then it
0359: // puts the rest of the line into our buffer.
0360: while (true) {
0361:
0362: // The wsLoop consumes whitespace from the beginning
0363: // of each line.
0364: wsLoop: while (true) {
0365: switch (c) {
0366: case ' ':
0367: case '\t':
0368: // We could check for other forms of whitespace
0369: // as well, but this is left as is for minimum
0370: // disturbance of functionality.
0371: //
0372: // Just skip whitespace.
0373: c = in.read();
0374: break;
0375:
0376: // We have added extra cases here to trick the
0377: // compiler into using a tableswitch instead of
0378: // a lookupswitch. They can be removed without
0379: // a change in meaning.
0380: case 10:
0381: case 11:
0382: case 12:
0383: case 13:
0384: case 14:
0385: case 15:
0386: case 16:
0387: case 17:
0388: case 18:
0389: case 19:
0390: case 20:
0391: case 21:
0392: case 22:
0393: case 23:
0394: case 24:
0395: case 25:
0396: case 26:
0397: case 27:
0398: case 28:
0399: case 29:
0400: case 30:
0401: case 31:
0402: default:
0403: // We've seen something that isn't whitespace,
0404: // jump out.
0405: break wsLoop;
0406: }
0407: } // end wsLoop.
0408:
0409: // Are there stars here? If so, consume them all
0410: // and check for the end of comment.
0411: if (c == '*') {
0412: // Skip all of the stars...
0413: do {
0414: c = in.read();
0415: } while (c == '*');
0416:
0417: // ...then check for the closing slash.
0418: if (c == '/') {
0419: // We're done with the doc comment.
0420: // Set ch and break out.
0421: ch = in.read();
0422: break outerLoop;
0423: }
0424: }
0425:
0426: // The textLoop processes the rest of the characters
0427: // on the line, adding them to our buffer.
0428: textLoop: while (true) {
0429: switch (c) {
0430: case EOF:
0431: // We've seen a premature EOF. Break out
0432: // of the loop.
0433: env.error(pos, "eof.in.comment");
0434: ch = EOF;
0435: break outerLoop;
0436:
0437: case '*':
0438: // Is this just a star? Or is this the
0439: // end of a comment?
0440: c = in.read();
0441: if (c == '/') {
0442: // This is the end of the comment,
0443: // set ch and return our buffer.
0444: ch = in.read();
0445: break outerLoop;
0446: }
0447: // This is just an ordinary star. Add it to
0448: // the buffer.
0449: if (count == buffer.length) {
0450: growBuffer();
0451: buffer = this .buffer;
0452: }
0453: buffer[count++] = '*';
0454: break;
0455:
0456: case '\n':
0457: // We've seen a newline. Add it to our
0458: // buffer and break out of this loop,
0459: // starting fresh on a new line.
0460: if (count == buffer.length) {
0461: growBuffer();
0462: buffer = this .buffer;
0463: }
0464: buffer[count++] = '\n';
0465: c = in.read();
0466: break textLoop;
0467:
0468: // Again, the extra cases here are a trick
0469: // to get the compiler to generate a tableswitch.
0470: case 0:
0471: case 1:
0472: case 2:
0473: case 3:
0474: case 4:
0475: case 5:
0476: case 6:
0477: case 7:
0478: case 8:
0479: case 11:
0480: case 12:
0481: case 13:
0482: case 14:
0483: case 15:
0484: case 16:
0485: case 17:
0486: case 18:
0487: case 19:
0488: case 20:
0489: case 21:
0490: case 22:
0491: case 23:
0492: case 24:
0493: case 25:
0494: case 26:
0495: case 27:
0496: case 28:
0497: case 29:
0498: case 30:
0499: case 31:
0500: case 32:
0501: case 33:
0502: case 34:
0503: case 35:
0504: case 36:
0505: case 37:
0506: case 38:
0507: case 39:
0508: case 40:
0509: default:
0510: // Add the character to our buffer.
0511: if (count == buffer.length) {
0512: growBuffer();
0513: buffer = this .buffer;
0514: }
0515: buffer[count++] = (char) c;
0516: c = in.read();
0517: break;
0518: }
0519: } // end textLoop
0520: } // end outerLoop
0521:
0522: // We have scanned our doc comment. It is stored in
0523: // buffer. The previous implementation of scanDocComment
0524: // stripped off all trailing spaces and stars from the comment.
0525: // We will do this as well, so as to cause a minimum of
0526: // disturbance. Is this what we want?
0527: if (count > 0) {
0528: int i = count - 1;
0529: trailLoop: while (i > -1) {
0530: switch (buffer[i]) {
0531: case ' ':
0532: case '\t':
0533: case '*':
0534: i--;
0535: break;
0536: // And again, the extra cases here are a trick
0537: // to get the compiler to generate a tableswitch.
0538: case 0:
0539: case 1:
0540: case 2:
0541: case 3:
0542: case 4:
0543: case 5:
0544: case 6:
0545: case 7:
0546: case 8:
0547: case 10:
0548: case 11:
0549: case 12:
0550: case 13:
0551: case 14:
0552: case 15:
0553: case 16:
0554: case 17:
0555: case 18:
0556: case 19:
0557: case 20:
0558: case 21:
0559: case 22:
0560: case 23:
0561: case 24:
0562: case 25:
0563: case 26:
0564: case 27:
0565: case 28:
0566: case 29:
0567: case 30:
0568: case 31:
0569: case 33:
0570: case 34:
0571: case 35:
0572: case 36:
0573: case 37:
0574: case 38:
0575: case 39:
0576: case 40:
0577: default:
0578: break trailLoop;
0579: }
0580: }
0581: count = i + 1;
0582:
0583: // Return the text of the doc comment.
0584: return new String(buffer, 0, count);
0585: } else {
0586: return "";
0587: }
0588: }
0589:
0590: /**
0591: * Scan a number. The first digit of the number should be the current
0592: * character. We may be scanning hex, decimal, or octal at this point
0593: */
0594: private void scanNumber() throws IOException {
0595: boolean seenNonOctal = false;
0596: boolean overflow = false;
0597: boolean seenDigit = false; // used to detect invalid hex number 0xL
0598: radix = (ch == '0' ? 8 : 10);
0599: long value = ch - '0';
0600: count = 0;
0601: putc(ch); // save character in buffer
0602: numberLoop: for (;;) {
0603: switch (ch = in.read()) {
0604: case '.':
0605: if (radix == 16)
0606: break numberLoop; // an illegal character
0607: scanReal();
0608: return;
0609:
0610: case '8':
0611: case '9':
0612: // We can't yet throw an error if reading an octal. We might
0613: // discover we're really reading a real.
0614: seenNonOctal = true;
0615: case '0':
0616: case '1':
0617: case '2':
0618: case '3':
0619: case '4':
0620: case '5':
0621: case '6':
0622: case '7':
0623: seenDigit = true;
0624: putc(ch);
0625: if (radix == 10) {
0626: overflow = overflow || (value * 10) / 10 != value;
0627: value = (value * 10) + (ch - '0');
0628: overflow = overflow || (value - 1 < -1);
0629: } else if (radix == 8) {
0630: overflow = overflow || (value >>> 61) != 0;
0631: value = (value << 3) + (ch - '0');
0632: } else {
0633: overflow = overflow || (value >>> 60) != 0;
0634: value = (value << 4) + (ch - '0');
0635: }
0636: break;
0637:
0638: case 'd':
0639: case 'D':
0640: case 'e':
0641: case 'E':
0642: case 'f':
0643: case 'F':
0644: if (radix != 16) {
0645: scanReal();
0646: return;
0647: }
0648: // fall through
0649: case 'a':
0650: case 'A':
0651: case 'b':
0652: case 'B':
0653: case 'c':
0654: case 'C':
0655: seenDigit = true;
0656: putc(ch);
0657: if (radix != 16)
0658: break numberLoop; // an illegal character
0659: overflow = overflow || (value >>> 60) != 0;
0660: value = (value << 4) + 10
0661: + Character.toLowerCase((char) ch) - 'a';
0662: break;
0663:
0664: case 'l':
0665: case 'L':
0666: ch = in.read(); // skip over 'l'
0667: longValue = value;
0668: token = LONGVAL;
0669: break numberLoop;
0670:
0671: case 'x':
0672: case 'X':
0673: // if the first character is a '0' and this is the second
0674: // letter, then read in a hexadecimal number. Otherwise, error.
0675: if (count == 1 && radix == 8) {
0676: radix = 16;
0677: seenDigit = false;
0678: break;
0679: } else {
0680: // we'll get an illegal character error
0681: break numberLoop;
0682: }
0683:
0684: default:
0685: intValue = (int) value;
0686: token = INTVAL;
0687: break numberLoop;
0688: }
0689: } // while true
0690:
0691: // We have just finished reading the number. The next thing better
0692: // not be a letter or digit.
0693: // Note: There will be deprecation warnings against these uses
0694: // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
0695: // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
0696: if (Character.isJavaLetterOrDigit((char) ch) || ch == '.') {
0697: env.error(in.pos, "invalid.number");
0698: do {
0699: ch = in.read();
0700: } while (Character.isJavaLetterOrDigit((char) ch)
0701: || ch == '.');
0702: intValue = 0;
0703: token = INTVAL;
0704: } else if (radix == 8 && seenNonOctal) {
0705: // A bogus octal literal.
0706: intValue = 0;
0707: token = INTVAL;
0708: env.error(pos, "invalid.octal.number");
0709: } else if (radix == 16 && seenDigit == false) {
0710: // A hex literal with no digits, 0xL, for example.
0711: intValue = 0;
0712: token = INTVAL;
0713: env.error(pos, "invalid.hex.number");
0714: } else {
0715: if (token == INTVAL) {
0716: // Check for overflow. Note that base 10 literals
0717: // have different rules than base 8 and 16.
0718: overflow = overflow
0719: || (value & 0xFFFFFFFF00000000L) != 0
0720: || (radix == 10 && value > 2147483648L);
0721:
0722: if (overflow) {
0723: intValue = 0;
0724:
0725: // Give a specific error message which tells
0726: // the user the range.
0727: switch (radix) {
0728: case 8:
0729: env.error(pos, "overflow.int.oct");
0730: break;
0731: case 10:
0732: env.error(pos, "overflow.int.dec");
0733: break;
0734: case 16:
0735: env.error(pos, "overflow.int.hex");
0736: break;
0737: default:
0738: throw new CompilerError("invalid radix");
0739: }
0740: }
0741: } else {
0742: if (overflow) {
0743: longValue = 0;
0744:
0745: // Give a specific error message which tells
0746: // the user the range.
0747: switch (radix) {
0748: case 8:
0749: env.error(pos, "overflow.long.oct");
0750: break;
0751: case 10:
0752: env.error(pos, "overflow.long.dec");
0753: break;
0754: case 16:
0755: env.error(pos, "overflow.long.hex");
0756: break;
0757: default:
0758: throw new CompilerError("invalid radix");
0759: }
0760: }
0761: }
0762: }
0763: }
0764:
0765: /**
0766: * Scan a float. We are either looking at the decimal, or we have already
0767: * seen it and put it into the buffer. We haven't seen an exponent.
0768: * Scan a float. Should be called with the current character is either
0769: * the 'e', 'E' or '.'
0770: */
0771: private void scanReal() throws IOException {
0772: boolean seenExponent = false;
0773: boolean isSingleFloat = false;
0774: char lastChar;
0775: if (ch == '.') {
0776: putc(ch);
0777: ch = in.read();
0778: }
0779:
0780: numberLoop: for (;; ch = in.read()) {
0781: switch (ch) {
0782: case '0':
0783: case '1':
0784: case '2':
0785: case '3':
0786: case '4':
0787: case '5':
0788: case '6':
0789: case '7':
0790: case '8':
0791: case '9':
0792: putc(ch);
0793: break;
0794:
0795: case 'e':
0796: case 'E':
0797: if (seenExponent)
0798: break numberLoop; // we'll get a format error
0799: putc(ch);
0800: seenExponent = true;
0801: break;
0802:
0803: case '+':
0804: case '-':
0805: lastChar = buffer[count - 1];
0806: if (lastChar != 'e' && lastChar != 'E')
0807: break numberLoop; // this isn't an error, though!
0808: putc(ch);
0809: break;
0810:
0811: case 'f':
0812: case 'F':
0813: ch = in.read(); // skip over 'f'
0814: isSingleFloat = true;
0815: break numberLoop;
0816:
0817: case 'd':
0818: case 'D':
0819: ch = in.read(); // skip over 'd'
0820: // fall through
0821: default:
0822: break numberLoop;
0823: } // sswitch
0824: } // loop
0825:
0826: // we have just finished reading the number. The next thing better
0827: // not be a letter or digit.
0828: if (Character.isJavaLetterOrDigit((char) ch) || ch == '.') {
0829: env.error(in.pos, "invalid.number");
0830: do {
0831: ch = in.read();
0832: } while (Character.isJavaLetterOrDigit((char) ch)
0833: || ch == '.');
0834: doubleValue = 0;
0835: token = DOUBLEVAL;
0836: } else {
0837: token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
0838: try {
0839: lastChar = buffer[count - 1];
0840: if (lastChar == 'e' || lastChar == 'E'
0841: || lastChar == '+' || lastChar == '-') {
0842: env.error(in.pos - 1, "float.format");
0843: } else if (isSingleFloat) {
0844: String string = bufferString();
0845: floatValue = Float.valueOf(string).floatValue();
0846: if (Float.isInfinite(floatValue)) {
0847: env.error(pos, "overflow.float");
0848: } else if (floatValue == 0
0849: && !looksLikeZero(string)) {
0850: env.error(pos, "underflow.float");
0851: }
0852: } else {
0853: String string = bufferString();
0854: doubleValue = Double.valueOf(string).doubleValue();
0855: if (Double.isInfinite(doubleValue)) {
0856: env.error(pos, "overflow.double");
0857: } else if (doubleValue == 0
0858: && !looksLikeZero(string)) {
0859: env.error(pos, "underflow.double");
0860: }
0861: }
0862: } catch (NumberFormatException ee) {
0863: env.error(pos, "float.format");
0864: doubleValue = 0;
0865: floatValue = 0;
0866: }
0867: }
0868: return;
0869: }
0870:
0871: // We have a token that parses as a number. Is this token possibly zero?
0872: // i.e. does it have a non-zero value in the mantissa?
0873: private static boolean looksLikeZero(String token) {
0874: int length = token.length();
0875: for (int i = 0; i < length; i++) {
0876: switch (token.charAt(i)) {
0877: case 0:
0878: case '.':
0879: continue;
0880: case '1':
0881: case '2':
0882: case '3':
0883: case '4':
0884: case '5':
0885: case '6':
0886: case '7':
0887: case '8':
0888: case '9':
0889: return false;
0890: case 'e':
0891: case 'E':
0892: case 'f':
0893: case 'F':
0894: return true;
0895: }
0896: }
0897: return true;
0898: }
0899:
0900: /**
0901: * Scan an escape character.
0902: * @return the character or -1 if it escaped an
0903: * end-of-line.
0904: */
0905: private int scanEscapeChar() throws IOException {
0906: long p = in.pos;
0907:
0908: switch (ch = in.read()) {
0909: case '0':
0910: case '1':
0911: case '2':
0912: case '3':
0913: case '4':
0914: case '5':
0915: case '6':
0916: case '7': {
0917: int n = ch - '0';
0918: for (int i = 2; i > 0; i--) {
0919: switch (ch = in.read()) {
0920: case '0':
0921: case '1':
0922: case '2':
0923: case '3':
0924: case '4':
0925: case '5':
0926: case '6':
0927: case '7':
0928: n = (n << 3) + ch - '0';
0929: break;
0930:
0931: default:
0932: if (n > 0xFF) {
0933: env.error(p, "invalid.escape.char");
0934: }
0935: return n;
0936: }
0937: }
0938: ch = in.read();
0939: if (n > 0xFF) {
0940: env.error(p, "invalid.escape.char");
0941: }
0942: return n;
0943: }
0944:
0945: case 'r':
0946: ch = in.read();
0947: return '\r';
0948: case 'n':
0949: ch = in.read();
0950: return '\n';
0951: case 'f':
0952: ch = in.read();
0953: return '\f';
0954: case 'b':
0955: ch = in.read();
0956: return '\b';
0957: case 't':
0958: ch = in.read();
0959: return '\t';
0960: case '\\':
0961: ch = in.read();
0962: return '\\';
0963: case '\"':
0964: ch = in.read();
0965: return '\"';
0966: case '\'':
0967: ch = in.read();
0968: return '\'';
0969: }
0970:
0971: env.error(p, "invalid.escape.char");
0972: ch = in.read();
0973: return -1;
0974: }
0975:
0976: /**
0977: * Scan a string. The current character
0978: * should be the opening " of the string.
0979: */
0980: private void scanString() throws IOException {
0981: token = STRINGVAL;
0982: count = 0;
0983: ch = in.read();
0984:
0985: // Scan a String
0986: while (true) {
0987: switch (ch) {
0988: case EOF:
0989: env.error(pos, "eof.in.string");
0990: stringValue = bufferString();
0991: return;
0992:
0993: case '\r':
0994: case '\n':
0995: ch = in.read();
0996: env.error(pos, "newline.in.string");
0997: stringValue = bufferString();
0998: return;
0999:
1000: case '"':
1001: ch = in.read();
1002: stringValue = bufferString();
1003: return;
1004:
1005: case '\\': {
1006: int c = scanEscapeChar();
1007: if (c >= 0) {
1008: putc((char) c);
1009: }
1010: break;
1011: }
1012:
1013: default:
1014: putc(ch);
1015: ch = in.read();
1016: break;
1017: }
1018: }
1019: }
1020:
1021: /**
1022: * Scan a character. The current character should be
1023: * the opening ' of the character constant.
1024: */
1025: private void scanCharacter() throws IOException {
1026: token = CHARVAL;
1027:
1028: switch (ch = in.read()) {
1029: case '\\':
1030: int c = scanEscapeChar();
1031: charValue = (char) ((c >= 0) ? c : 0);
1032: break;
1033:
1034: case '\'':
1035: // There are two standard problems this case deals with. One
1036: // is the malformed single quote constant (i.e. the programmer
1037: // uses ''' instead of '\'') and the other is the empty
1038: // character constant (i.e. ''). Just consume any number of
1039: // single quotes and emit an error message.
1040: charValue = 0;
1041: env.error(pos, "invalid.char.constant");
1042: ch = in.read();
1043: while (ch == '\'') {
1044: ch = in.read();
1045: }
1046: return;
1047:
1048: case '\r':
1049: case '\n':
1050: charValue = 0;
1051: env.error(pos, "invalid.char.constant");
1052: return;
1053:
1054: default:
1055: charValue = (char) ch;
1056: ch = in.read();
1057: break;
1058: }
1059:
1060: if (ch == '\'') {
1061: ch = in.read();
1062: } else {
1063: env.error(pos, "invalid.char.constant");
1064: while (true) {
1065: switch (ch) {
1066: case '\'':
1067: ch = in.read();
1068: return;
1069: case ';':
1070: case '\n':
1071: case EOF:
1072: return;
1073: default:
1074: ch = in.read();
1075: }
1076: }
1077: }
1078: }
1079:
1080: /**
1081: * Scan an Identifier. The current character should
1082: * be the first character of the identifier.
1083: */
1084: private void scanIdentifier() throws IOException {
1085: count = 0;
1086:
1087: while (true) {
1088: putc(ch);
1089: switch (ch = in.read()) {
1090: case 'a':
1091: case 'b':
1092: case 'c':
1093: case 'd':
1094: case 'e':
1095: case 'f':
1096: case 'g':
1097: case 'h':
1098: case 'i':
1099: case 'j':
1100: case 'k':
1101: case 'l':
1102: case 'm':
1103: case 'n':
1104: case 'o':
1105: case 'p':
1106: case 'q':
1107: case 'r':
1108: case 's':
1109: case 't':
1110: case 'u':
1111: case 'v':
1112: case 'w':
1113: case 'x':
1114: case 'y':
1115: case 'z':
1116: case 'A':
1117: case 'B':
1118: case 'C':
1119: case 'D':
1120: case 'E':
1121: case 'F':
1122: case 'G':
1123: case 'H':
1124: case 'I':
1125: case 'J':
1126: case 'K':
1127: case 'L':
1128: case 'M':
1129: case 'N':
1130: case 'O':
1131: case 'P':
1132: case 'Q':
1133: case 'R':
1134: case 'S':
1135: case 'T':
1136: case 'U':
1137: case 'V':
1138: case 'W':
1139: case 'X':
1140: case 'Y':
1141: case 'Z':
1142: case '0':
1143: case '1':
1144: case '2':
1145: case '3':
1146: case '4':
1147: case '5':
1148: case '6':
1149: case '7':
1150: case '8':
1151: case '9':
1152: case '$':
1153: case '_':
1154: break;
1155:
1156: default:
1157: if (!Character.isJavaLetterOrDigit((char) ch)) {
1158: idValue = Identifier.lookup(bufferString());
1159: token = idValue.getType();
1160: return;
1161: }
1162: }
1163: }
1164: }
1165:
1166: /**
1167: * The ending position of the current token
1168: */
1169: // Note: This should be part of the pos itself.
1170: public long getEndPos() {
1171: return in.pos;
1172: }
1173:
1174: /**
1175: * If the current token is IDENT, return the identifier occurrence.
1176: * It will be freshly allocated.
1177: */
1178: public IdentifierToken getIdToken() {
1179: return (token != IDENT) ? null : new IdentifierToken(pos,
1180: idValue);
1181: }
1182:
1183: /**
1184: * Scan the next token.
1185: * @return the position of the previous token.
1186: */
1187: public long scan() throws IOException {
1188: return xscan();
1189: }
1190:
1191: protected long xscan() throws IOException {
1192: final ScannerInputReader in = this .in;
1193: long retPos = pos;
1194: prevPos = in.pos;
1195: docComment = null;
1196: while (true) {
1197: pos = in.pos;
1198:
1199: switch (ch) {
1200: case EOF:
1201: token = EOF;
1202: return retPos;
1203:
1204: case '\n':
1205: if (scanComments) {
1206: ch = ' ';
1207: // Avoid this path the next time around.
1208: // Do not just call in.read; we want to present
1209: // a null token (and also avoid read-ahead).
1210: token = COMMENT;
1211: return retPos;
1212: }
1213: case ' ':
1214: case '\t':
1215: case '\f':
1216: ch = in.read();
1217: break;
1218:
1219: case '/':
1220: switch (ch = in.read()) {
1221: case '/':
1222: // Parse a // comment
1223: while (((ch = in.read()) != EOF) && (ch != '\n'))
1224: ;
1225: if (scanComments) {
1226: token = COMMENT;
1227: return retPos;
1228: }
1229: break;
1230:
1231: case '*':
1232: ch = in.read();
1233: if (ch == '*') {
1234: docComment = scanDocComment();
1235: } else {
1236: skipComment();
1237: }
1238: if (scanComments) {
1239: return retPos;
1240: }
1241: break;
1242:
1243: case '=':
1244: ch = in.read();
1245: token = ASGDIV;
1246: return retPos;
1247:
1248: default:
1249: token = DIV;
1250: return retPos;
1251: }
1252: break;
1253:
1254: case '"':
1255: scanString();
1256: return retPos;
1257:
1258: case '\'':
1259: scanCharacter();
1260: return retPos;
1261:
1262: case '0':
1263: case '1':
1264: case '2':
1265: case '3':
1266: case '4':
1267: case '5':
1268: case '6':
1269: case '7':
1270: case '8':
1271: case '9':
1272: scanNumber();
1273: return retPos;
1274:
1275: case '.':
1276: switch (ch = in.read()) {
1277: case '0':
1278: case '1':
1279: case '2':
1280: case '3':
1281: case '4':
1282: case '5':
1283: case '6':
1284: case '7':
1285: case '8':
1286: case '9':
1287: count = 0;
1288: putc('.');
1289: scanReal();
1290: break;
1291: default:
1292: token = FIELD;
1293: }
1294: return retPos;
1295:
1296: case '{':
1297: ch = in.read();
1298: token = LBRACE;
1299: return retPos;
1300:
1301: case '}':
1302: ch = in.read();
1303: token = RBRACE;
1304: return retPos;
1305:
1306: case '(':
1307: ch = in.read();
1308: token = LPAREN;
1309: return retPos;
1310:
1311: case ')':
1312: ch = in.read();
1313: token = RPAREN;
1314: return retPos;
1315:
1316: case '[':
1317: ch = in.read();
1318: token = LSQBRACKET;
1319: return retPos;
1320:
1321: case ']':
1322: ch = in.read();
1323: token = RSQBRACKET;
1324: return retPos;
1325:
1326: case ',':
1327: ch = in.read();
1328: token = COMMA;
1329: return retPos;
1330:
1331: case ';':
1332: ch = in.read();
1333: token = SEMICOLON;
1334: return retPos;
1335:
1336: case '?':
1337: ch = in.read();
1338: token = QUESTIONMARK;
1339: return retPos;
1340:
1341: case '~':
1342: ch = in.read();
1343: token = BITNOT;
1344: return retPos;
1345:
1346: case ':':
1347: ch = in.read();
1348: token = COLON;
1349: return retPos;
1350:
1351: case '-':
1352: switch (ch = in.read()) {
1353: case '-':
1354: ch = in.read();
1355: token = DEC;
1356: return retPos;
1357:
1358: case '=':
1359: ch = in.read();
1360: token = ASGSUB;
1361: return retPos;
1362: }
1363: token = SUB;
1364: return retPos;
1365:
1366: case '+':
1367: switch (ch = in.read()) {
1368: case '+':
1369: ch = in.read();
1370: token = INC;
1371: return retPos;
1372:
1373: case '=':
1374: ch = in.read();
1375: token = ASGADD;
1376: return retPos;
1377: }
1378: token = ADD;
1379: return retPos;
1380:
1381: case '<':
1382: switch (ch = in.read()) {
1383: case '<':
1384: if ((ch = in.read()) == '=') {
1385: ch = in.read();
1386: token = ASGLSHIFT;
1387: return retPos;
1388: }
1389: token = LSHIFT;
1390: return retPos;
1391:
1392: case '=':
1393: ch = in.read();
1394: token = LE;
1395: return retPos;
1396: }
1397: token = LT;
1398: return retPos;
1399:
1400: case '>':
1401: switch (ch = in.read()) {
1402: case '>':
1403: switch (ch = in.read()) {
1404: case '=':
1405: ch = in.read();
1406: token = ASGRSHIFT;
1407: return retPos;
1408:
1409: case '>':
1410: if ((ch = in.read()) == '=') {
1411: ch = in.read();
1412: token = ASGURSHIFT;
1413: return retPos;
1414: }
1415: token = URSHIFT;
1416: return retPos;
1417: }
1418: token = RSHIFT;
1419: return retPos;
1420:
1421: case '=':
1422: ch = in.read();
1423: token = GE;
1424: return retPos;
1425: }
1426: token = GT;
1427: return retPos;
1428:
1429: case '|':
1430: switch (ch = in.read()) {
1431: case '|':
1432: ch = in.read();
1433: token = OR;
1434: return retPos;
1435:
1436: case '=':
1437: ch = in.read();
1438: token = ASGBITOR;
1439: return retPos;
1440: }
1441: token = BITOR;
1442: return retPos;
1443:
1444: case '&':
1445: switch (ch = in.read()) {
1446: case '&':
1447: ch = in.read();
1448: token = AND;
1449: return retPos;
1450:
1451: case '=':
1452: ch = in.read();
1453: token = ASGBITAND;
1454: return retPos;
1455: }
1456: token = BITAND;
1457: return retPos;
1458:
1459: case '=':
1460: if ((ch = in.read()) == '=') {
1461: ch = in.read();
1462: token = EQ;
1463: return retPos;
1464: }
1465: token = ASSIGN;
1466: return retPos;
1467:
1468: case '%':
1469: if ((ch = in.read()) == '=') {
1470: ch = in.read();
1471: token = ASGREM;
1472: return retPos;
1473: }
1474: token = REM;
1475: return retPos;
1476:
1477: case '^':
1478: if ((ch = in.read()) == '=') {
1479: ch = in.read();
1480: token = ASGBITXOR;
1481: return retPos;
1482: }
1483: token = BITXOR;
1484: return retPos;
1485:
1486: case '!':
1487: if ((ch = in.read()) == '=') {
1488: ch = in.read();
1489: token = NE;
1490: return retPos;
1491: }
1492: token = NOT;
1493: return retPos;
1494:
1495: case '*':
1496: if ((ch = in.read()) == '=') {
1497: ch = in.read();
1498: token = ASGMUL;
1499: return retPos;
1500: }
1501: token = MUL;
1502: return retPos;
1503:
1504: case 'a':
1505: case 'b':
1506: case 'c':
1507: case 'd':
1508: case 'e':
1509: case 'f':
1510: case 'g':
1511: case 'h':
1512: case 'i':
1513: case 'j':
1514: case 'k':
1515: case 'l':
1516: case 'm':
1517: case 'n':
1518: case 'o':
1519: case 'p':
1520: case 'q':
1521: case 'r':
1522: case 's':
1523: case 't':
1524: case 'u':
1525: case 'v':
1526: case 'w':
1527: case 'x':
1528: case 'y':
1529: case 'z':
1530: case 'A':
1531: case 'B':
1532: case 'C':
1533: case 'D':
1534: case 'E':
1535: case 'F':
1536: case 'G':
1537: case 'H':
1538: case 'I':
1539: case 'J':
1540: case 'K':
1541: case 'L':
1542: case 'M':
1543: case 'N':
1544: case 'O':
1545: case 'P':
1546: case 'Q':
1547: case 'R':
1548: case 'S':
1549: case 'T':
1550: case 'U':
1551: case 'V':
1552: case 'W':
1553: case 'X':
1554: case 'Y':
1555: case 'Z':
1556: case '$':
1557: case '_':
1558: scanIdentifier();
1559: return retPos;
1560:
1561: case '\u001a':
1562: // Our one concession to DOS.
1563: if ((ch = in.read()) == EOF) {
1564: token = EOF;
1565: return retPos;
1566: }
1567: env.error(pos, "funny.char");
1568: ch = in.read();
1569: break;
1570:
1571: default:
1572: if (Character.isJavaLetter((char) ch)) {
1573: scanIdentifier();
1574: return retPos;
1575: }
1576: env.error(pos, "funny.char");
1577: ch = in.read();
1578: break;
1579: }
1580: }
1581: }
1582:
1583: /**
1584: * Scan to a matching '}', ']' or ')'. The current token must be
1585: * a '{', '[' or '(';
1586: */
1587: public void match(int open, int close) throws IOException {
1588: int depth = 1;
1589:
1590: while (true) {
1591: scan();
1592: if (token == open) {
1593: depth++;
1594: } else if (token == close) {
1595: if (--depth == 0) {
1596: return;
1597: }
1598: } else if (token == EOF) {
1599: env.error(pos, "unbalanced.paren");
1600: return;
1601: }
1602: }
1603: }
1604: }
|