0001: /*
0002: * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: */
0025:
0026: package com.sun.tools.javac.parser;
0027:
0028: import java.io.*;
0029: import java.nio.*;
0030: import java.nio.ByteBuffer;
0031: import java.nio.charset.*;
0032: import java.nio.channels.*;
0033: import java.util.regex.*;
0034:
0035: import com.sun.tools.javac.util.*;
0036:
0037: import com.sun.tools.javac.code.Source;
0038:
0039: import static com.sun.tools.javac.parser.Token.*;
0040: import static com.sun.tools.javac.util.LayoutCharacters.*;
0041:
0042: /** The lexical analyzer maps an input stream consisting of
0043: * ASCII characters and Unicode escapes into a token sequence.
0044: *
0045: * <p><b>This is NOT part of any API supported by Sun Microsystems. If
0046: * you write code that depends on this, you do so at your own risk.
0047: * This code and its internal interfaces are subject to change or
0048: * deletion without notice.</b>
0049: */
0050: @Version("@(#)Scanner.java 1.79 07/05/05")
0051: public class Scanner implements Lexer {
0052:
/** Debug switch: when true, nextToken() and the process* hooks echo what they scanned to System.out. */
0053: private static boolean scannerDebug = false;
0054:
0055: /** A factory for creating scanners. */
0056: public static class Factory {
0057: /** The context key for the scanner factory. */
0058: public static final Context.Key<Scanner.Factory> scannerFactoryKey = new Context.Key<Scanner.Factory>();
0059:
0060: /** Get the Factory instance for this context. */
0061: public static Factory instance(Context context) {
0062: Factory instance = context.get(scannerFactoryKey);
0063: if (instance == null)
0064: instance = new Factory(context);
0065: return instance;
0066: }
0067:
0068: final Log log;
0069: final Name.Table names;
0070: final Source source;
0071: final Keywords keywords;
0072:
0073: /** Create a new scanner factory. */
0074: protected Factory(Context context) {
0075: context.put(scannerFactoryKey, this );
0076: this .log = Log.instance(context);
0077: this .names = Name.Table.instance(context);
0078: this .source = Source.instance(context);
0079: this .keywords = Keywords.instance(context);
0080: }
0081:
0082: public Scanner newScanner(CharSequence input) {
0083: if (input instanceof CharBuffer) {
0084: return new Scanner(this , (CharBuffer) input);
0085: } else {
0086: char[] array = input.toString().toCharArray();
0087: return newScanner(array, array.length);
0088: }
0089: }
0090:
0091: public Scanner newScanner(char[] input, int inputLength) {
0092: return new Scanner(this , input, inputLength);
0093: }
0094: }
0095:
0096: /* Output variables; set by nextToken():
0097: */
0098:
0099: /** The token, set by nextToken().
0100: */
0101: private Token token;
0102:
0103: /** Allow hex floating-point literals (initialized from the factory's Source).
0104: */
0105: private boolean allowHexFloats;
0106:
0107: /** The token's position, 0-based offset from beginning of text.
0108: */
0109: private int pos;
0110:
0111: /** Character position just after the last character of the token.
0112: */
0113: private int endPos;
0114:
0115: /** The end position of the previous token (endPos as it was when nextToken() was entered).
0116: */
0117: private int prevEndPos;
0118:
0119: /** The position where a lexical error occurred; initially Position.NOPOS.
0120: */
0121: private int errPos = Position.NOPOS;
0122:
0123: /** The name of an identifier or token.
0124: */
0125: private Name name;
0126:
0127: /** The radix of a numeric literal token.
0128: */
0129: private int radix;
0130:
0131: /** Has a @deprecated been encountered in last doc comment?
0132: * This needs to be reset by the client.
0133: */
0134: protected boolean deprecatedFlag = false;
0135:
0136: /** A character buffer for literals; sp is the number of characters stored in it.
0137: */
0138: private char[] sbuf = new char[128];
0139: private int sp;
0140:
0141: /** The input buffer, index of next character to be read,
0142: * index of one past last character in buffer, and the position reported for EOF.
0143: */
0144: private char[] buf;
0145: private int bp;
0146: private int buflen;
0147: private int eofPos;
0148:
0149: /** The current character.
0150: */
0151: private char ch;
0152:
0153: /** The buffer index of the last converted unicode character.
0154: */
0155: private int unicodeConversionBp = -1;
0156:
0157: /** The log to be used for error reporting.
0158: */
0159: private final Log log;
0160:
0161: /** The name table. */
0162: private final Name.Table names;
0163:
0164: /** The keyword table. */
0165: private final Keywords keywords;
0166:
0167: /** Common code for constructors. */
0168: private Scanner(Factory fac) {
0169: this .log = fac.log;
0170: this .names = fac.names;
0171: this .keywords = fac.keywords;
0172: this .allowHexFloats = fac.source.allowHexFloats();
0173: }
0174:
0175: private static final boolean hexFloatsWork = hexFloatsWork();
0176:
0177: private static boolean hexFloatsWork() {
0178: try {
0179: Float.valueOf("0x1.0p1");
0180: return true;
0181: } catch (NumberFormatException ex) {
0182: return false;
0183: }
0184: }
0185:
/**
 * Create a scanner from the input buffer. The buffer must implement
 * array() and compact(), and remaining() must be less than limit().
 * The characters are obtained through JavacFileManager.toArray and the
 * buffer's limit() is used as the input length.
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
0192:
/**
 * Create a scanner from the input array. The array may be modified:
 * an EOI sentinel is stored right after the scanned characters. To
 * avoid a copy of the input, make sure that
 * {@code inputLength < input.length} or that
 * {@code input[input.length - 1]} is a white space character.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input.
 * Must be positive and less than or equal to input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        // The array is full, so a slot for the sentinel has to be found.
        boolean endsWithWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsWithWhitespace) {
            // Sacrifice the trailing whitespace character to the sentinel.
            inputLength--;
        } else {
            // Otherwise grow the array by one element.
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }
    buf = input;
    buflen = inputLength;
    buf[buflen] = EOI;
    // Start just before the first character and read it.
    bp = -1;
    scanChar();
}
0223:
0224: /** Report an error at the given position using the provided arguments.
0225: */
0226: private void lexError(int pos, String key, Object... args) {
0227: log.error(pos, key, args);
0228: token = ERROR;
0229: errPos = pos;
0230: }
0231:
0232: /** Report an error at the current token position using the provided
0233: * arguments.
0234: */
0235: private void lexError(String key, Object... args) {
0236: lexError(pos, key, args);
0237: }
0238:
0239: /** Convert an ASCII digit from its base (8, 10, or 16)
0240: * to its value.
0241: */
0242: private int digit(int base) {
0243: char c = ch;
0244: int result = Character.digit(c, base);
0245: if (result >= 0 && c > 0x7f) {
0246: lexError(pos + 1, "illegal.nonascii.digit");
0247: ch = "0123456789abcdef".charAt(result);
0248: }
0249: return result;
0250: }
0251:
0252: /** Convert unicode escape; bp points to initial '\' character
0253: * (Spec 3.3).
0254: */
0255: private void convertUnicode() {
0256: if (ch == '\\' && unicodeConversionBp != bp) {
0257: bp++;
0258: ch = buf[bp];
0259: if (ch == 'u') {
0260: do {
0261: bp++;
0262: ch = buf[bp];
0263: } while (ch == 'u');
0264: int limit = bp + 3;
0265: if (limit < buflen) {
0266: int d = digit(16);
0267: int code = d;
0268: while (bp < limit && d >= 0) {
0269: bp++;
0270: ch = buf[bp];
0271: d = digit(16);
0272: code = (code << 4) + d;
0273: }
0274: if (d >= 0) {
0275: ch = (char) code;
0276: unicodeConversionBp = bp;
0277: return;
0278: }
0279: }
0280: lexError(bp, "illegal.unicode.esc");
0281: } else {
0282: bp--;
0283: ch = '\\';
0284: }
0285: }
0286: }
0287:
0288: /** Read next character.
0289: */
0290: private void scanChar() {
0291: ch = buf[++bp];
0292: if (ch == '\\') {
0293: convertUnicode();
0294: }
0295: }
0296:
0297: /** Read next character in comment, skipping over double '\' characters.
0298: */
0299: private void scanCommentChar() {
0300: scanChar();
0301: if (ch == '\\') {
0302: if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
0303: bp++;
0304: } else {
0305: convertUnicode();
0306: }
0307: }
0308: }
0309:
0310: /** Append a character to sbuf.
0311: */
0312: private void putChar(char ch) {
0313: if (sp == sbuf.length) {
0314: char[] newsbuf = new char[sbuf.length * 2];
0315: System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
0316: sbuf = newsbuf;
0317: }
0318: sbuf[sp++] = ch;
0319: }
0320:
0321: /** For debugging purposes: print character.
0322: */
0323: private void dch() {
0324: System.err.print(ch);
0325: System.out.flush();
0326: }
0327:
0328: /** Read next character in character or string literal and copy into sbuf.
0329: */
0330: private void scanLitChar() {
0331: if (ch == '\\') {
0332: if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
0333: bp++;
0334: putChar('\\');
0335: scanChar();
0336: } else {
0337: scanChar();
0338: switch (ch) {
0339: case '0':
0340: case '1':
0341: case '2':
0342: case '3':
0343: case '4':
0344: case '5':
0345: case '6':
0346: case '7':
0347: char leadch = ch;
0348: int oct = digit(8);
0349: scanChar();
0350: if ('0' <= ch && ch <= '7') {
0351: oct = oct * 8 + digit(8);
0352: scanChar();
0353: if (leadch <= '3' && '0' <= ch && ch <= '7') {
0354: oct = oct * 8 + digit(8);
0355: scanChar();
0356: }
0357: }
0358: putChar((char) oct);
0359: break;
0360: case 'b':
0361: putChar('\b');
0362: scanChar();
0363: break;
0364: case 't':
0365: putChar('\t');
0366: scanChar();
0367: break;
0368: case 'n':
0369: putChar('\n');
0370: scanChar();
0371: break;
0372: case 'f':
0373: putChar('\f');
0374: scanChar();
0375: break;
0376: case 'r':
0377: putChar('\r');
0378: scanChar();
0379: break;
0380: case '\'':
0381: putChar('\'');
0382: scanChar();
0383: break;
0384: case '\"':
0385: putChar('\"');
0386: scanChar();
0387: break;
0388: case '\\':
0389: putChar('\\');
0390: scanChar();
0391: break;
0392: default:
0393: lexError(bp, "illegal.esc.char");
0394: }
0395: }
0396: } else if (bp != buflen) {
0397: putChar(ch);
0398: scanChar();
0399: }
0400: }
0401:
0402: /** Read fractional part of hexadecimal floating point number.
0403: */
0404: private void scanHexExponentAndSuffix() {
0405: if (ch == 'p' || ch == 'P') {
0406: putChar(ch);
0407: scanChar();
0408: if (ch == '+' || ch == '-') {
0409: putChar(ch);
0410: scanChar();
0411: }
0412: if ('0' <= ch && ch <= '9') {
0413: do {
0414: putChar(ch);
0415: scanChar();
0416: } while ('0' <= ch && ch <= '9');
0417: if (!allowHexFloats) {
0418: lexError("unsupported.fp.lit");
0419: allowHexFloats = true;
0420: } else if (!hexFloatsWork)
0421: lexError("unsupported.cross.fp.lit");
0422: } else
0423: lexError("malformed.fp.lit");
0424: } else {
0425: lexError("malformed.fp.lit");
0426: }
0427: if (ch == 'f' || ch == 'F') {
0428: putChar(ch);
0429: scanChar();
0430: token = FLOATLITERAL;
0431: } else {
0432: if (ch == 'd' || ch == 'D') {
0433: putChar(ch);
0434: scanChar();
0435: }
0436: token = DOUBLELITERAL;
0437: }
0438: }
0439:
0440: /** Read fractional part of floating point number.
0441: */
0442: private void scanFraction() {
0443: while (digit(10) >= 0) {
0444: putChar(ch);
0445: scanChar();
0446: }
0447: int sp1 = sp;
0448: if (ch == 'e' || ch == 'E') {
0449: putChar(ch);
0450: scanChar();
0451: if (ch == '+' || ch == '-') {
0452: putChar(ch);
0453: scanChar();
0454: }
0455: if ('0' <= ch && ch <= '9') {
0456: do {
0457: putChar(ch);
0458: scanChar();
0459: } while ('0' <= ch && ch <= '9');
0460: return;
0461: }
0462: lexError("malformed.fp.lit");
0463: sp = sp1;
0464: }
0465: }
0466:
0467: /** Read fractional part and 'd' or 'f' suffix of floating point number.
0468: */
0469: private void scanFractionAndSuffix() {
0470: this .radix = 10;
0471: scanFraction();
0472: if (ch == 'f' || ch == 'F') {
0473: putChar(ch);
0474: scanChar();
0475: token = FLOATLITERAL;
0476: } else {
0477: if (ch == 'd' || ch == 'D') {
0478: putChar(ch);
0479: scanChar();
0480: }
0481: token = DOUBLELITERAL;
0482: }
0483: }
0484:
0485: /** Read fractional part and 'd' or 'f' suffix of floating point number.
0486: */
0487: private void scanHexFractionAndSuffix(boolean seendigit) {
0488: this .radix = 16;
0489: assert ch == '.';
0490: putChar(ch);
0491: scanChar();
0492: while (digit(16) >= 0) {
0493: seendigit = true;
0494: putChar(ch);
0495: scanChar();
0496: }
0497: if (!seendigit)
0498: lexError("invalid.hex.number");
0499: else
0500: scanHexExponentAndSuffix();
0501: }
0502:
0503: /** Read a number.
0504: * @param radix The radix of the number; one of 8, 10, 16.
0505: */
0506: private void scanNumber(int radix) {
0507: this .radix = radix;
0508: // for octal, allow base-10 digit in case it's a float literal
0509: int digitRadix = (radix <= 10) ? 10 : 16;
0510: boolean seendigit = false;
0511: while (digit(digitRadix) >= 0) {
0512: seendigit = true;
0513: putChar(ch);
0514: scanChar();
0515: }
0516: if (radix == 16 && ch == '.') {
0517: scanHexFractionAndSuffix(seendigit);
0518: } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
0519: scanHexExponentAndSuffix();
0520: } else if (radix <= 10 && ch == '.') {
0521: putChar(ch);
0522: scanChar();
0523: scanFractionAndSuffix();
0524: } else if (radix <= 10
0525: && (ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F'
0526: || ch == 'd' || ch == 'D')) {
0527: scanFractionAndSuffix();
0528: } else {
0529: if (ch == 'l' || ch == 'L') {
0530: scanChar();
0531: token = LONGLITERAL;
0532: } else {
0533: token = INTLITERAL;
0534: }
0535: }
0536: }
0537:
0538: /** Read an identifier.
0539: */
0540: private void scanIdent() {
0541: boolean isJavaIdentifierPart;
0542: char high;
0543: do {
0544: if (sp == sbuf.length)
0545: putChar(ch);
0546: else
0547: sbuf[sp++] = ch;
0548: // optimization, was: putChar(ch);
0549:
0550: scanChar();
0551: switch (ch) {
0552: case 'A':
0553: case 'B':
0554: case 'C':
0555: case 'D':
0556: case 'E':
0557: case 'F':
0558: case 'G':
0559: case 'H':
0560: case 'I':
0561: case 'J':
0562: case 'K':
0563: case 'L':
0564: case 'M':
0565: case 'N':
0566: case 'O':
0567: case 'P':
0568: case 'Q':
0569: case 'R':
0570: case 'S':
0571: case 'T':
0572: case 'U':
0573: case 'V':
0574: case 'W':
0575: case 'X':
0576: case 'Y':
0577: case 'Z':
0578: case 'a':
0579: case 'b':
0580: case 'c':
0581: case 'd':
0582: case 'e':
0583: case 'f':
0584: case 'g':
0585: case 'h':
0586: case 'i':
0587: case 'j':
0588: case 'k':
0589: case 'l':
0590: case 'm':
0591: case 'n':
0592: case 'o':
0593: case 'p':
0594: case 'q':
0595: case 'r':
0596: case 's':
0597: case 't':
0598: case 'u':
0599: case 'v':
0600: case 'w':
0601: case 'x':
0602: case 'y':
0603: case 'z':
0604: case '$':
0605: case '_':
0606: case '0':
0607: case '1':
0608: case '2':
0609: case '3':
0610: case '4':
0611: case '5':
0612: case '6':
0613: case '7':
0614: case '8':
0615: case '9':
0616: case '\u0000':
0617: case '\u0001':
0618: case '\u0002':
0619: case '\u0003':
0620: case '\u0004':
0621: case '\u0005':
0622: case '\u0006':
0623: case '\u0007':
0624: case '\u0008':
0625: case '\u000E':
0626: case '\u000F':
0627: case '\u0010':
0628: case '\u0011':
0629: case '\u0012':
0630: case '\u0013':
0631: case '\u0014':
0632: case '\u0015':
0633: case '\u0016':
0634: case '\u0017':
0635: case '\u0018':
0636: case '\u0019':
0637: case '\u001B':
0638: case '\u007F':
0639: break;
0640: case '\u001A': // EOI is also a legal identifier part
0641: if (bp >= buflen) {
0642: name = names.fromChars(sbuf, 0, sp);
0643: token = keywords.key(name);
0644: return;
0645: }
0646: break;
0647: default:
0648: if (ch < '\u0080') {
0649: // all ASCII range chars already handled, above
0650: isJavaIdentifierPart = false;
0651: } else {
0652: high = scanSurrogates();
0653: if (high != 0) {
0654: if (sp == sbuf.length) {
0655: putChar(high);
0656: } else {
0657: sbuf[sp++] = high;
0658: }
0659: isJavaIdentifierPart = Character
0660: .isJavaIdentifierPart(Character
0661: .toCodePoint(high, ch));
0662: } else {
0663: isJavaIdentifierPart = Character
0664: .isJavaIdentifierPart(ch);
0665: }
0666: }
0667: if (!isJavaIdentifierPart) {
0668: name = names.fromChars(sbuf, 0, sp);
0669: token = keywords.key(name);
0670: return;
0671: }
0672: }
0673: } while (true);
0674: }
0675:
0676: /** Are surrogates supported?
0677: */
0678: final static boolean surrogatesSupported = surrogatesSupported();
0679:
0680: private static boolean surrogatesSupported() {
0681: try {
0682: Character.isHighSurrogate('a');
0683: return true;
0684: } catch (NoSuchMethodError ex) {
0685: return false;
0686: }
0687: }
0688:
0689: /** Scan surrogate pairs. If 'ch' is a high surrogate and
0690: * the next character is a low surrogate, then put the low
0691: * surrogate in 'ch', and return the high surrogate.
0692: * otherwise, just return 0.
0693: */
0694: private char scanSurrogates() {
0695: if (surrogatesSupported && Character.isHighSurrogate(ch)) {
0696: char high = ch;
0697:
0698: scanChar();
0699:
0700: if (Character.isLowSurrogate(ch)) {
0701: return high;
0702: }
0703:
0704: ch = high;
0705: }
0706:
0707: return 0;
0708: }
0709:
0710: /** Return true if ch can be part of an operator.
0711: */
0712: private boolean isSpecial(char ch) {
0713: switch (ch) {
0714: case '!':
0715: case '%':
0716: case '&':
0717: case '*':
0718: case '?':
0719: case '+':
0720: case '-':
0721: case ':':
0722: case '<':
0723: case '=':
0724: case '>':
0725: case '^':
0726: case '|':
0727: case '~':
0728: case '@':
0729: return true;
0730: default:
0731: return false;
0732: }
0733: }
0734:
0735: /** Read longest possible sequence of special characters and convert
0736: * to token.
0737: */
0738: private void scanOperator() {
0739: while (true) {
0740: putChar(ch);
0741: Name newname = names.fromChars(sbuf, 0, sp);
0742: if (keywords.key(newname) == IDENTIFIER) {
0743: sp--;
0744: break;
0745: }
0746: name = newname;
0747: token = keywords.key(newname);
0748: scanChar();
0749: if (!isSpecial(ch))
0750: break;
0751: }
0752: }
0753:
0754: /**
0755: * Scan a documention comment; determine if a deprecated tag is present.
0756: * Called once the initial /, * have been skipped, positioned at the second *
0757: * (which is treated as the beginning of the first line).
0758: * Stops positioned at the closing '/'.
0759: */
0760: @SuppressWarnings("fallthrough")
0761: private void scanDocComment() {
0762: boolean deprecatedPrefix = false;
0763:
0764: forEachLine: while (bp < buflen) {
0765:
0766: // Skip optional WhiteSpace at beginning of line
0767: while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
0768: scanCommentChar();
0769: }
0770:
0771: // Skip optional consecutive Stars
0772: while (bp < buflen && ch == '*') {
0773: scanCommentChar();
0774: if (ch == '/') {
0775: return;
0776: }
0777: }
0778:
0779: // Skip optional WhiteSpace after Stars
0780: while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
0781: scanCommentChar();
0782: }
0783:
0784: deprecatedPrefix = false;
0785: // At beginning of line in the JavaDoc sense.
0786: if (bp < buflen && ch == '@' && !deprecatedFlag) {
0787: scanCommentChar();
0788: if (bp < buflen && ch == 'd') {
0789: scanCommentChar();
0790: if (bp < buflen && ch == 'e') {
0791: scanCommentChar();
0792: if (bp < buflen && ch == 'p') {
0793: scanCommentChar();
0794: if (bp < buflen && ch == 'r') {
0795: scanCommentChar();
0796: if (bp < buflen && ch == 'e') {
0797: scanCommentChar();
0798: if (bp < buflen && ch == 'c') {
0799: scanCommentChar();
0800: if (bp < buflen && ch == 'a') {
0801: scanCommentChar();
0802: if (bp < buflen
0803: && ch == 't') {
0804: scanCommentChar();
0805: if (bp < buflen
0806: && ch == 'e') {
0807: scanCommentChar();
0808: if (bp < buflen
0809: && ch == 'd') {
0810: deprecatedPrefix = true;
0811: scanCommentChar();
0812: }
0813: }
0814: }
0815: }
0816: }
0817: }
0818: }
0819: }
0820: }
0821: }
0822: }
0823: if (deprecatedPrefix && bp < buflen) {
0824: if (Character.isWhitespace(ch)) {
0825: deprecatedFlag = true;
0826: } else if (ch == '*') {
0827: scanCommentChar();
0828: if (ch == '/') {
0829: deprecatedFlag = true;
0830: return;
0831: }
0832: }
0833: }
0834:
0835: // Skip rest of line
0836: while (bp < buflen) {
0837: switch (ch) {
0838: case '*':
0839: scanCommentChar();
0840: if (ch == '/') {
0841: return;
0842: }
0843: break;
0844: case CR: // (Spec 3.4)
0845: scanCommentChar();
0846: if (ch != LF) {
0847: continue forEachLine;
0848: }
0849: /* fall through to LF case */
0850: case LF: // (Spec 3.4)
0851: scanCommentChar();
0852: continue forEachLine;
0853: default:
0854: scanCommentChar();
0855: }
0856: } // rest of line
0857: } // forEachLine
0858: return;
0859: }
0860:
0861: /** The value of a literal token, recorded as a string.
0862: * For integers, leading 0x and 'l' suffixes are suppressed.
0863: */
0864: public String stringVal() {
0865: return new String(sbuf, 0, sp);
0866: }
0867:
0868: /** Read token.
0869: */
0870: public void nextToken() {
0871:
0872: try {
0873: prevEndPos = endPos;
0874: sp = 0;
0875:
0876: while (true) {
0877: pos = bp;
0878: switch (ch) {
0879: case ' ': // (Spec 3.6)
0880: case '\t': // (Spec 3.6)
0881: case FF: // (Spec 3.6)
0882: do {
0883: scanChar();
0884: } while (ch == ' ' || ch == '\t' || ch == FF);
0885: endPos = bp;
0886: processWhiteSpace();
0887: break;
0888: case LF: // (Spec 3.4)
0889: scanChar();
0890: endPos = bp;
0891: processLineTerminator();
0892: break;
0893: case CR: // (Spec 3.4)
0894: scanChar();
0895: if (ch == LF) {
0896: scanChar();
0897: }
0898: endPos = bp;
0899: processLineTerminator();
0900: break;
0901: case 'A':
0902: case 'B':
0903: case 'C':
0904: case 'D':
0905: case 'E':
0906: case 'F':
0907: case 'G':
0908: case 'H':
0909: case 'I':
0910: case 'J':
0911: case 'K':
0912: case 'L':
0913: case 'M':
0914: case 'N':
0915: case 'O':
0916: case 'P':
0917: case 'Q':
0918: case 'R':
0919: case 'S':
0920: case 'T':
0921: case 'U':
0922: case 'V':
0923: case 'W':
0924: case 'X':
0925: case 'Y':
0926: case 'Z':
0927: case 'a':
0928: case 'b':
0929: case 'c':
0930: case 'd':
0931: case 'e':
0932: case 'f':
0933: case 'g':
0934: case 'h':
0935: case 'i':
0936: case 'j':
0937: case 'k':
0938: case 'l':
0939: case 'm':
0940: case 'n':
0941: case 'o':
0942: case 'p':
0943: case 'q':
0944: case 'r':
0945: case 's':
0946: case 't':
0947: case 'u':
0948: case 'v':
0949: case 'w':
0950: case 'x':
0951: case 'y':
0952: case 'z':
0953: case '$':
0954: case '_':
0955: scanIdent();
0956: return;
0957: case '0':
0958: scanChar();
0959: if (ch == 'x' || ch == 'X') {
0960: scanChar();
0961: if (ch == '.') {
0962: scanHexFractionAndSuffix(false);
0963: } else if (digit(16) < 0) {
0964: lexError("invalid.hex.number");
0965: } else {
0966: scanNumber(16);
0967: }
0968: } else {
0969: putChar('0');
0970: scanNumber(8);
0971: }
0972: return;
0973: case '1':
0974: case '2':
0975: case '3':
0976: case '4':
0977: case '5':
0978: case '6':
0979: case '7':
0980: case '8':
0981: case '9':
0982: scanNumber(10);
0983: return;
0984: case '.':
0985: scanChar();
0986: if ('0' <= ch && ch <= '9') {
0987: putChar('.');
0988: scanFractionAndSuffix();
0989: } else if (ch == '.') {
0990: putChar('.');
0991: putChar('.');
0992: scanChar();
0993: if (ch == '.') {
0994: scanChar();
0995: putChar('.');
0996: token = ELLIPSIS;
0997: } else {
0998: lexError("malformed.fp.lit");
0999: }
1000: } else {
1001: token = DOT;
1002: }
1003: return;
1004: case ',':
1005: scanChar();
1006: token = COMMA;
1007: return;
1008: case ';':
1009: scanChar();
1010: token = SEMI;
1011: return;
1012: case '(':
1013: scanChar();
1014: token = LPAREN;
1015: return;
1016: case ')':
1017: scanChar();
1018: token = RPAREN;
1019: return;
1020: case '[':
1021: scanChar();
1022: token = LBRACKET;
1023: return;
1024: case ']':
1025: scanChar();
1026: token = RBRACKET;
1027: return;
1028: case '{':
1029: scanChar();
1030: token = LBRACE;
1031: return;
1032: case '}':
1033: scanChar();
1034: token = RBRACE;
1035: return;
1036: case '/':
1037: scanChar();
1038: if (ch == '/') {
1039: do {
1040: scanCommentChar();
1041: } while (ch != CR && ch != LF && bp < buflen);
1042: if (bp < buflen) {
1043: endPos = bp;
1044: processComment(CommentStyle.LINE);
1045: }
1046: break;
1047: } else if (ch == '*') {
1048: scanChar();
1049: CommentStyle style;
1050: if (ch == '*') {
1051: style = CommentStyle.JAVADOC;
1052: scanDocComment();
1053: } else {
1054: style = CommentStyle.BLOCK;
1055: while (bp < buflen) {
1056: if (ch == '*') {
1057: scanChar();
1058: if (ch == '/')
1059: break;
1060: } else {
1061: scanCommentChar();
1062: }
1063: }
1064: }
1065: if (ch == '/') {
1066: scanChar();
1067: endPos = bp;
1068: processComment(style);
1069: break;
1070: } else {
1071: lexError("unclosed.comment");
1072: return;
1073: }
1074: } else if (ch == '=') {
1075: name = names.slashequals;
1076: token = SLASHEQ;
1077: scanChar();
1078: } else {
1079: name = names.slash;
1080: token = SLASH;
1081: }
1082: return;
1083: case '\'':
1084: scanChar();
1085: if (ch == '\'') {
1086: lexError("empty.char.lit");
1087: } else {
1088: if (ch == CR || ch == LF)
1089: lexError(pos,
1090: "illegal.line.end.in.char.lit");
1091: scanLitChar();
1092: if (ch == '\'') {
1093: scanChar();
1094: token = CHARLITERAL;
1095: } else {
1096: lexError(pos, "unclosed.char.lit");
1097: }
1098: }
1099: return;
1100: case '\"':
1101: scanChar();
1102: while (ch != '\"' && ch != CR && ch != LF
1103: && bp < buflen)
1104: scanLitChar();
1105: if (ch == '\"') {
1106: token = STRINGLITERAL;
1107: scanChar();
1108: } else {
1109: lexError(pos, "unclosed.str.lit");
1110: }
1111: return;
1112: default:
1113: if (isSpecial(ch)) {
1114: scanOperator();
1115: } else {
1116: boolean isJavaIdentifierStart;
1117: if (ch < '\u0080') {
1118: // all ASCII range chars already handled, above
1119: isJavaIdentifierStart = false;
1120: } else {
1121: char high = scanSurrogates();
1122: if (high != 0) {
1123: if (sp == sbuf.length) {
1124: putChar(high);
1125: } else {
1126: sbuf[sp++] = high;
1127: }
1128:
1129: isJavaIdentifierStart = Character
1130: .isJavaIdentifierStart(Character
1131: .toCodePoint(high, ch));
1132: } else {
1133: isJavaIdentifierStart = Character
1134: .isJavaIdentifierStart(ch);
1135: }
1136: }
1137: if (isJavaIdentifierStart) {
1138: scanIdent();
1139: } else if (bp == buflen || ch == EOI
1140: && bp + 1 == buflen) { // JLS 3.5
1141: token = EOF;
1142: pos = bp = eofPos;
1143: } else {
1144: lexError("illegal.char", String
1145: .valueOf((int) ch));
1146: scanChar();
1147: }
1148: }
1149: return;
1150: }
1151: }
1152: } finally {
1153: endPos = bp;
1154: if (scannerDebug)
1155: System.out.println("nextToken(" + pos + "," + endPos
1156: + ")=|"
1157: + new String(getRawCharacters(pos, endPos))
1158: + "|");
1159: }
1160: }
1161:
1162: /** Return the current token, set by nextToken().
1163: */
1164: public Token token() {
1165: return token;
1166: }
1167:
1168: /** Sets the current token.
1169: */
1170: public void token(Token token) {
1171: this .token = token;
1172: }
1173:
1174: /** Return the current token's position: a 0-based
1175: * offset from beginning of the raw input stream
1176: * (before unicode translation)
1177: */
1178: public int pos() {
1179: return pos;
1180: }
1181:
1182: /** Return the last character position of the current token.
1183: */
1184: public int endPos() {
1185: return endPos;
1186: }
1187:
1188: /** Return the last character position of the previous token.
1189: */
1190: public int prevEndPos() {
1191: return prevEndPos;
1192: }
1193:
1194: /** Return the position where a lexical error occurred;
1195: */
1196: public int errPos() {
1197: return errPos;
1198: }
1199:
1200: /** Set the position where a lexical error occurred;
1201: */
1202: public void errPos(int pos) {
1203: errPos = pos;
1204: }
1205:
1206: /** Return the name of an identifier or token for the current token.
1207: */
1208: public Name name() {
1209: return name;
1210: }
1211:
1212: /** Return the radix of a numeric literal token.
1213: */
1214: public int radix() {
1215: return radix;
1216: }
1217:
1218: /** Has a @deprecated been encountered in last doc comment?
1219: * This needs to be reset by client with resetDeprecatedFlag.
1220: */
1221: public boolean deprecatedFlag() {
1222: return deprecatedFlag;
1223: }
1224:
1225: public void resetDeprecatedFlag() {
1226: deprecatedFlag = false;
1227: }
1228:
1229: /**
1230: * Returns the documentation string of the current token.
1231: */
1232: public String docComment() {
1233: return null;
1234: }
1235:
1236: /**
1237: * Returns a copy of the input buffer, up to its inputLength.
1238: * Unicode escape sequences are not translated.
1239: */
1240: public char[] getRawCharacters() {
1241: char[] chars = new char[buflen];
1242: System.arraycopy(buf, 0, chars, 0, buflen);
1243: return chars;
1244: }
1245:
1246: /**
1247: * Returns a copy of a character array subset of the input buffer.
1248: * The returned array begins at the <code>beginIndex</code> and
1249: * extends to the character at index <code>endIndex - 1</code>.
1250: * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1251: * This behavior is like
1252: * <code>String.substring(beginIndex, endIndex)</code>.
1253: * Unicode escape sequences are not translated.
1254: *
1255: * @param beginIndex the beginning index, inclusive.
1256: * @param endIndex the ending index, exclusive.
1257: * @throws IndexOutOfBounds if either offset is outside of the
1258: * array bounds
1259: */
1260: public char[] getRawCharacters(int beginIndex, int endIndex) {
1261: int length = endIndex - beginIndex;
1262: char[] chars = new char[length];
1263: System.arraycopy(buf, beginIndex, chars, 0, length);
1264: return chars;
1265: }
1266:
/** The kind of comment handed to processComment. */
public enum CommentStyle {
    /** A comment introduced by two slashes, running to the end of the line. */
    LINE,
    /** A comment introduced by a slash and a single star. */
    BLOCK,
    /** A comment introduced by a slash and two stars. */
    JAVADOC
}
1270:
1271: /**
1272: * Called when a complete comment has been scanned. pos and endPos
1273: * will mark the comment boundary.
1274: */
1275: protected void processComment(CommentStyle style) {
1276: if (scannerDebug)
1277: System.out.println("processComment(" + pos + "," + endPos
1278: + "," + style + ")=|"
1279: + new String(getRawCharacters(pos, endPos)) + "|");
1280: }
1281:
1282: /**
1283: * Called when a complete whitespace run has been scanned. pos and endPos
1284: * will mark the whitespace boundary.
1285: */
1286: protected void processWhiteSpace() {
1287: if (scannerDebug)
1288: System.out.println("processWhitespace(" + pos + ","
1289: + endPos + ")=|"
1290: + new String(getRawCharacters(pos, endPos)) + "|");
1291: }
1292:
1293: /**
1294: * Called when a line terminator has been processed.
1295: */
1296: protected void processLineTerminator() {
1297: if (scannerDebug)
1298: System.out.println("processTerminator(" + pos + ","
1299: + endPos + ")=|"
1300: + new String(getRawCharacters(pos, endPos)) + "|");
1301: }
1302:
1303: /** Build a map for translating between line numbers and
1304: * positions in the input.
1305: *
1306: * @return a LineMap */
1307: public Position.LineMap getLineMap() {
1308: return Position.makeLineMap(buf, buflen, false);
1309: }
1310:
1311: }
|