Source Code Cross Referenced for Scanner.java in » 6.0-JDK-Modules-com.sun » tools » com » sun » tools » javac » parser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » 6.0 JDK Modules com.sun » tools » com.sun.tools.javac.parser
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * Copyright 1999-2006 Sun Microsystems, Inc.  All Rights Reserved.
0003:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004:         *
0005:         * This code is free software; you can redistribute it and/or modify it
0006:         * under the terms of the GNU General Public License version 2 only, as
0007:         * published by the Free Software Foundation.  Sun designates this
0008:         * particular file as subject to the "Classpath" exception as provided
0009:         * by Sun in the LICENSE file that accompanied this code.
0010:         *
0011:         * This code is distributed in the hope that it will be useful, but WITHOUT
0012:         * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013:         * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
0014:         * version 2 for more details (a copy is included in the LICENSE file that
0015:         * accompanied this code).
0016:         *
0017:         * You should have received a copy of the GNU General Public License version
0018:         * 2 along with this work; if not, write to the Free Software Foundation,
0019:         * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020:         *
0021:         * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022:         * CA 95054 USA or visit www.sun.com if you need additional information or
0023:         * have any questions.
0024:         */
0025:
0026:        package com.sun.tools.javac.parser;
0027:
0028:        import java.io.*;
0029:        import java.nio.*;
0030:        import java.nio.ByteBuffer;
0031:        import java.nio.charset.*;
0032:        import java.nio.channels.*;
0033:        import java.util.regex.*;
0034:
0035:        import com.sun.tools.javac.util.*;
0036:
0037:        import com.sun.tools.javac.code.Source;
0038:
0039:        import static com.sun.tools.javac.parser.Token.*;
0040:        import static com.sun.tools.javac.util.LayoutCharacters.*;
0041:
0042:        /** The lexical analyzer maps an input stream consisting of
0043:         *  ASCII characters and Unicode escapes into a token sequence.
0044:         *
0045:         *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
0046:         *  you write code that depends on this, you do so at your own risk.
0047:         *  This code and its internal interfaces are subject to change or
0048:         *  deletion without notice.</b>
0049:         */
0050:        @Version("@(#)Scanner.java	1.79 07/05/05")
0051:        public class Scanner implements  Lexer {
0052:
0053:            private static boolean scannerDebug = false;
0054:
0055:            /** A factory for creating scanners. */
0056:            public static class Factory {
0057:                /** The context key for the scanner factory. */
0058:                public static final Context.Key<Scanner.Factory> scannerFactoryKey = new Context.Key<Scanner.Factory>();
0059:
0060:                /** Get the Factory instance for this context. */
0061:                public static Factory instance(Context context) {
0062:                    Factory instance = context.get(scannerFactoryKey);
0063:                    if (instance == null)
0064:                        instance = new Factory(context);
0065:                    return instance;
0066:                }
0067:
0068:                final Log log;
0069:                final Name.Table names;
0070:                final Source source;
0071:                final Keywords keywords;
0072:
0073:                /** Create a new scanner factory. */
0074:                protected Factory(Context context) {
0075:                    context.put(scannerFactoryKey, this );
0076:                    this .log = Log.instance(context);
0077:                    this .names = Name.Table.instance(context);
0078:                    this .source = Source.instance(context);
0079:                    this .keywords = Keywords.instance(context);
0080:                }
0081:
0082:                public Scanner newScanner(CharSequence input) {
0083:                    if (input instanceof  CharBuffer) {
0084:                        return new Scanner(this , (CharBuffer) input);
0085:                    } else {
0086:                        char[] array = input.toString().toCharArray();
0087:                        return newScanner(array, array.length);
0088:                    }
0089:                }
0090:
0091:                public Scanner newScanner(char[] input, int inputLength) {
0092:                    return new Scanner(this , input, inputLength);
0093:                }
0094:            }
0095:
                 /* Output variables; set by nextToken():
                  */

                 /** The token, set by nextToken().
                  */
                 private Token token;

                 /** Allow hex floating-point literals.
                  *  Initialised from the factory's Source; also switched on after the
                  *  first "unsupported.fp.lit" error is reported.
                  */
                 private boolean allowHexFloats;

                 /** The token's position, 0-based offset from beginning of text.
                  */
                 private int pos;

                 /** Character position just after the last character of the token.
                  */
                 private int endPos;

                 /** The last character position of the previous token.
                  */
                 private int prevEndPos;

                 /** The position where a lexical error occurred;
                  *  Position.NOPOS until lexError() records one.
                  */
                 private int errPos = Position.NOPOS;

                 /** The name of an identifier or token:
                  */
                 private Name name;

                 /** The radix of a numeric literal token.
                  */
                 private int radix;

                 /** Has a @deprecated been encountered in last doc comment?
                  *  this needs to be reset by client.
                  */
                 protected boolean deprecatedFlag = false;

                 /** A character buffer for literals; sp is the number of characters
                  *  currently stored in it. putChar() doubles the buffer when it is full.
                  */
                 private char[] sbuf = new char[128];
                 private int sp;

                 /** The input buffer (buf), the index of the character currently held
                  *  in ch (bp), and the index at which the EOI sentinel is stored (buflen).
                  *  eofPos is the input length as passed to the constructor, before any
                  *  adjustment made to fit the sentinel.
                  */
                 private char[] buf;
                 private int bp;
                 private int buflen;
                 private int eofPos;

                 /** The current character.
                  */
                 private char ch;

                 /** The buffer index of the last converted unicode character
                  */
                 private int unicodeConversionBp = -1;

                 /** The log to be used for error reporting.
                  */
                 private final Log log;

                 /** The name table. */
                 private final Name.Table names;

                 /** The keyword table. */
                 private final Keywords keywords;
0166:
0167:            /** Common code for constructors. */
0168:            private Scanner(Factory fac) {
0169:                this .log = fac.log;
0170:                this .names = fac.names;
0171:                this .keywords = fac.keywords;
0172:                this .allowHexFloats = fac.source.allowHexFloats();
0173:            }
0174:
0175:            private static final boolean hexFloatsWork = hexFloatsWork();
0176:
0177:            private static boolean hexFloatsWork() {
0178:                try {
0179:                    Float.valueOf("0x1.0p1");
0180:                    return true;
0181:                } catch (NumberFormatException ex) {
0182:                    return false;
0183:                }
0184:            }
0185:
                 /** Create a scanner from the input buffer.  buffer must implement
                  *  array() and compact(), and remaining() must be less than limit().
                  *  The characters are obtained through JavacFileManager.toArray(buffer)
                  *  and buffer.limit() is passed on as the input length.
                  *
                  *  @param fac the factory which created this Scanner
                  *  @param buffer the input characters
                  */
                 protected Scanner(Factory fac, CharBuffer buffer) {
                     this (fac, JavacFileManager.toArray(buffer), buffer.limit());
                 }
0192:
0193:            /**
0194:             * Create a scanner from the input array.  This method might
0195:             * modify the array.  To avoid copying the input array, ensure
0196:             * that {@code inputLength < input.length} or
0197:             * {@code input[input.length -1]} is a white space character.
0198:             * 
0199:             * @param fac the factory which created this Scanner
0200:             * @param input the input, might be modified
0201:             * @param inputLength the size of the input.
0202:             * Must be positive and less than or equal to input.length.
0203:             */
0204:            protected Scanner(Factory fac, char[] input, int inputLength) {
0205:                this (fac);
0206:                eofPos = inputLength;
0207:                if (inputLength == input.length) {
0208:                    if (input.length > 0
0209:                            && Character.isWhitespace(input[input.length - 1])) {
0210:                        inputLength--;
0211:                    } else {
0212:                        char[] newInput = new char[inputLength + 1];
0213:                        System.arraycopy(input, 0, newInput, 0, input.length);
0214:                        input = newInput;
0215:                    }
0216:                }
0217:                buf = input;
0218:                buflen = inputLength;
0219:                buf[buflen] = EOI;
0220:                bp = -1;
0221:                scanChar();
0222:            }
0223:
0224:            /** Report an error at the given position using the provided arguments.
0225:             */
0226:            private void lexError(int pos, String key, Object... args) {
0227:                log.error(pos, key, args);
0228:                token = ERROR;
0229:                errPos = pos;
0230:            }
0231:
0232:            /** Report an error at the current token position using the provided
0233:             *  arguments.
0234:             */
0235:            private void lexError(String key, Object... args) {
0236:                lexError(pos, key, args);
0237:            }
0238:
0239:            /** Convert an ASCII digit from its base (8, 10, or 16)
0240:             *  to its value.
0241:             */
0242:            private int digit(int base) {
0243:                char c = ch;
0244:                int result = Character.digit(c, base);
0245:                if (result >= 0 && c > 0x7f) {
0246:                    lexError(pos + 1, "illegal.nonascii.digit");
0247:                    ch = "0123456789abcdef".charAt(result);
0248:                }
0249:                return result;
0250:            }
0251:
0252:            /** Convert unicode escape; bp points to initial '\' character
0253:             *  (Spec 3.3).
0254:             */
0255:            private void convertUnicode() {
0256:                if (ch == '\\' && unicodeConversionBp != bp) {
0257:                    bp++;
0258:                    ch = buf[bp];
0259:                    if (ch == 'u') {
0260:                        do {
0261:                            bp++;
0262:                            ch = buf[bp];
0263:                        } while (ch == 'u');
0264:                        int limit = bp + 3;
0265:                        if (limit < buflen) {
0266:                            int d = digit(16);
0267:                            int code = d;
0268:                            while (bp < limit && d >= 0) {
0269:                                bp++;
0270:                                ch = buf[bp];
0271:                                d = digit(16);
0272:                                code = (code << 4) + d;
0273:                            }
0274:                            if (d >= 0) {
0275:                                ch = (char) code;
0276:                                unicodeConversionBp = bp;
0277:                                return;
0278:                            }
0279:                        }
0280:                        lexError(bp, "illegal.unicode.esc");
0281:                    } else {
0282:                        bp--;
0283:                        ch = '\\';
0284:                    }
0285:                }
0286:            }
0287:
0288:            /** Read next character.
0289:             */
0290:            private void scanChar() {
0291:                ch = buf[++bp];
0292:                if (ch == '\\') {
0293:                    convertUnicode();
0294:                }
0295:            }
0296:
0297:            /** Read next character in comment, skipping over double '\' characters.
0298:             */
0299:            private void scanCommentChar() {
0300:                scanChar();
0301:                if (ch == '\\') {
0302:                    if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
0303:                        bp++;
0304:                    } else {
0305:                        convertUnicode();
0306:                    }
0307:                }
0308:            }
0309:
0310:            /** Append a character to sbuf.
0311:             */
0312:            private void putChar(char ch) {
0313:                if (sp == sbuf.length) {
0314:                    char[] newsbuf = new char[sbuf.length * 2];
0315:                    System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
0316:                    sbuf = newsbuf;
0317:                }
0318:                sbuf[sp++] = ch;
0319:            }
0320:
0321:            /** For debugging purposes: print character.
0322:             */
0323:            private void dch() {
0324:                System.err.print(ch);
0325:                System.out.flush();
0326:            }
0327:
0328:            /** Read next character in character or string literal and copy into sbuf.
0329:             */
0330:            private void scanLitChar() {
0331:                if (ch == '\\') {
0332:                    if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
0333:                        bp++;
0334:                        putChar('\\');
0335:                        scanChar();
0336:                    } else {
0337:                        scanChar();
0338:                        switch (ch) {
0339:                        case '0':
0340:                        case '1':
0341:                        case '2':
0342:                        case '3':
0343:                        case '4':
0344:                        case '5':
0345:                        case '6':
0346:                        case '7':
0347:                            char leadch = ch;
0348:                            int oct = digit(8);
0349:                            scanChar();
0350:                            if ('0' <= ch && ch <= '7') {
0351:                                oct = oct * 8 + digit(8);
0352:                                scanChar();
0353:                                if (leadch <= '3' && '0' <= ch && ch <= '7') {
0354:                                    oct = oct * 8 + digit(8);
0355:                                    scanChar();
0356:                                }
0357:                            }
0358:                            putChar((char) oct);
0359:                            break;
0360:                        case 'b':
0361:                            putChar('\b');
0362:                            scanChar();
0363:                            break;
0364:                        case 't':
0365:                            putChar('\t');
0366:                            scanChar();
0367:                            break;
0368:                        case 'n':
0369:                            putChar('\n');
0370:                            scanChar();
0371:                            break;
0372:                        case 'f':
0373:                            putChar('\f');
0374:                            scanChar();
0375:                            break;
0376:                        case 'r':
0377:                            putChar('\r');
0378:                            scanChar();
0379:                            break;
0380:                        case '\'':
0381:                            putChar('\'');
0382:                            scanChar();
0383:                            break;
0384:                        case '\"':
0385:                            putChar('\"');
0386:                            scanChar();
0387:                            break;
0388:                        case '\\':
0389:                            putChar('\\');
0390:                            scanChar();
0391:                            break;
0392:                        default:
0393:                            lexError(bp, "illegal.esc.char");
0394:                        }
0395:                    }
0396:                } else if (bp != buflen) {
0397:                    putChar(ch);
0398:                    scanChar();
0399:                }
0400:            }
0401:
0402:            /** Read fractional part of hexadecimal floating point number.
0403:             */
0404:            private void scanHexExponentAndSuffix() {
0405:                if (ch == 'p' || ch == 'P') {
0406:                    putChar(ch);
0407:                    scanChar();
0408:                    if (ch == '+' || ch == '-') {
0409:                        putChar(ch);
0410:                        scanChar();
0411:                    }
0412:                    if ('0' <= ch && ch <= '9') {
0413:                        do {
0414:                            putChar(ch);
0415:                            scanChar();
0416:                        } while ('0' <= ch && ch <= '9');
0417:                        if (!allowHexFloats) {
0418:                            lexError("unsupported.fp.lit");
0419:                            allowHexFloats = true;
0420:                        } else if (!hexFloatsWork)
0421:                            lexError("unsupported.cross.fp.lit");
0422:                    } else
0423:                        lexError("malformed.fp.lit");
0424:                } else {
0425:                    lexError("malformed.fp.lit");
0426:                }
0427:                if (ch == 'f' || ch == 'F') {
0428:                    putChar(ch);
0429:                    scanChar();
0430:                    token = FLOATLITERAL;
0431:                } else {
0432:                    if (ch == 'd' || ch == 'D') {
0433:                        putChar(ch);
0434:                        scanChar();
0435:                    }
0436:                    token = DOUBLELITERAL;
0437:                }
0438:            }
0439:
0440:            /** Read fractional part of floating point number.
0441:             */
0442:            private void scanFraction() {
0443:                while (digit(10) >= 0) {
0444:                    putChar(ch);
0445:                    scanChar();
0446:                }
0447:                int sp1 = sp;
0448:                if (ch == 'e' || ch == 'E') {
0449:                    putChar(ch);
0450:                    scanChar();
0451:                    if (ch == '+' || ch == '-') {
0452:                        putChar(ch);
0453:                        scanChar();
0454:                    }
0455:                    if ('0' <= ch && ch <= '9') {
0456:                        do {
0457:                            putChar(ch);
0458:                            scanChar();
0459:                        } while ('0' <= ch && ch <= '9');
0460:                        return;
0461:                    }
0462:                    lexError("malformed.fp.lit");
0463:                    sp = sp1;
0464:                }
0465:            }
0466:
0467:            /** Read fractional part and 'd' or 'f' suffix of floating point number.
0468:             */
0469:            private void scanFractionAndSuffix() {
0470:                this .radix = 10;
0471:                scanFraction();
0472:                if (ch == 'f' || ch == 'F') {
0473:                    putChar(ch);
0474:                    scanChar();
0475:                    token = FLOATLITERAL;
0476:                } else {
0477:                    if (ch == 'd' || ch == 'D') {
0478:                        putChar(ch);
0479:                        scanChar();
0480:                    }
0481:                    token = DOUBLELITERAL;
0482:                }
0483:            }
0484:
0485:            /** Read fractional part and 'd' or 'f' suffix of floating point number.
0486:             */
0487:            private void scanHexFractionAndSuffix(boolean seendigit) {
0488:                this .radix = 16;
0489:                assert ch == '.';
0490:                putChar(ch);
0491:                scanChar();
0492:                while (digit(16) >= 0) {
0493:                    seendigit = true;
0494:                    putChar(ch);
0495:                    scanChar();
0496:                }
0497:                if (!seendigit)
0498:                    lexError("invalid.hex.number");
0499:                else
0500:                    scanHexExponentAndSuffix();
0501:            }
0502:
0503:            /** Read a number.
0504:             *  @param radix  The radix of the number; one of 8, 10, 16.
0505:             */
0506:            private void scanNumber(int radix) {
0507:                this .radix = radix;
0508:                // for octal, allow base-10 digit in case it's a float literal
0509:                int digitRadix = (radix <= 10) ? 10 : 16;
0510:                boolean seendigit = false;
0511:                while (digit(digitRadix) >= 0) {
0512:                    seendigit = true;
0513:                    putChar(ch);
0514:                    scanChar();
0515:                }
0516:                if (radix == 16 && ch == '.') {
0517:                    scanHexFractionAndSuffix(seendigit);
0518:                } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
0519:                    scanHexExponentAndSuffix();
0520:                } else if (radix <= 10 && ch == '.') {
0521:                    putChar(ch);
0522:                    scanChar();
0523:                    scanFractionAndSuffix();
0524:                } else if (radix <= 10
0525:                        && (ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F'
0526:                                || ch == 'd' || ch == 'D')) {
0527:                    scanFractionAndSuffix();
0528:                } else {
0529:                    if (ch == 'l' || ch == 'L') {
0530:                        scanChar();
0531:                        token = LONGLITERAL;
0532:                    } else {
0533:                        token = INTLITERAL;
0534:                    }
0535:                }
0536:            }
0537:
0538:            /** Read an identifier.
0539:             */
0540:            private void scanIdent() {
0541:                boolean isJavaIdentifierPart;
0542:                char high;
0543:                do {
0544:                    if (sp == sbuf.length)
0545:                        putChar(ch);
0546:                    else
0547:                        sbuf[sp++] = ch;
0548:                    // optimization, was: putChar(ch);
0549:
0550:                    scanChar();
0551:                    switch (ch) {
0552:                    case 'A':
0553:                    case 'B':
0554:                    case 'C':
0555:                    case 'D':
0556:                    case 'E':
0557:                    case 'F':
0558:                    case 'G':
0559:                    case 'H':
0560:                    case 'I':
0561:                    case 'J':
0562:                    case 'K':
0563:                    case 'L':
0564:                    case 'M':
0565:                    case 'N':
0566:                    case 'O':
0567:                    case 'P':
0568:                    case 'Q':
0569:                    case 'R':
0570:                    case 'S':
0571:                    case 'T':
0572:                    case 'U':
0573:                    case 'V':
0574:                    case 'W':
0575:                    case 'X':
0576:                    case 'Y':
0577:                    case 'Z':
0578:                    case 'a':
0579:                    case 'b':
0580:                    case 'c':
0581:                    case 'd':
0582:                    case 'e':
0583:                    case 'f':
0584:                    case 'g':
0585:                    case 'h':
0586:                    case 'i':
0587:                    case 'j':
0588:                    case 'k':
0589:                    case 'l':
0590:                    case 'm':
0591:                    case 'n':
0592:                    case 'o':
0593:                    case 'p':
0594:                    case 'q':
0595:                    case 'r':
0596:                    case 's':
0597:                    case 't':
0598:                    case 'u':
0599:                    case 'v':
0600:                    case 'w':
0601:                    case 'x':
0602:                    case 'y':
0603:                    case 'z':
0604:                    case '$':
0605:                    case '_':
0606:                    case '0':
0607:                    case '1':
0608:                    case '2':
0609:                    case '3':
0610:                    case '4':
0611:                    case '5':
0612:                    case '6':
0613:                    case '7':
0614:                    case '8':
0615:                    case '9':
0616:                    case '\u0000':
0617:                    case '\u0001':
0618:                    case '\u0002':
0619:                    case '\u0003':
0620:                    case '\u0004':
0621:                    case '\u0005':
0622:                    case '\u0006':
0623:                    case '\u0007':
0624:                    case '\u0008':
0625:                    case '\u000E':
0626:                    case '\u000F':
0627:                    case '\u0010':
0628:                    case '\u0011':
0629:                    case '\u0012':
0630:                    case '\u0013':
0631:                    case '\u0014':
0632:                    case '\u0015':
0633:                    case '\u0016':
0634:                    case '\u0017':
0635:                    case '\u0018':
0636:                    case '\u0019':
0637:                    case '\u001B':
0638:                    case '\u007F':
0639:                        break;
0640:                    case '\u001A': // EOI is also a legal identifier part
0641:                        if (bp >= buflen) {
0642:                            name = names.fromChars(sbuf, 0, sp);
0643:                            token = keywords.key(name);
0644:                            return;
0645:                        }
0646:                        break;
0647:                    default:
0648:                        if (ch < '\u0080') {
0649:                            // all ASCII range chars already handled, above
0650:                            isJavaIdentifierPart = false;
0651:                        } else {
0652:                            high = scanSurrogates();
0653:                            if (high != 0) {
0654:                                if (sp == sbuf.length) {
0655:                                    putChar(high);
0656:                                } else {
0657:                                    sbuf[sp++] = high;
0658:                                }
0659:                                isJavaIdentifierPart = Character
0660:                                        .isJavaIdentifierPart(Character
0661:                                                .toCodePoint(high, ch));
0662:                            } else {
0663:                                isJavaIdentifierPart = Character
0664:                                        .isJavaIdentifierPart(ch);
0665:                            }
0666:                        }
0667:                        if (!isJavaIdentifierPart) {
0668:                            name = names.fromChars(sbuf, 0, sp);
0669:                            token = keywords.key(name);
0670:                            return;
0671:                        }
0672:                    }
0673:                } while (true);
0674:            }
0675:
0676:            /** Are surrogates supported?
0677:             */
0678:            final static boolean surrogatesSupported = surrogatesSupported();
0679:
0680:            private static boolean surrogatesSupported() {
0681:                try {
0682:                    Character.isHighSurrogate('a');
0683:                    return true;
0684:                } catch (NoSuchMethodError ex) {
0685:                    return false;
0686:                }
0687:            }
0688:
0689:            /** Scan surrogate pairs.  If 'ch' is a high surrogate and
0690:             *  the next character is a low surrogate, then put the low
0691:             *  surrogate in 'ch', and return the high surrogate.
0692:             *  otherwise, just return 0.
0693:             */
0694:            private char scanSurrogates() {
0695:                if (surrogatesSupported && Character.isHighSurrogate(ch)) {
0696:                    char high = ch;
0697:
0698:                    scanChar();
0699:
0700:                    if (Character.isLowSurrogate(ch)) {
0701:                        return high;
0702:                    }
0703:
0704:                    ch = high;
0705:                }
0706:
0707:                return 0;
0708:            }
0709:
0710:            /** Return true if ch can be part of an operator.
0711:             */
0712:            private boolean isSpecial(char ch) {
0713:                switch (ch) {
0714:                case '!':
0715:                case '%':
0716:                case '&':
0717:                case '*':
0718:                case '?':
0719:                case '+':
0720:                case '-':
0721:                case ':':
0722:                case '<':
0723:                case '=':
0724:                case '>':
0725:                case '^':
0726:                case '|':
0727:                case '~':
0728:                case '@':
0729:                    return true;
0730:                default:
0731:                    return false;
0732:                }
0733:            }
0734:
0735:            /** Read longest possible sequence of special characters and convert
0736:             *  to token.
0737:             */
0738:            private void scanOperator() {
0739:                while (true) {
0740:                    putChar(ch);
0741:                    Name newname = names.fromChars(sbuf, 0, sp);
0742:                    if (keywords.key(newname) == IDENTIFIER) {
0743:                        sp--;
0744:                        break;
0745:                    }
0746:                    name = newname;
0747:                    token = keywords.key(newname);
0748:                    scanChar();
0749:                    if (!isSpecial(ch))
0750:                        break;
0751:                }
0752:            }
0753:
0754:            /**
0755:             * Scan a documention comment; determine if a deprecated tag is present.
0756:             * Called once the initial /, * have been skipped, positioned at the second *
0757:             * (which is treated as the beginning of the first line).
0758:             * Stops positioned at the closing '/'.
0759:             */
0760:            @SuppressWarnings("fallthrough")
0761:            private void scanDocComment() {
0762:                boolean deprecatedPrefix = false;
0763:
0764:                forEachLine: while (bp < buflen) {
0765:
0766:                    // Skip optional WhiteSpace at beginning of line
0767:                    while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
0768:                        scanCommentChar();
0769:                    }
0770:
0771:                    // Skip optional consecutive Stars
0772:                    while (bp < buflen && ch == '*') {
0773:                        scanCommentChar();
0774:                        if (ch == '/') {
0775:                            return;
0776:                        }
0777:                    }
0778:
0779:                    // Skip optional WhiteSpace after Stars
0780:                    while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
0781:                        scanCommentChar();
0782:                    }
0783:
0784:                    deprecatedPrefix = false;
0785:                    // At beginning of line in the JavaDoc sense.
0786:                    if (bp < buflen && ch == '@' && !deprecatedFlag) {
0787:                        scanCommentChar();
0788:                        if (bp < buflen && ch == 'd') {
0789:                            scanCommentChar();
0790:                            if (bp < buflen && ch == 'e') {
0791:                                scanCommentChar();
0792:                                if (bp < buflen && ch == 'p') {
0793:                                    scanCommentChar();
0794:                                    if (bp < buflen && ch == 'r') {
0795:                                        scanCommentChar();
0796:                                        if (bp < buflen && ch == 'e') {
0797:                                            scanCommentChar();
0798:                                            if (bp < buflen && ch == 'c') {
0799:                                                scanCommentChar();
0800:                                                if (bp < buflen && ch == 'a') {
0801:                                                    scanCommentChar();
0802:                                                    if (bp < buflen
0803:                                                            && ch == 't') {
0804:                                                        scanCommentChar();
0805:                                                        if (bp < buflen
0806:                                                                && ch == 'e') {
0807:                                                            scanCommentChar();
0808:                                                            if (bp < buflen
0809:                                                                    && ch == 'd') {
0810:                                                                deprecatedPrefix = true;
0811:                                                                scanCommentChar();
0812:                                                            }
0813:                                                        }
0814:                                                    }
0815:                                                }
0816:                                            }
0817:                                        }
0818:                                    }
0819:                                }
0820:                            }
0821:                        }
0822:                    }
0823:                    if (deprecatedPrefix && bp < buflen) {
0824:                        if (Character.isWhitespace(ch)) {
0825:                            deprecatedFlag = true;
0826:                        } else if (ch == '*') {
0827:                            scanCommentChar();
0828:                            if (ch == '/') {
0829:                                deprecatedFlag = true;
0830:                                return;
0831:                            }
0832:                        }
0833:                    }
0834:
0835:                    // Skip rest of line
0836:                    while (bp < buflen) {
0837:                        switch (ch) {
0838:                        case '*':
0839:                            scanCommentChar();
0840:                            if (ch == '/') {
0841:                                return;
0842:                            }
0843:                            break;
0844:                        case CR: // (Spec 3.4)
0845:                            scanCommentChar();
0846:                            if (ch != LF) {
0847:                                continue forEachLine;
0848:                            }
0849:                            /* fall through to LF case */
0850:                        case LF: // (Spec 3.4)
0851:                            scanCommentChar();
0852:                            continue forEachLine;
0853:                        default:
0854:                            scanCommentChar();
0855:                        }
0856:                    } // rest of line
0857:                } // forEachLine
0858:                return;
0859:            }
0860:
0861:            /** The value of a literal token, recorded as a string.
0862:             *  For integers, leading 0x and 'l' suffixes are suppressed.
0863:             */
0864:            public String stringVal() {
0865:                return new String(sbuf, 0, sp);
0866:            }
0867:
0868:            /** Read the next token from the input.
                  *  <p>
                  *  Whitespace, line terminators and comments are consumed in a
                  *  loop; each of them is reported through processWhiteSpace(),
                  *  processLineTerminator() or processComment() and scanning then
                  *  continues.  Every other case scans exactly one token (or
                  *  reports a lexical error) and returns.
                  *  <p>
                  *  On entry prevEndPos takes the old endPos and sp is reset to 0.
                  *  On every exit path endPos is set to bp and, when scannerDebug
                  *  is on, the raw characters of the token are printed.
0869:             */
0870:            public void nextToken() {
0871:
0872:                try {
                         // Remember where the previous token ended and restart the sbuf index.
0873:                    prevEndPos = endPos;
0874:                    sp = 0;
0875:
                         // Loop until a token is produced: the whitespace, line-terminator
                         // and comment cases break out of the switch and go around again.
0876:                    while (true) {
                             // Each round starts a fresh candidate at the current buffer position.
0877:                        pos = bp;
0878:                        switch (ch) {
0879:                        case ' ': // (Spec 3.6)
0880:                        case '\t': // (Spec 3.6)
0881:                        case FF: // (Spec 3.6)
                                 // Consume the whole run of blanks before reporting it.
0882:                            do {
0883:                                scanChar();
0884:                            } while (ch == ' ' || ch == '\t' || ch == FF);
0885:                            endPos = bp;
0886:                            processWhiteSpace();
0887:                            break;
0888:                        case LF: // (Spec 3.4)
0889:                            scanChar();
0890:                            endPos = bp;
0891:                            processLineTerminator();
0892:                            break;
0893:                        case CR: // (Spec 3.4)
0894:                            scanChar();
                                 // CR immediately followed by LF is reported as one terminator.
0895:                            if (ch == LF) {
0896:                                scanChar();
0897:                            }
0898:                            endPos = bp;
0899:                            processLineTerminator();
0900:                            break;
                             // ASCII letters, dollar sign and underscore start an identifier or keyword.
0901:                        case 'A':
0902:                        case 'B':
0903:                        case 'C':
0904:                        case 'D':
0905:                        case 'E':
0906:                        case 'F':
0907:                        case 'G':
0908:                        case 'H':
0909:                        case 'I':
0910:                        case 'J':
0911:                        case 'K':
0912:                        case 'L':
0913:                        case 'M':
0914:                        case 'N':
0915:                        case 'O':
0916:                        case 'P':
0917:                        case 'Q':
0918:                        case 'R':
0919:                        case 'S':
0920:                        case 'T':
0921:                        case 'U':
0922:                        case 'V':
0923:                        case 'W':
0924:                        case 'X':
0925:                        case 'Y':
0926:                        case 'Z':
0927:                        case 'a':
0928:                        case 'b':
0929:                        case 'c':
0930:                        case 'd':
0931:                        case 'e':
0932:                        case 'f':
0933:                        case 'g':
0934:                        case 'h':
0935:                        case 'i':
0936:                        case 'j':
0937:                        case 'k':
0938:                        case 'l':
0939:                        case 'm':
0940:                        case 'n':
0941:                        case 'o':
0942:                        case 'p':
0943:                        case 'q':
0944:                        case 'r':
0945:                        case 's':
0946:                        case 't':
0947:                        case 'u':
0948:                        case 'v':
0949:                        case 'w':
0950:                        case 'x':
0951:                        case 'y':
0952:                        case 'z':
0953:                        case '$':
0954:                        case '_':
0955:                            scanIdent();
0956:                            return;
                             // Leading zero: a hex literal after x or X, otherwise scanned with radix 8.
0957:                        case '0':
0958:                            scanChar();
0959:                            if (ch == 'x' || ch == 'X') {
0960:                                scanChar();
0961:                                if (ch == '.') {
0962:                                    scanHexFractionAndSuffix(false);
0963:                                } else if (digit(16) < 0) {
0964:                                    lexError("invalid.hex.number");
0965:                                } else {
0966:                                    scanNumber(16);
0967:                                }
0968:                            } else {
                                     // The zero was already consumed above, so store it explicitly.
0969:                                putChar('0');
0970:                                scanNumber(8);
0971:                            }
0972:                            return;
0973:                        case '1':
0974:                        case '2':
0975:                        case '3':
0976:                        case '4':
0977:                        case '5':
0978:                        case '6':
0979:                        case '7':
0980:                        case '8':
0981:                        case '9':
0982:                            scanNumber(10);
0983:                            return;
                             // A dot starts a fraction, is the first of three dots (ELLIPSIS), or is a plain DOT.
0984:                        case '.':
0985:                            scanChar();
0986:                            if ('0' <= ch && ch <= '9') {
0987:                                putChar('.');
0988:                                scanFractionAndSuffix();
0989:                            } else if (ch == '.') {
0990:                                putChar('.');
0991:                                putChar('.');
0992:                                scanChar();
0993:                                if (ch == '.') {
0994:                                    scanChar();
0995:                                    putChar('.');
0996:                                    token = ELLIPSIS;
0997:                                } else {
                                         // Exactly two dots is reported as a lexical error.
0998:                                    lexError("malformed.fp.lit");
0999:                                }
1000:                            } else {
1001:                                token = DOT;
1002:                            }
1003:                            return;
                             // Single-character separators.
1004:                        case ',':
1005:                            scanChar();
1006:                            token = COMMA;
1007:                            return;
1008:                        case ';':
1009:                            scanChar();
1010:                            token = SEMI;
1011:                            return;
1012:                        case '(':
1013:                            scanChar();
1014:                            token = LPAREN;
1015:                            return;
1016:                        case ')':
1017:                            scanChar();
1018:                            token = RPAREN;
1019:                            return;
1020:                        case '[':
1021:                            scanChar();
1022:                            token = LBRACKET;
1023:                            return;
1024:                        case ']':
1025:                            scanChar();
1026:                            token = RBRACKET;
1027:                            return;
1028:                        case '{':
1029:                            scanChar();
1030:                            token = LBRACE;
1031:                            return;
1032:                        case '}':
1033:                            scanChar();
1034:                            token = RBRACE;
1035:                            return;
                             // A slash starts a line comment, a block or doc comment,
                             // the slash-equals operator, or is a plain SLASH.
1036:                        case '/':
1037:                            scanChar();
1038:                            if (ch == '/') {
                                     // Line comment: runs up to CR, LF or the end of the buffer.
1039:                                do {
1040:                                    scanCommentChar();
1041:                                } while (ch != CR && ch != LF && bp < buflen);
                                     // processComment is only invoked when the comment ended before the end of the buffer.
1042:                                if (bp < buflen) {
1043:                                    endPos = bp;
1044:                                    processComment(CommentStyle.LINE);
1045:                                }
1046:                                break;
1047:                            } else if (ch == '*') {
1048:                                scanChar();
1049:                                CommentStyle style;
1050:                                if (ch == '*') {
                                         // A second star marks a doc comment; scanDocComment also looks for the deprecated tag.
1051:                                    style = CommentStyle.JAVADOC;
1052:                                    scanDocComment();
1053:                                } else {
1054:                                    style = CommentStyle.BLOCK;
                                         // Advance until a star is directly followed by a slash, or the buffer ends.
1055:                                    while (bp < buflen) {
1056:                                        if (ch == '*') {
1057:                                            scanChar();
1058:                                            if (ch == '/')
1059:                                                break;
1060:                                        } else {
1061:                                            scanCommentChar();
1062:                                        }
1063:                                    }
1064:                                }
                                     // Both branches leave ch on the closing slash when the comment was terminated.
1065:                                if (ch == '/') {
1066:                                    scanChar();
1067:                                    endPos = bp;
1068:                                    processComment(style);
1069:                                    break;
1070:                                } else {
1071:                                    lexError("unclosed.comment");
1072:                                    return;
1073:                                }
1074:                            } else if (ch == '=') {
1075:                                name = names.slashequals;
1076:                                token = SLASHEQ;
1077:                                scanChar();
1078:                            } else {
1079:                                name = names.slash;
1080:                                token = SLASH;
1081:                            }
1082:                            return;
                             // Character literal.
1083:                        case '\'':
1084:                            scanChar();
1085:                            if (ch == '\'') {
1086:                                lexError("empty.char.lit");
1087:                            } else {
                                     // A line end inside the literal is reported, then scanning of the literal continues.
1088:                                if (ch == CR || ch == LF)
1089:                                    lexError(pos,
1090:                                            "illegal.line.end.in.char.lit");
1091:                                scanLitChar();
1092:                                if (ch == '\'') {
1093:                                    scanChar();
1094:                                    token = CHARLITERAL;
1095:                                } else {
1096:                                    lexError(pos, "unclosed.char.lit");
1097:                                }
1098:                            }
1099:                            return;
                             // String literal: must be closed before CR, LF or the end of the buffer.
1100:                        case '\"':
1101:                            scanChar();
1102:                            while (ch != '\"' && ch != CR && ch != LF
1103:                                    && bp < buflen)
1104:                                scanLitChar();
1105:                            if (ch == '\"') {
1106:                                token = STRINGLITERAL;
1107:                                scanChar();
1108:                            } else {
1109:                                lexError(pos, "unclosed.str.lit");
1110:                            }
1111:                            return;
                             // Everything else: operators, non-ASCII identifier starts,
                             // end of input, or an illegal character.
1112:                        default:
1113:                            if (isSpecial(ch)) {
1114:                                scanOperator();
1115:                            } else {
1116:                                boolean isJavaIdentifierStart;
1117:                                if (ch < '\u0080') {
1118:                                    // all ASCII range chars already handled, above
1119:                                    isJavaIdentifierStart = false;
1120:                                } else {
1121:                                    char high = scanSurrogates();
1122:                                    if (high != 0) {
                                             // Store the high surrogate; putChar is used only when sbuf is full
                                             // (presumably because it grows the buffer; confirm against putChar).
1123:                                        if (sp == sbuf.length) {
1124:                                            putChar(high);
1125:                                        } else {
1126:                                            sbuf[sp++] = high;
1127:                                        }
1128:
                                             // Classify the full code point formed by the surrogate pair.
1129:                                        isJavaIdentifierStart = Character
1130:                                                .isJavaIdentifierStart(Character
1131:                                                        .toCodePoint(high, ch));
1132:                                    } else {
1133:                                        isJavaIdentifierStart = Character
1134:                                                .isJavaIdentifierStart(ch);
1135:                                    }
1136:                                }
1137:                                if (isJavaIdentifierStart) {
1138:                                    scanIdent();
                                     // Operator precedence: this condition reads
                                     // bp == buflen || (ch == EOI && bp + 1 == buflen).
1139:                                } else if (bp == buflen || ch == EOI
1140:                                        && bp + 1 == buflen) { // JLS 3.5
1141:                                    token = EOF;
1142:                                    pos = bp = eofPos;
1143:                                } else {
                                         // Report the offending character by its numeric value, then skip it.
1144:                                    lexError("illegal.char", String
1145:                                            .valueOf((int) ch));
1146:                                    scanChar();
1147:                                }
1148:                            }
1149:                            return;
1150:                        }
1151:                    }
1152:                } finally {
                         // Runs on every exit path: record the end position and optionally
                         // trace the raw text of the token.
1153:                    endPos = bp;
1154:                    if (scannerDebug)
1155:                        System.out.println("nextToken(" + pos + "," + endPos
1156:                                + ")=|"
1157:                                + new String(getRawCharacters(pos, endPos))
1158:                                + "|");
1159:                }
1160:            }
1161:
1162:            /** Return the current token, set by nextToken().
1163:             */
1164:            public Token token() {
1165:                return token;
1166:            }
1167:
1168:            /** Sets the current token.
1169:             */
1170:            public void token(Token token) {
1171:                this .token = token;
1172:            }
1173:
1174:            /** Return the current token's position: a 0-based
1175:             *  offset from beginning of the raw input stream
1176:             *  (before unicode translation)
1177:             */
1178:            public int pos() {
1179:                return pos;
1180:            }
1181:
1182:            /** Return the last character position of the current token.
1183:             */
1184:            public int endPos() {
1185:                return endPos;
1186:            }
1187:
1188:            /** Return the last character position of the previous token.
1189:             */
1190:            public int prevEndPos() {
1191:                return prevEndPos;
1192:            }
1193:
1194:            /** Return the position where a lexical error occurred;
1195:             */
1196:            public int errPos() {
1197:                return errPos;
1198:            }
1199:
1200:            /** Set the position where a lexical error occurred;
1201:             */
1202:            public void errPos(int pos) {
1203:                errPos = pos;
1204:            }
1205:
1206:            /** Return the name of an identifier or token for the current token.
1207:             */
1208:            public Name name() {
1209:                return name;
1210:            }
1211:
1212:            /** Return the radix of a numeric literal token.
1213:             */
1214:            public int radix() {
1215:                return radix;
1216:            }
1217:
1218:            /** Has a @deprecated been encountered in last doc comment?
1219:             *  This needs to be reset by client with resetDeprecatedFlag.
1220:             */
1221:            public boolean deprecatedFlag() {
1222:                return deprecatedFlag;
1223:            }
1224:
1225:            public void resetDeprecatedFlag() {
1226:                deprecatedFlag = false;
1227:            }
1228:
1229:            /**
1230:             * Returns the documentation string of the current token.
1231:             */
1232:            public String docComment() {
1233:                return null;
1234:            }
1235:
1236:            /**
1237:             * Returns a copy of the input buffer, up to its inputLength.
1238:             * Unicode escape sequences are not translated.
1239:             */
1240:            public char[] getRawCharacters() {
1241:                char[] chars = new char[buflen];
1242:                System.arraycopy(buf, 0, chars, 0, buflen);
1243:                return chars;
1244:            }
1245:
1246:            /**
1247:             * Returns a copy of a character array subset of the input buffer.
1248:             * The returned array begins at the <code>beginIndex</code> and
1249:             * extends to the character at index <code>endIndex - 1</code>.
1250:             * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1251:             * This behavior is like 
1252:             * <code>String.substring(beginIndex, endIndex)</code>.
1253:             * Unicode escape sequences are not translated.
1254:             *
1255:             * @param beginIndex the beginning index, inclusive.
1256:             * @param endIndex the ending index, exclusive.
1257:             * @throws IndexOutOfBounds if either offset is outside of the
1258:             *         array bounds
1259:             */
1260:            public char[] getRawCharacters(int beginIndex, int endIndex) {
1261:                int length = endIndex - beginIndex;
1262:                char[] chars = new char[length];
1263:                System.arraycopy(buf, beginIndex, chars, 0, length);
1264:                return chars;
1265:            }
1266:
/**
 * The kinds of comment reported to processComment().
 */
public enum CommentStyle {
    /** A comment introduced by two slashes, running to the end of the line. */
    LINE,
    /** A delimited comment whose opening is not followed by a second star. */
    BLOCK,
    /** A delimited comment whose opening is followed by a second star. */
    JAVADOC
}
1270:
1271:            /**
1272:             * Called when a complete comment has been scanned. pos and endPos 
1273:             * will mark the comment boundary.
1274:             */
1275:            protected void processComment(CommentStyle style) {
1276:                if (scannerDebug)
1277:                    System.out.println("processComment(" + pos + "," + endPos
1278:                            + "," + style + ")=|"
1279:                            + new String(getRawCharacters(pos, endPos)) + "|");
1280:            }
1281:
1282:            /**
1283:             * Called when a complete whitespace run has been scanned. pos and endPos 
1284:             * will mark the whitespace boundary.
1285:             */
1286:            protected void processWhiteSpace() {
1287:                if (scannerDebug)
1288:                    System.out.println("processWhitespace(" + pos + ","
1289:                            + endPos + ")=|"
1290:                            + new String(getRawCharacters(pos, endPos)) + "|");
1291:            }
1292:
1293:            /**
1294:             * Called when a line terminator has been processed.
1295:             */
1296:            protected void processLineTerminator() {
1297:                if (scannerDebug)
1298:                    System.out.println("processTerminator(" + pos + ","
1299:                            + endPos + ")=|"
1300:                            + new String(getRawCharacters(pos, endPos)) + "|");
1301:            }
1302:
1303:            /** Build a map for translating between line numbers and
1304:             * positions in the input.
1305:             *
1306:             * @return a LineMap */
1307:            public Position.LineMap getLineMap() {
1308:                return Position.makeLineMap(buf, buflen, false);
1309:            }
1310:
1311:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.