Source Code Cross Referenced for ELLexer.java in  » IDE-Netbeans » el.lexer » org » netbeans » modules » el » lexer » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » IDE Netbeans » el.lexer » org.netbeans.modules.el.lexer 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003:         *
004:         * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005:         *
006:         * The contents of this file are subject to the terms of either the GNU
007:         * General Public License Version 2 only ("GPL") or the Common
008:         * Development and Distribution License("CDDL") (collectively, the
009:         * "License"). You may not use this file except in compliance with the
010:         * License. You can obtain a copy of the License at
011:         * http://www.netbeans.org/cddl-gplv2.html
012:         * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013:         * specific language governing permissions and limitations under the
014:         * License.  When distributing the software, include this License Header
015:         * Notice in each file and include the License file at
016:         * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
017:         * particular file as subject to the "Classpath" exception as provided
018:         * by Sun in the GPL Version 2 section of the License file that
019:         * accompanied this code. If applicable, add the following below the
020:         * License Header, with the fields enclosed by brackets [] replaced by
021:         * your own identifying information:
022:         * "Portions Copyrighted [year] [name of copyright owner]"
023:         *
024:         * Contributor(s):
025:         *
026:         * The Original Software is NetBeans. The Initial Developer of the Original
027:         * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028:         * Microsystems, Inc. All Rights Reserved.
029:         *
030:         * If you wish your version of this file to be governed by only the CDDL
031:         * or only the GPL Version 2, indicate your decision by adding
032:         * "[Contributor] elects to include this software in this distribution
033:         * under the [CDDL or GPL Version 2] license." If you do not indicate a
034:         * single choice of license, a recipient has the option to distribute
035:         * your version of this file under either the CDDL, the GPL Version 2 or
036:         * to extend the choice of license to its licensees as provided above.
037:         * However, if you add GPL Version 2 code and therefore, elected the GPL
038:         * Version 2 license, then the option applies only if the new code is
039:         * made subject to such option by the copyright holder.
040:         */
041:
042:        package org.netbeans.modules.el.lexer;
043:
044:        import java.util.logging.Level;
045:        import java.util.logging.Logger;
046:        import org.netbeans.api.lexer.Token;
047:        import org.netbeans.modules.el.lexer.api.ELTokenId;
048:        import org.netbeans.spi.lexer.Lexer;
049:        import org.netbeans.spi.lexer.LexerInput;
050:        import org.netbeans.spi.lexer.LexerRestartInfo;
051:        import org.netbeans.spi.lexer.TokenFactory;
052:
053:        /**
054:         * Lexical analyzer for Expression Language.
055:         * It does NOT recognize the EL delimiters ${ } and #{ }
056:         *
057:         * @author Petr Pisl
058:         * @author Marek Fukala
059:         *
060:         * @version 1.00
061:         */
062:
063:        public class ELLexer implements  Lexer<ELTokenId> {
064:
065:            private static final Logger LOGGER = Logger.getLogger(ELLexer.class
066:                    .getName()); // logger named after this class
067:            private static final boolean LOG = Boolean
068:                    .getBoolean("j2ee_lexer_debug"); //NOI18N - true only when this system property is set to "true"
069:
070:            private static final int EOF = LexerInput.EOF; // local alias; compared against the result of input.read()
071:
072:            private final LexerInput input; // character input, taken from the restart info in the constructor
073:
074:            private final TokenFactory<ELTokenId> tokenFactory; // token factory, taken from the restart info in the constructor
075:
076:            public Object state() { // exposes the current internal lexer state
077:                return lexerState; // the int is autoboxed to Integer; the constructor casts a restored state back to Integer
078:            }
079:
080:            /** Current internal state of the lexical analyzer; it holds one of the
081:             * int state constants declared below. It starts as INIT unless the
082:             * constructor restores a state from the restart info, and it is the
083:             * value returned by state().
084:             */
085:            private int lexerState = INIT;
086:
087:            /* Internal states used by the analyzer. There
088:             * can be any number of them declared by the analyzer.
089:             */
090:            private static final int INIT = 1; //initial lexer state
091:            private static final int ISI_IDENTIFIER = 2; // inside an identifier
092:            private static final int ISI_CHAR = 3; // inside char constant
093:            private static final int ISI_CHAR_A_BSLASH = 4; // inside char constant after backslash
094:            private static final int ISI_STRING = 5; // inside a string " ... "
095:            private static final int ISI_STRING_A_BSLASH = 6; // inside string "..." constant after backslash
096:            private static final int ISI_CHAR_STRING = 7; // inside a string '...'
097:            private static final int ISI_CHAR_STRING_A_BSLASH = 8; // inside string '...' constant after backslash
098:            private static final int ISA_ZERO = 9; // after '0'
099:            private static final int ISI_INT = 10; // integer number
100:            private static final int ISI_OCTAL = 11; // octal number
101:            private static final int ISI_DOUBLE = 12; // double number
102:            private static final int ISI_DOUBLE_EXP = 13; // double number, after the 'e'/'E' exponent marker
103:            private static final int ISI_HEX = 14; // hex number
104:            private static final int ISA_DOT = 15; // after '.'
105:            private static final int ISI_WHITESPACE = 16; // inside white space
106:            private static final int ISA_EQ = 17; // after '='
107:            private static final int ISA_GT = 18; // after '>'
108:            private static final int ISA_LT = 19; // after '<'
109:            //private static final int ISA_PLUS = 20; // after '+'
110:            //private static final int ISA_MINUS = 21; // after '-'
111:            //private static final int ISA_STAR = 22; // after '*'
112:            private static final int ISA_PIPE = 23; // after '|'
113:            private static final int ISA_AND = 24; // after '&'
114:            private static final int ISA_EXCLAMATION = 25; // after '!'
115:            private static final int ISI_BRACKET = 26; // after '['
116:            private static final int ISI_BRACKET_A_WHITESPACE = 27; // inside white space while in ISI_BRACKET
117:            private static final int ISI_BRACKET_A_IDENTIFIER = 28; // inside an identifier while in ISI_BRACKET
118:            private static final int ISI_BRACKET_ISA_EQ = 29; // after '=' while in ISI_BRACKET
119:            private static final int ISI_BRACKET_ISA_GT = 30; // after '>' while in ISI_BRACKET
120:            private static final int ISI_BRACKET_ISA_LT = 31; // after '<' while in ISI_BRACKET
121:            private static final int ISI_BRACKET_ISA_PIPE = 32; // after '|' while in ISI_BRACKET
122:            private static final int ISI_BRACKET_ISA_AND = 33; // after '&' while in ISI_BRACKET
123:            private static final int ISI_BRACKET_ISA_ZERO = 34; // after '0' while in ISI_BRACKET
124:            private static final int ISI_BRACKET_ISA_DOT = 35; // after '.' while in ISI_BRACKET
125:            private static final int ISI_BRACKET_ISI_INT = 36; // integer number while in ISI_BRACKET
126:            private static final int ISI_BRACKET_ISI_OCTAL = 37; // octal number
127:            private static final int ISI_BRACKET_ISI_DOUBLE = 38; // double number
128:            private static final int ISI_BRACKET_ISI_DOUBLE_EXP = 39; // double number, after the 'e'/'E' exponent marker
129:            private static final int ISI_BRACKET_ISI_HEX = 40; // hex number
130:            private static final int ISI_DOULE_EXP_ISA_SIGN = 41; // NOTE(review): presumably after the sign of a double exponent; usage not visible here - confirm
131:            private static final int ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN = 42; // NOTE(review): presumably the ISI_BRACKET variant of the state above - confirm
132:
133:            //private static final int ISA_PERCENT = 24; // after '%'
134:
135:            public ELLexer(LexerRestartInfo<ELTokenId> info) { // builds the lexer from the given restart info
136:                this .input = info.input(); // character input this lexer reads from
137:                this .tokenFactory = info.tokenFactory(); // token factory supplied by the restart info
138:                if (info.state() == null) { // no saved state: start in INIT
139:                    this .lexerState = INIT;
140:                } else { // resume from the saved state
141:                    this .lexerState = ((Integer) info.state()).intValue(); // the cast throws ClassCastException if the saved state is not an Integer
142:                }
143:            }
144:
145:            /** This is the core function of the analyzer. It returns one of the following:
146:             * a) the next token recognized in the scanned text
147:             * b) an EOL token when an end of line is found in the scanned text
148:             * c) null when there are no more characters available in the input.
149:             *
150:             * The function scans the active character and does one or more
151:             * of the following actions:
152:             * 1. change internal analyzer state (state = new-state)
153:             * 2. return token ID (return token-ID)
154:             * 3. adjust current position to signal different end of token;
155:             *    the character that offset points to is not included in the token
156:             */
157:            public Token<ELTokenId> nextToken() {
158:
159:                int actChar;
160:
161:                while (true) {
162:                    actChar = input.read();
163:
164:                    if (actChar == EOF) {
165:                        if (input.readLengthEOF() == 1) {
166:                            return null; //just EOL is read
167:                        } else {
168:                            //there is something else in the buffer except EOL
169:                            //we will return last token now
170:                            input.backup(1); //backup the EOL, we will return null in next nextToken() call
171:                            break;
172:                        }
173:                    }
174:
175:                    switch (lexerState) { // switch by the current internal state
176:                    case INIT:
177:
178:                        switch (actChar) {
179:                        case '"':
180:                            lexerState = ISI_STRING;
181:                            break;
182:                        case '\'':
183:                            lexerState = ISI_CHAR;
184:                            break;
185:                        case '/':
186:                            return token(ELTokenId.DIV);
187:                        case '=':
188:                            lexerState = ISA_EQ;
189:                            break;
190:                        case '>':
191:                            lexerState = ISA_GT;
192:                            break;
193:                        case '<':
194:                            lexerState = ISA_LT;
195:                            break;
196:                        case '+':
197:                            return token(ELTokenId.PLUS);
198:                        case '-':
199:                            return token(ELTokenId.MINUS);
200:                        case '*':
201:                            return token(ELTokenId.MUL);
202:                        case '|':
203:                            lexerState = ISA_PIPE;
204:                            break;
205:                        case '&':
206:                            lexerState = ISA_AND;
207:                            break;
208:                        case '[':
209:                            return token(ELTokenId.LBRACKET);
210:                        case ']':
211:                            return token(ELTokenId.RBRACKET);
212:                        case '%':
213:                            return token(ELTokenId.MOD);
214:                        case ':':
215:                            return token(ELTokenId.COLON);
216:                        case '!':
217:                            lexerState = ISA_EXCLAMATION;
218:                            break;
219:                        case '(':
220:                            return token(ELTokenId.LPAREN);
221:                        case ')':
222:                            return token(ELTokenId.RPAREN);
223:                        case ',':
224:                            return token(ELTokenId.COMMA);
225:                        case '?':
226:                            return token(ELTokenId.QUESTION);
227:                        case '\n':
228:                            return token(ELTokenId.EOL);
229:                        case '0':
230:                            lexerState = ISA_ZERO;
231:                            break;
232:                        case '.':
233:                            lexerState = ISA_DOT;
234:                            break;
235:                        default:
236:                            // Check for whitespace
237:                            if (Character.isWhitespace(actChar)) {
238:                                lexerState = ISI_WHITESPACE;
239:                                break;
240:                            }
241:
242:                            // check whether it can be identifier
243:                            if (Character.isJavaIdentifierStart(actChar)) {
244:                                lexerState = ISI_IDENTIFIER;
245:                                break;
246:                            }
247:                            // Check for digit
248:                            if (Character.isDigit(actChar)) {
249:                                lexerState = ISI_INT;
250:                                break;
251:                            }
252:                            return token(ELTokenId.INVALID_CHAR);
253:                            //break;
254:                        }
255:                        break;
256:
257:                    case ISI_WHITESPACE: // white space
258:                        if (!Character.isWhitespace(actChar)) {
259:                            lexerState = INIT;
260:                            input.backup(1);
261:                            return token(ELTokenId.WHITESPACE);
262:                        }
263:                        break;
264:
265:                    case ISI_BRACKET:
266:                        switch (actChar) {
267:                        case ']':
268:                            lexerState = INIT;
269:                            input.backup(1);
270:                            return token(ELTokenId.IDENTIFIER);
271:                        case '"':
272:                            return token(ELTokenId.LBRACKET);
273:                        case '\'':
274:                            return token(ELTokenId.LBRACKET);
275:                        case '/':
276:                            return token(ELTokenId.DIV);
277:                        case '+':
278:                            return token(ELTokenId.PLUS);
279:                        case '-':
280:                            return token(ELTokenId.MINUS);
281:                        case '*':
282:                            return token(ELTokenId.MUL);
283:                        case '[':
284:                            return token(ELTokenId.LBRACKET);
285:                        case '%':
286:                            return token(ELTokenId.MOD);
287:                        case ':':
288:                            return token(ELTokenId.COLON);
289:                        case '(':
290:                            return token(ELTokenId.LPAREN);
291:                        case ')':
292:                            return token(ELTokenId.RPAREN);
293:                        case ',':
294:                            return token(ELTokenId.COMMA);
295:                        case '?':
296:                            return token(ELTokenId.QUESTION);
297:                        case '=':
298:                            lexerState = ISI_BRACKET_ISA_EQ;
299:                            break;
300:                        case '>':
301:                            lexerState = ISI_BRACKET_ISA_GT;
302:                            break;
303:                        case '<':
304:                            lexerState = ISI_BRACKET_ISA_LT;
305:                            break;
306:                        case '|':
307:                            lexerState = ISI_BRACKET_ISA_PIPE;
308:                            break;
309:                        case '&':
310:                            lexerState = ISI_BRACKET_ISA_AND;
311:                            break;
312:                        case '0':
313:                            lexerState = ISI_BRACKET_ISA_ZERO;
314:                            break;
315:                        case '.':
316:                            lexerState = ISI_BRACKET_ISA_DOT;
317:                            break;
318:                        default:
319:                            // Check for whitespace
320:                            if (Character.isWhitespace(actChar)) {
321:                                lexerState = ISI_BRACKET_A_WHITESPACE;
322:                                break;
323:                            }
324:                            if (Character.isJavaIdentifierStart(actChar)) {
325:                                // - System.out.print(" state->ISI_IDENTIFIER ");
326:                                lexerState = ISI_BRACKET_A_IDENTIFIER;
327:                                break;
328:                            }
329:                            // Check for digit
330:                            if (Character.isDigit(actChar)) {
331:                                lexerState = ISI_BRACKET_ISI_INT;
332:                                break;
333:                            }
334:                            return token(ELTokenId.INVALID_CHAR);
335:                            //break;
336:                        }
337:                        break;
338:
339:                    case ISI_BRACKET_A_WHITESPACE:
340:                        if (!Character.isWhitespace(actChar)) {
341:                            lexerState = ISI_BRACKET;
342:                            input.backup(1);
343:                            return token(ELTokenId.WHITESPACE);
344:                        }
345:                        break;
346:
347:                    case ISI_BRACKET_ISA_EQ:
348:                    case ISA_EQ:
349:                        switch (actChar) {
350:                        case '=':
351:                            lexerState = INIT;
352:                            return token(ELTokenId.EQ_EQ);
353:                        default:
354:                            lexerState = (lexerState == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET
355:                                    : INIT;
356:                            input.backup(1);
357:                            break;
358:                        }
359:                        break;
360:
361:                    case ISI_BRACKET_ISA_GT:
362:                    case ISA_GT:
363:                        switch (actChar) {
364:                        case '=':
365:                            lexerState = INIT;
366:                            return token(ELTokenId.GT_EQ);
367:                        default:
368:                            lexerState = (lexerState == ISI_BRACKET_ISA_GT) ? ISI_BRACKET
369:                                    : INIT;
370:                            input.backup(1);
371:                            return token(ELTokenId.GT);
372:                        }
373:                        //break;
374:                    case ISI_BRACKET_ISA_LT:
375:                    case ISA_LT:
376:                        switch (actChar) {
377:                        case '=':
378:                            lexerState = INIT;
379:                            return token(ELTokenId.LT_EQ);
380:                        default:
381:                            lexerState = (lexerState == ISI_BRACKET_ISA_LT) ? ISI_BRACKET
382:                                    : INIT;
383:                            input.backup(1);
384:                            return token(ELTokenId.LT);
385:                        }
386:                        //break;
387:                    case ISI_BRACKET_ISA_PIPE:
388:                    case ISA_PIPE:
389:                        switch (actChar) {
390:                        case '|':
391:                            lexerState = INIT;
392:                            return token(ELTokenId.OR_OR);
393:                        default:
394:                            lexerState = (lexerState == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET
395:                                    : INIT;
396:                            input.backup(1);
397:                            break;
398:                        }
399:                        break;
400:                    case ISI_BRACKET_ISA_AND:
401:                    case ISA_AND:
402:                        switch (actChar) {
403:                        case '&':
404:                            lexerState = INIT;
405:                            return token(ELTokenId.AND_AND);
406:                        default:
407:                            lexerState = (lexerState == ISI_BRACKET_ISA_AND) ? ISI_BRACKET
408:                                    : INIT;
409:                            input.backup(1);
410:                            break;
411:                        }
412:                        break;
413:                    case ISA_EXCLAMATION:
414:                        switch (actChar) {
415:                        case '=':
416:                            lexerState = INIT;
417:                            return token(ELTokenId.NOT_EQ);
418:                        default:
419:                            lexerState = INIT;
420:                            input.backup(1);
421:                            return token(ELTokenId.NOT);
422:                        }
423:                    case ISI_STRING:
424:                        switch (actChar) {
425:                        case '\\':
426:                            lexerState = ISI_STRING_A_BSLASH;
427:                            break;
428:                        case '\n':
429:                            lexerState = INIT;
430:                            input.backup(1);
431:                            return token(ELTokenId.STRING_LITERAL);
432:                        case '"': // NOI18N
433:                            lexerState = INIT;
434:                            return token(ELTokenId.STRING_LITERAL);
435:                        }
436:                        break;
437:                    case ISI_STRING_A_BSLASH:
438:                        lexerState = ISI_STRING;
439:                        break;
440:                    case ISI_BRACKET_A_IDENTIFIER:
441:                    case ISI_IDENTIFIER:
442:                        if (!(Character.isJavaIdentifierPart(actChar))) {
443:                            switch (lexerState) {
444:                            case ISI_IDENTIFIER:
445:                                lexerState = INIT;
446:                                break;
447:                            case ISI_BRACKET_A_IDENTIFIER:
448:                                lexerState = ISI_BRACKET;
449:                                break;
450:                            }
451:                            Token<ELTokenId> tid = matchKeyword(input);
452:                            input.backup(1);
453:                            if (tid == null) {
454:                                if (actChar == ':') {
455:                                    tid = token(ELTokenId.TAG_LIB_PREFIX);
456:                                } else {
457:                                    tid = token(ELTokenId.IDENTIFIER);
458:                                }
459:                            }
460:                            return tid;
461:                        }
462:                        break;
463:
464:                    case ISI_CHAR:
465:                        switch (actChar) {
466:                        case '\\':
467:                            lexerState = ISI_CHAR_A_BSLASH;
468:                            break;
469:                        case '\n':
470:                            lexerState = INIT;
471:                            input.backup(1);
472:                            return token(ELTokenId.CHAR_LITERAL);
473:                        case '\'':
474:                            lexerState = INIT;
475:                            return token(ELTokenId.CHAR_LITERAL);
476:                        default:
477:                            char prevChar = input.readText().charAt(
478:                                    input.readLength() - 1);
479:                            if (prevChar != '\'' && prevChar != '\\') {
480:                                lexerState = ISI_CHAR_STRING;
481:                            }
482:                        }
483:                        break;
484:
485:                    case ISI_CHAR_A_BSLASH:
486:                        switch (actChar) {
487:                        case '\'':
488:                        case '\\':
489:                            break;
490:                        default:
491:                            input.backup(1);
492:                            break;
493:                        }
494:                        lexerState = ISI_CHAR;
495:                        break;
496:
497:                    case ISI_CHAR_STRING:
498:                        // - System.out.print(" ISI_CHAR_STRING (");
499:                        switch (actChar) {
500:                        case '\\':
501:                            // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
502:                            lexerState = ISI_CHAR_STRING_A_BSLASH;
503:                            break;
504:                        case '\n':
505:                            lexerState = INIT;
506:                            input.backup(1);
507:                            return token(ELTokenId.STRING_LITERAL);
508:                        case '\'':
509:                            lexerState = INIT;
510:                            return token(ELTokenId.STRING_LITERAL);
511:                        }
512:                        // - System.out.print(")");
513:                        break;
514:
515:                    case ISI_CHAR_STRING_A_BSLASH:
516:                        switch (actChar) {
517:                        case '\'':
518:                        case '\\':
519:                            break;
520:                        default:
521:                            input.backup(1);
522:                            break;
523:                        }
524:                        lexerState = ISI_CHAR_STRING;
525:                        break;
526:
527:                    case ISI_BRACKET_ISA_ZERO:
528:                    case ISA_ZERO:
529:                        switch (actChar) {
530:                        case '.':
531:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE
532:                                    : ISI_DOUBLE;
533:                            break;
534:                        case 'x':
535:                        case 'X':
536:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX
537:                                    : ISI_HEX;
538:                            break;
539:                        case 'l':
540:                        case 'L':
541:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
542:                                    : INIT;
543:                            return token(ELTokenId.LONG_LITERAL);
544:                        case 'f':
545:                        case 'F':
546:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
547:                                    : INIT;
548:                            return token(ELTokenId.FLOAT_LITERAL);
549:                        case 'd':
550:                        case 'D':
551:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
552:                                    : INIT;
553:                            return token(ELTokenId.DOUBLE_LITERAL);
554:                        case '8': // it's error to have '8' and '9' in octal number
555:                        case '9':
556:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
557:                                    : INIT;
558:                            return token(ELTokenId.INVALID_OCTAL_LITERAL);
559:                        case 'e':
560:                        case 'E':
561:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP
562:                                    : ISI_DOUBLE_EXP;
563:                            break;
564:                        default:
565:                            if (Character.isDigit(actChar)) { // '8' and '9' already handled
566:                                lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL
567:                                        : ISI_OCTAL;
568:                                break;
569:                            }
570:                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
571:                                    : INIT;
572:                            input.backup(1);
573:                            return token(ELTokenId.INT_LITERAL);
574:                        }
575:                        break;
576:
577:                    case ISI_BRACKET_ISI_INT:
578:                    case ISI_INT:
579:                        switch (actChar) {
580:                        case 'l':
581:                        case 'L':
582:                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
583:                                    : INIT;
584:                            return token(ELTokenId.LONG_LITERAL);
585:                        case '.':
586:                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE
587:                                    : ISI_DOUBLE;
588:                            break;
589:                        case 'f':
590:                        case 'F':
591:                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
592:                                    : INIT;
593:                            return token(ELTokenId.FLOAT_LITERAL);
594:                        case 'd':
595:                        case 'D':
596:                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
597:                                    : INIT;
598:                            return token(ELTokenId.DOUBLE_LITERAL);
599:                        case 'e':
600:                        case 'E':
601:                            lexerState = ISI_DOUBLE_EXP;
602:                            break;
603:                        default:
604:                            if (!(actChar >= '0' && actChar <= '9')) {
605:                                lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
606:                                        : INIT;
607:                                input.backup(1);
608:                                return token(ELTokenId.INT_LITERAL);
609:                            }
610:                        }
611:                        break;
612:
613:                    case ISI_BRACKET_ISI_OCTAL:
614:                    case ISI_OCTAL:
615:                        if (!(actChar >= '0' && actChar <= '7')) {
616:                            lexerState = (lexerState == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET
617:                                    : INIT;
618:                            input.backup(1);
619:                            return token(ELTokenId.OCTAL_LITERAL);
620:                        }
621:                        break;
622:
623:                    case ISI_BRACKET_ISI_DOUBLE:
624:                    case ISI_DOUBLE:
625:                        switch (actChar) {
626:                        case 'f':
627:                        case 'F':
628:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
629:                                    : INIT;
630:                            return token(ELTokenId.FLOAT_LITERAL);
631:                        case 'd':
632:                        case 'D':
633:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
634:                                    : INIT;
635:                            return token(ELTokenId.DOUBLE_LITERAL);
636:                        case 'e':
637:                        case 'E':
638:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP
639:                                    : ISI_DOUBLE_EXP;
640:                            break;
641:                        default:
642:                            if (!((actChar >= '0' && actChar <= '9') || actChar == '.')) {
643:                                lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
644:                                        : INIT;
645:                                input.backup(1);
646:                                return token(ELTokenId.DOUBLE_LITERAL);
647:                            }
648:                        }
649:                        break;
650:
651:                    case ISI_DOUBLE_EXP:
652:                    case ISI_BRACKET_ISI_DOUBLE_EXP:
653:                        switch (actChar) {
654:                        case 'f':
655:                        case 'F':
656:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
657:                                    : INIT;
658:                            return token(ELTokenId.FLOAT_LITERAL);
659:                        case 'd':
660:                        case 'D':
661:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
662:                                    : INIT;
663:                            return token(ELTokenId.DOUBLE_LITERAL);
664:                        case '-':
665:                        case '+':
666:                            lexerState = ISI_DOULE_EXP_ISA_SIGN;
667:                            break;
668:                        default:
669:                            if (!Character.isDigit(actChar)) {
670:                                //|| ch == '-' || ch == '+')) {
671:                                lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
672:                                        : INIT;
673:                                input.backup(1);
674:                                return token(ELTokenId.DOUBLE_LITERAL);
675:                            }
676:                        }
677:                        break;
678:
679:                    case ISI_DOULE_EXP_ISA_SIGN:
680:                    case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
681:                        if (!Character.isDigit(actChar)) {
682:                            lexerState = (lexerState == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET
683:                                    : INIT;
684:                            input.backup(1);
685:                            return token(ELTokenId.DOUBLE_LITERAL);
686:                        }
687:                        break;
688:
689:                    case ISI_BRACKET_ISI_HEX:
690:                    case ISI_HEX:
691:                        if (!((actChar >= 'a' && actChar <= 'f')
692:                                || (actChar >= 'A' && actChar <= 'F') || Character
693:                                .isDigit(actChar))) {
694:                            lexerState = (lexerState == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET
695:                                    : INIT;
696:                            input.backup(1);
697:                            return token(ELTokenId.HEX_LITERAL);
698:                        }
699:                        break;
700:
701:                    case ISI_BRACKET_ISA_DOT:
702:                    case ISA_DOT:
703:                        if (Character.isDigit(actChar)) {
704:                            lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE
705:                                    : ISI_DOUBLE;
706:
707:                        } else { // only single dot
708:                            lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET
709:                                    : INIT;
710:                            input.backup(1);
711:                            return token(ELTokenId.DOT);
712:                        }
713:                        break;
714:
715:                    } // end of switch(state)
716:
717:                } //end of big while
718:
719:                /** At this stage there's no more text in the scanned buffer.
720:                 * Scanner first checks whether this is completely the last
721:                 * available buffer.
722:                 */
723:                switch (lexerState) {
724:                case INIT:
725:                    if (input.readLength() == 0) {
726:                        return null;
727:                    }
728:                    break;
729:                case ISI_WHITESPACE:
730:                    lexerState = INIT;
731:                    return token(ELTokenId.WHITESPACE);
732:                case ISI_IDENTIFIER:
733:                    lexerState = INIT;
734:                    Token<ELTokenId> kwd = matchKeyword(input);
735:                    return (kwd != null) ? kwd : token(ELTokenId.IDENTIFIER);
736:                case ISI_STRING:
737:                case ISI_STRING_A_BSLASH:
738:                    return token(ELTokenId.STRING_LITERAL); // hold the state
739:                case ISI_CHAR:
740:                case ISI_CHAR_A_BSLASH:
741:                    return token(ELTokenId.CHAR_LITERAL);
742:                case ISI_CHAR_STRING:
743:                case ISI_CHAR_STRING_A_BSLASH:
744:                    return token(ELTokenId.STRING_LITERAL);
745:                case ISA_ZERO:
746:                case ISI_INT:
747:                    lexerState = INIT;
748:                    return token(ELTokenId.INT_LITERAL);
749:                case ISI_OCTAL:
750:                    lexerState = INIT;
751:                    return token(ELTokenId.OCTAL_LITERAL);
752:                case ISI_DOUBLE:
753:                case ISI_DOUBLE_EXP:
754:                case ISI_DOULE_EXP_ISA_SIGN:
755:                case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
756:                    lexerState = INIT;
757:                    return token(ELTokenId.DOUBLE_LITERAL);
758:                case ISI_HEX:
759:                    lexerState = INIT;
760:                    return token(ELTokenId.HEX_LITERAL);
761:                case ISA_DOT:
762:                    lexerState = INIT;
763:                    return token(ELTokenId.DOT);
764:                case ISA_EQ:
765:                    lexerState = INIT;
766:                    return token(ELTokenId.EQ_EQ);
767:                case ISA_GT:
768:                    lexerState = INIT;
769:                    return token(ELTokenId.GT);
770:                case ISA_LT:
771:                    lexerState = INIT;
772:                    return token(ELTokenId.LT);
773:                case ISA_PIPE:
774:                    lexerState = INIT;
775:                    return token(ELTokenId.OR_OR);
776:                case ISA_AND:
777:                    lexerState = INIT;
778:                    return token(ELTokenId.AND_AND);
779:                case ISA_EXCLAMATION:
780:                    lexerState = INIT;
781:                    return token(ELTokenId.NOT);
782:                case ISI_BRACKET:
783:                case ISI_BRACKET_A_IDENTIFIER:
784:                    lexerState = INIT;
785:                    return token(ELTokenId.IDENTIFIER);
786:                case ISI_BRACKET_A_WHITESPACE:
787:                    lexerState = ISI_BRACKET;
788:                    return token(ELTokenId.WHITESPACE);
789:                case ISI_BRACKET_ISA_EQ:
790:                    lexerState = ISI_BRACKET;
791:                    return token(ELTokenId.EQ_EQ);
792:                case ISI_BRACKET_ISA_GT:
793:                    lexerState = ISI_BRACKET;
794:                    return token(ELTokenId.GT_EQ);
795:                case ISI_BRACKET_ISA_LT:
796:                    lexerState = ISI_BRACKET;
797:                    return token(ELTokenId.LT_EQ);
798:                case ISI_BRACKET_ISA_AND:
799:                    lexerState = ISI_BRACKET;
800:                    return token(ELTokenId.AND_AND);
801:                case ISI_BRACKET_ISA_PIPE:
802:                    lexerState = ISI_BRACKET;
803:                    return token(ELTokenId.OR_OR);
804:                case ISI_BRACKET_ISA_DOT:
805:                    lexerState = ISI_BRACKET;
806:                    return token(ELTokenId.DOT);
807:                case ISI_BRACKET_ISA_ZERO:
808:                case ISI_BRACKET_ISI_INT:
809:                    lexerState = ISI_BRACKET;
810:                    return token(ELTokenId.INT_LITERAL);
811:                }
812:
813:                return null;
814:            }
815:
816:            public Token<ELTokenId> matchKeyword(LexerInput lexerInput) {
817:                int len = lexerInput.readLength();
818:                char[] buffer = new char[len];
819:                String read = lexerInput.readText().toString();
820:                read.getChars(0, read.length(), buffer, 0);
821:                int offset = 0;
822:
823:                if (len > 10)
824:                    return null;
825:                if (len <= 1)
826:                    return null;
827:                switch (buffer[offset++]) {
828:                case 'a':
829:                    if (len <= 2)
830:                        return null;
831:                    return (len == 3 && buffer[offset++] == 'n' && buffer[offset++] == 'd') ? token(ELTokenId.AND_KEYWORD)
832:                            : null;
833:                case 'd':
834:                    if (len <= 2)
835:                        return null;
836:                    return (len == 3 && buffer[offset++] == 'i' && buffer[offset++] == 'v') ? token(ELTokenId.DIV_KEYWORD)
837:                            : null;
838:                case 'e':
839:                    switch (buffer[offset++]) {
840:                    case 'q':
841:                        return (len == 2) ? token(ELTokenId.EQ_KEYWORD) : null;
842:                    case 'm':
843:                        return (len == 5 && buffer[offset++] == 'p'
844:                                && buffer[offset++] == 't' && buffer[offset++] == 'y') ? token(ELTokenId.EMPTY_KEYWORD)
845:                                : null;
846:                    default:
847:                        return null;
848:                    }
849:                case 'f':
850:                    return (len == 5 && buffer[offset++] == 'a'
851:                            && buffer[offset++] == 'l'
852:                            && buffer[offset++] == 's' && buffer[offset++] == 'e') ? token(ELTokenId.FALSE_KEYWORD)
853:                            : null;
854:                case 'g':
855:                    switch (buffer[offset++]) {
856:                    case 'e':
857:                        return (len == 2) ? token(ELTokenId.GE_KEYWORD) : null;
858:                    case 't':
859:                        return (len == 2) ? token(ELTokenId.GT_KEYWORD) : null;
860:                    default:
861:                        return null;
862:                    }
863:                case 'l':
864:                    switch (buffer[offset++]) {
865:                    case 'e':
866:                        return (len == 2) ? token(ELTokenId.LE_KEYWORD) : null;
867:                    case 't':
868:                        return (len == 2) ? token(ELTokenId.LT_KEYWORD) : null;
869:                    default:
870:                        return null;
871:                    }
872:                case 'i':
873:                    if (len <= 9)
874:                        return null;
875:                    return (len == 10 && buffer[offset++] == 'n'
876:                            && buffer[offset++] == 's'
877:                            && buffer[offset++] == 't'
878:                            && buffer[offset++] == 'a'
879:                            && buffer[offset++] == 'n'
880:                            && buffer[offset++] == 'c'
881:                            && buffer[offset++] == 'e'
882:                            && buffer[offset++] == 'o' && buffer[offset++] == 'f') ? token(ELTokenId.INSTANCEOF_KEYWORD)
883:                            : null;
884:                case 'm':
885:                    if (len <= 2)
886:                        return null;
887:                    return (len == 3 && buffer[offset++] == 'o' && buffer[offset++] == 'd') ? token(ELTokenId.MOD_KEYWORD)
888:                            : null;
889:                case 'n':
890:                    switch (buffer[offset++]) {
891:                    case 'e':
892:                        return (len == 2) ? token(ELTokenId.NE_KEYWORD) : null;
893:                    case 'o':
894:                        return (len == 3 && buffer[offset++] == 't') ? token(ELTokenId.NOT_KEYWORD)
895:                                : null;
896:                    case 'u':
897:                        return (len == 4 && buffer[offset++] == 'l' && buffer[offset++] == 'l') ? token(ELTokenId.NULL_KEYWORD)
898:                                : null;
899:                    default:
900:                        return null;
901:                    }
902:                case 'o':
903:                    return (len == 2 && buffer[offset++] == 'r') ? token(ELTokenId.OR_KEYWORD)
904:                            : null;
905:                case 't':
906:                    return (len == 4 && buffer[offset++] == 'r'
907:                            && buffer[offset++] == 'u' && buffer[offset++] == 'e') ? token(ELTokenId.TRUE_KEYWORD)
908:                            : null;
909:
910:                default:
911:                    return null;
912:                }
913:            }
914:
915:            private Token<ELTokenId> token(ELTokenId tokenId) {
916:                if (LOG) {
917:                    if (input.readLength() == 0) {
918:                        LOGGER.log(Level.INFO, "["
919:                                + this .getClass().getSimpleName()
920:                                + "] Found zero length token: "); //NOI18N
921:                    }
922:                    LOGGER.log(Level.INFO, "["
923:                            + this .getClass().getSimpleName() + "] token ('"
924:                            + input.readText().toString() + "'; id=" + tokenId
925:                            + ")\n"); //NOI18N
926:                }
927:                return tokenFactory.createToken(tokenId);
928:            }
929:
930:            public void release() {
931:            }
932:
933:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.