001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.el.lexer;
043:
044: import java.util.logging.Level;
045: import java.util.logging.Logger;
046: import org.netbeans.api.lexer.Token;
047: import org.netbeans.modules.el.lexer.api.ELTokenId;
048: import org.netbeans.spi.lexer.Lexer;
049: import org.netbeans.spi.lexer.LexerInput;
050: import org.netbeans.spi.lexer.LexerRestartInfo;
051: import org.netbeans.spi.lexer.TokenFactory;
052:
053: /**
054: * Lexical analyzer for Expression Language.
055: * It does NOT recognizes the EL delimiters ${ } and #{ }
056: *
057: * @author Petr Pisl
058: * @author Marek Fukala
059: *
060: * @version 1.00
061: */
062:
063: public class ELLexer implements Lexer<ELTokenId> {
064:
065: private static final Logger LOGGER = Logger.getLogger(ELLexer.class
066: .getName());
067: private static final boolean LOG = Boolean
068: .getBoolean("j2ee_lexer_debug"); //NOI18N
069:
070: private static final int EOF = LexerInput.EOF;
071:
072: private final LexerInput input;
073:
074: private final TokenFactory<ELTokenId> tokenFactory;
075:
076: public Object state() {
077: return lexerState;
078: }
079:
080: /** Internal state of the lexical analyzer before entering subanalyzer of
081: * character references. It is initially set to INIT, but before first usage,
082: * this will be overwritten with state, which originated transition to
083: * charref subanalyzer.
084: */
085: private int lexerState = INIT;
086:
087: /* Internal states used internally by analyzer. There
088: * can be any number of them declared by the analyzer.
089: */
090: private static final int INIT = 1; //initial lexer state
091: private static final int ISI_IDENTIFIER = 2;
092: private static final int ISI_CHAR = 3; // inside char constant
093: private static final int ISI_CHAR_A_BSLASH = 4; // inside char constant after backslash
094: private static final int ISI_STRING = 5; // inside a string " ... "
095: private static final int ISI_STRING_A_BSLASH = 6; // inside string "..." constant after backslash
096: private static final int ISI_CHAR_STRING = 7; // inside a string '...'
097: private static final int ISI_CHAR_STRING_A_BSLASH = 8; // inside string '...'contant after backslash
098: private static final int ISA_ZERO = 9; // after '0'
099: private static final int ISI_INT = 10; // integer number
100: private static final int ISI_OCTAL = 11; // octal number
101: private static final int ISI_DOUBLE = 12; // double number
102: private static final int ISI_DOUBLE_EXP = 13; // double number
103: private static final int ISI_HEX = 14; // hex number
104: private static final int ISA_DOT = 15; // after '.'
105: private static final int ISI_WHITESPACE = 16; // inside white space
106: private static final int ISA_EQ = 17; // after '='
107: private static final int ISA_GT = 18; // after '>'
108: private static final int ISA_LT = 19; // after '<'
109: //private static final int ISA_PLUS = 20; // after '+'
110: //private static final int ISA_MINUS = 21; // after '-'
111: //private static final int ISA_STAR = 22; // after '*'
112: private static final int ISA_PIPE = 23; // after '|'
113: private static final int ISA_AND = 24; // after '&'
114: private static final int ISA_EXCLAMATION = 25; // after '!'
115: private static final int ISI_BRACKET = 26; // after '['
116: private static final int ISI_BRACKET_A_WHITESPACE = 27;
117: private static final int ISI_BRACKET_A_IDENTIFIER = 28;
118: private static final int ISI_BRACKET_ISA_EQ = 29;
119: private static final int ISI_BRACKET_ISA_GT = 30;
120: private static final int ISI_BRACKET_ISA_LT = 31;
121: private static final int ISI_BRACKET_ISA_PIPE = 32; // after '|'
122: private static final int ISI_BRACKET_ISA_AND = 33; // after '&'
123: private static final int ISI_BRACKET_ISA_ZERO = 34; // after '0'
124: private static final int ISI_BRACKET_ISA_DOT = 35; // after '.'
125: private static final int ISI_BRACKET_ISI_INT = 36; // after '.'
126: private static final int ISI_BRACKET_ISI_OCTAL = 37; // octal number
127: private static final int ISI_BRACKET_ISI_DOUBLE = 38; // double number
128: private static final int ISI_BRACKET_ISI_DOUBLE_EXP = 39; // double number
129: private static final int ISI_BRACKET_ISI_HEX = 40; // hex number
130: private static final int ISI_DOULE_EXP_ISA_SIGN = 41;
131: private static final int ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN = 42;
132:
133: //private static final int ISA_PERCENT = 24; // after '%'
134:
135: public ELLexer(LexerRestartInfo<ELTokenId> info) {
136: this .input = info.input();
137: this .tokenFactory = info.tokenFactory();
138: if (info.state() == null) {
139: this .lexerState = INIT;
140: } else {
141: this .lexerState = ((Integer) info.state()).intValue();
142: }
143: }
144:
145: /** This is core function of analyzer and it returns one of following numbers:
146: * a) token number of next token from scanned text
147: * b) EOL when end of line was found in scanned buffer
148: * c) EOT when there is no more chars available in scanned buffer.
149: *
150: * The function scans the active character and does one or more
151: * of the following actions:
152: * 1. change internal analyzer state (state = new-state)
153: * 2. return token ID (return token-ID)
154: * 3. adjust current position to signal different end of token;
155: * the character that offset points to is not included in the token
156: */
157: public Token<ELTokenId> nextToken() {
158:
159: int actChar;
160:
161: while (true) {
162: actChar = input.read();
163:
164: if (actChar == EOF) {
165: if (input.readLengthEOF() == 1) {
166: return null; //just EOL is read
167: } else {
168: //there is something else in the buffer except EOL
169: //we will return last token now
170: input.backup(1); //backup the EOL, we will return null in next nextToken() call
171: break;
172: }
173: }
174:
175: switch (lexerState) { // switch by the current internal state
176: case INIT:
177:
178: switch (actChar) {
179: case '"':
180: lexerState = ISI_STRING;
181: break;
182: case '\'':
183: lexerState = ISI_CHAR;
184: break;
185: case '/':
186: return token(ELTokenId.DIV);
187: case '=':
188: lexerState = ISA_EQ;
189: break;
190: case '>':
191: lexerState = ISA_GT;
192: break;
193: case '<':
194: lexerState = ISA_LT;
195: break;
196: case '+':
197: return token(ELTokenId.PLUS);
198: case '-':
199: return token(ELTokenId.MINUS);
200: case '*':
201: return token(ELTokenId.MUL);
202: case '|':
203: lexerState = ISA_PIPE;
204: break;
205: case '&':
206: lexerState = ISA_AND;
207: break;
208: case '[':
209: return token(ELTokenId.LBRACKET);
210: case ']':
211: return token(ELTokenId.RBRACKET);
212: case '%':
213: return token(ELTokenId.MOD);
214: case ':':
215: return token(ELTokenId.COLON);
216: case '!':
217: lexerState = ISA_EXCLAMATION;
218: break;
219: case '(':
220: return token(ELTokenId.LPAREN);
221: case ')':
222: return token(ELTokenId.RPAREN);
223: case ',':
224: return token(ELTokenId.COMMA);
225: case '?':
226: return token(ELTokenId.QUESTION);
227: case '\n':
228: return token(ELTokenId.EOL);
229: case '0':
230: lexerState = ISA_ZERO;
231: break;
232: case '.':
233: lexerState = ISA_DOT;
234: break;
235: default:
236: // Check for whitespace
237: if (Character.isWhitespace(actChar)) {
238: lexerState = ISI_WHITESPACE;
239: break;
240: }
241:
242: // check whether it can be identifier
243: if (Character.isJavaIdentifierStart(actChar)) {
244: lexerState = ISI_IDENTIFIER;
245: break;
246: }
247: // Check for digit
248: if (Character.isDigit(actChar)) {
249: lexerState = ISI_INT;
250: break;
251: }
252: return token(ELTokenId.INVALID_CHAR);
253: //break;
254: }
255: break;
256:
257: case ISI_WHITESPACE: // white space
258: if (!Character.isWhitespace(actChar)) {
259: lexerState = INIT;
260: input.backup(1);
261: return token(ELTokenId.WHITESPACE);
262: }
263: break;
264:
265: case ISI_BRACKET:
266: switch (actChar) {
267: case ']':
268: lexerState = INIT;
269: input.backup(1);
270: return token(ELTokenId.IDENTIFIER);
271: case '"':
272: return token(ELTokenId.LBRACKET);
273: case '\'':
274: return token(ELTokenId.LBRACKET);
275: case '/':
276: return token(ELTokenId.DIV);
277: case '+':
278: return token(ELTokenId.PLUS);
279: case '-':
280: return token(ELTokenId.MINUS);
281: case '*':
282: return token(ELTokenId.MUL);
283: case '[':
284: return token(ELTokenId.LBRACKET);
285: case '%':
286: return token(ELTokenId.MOD);
287: case ':':
288: return token(ELTokenId.COLON);
289: case '(':
290: return token(ELTokenId.LPAREN);
291: case ')':
292: return token(ELTokenId.RPAREN);
293: case ',':
294: return token(ELTokenId.COMMA);
295: case '?':
296: return token(ELTokenId.QUESTION);
297: case '=':
298: lexerState = ISI_BRACKET_ISA_EQ;
299: break;
300: case '>':
301: lexerState = ISI_BRACKET_ISA_GT;
302: break;
303: case '<':
304: lexerState = ISI_BRACKET_ISA_LT;
305: break;
306: case '|':
307: lexerState = ISI_BRACKET_ISA_PIPE;
308: break;
309: case '&':
310: lexerState = ISI_BRACKET_ISA_AND;
311: break;
312: case '0':
313: lexerState = ISI_BRACKET_ISA_ZERO;
314: break;
315: case '.':
316: lexerState = ISI_BRACKET_ISA_DOT;
317: break;
318: default:
319: // Check for whitespace
320: if (Character.isWhitespace(actChar)) {
321: lexerState = ISI_BRACKET_A_WHITESPACE;
322: break;
323: }
324: if (Character.isJavaIdentifierStart(actChar)) {
325: // - System.out.print(" state->ISI_IDENTIFIER ");
326: lexerState = ISI_BRACKET_A_IDENTIFIER;
327: break;
328: }
329: // Check for digit
330: if (Character.isDigit(actChar)) {
331: lexerState = ISI_BRACKET_ISI_INT;
332: break;
333: }
334: return token(ELTokenId.INVALID_CHAR);
335: //break;
336: }
337: break;
338:
339: case ISI_BRACKET_A_WHITESPACE:
340: if (!Character.isWhitespace(actChar)) {
341: lexerState = ISI_BRACKET;
342: input.backup(1);
343: return token(ELTokenId.WHITESPACE);
344: }
345: break;
346:
347: case ISI_BRACKET_ISA_EQ:
348: case ISA_EQ:
349: switch (actChar) {
350: case '=':
351: lexerState = INIT;
352: return token(ELTokenId.EQ_EQ);
353: default:
354: lexerState = (lexerState == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET
355: : INIT;
356: input.backup(1);
357: break;
358: }
359: break;
360:
361: case ISI_BRACKET_ISA_GT:
362: case ISA_GT:
363: switch (actChar) {
364: case '=':
365: lexerState = INIT;
366: return token(ELTokenId.GT_EQ);
367: default:
368: lexerState = (lexerState == ISI_BRACKET_ISA_GT) ? ISI_BRACKET
369: : INIT;
370: input.backup(1);
371: return token(ELTokenId.GT);
372: }
373: //break;
374: case ISI_BRACKET_ISA_LT:
375: case ISA_LT:
376: switch (actChar) {
377: case '=':
378: lexerState = INIT;
379: return token(ELTokenId.LT_EQ);
380: default:
381: lexerState = (lexerState == ISI_BRACKET_ISA_LT) ? ISI_BRACKET
382: : INIT;
383: input.backup(1);
384: return token(ELTokenId.LT);
385: }
386: //break;
387: case ISI_BRACKET_ISA_PIPE:
388: case ISA_PIPE:
389: switch (actChar) {
390: case '|':
391: lexerState = INIT;
392: return token(ELTokenId.OR_OR);
393: default:
394: lexerState = (lexerState == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET
395: : INIT;
396: input.backup(1);
397: break;
398: }
399: break;
400: case ISI_BRACKET_ISA_AND:
401: case ISA_AND:
402: switch (actChar) {
403: case '&':
404: lexerState = INIT;
405: return token(ELTokenId.AND_AND);
406: default:
407: lexerState = (lexerState == ISI_BRACKET_ISA_AND) ? ISI_BRACKET
408: : INIT;
409: input.backup(1);
410: break;
411: }
412: break;
413: case ISA_EXCLAMATION:
414: switch (actChar) {
415: case '=':
416: lexerState = INIT;
417: return token(ELTokenId.NOT_EQ);
418: default:
419: lexerState = INIT;
420: input.backup(1);
421: return token(ELTokenId.NOT);
422: }
423: case ISI_STRING:
424: switch (actChar) {
425: case '\\':
426: lexerState = ISI_STRING_A_BSLASH;
427: break;
428: case '\n':
429: lexerState = INIT;
430: input.backup(1);
431: return token(ELTokenId.STRING_LITERAL);
432: case '"': // NOI18N
433: lexerState = INIT;
434: return token(ELTokenId.STRING_LITERAL);
435: }
436: break;
437: case ISI_STRING_A_BSLASH:
438: lexerState = ISI_STRING;
439: break;
440: case ISI_BRACKET_A_IDENTIFIER:
441: case ISI_IDENTIFIER:
442: if (!(Character.isJavaIdentifierPart(actChar))) {
443: switch (lexerState) {
444: case ISI_IDENTIFIER:
445: lexerState = INIT;
446: break;
447: case ISI_BRACKET_A_IDENTIFIER:
448: lexerState = ISI_BRACKET;
449: break;
450: }
451: Token<ELTokenId> tid = matchKeyword(input);
452: input.backup(1);
453: if (tid == null) {
454: if (actChar == ':') {
455: tid = token(ELTokenId.TAG_LIB_PREFIX);
456: } else {
457: tid = token(ELTokenId.IDENTIFIER);
458: }
459: }
460: return tid;
461: }
462: break;
463:
464: case ISI_CHAR:
465: switch (actChar) {
466: case '\\':
467: lexerState = ISI_CHAR_A_BSLASH;
468: break;
469: case '\n':
470: lexerState = INIT;
471: input.backup(1);
472: return token(ELTokenId.CHAR_LITERAL);
473: case '\'':
474: lexerState = INIT;
475: return token(ELTokenId.CHAR_LITERAL);
476: default:
477: char prevChar = input.readText().charAt(
478: input.readLength() - 1);
479: if (prevChar != '\'' && prevChar != '\\') {
480: lexerState = ISI_CHAR_STRING;
481: }
482: }
483: break;
484:
485: case ISI_CHAR_A_BSLASH:
486: switch (actChar) {
487: case '\'':
488: case '\\':
489: break;
490: default:
491: input.backup(1);
492: break;
493: }
494: lexerState = ISI_CHAR;
495: break;
496:
497: case ISI_CHAR_STRING:
498: // - System.out.print(" ISI_CHAR_STRING (");
499: switch (actChar) {
500: case '\\':
501: // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
502: lexerState = ISI_CHAR_STRING_A_BSLASH;
503: break;
504: case '\n':
505: lexerState = INIT;
506: input.backup(1);
507: return token(ELTokenId.STRING_LITERAL);
508: case '\'':
509: lexerState = INIT;
510: return token(ELTokenId.STRING_LITERAL);
511: }
512: // - System.out.print(")");
513: break;
514:
515: case ISI_CHAR_STRING_A_BSLASH:
516: switch (actChar) {
517: case '\'':
518: case '\\':
519: break;
520: default:
521: input.backup(1);
522: break;
523: }
524: lexerState = ISI_CHAR_STRING;
525: break;
526:
527: case ISI_BRACKET_ISA_ZERO:
528: case ISA_ZERO:
529: switch (actChar) {
530: case '.':
531: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE
532: : ISI_DOUBLE;
533: break;
534: case 'x':
535: case 'X':
536: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX
537: : ISI_HEX;
538: break;
539: case 'l':
540: case 'L':
541: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
542: : INIT;
543: return token(ELTokenId.LONG_LITERAL);
544: case 'f':
545: case 'F':
546: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
547: : INIT;
548: return token(ELTokenId.FLOAT_LITERAL);
549: case 'd':
550: case 'D':
551: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
552: : INIT;
553: return token(ELTokenId.DOUBLE_LITERAL);
554: case '8': // it's error to have '8' and '9' in octal number
555: case '9':
556: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
557: : INIT;
558: return token(ELTokenId.INVALID_OCTAL_LITERAL);
559: case 'e':
560: case 'E':
561: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP
562: : ISI_DOUBLE_EXP;
563: break;
564: default:
565: if (Character.isDigit(actChar)) { // '8' and '9' already handled
566: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL
567: : ISI_OCTAL;
568: break;
569: }
570: lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
571: : INIT;
572: input.backup(1);
573: return token(ELTokenId.INT_LITERAL);
574: }
575: break;
576:
577: case ISI_BRACKET_ISI_INT:
578: case ISI_INT:
579: switch (actChar) {
580: case 'l':
581: case 'L':
582: lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
583: : INIT;
584: return token(ELTokenId.LONG_LITERAL);
585: case '.':
586: lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE
587: : ISI_DOUBLE;
588: break;
589: case 'f':
590: case 'F':
591: lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
592: : INIT;
593: return token(ELTokenId.FLOAT_LITERAL);
594: case 'd':
595: case 'D':
596: lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
597: : INIT;
598: return token(ELTokenId.DOUBLE_LITERAL);
599: case 'e':
600: case 'E':
601: lexerState = ISI_DOUBLE_EXP;
602: break;
603: default:
604: if (!(actChar >= '0' && actChar <= '9')) {
605: lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
606: : INIT;
607: input.backup(1);
608: return token(ELTokenId.INT_LITERAL);
609: }
610: }
611: break;
612:
613: case ISI_BRACKET_ISI_OCTAL:
614: case ISI_OCTAL:
615: if (!(actChar >= '0' && actChar <= '7')) {
616: lexerState = (lexerState == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET
617: : INIT;
618: input.backup(1);
619: return token(ELTokenId.OCTAL_LITERAL);
620: }
621: break;
622:
623: case ISI_BRACKET_ISI_DOUBLE:
624: case ISI_DOUBLE:
625: switch (actChar) {
626: case 'f':
627: case 'F':
628: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
629: : INIT;
630: return token(ELTokenId.FLOAT_LITERAL);
631: case 'd':
632: case 'D':
633: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
634: : INIT;
635: return token(ELTokenId.DOUBLE_LITERAL);
636: case 'e':
637: case 'E':
638: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP
639: : ISI_DOUBLE_EXP;
640: break;
641: default:
642: if (!((actChar >= '0' && actChar <= '9') || actChar == '.')) {
643: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
644: : INIT;
645: input.backup(1);
646: return token(ELTokenId.DOUBLE_LITERAL);
647: }
648: }
649: break;
650:
651: case ISI_DOUBLE_EXP:
652: case ISI_BRACKET_ISI_DOUBLE_EXP:
653: switch (actChar) {
654: case 'f':
655: case 'F':
656: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
657: : INIT;
658: return token(ELTokenId.FLOAT_LITERAL);
659: case 'd':
660: case 'D':
661: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
662: : INIT;
663: return token(ELTokenId.DOUBLE_LITERAL);
664: case '-':
665: case '+':
666: lexerState = ISI_DOULE_EXP_ISA_SIGN;
667: break;
668: default:
669: if (!Character.isDigit(actChar)) {
670: //|| ch == '-' || ch == '+')) {
671: lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
672: : INIT;
673: input.backup(1);
674: return token(ELTokenId.DOUBLE_LITERAL);
675: }
676: }
677: break;
678:
679: case ISI_DOULE_EXP_ISA_SIGN:
680: case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
681: if (!Character.isDigit(actChar)) {
682: lexerState = (lexerState == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET
683: : INIT;
684: input.backup(1);
685: return token(ELTokenId.DOUBLE_LITERAL);
686: }
687: break;
688:
689: case ISI_BRACKET_ISI_HEX:
690: case ISI_HEX:
691: if (!((actChar >= 'a' && actChar <= 'f')
692: || (actChar >= 'A' && actChar <= 'F') || Character
693: .isDigit(actChar))) {
694: lexerState = (lexerState == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET
695: : INIT;
696: input.backup(1);
697: return token(ELTokenId.HEX_LITERAL);
698: }
699: break;
700:
701: case ISI_BRACKET_ISA_DOT:
702: case ISA_DOT:
703: if (Character.isDigit(actChar)) {
704: lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE
705: : ISI_DOUBLE;
706:
707: } else { // only single dot
708: lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET
709: : INIT;
710: input.backup(1);
711: return token(ELTokenId.DOT);
712: }
713: break;
714:
715: } // end of switch(state)
716:
717: } //end of big while
718:
719: /** At this stage there's no more text in the scanned buffer.
720: * Scanner first checks whether this is completely the last
721: * available buffer.
722: */
723: switch (lexerState) {
724: case INIT:
725: if (input.readLength() == 0) {
726: return null;
727: }
728: break;
729: case ISI_WHITESPACE:
730: lexerState = INIT;
731: return token(ELTokenId.WHITESPACE);
732: case ISI_IDENTIFIER:
733: lexerState = INIT;
734: Token<ELTokenId> kwd = matchKeyword(input);
735: return (kwd != null) ? kwd : token(ELTokenId.IDENTIFIER);
736: case ISI_STRING:
737: case ISI_STRING_A_BSLASH:
738: return token(ELTokenId.STRING_LITERAL); // hold the state
739: case ISI_CHAR:
740: case ISI_CHAR_A_BSLASH:
741: return token(ELTokenId.CHAR_LITERAL);
742: case ISI_CHAR_STRING:
743: case ISI_CHAR_STRING_A_BSLASH:
744: return token(ELTokenId.STRING_LITERAL);
745: case ISA_ZERO:
746: case ISI_INT:
747: lexerState = INIT;
748: return token(ELTokenId.INT_LITERAL);
749: case ISI_OCTAL:
750: lexerState = INIT;
751: return token(ELTokenId.OCTAL_LITERAL);
752: case ISI_DOUBLE:
753: case ISI_DOUBLE_EXP:
754: case ISI_DOULE_EXP_ISA_SIGN:
755: case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
756: lexerState = INIT;
757: return token(ELTokenId.DOUBLE_LITERAL);
758: case ISI_HEX:
759: lexerState = INIT;
760: return token(ELTokenId.HEX_LITERAL);
761: case ISA_DOT:
762: lexerState = INIT;
763: return token(ELTokenId.DOT);
764: case ISA_EQ:
765: lexerState = INIT;
766: return token(ELTokenId.EQ_EQ);
767: case ISA_GT:
768: lexerState = INIT;
769: return token(ELTokenId.GT);
770: case ISA_LT:
771: lexerState = INIT;
772: return token(ELTokenId.LT);
773: case ISA_PIPE:
774: lexerState = INIT;
775: return token(ELTokenId.OR_OR);
776: case ISA_AND:
777: lexerState = INIT;
778: return token(ELTokenId.AND_AND);
779: case ISA_EXCLAMATION:
780: lexerState = INIT;
781: return token(ELTokenId.NOT);
782: case ISI_BRACKET:
783: case ISI_BRACKET_A_IDENTIFIER:
784: lexerState = INIT;
785: return token(ELTokenId.IDENTIFIER);
786: case ISI_BRACKET_A_WHITESPACE:
787: lexerState = ISI_BRACKET;
788: return token(ELTokenId.WHITESPACE);
789: case ISI_BRACKET_ISA_EQ:
790: lexerState = ISI_BRACKET;
791: return token(ELTokenId.EQ_EQ);
792: case ISI_BRACKET_ISA_GT:
793: lexerState = ISI_BRACKET;
794: return token(ELTokenId.GT_EQ);
795: case ISI_BRACKET_ISA_LT:
796: lexerState = ISI_BRACKET;
797: return token(ELTokenId.LT_EQ);
798: case ISI_BRACKET_ISA_AND:
799: lexerState = ISI_BRACKET;
800: return token(ELTokenId.AND_AND);
801: case ISI_BRACKET_ISA_PIPE:
802: lexerState = ISI_BRACKET;
803: return token(ELTokenId.OR_OR);
804: case ISI_BRACKET_ISA_DOT:
805: lexerState = ISI_BRACKET;
806: return token(ELTokenId.DOT);
807: case ISI_BRACKET_ISA_ZERO:
808: case ISI_BRACKET_ISI_INT:
809: lexerState = ISI_BRACKET;
810: return token(ELTokenId.INT_LITERAL);
811: }
812:
813: return null;
814: }
815:
816: public Token<ELTokenId> matchKeyword(LexerInput lexerInput) {
817: int len = lexerInput.readLength();
818: char[] buffer = new char[len];
819: String read = lexerInput.readText().toString();
820: read.getChars(0, read.length(), buffer, 0);
821: int offset = 0;
822:
823: if (len > 10)
824: return null;
825: if (len <= 1)
826: return null;
827: switch (buffer[offset++]) {
828: case 'a':
829: if (len <= 2)
830: return null;
831: return (len == 3 && buffer[offset++] == 'n' && buffer[offset++] == 'd') ? token(ELTokenId.AND_KEYWORD)
832: : null;
833: case 'd':
834: if (len <= 2)
835: return null;
836: return (len == 3 && buffer[offset++] == 'i' && buffer[offset++] == 'v') ? token(ELTokenId.DIV_KEYWORD)
837: : null;
838: case 'e':
839: switch (buffer[offset++]) {
840: case 'q':
841: return (len == 2) ? token(ELTokenId.EQ_KEYWORD) : null;
842: case 'm':
843: return (len == 5 && buffer[offset++] == 'p'
844: && buffer[offset++] == 't' && buffer[offset++] == 'y') ? token(ELTokenId.EMPTY_KEYWORD)
845: : null;
846: default:
847: return null;
848: }
849: case 'f':
850: return (len == 5 && buffer[offset++] == 'a'
851: && buffer[offset++] == 'l'
852: && buffer[offset++] == 's' && buffer[offset++] == 'e') ? token(ELTokenId.FALSE_KEYWORD)
853: : null;
854: case 'g':
855: switch (buffer[offset++]) {
856: case 'e':
857: return (len == 2) ? token(ELTokenId.GE_KEYWORD) : null;
858: case 't':
859: return (len == 2) ? token(ELTokenId.GT_KEYWORD) : null;
860: default:
861: return null;
862: }
863: case 'l':
864: switch (buffer[offset++]) {
865: case 'e':
866: return (len == 2) ? token(ELTokenId.LE_KEYWORD) : null;
867: case 't':
868: return (len == 2) ? token(ELTokenId.LT_KEYWORD) : null;
869: default:
870: return null;
871: }
872: case 'i':
873: if (len <= 9)
874: return null;
875: return (len == 10 && buffer[offset++] == 'n'
876: && buffer[offset++] == 's'
877: && buffer[offset++] == 't'
878: && buffer[offset++] == 'a'
879: && buffer[offset++] == 'n'
880: && buffer[offset++] == 'c'
881: && buffer[offset++] == 'e'
882: && buffer[offset++] == 'o' && buffer[offset++] == 'f') ? token(ELTokenId.INSTANCEOF_KEYWORD)
883: : null;
884: case 'm':
885: if (len <= 2)
886: return null;
887: return (len == 3 && buffer[offset++] == 'o' && buffer[offset++] == 'd') ? token(ELTokenId.MOD_KEYWORD)
888: : null;
889: case 'n':
890: switch (buffer[offset++]) {
891: case 'e':
892: return (len == 2) ? token(ELTokenId.NE_KEYWORD) : null;
893: case 'o':
894: return (len == 3 && buffer[offset++] == 't') ? token(ELTokenId.NOT_KEYWORD)
895: : null;
896: case 'u':
897: return (len == 4 && buffer[offset++] == 'l' && buffer[offset++] == 'l') ? token(ELTokenId.NULL_KEYWORD)
898: : null;
899: default:
900: return null;
901: }
902: case 'o':
903: return (len == 2 && buffer[offset++] == 'r') ? token(ELTokenId.OR_KEYWORD)
904: : null;
905: case 't':
906: return (len == 4 && buffer[offset++] == 'r'
907: && buffer[offset++] == 'u' && buffer[offset++] == 'e') ? token(ELTokenId.TRUE_KEYWORD)
908: : null;
909:
910: default:
911: return null;
912: }
913: }
914:
915: private Token<ELTokenId> token(ELTokenId tokenId) {
916: if (LOG) {
917: if (input.readLength() == 0) {
918: LOGGER.log(Level.INFO, "["
919: + this .getClass().getSimpleName()
920: + "] Found zero length token: "); //NOI18N
921: }
922: LOGGER.log(Level.INFO, "["
923: + this .getClass().getSimpleName() + "] token ('"
924: + input.readText().toString() + "'; id=" + tokenId
925: + ")\n"); //NOI18N
926: }
927: return tokenFactory.createToken(tokenId);
928: }
929:
930: public void release() {
931: }
932:
933: }
|