0001: /*
0002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
0003: *
0004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
0005: *
0006: * The contents of this file are subject to the terms of either the GNU
0007: * General Public License Version 2 only ("GPL") or the Common
0008: * Development and Distribution License("CDDL") (collectively, the
0009: * "License"). You may not use this file except in compliance with the
0010: * License. You can obtain a copy of the License at
0011: * http://www.netbeans.org/cddl-gplv2.html
0012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
0013: * specific language governing permissions and limitations under the
0014: * License. When distributing the software, include this License Header
0015: * Notice in each file and include the License file at
0016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
0017: * particular file as subject to the "Classpath" exception as provided
0018: * by Sun in the GPL Version 2 section of the License file that
0019: * accompanied this code. If applicable, add the following below the
0020: * License Header, with the fields enclosed by brackets [] replaced by
0021: * your own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * Contributor(s):
0025: *
0026: * The Original Software is NetBeans. The Initial Developer of the Original
0027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
0028: * Microsystems, Inc. All Rights Reserved.
0029: *
0030: * If you wish your version of this file to be governed by only the CDDL
0031: * or only the GPL Version 2, indicate your decision by adding
0032: * "[Contributor] elects to include this software in this distribution
0033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
0034: * single choice of license, a recipient has the option to distribute
0035: * your version of this file under either the CDDL, the GPL Version 2 or
0036: * to extend the choice of license to its licensees as provided above.
0037: * However, if you add GPL Version 2 code and therefore, elected the GPL
0038: * Version 2 license, then the option applies only if the new code is
0039: * made subject to such option by the copyright holder.
0040: */
0041:
0042: package org.netbeans.modules.web.core.syntax.deprecated;
0043:
0044: import org.netbeans.modules.web.core.syntax.*;
0045: import org.netbeans.editor.Syntax;
0046: import org.netbeans.editor.TokenID;
0047:
/**
 * Lexical analyzer for the JSP Expression Language (EL).
 *
 * @deprecated Use {@link ELLexer} instead.
 *
 * @author Petr Pisl
 */
0055:
0056: public class ELSyntax extends Syntax {
0057:
0058: /* Internal states used internally by analyzer. There
0059: * can be any number of them declared by the analyzer.
0060: * They are usually numbered starting from zero but they don't
0061: * have to. The only reserved value is -1 which is reserved
0062: * for the INIT state - the initial internal state of the analyzer.
0063: */
0064:
0065: private static final int ISI_IDENTIFIER = 2;
0066: private static final int ISI_CHAR = 3; // inside char constant
0067: private static final int ISI_CHAR_A_BSLASH = 4; // inside char constant after backslash
0068: private static final int ISI_STRING = 5; // inside a string " ... "
0069: private static final int ISI_STRING_A_BSLASH = 6; // inside string "..." constant after backslash
0070: private static final int ISI_CHAR_STRING = 7; // inside a string '...'
0071: private static final int ISI_CHAR_STRING_A_BSLASH = 8; // inside string '...'contant after backslash
0072: private static final int ISA_ZERO = 9; // after '0'
0073: private static final int ISI_INT = 10; // integer number
0074: private static final int ISI_OCTAL = 11; // octal number
0075: private static final int ISI_DOUBLE = 12; // double number
0076: private static final int ISI_DOUBLE_EXP = 13; // double number
0077: private static final int ISI_HEX = 14; // hex number
0078: private static final int ISA_DOT = 15; // after '.'
0079: private static final int ISI_WHITESPACE = 16; // inside white space
0080: private static final int ISA_EQ = 17; // after '='
0081: private static final int ISA_GT = 18; // after '>'
0082: private static final int ISA_LT = 19; // after '<'
0083: //private static final int ISA_PLUS = 20; // after '+'
0084: //private static final int ISA_MINUS = 21; // after '-'
0085: //private static final int ISA_STAR = 22; // after '*'
0086: private static final int ISA_PIPE = 23; // after '|'
0087: private static final int ISA_AND = 24; // after '&'
0088: private static final int ISA_EXCLAMATION = 25; // after '!'
0089: private static final int ISI_BRACKET = 26; // after '['
0090: private static final int ISI_BRACKET_A_WHITESPACE = 27;
0091: private static final int ISI_BRACKET_A_IDENTIFIER = 28;
0092: private static final int ISI_BRACKET_ISA_EQ = 29;
0093: private static final int ISI_BRACKET_ISA_GT = 30;
0094: private static final int ISI_BRACKET_ISA_LT = 31;
0095: private static final int ISI_BRACKET_ISA_PIPE = 32; // after '|'
0096: private static final int ISI_BRACKET_ISA_AND = 33; // after '&'
0097: private static final int ISI_BRACKET_ISA_ZERO = 34; // after '0'
0098: private static final int ISI_BRACKET_ISA_DOT = 35; // after '.'
0099: private static final int ISI_BRACKET_ISI_INT = 36; // after '.'
0100: private static final int ISI_BRACKET_ISI_OCTAL = 37; // octal number
0101: private static final int ISI_BRACKET_ISI_DOUBLE = 38; // double number
0102: private static final int ISI_BRACKET_ISI_DOUBLE_EXP = 39; // double number
0103: private static final int ISI_BRACKET_ISI_HEX = 40; // hex number
0104: private static final int ISI_DOULE_EXP_ISA_SIGN = 41;
0105: private static final int ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN = 42;
0106:
0107: //private static final int ISA_PERCENT = 24; // after '%'
0108:
/**
 * Creates the analyzer and sets its token context path to the one
 * published by {@link ELTokenContext}.
 */
public ELSyntax() {
    super();
    this.tokenContextPath = ELTokenContext.contextPath;
}
0112:
0113: /** This is core function of analyzer and it returns one of following numbers:
0114: * a) token number of next token from scanned text
0115: * b) EOL when end of line was found in scanned buffer
0116: * c) EOT when there is no more chars available in scanned buffer.
0117: *
0118: * The function scans the active character and does one or more
0119: * of the following actions:
0120: * 1. change internal analyzer state (state = new-state)
0121: * 2. return token ID (return token-ID)
0122: * 3. adjust current position to signal different end of token;
0123: * the character that offset points to is not included in the token
0124: */
0125: protected TokenID parseToken() {
0126: // The main loop that reads characters one by one follows
0127: while (offset < stopOffset) {
0128: char ch = buffer[offset]; // get the current character
0129: // - System.out.print(ch);
0130: switch (state) { // switch by the current internal state
0131: case INIT:
0132: // - System.out.print(" INIT (");
0133: switch (ch) {
0134: case '"':
0135: // - System.out.print(" state->ISI_STRING ");
0136: state = ISI_STRING;
0137: break;
0138: case '\'':
0139: // - System.out.print(" state->ISI_CHAR ");
0140: state = ISI_CHAR;
0141: break;
0142: case '/':
0143: offset++;
0144: return ELTokenContext.DIV;
0145: case '=':
0146: state = ISA_EQ;
0147: break;
0148: case '>':
0149: state = ISA_GT;
0150: break;
0151: case '<':
0152: state = ISA_LT;
0153: break;
0154: case '+':
0155: offset++;
0156: return ELTokenContext.PLUS;
0157: case '-':
0158: offset++;
0159: return ELTokenContext.MINUS;
0160: case '*':
0161: offset++;
0162: return ELTokenContext.MUL;
0163: case '|':
0164: state = ISA_PIPE;
0165: break;
0166: case '&':
0167: state = ISA_AND;
0168: break;
0169: case '[':
0170: //state = ISI_BRACKET;
0171: offset++;
0172: return ELTokenContext.LBRACKET;
0173: //break;
0174: case ']':
0175: offset++;
0176: return ELTokenContext.RBRACKET;
0177: case '%':
0178: offset++;
0179: return ELTokenContext.MOD;
0180: case ':':
0181: //state = ISA_COLON;
0182: //break;
0183: offset++;
0184: return ELTokenContext.COLON;
0185: case '!':
0186: state = ISA_EXCLAMATION;
0187: break;
0188: case '(':
0189: offset++;
0190: return ELTokenContext.LPAREN;
0191: case ')':
0192: offset++;
0193: return ELTokenContext.RPAREN;
0194: case ',':
0195: offset++;
0196: return ELTokenContext.COMMA;
0197: case '?':
0198: offset++;
0199: return ELTokenContext.QUESTION;
0200: case '\n':
0201: offset++;
0202: // - System.out.print("r=EOL )");
0203: return ELTokenContext.EOL;
0204: case '0':
0205: state = ISA_ZERO;
0206: break;
0207: case '.':
0208: state = ISA_DOT;
0209: break;
0210: default:
0211: // Check for whitespace
0212: if (Character.isWhitespace(ch)) {
0213: state = ISI_WHITESPACE;
0214: break;
0215: }
0216:
0217: // check whether it can be identifier
0218: if (Character.isJavaIdentifierStart(ch)) {
0219: // - System.out.print(" state->ISI_IDENTIFIER ");
0220: state = ISI_IDENTIFIER;
0221: break;
0222: }
0223: // Check for digit
0224: if (Character.isDigit(ch)) {
0225: state = ISI_INT;
0226: break;
0227: }
0228: // - System.out.print(" r=INVALID_CHAR )");
0229: offset++;
0230: return ELTokenContext.INVALID_CHAR;
0231: //break;
0232: }
0233: // - System.out.print(")");
0234: break;
0235:
0236: case ISI_WHITESPACE: // white space
0237: if (!Character.isWhitespace(ch)) {
0238: state = INIT;
0239: return ELTokenContext.WHITESPACE;
0240: }
0241: break;
0242:
0243: case ISI_BRACKET:
0244: switch (ch) {
0245: case ']':
0246: state = INIT;
0247: //offset++;
0248: return ELTokenContext.IDENTIFIER;
0249: case '"':
0250: offset++;
0251: return ELTokenContext.LBRACKET;
0252: case '\'':
0253: offset++;
0254: return ELTokenContext.LBRACKET;
0255: case '/':
0256: offset++;
0257: return ELTokenContext.DIV;
0258: case '+':
0259: offset++;
0260: return ELTokenContext.PLUS;
0261: case '-':
0262: offset++;
0263: return ELTokenContext.MINUS;
0264: case '*':
0265: offset++;
0266: return ELTokenContext.MUL;
0267: case '[':
0268: offset++;
0269: return ELTokenContext.LBRACKET;
0270: case '%':
0271: offset++;
0272: return ELTokenContext.MOD;
0273: case ':':
0274: offset++;
0275: return ELTokenContext.COLON;
0276: case '(':
0277: offset++;
0278: return ELTokenContext.LPAREN;
0279: case ')':
0280: offset++;
0281: return ELTokenContext.RPAREN;
0282: case ',':
0283: offset++;
0284: return ELTokenContext.COMMA;
0285: case '?':
0286: offset++;
0287: return ELTokenContext.QUESTION;
0288: case '=':
0289: state = ISI_BRACKET_ISA_EQ;
0290: break;
0291: case '>':
0292: state = ISI_BRACKET_ISA_GT;
0293: break;
0294: case '<':
0295: state = ISI_BRACKET_ISA_LT;
0296: break;
0297: case '|':
0298: state = ISI_BRACKET_ISA_PIPE;
0299: break;
0300: case '&':
0301: state = ISI_BRACKET_ISA_AND;
0302: break;
0303: case '0':
0304: state = ISI_BRACKET_ISA_ZERO;
0305: break;
0306: case '.':
0307: state = ISI_BRACKET_ISA_DOT;
0308: break;
0309: default:
0310: // Check for whitespace
0311: if (Character.isWhitespace(ch)) {
0312: state = ISI_BRACKET_A_WHITESPACE;
0313: break;
0314: }
0315: if (Character.isJavaIdentifierStart(ch)) {
0316: // - System.out.print(" state->ISI_IDENTIFIER ");
0317: state = ISI_BRACKET_A_IDENTIFIER;
0318: break;
0319: }
0320: // Check for digit
0321: if (Character.isDigit(ch)) {
0322: state = ISI_BRACKET_ISI_INT;
0323: break;
0324: }
0325: // - System.out.print(" r=INVALID_CHAR )");
0326: offset++;
0327: return ELTokenContext.INVALID_CHAR;
0328: //break;
0329: }
0330: break;
0331:
0332: case ISI_BRACKET_A_WHITESPACE:
0333: if (!Character.isWhitespace(ch)) {
0334: state = ISI_BRACKET;
0335: return ELTokenContext.WHITESPACE;
0336: }
0337: break;
0338:
0339: case ISI_BRACKET_ISA_EQ:
0340: case ISA_EQ:
0341: switch (ch) {
0342: case '=':
0343: offset++;
0344: return ELTokenContext.EQ_EQ;
0345: default:
0346: state = (state == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET
0347: : INIT;
0348: offset--;
0349: //return ELTokenContext.INVALID_CHAR;
0350: }
0351: break;
0352:
0353: case ISI_BRACKET_ISA_GT:
0354: case ISA_GT:
0355: switch (ch) {
0356: case '=':
0357: offset++;
0358: return ELTokenContext.GT_EQ;
0359: default:
0360: state = (state == ISI_BRACKET_ISA_GT) ? ISI_BRACKET
0361: : INIT;
0362: return ELTokenContext.GT;
0363: }
0364: //break;
0365: case ISI_BRACKET_ISA_LT:
0366: case ISA_LT:
0367: switch (ch) {
0368: case '=':
0369: offset++;
0370: return ELTokenContext.LT_EQ;
0371: default:
0372: state = (state == ISI_BRACKET_ISA_LT) ? ISI_BRACKET
0373: : INIT;
0374: return ELTokenContext.LT;
0375: }
0376: //break;
0377: case ISI_BRACKET_ISA_PIPE:
0378: case ISA_PIPE:
0379: switch (ch) {
0380: case '|':
0381: offset++;
0382: state = INIT;
0383: return ELTokenContext.OR_OR;
0384: default:
0385: state = (state == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET
0386: : INIT;
0387: offset--;
0388: }
0389: break;
0390: case ISI_BRACKET_ISA_AND:
0391: case ISA_AND:
0392: switch (ch) {
0393: case '&':
0394: offset++;
0395: state = INIT;
0396: return ELTokenContext.AND_AND;
0397: default:
0398: state = (state == ISI_BRACKET_ISA_AND) ? ISI_BRACKET
0399: : INIT;
0400: offset--;
0401: }
0402: break;
0403: case ISA_EXCLAMATION:
0404: switch (ch) {
0405: case '=':
0406: offset++;
0407: state = INIT;
0408: return ELTokenContext.NOT_EQ;
0409: default:
0410: state = INIT;
0411: return ELTokenContext.NOT;
0412: }
0413: case ISI_STRING:
0414: // - System.out.print(" ISI_STRING (");
0415: switch (ch) {
0416: case '\\':
0417: // - System.out.print(" state->ISI_STRING_A_BSLASH");
0418: state = ISI_STRING_A_BSLASH;
0419: break;
0420: case '\n':
0421: state = INIT;
0422: // - System.out.print(" state->INIT r=STRING_LITERAL )");
0423: return ELTokenContext.STRING_LITERAL;
0424: case '"': // NOI18N
0425: offset++;
0426: state = INIT;
0427: // - System.out.print(" state->INIT r=STRING_LITERAL )");
0428: return ELTokenContext.STRING_LITERAL;
0429: }
0430: // - System.out.print(")");
0431: break;
0432: case ISI_STRING_A_BSLASH:
0433: // - System.out.print(" ISI_STRING_A_BSLASH (");
0434: //switch (ch){
0435: // case '"':
0436: state = ISI_STRING;
0437: // - System.out.print(" state->INIT ");
0438: // - System.out.print(")");
0439: break;
0440: case ISI_BRACKET_A_IDENTIFIER:
0441: case ISI_IDENTIFIER:
0442: // - System.out.print(" ISI_IDENTIFIER (");
0443: if (!(Character.isJavaIdentifierPart(ch))) {
0444: switch (state) {
0445: case ISI_IDENTIFIER:
0446: state = INIT;
0447: break;
0448: case ISI_BRACKET_A_IDENTIFIER:
0449: state = ISI_BRACKET;
0450: break;
0451: }
0452: // - System.out.print(" state->INIT ");
0453:
0454: TokenID tid = matchKeyword(buffer, tokenOffset,
0455: offset - tokenOffset);
0456: if (tid == null) {
0457: if (ch == ':') {
0458: // - System.out.print(" r=TAG_LIB_PREFIX) ");
0459: tid = ELTokenContext.TAG_LIB_PREFIX;
0460: } else {
0461: tid = ELTokenContext.IDENTIFIER;
0462: // - System.out.print(" r=IDENTIFIER)");
0463: }
0464: } else {
0465: // - System.out.println(" r=KEYWORDS )");
0466: }
0467: return tid;
0468: }
0469: // - System.out.print(")");
0470: break;
0471:
0472: case ISI_CHAR:
0473: // - System.out.print(" ISI_CHAR (");
0474: switch (ch) {
0475: case '\\':
0476: // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
0477: state = ISI_CHAR_A_BSLASH;
0478: break;
0479: case '\n':
0480: state = INIT;
0481: // - System.out.print(" state->INIT r=CHAR_LITERAL )");
0482: return ELTokenContext.CHAR_LITERAL;
0483: case '\'':
0484: offset++;
0485: state = INIT;
0486: // - System.out.print(" state->INIT r=CHAR_LITERAL )");
0487: return ELTokenContext.CHAR_LITERAL;
0488: default:
0489:
0490: if (buffer[offset - 1] != '\''
0491: && buffer[offset - 1] != '\\') {
0492: // - System.out.print(" state->ISI_CHAR_STRING ");
0493: state = ISI_CHAR_STRING;
0494: }
0495:
0496: }
0497: // - System.out.print(")");
0498: break;
0499:
0500: case ISI_CHAR_A_BSLASH:
0501: switch (ch) {
0502: case '\'':
0503: case '\\':
0504: break;
0505: default:
0506: offset--;
0507: break;
0508: }
0509: state = ISI_CHAR;
0510: break;
0511:
0512: case ISI_CHAR_STRING:
0513: // - System.out.print(" ISI_CHAR_STRING (");
0514: switch (ch) {
0515: case '\\':
0516: // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
0517: state = ISI_CHAR_STRING_A_BSLASH;
0518: break;
0519: case '\n':
0520: state = INIT;
0521: // - System.out.print(" state->INIT r=STRING_LITERAL )");
0522: return ELTokenContext.STRING_LITERAL;
0523: case '\'':
0524: offset++;
0525: state = INIT;
0526: // - System.out.print(" state->INIT r=STRING_LITERAL )");
0527: return ELTokenContext.STRING_LITERAL;
0528: }
0529: // - System.out.print(")");
0530: break;
0531:
0532: case ISI_CHAR_STRING_A_BSLASH:
0533: switch (ch) {
0534: case '\'':
0535: case '\\':
0536: break;
0537: default:
0538: offset--;
0539: break;
0540: }
0541: state = ISI_CHAR_STRING;
0542: break;
0543:
0544: case ISI_BRACKET_ISA_ZERO:
0545: case ISA_ZERO:
0546: switch (ch) {
0547: case '.':
0548: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE
0549: : ISI_DOUBLE;
0550: break;
0551: case 'x':
0552: case 'X':
0553: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX
0554: : ISI_HEX;
0555: break;
0556: case 'l':
0557: case 'L':
0558: offset++;
0559: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
0560: : INIT;
0561: return ELTokenContext.LONG_LITERAL;
0562: case 'f':
0563: case 'F':
0564: offset++;
0565: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
0566: : INIT;
0567: return ELTokenContext.FLOAT_LITERAL;
0568: case 'd':
0569: case 'D':
0570: offset++;
0571: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
0572: : INIT;
0573: return ELTokenContext.DOUBLE_LITERAL;
0574: case '8': // it's error to have '8' and '9' in octal number
0575: case '9':
0576: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
0577: : INIT;
0578: offset++;
0579: return ELTokenContext.INVALID_OCTAL_LITERAL;
0580: case 'e':
0581: case 'E':
0582: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP
0583: : ISI_DOUBLE_EXP;
0584: break;
0585: default:
0586: if (Character.isDigit(ch)) { // '8' and '9' already handled
0587: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL
0588: : ISI_OCTAL;
0589: break;
0590: }
0591: state = (state == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET
0592: : INIT;
0593: return ELTokenContext.INT_LITERAL;
0594: }
0595: break;
0596:
0597: case ISI_BRACKET_ISI_INT:
0598: case ISI_INT:
0599: switch (ch) {
0600: case 'l':
0601: case 'L':
0602: offset++;
0603: state = (state == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
0604: : INIT;
0605: return ELTokenContext.LONG_LITERAL;
0606: case '.':
0607: state = (state == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE
0608: : ISI_DOUBLE;
0609: break;
0610: case 'f':
0611: case 'F':
0612: offset++;
0613: state = (state == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
0614: : INIT;
0615: return ELTokenContext.FLOAT_LITERAL;
0616: case 'd':
0617: case 'D':
0618: offset++;
0619: state = (state == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
0620: : INIT;
0621: return ELTokenContext.DOUBLE_LITERAL;
0622: case 'e':
0623: case 'E':
0624: state = ISI_DOUBLE_EXP;
0625: break;
0626: default:
0627: if (!(ch >= '0' && ch <= '9')) {
0628: state = (state == ISI_BRACKET_ISI_INT) ? ISI_BRACKET
0629: : INIT;
0630: return ELTokenContext.INT_LITERAL;
0631: }
0632: }
0633: break;
0634:
0635: case ISI_BRACKET_ISI_OCTAL:
0636: case ISI_OCTAL:
0637: if (!(ch >= '0' && ch <= '7')) {
0638: state = (state == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET
0639: : INIT;
0640: return ELTokenContext.OCTAL_LITERAL;
0641: }
0642: break;
0643:
0644: case ISI_BRACKET_ISI_DOUBLE:
0645: case ISI_DOUBLE:
0646: switch (ch) {
0647: case 'f':
0648: case 'F':
0649: offset++;
0650: state = (state == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
0651: : INIT;
0652: return ELTokenContext.FLOAT_LITERAL;
0653: case 'd':
0654: case 'D':
0655: offset++;
0656: state = (state == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
0657: : INIT;
0658: return ELTokenContext.DOUBLE_LITERAL;
0659: case 'e':
0660: case 'E':
0661: state = (state == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP
0662: : ISI_DOUBLE_EXP;
0663: break;
0664: default:
0665: if (!((ch >= '0' && ch <= '9') || ch == '.')) {
0666: state = (state == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET
0667: : INIT;
0668: return ELTokenContext.DOUBLE_LITERAL;
0669: }
0670: }
0671: break;
0672:
0673: case ISI_DOUBLE_EXP:
0674: case ISI_BRACKET_ISI_DOUBLE_EXP:
0675: switch (ch) {
0676: case 'f':
0677: case 'F':
0678: offset++;
0679: state = (state == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
0680: : INIT;
0681: return ELTokenContext.FLOAT_LITERAL;
0682: case 'd':
0683: case 'D':
0684: offset++;
0685: state = (state == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
0686: : INIT;
0687: return ELTokenContext.DOUBLE_LITERAL;
0688: case '-':
0689: case '+':
0690: state = ISI_DOULE_EXP_ISA_SIGN;
0691: break;
0692: default:
0693: if (!Character.isDigit(ch)) {
0694: //|| ch == '-' || ch == '+')) {
0695: state = (state == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET
0696: : INIT;
0697: return ELTokenContext.DOUBLE_LITERAL;
0698: }
0699: }
0700: break;
0701:
0702: case ISI_DOULE_EXP_ISA_SIGN:
0703: case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
0704: if (!Character.isDigit(ch)) {
0705: state = (state == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET
0706: : INIT;
0707: return ELTokenContext.DOUBLE_LITERAL;
0708: }
0709: break;
0710:
0711: case ISI_BRACKET_ISI_HEX:
0712: case ISI_HEX:
0713: if (!((ch >= 'a' && ch <= 'f')
0714: || (ch >= 'A' && ch <= 'F') || Character
0715: .isDigit(ch))) {
0716: state = (state == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET
0717: : INIT;
0718: return ELTokenContext.HEX_LITERAL;
0719: }
0720: break;
0721:
0722: case ISI_BRACKET_ISA_DOT:
0723: case ISA_DOT:
0724: if (Character.isDigit(ch)) {
0725: state = (state == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE
0726: : ISI_DOUBLE;
0727:
0728: } else { // only single dot
0729: state = (state == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET
0730: : INIT;
0731: return ELTokenContext.DOT;
0732: }
0733: break;
0734:
0735: } // end of switch(state)
0736:
0737: offset++; // move to the next char
0738: }
0739:
0740: /* At this state there's no more text in the scanned buffer.
0741: * The caller will decide either to stop scanning at all
0742: * or to relocate scanning and provide next buffer with characters.
0743: * The lastBuffer variable indicates whether the scanning will
0744: * stop (true) or the caller will provide another buffer
0745: * to continue on (false) and call relocate() to continue on the given buffer.
0746: * If this is the last buffer, the analyzer must ensure
0747: * that for all internal states there will be some token ID returned.
0748: * The easiest way how to ensure that all the internal states will
0749: * be covered is to copy all the internal state constants and
0750: * put them after the switch() and provide the code that will return
0751: * appropriate token ID.
0752: *
0753: * When there are no more characters available in the buffer
0754: * and the buffer is not the last one the analyzer can still
0755: * decide to return the token ID even if it doesn't know whether
0756: * the token is complete or not. This is possible in this simple
0757: * implementation for example because it doesn't matter whether
0758: * it returns the text all together or broken into several pieces.
0759: * The advantage of such aproach is that the preScan value
0760: * is minimized which avoids the additional increasing of the buffer
0761: * by preScan characters, but on the other hand it can become
0762: * problematic if the token should be forwarded for some further
0763: * processing. For example it could seem handy to return incomplete
0764: * token for java block comments but it could become difficult
0765: * if we would want to analyzer these comment tokens additionally
0766: * by the HTML analyzer for example.
0767: */
0768:
0769: // Normally the following block would be done only for lastBuffer == true
0770: // but in this case it can always be done
0771: if (lastBuffer) {
0772: switch (state) {
0773: case ISI_WHITESPACE:
0774: state = INIT;
0775: return ELTokenContext.WHITESPACE;
0776: case ISI_IDENTIFIER:
0777: state = INIT;
0778: TokenID kwd = matchKeyword(buffer, tokenOffset, offset
0779: - tokenOffset);
0780: return (kwd != null) ? kwd : ELTokenContext.IDENTIFIER;
0781: case ISI_STRING:
0782: case ISI_STRING_A_BSLASH:
0783: return ELTokenContext.STRING_LITERAL; // hold the state
0784: case ISI_CHAR:
0785: case ISI_CHAR_A_BSLASH:
0786: return ELTokenContext.CHAR_LITERAL;
0787: case ISI_CHAR_STRING:
0788: case ISI_CHAR_STRING_A_BSLASH:
0789: return ELTokenContext.STRING_LITERAL;
0790: case ISA_ZERO:
0791: case ISI_INT:
0792: state = INIT;
0793: return ELTokenContext.INT_LITERAL;
0794: case ISI_OCTAL:
0795: state = INIT;
0796: return ELTokenContext.OCTAL_LITERAL;
0797: case ISI_DOUBLE:
0798: case ISI_DOUBLE_EXP:
0799: case ISI_DOULE_EXP_ISA_SIGN:
0800: case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
0801: state = INIT;
0802: return ELTokenContext.DOUBLE_LITERAL;
0803: case ISI_HEX:
0804: state = INIT;
0805: return ELTokenContext.HEX_LITERAL;
0806: case ISA_DOT:
0807: state = INIT;
0808: return ELTokenContext.DOT;
0809: case ISA_EQ:
0810: state = INIT;
0811: return ELTokenContext.EQ_EQ;
0812: case ISA_GT:
0813: state = INIT;
0814: return ELTokenContext.GT;
0815: case ISA_LT:
0816: state = INIT;
0817: return ELTokenContext.LT;
0818: case ISA_PIPE:
0819: state = INIT;
0820: return ELTokenContext.OR_OR;
0821: case ISA_AND:
0822: state = INIT;
0823: return ELTokenContext.AND_AND;
0824: case ISA_EXCLAMATION:
0825: state = INIT;
0826: return ELTokenContext.NOT;
0827: case ISI_BRACKET:
0828: case ISI_BRACKET_A_IDENTIFIER:
0829: state = INIT;
0830: return ELTokenContext.IDENTIFIER;
0831: case ISI_BRACKET_A_WHITESPACE:
0832: state = ISI_BRACKET;
0833: return ELTokenContext.WHITESPACE;
0834: case ISI_BRACKET_ISA_EQ:
0835: state = ISI_BRACKET;
0836: return ELTokenContext.EQ_EQ;
0837: case ISI_BRACKET_ISA_GT:
0838: state = ISI_BRACKET;
0839: return ELTokenContext.GT_EQ;
0840: case ISI_BRACKET_ISA_LT:
0841: state = ISI_BRACKET;
0842: return ELTokenContext.LT_EQ;
0843: case ISI_BRACKET_ISA_AND:
0844: state = ISI_BRACKET;
0845: return ELTokenContext.AND_AND;
0846: case ISI_BRACKET_ISA_PIPE:
0847: state = ISI_BRACKET;
0848: return ELTokenContext.OR_OR;
0849: case ISI_BRACKET_ISA_DOT:
0850: state = ISI_BRACKET;
0851: return ELTokenContext.DOT;
0852: case ISI_BRACKET_ISA_ZERO:
0853: case ISI_BRACKET_ISI_INT:
0854: state = ISI_BRACKET;
0855: return ELTokenContext.INT_LITERAL;
0856:
0857: }
0858: }
0859:
0860: return null;
0861: }
0862:
0863: public static TokenID matchKeyword(char[] buffer, int offset,
0864: int len) {
0865: if (len > 10)
0866: return null;
0867: if (len <= 1)
0868: return null;
0869: switch (buffer[offset++]) {
0870: case 'a':
0871: if (len <= 2)
0872: return null;
0873: return (len == 3 && buffer[offset++] == 'n' && buffer[offset++] == 'd') ? ELTokenContext.AND_KEYWORD
0874: : null;
0875: case 'd':
0876: if (len <= 2)
0877: return null;
0878: return (len == 3 && buffer[offset++] == 'i' && buffer[offset++] == 'v') ? ELTokenContext.DIV_KEYWORD
0879: : null;
0880: case 'e':
0881: switch (buffer[offset++]) {
0882: case 'q':
0883: return (len == 2) ? ELTokenContext.EQ_KEYWORD : null;
0884: case 'm':
0885: return (len == 5 && buffer[offset++] == 'p'
0886: && buffer[offset++] == 't' && buffer[offset++] == 'y') ? ELTokenContext.EMPTY_KEYWORD
0887: : null;
0888: default:
0889: return null;
0890: }
0891: case 'f':
0892: return (len == 5 && buffer[offset++] == 'a'
0893: && buffer[offset++] == 'l'
0894: && buffer[offset++] == 's' && buffer[offset++] == 'e') ? ELTokenContext.FALSE_KEYWORD
0895: : null;
0896: case 'g':
0897: switch (buffer[offset++]) {
0898: case 'e':
0899: return (len == 2) ? ELTokenContext.GE_KEYWORD : null;
0900: case 't':
0901: return (len == 2) ? ELTokenContext.GT_KEYWORD : null;
0902: default:
0903: return null;
0904: }
0905: case 'l':
0906: switch (buffer[offset++]) {
0907: case 'e':
0908: return (len == 2) ? ELTokenContext.LE_KEYWORD : null;
0909: case 't':
0910: return (len == 2) ? ELTokenContext.LT_KEYWORD : null;
0911: default:
0912: return null;
0913: }
0914: case 'i':
0915: if (len <= 9)
0916: return null;
0917: return (len == 10 && buffer[offset++] == 'n'
0918: && buffer[offset++] == 's'
0919: && buffer[offset++] == 't'
0920: && buffer[offset++] == 'a'
0921: && buffer[offset++] == 'n'
0922: && buffer[offset++] == 'c'
0923: && buffer[offset++] == 'e'
0924: && buffer[offset++] == 'o' && buffer[offset++] == 'f') ? ELTokenContext.INSTANCEOF_KEYWORD
0925: : null;
0926: case 'm':
0927: if (len <= 2)
0928: return null;
0929: return (len == 3 && buffer[offset++] == 'o' && buffer[offset++] == 'd') ? ELTokenContext.MOD_KEYWORD
0930: : null;
0931: case 'n':
0932: switch (buffer[offset++]) {
0933: case 'e':
0934: return (len == 2) ? ELTokenContext.NE_KEYWORD : null;
0935: case 'o':
0936: return (len == 3 && buffer[offset++] == 't') ? ELTokenContext.NOT_KEYWORD
0937: : null;
0938: case 'u':
0939: return (len == 4 && buffer[offset++] == 'l' && buffer[offset++] == 'l') ? ELTokenContext.NULL_KEYWORD
0940: : null;
0941: default:
0942: return null;
0943: }
0944: case 'o':
0945: return (len == 2 && buffer[offset++] == 'r') ? ELTokenContext.OR_KEYWORD
0946: : null;
0947: case 't':
0948: return (len == 4 && buffer[offset++] == 'r'
0949: && buffer[offset++] == 'u' && buffer[offset++] == 'e') ? ELTokenContext.TRUE_KEYWORD
0950: : null;
0951:
0952: default:
0953: return null;
0954: }
0955: }
0956:
0957: public String getStateName(int stateNumber) {
0958: switch (stateNumber) {
0959: case ISI_IDENTIFIER:
0960: return "jsp_el_ISI_IDENTIFIER"; //NOI18N
0961: case ISI_CHAR:
0962: return "el_ISI_CHAR"; //NOI18N
0963: case ISI_CHAR_A_BSLASH:
0964: return "el_ISI_CHAR_A_BSLASH"; //NOI18N
0965: case ISI_STRING:
0966: return "el_ISI_STRING"; //NOI18N
0967: case ISI_STRING_A_BSLASH:
0968: return "el_ISI_STRING_A_BSLASH"; //NOI18N
0969: case ISI_CHAR_STRING:
0970: return "el_ISI_CHAR_STRING"; //NOI18N
0971: case ISI_CHAR_STRING_A_BSLASH:
0972: return "el_ISI_CHAR_STRING_A_BSLASH";//NOI18N
0973: case ISA_ZERO:
0974: return "el_ISA_ZERO"; //NOI18N
0975: case ISI_INT:
0976: return "el_ISI_INT"; //NOI18N
0977: case ISI_OCTAL:
0978: return "el_ISI_OCTAL"; //NOI18N
0979: case ISI_DOUBLE:
0980: return "el_ISI_DOUBLE"; //NOI18N
0981: case ISI_DOUBLE_EXP:
0982: return "el_ISI_DOUBLE_EXP"; //NOI18N
0983: case ISI_HEX:
0984: return "el_ISI_HEX"; //NOI18N
0985: case ISA_DOT:
0986: return "el_ISA_DOT"; //NOI18N
0987: case ISI_WHITESPACE:
0988: return "el_ISI_WHITESPACE"; //NOI18N
0989: case ISA_EQ:
0990: return "el_ISA_EQ"; //NOI18N
0991: case ISA_GT:
0992: return "el_ISA_GT"; //NOI18N
0993: case ISA_LT:
0994: return "el_ISA_LT"; //NOI18N
0995: case ISA_PIPE:
0996: return "el_ISA_PIPE"; //NOI18N
0997: case ISA_AND:
0998: return "el_ISA_AND"; //NOI18N
0999: case ISA_EXCLAMATION:
1000: return "el_ISA_EXCLAMATION"; //NOI18N
1001: case ISI_BRACKET:
1002: return "el_ISI_BRACKET"; //NOI18N
1003: case ISI_BRACKET_A_WHITESPACE:
1004: return "el_ISI_BRACKET_A_WHITSPACE";//NOI18N
1005: case ISI_BRACKET_ISA_EQ:
1006: return "el_ISI_BRACKET_ISA_EQ"; //NOI18N
1007: case ISI_BRACKET_ISA_GT:
1008: return "el_ISI_BRACKET_ISA_GT"; //NOI18N
1009: case ISI_BRACKET_ISA_LT:
1010: return "el_ISI_BRACKET_ISA_LT"; //NOI18N
1011: case ISI_BRACKET_ISA_AND:
1012: return "el_ISI_BRACKET_ISA_AND"; //NOI18N
1013: case ISI_BRACKET_ISA_PIPE:
1014: return "el_ISI_BRACKET_ISA_PIPE"; //NOI18N
1015: case ISI_BRACKET_ISI_INT:
1016: return "el_ISI_BRACKET_ISI_INT"; //NOI18N
1017: case ISI_BRACKET_ISI_OCTAL:
1018: return "el_ISI_BRACKET_ISI_OCTAL"; //NOI18N
1019: case ISI_BRACKET_ISI_DOUBLE:
1020: return "el_ISI_BRACKET_ISI_DOUBLE"; //NOI18N
1021: case ISI_BRACKET_ISI_DOUBLE_EXP:
1022: return "el_ISI_BRACKET_ISI_DOUBLE_EXP";//NOI18N
1023: case ISI_BRACKET_ISI_HEX:
1024: return "el_ISI_BRACKET_ISI_HEX"; //NOI18N
1025: case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
1026: return "el_ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN";
1027: case ISI_DOULE_EXP_ISA_SIGN:
1028: return "el_ISI_DOULE_EXP_ISA_SIGN";
1029: default:
1030: return super.getStateName(stateNumber);
1031: }
1032: }
1033:
1034: }
|