0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: /* The following code was generated by JFlex 1.4.1 on 20/12/06 21:02 */
0019:
0020: package javax.swing.text.html.parser;
0021:
0022: import java_cup.runtime.Symbol;
0023: import javax.swing.text.html.HTML;
0024: import javax.swing.text.html.HTML.Tag;
0025: import java.util.ArrayList;
0026: import java.util.List;
0027: import java.math.BigInteger;
0028:
0029: /**
0030: * This class is a scanner generated by
0031: * <a href="http://www.jflex.de/">JFlex</a> 1.4.1
0032: * on 20/12/06 21:02 from the specification file
0033: * <tt>/home/asanchez/workspace/ParserHtml/parser-spec/Lexer.lex</tt>
0034: */
0035: class Lexer implements java_cup.runtime.Scanner {
0036:
0037: /** This character denotes the end of file */
0038: public static final int YYEOF = -1;
0039:
0040: /** initial size of the lookahead buffer */
0041: private static final int ZZ_BUFFERSIZE = 16384;
0042:
0043: /** lexical states */
0044: public static final int STARTTAG = 7;
0045: public static final int TAG = 1;
0046: public static final int ATTREQUALS = 8;
0047: public static final int ATTRVALLIT_SQM = 11;
0048: public static final int ENDTAG = 4;
0049: public static final int CHECK_IF_CDATA = 16;
0050: public static final int ATTRVALSTART = 9;
0051: public static final int YYINITIAL_NOTEXT = 17;
0052: public static final int CDATA_ENDTAG_NAME = 1;
0053: public static final int TAG_TRAILINGWS = 13;
0054: public static final int STARTENDTAG = 1;
0055: public static final int MDSTART = 12;
0056: public static final int ATTRVAL = 3;
0057: public static final int CDATA = 14;
0058: public static final int COMMENT = 6;
0059: public static final int IGNORED_Q_TAG = 19;
0060: public static final int CDATA_ENDTAG_START = 15;
0061: public static final int MD = 5;
0062: public static final int CDATA_ENDTAG_END = 1;
0063: public static final int ATTRVALLIT_DQM = 10;
0064: public static final int YYINITIAL = 0;
0065: public static final int TAG_IGNORE_ATTS = 18;
0066: public static final int ATTR = 2;
0067:
0068: /**
0069: * Translates characters to character classes
0070: */
0071: private static final String ZZ_CMAP_PACKED = "\11\0\1\5\1\6\2\0\1\7\22\0\1\4\1\16\1\11\1\20"
0072: + "\2\0\1\10\1\12\5\0\1\3\1\21\1\15\12\1\1\25\1\17"
0073: + "\1\13\1\26\1\14\1\24\1\0\6\23\21\2\1\22\2\2\6\0"
0074: + "\6\23\21\2\1\22\2\2\uff85\0";
0075:
0076: /**
0077: * Translates characters to character classes
0078: */
0079: private static final char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
0080:
0081: /**
0082: * Translates DFA states to action switch labels.
0083: */
0084: private static final int[] ZZ_ACTION = zzUnpackAction();
0085:
0086: private static final String ZZ_ACTION_PACKED_0 = "\6\0\1\1\3\0\1\2\1\3\10\0\1\4\1\5"
0087: + "\1\6\1\7\1\4\1\10\1\11\1\12\1\13\1\14"
0088: + "\1\15\1\16\1\17\1\20\1\21\1\20\1\22\1\23"
0089: + "\1\24\1\20\1\25\1\26\1\27\1\30\1\31\1\32"
0090: + "\1\33\1\34\1\33\1\35\1\36\1\37\1\1\1\40"
0091: + "\1\41\1\42\1\43\1\44\1\45\1\46\1\47\1\50"
0092: + "\1\51\1\52\1\53\1\17\1\2\1\54\1\55\1\56"
0093: + "\1\2\1\57\1\3\1\60\1\61\1\62\1\3\1\63"
0094: + "\1\64\1\65\1\17\1\66\1\67\1\70\1\71\2\72"
0095: + "\1\73\1\74\2\75\1\76\1\77\1\100\2\101\1\102"
0096: + "\1\103\1\104\1\105\1\0\1\106\1\107\1\0\1\110"
0097: + "\1\111\1\112\1\113\1\0\1\114\1\115\1\116\2\0"
0098: + "\1\117\1\120\1\121\1\0\1\122\1\123\1\0\1\124"
0099: + "\1\125\1\126\2\0\1\127\1\130\1\0\1\131\1\132"
0100: + "\1\133\1\134\1\135\1\107\1\0\1\107\1\136\1\113"
0101: + "\1\0\1\113\1\0\1\137\1\140\1\121\1\0\1\121"
0102: + "\1\141\1\123\1\0\1\123\1\142\1\126\1\0\1\126"
0103: + "\1\0\1\143\1\107\1\113\1\121\1\123\1\126\1\144";
0104:
0105: private static int[] zzUnpackAction() {
0106: int[] result = new int[163];
0107: int offset = 0;
0108: offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
0109: return result;
0110: }
0111:
0112: private static int zzUnpackAction(String packed, int offset,
0113: int[] result) {
0114: int i = 0; /* index in packed string */
0115: int j = offset; /* index in unpacked array */
0116: int l = packed.length();
0117: while (i < l) {
0118: int count = packed.charAt(i++);
0119: int value = packed.charAt(i++);
0120: do
0121: result[j++] = value;
0122: while (--count > 0);
0123: }
0124: return j;
0125: }
0126:
0127: /**
0128: * Translates a state to a row index in the transition table
0129: */
0130: private static final int[] ZZ_ROWMAP = zzUnpackRowMap();
0131:
0132: private static final String ZZ_ROWMAP_PACKED_0 = "\0\0\0\27\0\56\0\105\0\134\0\163\0\212\0\241"
0133: + "\0\270\0\317\0\346\0\375\0\u0114\0\u012b\0\u0142\0\u0159"
0134: + "\0\u0170\0\u0187\0\u019e\0\u01b5\0\u01cc\0\u01e3\0\27\0\u01fa"
0135: + "\0\u0211\0\27\0\27\0\u0228\0\u023f\0\27\0\u0256\0\27"
0136: + "\0\27\0\u026d\0\u0284\0\u029b\0\u02b2\0\27\0\u02c9\0\u02e0"
0137: + "\0\27\0\27\0\27\0\u02f7\0\27\0\u030e\0\27\0\u0325"
0138: + "\0\u033c\0\u0353\0\27\0\u036a\0\u0381\0\u0398\0\27\0\27"
0139: + "\0\27\0\27\0\27\0\u03af\0\27\0\u03c6\0\27\0\27"
0140: + "\0\27\0\u026d\0\u03dd\0\u03f4\0\u03dd\0\u040b\0\u0422\0\27"
0141: + "\0\u0439\0\u0450\0\u0439\0\u0467\0\u047e\0\27\0\u0495\0\27"
0142: + "\0\u04ac\0\27\0\u04c3\0\27\0\u04da\0\u04f1\0\u0508\0\27"
0143: + "\0\u051f\0\u0536\0\u054d\0\27\0\u0564\0\27\0\27\0\u057b"
0144: + "\0\27\0\27\0\u0592\0\u05a9\0\u05c0\0\27\0\u05d7\0\u05ee"
0145: + "\0\27\0\u0605\0\27\0\u061c\0\u0633\0\27\0\27\0\27"
0146: + "\0\u064a\0\u0661\0\27\0\u03dd\0\u0678\0\u068f\0\u0439\0\u06a6"
0147: + "\0\u06bd\0\27\0\27\0\u06d4\0\u06eb\0\u0702\0\u0719\0\27"
0148: + "\0\u0730\0\27\0\27\0\27\0\27\0\27\0\u0747\0\u075e"
0149: + "\0\u0775\0\27\0\u078c\0\u07a3\0\u07ba\0\u07d1\0\27\0\27"
0150: + "\0\u07e8\0\u07ff\0\u0816\0\27\0\u082d\0\u0844\0\u085b\0\27"
0151: + "\0\u0872\0\u0889\0\u08a0\0\u08b7\0\u08ce\0\u08e5\0\u08fc\0\u0913"
0152: + "\0\u092a\0\u0941\0\27";
0153:
0154: private static int[] zzUnpackRowMap() {
0155: int[] result = new int[163];
0156: int offset = 0;
0157: offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
0158: return result;
0159: }
0160:
0161: private static int zzUnpackRowMap(String packed, int offset,
0162: int[] result) {
0163: int i = 0; /* index in packed string */
0164: int j = offset; /* index in unpacked array */
0165: int l = packed.length();
0166: while (i < l) {
0167: int high = packed.charAt(i++) << 16;
0168: result[j++] = high | packed.charAt(i++);
0169: }
0170: return j;
0171: }
0172:
0173: /**
0174: * The transition table of the DFA
0175: */
0176: private static final int[] ZZ_TRANS = zzUnpackTrans();
0177:
0178: private static final String ZZ_TRANS_PACKED_0 = "\4\25\2\26\1\27\1\30\1\31\2\25\1\32\13\25"
0179: + "\27\0\2\33\1\34\1\33\2\35\1\36\1\37\4\33"
0180: + "\1\40\1\41\4\33\2\34\3\33\1\42\1\43\1\42"
0181: + "\1\44\2\45\1\46\1\47\1\50\1\51\1\52\1\0"
0182: + "\1\40\12\42\4\53\2\54\1\55\1\56\4\53\1\40"
0183: + "\12\53\2\57\1\60\1\61\2\62\1\63\1\64\12\57"
0184: + "\2\60\3\57\3\65\1\66\23\65\4\67\2\35\1\36"
0185: + "\1\37\3\67\1\70\1\71\1\41\7\67\1\72\1\67"
0186: + "\2\73\1\34\1\73\2\74\1\75\1\76\4\73\1\40"
0187: + "\1\41\4\73\2\34\2\73\1\77\1\42\1\43\1\42"
0188: + "\1\44\2\35\1\36\1\37\1\50\1\100\2\101\1\40"
0189: + "\1\102\11\42\4\103\2\104\1\105\1\106\1\107\1\110"
0190: + "\15\103\4\111\2\112\1\113\1\114\1\115\1\111\1\116"
0191: + "\14\111\14\117\1\40\12\117\4\120\2\121\1\122\1\123"
0192: + "\4\120\1\124\12\120\4\125\1\126\1\127\1\130\1\131"
0193: + "\1\132\2\125\1\133\13\125\2\134\1\135\3\134\1\0"
0194: + "\5\134\1\40\5\134\2\135\3\134\27\136\6\137\1\0"
0195: + "\4\137\1\140\13\137\6\141\1\142\1\143\3\141\1\70"
0196: + "\1\71\12\141\24\144\1\145\2\144\4\25\5\0\2\25"
0197: + "\1\0\13\25\4\0\2\26\27\0\1\146\22\0\1\147"
0198: + "\15\0\1\150\1\0\2\147\4\0\3\34\15\0\3\34"
0199: + "\7\0\2\35\27\0\1\151\20\0\1\42\1\0\2\42"
0200: + "\11\0\12\42\1\152\1\43\2\152\11\0\12\152\1\42"
0201: + "\1\43\2\42\11\0\12\42\4\0\2\45\27\0\1\153"
0202: + "\22\0\1\154\15\0\1\155\1\0\2\154\7\0\2\54"
0203: + "\27\0\1\156\21\0\3\60\15\0\3\60\6\0\1\157"
0204: + "\27\0\2\62\27\0\1\160\20\0\3\65\1\161\26\65"
0205: + "\1\162\23\65\4\0\2\74\27\0\1\163\20\0\10\103"
0206: + "\2\0\21\103\2\104\2\103\2\0\23\103\1\164\1\103"
0207: + "\2\0\15\103\2\0\1\165\15\0\1\166\1\0\2\165"
0208: + "\3\0\10\111\1\0\1\111\1\0\20\111\2\112\2\111"
0209: + "\1\0\1\111\1\0\22\111\1\167\1\111\1\0\1\111"
0210: + "\1\0\14\111\2\0\1\170\15\0\1\171\1\0\2\170"
0211: + "\3\0\14\117\1\0\12\117\4\0\2\121\27\0\1\172"
0212: + "\20\0\4\125\1\0\1\125\3\0\2\125\1\0\13\125"
0213: + "\4\0\2\126\21\0\4\125\1\126\1\127\3\0\2\125"
0214: + "\1\0\13\125\6\0\1\173\22\0\1\174\15\0\1\175"
0215: + "\1\0\2\174\20\0\1\176\12\0\3\135\15\0\3\135"
0216: + "\5\0\1\177\11\0\1\200\1\201\1\202\3\0\2\177"
0217: + "\1\203\10\0\1\204\20\0\24\144\1\0\2\144\14\0"
0218: + "\1\205\14\0\1\147\14\0\1\206\2\0\2\147\4\0"
0219: + "\1\207\1\147\1\210\16\0\1\211\1\147\3\0\4\152"
0220: + "\11\0\12\152\2\0\1\154\14\0\1\212\2\0\2\154"
0221: + "\4\0\1\213\1\154\1\214\16\0\1\215\1\154\3\0"
0222: + "\3\65\1\216\37\65\1\217\12\65\2\0\1\165\14\0"
0223: + "\1\220\2\0\2\165\4\0\1\221\1\165\1\222\16\0"
0224: + "\1\223\1\165\5\0\1\170\14\0\1\224\2\0\2\170"
0225: + "\4\0\1\225\1\170\1\226\16\0\1\227\1\170\5\0"
0226: + "\1\174\14\0\1\230\2\0\2\174\4\0\1\231\1\174"
0227: + "\1\232\16\0\1\233\1\174\5\0\1\234\17\0\2\234"
0228: + "\4\0\3\177\15\0\3\177\5\0\1\235\17\0\2\235"
0229: + "\4\0\1\207\15\0\1\206\10\0\1\207\26\0\1\236"
0230: + "\1\147\14\0\1\206\2\0\1\147\1\211\4\0\1\213"
0231: + "\15\0\1\212\10\0\1\213\26\0\1\237\1\154\14\0"
0232: + "\1\212\2\0\1\154\1\215\3\0\14\65\1\0\12\65"
0233: + "\1\0\1\221\15\0\1\220\10\0\1\221\26\0\1\240"
0234: + "\1\165\14\0\1\220\2\0\1\165\1\223\4\0\1\225"
0235: + "\15\0\1\224\10\0\1\225\26\0\1\241\1\170\14\0"
0236: + "\1\224\2\0\1\170\1\227\4\0\1\231\15\0\1\230"
0237: + "\10\0\1\231\26\0\1\242\1\174\14\0\1\230\2\0"
0238: + "\1\174\1\233\4\0\3\234\10\0\1\243\4\0\3\234"
0239: + "\4\0\3\235\15\0\3\235\4\0\1\236\15\0\1\206"
0240: + "\3\0\1\236\4\0\1\237\15\0\1\212\3\0\1\237"
0241: + "\4\0\1\240\15\0\1\220\3\0\1\240\4\0\1\241"
0242: + "\15\0\1\224\3\0\1\241\4\0\1\242\15\0\1\230"
0243: + "\3\0\1\242\3\0";
0244:
0245: private static int[] zzUnpackTrans() {
0246: int[] result = new int[2392];
0247: int offset = 0;
0248: offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
0249: return result;
0250: }
0251:
0252: private static int zzUnpackTrans(String packed, int offset,
0253: int[] result) {
0254: int i = 0; /* index in packed string */
0255: int j = offset; /* index in unpacked array */
0256: int l = packed.length();
0257: while (i < l) {
0258: int count = packed.charAt(i++);
0259: int value = packed.charAt(i++);
0260: value--;
0261: do
0262: result[j++] = value;
0263: while (--count > 0);
0264: }
0265: return j;
0266: }
0267:
0268: /* error codes */
0269: private static final int ZZ_UNKNOWN_ERROR = 0;
0270: private static final int ZZ_NO_MATCH = 1;
0271: private static final int ZZ_PUSHBACK_2BIG = 2;
0272:
0273: /* error messages for the codes above */
0274: private static final String ZZ_ERROR_MSG[] = {
0275: "Unkown internal scanner error",
0276: "Error: could not match input",
0277: "Error: pushback value was too large" };
0278:
0279: /**
0280: * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
0281: */
0282: private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute();
0283:
0284: private static final String ZZ_ATTRIBUTE_PACKED_0 = "\1\0\1\10\4\0\1\1\3\0\2\1\10\0\2\1"
0285: + "\1\11\2\1\2\11\2\1\1\11\1\1\2\11\4\1"
0286: + "\1\11\2\1\3\11\1\1\1\11\1\1\1\11\3\1"
0287: + "\1\11\3\1\5\11\1\1\1\11\1\1\3\11\6\1"
0288: + "\1\11\5\1\1\11\1\1\1\11\1\1\1\11\1\1"
0289: + "\1\11\3\1\1\11\3\1\1\11\1\1\2\11\1\1"
0290: + "\2\11\2\1\1\0\1\11\1\1\1\0\1\11\1\1"
0291: + "\1\11\1\1\1\0\3\11\2\0\1\11\2\1\1\0"
0292: + "\2\1\1\0\2\11\1\1\2\0\1\1\1\11\1\0"
0293: + "\5\11\1\1\1\0\1\1\1\11\1\1\1\0\1\1"
0294: + "\1\0\2\11\1\1\1\0\1\1\1\11\1\1\1\0"
0295: + "\1\1\1\11\1\1\1\0\1\1\1\0\6\1\1\11";
0296:
0297: private static int[] zzUnpackAttribute() {
0298: int[] result = new int[163];
0299: int offset = 0;
0300: offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset,
0301: result);
0302: return result;
0303: }
0304:
0305: private static int zzUnpackAttribute(String packed, int offset,
0306: int[] result) {
0307: int i = 0; /* index in packed string */
0308: int j = offset; /* index in unpacked array */
0309: int l = packed.length();
0310: while (i < l) {
0311: int count = packed.charAt(i++);
0312: int value = packed.charAt(i++);
0313: do
0314: result[j++] = value;
0315: while (--count > 0);
0316: }
0317: return j;
0318: }
0319:
0320: /** the input device */
0321: private java.io.Reader zzReader;
0322:
0323: /** the current state of the DFA */
0324: private int zzState;
0325:
0326: /** the current lexical state */
0327: private int zzLexicalState = YYINITIAL;
0328:
0329: /** this buffer contains the current text to be matched and is
0330: the source of the yytext() string */
0331: private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
0332:
0333: /** the textposition at the last accepting state */
0334: private int zzMarkedPos;
0335:
0336: /** the textposition at the last state to be included in yytext */
0337: private int zzPushbackPos;
0338:
0339: /** the current text position in the buffer */
0340: private int zzCurrentPos;
0341:
0342: /** startRead marks the beginning of the yytext() string in the buffer */
0343: private int zzStartRead;
0344:
0345: /** endRead marks the last character in the buffer, that has been read
0346: from input */
0347: private int zzEndRead;
0348:
0349: /** number of newlines encountered up to the start of the matched text */
0350: private int yyline;
0351:
0352: /** the number of characters up to the start of the matched text */
0353: private int yychar;
0354:
0355: /**
0356: * the number of characters from the last newline up to the start of the
0357: * matched text
0358: */
0359: private int yycolumn;
0360:
0361: /**
0362: * zzAtBOL == true <=> the scanner is currently at the beginning of a line
0363: */
0364: private boolean zzAtBOL = true;
0365:
0366: /** zzAtEOF == true <=> the scanner is at the EOF */
0367: private boolean zzAtEOF;
0368:
0369: /** denotes if the user-EOF-code has already been executed */
0370: private boolean zzEOFDone;
0371:
0372: /* user code: */
0373: private StringBuilder buffer = new StringBuilder();
0374:
0375: private int CRcount;
0376:
0377: private int CRNLcount;
0378:
0379: private int NLcount;
0380:
0381: private String currentCDATAClosingTag;
0382:
0383: private int offsetCDATA = 0;
0384:
0385: private LexerTextStateType textState = LexerTextStateType.DEFAULT;
0386:
0387: private int preEntryCounter;
0388:
0389: /**
 * Describes whether a piece of text (no white space) was parsed.
0391: */
0392: private boolean textParsed;
0393:
0394: /**
0395: * Describes whether a line terminator has already been skipped.
0396: */
0397: private boolean terminatorSkipped;
0398:
0399: /**
0400: * Stores the position where a piece of text begins
0401: */
0402: private int textPos;
0403:
0404: private boolean hasTrailingSpaces;
0405:
0406: private boolean hasLeadingSpaces;
0407:
0408: /**
0409: * Indicates if the strict mode of the current Parser instance is set
0410: */
0411: private boolean strict;
0412:
0413: /**
0414: * A reference of the Parser's DTD
0415: */
0416: private DTD dtd;
0417:
0418: /**
0419: * Stores all the TAGS that are set as preformatted at <code>HTML.getAllTags()</code>
0420: */
0421: private static List<String> preformattedTags;
0422:
0423: private static List<String> breaksFlowTags;
0424:
0425: /**
0426: * This flag is used in CHECK_IF_CDATA state
0427: */
0428: private boolean syncronizeSent;
0429:
/**
 * Used for preformatted text. Records whether the last token was a line
 * terminator, so that white space can be collapsed the way the RI does.
 */
0434: private boolean lastWasLineTerminator;
0435:
/**
 * Used for preformatted text. Records whether the text token returned as a
 * SYMBOL may have to drop its last character (a line terminator).
 */
0440: private boolean skipLastLineTerminator;
0441:
0442: /* private boolean lastTokenWasBreaksFlow; */
0443:
0444: /**
0445: * A reference to the current CUP object
0446: */
0447: private ParserCup cup;
0448:
0449: /**
0450: * Sets <code>preformattedTags</code> with all the tags (<code>HTML.Tag</code>)
0451: * that are set as preformatted, by examining isPreformatted() method.
0452: * <br>
0453: * Then, each time the method <code>isPreformatted(String)</code> is called
0454: * search at this <code>ArrayList</code>
0455: */
0456: static {
0457: preformattedTags = new ArrayList<String>();
0458: for (Tag tag : HTML.getAllTags()) {
0459: if (tag.isPreformatted()) {
0460: preformattedTags.add(tag.toString().toLowerCase());
0461: }
0462: }
0463: }
0464:
0465: public void setStrict(boolean strict) {
0466: this .strict = strict;
0467: }
0468:
0469: public int getOffset() {
0470: return yychar;
0471: }
0472:
0473: public void setCup(ParserCup cup) {
0474: this .cup = cup;
0475: }
0476:
0477: public void setDTD(DTD dtd) {
0478: this .dtd = dtd;
0479: }
0480:
0481: public int yyline() {
0482: return yyline;
0483: }
0484:
0485: /**
0486: * Decides if a tag name corresponds to a preformatted one.
0487: * <br>
0488: * This method use <code>preformattedTags</code> in order retrieve this information
0489: *
0490: * @param tokenTag The tag name to be analyzed.
0491: * @return True if the tag name corresponds to a preformatted one. Otherwise
0492: * it returns false.
0493: */
0494: private boolean isPreformatted(String tokenTag) {
0495: return preformattedTags.contains(tokenTag);
0496: }
0497:
0498: private void appendText(LexerTextType textType) {
0499: if (buffer.length() == 0) {
0500: textPos = yychar;
0501: }
0502: switch (textState) {
0503: /*
0504: * DEFAULT STATE
0505: */
0506: case DEFAULT:
0507: switch (textType) {
0508: case SPACE:
0509: case LINE_TERMINATOR:
0510: if (!hasLeadingSpaces && !textParsed) {
0511: hasLeadingSpaces = true;
0512: }
0513: hasTrailingSpaces = true;
0514: break;
0515: case TEXT:
0516: if (hasTrailingSpaces && textParsed) {
0517: buffer.append(" ");
0518: }
0519: buffer.append(yytext());
0520: hasTrailingSpaces = false;
0521: textParsed = true;
0522: break;
0523: case ENTITY_SEMI:
0524: if (hasTrailingSpaces && textParsed) {
0525: buffer.append(" ");
0526: }
0527: buffer.append(replaceEntity(true));
0528: textParsed = true;
0529: hasTrailingSpaces = false;
0530: break;
0531: case ENTITY_NO_SEMI:
0532: if (hasTrailingSpaces && textParsed) {
0533: buffer.append(" ");
0534: }
0535: buffer.append(replaceEntity(false));
0536: textParsed = true;
0537: hasTrailingSpaces = false;
0538: break;
0539: }
0540: break;
0541: /*
0542: * PREFORMATTED STATE
0543: */
0544: case PREFORMATTED:
0545: switch (textType) {
0546: case SPACE:
0547: buffer.append(yytext());
0548: break;
0549: case LINE_TERMINATOR:
0550: buffer.append(yytext());
0551: terminatorSkipped = false;
0552: break;
0553: case TEXT:
0554: buffer.append(yytext());
0555: textParsed = true;
0556: break;
0557: case ENTITY_SEMI:
0558: buffer.append(replaceEntity(true));
0559: textParsed = true;
0560: break;
0561: case ENTITY_NO_SEMI:
0562: buffer.append(replaceEntity(false));
0563: textParsed = true;
0564: break;
0565: }
0566: break;
0567: /*
0568: * CDATA STATE
0569: */
0570: case CDATA:
0571: switch (textType) {
0572: case TEXT:
0573: case SPACE:
0574: buffer.append(yytext());
0575: textParsed = true;
0576: break;
0577: case LINE_TERMINATOR:
0578: if (!terminatorSkipped) {
0579: // skipping line terminator
0580: terminatorSkipped = true;
0581: } else {
0582: // append line terminator
0583: buffer.append(yytext());
0584: }
0585: break;
0586: case ENTITY_SEMI:
0587: buffer.append(replaceEntity(true));
0588: textParsed = true;
0589: break;
0590: case ENTITY_NO_SEMI:
0591: buffer.append(replaceEntity(false));
0592: textParsed = true;
0593: break;
0594: }
0595: break;
0596: }
0597: }
0598:
0599: private String replaceEntity(boolean endsWithSemi) {
0600: Entity entity = null;
0601: String str;
0602: if (endsWithSemi) {
0603: str = yytext().substring(1, yytext().length() - 1);
0604: } else {
0605: str = yytext().substring(1, yytext().length());
0606: }
0607:
0608: if (str.startsWith("#")) {
0609: if (str.codePointAt(1) == 'X' || str.codePointAt(1) == 'x') {
0610: entity = dtd.getEntity(Integer.parseInt(str
0611: .substring(2), 16));
0612: } else {
0613: entity = dtd.getEntity(Integer.parseInt(str
0614: .substring(1)));
0615: }
0616: } else {
0617: entity = dtd.getEntity(str);
0618: }
0619: return entity == null ? "&" + str : String.valueOf(entity.data);
0620: }
0621:
0622: private HTMLText flushBufferedText() {
0623: String str = buffer.toString();
0624: buffer = new StringBuilder();
0625: HTMLText textToken = null;
0626: if (textParsed) {
0627: /*
0628: * Collapses if there is only one last line terminator.
0629: * If there are more then doesn't do it (same as RI)
0630: */
0631: if (skipLastLineTerminator
0632: && textState == LexerTextStateType.PREFORMATTED) {
0633: str = str.substring(0, str.length() - 1);
0634: skipLastLineTerminator = false;
0635: }
0636: textToken = new HTMLText(str, textPos, hasLeadingSpaces,
0637: hasTrailingSpaces);
0638: }
0639: hasTrailingSpaces = false;
0640: hasLeadingSpaces = false;
0641: textParsed = false;
0642: terminatorSkipped = false;
0643: lastWasLineTerminator = false;
0644:
0645: return textToken;
0646: }
0647:
0648: public String getEOLString() {
0649: int eol = Math.max(CRcount, Math.max(NLcount, CRNLcount));
0650:
0651: String result = null;
0652:
0653: if (eol == NLcount) {
0654: result = String.valueOf('\n');
0655: } else if (eol == CRNLcount) {
0656: result = String.valueOf("\r\n");
0657: } else if (eol == CRcount) {
0658: result = String.valueOf('\r');
0659: }
0660:
0661: if (result != null) {
0662: return result;
0663: } else {
0664: throw new AssertionError();
0665: }
0666: }
0667:
/**
 * Creates a new scanner that reads from a character stream.
 * There is also a java.io.InputStream version of this constructor.
 *
 * @param in the java.io.Reader to read input from.
 */
Lexer(java.io.Reader in) {
    zzReader = in;
}
0677:
/**
 * Creates a new scanner that reads from a byte stream, wrapped in a
 * java.io.InputStreamReader created without an explicit charset.
 * There is also a java.io.Reader version of this constructor.
 *
 * @param in the java.io.InputStream to read input from.
 */
Lexer(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
}
0687:
0688: /**
0689: * Unpacks the compressed character translation table.
0690: *
0691: * @param packed the packed character translation table
0692: * @return the unpacked character translation table
0693: */
0694: private static char[] zzUnpackCMap(String packed) {
0695: char[] map = new char[0x10000];
0696: int i = 0; /* index in packed string */
0697: int j = 0; /* index in unpacked array */
0698: while (i < 70) {
0699: int count = packed.charAt(i++);
0700: char value = packed.charAt(i++);
0701: do
0702: map[j++] = value;
0703: while (--count > 0);
0704: }
0705: return map;
0706: }
0707:
0708: /**
0709: * Refills the input buffer.
0710: *
0711: * @return <code>false</code>, iff there was new input.
0712: *
0713: * @exception java.io.IOException if any I/O-Error occurs
0714: */
0715: private boolean zzRefill() throws java.io.IOException {
0716:
0717: /* first: make room (if you can) */
0718: if (zzStartRead > 0) {
0719: System.arraycopy(zzBuffer, zzStartRead, zzBuffer, 0,
0720: zzEndRead - zzStartRead);
0721:
0722: /* translate stored positions */
0723: zzEndRead -= zzStartRead;
0724: zzCurrentPos -= zzStartRead;
0725: zzMarkedPos -= zzStartRead;
0726: zzPushbackPos -= zzStartRead;
0727: zzStartRead = 0;
0728: }
0729:
0730: /* is the buffer big enough? */
0731: if (zzCurrentPos >= zzBuffer.length) {
0732: /* if not: blow it up */
0733: char newBuffer[] = new char[zzCurrentPos * 2];
0734: System
0735: .arraycopy(zzBuffer, 0, newBuffer, 0,
0736: zzBuffer.length);
0737: zzBuffer = newBuffer;
0738: }
0739:
0740: /* finally: fill the buffer with new input */
0741: int numRead = zzReader.read(zzBuffer, zzEndRead,
0742: zzBuffer.length - zzEndRead);
0743:
0744: if (numRead < 0) {
0745: return true;
0746: } else {
0747: zzEndRead += numRead;
0748: return false;
0749: }
0750: }
0751:
0752: /**
0753: * Closes the input stream.
0754: */
0755: public final void yyclose() throws java.io.IOException {
0756: zzAtEOF = true; /* indicate end of file */
0757: zzEndRead = zzStartRead; /* invalidate buffer */
0758:
0759: if (zzReader != null)
0760: zzReader.close();
0761: }
0762:
0763: /**
0764: * Resets the scanner to read from a new input stream.
0765: * Does not close the old reader.
0766: *
0767: * All internal variables are reset, the old input stream
0768: * <b>cannot</b> be reused (internal buffer is discarded and lost).
0769: * Lexical state is set to <tt>ZZ_INITIAL</tt>.
0770: *
0771: * @param reader the new input stream
0772: */
0773: public final void yyreset(java.io.Reader reader) {
0774: zzReader = reader;
0775: zzAtBOL = true;
0776: zzAtEOF = false;
0777: zzEndRead = zzStartRead = 0;
0778: zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
0779: yyline = yychar = yycolumn = 0;
0780: zzLexicalState = YYINITIAL;
0781: }
0782:
0783: /**
0784: * Returns the current lexical state.
0785: */
0786: public final int yystate() {
0787: return zzLexicalState;
0788: }
0789:
0790: /**
0791: * Enters a new lexical state
0792: *
0793: * @param newState the new lexical state
0794: */
0795: public final void yybegin(int newState) {
0796: if ((newState == ENDTAG)
0797: && isPreformatted(yytext().substring(2))) {
0798: preEntryCounter--;
0799: textState = (preEntryCounter == 0) ? LexerTextStateType.DEFAULT
0800: : LexerTextStateType.PREFORMATTED;
0801: }
0802:
0803: zzLexicalState = newState;
0804: }
0805:
0806: /**
0807: * Returns the text matched by the current regular expression.
0808: */
0809: public final String yytext() {
0810: return new String(zzBuffer, zzStartRead, zzMarkedPos
0811: - zzStartRead);
0812: }
0813:
0814: /**
0815: * Returns the character at position <tt>pos</tt> from the
0816: * matched text.
0817: *
0818: * It is equivalent to yytext().charAt(pos), but faster
0819: *
0820: * @param pos the position of the character to fetch.
0821: * A value from 0 to yylength()-1.
0822: *
0823: * @return the character at position pos
0824: */
0825: public final char yycharat(int pos) {
0826: return zzBuffer[zzStartRead + pos];
0827: }
0828:
0829: /**
0830: * Returns the length of the matched text region.
0831: */
0832: public final int yylength() {
0833: return zzMarkedPos - zzStartRead;
0834: }
0835:
0836: /**
0837: * Reports an error that occured while scanning.
0838: *
0839: * In a wellformed scanner (no or only correct usage of
0840: * yypushback(int) and a match-all fallback rule) this method
0841: * will only be called with things that "Can't Possibly Happen".
0842: * If this method is called, something is seriously wrong
0843: * (e.g. a JFlex bug producing a faulty scanner etc.).
0844: *
0845: * Usual syntax/scanner level error handling should be done
0846: * in error fallback rules.
0847: *
0848: * @param errorCode the code of the errormessage to display
0849: */
0850: private void zzScanError(int errorCode) {
0851: String message;
0852: try {
0853: message = ZZ_ERROR_MSG[errorCode];
0854: } catch (ArrayIndexOutOfBoundsException e) {
0855: message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
0856: }
0857:
0858: throw new Error(message);
0859: }
0860:
0861: /**
0862: * Pushes the specified amount of characters back into the input stream.
0863: *
0864: * They will be read again by then next call of the scanning method
0865: *
0866: * @param number the number of characters to be read again.
0867: * This number must not be greater than yylength()!
0868: */
0869: public void yypushback(int number) {
0870: if (number > yylength())
0871: zzScanError(ZZ_PUSHBACK_2BIG);
0872:
0873: zzMarkedPos -= number;
0874: }
0875:
0876: /**
0877: * Contains user EOF-code, which will be executed exactly once,
0878: * when the end of file is reached
0879: */
0880: private void zzDoEOF() throws java.io.IOException {
0881: if (!zzEOFDone) {
0882: zzEOFDone = true;
0883: yyclose();
0884: }
0885: }
0886:
0887: /**
0888: * Resumes scanning until the next regular expression is matched,
0889: * the end of input is encountered or an I/O-Error occurs.
0890: *
0891: * @return the next token
0892: * @exception java.io.IOException if any I/O-Error occurs
0893: */
0894: public java_cup.runtime.Symbol next_token()
0895: throws java.io.IOException {
0896: int zzInput;
0897: int zzAction;
0898:
0899: // cached fields:
0900: int zzCurrentPosL;
0901: int zzMarkedPosL;
0902: int zzEndReadL = zzEndRead;
0903: char[] zzBufferL = zzBuffer;
0904: char[] zzCMapL = ZZ_CMAP;
0905:
0906: int[] zzTransL = ZZ_TRANS;
0907: int[] zzRowMapL = ZZ_ROWMAP;
0908: int[] zzAttrL = ZZ_ATTRIBUTE;
0909:
0910: while (true) {
0911: zzMarkedPosL = zzMarkedPos;
0912:
0913: yychar += zzMarkedPosL - zzStartRead;
0914:
0915: boolean zzR = false;
0916: for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL; zzCurrentPosL++) {
0917: switch (zzBufferL[zzCurrentPosL]) {
0918: case '\u000B':
0919: case '\u000C':
0920: case '\u0085':
0921: case '\u2028':
0922: case '\u2029':
0923: yyline++;
0924: yycolumn = 0;
0925: zzR = false;
0926: break;
0927: case '\r':
0928: yyline++;
0929: yycolumn = 0;
0930: zzR = true;
0931: break;
0932: case '\n':
0933: if (zzR)
0934: zzR = false;
0935: else {
0936: yyline++;
0937: yycolumn = 0;
0938: }
0939: break;
0940: default:
0941: zzR = false;
0942: yycolumn++;
0943: }
0944: }
0945:
0946: if (zzR) {
0947: // peek one character ahead if it is \n (if we have counted one line too much)
0948: boolean zzPeek;
0949: if (zzMarkedPosL < zzEndReadL)
0950: zzPeek = zzBufferL[zzMarkedPosL] == '\n';
0951: else if (zzAtEOF)
0952: zzPeek = false;
0953: else {
0954: boolean eof = zzRefill();
0955: zzEndReadL = zzEndRead;
0956: zzMarkedPosL = zzMarkedPos;
0957: zzBufferL = zzBuffer;
0958: if (eof)
0959: zzPeek = false;
0960: else
0961: zzPeek = zzBufferL[zzMarkedPosL] == '\n';
0962: }
0963: if (zzPeek)
0964: yyline--;
0965: }
0966: zzAction = -1;
0967:
0968: zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
0969:
0970: zzState = zzLexicalState;
0971:
0972: zzForAction: {
0973: while (true) {
0974:
0975: if (zzCurrentPosL < zzEndReadL)
0976: zzInput = zzBufferL[zzCurrentPosL++];
0977: else if (zzAtEOF) {
0978: zzInput = YYEOF;
0979: break zzForAction;
0980: } else {
0981: // store back cached positions
0982: zzCurrentPos = zzCurrentPosL;
0983: zzMarkedPos = zzMarkedPosL;
0984: boolean eof = zzRefill();
0985: // get translated positions and possibly new buffer
0986: zzCurrentPosL = zzCurrentPos;
0987: zzMarkedPosL = zzMarkedPos;
0988: zzBufferL = zzBuffer;
0989: zzEndReadL = zzEndRead;
0990: if (eof) {
0991: zzInput = YYEOF;
0992: break zzForAction;
0993: } else {
0994: zzInput = zzBufferL[zzCurrentPosL++];
0995: }
0996: }
0997: int zzNext = zzTransL[zzRowMapL[zzState]
0998: + zzCMapL[zzInput]];
0999: if (zzNext == -1)
1000: break zzForAction;
1001: zzState = zzNext;
1002:
1003: int zzAttributes = zzAttrL[zzState];
1004: if ((zzAttributes & 1) == 1) {
1005: zzAction = zzState;
1006: zzMarkedPosL = zzCurrentPosL;
1007: if ((zzAttributes & 8) == 8)
1008: break zzForAction;
1009: }
1010:
1011: }
1012: }
1013:
1014: // store back cached position
1015: zzMarkedPos = zzMarkedPosL;
1016:
1017: switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
1018: case 28: {
1019: yybegin(MDSTART);
1020: return new Symbol(ParserSym.MUDECL, yytext());
1021: }
1022: case 101:
1023: break;
1024: case 50: {
1025: yybegin(ATTRVALLIT_SQM);
1026: CRcount++;
1027: }
1028: case 102:
1029: break;
1030: case 49: {
1031: yybegin(ATTRVALLIT_SQM);
1032: NLcount++;
1033: }
1034: case 103:
1035: break;
1036: case 18: {
1037: yybegin(ATTRVAL);
1038: }
1039: case 104:
1040: break;
1041: case 32: {
1042: yybegin(COMMENT);
1043: return new Symbol(ParserSym.LEXERR, yytext());
1044: }
1045: case 105:
1046: break;
1047: case 43: {
1048: yybegin(ATTRVALSTART);
1049: return new Symbol(ParserSym.LEXERR, yytext());
1050: }
1051: case 106:
1052: break;
1053: case 98: {
1054: yybegin(CDATA);
1055: appendText(LexerTextType.ENTITY_SEMI);
1056: }
1057: case 107:
1058: break;
1059: case 62: {
1060: yybegin(CDATA_ENDTAG_START);
1061: return new Symbol(ParserSym.LEXERR, yytext());
1062: }
1063: case 108:
1064: break;
1065: case 96: {
1066: yybegin(ATTRVALLIT_DQM);
1067: return new Symbol(ParserSym.ATTRVAL_LIT,
1068: replaceEntity(true));
1069: }
1070: case 109:
1071: break;
1072: case 7: {
1073: yybegin(YYINITIAL);
1074: appendText(LexerTextType.LINE_TERMINATOR);
1075: CRcount++;
1076: }
1077: case 110:
1078: break;
1079: case 6: {
1080: yybegin(YYINITIAL);
1081: appendText(LexerTextType.LINE_TERMINATOR);
1082: NLcount++;
1083: }
1084: case 111:
1085: break;
1086: case 61: {
1087: yybegin(CDATA);
1088: appendText(LexerTextType.TEXT);
1089: }
1090: case 112:
1091: break;
1092: case 59: {
1093: yybegin(CDATA);
1094: NLcount++;
1095: appendText(LexerTextType.LINE_TERMINATOR);
1096: }
1097: case 113:
1098: break;
1099: case 31: {
1100: yybegin(MD);
1101: CRcount++;
1102: }
1103: case 114:
1104: break;
1105: case 30: {
1106: yybegin(MD);
1107: NLcount++;
1108: }
1109: case 115:
1110: break;
1111: case 9: {
1112: yybegin(ATTR);
1113: return new Symbol(ParserSym.LEXERR, yytext());
1114: }
1115: case 116:
1116: break;
1117: case 38: {
1118: yybegin(ATTREQUALS);
1119: }
1120: case 117:
1121: break;
1122: case 100: {
1123: if (yytext().equalsIgnoreCase(currentCDATAClosingTag)) {
1124: yypushback(yytext().length() - 2);
1125: yybegin(CDATA_ENDTAG_START);
1126: String aux = buffer.toString();
1127:
1128: if (aux.endsWith("\r\n")) {
1129: aux = aux.substring(0, aux.length() - 2);
1130: } else if (aux.endsWith("\n") || aux.endsWith("\r")) {
1131: aux = aux.substring(0, aux.length() - 1);
1132: }
1133: flushBufferedText(); // for initialization purposes
1134: return new Symbol(ParserSym.TEXT, new HTMLText(aux,
1135: offsetCDATA, false, false));
1136: } else {
1137: yybegin(CDATA);
1138: appendText(LexerTextType.TEXT);
1139: }
1140: }
1141: case 118:
1142: break;
1143: case 85: {
1144: yybegin(CDATA);
1145: CRNLcount++;
1146: appendText(LexerTextType.LINE_TERMINATOR);
1147: }
1148: case 119:
1149: break;
1150: case 5: {
1151: yybegin(YYINITIAL);
1152: appendText(LexerTextType.SPACE);
1153: }
1154: case 120:
1155: break;
1156: case 35: {
1157: yybegin(CHECK_IF_CDATA);
1158: syncronizeSent = false;
1159: return new Symbol(ParserSym.TAG_CLOSE, yychar, yyline,
1160: new Character(yytext().charAt(0)));
1161: }
1162: case 121:
1163: break;
1164: case 14: {
1165: yybegin(YYINITIAL);
1166: return new Symbol(ParserSym.TAG_CLOSE, yychar, yyline,
1167: new Character(yytext().charAt(0)));
1168: }
1169: case 122:
1170: break;
1171: case 93: {
1172: yybegin(YYINITIAL);
1173: appendText(LexerTextType.ENTITY_SEMI);
1174: }
1175: case 123:
1176: break;
1177: case 23: {
1178: yybegin(ENDTAG);
1179: return new Symbol(ParserSym.LEXERR, yytext());
1180: }
1181: case 124:
1182: break;
1183: case 74: {
1184: yybegin(ATTRVAL);
1185: CRNLcount++;
1186: }
1187: case 125:
1188: break;
1189: case 1: {
1190: yybegin(COMMENT);
1191: return new Symbol(ParserSym.COMM_CONTENT, yytext());
1192: }
1193: case 126:
1194: break;
1195: case 37: {
1196: yybegin(ATTREQUALS);
1197: return new Symbol(ParserSym.LEXERR, yytext());
1198: }
1199: case 127:
1200: break;
1201: case 17: {
1202: yybegin(ATTRVALSTART);
1203: return new Symbol(ParserSym.ATTRVAL_NUM, yytext());
1204: }
1205: case 128:
1206: break;
1207: case 88: {
1208: yybegin(YYINITIAL);
1209: return new Symbol(ParserSym.EMPTY_TAG, yytext());
1210: }
1211: case 129:
1212: break;
1213: case 57: {
1214: yybegin(CDATA);
1215: appendText(LexerTextType.TEXT);
1216: }
1217: case 130:
1218: break;
1219: case 81: {
1220: yybegin(ATTRVALLIT_DQM);
1221: return new Symbol(ParserSym.ATTRVAL_LIT,
1222: replaceEntity(false));
1223: }
1224: case 131:
1225: break;
1226: case 20: {
1227: yybegin(ATTRVAL);
1228: CRcount++;
1229: }
1230: case 132:
1231: break;
1232: case 19: {
1233: yybegin(ATTRVAL);
1234: NLcount++;
1235: }
1236: case 133:
1237: break;
1238: case 40: {
1239: yybegin(ATTREQUALS);
1240: CRcount++;
1241: }
1242: case 134:
1243: break;
1244: case 10: {
1245: yybegin(ATTREQUALS);
1246: return new Symbol(ParserSym.ATTR_NAME, yytext());
1247: }
1248: case 135:
1249: break;
1250: case 39: {
1251: yybegin(ATTREQUALS);
1252: NLcount++;
1253: }
1254: case 136:
1255: break;
1256: case 64: {
1257: yypushback(yytext().length());
1258:
1259: if (!syncronizeSent) {
1260: syncronizeSent = true;
1261: return new Symbol(ParserSym.SYNC, yytext());
1262: }
1263:
1264: if (cup.action_obj.isLastStartTagCreatedOk()
1265: && currentCDATAClosingTag != null) {
1266: textState = LexerTextStateType.CDATA;
1267: yybegin(CDATA);
1268: } else {
1269: yybegin(YYINITIAL);
1270: }
1271: }
1272: case 137:
1273: break;
1274: case 75: {
1275: yybegin(ATTRVALSTART);
1276: return new Symbol(ParserSym.ATTRVAL_NMTK,
1277: replaceEntity(false));
1278: }
1279: case 138:
1280: break;
1281: case 65: {
1282: yybegin(YYINITIAL);
1283: return new Symbol(ParserSym.LEXERR_EXPECTED_TAG_NAME,
1284: yytext());
1285: }
1286: case 139:
1287: break;
1288: case 91: {
1289: yybegin(TAG_IGNORE_ATTS);
1290: CRNLcount++;
1291: }
1292: case 140:
1293: break;
1294: case 90: {
1295: yybegin(IGNORED_Q_TAG);
1296: }
1297: case 141:
1298: break;
1299: case 63: {
1300: yybegin(CDATA_ENDTAG_START);
1301: return new Symbol(ParserSym.END_TAG_NAME, offsetCDATA,
1302: yyline, yytext());
1303: }
1304: case 142:
1305: break;
1306: case 70: {
1307: yybegin(YYINITIAL);
1308: appendText(LexerTextType.LINE_TERMINATOR);
1309: CRNLcount++;
1310: }
1311: case 143:
1312: break;
1313: case 82: {
1314: yybegin(ATTRVALLIT_SQM);
1315: CRNLcount++;
1316: }
1317: case 144:
1318: break;
1319: case 79: {
1320: yybegin(ATTREQUALS);
1321: CRNLcount++;
1322: }
1323: case 145:
1324: break;
1325: case 72: {
1326: yybegin(ATTR);
1327: CRNLcount++;
1328: }
1329: case 146:
1330: break;
1331: case 33: {
1332: yybegin(STARTTAG);
1333: return new Symbol(ParserSym.LEXERR, yytext());
1334: }
1335: case 147:
1336: break;
1337: case 71: {
1338: yybegin(YYINITIAL);
1339: appendText(LexerTextType.ENTITY_NO_SEMI);
1340: }
1341: case 148:
1342: break;
1343: case 2: {
1344: yybegin(ATTRVALLIT_DQM);
1345: return new Symbol(ParserSym.ATTRVAL_LIT, yytext());
1346: }
1347: case 149:
1348: break;
1349: case 60: {
1350: yybegin(CDATA);
1351: CRcount++;
1352: appendText(LexerTextType.LINE_TERMINATOR);
1353: }
1354: case 150:
1355: break;
1356: case 83: {
1357: yybegin(ATTRVALLIT_SQM);
1358: return new Symbol(ParserSym.ATTRVAL_LIT,
1359: replaceEntity(false));
1360: }
1361: case 151:
1362: break;
1363: case 99: {
1364: yybegin(ENDTAG);
1365: String tagName = yytext().substring(2);
1366: return new Symbol(ParserSym.END_TAG_NAME, yychar,
1367: yyline, tagName);
1368: }
1369: case 152:
1370: break;
1371: case 76: {
1372: yybegin(ENDTAG);
1373: CRNLcount++;
1374: }
1375: case 153:
1376: break;
1377: case 53: {
1378: yybegin(YYINITIAL);
1379: yypushback(yytext().length());
1380: return new Symbol(ParserSym.TAG_SLASH_CLOSE, yytext());
1381: }
1382: case 154:
1383: break;
1384: case 16: {
1385: yybegin(ATTRVALSTART);
1386: return new Symbol(ParserSym.ATTRVAL_NMTK, yytext());
1387: }
1388: case 155:
1389: break;
1390: case 22: {
1391: yybegin(ATTRVALLIT_SQM);
1392: return new Symbol(ParserSym.SQM, new Character(yytext()
1393: .charAt(0)));
1394: }
1395: case 156:
1396: break;
1397: case 80: {
1398: yybegin(ATTRVALLIT_DQM);
1399: CRNLcount++;
1400: }
1401: case 157:
1402: break;
1403: case 77: {
1404: yybegin(COMMENT);
1405: return new Symbol(ParserSym.COMM, yytext());
1406: }
1407: case 158:
1408: break;
1409: case 47: {
1410: yybegin(ATTR);
1411: return new Symbol(ParserSym.DQM, new Character(yytext()
1412: .charAt(0)));
1413: }
1414: case 159:
1415: break;
1416: case 73: {
1417: yybegin(ATTRVALSTART);
1418: return new Symbol(ParserSym.ATTRVAL_NUMTK, yytext());
1419: }
1420: case 160:
1421: break;
1422: case 86: {
1423: yybegin(CDATA);
1424: appendText(LexerTextType.ENTITY_NO_SEMI);
1425: // to collapse next LINE TERMINATOR (same as RI)
1426: terminatorSkipped = false;
1427: }
1428: case 161:
1429: break;
1430: case 36: {
1431: yybegin(TAG_IGNORE_ATTS);
1432: return new Symbol(ParserSym.LEXERR, yytext());
1433: }
1434: case 162:
1435: break;
1436: case 4: {
1437: yybegin(YYINITIAL);
1438: appendText(LexerTextType.TEXT);
1439: }
1440: case 163:
1441: break;
1442: case 56: {
1443: yybegin(YYINITIAL);
1444: return new Symbol(ParserSym.TAG_SLASH_CLOSE, yytext());
1445: }
1446: case 164:
1447: break;
1448: case 94: {
1449: yybegin(ATTRVALSTART);
1450: return new Symbol(ParserSym.ATTRVAL_NMTK,
1451: replaceEntity(true));
1452: }
1453: case 165:
1454: break;
1455: case 41: {
1456: yybegin(ATTRVAL);
1457: return new Symbol(ParserSym.EQUALS, new Character(
1458: yytext().charAt(0)));
1459: }
1460: case 166:
1461: break;
1462: case 15: {
1463: yybegin(TAG_TRAILINGWS);
1464: }
1465: case 167:
1466: break;
1467: case 26: {
1468: yybegin(ENDTAG);
1469: CRcount++;
1470: }
1471: case 168:
1472: break;
1473: case 95: {
1474: yybegin(YYINITIAL);
1475: return new Symbol(ParserSym.TAG_COMM_CLOSE, yytext());
1476: }
1477: case 169:
1478: break;
1479: case 24: {
1480: yybegin(ENDTAG);
1481: }
1482: case 170:
1483: break;
1484: case 25: {
1485: yybegin(ENDTAG);
1486: NLcount++;
1487: }
1488: case 171:
1489: break;
1490: case 44: {
1491: yybegin(ATTRVALLIT_DQM);
1492: }
1493: case 172:
1494: break;
1495: case 13: {
1496: yybegin(ATTR);
1497: CRcount++;
1498: }
1499: case 173:
1500: break;
1501: case 89: {
1502: yybegin(MD);
1503: return new Symbol(ParserSym.TAG_OPEN_EXM, yychar,
1504: yyline, yytext());
1505: }
1506: case 174:
1507: break;
1508: case 12: {
1509: yybegin(ATTR);
1510: NLcount++;
1511: }
1512: case 175:
1513: break;
1514: case 58: {
1515: yybegin(CDATA);
1516: appendText(LexerTextType.SPACE);
1517: }
1518: case 176:
1519: break;
1520: case 92: {
1521: yybegin(YYINITIAL);
1522: }
1523: case 177:
1524: break;
1525: case 84: {
1526: yybegin(TAG_TRAILINGWS);
1527: CRNLcount++;
1528: }
1529: case 178:
1530: break;
1531: case 46: {
1532: yybegin(ATTRVALLIT_DQM);
1533: CRcount++;
1534: }
1535: case 179:
1536: break;
1537: case 45: {
1538: yybegin(ATTRVALLIT_DQM);
1539: NLcount++;
1540: }
1541: case 180:
1542: break;
1543: case 87: {
1544: String tagName = yytext().substring(1).toLowerCase();
1545: Element e = dtd.elementHash.get(tagName);
1546: if (e != null
1547: && ((e.getType() == DTDConstants.CDATA) || e
1548: .isScript())) {
1549: currentCDATAClosingTag = "</" + tagName + ">";
1550: yybegin(TAG_IGNORE_ATTS);
1551: } else {
1552: if (isPreformatted(tagName)) {
1553: preEntryCounter++;
1554: textState = LexerTextStateType.PREFORMATTED;
1555: }
1556: currentCDATAClosingTag = null;
1557: yybegin(STARTTAG);
1558: }
1559: return new Symbol(ParserSym.TAG_NAME, yychar, yyline,
1560: yytext().substring(1, yytext().length()));
1561: }
1562: case 181:
1563: break;
1564: case 3: {
1565: yybegin(ATTRVALLIT_SQM);
1566: return new Symbol(ParserSym.ATTRVAL_LIT, yytext());
1567: }
1568: case 182:
1569: break;
1570: case 68: {
1571: yybegin(TAG_IGNORE_ATTS);
1572: CRcount++;
1573: }
1574: case 183:
1575: break;
1576: case 29: {
1577: yybegin(MD);
1578: }
1579: case 184:
1580: break;
1581: case 55: {
1582: yybegin(TAG_TRAILINGWS);
1583: CRcount++;
1584: }
1585: case 185:
1586: break;
1587: case 42: {
1588: yybegin(ATTRVALSTART);
1589: return new Symbol(ParserSym.DQM, new Character(yytext()
1590: .charAt(0)));
1591: }
1592: case 186:
1593: break;
1594: case 67: {
1595: yybegin(TAG_IGNORE_ATTS);
1596: NLcount++;
1597: }
1598: case 187:
1599: break;
1600: case 54: {
1601: yybegin(TAG_TRAILINGWS);
1602: NLcount++;
1603: }
1604: case 188:
1605: break;
1606: case 21: {
1607: yybegin(ATTRVALLIT_DQM);
1608: return new Symbol(ParserSym.DQM, new Character(yytext()
1609: .charAt(0)));
1610: }
1611: case 189:
1612: break;
1613: case 27: {
1614: yybegin(MD);
1615: return new Symbol(ParserSym.LEXERR, yytext());
1616: }
1617: case 190:
1618: break;
1619: case 11: {
1620: yybegin(ATTR);
1621: }
1622: case 191:
1623: break;
1624: case 8: {
1625: HTMLText textToken = flushBufferedText();
1626: yypushback(1);
1627: yybegin(YYINITIAL_NOTEXT);
1628: if (textToken != null) {
1629: return new Symbol(ParserSym.TEXT, textToken);
1630: }
1631: }
1632: case 192:
1633: break;
1634: case 48: {
1635: yybegin(ATTRVALLIT_SQM);
1636: }
1637: case 193:
1638: break;
1639: case 51: {
1640: yybegin(ATTR);
1641: return new Symbol(ParserSym.SQM, new Character(yytext()
1642: .charAt(0)));
1643: }
1644: case 194:
1645: break;
1646: case 97: {
1647: yybegin(ATTRVALLIT_SQM);
1648: return new Symbol(ParserSym.ATTRVAL_LIT,
1649: replaceEntity(true));
1650: }
1651: case 195:
1652: break;
1653: case 66: {
1654: yybegin(TAG_IGNORE_ATTS);
1655: }
1656: case 196:
1657: break;
1658: case 52: {
1659: yybegin(MDSTART);
1660: return new Symbol(ParserSym.MUDECL_CONTENT, yytext());
1661: }
1662: case 197:
1663: break;
1664: case 78: {
1665: yybegin(MD);
1666: CRNLcount++;
1667: }
1668: case 198:
1669: break;
1670: case 34: {
1671: yybegin(CHECK_IF_CDATA);
1672: return new Symbol(ParserSym.TAG_OPEN, new Character(
1673: yytext().charAt(0)));
1674: }
1675: case 199:
1676: break;
1677: case 69: {
1678: }
1679: case 200:
1680: break;
1681: default:
1682: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
1683: zzAtEOF = true;
1684: zzDoEOF();
1685: {
1686: if (buffer.length() > 0) {
1687: String aux = buffer.toString();
1688: buffer = new StringBuilder();
1689: return new Symbol(ParserSym.TEXT,
1690: new HTMLText(aux, offsetCDATA,
1691: false, false));
1692: }
1693: if (yystate() == CDATA) {
1694: yybegin(YYINITIAL);
1695: return new Symbol(ParserSym.EOF_LITERAL,
1696: currentCDATAClosingTag.substring(2,
1697: currentCDATAClosingTag
1698: .length() - 1));
1699: }
1700: return new Symbol(ParserSym.EOF, yytext());
1701: }
1702: } else {
1703: zzScanError(ZZ_NO_MATCH);
1704: }
1705: }
1706: }
1707: }
1708:
1709: }
|