001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.editor.ext.html;
043:
044: import org.netbeans.editor.Syntax;
045: import org.netbeans.editor.TokenID;
046:
047: /**
048: * Lexical analyzer for HTML source files.
049: *
050: * @author Petr Nejedly
051: * @author Miloslav Metelka
052: * @version 1.00
053: *
054: * @deprecated Use Lexer API instead. See {@link HTMLLexer} and {@link HTMLTokenId}.
055: */
056:
057: public class HTMLSyntax extends Syntax {
058:
059: /** Internal state of the lexical analyzer before entering subanalyzer of
060: * character references. It is initially set to INIT, but before first usage,
061: * this will be overwritten with state, which originated transition to
062: * charref subanalyzer.
063: */
064: protected int subState = INIT;
065:
066: // Internal states
067: private static final int ISI_TEXT = 1; // Plain text between tags
068: private static final int ISI_ERROR = 2; // Syntax error in HTML syntax
069: private static final int ISA_LT = 3; // After start of tag delimiter - "<"
070: private static final int ISA_SLASH = 4; // After ETAGO - "</"
071: private static final int ISI_ENDTAG = 5; // Inside endtag - "</[a..Z]+"
072: private static final int ISP_ENDTAG_X = 6; // X-switch after ENDTAG's name
073: private static final int ISP_ENDTAG_WS = 7; // In WS in ENDTAG - "</A_ _>"
074: private static final int ISI_TAG = 8; // Inside tag - "<[a..Z]+"
075: private static final int ISP_TAG_X = 9; // X-switch after TAG's name
076: private static final int ISP_TAG_WS = 10; // In WS in TAG - "<A_ _...>"
077: private static final int ISI_ARG = 11; // Inside tag's argument - "<A h_r_...>"
078: private static final int ISP_ARG_X = 12; // X-switch after ARGUMENT's name
079: private static final int ISP_ARG_WS = 13; // Inside WS after argument awaiting '='
080: private static final int ISP_EQ = 14; // X-switch after '=' in TAG's ARGUMENT
081: private static final int ISP_EQ_WS = 15; // In WS after '='
082: private static final int ISI_VAL = 16; // Non-quoted value
083: private static final int ISI_VAL_QUOT = 17; // Single-quoted value - may contain " chars
084: private static final int ISI_VAL_DQUOT = 18; // Double-quoted value - may contain ' chars
085: private static final int ISA_SGML_ESCAPE = 19; // After "<!"
086: private static final int ISA_SGML_DASH = 20; // After "<!-"
087: private static final int ISI_HTML_COMMENT = 21; // Somewhere after "<!--"
088: private static final int ISA_HTML_COMMENT_DASH = 22; // Dash in comment - maybe end of comment
089: private static final int ISI_HTML_COMMENT_WS = 23; // After end of comment, awaiting end of comment declaration
090: private static final int ISI_SGML_DECL = 24;
091: private static final int ISA_SGML_DECL_DASH = 25;
092: private static final int ISI_SGML_COMMENT = 26;
093: private static final int ISA_SGML_COMMENT_DASH = 27;
094: private static final int ISA_REF = 28; // when comes to character reference, e.g. &, after &
095: private static final int ISI_REF_NAME = 29; // if the reference is symbolic - by predefined name
096: private static final int ISA_REF_HASH = 30; // for numeric references - after &#
097: private static final int ISI_REF_DEC = 31; // decimal character reference, e.g. ř
098: private static final int ISA_REF_X = 32; //
099: private static final int ISI_REF_HEX = 33; // hexadecimal reference, in 
.. of 	..
100: private static final int ISI_TAG_SLASH = 34; //after slash in html tag
101:
/**
 * Creates the HTML lexical analyzer and points its token context path at
 * the one published by {@code HTMLTokenContext}.
 */
public HTMLSyntax() {
    this.tokenContextPath = HTMLTokenContext.contextPath;
}
105:
106: private final boolean isAZ(char ch) {
107: return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
108: }
109:
110: private final boolean isName(char ch) {
111: return Character.isLetterOrDigit(ch) || ch == '-' || ch == '_'
112: || ch == '.' || ch == ':';
113: // return( (ch >= 'a' && ch <= 'z') ||
114: // (ch >= 'A' && ch <= 'Z') ||
115: // (ch >= '0' && ch <= '9') ||
116: // ch == '-' || ch == '_' || ch == '.' || ch == ':' );
117:
118: }
119:
120: /**
121: * Resolves if given char is whitespace in terms of HTML4.0 specs
122: * According to specs, following characters are treated as whitespace:
123: * Space - <CODE>'\u0020'</CODE>, Tab - <CODE>'\u0009'</CODE>,
124: * Formfeed - <CODE>'\u000C'</CODE>,Zero-width space - <CODE>'\u200B'</CODE>,
125: * Carriage return - <CODE>'
126: '</CODE> and Line feed - <CODE>'
127: '</CODE>
128: * CR's are included for completenes only, they should never appear in document
129: */
130:
131: private final boolean isWS(char ch) {
132: return Character.isWhitespace(ch);
133: // return ( ch == '\u0020' || ch == '\u0009' || ch == '\u000c'
134: // || ch == '\u200b' || ch == '\n' || ch == '\r' );
135: }
136:
/**
 * Runs the HTML state machine over the characters between {@code offset} and
 * {@code stopOffset} and returns the ID of the first token it completes.
 *
 * <p>Each loop iteration looks at {@code buffer[offset]} in the light of the
 * current {@code state}. A case either returns a token ID (optionally after
 * consuming the current character with {@code offset++}), uses {@code break}
 * to reach the increment at the bottom of the loop and so consume the
 * character, or uses {@code continue} to skip that increment - generally to
 * re-examine the same character in the newly selected state.
 *
 * <p>Character references (text starting with '&amp;') are scanned by the
 * ISA_REF / ISI_REF_* states; {@code subState} records which state to resume
 * once the reference has been handled.
 *
 * @return the ID of the recognized token; when the buffer is exhausted, the
 *         ID matching the current state if {@code lastBuffer} is set,
 *         otherwise {@code null}
 */
137: protected TokenID parseToken() {
138: char actChar;
139:
140: while (offset < stopOffset) {
141: actChar = buffer[offset];
142: //System.out.println("HTMLSyntax: parseToken tokenOffset=" + tokenOffset + ", actChar='" + actChar + "', offset=" + offset + ", state=" + getStateName(state) +
143: // ", stopOffset=" + stopOffset + ", lastBuffer=" + lastBuffer);
144: switch (state) {
145: case INIT: // DONE
146: switch (actChar) {
147: case '<':
148: state = ISA_LT;
149: break;
150: case '&':
151: state = ISA_REF;
152: subState = ISI_TEXT; // resume plain text after the reference
153: break;
154: default:
155: state = ISI_TEXT;
156: break;
157: }
158: break;
159:
160: case ISI_TEXT: // DONE
161: switch (actChar) {
162: case '<':
163: case '&':
164: state = INIT;
165: return HTMLTokenContext.TEXT; // text ends before the '<' or '&', which is not consumed
166: }
167: break;
168:
169: case ISI_ERROR: // DONE
170: offset++; // the offending character becomes part of the error token
171: state = INIT;
172: return HTMLTokenContext.ERROR;
173:
174: case ISA_LT: // PENDING other transitions - e.g '<?'
175: if (isAZ(actChar)) { // <'a..Z'
176: state = ISI_TAG;
177: return HTMLTokenContext.TAG_OPEN_SYMBOL; // the letter itself is left for ISI_TAG
178: }
179: switch (actChar) {
180: case '/': // ETAGO - </
181: state = ISA_SLASH;
182: offset++;
183: return HTMLTokenContext.TAG_OPEN_SYMBOL;
184: case '>': // Empty start tag <>, RELAXED
185: offset++;
186: state = INIT;
187: return HTMLTokenContext.TAG_CLOSE_SYMBOL;
188: case '!':
189: state = ISA_SGML_ESCAPE;
190: break;
191: default: // Part of text, RELAXED
192: state = ISI_TEXT;
193: continue; // don't eat the char, maybe it's '&'
194: }
195: break;
196:
197: case ISA_SLASH: // DONE
198: if (isAZ(actChar)) { // </'a..Z'
199: state = ISI_ENDTAG;
200: break;
201: }
202: switch (actChar) {
203: case '>': // Empty end tag </>, RELAXED
204: offset++;
205: state = INIT;
206: return HTMLTokenContext.TAG_CLOSE_SYMBOL;
207: default: // Part of text, e.g. </3, </'\n', RELAXED
208: state = ISI_TEXT;
209: continue; // don't eat the char
210: }
211: //break;
212:
213: case ISI_ENDTAG: // DONE
214: if (isName(actChar))
215: break; // Still in endtag identifier, eat next char
216: state = ISP_ENDTAG_X;
217: return HTMLTokenContext.TAG_CLOSE;
218:
219: case ISP_ENDTAG_X: // DONE
220: if (isWS(actChar)) {
221: state = ISP_ENDTAG_WS;
222: break;
223: }
224: switch (actChar) {
225: case '>': // Closing of endtag, e.g. </H6 _>_
226: offset++;
227: state = INIT;
228: return HTMLTokenContext.TAG_CLOSE_SYMBOL;
229: case '<': // next tag, e.g. </H6 _<_, RELAXED
230: state = INIT;
231: continue;
232: default:
233: state = ISI_ERROR;
234: continue; //don't eat
235: }
236: //break;
237:
238: case ISP_ENDTAG_WS: // DONE
239: if (isWS(actChar))
240: break; // eat all WS
241: state = ISP_ENDTAG_X;
242: return HTMLTokenContext.WS;
243:
244: case ISI_TAG: // DONE
245: if (isName(actChar))
246: break; // Still in tag identifier, eat next char
247: state = ISP_TAG_X;
248: return HTMLTokenContext.TAG_OPEN;
249:
250: case ISP_TAG_X: // DONE
251: if (isWS(actChar)) {
252: state = ISP_TAG_WS;
253: break;
254: }
255: if (isAZ(actChar)) {
256: state = ISI_ARG;
257: break;
258: }
259: switch (actChar) {
260: case '/':
261: offset++; // the slash is consumed here; ISI_TAG_SLASH then inspects the following char
262: state = ISI_TAG_SLASH;
263: continue;
264: case '>':
265: offset++;
266: state = INIT;
267: return HTMLTokenContext.TAG_CLOSE_SYMBOL;
268: case '<':
269: state = INIT;
270: continue; // don't eat it!!!
271: default:
272: state = ISI_ERROR;
273: continue;
274: }
275: //break;
276:
277: case ISP_TAG_WS: // DONE
278: if (isWS(actChar))
279: break; // eat all WS
280: state = ISP_TAG_X;
281: return HTMLTokenContext.WS;
282:
283: case ISI_TAG_SLASH:
284: switch (actChar) {
285: case '>':
286: offset++;
287: state = INIT;
288: return HTMLTokenContext.TAG_CLOSE_SYMBOL; // token covers the "/>" pair
289: default:
290: state = ISI_ERROR;
291: continue;
292: }
293:
294: case ISI_ARG: // DONE
295: if (isName(actChar))
296: break; // eat next char
297: state = ISP_ARG_X;
298: return HTMLTokenContext.ARGUMENT;
299:
300: case ISP_ARG_X:
301: if (isWS(actChar)) {
302: state = ISP_ARG_WS;
303: break;
304: }
305: if (isAZ(actChar)) {
306: state = ISI_ARG;
307: break;
308: }
309: switch (actChar) {
310: case '/':
311: case '>':
312: offset++;
313: state = INIT;
314: return HTMLTokenContext.TAG_OPEN; // NOTE(review): ISP_TAG_X reports '>' as TAG_CLOSE_SYMBOL - confirm TAG_OPEN is intended here
315: case '<':
316: state = INIT;
317: continue; // don't eat !!!
318: case '=':
319: offset++;
320: state = ISP_EQ;
321: return HTMLTokenContext.OPERATOR;
322: default:
323: state = ISI_ERROR;
324: continue;
325: }
326: //break;
327:
328: case ISP_ARG_WS:
329: if (isWS(actChar))
330: break; // Eat all WhiteSpace
331: state = ISP_ARG_X;
332: return HTMLTokenContext.WS;
333:
334: case ISP_EQ:
335: if (isWS(actChar)) {
336: state = ISP_EQ_WS;
337: break;
338: }
339: switch (actChar) {
340: case '\'':
341: state = ISI_VAL_QUOT;
342: break;
343: case '"':
344: state = ISI_VAL_DQUOT;
345: break;
346: case '>':
347: offset++;
348: state = INIT;
349: return HTMLTokenContext.TAG_OPEN; // NOTE(review): ISP_TAG_X reports '>' as TAG_CLOSE_SYMBOL - confirm TAG_OPEN is intended here
350: default:
351: state = ISI_VAL; // everything else is an attribute value
352: break;
353: }
354: break;
355:
356: case ISP_EQ_WS:
357: if (isWS(actChar))
358: break; // Consume all WS
359: state = ISP_EQ;
360: return HTMLTokenContext.WS;
361:
362: case ISI_VAL:
363: if (!isWS(actChar)
364: && !(actChar == '/' || actChar == '>' || actChar == '<'))
365: break; // Consume whole value
366: state = ISP_TAG_X;
367: return HTMLTokenContext.VALUE;
368:
369: case ISI_VAL_QUOT:
370: switch (actChar) {
371: case '\'':
372: offset++;
373: state = ISP_TAG_X;
374: return HTMLTokenContext.VALUE;
375: case '&':
376: if (offset == tokenOffset) { // '&' is the first char of a new token: start scanning a reference
377: subState = state;
378: state = ISA_REF;
379: break;
380: } else {
381: return HTMLTokenContext.VALUE; // emit the value text collected before the '&'; state is unchanged
382: }
383: }
384: break; // else simply consume next char of VALUE
385:
386: case ISI_VAL_DQUOT:
387: switch (actChar) {
388: case '"':
389: offset++;
390: state = ISP_TAG_X;
391: return HTMLTokenContext.VALUE;
392: case '&':
393: if (offset == tokenOffset) { // '&' is the first char of a new token: start scanning a reference
394: subState = state;
395: state = ISA_REF;
396: break;
397: } else {
398: return HTMLTokenContext.VALUE; // emit the value text collected before the '&'; state is unchanged
399: }
400: }
401: break; // else simply consume next char of VALUE
402:
403: case ISA_SGML_ESCAPE: // DONE
404: if (isAZ(actChar)) {
405: state = ISI_SGML_DECL;
406: break;
407: }
408: switch (actChar) {
409: case '-':
410: state = ISA_SGML_DASH;
411: break;
412: default:
413: state = ISI_TEXT;
414: continue;
415: }
416: break;
417:
418: case ISA_SGML_DASH: // DONE
419: switch (actChar) {
420: case '-':
421: state = ISI_HTML_COMMENT;
422: break;
423: default:
424: state = ISI_TEXT;
425: continue;
426: }
427: break;
428:
429: case ISI_HTML_COMMENT: // DONE
430: switch (actChar) {
431: case '-':
432: state = ISA_HTML_COMMENT_DASH;
433: break;
434: //create an HTML comment token for each line of the comment - a performance fix for #43532
435: case '\n':
436: offset++;
437: //leave the same state - we are still in an HTML comment,
438: //we just need to create a token for each line.
439: return HTMLTokenContext.BLOCK_COMMENT;
440: }
441: break;
442:
443: case ISA_HTML_COMMENT_DASH:
444: switch (actChar) {
445: case '-':
446: state = ISI_HTML_COMMENT_WS;
447: break;
448: default:
449: state = ISI_HTML_COMMENT;
450: continue;
451: }
452: break;
453:
454: case ISI_HTML_COMMENT_WS: // DONE
455: if (isWS(actChar))
456: break; // Consume all WS
457: switch (actChar) {
458: case '>':
459: offset++;
460: state = INIT;
461: return HTMLTokenContext.BLOCK_COMMENT;
462: default:
463: state = ISI_HTML_COMMENT;
464: continue;
465: }
466: //break;
467:
468: case ISI_SGML_DECL:
469: switch (actChar) {
470: case '>':
471: offset++;
472: state = INIT;
473: return HTMLTokenContext.DECLARATION;
474: case '-':
475: if (offset == tokenOffset) { // '-' is the first char of a new token: maybe an SGML comment
476: state = ISA_SGML_DECL_DASH;
477: break;
478: } else {
479: return HTMLTokenContext.DECLARATION; // emit the declaration text collected before the '-'
480: }
481: }
482: break;
483:
484: case ISA_SGML_DECL_DASH:
485: if (actChar == '-') {
486: state = ISI_SGML_COMMENT;
487: break;
488: } else {
489: state = ISI_SGML_DECL;
490: continue;
491: }
492:
493: case ISI_SGML_COMMENT:
494: switch (actChar) {
495: case '-':
496: state = ISA_SGML_COMMENT_DASH;
497: break;
498: }
499: break;
500:
501: case ISA_SGML_COMMENT_DASH:
502: if (actChar == '-') {
503: offset++;
504: state = ISI_SGML_DECL;
505: return HTMLTokenContext.SGML_COMMENT;
506: } else {
507: state = ISI_SGML_COMMENT;
508: continue;
509: }
510:
511: case ISA_REF:
512: if (isAZ(actChar)) {
513: state = ISI_REF_NAME;
514: break;
515: }
516: if (actChar == '#') {
517: state = ISA_REF_HASH;
518: break;
519: }
520: state = subState; // not a reference after all: go back to the originating state
521: continue;
522:
523: case ISI_REF_NAME:
524: if (isName(actChar))
525: break;
526: if (actChar == ';')
527: offset++; // a terminating ';' belongs to the reference token
528: state = subState;
529: return HTMLTokenContext.CHARACTER;
530:
531: case ISA_REF_HASH:
532: if (actChar >= '0' && actChar <= '9') {
533: state = ISI_REF_DEC;
534: break;
535: }
536: if (actChar == 'x' || actChar == 'X') {
537: state = ISA_REF_X;
538: break;
539: }
540: if (isAZ(actChar)) {
541: offset++;
542: state = subState;
543: return HTMLTokenContext.ERROR;
544: }
545: state = subState;
546: continue;
547:
548: case ISI_REF_DEC:
549: if (actChar >= '0' && actChar <= '9')
550: break;
551: if (actChar == ';')
552: offset++; // a terminating ';' belongs to the reference token
553: state = subState;
554: return HTMLTokenContext.CHARACTER;
555:
556: case ISA_REF_X:
557: if ((actChar >= '0' && actChar <= '9')
558: || (actChar >= 'a' && actChar <= 'f')
559: || (actChar >= 'A' && actChar <= 'F')) {
560: state = ISI_REF_HEX;
561: break;
562: }
563: state = subState;
564: return HTMLTokenContext.ERROR; // error on previous "&#x" sequence
565:
566: case ISI_REF_HEX:
567: if ((actChar >= '0' && actChar <= '9')
568: || (actChar >= 'a' && actChar <= 'f')
569: || (actChar >= 'A' && actChar <= 'F'))
570: break;
571: if (actChar == ';')
572: offset++; // a terminating ';' belongs to the reference token
573: state = subState;
574: return HTMLTokenContext.CHARACTER;
575: }
576:
577: offset = ++offset; // consume the current character; the self-assignment is redundant (same effect as offset++)
578: } // end of while(offset...)
579:
580: /** At this stage there's no more text in the scanned buffer.
581: * Scanner first checks whether this is completely the last
582: * available buffer.
583: */
584: if (lastBuffer) {
585: switch (state) {
586: case INIT:
587: case ISI_TEXT:
588: case ISA_LT:
589: case ISA_SLASH:
590: case ISA_SGML_ESCAPE:
591: case ISA_SGML_DASH:
592: case ISI_TAG_SLASH:
593: return HTMLTokenContext.TEXT;
594:
595: case ISA_REF:
596: case ISA_REF_HASH:
597: if (subState == ISI_TEXT)
598: return HTMLTokenContext.TEXT;
599: else
600: return HTMLTokenContext.VALUE;
601:
602: case ISI_HTML_COMMENT:
603: case ISA_HTML_COMMENT_DASH:
604: case ISI_HTML_COMMENT_WS:
605: return HTMLTokenContext.BLOCK_COMMENT;
606:
607: case ISI_TAG:
608: return HTMLTokenContext.TAG_OPEN;
609: case ISI_ENDTAG:
610: return HTMLTokenContext.TAG_CLOSE;
611:
612: case ISI_ARG:
613: return HTMLTokenContext.ARGUMENT;
614:
615: case ISI_ERROR:
616: return HTMLTokenContext.ERROR;
617:
618: case ISP_ARG_WS:
619: case ISP_TAG_WS:
620: case ISP_ENDTAG_WS:
621: case ISP_EQ_WS:
622: return HTMLTokenContext.WS;
623:
624: case ISP_ARG_X:
625: case ISP_TAG_X:
626: case ISP_ENDTAG_X:
627: case ISP_EQ:
628: return HTMLTokenContext.WS;
629:
630: case ISI_VAL:
631: case ISI_VAL_QUOT:
632: case ISI_VAL_DQUOT:
633: return HTMLTokenContext.VALUE;
634:
635: case ISI_SGML_DECL:
636: case ISA_SGML_DECL_DASH:
637: return HTMLTokenContext.DECLARATION;
638:
639: case ISI_SGML_COMMENT:
640: case ISA_SGML_COMMENT_DASH:
641: return HTMLTokenContext.SGML_COMMENT;
642:
643: case ISI_REF_NAME:
644: case ISI_REF_DEC:
645: case ISA_REF_X:
646: case ISI_REF_HEX:
647: return HTMLTokenContext.CHARACTER;
648: }
649: }
650:
651: return null; // not the last buffer (or no matching state): no token can be reported yet
652: }
653:
654: public String getStateName(int stateNumber) {
655: switch (stateNumber) {
656: case INIT:
657: return "INIT"; // NOI18N
658: case ISI_TEXT:
659: return "ISI_TEXT"; // NOI18N
660: case ISA_LT:
661: return "ISA_LT"; // NOI18N
662: case ISA_SLASH:
663: return "ISA_SLASH"; // NOI18N
664: case ISA_SGML_ESCAPE:
665: return "ISA_SGML_ESCAPE"; // NOI18N
666: case ISA_SGML_DASH:
667: return "ISA_SGML_DASH"; // NOI18N
668: case ISI_HTML_COMMENT:
669: return "ISI_HTML_COMMENT";// NOI18N
670: case ISA_HTML_COMMENT_DASH:
671: return "ISA_HTML_COMMENT_DASH";// NOI18N
672: case ISI_HTML_COMMENT_WS:
673: return "ISI_HTML_COMMENT_WS";// NOI18N
674: case ISI_TAG:
675: return "ISI_TAG";// NOI18N
676: case ISI_ENDTAG:
677: return "ISI_ENDTAG";// NOI18N
678: case ISI_ARG:
679: return "ISI_ARG";// NOI18N
680: case ISI_ERROR:
681: return "ISI_ERROR";// NOI18N
682: case ISP_ARG_WS:
683: return "ISP_ARG_WS";// NOI18N
684: case ISP_TAG_WS:
685: return "ISP_TAG_WS";// NOI18N
686: case ISP_ENDTAG_WS:
687: return "ISP_ENDTAG_WS";// NOI18N
688: case ISP_ARG_X:
689: return "ISP_ARG_X";// NOI18N
690: case ISP_TAG_X:
691: return "ISP_TAG_X";// NOI18N
692: case ISP_ENDTAG_X:
693: return "ISP_ENDTAG_X";// NOI18N
694: case ISP_EQ:
695: return "ISP_EQ";// NOI18N
696: case ISI_VAL:
697: return "ISI_VAL";// NOI18N
698: case ISI_VAL_QUOT:
699: return "ISI_VAL_QUOT";// NOI18N
700: case ISI_VAL_DQUOT:
701: return "ISI_VAL_DQUOT";// NOI18N
702: case ISI_SGML_DECL:
703: return "ISI_SGML_DECL";// NOI18N
704: case ISA_SGML_DECL_DASH:
705: return "ISA_SGML_DECL_DASH";// NOI18N
706: case ISI_SGML_COMMENT:
707: return "ISI_SGML_COMMENT";// NOI18N
708: case ISA_SGML_COMMENT_DASH:
709: return "ISA_SGML_COMMENT_DASH";// NOI18N
710: case ISA_REF:
711: return "ISA_REF";// NOI18N
712: case ISI_REF_NAME:
713: return "ISI_REF_NAME";// NOI18N
714: case ISA_REF_HASH:
715: return "ISA_REF_HASH";// NOI18N
716: case ISI_REF_DEC:
717: return "ISI_REF_DEC";// NOI18N
718: case ISA_REF_X:
719: return "ISA_REF_X";// NOI18N
720: case ISI_REF_HEX:
721: return "ISI_REF_HEX";// NOI18N
722: default:
723: return super .getStateName(stateNumber);
724: }
725: }
726:
727: /** Load valid mark state into the analyzer. Offsets
728: * are already initialized when this method is called. This method
729: * must get the state from the mark and set it to the analyzer. Then
730: * it must decrease tokenOffset by the preScan stored in the mark state.
731: * @param markState mark state to be loaded into syntax. It must be non-null value.
732: */
733: public void loadState(StateInfo stateInfo) {
734: super .loadState(stateInfo);
735: subState = ((HTMLStateInfo) stateInfo).getSubState();
736: }
737:
738: /** Store state of this analyzer into given mark state. */
739: public void storeState(StateInfo stateInfo) {
740: super .storeState(stateInfo);
741: ((HTMLStateInfo) stateInfo).setSubState(subState);
742: }
743:
744: /** Compare state of this analyzer to given state info */
745: public int compareState(StateInfo stateInfo) {
746: if (super .compareState(stateInfo) == DIFFERENT_STATE)
747: return DIFFERENT_STATE;
748: return (((HTMLStateInfo) stateInfo).getSubState() == subState) ? EQUAL_STATE
749: : DIFFERENT_STATE;
750: }
751:
752: /** Create state info appropriate for particular analyzer */
753: public StateInfo createStateInfo() {
754: return new HTMLStateInfo();
755: }
756:
757: /** Base implementation of the StateInfo interface */
758: public static class HTMLStateInfo extends Syntax.BaseStateInfo {
759:
760: /** analyzer subState during parsing character references */
761: private int subState;
762:
763: public int getSubState() {
764: return subState;
765: }
766:
767: public void setSubState(int subState) {
768: this .subState = subState;
769: }
770:
771: public String toString(Syntax syntax) {
772: return super .toString(syntax)
773: + ", subState="
774: + (syntax == null ? "" : syntax
775: .getStateName(getSubState())); // NOI18N
776: }
777:
778: }
779:
780: }
|