Source Code Cross Referenced for HTMLSyntax.java in  » IDE-Netbeans » html » org » netbeans » editor » ext » html » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » IDE Netbeans » html » org.netbeans.editor.ext.html 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003:         *
004:         * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005:         *
006:         * The contents of this file are subject to the terms of either the GNU
007:         * General Public License Version 2 only ("GPL") or the Common
008:         * Development and Distribution License("CDDL") (collectively, the
009:         * "License"). You may not use this file except in compliance with the
010:         * License. You can obtain a copy of the License at
011:         * http://www.netbeans.org/cddl-gplv2.html
012:         * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013:         * specific language governing permissions and limitations under the
014:         * License.  When distributing the software, include this License Header
015:         * Notice in each file and include the License file at
016:         * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
017:         * particular file as subject to the "Classpath" exception as provided
018:         * by Sun in the GPL Version 2 section of the License file that
019:         * accompanied this code. If applicable, add the following below the
020:         * License Header, with the fields enclosed by brackets [] replaced by
021:         * your own identifying information:
022:         * "Portions Copyrighted [year] [name of copyright owner]"
023:         *
024:         * Contributor(s):
025:         *
026:         * The Original Software is NetBeans. The Initial Developer of the Original
027:         * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028:         * Microsystems, Inc. All Rights Reserved.
029:         *
030:         * If you wish your version of this file to be governed by only the CDDL
031:         * or only the GPL Version 2, indicate your decision by adding
032:         * "[Contributor] elects to include this software in this distribution
033:         * under the [CDDL or GPL Version 2] license." If you do not indicate a
034:         * single choice of license, a recipient has the option to distribute
035:         * your version of this file under either the CDDL, the GPL Version 2 or
036:         * to extend the choice of license to its licensees as provided above.
037:         * However, if you add GPL Version 2 code and therefore, elected the GPL
038:         * Version 2 license, then the option applies only if the new code is
039:         * made subject to such option by the copyright holder.
040:         */
041:
042:        package org.netbeans.editor.ext.html;
043:
044:        import org.netbeans.editor.Syntax;
045:        import org.netbeans.editor.TokenID;
046:
047:        /**
048:         * Lexical analyzer for HTML source files.
049:         *
050:         * @author Petr Nejedly
051:         * @author Miloslav Metelka
052:         * @version 1.00
053:         *
054:         * @deprecated Use Lexer API instead. See {@link HTMLLexer} and {@link HTMLTokenId}.
055:         */
056:
057:        public class HTMLSyntax extends Syntax {
058:
059:            /** State the analyzer returns to once the character-reference subanalyzer
060:             * finishes. It is initially set to INIT, but before first usage it is
061:             * overwritten with the state that originated the transition to the charref
062:             * subanalyzer (ISI_TEXT or one of the quoted-value states).
063:             */
064:            protected int subState = INIT;
065:
066:            // Internal states (ISI_ = inside, ISA_ = after, ISP_ = decision point, as used below)
067:            private static final int ISI_TEXT = 1; // Plain text between tags
068:            private static final int ISI_ERROR = 2; // Syntax error in HTML syntax
069:            private static final int ISA_LT = 3; // After start of tag delimiter - "<"
070:            private static final int ISA_SLASH = 4; // After ETAGO - "</"
071:            private static final int ISI_ENDTAG = 5; // Inside endtag - "</[a..Z]+"
072:            private static final int ISP_ENDTAG_X = 6; // X-switch (decision point) after ENDTAG's name
073:            private static final int ISP_ENDTAG_WS = 7; // In WS in ENDTAG - "</A_ _>"
074:            private static final int ISI_TAG = 8; // Inside tag - "<[a..Z]+"
075:            private static final int ISP_TAG_X = 9; // X-switch (decision point) after TAG's name
076:            private static final int ISP_TAG_WS = 10; // In WS in TAG - "<A_ _...>"
077:            private static final int ISI_ARG = 11; // Inside tag's argument - "<A h_r_...>"
078:            private static final int ISP_ARG_X = 12; // X-switch (decision point) after ARGUMENT's name
079:            private static final int ISP_ARG_WS = 13; // Inside WS after argument awaiting '='
080:            private static final int ISP_EQ = 14; // X-switch (decision point) after '=' in TAG's ARGUMENT
081:            private static final int ISP_EQ_WS = 15; // In WS after '='
082:            private static final int ISI_VAL = 16; // Non-quoted value
083:            private static final int ISI_VAL_QUOT = 17; // Single-quoted value - may contain " chars
084:            private static final int ISI_VAL_DQUOT = 18; // Double-quoted value - may contain ' chars
085:            private static final int ISA_SGML_ESCAPE = 19; // After "<!"
086:            private static final int ISA_SGML_DASH = 20; // After "<!-"
087:            private static final int ISI_HTML_COMMENT = 21; // Somewhere after "<!--"
088:            private static final int ISA_HTML_COMMENT_DASH = 22; // Dash in comment - maybe end of comment
089:            private static final int ISI_HTML_COMMENT_WS = 23; // After "--" in comment, skipping WS while awaiting the closing ">"
090:            private static final int ISI_SGML_DECL = 24; // Inside SGML declaration - "<![a..Z]..."
091:            private static final int ISA_SGML_DECL_DASH = 25; // After '-' inside SGML declaration - maybe start of "--" comment
092:            private static final int ISI_SGML_COMMENT = 26; // Inside "--" comment within SGML declaration
093:            private static final int ISA_SGML_COMMENT_DASH = 27; // After '-' inside SGML comment - maybe its end
094:            private static final int ISA_REF = 28; // when comes to character reference, e.g. &amp;, after &
095:            private static final int ISI_REF_NAME = 29; // if the reference is symbolic - by predefined name
096:            private static final int ISA_REF_HASH = 30; // for numeric references - after &#
097:            private static final int ISI_REF_DEC = 31; // decimal character reference, e.g. &#345;
098:            private static final int ISA_REF_X = 32; // after "&#x" or "&#X", awaiting the first hex digit
099:            private static final int ISI_REF_HEX = 33; // hexadecimal reference, in &#xa.. or &#X9..
100:            private static final int ISI_TAG_SLASH = 34; // After slash in html tag
101:
102:            public HTMLSyntax() {
                    // Every instance works against the shared HTML token context path.
103:                this.tokenContextPath = HTMLTokenContext.contextPath;
104:            }
105:
106:            private final boolean isAZ(char ch) {
                    // Only the unaccented ASCII letters qualify: first reject anything outside
                    // 'A'..'z', then exclude the six symbols that sit between 'Z' and 'a'.
107:                return ch >= 'A' && ch <= 'z' && (ch <= 'Z' || ch >= 'a');
108:            }
109:
110:            private final boolean isName(char ch) {
111:                return Character.isLetterOrDigit(ch) || ch == '-' || ch == '_'
112:                        || ch == '.' || ch == ':';
113:                //        return( (ch >= 'a' && ch <= 'z') ||
114:                //                (ch >= 'A' && ch <= 'Z') ||
115:                //                (ch >= '0' && ch <= '9') ||
116:                //                ch == '-' || ch == '_' || ch == '.' || ch == ':' );
117:
118:            }
119:
120:            /**
121:             * Tells whether the given character is treated as whitespace by this analyzer.
122:             * The HTML 4.0 specification lists the following whitespace characters:
123:             * Space - <CODE>'\u0020'</CODE>, Tab - <CODE>'\u0009'</CODE>,
124:             * Formfeed - <CODE>'\u000C'</CODE>, Zero-width space - <CODE>'\u200B'</CODE>,
125:             * Carriage return - <CODE>'\r'</CODE> and Line feed - <CODE>'\n'</CODE>.
126:             * CRs are listed for completeness only; they should never appear in a document.
127:             * The test itself is delegated to {@link Character#isWhitespace(char)}, whose set
128:             * differs from that list (it also accepts, for example, vertical tab U+000B).
129:             */
130:
131:            private final boolean isWS(char ch) {
132:                // Rely on the JDK's classification instead of a hand-written character list.
133:                final boolean whitespace = Character.isWhitespace(ch);
134:                return whitespace;
135:            }
136:
137:            protected TokenID parseToken() {
                    // Scans buffer[offset .. stopOffset) one character at a time, driven by the
                    // 'state' field, and returns the TokenID of the first token it completes.
                    // Conventions used by the state cases below:
                    //  - leaving the outer switch with 'break' consumes the current character
                    //    (offset is incremented at the bottom of the loop) and keeps scanning;
                    //  - 'continue' re-examines the same character in the newly set state;
                    //  - 'offset++' right before a 'return' steps past the current character,
                    //    while a plain 'return' leaves offset pointing at it.
                    // When the buffer is exhausted, a token kind is chosen from the current state
                    // only if lastBuffer is set; otherwise the method returns null.
138:                char actChar;
139:
140:                while (offset < stopOffset) {
141:                    actChar = buffer[offset];
142:                    //System.out.println("HTMLSyntax: parseToken tokenOffset=" + tokenOffset + ", actChar='" + actChar + "', offset=" + offset + ", state=" + getStateName(state) +
143:                    //      ", stopOffset=" + stopOffset + ", lastBuffer=" + lastBuffer);
144:                    switch (state) {
145:                    case INIT: // DONE
146:                        switch (actChar) {
147:                        case '<':
148:                            state = ISA_LT;
149:                            break;
150:                        case '&':
151:                            state = ISA_REF;
152:                            subState = ISI_TEXT;
153:                            break;
154:                        default:
155:                            state = ISI_TEXT;
156:                            break;
157:                        }
158:                        break;
159:
160:                    case ISI_TEXT: // DONE
161:                        switch (actChar) {
162:                        case '<':
163:                        case '&':
164:                            state = INIT;
165:                            return HTMLTokenContext.TEXT;
166:                        }
167:                        break;
168:
169:                    case ISI_ERROR: // DONE
170:                        offset++;
171:                        state = INIT;
172:                        return HTMLTokenContext.ERROR;
173:
174:                    case ISA_LT: // PENDING other transitions - e.g '<?'
175:                        if (isAZ(actChar)) { // <'a..Z'
176:                            state = ISI_TAG;
177:                            return HTMLTokenContext.TAG_OPEN_SYMBOL;
178:                        }
179:                        switch (actChar) {
180:                        case '/': // ETAGO - </
181:                            state = ISA_SLASH;
182:                            offset++;
183:                            return HTMLTokenContext.TAG_OPEN_SYMBOL;
184:                        case '>': // Empty start tag <>, RELAXED
185:                            offset++;
186:                            state = INIT;
187:                            return HTMLTokenContext.TAG_CLOSE_SYMBOL;
188:                        case '!':
189:                            state = ISA_SGML_ESCAPE;
190:                            break;
191:                        default: // Part of text, RELAXED
192:                            state = ISI_TEXT;
193:                            continue; // don't eat the char, maybe it's '&'
194:                        }
195:                        break;
196:
197:                    case ISA_SLASH: // DONE
198:                        if (isAZ(actChar)) { // </'a..Z'
199:                            state = ISI_ENDTAG;
200:                            break;
201:                        }
202:                        switch (actChar) {
203:                        case '>': // Empty end tag </>, RELAXED
204:                            offset++;
205:                            state = INIT;
206:                            return HTMLTokenContext.TAG_CLOSE_SYMBOL;
207:                        default: // Part of text, e.g. </3, </'\n', RELAXED
208:                            state = ISI_TEXT;
209:                            continue; // don't eat the char
210:                        }
211:                        //break;
212:
213:                    case ISI_ENDTAG: // DONE
214:                        if (isName(actChar))
215:                            break; // Still in endtag identifier, eat next char
216:                        state = ISP_ENDTAG_X;
217:                        return HTMLTokenContext.TAG_CLOSE;
218:
219:                    case ISP_ENDTAG_X: // DONE
220:                        if (isWS(actChar)) {
221:                            state = ISP_ENDTAG_WS;
222:                            break;
223:                        }
224:                        switch (actChar) {
225:                        case '>': // Closing of endtag, e.g. </H6 _>_
226:                            offset++;
227:                            state = INIT;
228:                            return HTMLTokenContext.TAG_CLOSE_SYMBOL;
229:                        case '<': // next tag, e.g. </H6 _<_, RELAXED
230:                            state = INIT;
231:                            continue;
232:                        default:
233:                            state = ISI_ERROR;
234:                            continue; //don't eat
235:                        }
236:                        //break;
237:
238:                    case ISP_ENDTAG_WS: // DONE
239:                        if (isWS(actChar))
240:                            break; // eat all WS
241:                        state = ISP_ENDTAG_X;
242:                        return HTMLTokenContext.WS;
243:
244:                    case ISI_TAG: // DONE
245:                        if (isName(actChar))
246:                            break; // Still in tag identifier, eat next char
247:                        state = ISP_TAG_X;
248:                        return HTMLTokenContext.TAG_OPEN;
249:
250:                    case ISP_TAG_X: // DONE
251:                        if (isWS(actChar)) {
252:                            state = ISP_TAG_WS;
253:                            break;
254:                        }
255:                        if (isAZ(actChar)) {
256:                            state = ISI_ARG;
257:                            break;
258:                        }
259:                        switch (actChar) {
260:                        case '/':
261:                            offset++;
262:                            state = ISI_TAG_SLASH;
263:                            continue;
264:                        case '>':
265:                            offset++;
266:                            state = INIT;
267:                            return HTMLTokenContext.TAG_CLOSE_SYMBOL;
268:                        case '<':
269:                            state = INIT;
270:                            continue; // don't eat it!!!
271:                        default:
272:                            state = ISI_ERROR;
273:                            continue;
274:                        }
275:                        //break;
276:
277:                    case ISP_TAG_WS: // DONE
278:                        if (isWS(actChar))
279:                            break; // eat all WS
280:                        state = ISP_TAG_X;
281:                        return HTMLTokenContext.WS;
282:
283:                    case ISI_TAG_SLASH:
284:                        switch (actChar) {
285:                        case '>':
286:                            offset++;
287:                            state = INIT;
288:                            return HTMLTokenContext.TAG_CLOSE_SYMBOL;
289:                        default:
290:                            state = ISI_ERROR;
291:                            continue;
292:                        }
293:
294:                    case ISI_ARG: // DONE
295:                        if (isName(actChar))
296:                            break; // eat next char
297:                        state = ISP_ARG_X;
298:                        return HTMLTokenContext.ARGUMENT;
299:
300:                    case ISP_ARG_X:
301:                        if (isWS(actChar)) {
302:                            state = ISP_ARG_WS;
303:                            break;
304:                        }
305:                        if (isAZ(actChar)) {
306:                            state = ISI_ARG;
307:                            break;
308:                        }
309:                        switch (actChar) {
310:                        case '/':
311:                        case '>':
312:                            offset++;
313:                            state = INIT;
                                // NOTE(review): ISP_TAG_X and ISI_TAG_SLASH report '>' as
                                // TAG_CLOSE_SYMBOL; this state reports TAG_OPEN - confirm it is intended.
314:                            return HTMLTokenContext.TAG_OPEN;
315:                        case '<':
316:                            state = INIT;
317:                            continue; // don't eat !!!
318:                        case '=':
319:                            offset++;
320:                            state = ISP_EQ;
321:                            return HTMLTokenContext.OPERATOR;
322:                        default:
323:                            state = ISI_ERROR;
324:                            continue;
325:                        }
326:                        //break;
327:
328:                    case ISP_ARG_WS:
329:                        if (isWS(actChar))
330:                            break; // Eat all WhiteSpace
331:                        state = ISP_ARG_X;
332:                        return HTMLTokenContext.WS;
333:
334:                    case ISP_EQ:
335:                        if (isWS(actChar)) {
336:                            state = ISP_EQ_WS;
337:                            break;
338:                        }
339:                        switch (actChar) {
340:                        case '\'':
341:                            state = ISI_VAL_QUOT;
342:                            break;
343:                        case '"':
344:                            state = ISI_VAL_DQUOT;
345:                            break;
346:                        case '>':
347:                            offset++;
348:                            state = INIT;
                                // NOTE(review): as in ISP_ARG_X, '>' is reported as TAG_OPEN here
                                // rather than TAG_CLOSE_SYMBOL - confirm it is intended.
349:                            return HTMLTokenContext.TAG_OPEN;
350:                        default:
351:                            state = ISI_VAL; //everything else is attribute value
352:                            break;
353:                        }
354:                        break;
355:
356:                    case ISP_EQ_WS:
357:                        if (isWS(actChar))
358:                            break; // Consume all WS
359:                        state = ISP_EQ;
360:                        return HTMLTokenContext.WS;
361:
362:                    case ISI_VAL:
363:                        if (!isWS(actChar)
364:                                && !(actChar == '/' || actChar == '>' || actChar == '<'))
365:                            break; // Consume whole value
366:                        state = ISP_TAG_X;
367:                        return HTMLTokenContext.VALUE;
368:
369:                    case ISI_VAL_QUOT:
370:                        switch (actChar) {
371:                        case '\'':
372:                            offset++;
373:                            state = ISP_TAG_X;
374:                            return HTMLTokenContext.VALUE;
375:                        case '&':
                                // A '&' at the very start of the token opens a character reference;
                                // otherwise the VALUE collected so far is returned first.
376:                            if (offset == tokenOffset) {
377:                                subState = state;
378:                                state = ISA_REF;
379:                                break;
380:                            } else {
381:                                return HTMLTokenContext.VALUE;
382:                            }
383:                        }
384:                        break; // else simply consume next char of VALUE
385:
386:                    case ISI_VAL_DQUOT:
387:                        switch (actChar) {
388:                        case '"':
389:                            offset++;
390:                            state = ISP_TAG_X;
391:                            return HTMLTokenContext.VALUE;
392:                        case '&':
                                // Same handling of '&' as in ISI_VAL_QUOT.
393:                            if (offset == tokenOffset) {
394:                                subState = state;
395:                                state = ISA_REF;
396:                                break;
397:                            } else {
398:                                return HTMLTokenContext.VALUE;
399:                            }
400:                        }
401:                        break; // else simply consume next char of VALUE
402:
403:                    case ISA_SGML_ESCAPE: // DONE
404:                        if (isAZ(actChar)) {
405:                            state = ISI_SGML_DECL;
406:                            break;
407:                        }
408:                        switch (actChar) {
409:                        case '-':
410:                            state = ISA_SGML_DASH;
411:                            break;
412:                        default:
413:                            state = ISI_TEXT;
414:                            continue;
415:                        }
416:                        break;
417:
418:                    case ISA_SGML_DASH: // DONE
419:                        switch (actChar) {
420:                        case '-':
421:                            state = ISI_HTML_COMMENT;
422:                            break;
423:                        default:
424:                            state = ISI_TEXT;
425:                            continue;
426:                        }
427:                        break;
428:
429:                    case ISI_HTML_COMMENT: // DONE
430:                        switch (actChar) {
431:                        case '-':
432:                            state = ISA_HTML_COMMENT_DASH;
433:                            break;
434:                        //create an HTML comment token for each line of the comment - a performance fix for #43532
435:                        case '\n':
436:                            offset++;
437:                            //leave the same state - we are still in an HTML comment,
438:                            //we just need to create a token for each line.
439:                            return HTMLTokenContext.BLOCK_COMMENT;
440:                        }
441:                        break;
442:
443:                    case ISA_HTML_COMMENT_DASH:
444:                        switch (actChar) {
445:                        case '-':
446:                            state = ISI_HTML_COMMENT_WS;
447:                            break;
448:                        default:
449:                            state = ISI_HTML_COMMENT;
450:                            continue;
451:                        }
452:                        break;
453:
454:                    case ISI_HTML_COMMENT_WS: // DONE
455:                        if (isWS(actChar))
456:                            break; // Consume all WS
457:                        switch (actChar) {
458:                        case '>':
459:                            offset++;
460:                            state = INIT;
461:                            return HTMLTokenContext.BLOCK_COMMENT;
462:                        default:
463:                            state = ISI_HTML_COMMENT;
464:                            continue;
465:                        }
466:                        //break;
467:
468:                    case ISI_SGML_DECL:
469:                        switch (actChar) {
470:                        case '>':
471:                            offset++;
472:                            state = INIT;
473:                            return HTMLTokenContext.DECLARATION;
474:                        case '-':
                                // A '-' at the very start of the token may open an SGML comment;
                                // otherwise the DECLARATION text collected so far is returned first.
475:                            if (offset == tokenOffset) {
476:                                state = ISA_SGML_DECL_DASH;
477:                                break;
478:                            } else {
479:                                return HTMLTokenContext.DECLARATION;
480:                            }
481:                        }
482:                        break;
483:
484:                    case ISA_SGML_DECL_DASH:
485:                        if (actChar == '-') {
486:                            state = ISI_SGML_COMMENT;
487:                            break;
488:                        } else {
489:                            state = ISI_SGML_DECL;
490:                            continue;
491:                        }
492:
493:                    case ISI_SGML_COMMENT:
494:                        switch (actChar) {
495:                        case '-':
496:                            state = ISA_SGML_COMMENT_DASH;
497:                            break;
498:                        }
499:                        break;
500:
501:                    case ISA_SGML_COMMENT_DASH:
502:                        if (actChar == '-') {
503:                            offset++;
504:                            state = ISI_SGML_DECL;
505:                            return HTMLTokenContext.SGML_COMMENT;
506:                        } else {
507:                            state = ISI_SGML_COMMENT;
508:                            continue;
509:                        }
510:
511:                    case ISA_REF:
512:                        if (isAZ(actChar)) {
513:                            state = ISI_REF_NAME;
514:                            break;
515:                        }
516:                        if (actChar == '#') {
517:                            state = ISA_REF_HASH;
518:                            break;
519:                        }
                            // Not a reference after all: resume the state that was active before '&'.
520:                        state = subState;
521:                        continue;
522:
523:                    case ISI_REF_NAME:
524:                        if (isName(actChar))
525:                            break;
                            // A terminating ';' is included in the token when present.
526:                        if (actChar == ';')
527:                            offset++;
528:                        state = subState;
529:                        return HTMLTokenContext.CHARACTER;
530:
531:                    case ISA_REF_HASH:
532:                        if (actChar >= '0' && actChar <= '9') {
533:                            state = ISI_REF_DEC;
534:                            break;
535:                        }
536:                        if (actChar == 'x' || actChar == 'X') {
537:                            state = ISA_REF_X;
538:                            break;
539:                        }
540:                        if (isAZ(actChar)) {
541:                            offset++;
542:                            state = subState;
543:                            return HTMLTokenContext.ERROR;
544:                        }
545:                        state = subState;
546:                        continue;
547:
548:                    case ISI_REF_DEC:
549:                        if (actChar >= '0' && actChar <= '9')
550:                            break;
551:                        if (actChar == ';')
552:                            offset++;
553:                        state = subState;
554:                        return HTMLTokenContext.CHARACTER;
555:
556:                    case ISA_REF_X:
557:                        if ((actChar >= '0' && actChar <= '9')
558:                                || (actChar >= 'a' && actChar <= 'f')
559:                                || (actChar >= 'A' && actChar <= 'F')) {
560:                            state = ISI_REF_HEX;
561:                            break;
562:                        }
563:                        state = subState;
564:                        return HTMLTokenContext.ERROR; // error on previous "&#x" sequence
565:
566:                    case ISI_REF_HEX:
567:                        if ((actChar >= '0' && actChar <= '9')
568:                                || (actChar >= 'a' && actChar <= 'f')
569:                                || (actChar >= 'A' && actChar <= 'F'))
570:                            break;
571:                        if (actChar == ';')
572:                            offset++;
573:                        state = subState;
574:                        return HTMLTokenContext.CHARACTER;
575:                    }
576:
                        // Consume the current character (same effect as offset++).
577:                    offset = ++offset;
578:                } // end of while(offset...)
579:
580:                /** At this stage there's no more text in the scanned buffer.
581:                 * Scanner first checks whether this is completely the last
582:                 * available buffer.
583:                 */
584:                if (lastBuffer) {
585:                    switch (state) {
586:                    case INIT:
587:                    case ISI_TEXT:
588:                    case ISA_LT:
589:                    case ISA_SLASH:
590:                    case ISA_SGML_ESCAPE:
591:                    case ISA_SGML_DASH:
592:                    case ISI_TAG_SLASH:
593:                        return HTMLTokenContext.TEXT;
594:
595:                    case ISA_REF:
596:                    case ISA_REF_HASH:
                            // An unfinished reference is reported as the kind of content it interrupted.
597:                        if (subState == ISI_TEXT)
598:                            return HTMLTokenContext.TEXT;
599:                        else
600:                            return HTMLTokenContext.VALUE;
601:
602:                    case ISI_HTML_COMMENT:
603:                    case ISA_HTML_COMMENT_DASH:
604:                    case ISI_HTML_COMMENT_WS:
605:                        return HTMLTokenContext.BLOCK_COMMENT;
606:
607:                    case ISI_TAG:
608:                        return HTMLTokenContext.TAG_OPEN;
609:                    case ISI_ENDTAG:
610:                        return HTMLTokenContext.TAG_CLOSE;
611:
612:                    case ISI_ARG:
613:                        return HTMLTokenContext.ARGUMENT;
614:
615:                    case ISI_ERROR:
616:                        return HTMLTokenContext.ERROR;
617:
618:                    case ISP_ARG_WS:
619:                    case ISP_TAG_WS:
620:                    case ISP_ENDTAG_WS:
621:                    case ISP_EQ_WS:
622:                        return HTMLTokenContext.WS;
623:
624:                    case ISP_ARG_X:
625:                    case ISP_TAG_X:
626:                    case ISP_ENDTAG_X:
627:                    case ISP_EQ:
628:                        return HTMLTokenContext.WS;
629:
630:                    case ISI_VAL:
631:                    case ISI_VAL_QUOT:
632:                    case ISI_VAL_DQUOT:
633:                        return HTMLTokenContext.VALUE;
634:
635:                    case ISI_SGML_DECL:
636:                    case ISA_SGML_DECL_DASH:
637:                        return HTMLTokenContext.DECLARATION;
638:
639:                    case ISI_SGML_COMMENT:
640:                    case ISA_SGML_COMMENT_DASH:
641:                        return HTMLTokenContext.SGML_COMMENT;
642:
643:                    case ISI_REF_NAME:
644:                    case ISI_REF_DEC:
                        // NOTE(review): inside the loop ISA_REF_X yields ERROR for an incomplete
                        // "&#x"; at end of input it is reported as CHARACTER - confirm.
645:                    case ISA_REF_X:
646:                    case ISI_REF_HEX:
647:                        return HTMLTokenContext.CHARACTER;
648:                    }
649:                }
650:
                    // Buffer exhausted without completing a token and no end-of-input token applies.
651:                return null;
652:            }
653:
654:            public String getStateName(int stateNumber) {
655:                switch (stateNumber) {
656:                case INIT:
657:                    return "INIT"; // NOI18N
658:                case ISI_TEXT:
659:                    return "ISI_TEXT"; // NOI18N
660:                case ISA_LT:
661:                    return "ISA_LT"; // NOI18N
662:                case ISA_SLASH:
663:                    return "ISA_SLASH"; // NOI18N
664:                case ISA_SGML_ESCAPE:
665:                    return "ISA_SGML_ESCAPE"; // NOI18N
666:                case ISA_SGML_DASH:
667:                    return "ISA_SGML_DASH"; // NOI18N
668:                case ISI_HTML_COMMENT:
669:                    return "ISI_HTML_COMMENT";// NOI18N
670:                case ISA_HTML_COMMENT_DASH:
671:                    return "ISA_HTML_COMMENT_DASH";// NOI18N
672:                case ISI_HTML_COMMENT_WS:
673:                    return "ISI_HTML_COMMENT_WS";// NOI18N
674:                case ISI_TAG:
675:                    return "ISI_TAG";// NOI18N
676:                case ISI_ENDTAG:
677:                    return "ISI_ENDTAG";// NOI18N
678:                case ISI_ARG:
679:                    return "ISI_ARG";// NOI18N
680:                case ISI_ERROR:
681:                    return "ISI_ERROR";// NOI18N
682:                case ISP_ARG_WS:
683:                    return "ISP_ARG_WS";// NOI18N
684:                case ISP_TAG_WS:
685:                    return "ISP_TAG_WS";// NOI18N
686:                case ISP_ENDTAG_WS:
687:                    return "ISP_ENDTAG_WS";// NOI18N
688:                case ISP_ARG_X:
689:                    return "ISP_ARG_X";// NOI18N
690:                case ISP_TAG_X:
691:                    return "ISP_TAG_X";// NOI18N
692:                case ISP_ENDTAG_X:
693:                    return "ISP_ENDTAG_X";// NOI18N
694:                case ISP_EQ:
695:                    return "ISP_EQ";// NOI18N
696:                case ISI_VAL:
697:                    return "ISI_VAL";// NOI18N
698:                case ISI_VAL_QUOT:
699:                    return "ISI_VAL_QUOT";// NOI18N
700:                case ISI_VAL_DQUOT:
701:                    return "ISI_VAL_DQUOT";// NOI18N
702:                case ISI_SGML_DECL:
703:                    return "ISI_SGML_DECL";// NOI18N
704:                case ISA_SGML_DECL_DASH:
705:                    return "ISA_SGML_DECL_DASH";// NOI18N
706:                case ISI_SGML_COMMENT:
707:                    return "ISI_SGML_COMMENT";// NOI18N
708:                case ISA_SGML_COMMENT_DASH:
709:                    return "ISA_SGML_COMMENT_DASH";// NOI18N
710:                case ISA_REF:
711:                    return "ISA_REF";// NOI18N
712:                case ISI_REF_NAME:
713:                    return "ISI_REF_NAME";// NOI18N
714:                case ISA_REF_HASH:
715:                    return "ISA_REF_HASH";// NOI18N
716:                case ISI_REF_DEC:
717:                    return "ISI_REF_DEC";// NOI18N
718:                case ISA_REF_X:
719:                    return "ISA_REF_X";// NOI18N
720:                case ISI_REF_HEX:
721:                    return "ISI_REF_HEX";// NOI18N
722:                default:
723:                    return super .getStateName(stateNumber);
724:                }
725:            }
726:
727:            /** Load valid mark state into the analyzer. Offsets
728:             * are already initialized when this method is called. This method
729:             * must get the state from the mark and set it to the analyzer. Then
730:             * it must decrease tokenOffset by the preScan stored in the mark state.
731:             * @param markState mark state to be loaded into syntax. It must be non-null value.
732:             */
733:            public void loadState(StateInfo stateInfo) {
734:                super .loadState(stateInfo);
735:                subState = ((HTMLStateInfo) stateInfo).getSubState();
736:            }
737:
738:            /** Store state of this analyzer into given mark state. */
739:            public void storeState(StateInfo stateInfo) {
740:                super .storeState(stateInfo);
741:                ((HTMLStateInfo) stateInfo).setSubState(subState);
742:            }
743:
744:            /** Compare state of this analyzer to given state info */
745:            public int compareState(StateInfo stateInfo) {
746:                if (super .compareState(stateInfo) == DIFFERENT_STATE)
747:                    return DIFFERENT_STATE;
748:                return (((HTMLStateInfo) stateInfo).getSubState() == subState) ? EQUAL_STATE
749:                        : DIFFERENT_STATE;
750:            }
751:
752:            /** Create state info appropriate for particular analyzer */
753:            public StateInfo createStateInfo() {
754:                return new HTMLStateInfo();
755:            }
756:
757:            /** Base implementation of the StateInfo interface */
758:            public static class HTMLStateInfo extends Syntax.BaseStateInfo {
759:
760:                /** analyzer subState during parsing character references */
761:                private int subState;
762:
763:                public int getSubState() {
764:                    return subState;
765:                }
766:
767:                public void setSubState(int subState) {
768:                    this .subState = subState;
769:                }
770:
771:                public String toString(Syntax syntax) {
772:                    return super .toString(syntax)
773:                            + ", subState="
774:                            + (syntax == null ? "" : syntax
775:                                    .getStateName(getSubState())); // NOI18N
776:                }
777:
778:            }
779:
780:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.