Source Code Cross Referenced for FastPageParser.java in  » Web-Framework » SiteMesh » com » opensymphony » module » sitemesh » parser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Web Framework » SiteMesh » com.opensymphony.module.sitemesh.parser 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Title:        FastPageParser
003:         * Description:
004:         *
005:         * This software is published under the terms of the OpenSymphony Software
006:         * License version 1.1, of which a copy has been included with this
007:         * distribution in the LICENSE.txt file.
008:         */
009:
010:        package com.opensymphony.module.sitemesh.parser;
011:
012:        import com.opensymphony.module.sitemesh.Page;
013:        import com.opensymphony.module.sitemesh.PageParser;
014:        import com.opensymphony.module.sitemesh.html.util.CharArray;
015:        import com.opensymphony.module.sitemesh.util.CharArrayReader;
016:
017:        import java.io.IOException;
018:        import java.io.Reader;
019:        import java.util.Collections;
020:        import java.util.HashMap;
021:        import java.util.Map;
022:
023:        /**
024:         * Very fast PageParser implementation for parsing HTML.
025:         *
026:         * <p>Produces FastPage.</p>
027:         *
028:         * @author <a href="mailto:salaman@qoretech.com">Victor Salaman</a>
029:         * @version $Revision: 1.13 $
030:         * @deprecated Use HTMLPageParser instead - it performs better and is more extensible.
031:         */
032:        public final class FastPageParser implements  PageParser {
033:            private static final int TOKEN_NONE = -0; // Token kinds held in _tokenType; -0 is simply the int 0 (initial "no token yet" value).
034:            private static final int TOKEN_EOF = -1; // Terminates the main parse loop.
035:            private static final int TOKEN_TEXT = -2;
036:            private static final int TOKEN_TAG = -3;
037:            private static final int TOKEN_COMMENT = -4;
038:            private static final int TOKEN_CDATA = -5;
039:            private static final int TOKEN_SCRIPT = -6;
040:            private static final int TOKEN_DOCTYPE = -7;
041:            private static final int TOKEN_EMPTYTAG = -8; // Tag whose buffered text ends in '/' when '>' is reached.
042:
043:            private static final int STATE_EOF = -1; // Character-level tokenizer states held in _state; values overlap TOKEN_* but are never compared with them.
044:            private static final int STATE_TEXT = -2; // Initial tokenizer state.
045:            private static final int STATE_TAG = -3;
046:            private static final int STATE_COMMENT = -4;
047:            private static final int STATE_TAG_QUOTE = -5; // Entered on '"' or '\'' while inside a tag.
048:            private static final int STATE_CDATA = -6;
049:            private static final int STATE_SCRIPT = -7;
050:            private static final int STATE_DOCTYPE = -8;
051:
052:            private static final int TAG_STATE_NONE = 0; // Document-level states held in state/laststate; used to pick where tag and text output is written.
053:            private static final int TAG_STATE_HTML = -1;
054:            private static final int TAG_STATE_HEAD = -2;
055:            private static final int TAG_STATE_TITLE = -3;
056:            private static final int TAG_STATE_BODY = -4;
057:            private static final int TAG_STATE_XML = -6; // While in XML/XMP state, tags are written through until the matching close tag restores laststate.
058:            private static final int TAG_STATE_XMP = -7;
059:
060:            // These hashcodes are hardcoded because switch statements can only
061:            // switch on compile-time constants.
062:            // In theory it is possible for there to be a hashcode collision with
063:            // other HTML tags, however in practice it is *very* unlikely because
064:            // tags are generally only a few characters long and hence are likely
065:            // to produce unique values.
066:
067:            private static final int SLASH_XML_HASH = 1518984; // "/xml".hashCode();
068:            private static final int XML_HASH = 118807; // "xml".hashCode();
069:            private static final int SLASH_XMP_HASH = 1518988; // "/xmp".hashCode();
070:            private static final int XMP_HASH = 118811; // "xmp".hashCode();
071:            private static final int HTML_HASH = 3213227; // "html".hashCode();
072:            private static final int SLASH_HTML_HASH = 46618714; // "/html".hashCode();
073:            private static final int HEAD_HASH = 3198432; // "head".hashCode();
074:            private static final int TITLE_HASH = 110371416; // "title".hashCode();
075:            private static final int SLASH_TITLE_HASH = 1455941513; // "/title".hashCode();
076:            private static final int PARAMETER_HASH = 1954460585; // "parameter".hashCode();
077:            private static final int META_HASH = 3347973; // "meta".hashCode();
078:            private static final int SLASH_HEAD_HASH = 46603919; // "/head".hashCode();
079:            private static final int FRAMESET_HASH = -1644953643; // "frameset".hashCode();
080:            private static final int FRAME_HASH = 97692013; // "frame".hashCode();
081:            private static final int BODY_HASH = 3029410; // "body".hashCode();
082:            private static final int SLASH_BODY_HASH = 46434897; // "/body".hashCode();
083:            private static final int CONTENT_HASH = 951530617; // "content".hashCode();
084:
085:            public Page parse(char[] data) throws IOException {
                    // Parses an in-memory page: the array is wrapped in a CharArrayReader and run
                    // through internalParse, then the same array is handed to setVerbatimPage on
                    // the resulting FastPage before it is returned.
086:                FastPage page = internalParse(new CharArrayReader(data));
087:                page.setVerbatimPage(data);
088:                return page;
089:            }
090:
091:            public Page parse(Reader reader) {
                    // Parses the page from a character stream by delegating to internalParse.
                    // Unlike parse(char[]), no verbatim page is set on the result. An IOException
                    // thrown by reader.read() is caught inside internalParse and treated as end
                    // of input, so a read failure is not reported to the caller.
092:                return internalParse(reader);
093:            }
094:
095:            private FastPage internalParse(Reader reader) {
096:                CharArray _buffer = new CharArray(4096);
097:                CharArray _body = new CharArray(4096);
098:                CharArray _head = new CharArray(512);
099:                CharArray _title = new CharArray(128);
100:                Map _htmlProperties = null;
101:                Map _metaProperties = new HashMap(6);
102:                Map _sitemeshProperties = new HashMap(6);
103:                Map _bodyProperties = null;
104:
105:                CharArray _currentTaggedContent = new CharArray(1024);
106:                String _contentTagId = null;
107:                boolean tagged = false;
108:
109:                boolean _frameSet = false;
110:
111:                int _state = STATE_TEXT;
112:                int _tokenType = TOKEN_NONE;
113:                int _pushBack = 0;
114:                int _comment = 0;
115:                int _quote = 0;
116:                boolean hide = false;
117:
118:                int state = TAG_STATE_NONE;
119:                int laststate = TAG_STATE_NONE;
120:                boolean doneTitle = false;
121:
122:                // This tag object gets reused each iteration.
123:                Tag tagObject = new Tag();
124:
125:                while (_tokenType != TOKEN_EOF) {
126:                    if (tagged) {
127:                        if (_tokenType == TOKEN_TAG
128:                                || _tokenType == TOKEN_EMPTYTAG) {
129:                            if (_buffer == null || _buffer.length() == 0) {
130:                                _tokenType = TOKEN_NONE;
131:                                continue;
132:                            }
133:
134:                            if (parseTag(tagObject, _buffer) == null)
135:                                continue;
136:
137:                            if (_buffer.compareLowerSubstr("/content")) // Note that the '/' survives the | 32 operation
138:                            {
139:                                tagged = false;
140:                                if (_contentTagId != null) {
141:                                    state = TAG_STATE_NONE;
142:                                    _sitemeshProperties.put(_contentTagId,
143:                                            _currentTaggedContent.toString());
144:                                    _currentTaggedContent.setLength(0);
145:                                    _contentTagId = null;
146:                                }
147:                            } else {
148:                                _currentTaggedContent.append('<').append(
149:                                        _buffer).append('>');
150:                            }
151:                        } else {
152:                            if (_buffer.length() > 0)
153:                                _currentTaggedContent.append(_buffer);
154:                        }
155:                    } else {
156:                        if (_tokenType == TOKEN_TAG
157:                                || _tokenType == TOKEN_EMPTYTAG) {
158:                            if (_buffer == null || _buffer.length() == 0) {
159:                                _tokenType = TOKEN_NONE;
160:                                continue;
161:                            }
162:
163:                            if (parseTag(tagObject, _buffer) == null) {
164:                                _tokenType = TOKEN_TEXT;
165:                                continue;
166:                            }
167:
168:                            int tagHash = _buffer.substrHashCode();
169:
170:                            if (state == TAG_STATE_XML
171:                                    || state == TAG_STATE_XMP) {
172:                                writeTag(state, laststate, hide, _head,
173:                                        _buffer, _body);
174:                                if ((state == TAG_STATE_XML && tagHash == SLASH_XML_HASH)
175:                                        || (state == TAG_STATE_XMP && tagHash == SLASH_XMP_HASH)) {
176:                                    state = laststate;
177:                                }
178:                            } else {
179:                                boolean doDefault = false;
180:                                switch (tagHash) {
181:                                case HTML_HASH:
182:                                    if (!_buffer.compareLowerSubstr("html")) { // skip any accidental hash collisions
183:                                        doDefault = true;
184:                                        break;
185:                                    }
186:                                    state = TAG_STATE_HTML;
187:                                    _htmlProperties = parseProperties(
188:                                            tagObject, _buffer).properties;
189:                                    break;
190:                                case HEAD_HASH:
191:                                    if (!_buffer.compareLowerSubstr("head")) { // skip any accidental hash collisions
192:                                        doDefault = true;
193:                                        break;
194:                                    }
195:                                    state = TAG_STATE_HEAD;
196:                                    break;
197:                                case XML_HASH:
198:                                    if (!_buffer.compareLowerSubstr("xml")) { // skip any accidental hash collisions
199:                                        doDefault = true;
200:                                        break;
201:                                    }
202:                                    laststate = state;
203:                                    writeTag(state, laststate, hide, _head,
204:                                            _buffer, _body);
205:                                    state = TAG_STATE_XML;
206:                                    break;
207:                                case XMP_HASH:
208:                                    if (!_buffer.compareLowerSubstr("xmp")) { // skip any accidental hash collisions
209:                                        doDefault = true;
210:                                        break;
211:                                    }
212:                                    laststate = state;
213:                                    writeTag(state, laststate, hide, _head,
214:                                            _buffer, _body);
215:                                    state = TAG_STATE_XMP;
216:                                    break;
217:                                case TITLE_HASH:
218:                                    if (!_buffer.compareLowerSubstr("title")) { // skip any accidental hash collisions
219:                                        doDefault = true;
220:                                        break;
221:                                    }
222:                                    if (doneTitle) {
223:                                        hide = true;
224:                                    } else {
225:                                        laststate = state;
226:                                        state = TAG_STATE_TITLE;
227:                                    }
228:                                    break;
229:                                case SLASH_TITLE_HASH:
230:                                    if (!_buffer.compareLowerSubstr("/title")) { // skip any accidental hash collisions
231:                                        doDefault = true;
232:                                        break;
233:                                    }
234:                                    if (doneTitle) {
235:                                        hide = false;
236:                                    } else {
237:                                        doneTitle = true;
238:                                        state = laststate;
239:                                    }
240:                                    break;
241:                                case PARAMETER_HASH:
242:                                    if (!_buffer
243:                                            .compareLowerSubstr("parameter")) { // skip any accidental hash collisions
244:                                        doDefault = true;
245:                                        break;
246:                                    }
247:                                    parseProperties(tagObject, _buffer);
248:                                    String name = (String) tagObject.properties
249:                                            .get("name");
250:                                    String value = (String) tagObject.properties
251:                                            .get("value");
252:
253:                                    if (name != null && value != null) {
254:                                        _sitemeshProperties.put(name, value);
255:                                    }
256:                                    break;
257:                                case META_HASH:
258:                                    if (!_buffer.compareLowerSubstr("meta")) { // skip any accidental hash collisions
259:                                        doDefault = true;
260:                                        break;
261:                                    }
262:                                    CharArray metaDestination = state == TAG_STATE_HEAD ? _head
263:                                            : _body;
264:                                    metaDestination.append('<');
265:                                    metaDestination.append(_buffer);
266:                                    metaDestination.append('>');
267:                                    parseProperties(tagObject, _buffer);
268:                                    name = (String) tagObject.properties
269:                                            .get("name");
270:                                    value = (String) tagObject.properties
271:                                            .get("content");
272:
273:                                    if (name == null) {
274:                                        String httpEquiv = (String) tagObject.properties
275:                                                .get("http-equiv");
276:
277:                                        if (httpEquiv != null) {
278:                                            name = "http-equiv." + httpEquiv;
279:                                        }
280:                                    }
281:
282:                                    if (name != null && value != null) {
283:                                        _metaProperties.put(name, value);
284:                                    }
285:                                    break;
286:                                case SLASH_HEAD_HASH:
287:                                    if (!_buffer.compareLowerSubstr("/head")) { // skip any accidental hash collisions
288:                                        doDefault = true;
289:                                        break;
290:                                    }
291:                                    state = TAG_STATE_HTML;
292:                                    break;
293:                                case FRAME_HASH:
294:                                    if (!_buffer.compareLowerSubstr("frame")) { // skip any accidental hash collisions
295:                                        doDefault = true;
296:                                        break;
297:                                    }
298:                                    _frameSet = true;
299:                                    break;
300:                                case FRAMESET_HASH:
301:                                    if (!_buffer.compareLowerSubstr("frameset")) { // skip any accidental hash collisions
302:                                        doDefault = true;
303:                                        break;
304:                                    }
305:                                    _frameSet = true;
306:                                    break;
307:                                case BODY_HASH:
308:                                    if (!_buffer.compareLowerSubstr("body")) { // skip any accidental hash collisions
309:                                        doDefault = true;
310:                                        break;
311:                                    }
312:                                    if (_tokenType == TOKEN_EMPTYTAG) {
313:                                        state = TAG_STATE_BODY;
314:                                    }
315:                                    _bodyProperties = parseProperties(
316:                                            tagObject, _buffer).properties;
317:                                    break;
318:                                case CONTENT_HASH:
319:                                    if (!_buffer.compareLowerSubstr("content")) { // skip any accidental hash collisions
320:                                        doDefault = true;
321:                                        break;
322:                                    }
323:                                    state = TAG_STATE_NONE;
324:                                    Map props = parseProperties(tagObject,
325:                                            _buffer).properties;
326:                                    if (props != null) {
327:                                        tagged = true;
328:                                        _contentTagId = (String) props
329:                                                .get("tag");
330:                                    }
331:                                    break;
332:                                case SLASH_XMP_HASH:
333:                                    if (!_buffer.compareLowerSubstr("/xmp")) { // skip any accidental hash collisions
334:                                        doDefault = true;
335:                                        break;
336:                                    }
337:                                    hide = false;
338:                                    break;
339:                                case SLASH_BODY_HASH:
340:                                    if (!_buffer.compareLowerSubstr("/body")) { // skip any accidental hash collisions
341:                                        doDefault = true;
342:                                        break;
343:                                    }
344:                                    state = TAG_STATE_NONE;
345:                                    hide = true;
346:                                    break;
347:                                case SLASH_HTML_HASH:
348:                                    if (!_buffer.compareLowerSubstr("/html")) { // skip any accidental hash collisions
349:                                        doDefault = true;
350:                                        break;
351:                                    }
352:                                    state = TAG_STATE_NONE;
353:                                    hide = true;
354:                                    break;
355:                                default:
356:                                    doDefault = true;
357:                                }
358:                                if (doDefault)
359:                                    writeTag(state, laststate, hide, _head,
360:                                            _buffer, _body);
361:                            }
362:                        } else if (!hide) {
363:                            if (_tokenType == TOKEN_TEXT) {
364:                                if (state == TAG_STATE_TITLE) {
365:                                    _title.append(_buffer);
366:                                } else if (shouldWriteToHead(state, laststate)) {
367:                                    _head.append(_buffer);
368:                                } else {
369:                                    _body.append(_buffer);
370:                                }
371:                            } else if (_tokenType == TOKEN_COMMENT) {
372:                                final CharArray commentDestination = shouldWriteToHead(
373:                                        state, laststate) ? _head : _body;
374:                                commentDestination.append("<!--");
375:                                commentDestination.append(_buffer);
376:                                commentDestination.append("-->");
377:                            } else if (_tokenType == TOKEN_CDATA) {
378:                                final CharArray commentDestination = state == TAG_STATE_HEAD ? _head
379:                                        : _body;
380:                                commentDestination.append("<![CDATA[");
381:                                commentDestination.append(_buffer);
382:                                commentDestination.append("]]>");
383:                            } else if (_tokenType == TOKEN_SCRIPT) {
384:                                final CharArray commentDestination = state == TAG_STATE_HEAD ? _head
385:                                        : _body;
386:                                commentDestination.append('<');
387:                                commentDestination.append(_buffer);
388:                            }
389:                        }
390:                    }
391:                    _buffer.setLength(0);
392:
393:                    start: while (true) {
394:                        int c;
395:
396:                        if (_pushBack != 0) {
397:                            c = _pushBack;
398:                            _pushBack = 0;
399:                        } else {
400:                            try {
401:                                c = reader.read();
402:                            } catch (IOException e) {
403:                                _tokenType = TOKEN_EOF;
404:                                break start;
405:                            }
406:                        }
407:
408:                        if (c < 0) {
409:                            int tmpstate = _state;
410:                            _state = STATE_EOF;
411:
412:                            if (_buffer.length() > 0 && tmpstate == STATE_TEXT) {
413:                                _tokenType = TOKEN_TEXT;
414:                                break start;
415:                            } else {
416:                                _tokenType = TOKEN_EOF;
417:                                break start;
418:                            }
419:                        }
420:
421:                        switch (_state) {
422:                        case STATE_TAG: {
423:                            int buflen = _buffer.length();
424:
425:                            if (c == '>') {
426:                                if (_buffer.length() > 1
427:                                        && _buffer.charAt(_buffer.length() - 1) == '/') {
428:                                    _tokenType = TOKEN_EMPTYTAG;
429:                                } else {
430:                                    _tokenType = TOKEN_TAG;
431:                                }
432:                                _state = STATE_TEXT;
433:                                break start;
434:                            } else if (c == '/') {
435:                                _buffer.append('/');
436:                            } else if (c == '<' && buflen == 0) {
437:                                _buffer.append("<<");
438:                                _state = STATE_TEXT;
439:                            } else if (c == '-' && buflen == 2
440:                                    && _buffer.charAt(1) == '-'
441:                                    && _buffer.charAt(0) == '!') {
442:                                _buffer.setLength(0);
443:                                _state = STATE_COMMENT;
444:                            } else if (c == '[' && buflen == 7
445:                                    && _buffer.charAt(0) == '!'
446:                                    && _buffer.charAt(1) == '['
447:                                    && _buffer.compareLower("cdata", 2)) {
448:                                _buffer.setLength(0);
449:                                _state = STATE_CDATA;
450:                            } else if ((c == 'e' || c == 'E') && buflen == 7
451:                                    && _buffer.charAt(0) == '!'
452:                                    && _buffer.compareLower("doctyp", 1)) {
453:                                _buffer.append((char) c);
454:                                _state = STATE_DOCTYPE;
455:                            } else if ((c == 'T' || c == 't') && buflen == 5
456:                                    && _buffer.compareLower("scrip", 0)) {
457:                                _buffer.append((char) c);
458:                                _state = STATE_SCRIPT;
459:                            }
460:
461:                            else if (c == '"' || c == '\'') {
462:                                _quote = c;
463:                                _buffer.append((char) c);
464:                                _state = STATE_TAG_QUOTE;
465:                            } else {
466:                                _buffer.append((char) c);
467:                            }
468:                        }
469:                            break;
470:
471:                        case STATE_TEXT: {
472:                            if (c == '<') {
473:                                _state = STATE_TAG;
474:                                if (_buffer.length() > 0) {
475:                                    _tokenType = TOKEN_TEXT;
476:                                    break start;
477:                                }
478:                            } else {
479:                                _buffer.append((char) c);
480:                            }
481:                        }
482:                            break;
483:
484:                        case STATE_TAG_QUOTE: {
485:                            if (c == '>') {
486:                                _pushBack = c;
487:                                _state = STATE_TAG;
488:                            } else {
489:                                _buffer.append((char) c);
490:                                if (c == _quote) {
491:                                    _state = STATE_TAG;
492:                                }
493:                            }
494:                        }
495:                            break;
496:
497:                        case STATE_COMMENT: {
498:                            if (c == '>' && _comment >= 2) {
499:                                _buffer.setLength(_buffer.length() - 2);
500:                                _comment = 0;
501:                                _state = STATE_TEXT;
502:                                _tokenType = TOKEN_COMMENT;
503:                                break start;
504:                            } else if (c == '-') {
505:                                _comment++;
506:                            } else {
507:                                _comment = 0;
508:                            }
509:
510:                            _buffer.append((char) c);
511:                        }
512:                            break;
513:
514:                        case STATE_CDATA: {
515:                            if (c == '>' && _comment >= 2) {
516:                                _buffer.setLength(_buffer.length() - 2);
517:                                _comment = 0;
518:                                _state = STATE_TEXT;
519:                                _tokenType = TOKEN_CDATA;
520:                                break start;
521:                            } else if (c == ']') {
522:                                _comment++;
523:                            } else {
524:                                _comment = 0;
525:                            }
526:
527:                            _buffer.append((char) c);
528:                        }
529:                            break;
530:
531:                        case STATE_SCRIPT: {
532:                            _buffer.append((char) c);
533:                            if (c == '<') {
534:                                _comment = 0;
535:                            } else if ((c == '/' && _comment == 0)
536:                                    || ((c == 's' || c == 'S') && _comment == 1)
537:                                    || ((c == 'c' || c == 'C') && _comment == 2)
538:                                    || ((c == 'r' || c == 'R') && _comment == 3)
539:                                    || ((c == 'i' || c == 'I') && _comment == 4)
540:                                    || ((c == 'p' || c == 'P') && _comment == 5)
541:                                    || ((c == 't' || c == 'T') && _comment == 6)) {
542:                                _comment++;
543:                            } else if (c == '>' && _comment >= 7) {
544:                                _comment = 0;
545:                                _state = STATE_TEXT;
546:                                _tokenType = TOKEN_SCRIPT;
547:                                break start;
548:                            }
549:                        }
550:                            break;
551:
552:                        case STATE_DOCTYPE: {
553:                            _buffer.append((char) c);
554:                            if (c == '>') {
555:                                _state = STATE_TEXT;
556:                                _tokenType = TOKEN_DOCTYPE;
557:                                break start;
558:                            } else {
559:                                _comment = 0;
560:                            }
561:                        }
562:                            break;
563:                        }
564:                    }
565:                }
566:
567:                // Help the GC
568:                _currentTaggedContent = null;
569:                _buffer = null;
570:
571:                return new FastPage(_sitemeshProperties, _htmlProperties,
572:                        _metaProperties, _bodyProperties, _title.toString()
573:                                .trim(), _head.toString().trim(), _body
574:                                .toString().trim(), _frameSet);
575:            }
576:
577:            private static void writeTag(int state, int laststate,
578:                    boolean hide, CharArray _head, CharArray _buffer,
579:                    CharArray _body) {
580:                if (!hide) {
581:                    if (shouldWriteToHead(state, laststate)) {
582:                        _head.append('<').append(_buffer).append('>');
583:                    } else {
584:                        _body.append('<').append(_buffer).append('>');
585:                    }
586:                }
587:            }
588:
589:            private static boolean shouldWriteToHead(int state, int laststate) {
590:                return state == TAG_STATE_HEAD
591:                        || (laststate == TAG_STATE_HEAD && (state == TAG_STATE_XML || state == TAG_STATE_XMP));
592:            }
593:
594:            /**
595:             * Populates a {@link Tag} object using data from the supplied {@link CharArray}.
596:             *
597:             * The supplied tag parameter is reset and reused - this avoids excess object
598:             * creation which hwlps performance.
599:             *
600:             * @return the same tag instance that was passed in, except it will be populated
601:             * with a new <tt>name</tt> value (and the corresponding <tt>nameEndIdx</tt> value).
602:             * However if the tag contained nathing but whitespace, this method will return
603:             * <tt>null</tt>.
604:             */
605:            private Tag parseTag(Tag tag, CharArray buf) {
606:                int len = buf.length();
607:                int idx = 0;
608:                int begin;
609:
610:                // Skip over any leading whitespace in the tag
611:                while (idx < len && Character.isWhitespace(buf.charAt(idx)))
612:                    idx++;
613:
614:                if (idx == len)
615:                    return null;
616:
617:                // Find out where the non-whitespace characters end. This will give us the tag name.
618:                begin = idx;
619:                while (idx < len && !Character.isWhitespace(buf.charAt(idx)))
620:                    idx++;
621:
622:                // Mark the tag name as a substring within the buffer. This allows us to perform
623:                // a substring comparison against it at a later date
624:                buf
625:                        .setSubstr(begin, buf.charAt(idx - 1) == '/' ? idx - 1
626:                                : idx);
627:
628:                // Remember where the name finishes so we can pull out the properties later if need be
629:                tag.nameEndIdx = idx;
630:
631:                return tag;
632:            }
633:
634:            /**
635:             * This is called when we need to extract the properties for the tag from the tag's HTML.
636:             * We only call this when necessary since it has quite a lot of overhead.
637:             *
638:             * @param tag the tag that is currently being processed. This should be the
639:             * tag that was returned as a result of a call to {@link #parseTag(FastPageParser.Tag, CharArray)}
640:             * (ie, it has the <tt>name</tt> and <tt>nameEndIdx</tt> fields set correctly for the
641:             * tag in question. The <tt>properties</tt> field can be in an undefined state - it
642:             * will get replaced regardless).
643:             * @param buffer a <tt>CharArray</tt> containing the entire tag that is being parsed.
644:             * @return the same tag instance that was passed in, only it will now be populated
645:             * with any properties that were specified in the tag's HTML.
646:             */
647:            private static Tag parseProperties(Tag tag, CharArray buffer) {
648:                int len = buffer.length();
649:                int idx = tag.nameEndIdx;
650:
651:                // Start with an empty hashmap. A new HashMap is lazy-created if we happen to find any properties
652:                tag.properties = Collections.EMPTY_MAP;
653:                int begin;
654:                while (idx < len) {
655:                    // Skip forward to the next non-whitespace character
656:                    while (idx < len
657:                            && Character.isWhitespace(buffer.charAt(idx)))
658:                        idx++;
659:
660:                    if (idx == len)
661:                        continue;
662:
663:                    begin = idx;
664:                    if (buffer.charAt(idx) == '"') {
665:                        idx++;
666:                        while (idx < len && buffer.charAt(idx) != '"')
667:                            idx++;
668:                        if (idx == len)
669:                            continue;
670:                        idx++;
671:                    } else if (buffer.charAt(idx) == '\'') {
672:                        idx++;
673:                        while (idx < len && buffer.charAt(idx) != '\'')
674:                            idx++;
675:                        if (idx == len)
676:                            continue;
677:                        idx++;
678:                    } else {
679:                        while (idx < len
680:                                && !Character.isWhitespace(buffer.charAt(idx))
681:                                && buffer.charAt(idx) != '=')
682:                            idx++;
683:                    }
684:
685:                    // Mark the substring. This is the attribute name
686:                    buffer.setSubstr(begin, idx);
687:
688:                    if (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
689:                        while (idx < len
690:                                && Character.isWhitespace(buffer.charAt(idx)))
691:                            idx++;
692:                    }
693:
694:                    if (idx == len || buffer.charAt(idx) != '=')
695:                        continue;
696:
697:                    idx++;
698:
699:                    if (idx == len)
700:                        continue;
701:
702:                    while (idx < len
703:                            && (buffer.charAt(idx) == '\n' || buffer
704:                                    .charAt(idx) == '\r'))
705:                        idx++;
706:
707:                    if (buffer.charAt(idx) == ' ') {
708:                        while (idx < len
709:                                && Character.isWhitespace(buffer.charAt(idx)))
710:                            idx++;
711:                        if (idx == len
712:                                || (buffer.charAt(idx) != '"' && buffer
713:                                        .charAt(idx) != '"'))
714:                            continue;
715:                    }
716:
717:                    begin = idx;
718:                    int end;
719:                    if (buffer.charAt(idx) == '"') {
720:                        idx++;
721:                        begin = idx;
722:                        while (idx < len && buffer.charAt(idx) != '"')
723:                            idx++;
724:                        if (idx == len)
725:                            continue;
726:                        end = idx;
727:                        idx++;
728:                    } else if (buffer.charAt(idx) == '\'') {
729:                        idx++;
730:                        begin = idx;
731:                        while (idx < len && buffer.charAt(idx) != '\'')
732:                            idx++;
733:                        if (idx == len)
734:                            continue;
735:                        end = idx;
736:                        idx++;
737:                    } else {
738:                        while (idx < len
739:                                && !Character.isWhitespace(buffer.charAt(idx)))
740:                            idx++;
741:                        end = idx;
742:                    }
743:                    // Extract the name and value as String objects and add them to the property map
744:                    String name = buffer.getLowerSubstr();
745:                    String value = buffer.substring(begin, end);
746:
747:                    tag.addProperty(name, value);
748:                }
749:                return tag;
750:            }
751:
752:            private class Tag {
753:                // The index where the name string ends. This is used as the starting
754:                // offet if we need to continue processing to find the tag's properties
755:                public int nameEndIdx = 0;
756:
757:                // This holds a map of the various properties for a particular tag.
758:                // This map is only populated when required - normally it will remain empty
759:                public Map properties = Collections.EMPTY_MAP;
760:
761:                /**
762:                 * Adds a name/value property pair to this tag. Each property that is
763:                 * added represents a property that was parsed from the tag's HTML.
764:                 */
765:                public void addProperty(String name, String value) {
766:                    if (properties == Collections.EMPTY_MAP) {
767:                        properties = new HashMap(8);
768:                    }
769:                    properties.put(name, value);
770:                }
771:            }
772:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.