Source Code Cross Referenced for Parser.java in  » Web-Server » Quadcap-Web-Server » com » quadcap » text » sax » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Web Server » Quadcap Web Server » com.quadcap.text.sax 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package com.quadcap.text.sax;
002:
003:        /* Copyright 1999 - 2003 Quadcap Software.  All rights reserved.
004:         *
005:         * This software is distributed under the Quadcap Free Software License.
006:         * This software may be used or modified for any purpose, personal or
007:         * commercial.  Open Source redistributions are permitted.  Commercial
008:         * redistribution of larger works derived from, or works which bundle
009:         * this software requires a "Commercial Redistribution License"; see
010:         * http://www.quadcap.com/purchase.
011:         *
012:         * Redistributions qualify as "Open Source" under  one of the following terms:
013:         *   
014:         *    Redistributions are made at no charge beyond the reasonable cost of
015:         *    materials and delivery.
016:         *
017:         *    Redistributions are accompanied by a copy of the Source Code or by an
018:         *    irrevocable offer to provide a copy of the Source Code for up to three
019:         *    years at the cost of materials and delivery.  Such redistributions
020:         *    must allow further use, modification, and redistribution of the Source
021:         *    Code under substantially the same terms as this license.
022:         *
023:         * Redistributions of source code must retain the copyright notices as they
024:         * appear in each source code file, these license terms, and the
025:         * disclaimer/limitation of liability set forth as paragraph 6 below.
026:         *
027:         * Redistributions in binary form must reproduce this Copyright Notice,
028:         * these license terms, and the disclaimer/limitation of liability set
029:         * forth as paragraph 6 below, in the documentation and/or other materials
030:         * provided with the distribution.
031:         *
032:         * The Software is provided on an "AS IS" basis.  No warranty is
033:         * provided that the Software is free of defects, or fit for a
034:         * particular purpose.  
035:         *
036:         * Limitation of Liability. Quadcap Software shall not be liable
037:         * for any damages suffered by the Licensee or any third party resulting
038:         * from use of the Software.
039:         */
040:
041:        import java.io.CharArrayWriter;
042:        import java.io.IOException;
043:        import java.io.InputStreamReader;
044:        import java.io.Reader;
045:
046:        import org.xml.sax.DocumentHandler;
047:        import org.xml.sax.DTDHandler;
048:        import org.xml.sax.EntityResolver;
049:        import org.xml.sax.ErrorHandler;
050:        import org.xml.sax.HandlerBase;
051:        import org.xml.sax.InputSource;
052:        import org.xml.sax.SAXException;
053:
054:        import com.quadcap.text.NoStringPool;
055:        import com.quadcap.text.StringPool;
056:        import com.quadcap.util.collections.ArrayQueue;
057:
058:        import com.quadcap.util.Debug;
059:
060:        /**
061:         * SAX Parser implementation.
062:         *
063:         * @author Stan Bailes
064:         */
065:        public class Parser implements  org.xml.sax.Parser {
066:            boolean docStarted = false;
067:            HandlerBase defaultHandler = new HandlerBase();
068:            StringPool pool = new NoStringPool();
069:            InputSource in;
070:            Reader r;
071:            DocumentHandler docHandler = defaultHandler;
072:            DTDHandler dtdHandler = defaultHandler;
073:            EntityResolver entityResolver = defaultHandler;
074:            ErrorHandler errorHandler = defaultHandler;
075:            char[] ebuf = new char[6];
076:            char[] tag = new char[1024 * 32];
077:            int taglen = 0;
078:            CharArrayWriter data = new CharArrayWriter();
079:            AttributeList attributes = new AttributeList();
080:            String attrName = null;
081:            String tagName = null;
082:            ArrayQueue inStack = null;
083:            ArrayQueue locStack = null;
084:            int lineNumber = 1;
085:            int columnNumber = 1;
086:            String lastEntityVal = "";
087:            boolean trace = false;
088:            int commentLevel = 0;
089:
090:            public Parser() {
091:            }
092:
093:            public void parse(InputSource in) throws SAXException, IOException {
094:                this .in = in;
095:                this .r = getCharacterStream(in);
096:                taglen = 0;
097:                lineNumber = 1;
098:                columnNumber = 1;
099:                data.reset();
100:                try {
101:                    parse();
102:                } catch (SAXException ex) {
103:                    if (locStack != null) {
104:                        for (int i = 0; i < locStack.size(); i++) {
105:                            com.quadcap.util.Debug.println(" at "
106:                                    + locStack.top(i));
107:                        }
108:                    }
109:                    throw ex;
110:                }
111:            }
112:
113:            final Reader getCharacterStream(InputSource in) {
114:                Reader rd = in.getCharacterStream();
115:                if (rd == null) {
116:                    rd = new InputStreamReader(in.getByteStream());
117:                }
118:                return rd;
119:            }
120:
121:            public void pushInputSource(InputSource in2) {
122:                if (inStack == null) {
123:                    inStack = new ArrayQueue();
124:                    locStack = new ArrayQueue();
125:                }
126:                inStack.push(in);
127:                locStack.push("" + lineNumber + ":" + columnNumber);
128:                lineNumber = 1;
129:                columnNumber = 1;
130:                in = in2;
131:                r = getCharacterStream(in);
132:            }
133:
134:            boolean popInputSource() {
135:                if (inStack == null || inStack.size() == 0)
136:                    return false;
137:                in = (InputSource) inStack.pop();
138:                String s = locStack.pop().toString();
139:                int idx = s.indexOf(':');
140:                lineNumber = Integer.parseInt(s.substring(0, idx));
141:                columnNumber = Integer.parseInt(s.substring(idx + 1));
142:                r = getCharacterStream(in);
143:                return true;
144:            }
145:
146:            final void addTagChar(int c) throws SAXException {
147:                if (taglen >= tag.length)
148:                    throw new SAXException("tag too long");
149:                tag[taglen++] = (char) c;
150:            }
151:
152:            public void parse(String s) {
153:            }
154:
155:            public void setDocumentHandler(DocumentHandler dh) {
156:                this .docHandler = dh;
157:            }
158:
159:            public void setDTDHandler(DTDHandler dh) {
160:                this .dtdHandler = dh;
161:            }
162:
163:            public void setEntityResolver(EntityResolver er) {
164:                this .entityResolver = er;
165:            }
166:
167:            public EntityResolver getEntityResolver() {
168:                return entityResolver;
169:            }
170:
171:            public void setErrorHandler(ErrorHandler er) {
172:                errorHandler = er;
173:            }
174:
175:            public void setLocale(java.util.Locale locale) {
176:            }
177:
178:            final int read() throws IOException {
179:                int c = r.read();
180:                if (c == '\n') {
181:                    lineNumber++;
182:                    columnNumber = 1;
183:                } else {
184:                    columnNumber++;
185:                }
186:                return c;
187:            }
188:
189:            final char parseEntity() throws SAXException, IOException {
190:                int len = 0;
191:                int c;
192:                int state = 0;
193:                while ((c = read()) >= 0) {
194:                    ebuf[len++] = (char) c;
195:                    if (!Character.isLetter((char) c) || len >= ebuf.length)
196:                        break;
197:                }
198:                lastEntityVal = new String(ebuf, 0, len);
199:                if (len == 5 && ebuf[0] == 'q' && ebuf[1] == 'u'
200:                        && ebuf[2] == 'o' && ebuf[3] == 't') {
201:                    return '"';
202:                }
203:                if (len == 4 && ebuf[0] == 'a' && ebuf[1] == 'm'
204:                        && ebuf[2] == 'p') {
205:                    return '&';
206:                }
207:                if (len == 3) {
208:                    if (ebuf[0] == 'l') {
209:                        if (ebuf[1] == 't')
210:                            return '<';
211:                    } else if (ebuf[0] == 'g') {
212:                        if (ebuf[1] == 't')
213:                            return '>';
214:                    }
215:                }
216:                throw new SAXException("unknown entity: " + lastEntityVal);
217:
218:            }
219:
220:            public int step(int state, int c) throws SAXException, IOException {
221:                //         Debug.println("step[" + state + " " + commentLevel +
222:                //                       "]: " + ((char)c));
223:                switch (state) {
224:                case 0:
225:                    if (c == '<') {
226:                        if (data.size() > 0) {
227:                            docHandler.characters(data.toCharArray(), 0, data
228:                                    .size());
229:                            data.reset();
230:                        }
231:                        state = 1;
232:                    } else {
233:                        if (c == '&') {
234:                            try {
235:                                c = parseEntity();
236:                            } catch (SAXException e) {
237:                                data.write('&');
238:                                data.write(lastEntityVal);
239:                                break;
240:                            }
241:                        }
242:                        data.write(c);
243:                    }
244:                    break;
245:                case 1: // seen '<'
246:                    switch (c) {
247:                    case '!':
248:                        state = 30;
249:                        break;
250:                    case '\\':
251:                        state = 4;
252:                        break;
253:                    case '/':
254:                        state = 8;
255:                        break;
256:                    case '?':
257:                        data.reset();
258:                        state = 20;
259:                        break;
260:                    default:
261:                        addTagChar(c);
262:                        state = 5;
263:                        break;
264:                    }
265:                    break;
266:                case 4: // seen <\
267:                    data.write('<');
268:                    data.write(c);
269:                    state = 0;
270:                    break;
271:                case 5: // collect tag name
272:                    switch (c) {
273:                    case ' ':
274:                    case '\r':
275:                    case '\n':
276:                    case '\t':
277:                        tagName = pool.intern(tag, 0, taglen);
278:                        taglen = 0;
279:                        state = 6;
280:                        break;
281:                    case '/':
282:                        tagName = pool.intern(tag, 0, taglen);
283:                        taglen = 0;
284:                        state = 9;
285:                        break;
286:                    case '>':
287:                        tagName = pool.intern(tag, 0, taglen);
288:                        taglen = 0;
289:                        state = 0;
290:                        startElement(tagName, attributes);
291:                        break;
292:                    case '<':
293:                        tagName = pool.intern(tag, 0, taglen);
294:                        taglen = 0;
295:                        if (data.size() > 0) {
296:                            docHandler.characters(data.toCharArray(), 0, data
297:                                    .size());
298:                            data.reset();
299:                        }
300:                        state = 1;
301:                        break;
302:                    default:
303:                        if (Character.isLetter((char) c)
304:                                || Character.isDigit((char) c) || c == '.'
305:                                || c == '-' || c == '_' || c == ':') {
306:                            addTagChar(c);
307:                        } else {
308:                            // this isn't a tag after all (e.g., inside a <script>
309:                            // section, we've found "if (a < b) ..."
310:                            for (int i = 0; i < taglen; i++) {
311:                                data.write(tag[i]);
312:                            }
313:                            data.write(c);
314:                            state = 0;
315:                            taglen = 0;
316:                            break;
317:                        }
318:
319:                    }
320:                    break;
321:                case 6: // collect attributes
322:                    switch (c) {
323:                    case ' ':
324:                    case '\n':
325:                    case '\r':
326:                    case '\t':
327:                        break;
328:                    case '/':
329:                        state = 9;
330:                        break;
331:                    case '%':
332:                        addTagChar(c);
333:                        break;
334:                    case '>':
335:                        state = 0;
336:                        startElement(tagName, attributes);
337:                        break;
338:                    case '=':
339:                        attrName = pool.intern(tag, 0, taglen);
340:                        taglen = 0;
341:                        state = 10;
342:                        break;
343:                    case '<':
344:                        state = 61;
345:                        break;
346:                    default:
347:                        addTagChar(c);
348:                    }
349:                    break;
350:                case 61:
351:                    switch (c) {
352:                    case '?':
353:                        state = 62;
354:                        break;
355:                    default:
356:                        addTagChar('<');
357:                        addTagChar(c);
358:                        state = 6;
359:                        break;
360:                    }
361:                    break;
362:                case 62:
363:                    switch (c) {
364:                    case '?':
365:                        state = 63;
366:                        break;
367:                    default:
368:                        addTagChar(c);
369:                        break;
370:                    }
371:                    break;
372:                case 63:
373:                    switch (c) {
374:                    case '>':
375:                        addTagChar(c);
376:                        state = 6;
377:                        break;
378:                    default:
379:                        addTagChar('?');
380:                        if (c != '?')
381:                            state = 62;
382:                        break;
383:                    }
384:                    break;
385:                case 8: // seen </
386:                    if (c == '>') {
387:                        tagName = pool.intern(tag, 0, taglen);
388:                        taglen = 0;
389:                        state = 0;
390:                        docHandler.endElement(tagName);
391:                    } else {
392:                        addTagChar(c);
393:                    }
394:                    break;
395:                case 9: // in <tag, seen /
396:                    if (c == '>') {
397:                        startElement(tagName, attributes);
398:                        state = 0;
399:                        docHandler.endElement(tagName);
400:                    } else {
401:                        addTagChar('/');
402:                        addTagChar(c);
403:                        state = 6;
404:                    }
405:                    break;
406:                case 10: // in attriblist, seen name=
407:                    if (c == '"') {
408:                        state = 12;
409:                    } else if (c == '\'') {
410:                        state = 121;
411:                    } else {
412:                        addTagChar(c);
413:                        state = 13;
414:                    }
415:                    break;
416:                case 12: // in attriblist, seen name="
417:                    if (c == '"') {
418:                        attributes.addAttribute(attrName, "CDATA", pool.intern(
419:                                tag, 0, taglen));
420:                        taglen = 0;
421:                        state = 6;
422:                    } else {
423:                        addTagChar(c);
424:                    }
425:                    break;
426:                case 121: // in attriblist, seen name='
427:                    if (c == '\'') {
428:                        attributes.addAttribute(attrName, "CDATA", pool.intern(
429:                                tag, 0, taglen));
430:                        taglen = 0;
431:                        state = 6;
432:                    } else {
433:                        addTagChar(c);
434:                    }
435:                    break;
436:                case 13: // in attriblist, seen name=c
437:                    switch (c) {
438:                    case ' ':
439:                        attributes.addAttribute(attrName, "CDATA", pool.intern(
440:                                tag, 0, taglen));
441:                        taglen = 0;
442:                        state = 6;
443:                        break;
444:                    case '/':
445:                        state = 14;
446:                        break;
447:                    case '>':
448:                        attributes.addAttribute(attrName, "CDATA", pool.intern(
449:                                tag, 0, taglen));
450:                        taglen = 0;
451:                        state = 0;
452:                        startElement(tagName, attributes);
453:                        break;
454:                    default:
455:                        addTagChar(c);
456:                    }
457:                    break;
458:                case 14: // in attriblist, seen name=dfdf/
459:                    if (c == '>') {
460:                        attributes.addAttribute(attrName, "CDATA", pool.intern(
461:                                tag, 0, taglen));
462:                        taglen = 0;
463:                        state = 0;
464:                        startElement(tagName, attributes);
465:                        docHandler.endElement(tagName);
466:                    } else {
467:                        addTagChar('/');
468:                        if (c != '/') {
469:                            addTagChar(c);
470:                            state = 13;
471:                        }
472:                    }
473:                    break;
474:                case 15:
475:                    if (c == '-')
476:                        state = 16;
477:                    break;
478:                case 16:
479:                    if (c == '-')
480:                        state = 17;
481:                    else
482:                        state = 15;
483:                    break;
484:                case 17:
485:                    if (c == '>')
486:                        state = 0;
487:                    else if (c != '-')
488:                        state = 15;
489:                    break;
490:                case 20:
491:                    if (c == '?')
492:                        state = 21;
493:                    else
494:                        data.write(c);
495:                    break;
496:                case 21:
497:                    if (c == '>') {
498:                        String s = data.toString().trim();
499:                        if (s.startsWith("xml")) {
500:                            if (inStack == null || inStack.size() == 0) {
501:                                if (!docStarted) {
502:                                    docStarted = true;
503:                                    docHandler.startDocument();
504:                                }
505:                            }
506:                        } else {
507:                            int idx = s.indexOf(' ');
508:                            String dat = "";
509:                            String target = s;
510:                            if (idx >= 0) {
511:                                target = s.substring(0, idx);
512:                                dat = s.substring(idx + 1).trim();
513:                            }
514:                            docHandler.processingInstruction(target, dat);
515:                        }
516:                        data.reset();
517:                        state = 0;
518:                    } else {
519:                        data.write('?');
520:                        if (c != '?') {
521:                            data.write(c);
522:                            state = 20;
523:                        }
524:                    }
525:                    break;
526:                case 30: // seen <!
527:                    if (c == '-')
528:                        state = 31;
529:                    else if (c == '[')
530:                        state = 41;
531:                    else
532:                        state = 40;
533:                    break;
534:                case 31: // seen <!-
535:                    if (c == '-') {
536:                        commentLevel = 1;
537:                        state = 32;
538:                    } else
539:                        state = 40;
540:                    break;
541:                case 32: // in comment, look for '-'
542:                    if (c == '-')
543:                        state = 33;
544:                    else if (c == '<')
545:                        state = 320;
546:                    break;
547:                case 320: // in comment, seen <
548:                    if (c == '!')
549:                        state = 321;
550:                    else if (c == '-')
551:                        state = 33;
552:                    else
553:                        state = 32;
554:                    break;
555:                case 321: // in comment, seen <!
556:                    if (c == '-')
557:                        state = 322;
558:                    else
559:                        state = 32;
560:                    break;
561:                case 322: // in comment, seen <!-
562:                    if (c == '-') {
563:                        commentLevel++;
564:                    }
565:                    state = 32;
566:                    break;
567:                case 33: // in comment, seen -
568:                    if (c == '-')
569:                        state = 34;
570:                    else
571:                        state = 32;
572:                    break;
573:                case 34: // in comment, seen --
574:                    if (c == '>') {
575:                        if (--commentLevel == 0) {
576:                            state = 0;
577:                        } else {
578:                            state = 32;
579:                        }
580:                    } else if (c != '-')
581:                        state = 32;
582:                    break;
583:                case 40: // seen <!, but not comment
584:                    if (c == '>')
585:                        state = 0;
586:                    break;
587:                case 41: // seen <![
588:                    if (c == '[') {
589:                        if (data.toString().equals("CDATA")) {
590:                            data.reset();
591:                            state = 42;
592:                        } else {
593:                            state = 40;
594:                        }
595:                    } else {
596:                        data.write(c);
597:                    }
598:                    break;
599:                case 42: // in CDATA section
600:                    if (c == ']') {
601:                        state = 43;
602:                    } else {
603:                        data.write(c);
604:                    }
605:                    break;
606:                case 43: // in CDATA, seen ']'
607:                    if (c == ']') {
608:                        state = 44;
609:                    } else {
610:                        data.write(']');
611:                        data.write(c);
612:                        state = 42;
613:                    }
614:                    break;
615:                case 44: // in CDATA, seen ']]'
616:                    if (c == '>') {
617:                        state = 0;
618:                    } else if (c == ']') {
619:                        data.write(']');
620:                    } else {
621:                        data.write("]]");
622:                        data.write(c);
623:                        state = 42;
624:                    }
625:                    break;
626:                default:
627:                    throw new SAXException("Bad parser state: " + state);
628:                }
629:                return state;
630:            }
631:
632:            public void parse() throws SAXException, IOException {
633:                int state = 0;
634:                docHandler.setDocumentLocator(new Locator(this ));
635:                while (parseUntilEOF()) {
636:                }
637:                docHandler.endDocument();
638:            }
639:
640:            public boolean parseUntilEOF() throws SAXException, IOException {
641:                boolean ret = false;
642:                int state = 0;
643:                while (state >= 0) {
644:                    int c = read();
645:                    if (c < 0) {
646:                        try {
647:                            r.close();
648:                        } catch (Exception e) {
649:                        }
650:                        ret = popInputSource();
651:                        state = -1;
652:                    } else {
653:                        state = step(state, c);
654:                    }
655:                }
656:                return ret;
657:            }
658:
659:            public int getLineNumber() {
660:                return lineNumber;
661:            }
662:
663:            public int getColumnNumber() {
664:                return columnNumber;
665:            }
666:
667:            void startElement(String name, AttributeList attributes)
668:                    throws SAXException {
669:                if (!docStarted) {
670:                    docStarted = true;
671:                    docHandler.startDocument();
672:                }
673:                docHandler.startElement(tagName, attributes);
674:                attributes.clear();
675:            }
676:
677:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.