Source Code Cross Referenced for XMLParser.java in  » Scripting » Kawa » gnu » xml » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Scripting » Kawa » gnu.xml 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package gnu.xml;
002:
003:        import java.io.*;
004:        import gnu.text.*;
005:        import gnu.lists.*;
006:        import gnu.text.Path; /* #ifdef use:java.nio */
007:        import java.nio.charset.*;
008:
009:        /* #endif */
010:
011:        /** Reads XML from a char array.
012:         * Assumes a state-less character encoding containing ascii as a sub-set,
013:         * and where no byte in a multi-byte character is the same as a xml special
014:         * character.  Any bytes with high-order bit set are treated as if they
015:         * are letters, and can be part of names.
016:         *
017:         * Handles CR/LF, CDATA, entity references, processing instructions, DOCTYPE,
018:         * as well as the obvious (text, element, and attributes).
019:         *
020:         * @author Per Bothner
021:         */
022:
023:        public class XMLParser {
                // Parser states for the state machine in parse(LineBufferedReader, XMLFilter).
                //
                // The two *_MODIFIER values are added to a base state to request that a
                // "subroutine" runs first: SKIP_SPACES_MODIFIER skips whitespace and
                // EXPECT_NAME_MODIFIER reads a Name.  When the subroutine is done it
                // subtracts its modifier from 'state', leaving the base state (or the
                // base state plus the other modifier) as the continuation.
024:            private static final int EXPECT_NAME_MODIFIER = 1;
025:            private static final int SKIP_SPACES_MODIFIER = 2;
026:            private static final int INIT_STATE = 0; // Initial state, before any character is read.
027:            private static final int TEXT_STATE = 1; // In text, or in an attribute value.
028:            private static final int BEGIN_ELEMENT_STATE = 2; // Seen '<' Name
029:            private static final int END_ELEMENT_STATE = 4;
030:            private static final int SAW_ENTITY_REF = 6; // Seen '&' Name (an entity reference).
031:            private static final int ATTRIBUTE_SEEN_NAME_STATE = 8;
032:            private static final int MAYBE_ATTRIBUTE_STATE = 10;
033:            private static final int ATTRIBUTE_SEEN_EQ_STATE = 11;
034:            private static final int DOCTYPE_SEEN_STATE = 13;
035:            private static final int DOCTYPE_NAME_SEEN_STATE = 16;
036:            private static final int SAW_LEFT_STATE = 14; // Seen '<'
037:            private static final int SAW_LEFT_SLASH_STATE = 19; // Seen '</'
038:            private static final int SAW_LEFT_EXCL_STATE = 20; // Seen '<!'
039:            private static final int SAW_LEFT_QUEST_STATE = 21; // Seen '<?'
040:            private static final int SAW_LEFT_EXCL_MINUS_STATE = 22;
041:            private static final int SAW_AMP_STATE = 25; // Saw '&'.
042:            private static final int SAW_AMP_SHARP_STATE = 26; // Saw '&#'.
043:            private static final int EXPECT_RIGHT_STATE = 27;
044:            private static final int PREV_WAS_CR_STATE = 28; // Previous character was '\r'.
045:            private static final int INIT_LEFT_QUEST_STATE = 30; // Seen '<?' at the very start of the input.
046:            private static final int INIT_TEXT_STATE = 31; // Looking at the first character of the input.
047:            private static final int INIT_LEFT_STATE = 34; // Seen '<' as the first character of the input.
048:            private static final int INVALID_VERSION_DECL = 35; // Bad version in the xml declaration; falls into SAW_ERROR.
049:            private static final int SAW_ERROR = 36; // Report 'message', then skip ahead to the next '>'.
050:            private static final int SAW_EOF_ERROR = 37; // Unexpected end-of-file.
051:
                /** Error message used when the encoding part of the xml declaration is malformed. */
052:            static final String BAD_ENCODING_SYNTAX = "bad encoding declaration";
053:
054:            public static void parse(Object uri, SourceMessages messages,
055:                    Consumer out) throws java.io.IOException {
056:                parse(Path.openInputStream(uri), uri, messages, out);
057:            }
058:
059:            public static LineInputStreamReader XMLStreamReader(InputStream strm)
060:                    throws java.io.IOException {
061:                LineInputStreamReader in = new LineInputStreamReader(strm);
062:                /* #ifndef use:java.nio */
063:                // in.markStart();
064:                /* #endif */
065:                int b1 = in.getByte();
066:                int b2 = b1 < 0 ? -1 : in.getByte();
067:                int b3 = b2 < 0 ? -1 : in.getByte();
068:                if (b1 == 0xEF && b2 == 0xBB && b3 == 0xBF) {
069:                    in.resetStart(3);
070:                    in.setCharset("UTF-8");
071:                } else if (b1 == 0xFF && b2 == 0xFE && b3 != 0) {
072:                    in.resetStart(2);
073:                    in.setCharset("UTF-16LE");
074:                } else if (b1 == 0xFE && b2 == 0xFF && b3 != 0) {
075:                    in.resetStart(2);
076:                    in.setCharset("UTF-16BE");
077:                } else {
078:                    int b4 = b3 < 0 ? -1 : in.getByte();
079:                    if (b1 == 0x4C && b2 == 0x6F && b3 == 0xA7 && b4 == 0x94)
080:                        throw new RuntimeException(
081:                                "XMLParser: EBCDIC encodings not supported");
082:                    in.resetStart(0);
083:                    if ((b1 == '<' && ((b2 == '?' && b3 == 'x' && b4 == 'm') || (b2 == 0
084:                            && b3 == '?' && b4 == 0)))
085:                            || (b1 == 0 && b2 == '<' && b3 == 0 && b4 == '?')) {
086:                        char[] buffer = in.buffer;
087:                        if (buffer == null)
088:                            in.buffer = buffer = new char[LineBufferedReader.BUFFER_SIZE];
089:                        int pos = 0;
090:                        int quote = 0;
091:                        for (;;) {
092:                            int b = in.getByte();
093:                            if (b == 0)
094:                                continue;
095:                            if (b < 0) // Unexpected EOF - handled later.
096:                                break;
097:                            buffer[pos++] = (char) (b & 0xFF);
098:                            if (quote == 0) {
099:                                if (b == '>')
100:                                    break;
101:                                if (b == '\'' || b == '\"')
102:                                    quote = b;
103:                            } else if (b == quote)
104:                                quote = 0;
105:                        }
106:                        in.pos = 0;
107:                        in.limit = pos;
108:                    } else
109:                        in.setCharset("UTF-8");
110:                }
111:                in.setKeepFullLines(false);
112:                return in;
113:            }
114:
115:            public static void parse(InputStream strm, Object uri,
116:                    SourceMessages messages, Consumer out)
117:                    throws java.io.IOException {
118:                LineInputStreamReader in = XMLStreamReader(strm);
119:                if (uri != null)
120:                    in.setName(uri);
121:                parse(in, messages, out);
122:                in.close();
123:            }
124:
125:            public static void parse(LineBufferedReader in,
126:                    SourceMessages messages, Consumer out)
127:                    throws java.io.IOException {
128:                XMLFilter filter = new XMLFilter(out);
129:                filter.setMessages(messages);
130:                filter.setSourceLocator(in);
131:                filter.startDocument();
132:                Object uri = in.getPath();
133:                if (uri != null)
134:                    filter.writeDocumentUri(uri);
135:                parse(in, filter);
136:                filter.endDocument();
137:            }
138:
139:            public static void parse(LineBufferedReader in,
140:                    SourceMessages messages, XMLFilter filter)
141:                    throws java.io.IOException {
142:                filter.setMessages(messages);
143:                filter.setSourceLocator(in);
144:                filter.startDocument();
145:                Object uri = in.getPath();
146:                if (uri != null)
147:                    filter.writeDocumentUri(uri);
148:                parse(in, filter);
149:                filter.endDocument();
150:                in.close();
151:            }
152:
153:            public static void parse(LineBufferedReader in, XMLFilter out) {
154:                // Cache fields in local variables, for speed.
155:                char[] buffer = in.buffer;
156:                int pos = in.pos;
157:                int limit = in.limit;
158:
159:                // The flow logic of this method is unusual.  It is one big state machine,
160:                // but with two "subroutines": SKIP_SPACES_MODIFIER and EXPECT_NAME_MODIFIER.
161:                // There is also a "subroutine" to get a new character (and leave it in 'ch')
162:                // when 'break handleChar' is executed, except this has the hard-wired
163:                // continuation of switching on the 'state'.
164:                //
165:                // The justification for this rather unusual design is performance.
166:                // As long as the input is contained within 'buffer', we don't need
167:                // to call input methods (only methods for emitting parsed data is
168:                // called).  We also maximize use of local variables - we do not
169:                // access any object fields (including fields of 'this') except
170:                // for getting the next char from 'buffer'.  These properties mean
171:                // this method can be compiled to very tight efficient code.
172:
173:                int state = INIT_STATE;
174:                // 0: normal - in character context.
175:                // 1: seen '&'
176:
177:                // The next two variables are only relevant if state==INIT_STATE:
178:                char terminator = (char) '<';
179:                int continue_state = SAW_LEFT_STATE;
180:                char ch = (char) ' '; // ???
181:                int length = 0;
182:                int dstart = -1;
183:                String message = null;
184:
185:                int start = limit;
186:                mainLoop: for (;;) {
187:                    handleChar: // When done get next character.
188:                    switch (state) {
189:                    case INIT_STATE:
190:                        state = TEXT_STATE;
191:                        state = INIT_TEXT_STATE;
192:                        break handleChar;
193:
194:                    case INIT_TEXT_STATE:
195:                        if (ch == '<') {
196:                            state = INIT_LEFT_STATE;
197:                            break handleChar;
198:                        }
199:                        state = TEXT_STATE;
200:                        continue mainLoop;
201:
202:                    case INIT_LEFT_STATE:
203:                        if (ch == '?') {
204:                            start = pos;
205:                            state = EXPECT_NAME_MODIFIER + SKIP_SPACES_MODIFIER
206:                                    + INIT_LEFT_QUEST_STATE;
207:                            break handleChar;
208:                        }
209:                        state = SAW_LEFT_STATE;
210:                        continue mainLoop;
211:
212:                    case INVALID_VERSION_DECL:
213:                        pos = dstart;
214:                        message = "invalid xml version specifier";
215:                        /* ... fall through ... */
216:
217:                    case SAW_ERROR:
218:                        in.pos = pos;
219:                        out.error('e', message);
220:                        for (;;) {
221:                            if (pos >= limit)
222:                                break mainLoop;
223:                            ch = buffer[pos++];
224:                            if (ch == '>') {
225:                                state = TEXT_STATE;
226:                                break handleChar;
227:                            }
228:                        }
229:
230:                    case SAW_EOF_ERROR:
231:                        in.pos = pos;
232:                        out.error('f', "unexpected end-of-file");
233:                        return;
234:
235:                    case TEXT_STATE:
236:                        // This state handle text not inside tags (in which case
237:                        // terminator=='<').  It also handles attribute values (in
238:                        // which case terminator is '\'' or '"').
239:                        start = pos - 1;
240:                        // Not length now, but used to calculate length when done.
241:                        length = pos;
242:                        for (;;) {
243:                            if (ch == terminator) {
244:                                state = continue_state;
245:                                break;
246:                            }
247:                            if (ch == '&') {
248:                                state = SAW_AMP_STATE;
249:                                break;
250:                            }
251:                            if (ch == '\r') {
252:                                length = pos - length;
253:                                in.pos = pos;
254:                                if (length > 0)
255:                                    out.textFromParser(buffer, start, length);
256:                                if (pos < limit) {
257:                                    ch = buffer[pos];
258:                                    if (ch == '\n') {
259:                                        start = pos;
260:                                        length = ++pos;
261:                                    } else {
262:                                        out.linefeedFromParser();
263:                                        if (ch == 0x85) {
264:                                            start = pos++;
265:                                            length = pos + 1;
266:                                        } else {
267:                                            in.incrLineNumber(1, pos);
268:                                            start = pos;
269:                                            length = ++pos;
270:                                            continue;
271:                                        }
272:                                    }
273:                                    in.incrLineNumber(1, pos);
274:                                } else {
275:                                    out.linefeedFromParser();
276:                                    state = PREV_WAS_CR_STATE;
277:                                    break handleChar;
278:                                }
279:                            } else if (ch == 0x85 || ch == 0x2028) {
280:                                length = pos - length;
281:                                in.pos = pos - 1;
282:                                if (length > 0)
283:                                    out.textFromParser(buffer, start, length);
284:                                out.linefeedFromParser();
285:                                in.incrLineNumber(1, pos);
286:                                length = pos + 1;
287:                                start = pos;
288:                            } else if (ch == '\n') {
289:                                in.incrLineNumber(1, pos);
290:                            }
291:                            if (pos == limit) {
292:                                length--;
293:                                break;
294:                            }
295:                            ch = buffer[pos++];
296:                        }
297:                        length = pos - length;
298:                        if (length > 0) {
299:                            in.pos = pos;
300:                            out.textFromParser(buffer, start, length);
301:                        }
302:                        start = buffer.length;
303:                        break handleChar;
304:
305:                    case PREV_WAS_CR_STATE:
306:                        // The previous character was a '\r', and we passed along '\n'
307:                        // to out.  If the new character is '\n' or 0x85 ignore it.
308:                        state = TEXT_STATE;
309:                        if (ch == '\n' | ch == 0x85) {
310:                            in.incrLineNumber(1, pos);
311:                            break handleChar;
312:                        } else {
313:                            in.incrLineNumber(1, pos - 1);
314:                            continue;
315:                        }
316:
317:                    case SKIP_SPACES_MODIFIER + EXPECT_RIGHT_STATE:
318:                    case SKIP_SPACES_MODIFIER + MAYBE_ATTRIBUTE_STATE:
319:                    case SKIP_SPACES_MODIFIER + SAW_LEFT_QUEST_STATE:
320:                    case SKIP_SPACES_MODIFIER + INIT_LEFT_QUEST_STATE:
321:                    case SKIP_SPACES_MODIFIER + DOCTYPE_SEEN_STATE:
322:                        // "Subroutine" for skipping whitespace.
323:                        if (ch == ' ' || ch == '\t')
324:                            break handleChar;
325:                        if (ch == '\n' || ch == '\r' || ch == '\u0085'
326:                                || ch == '\u2028') {
327:                            in.incrLineNumber(1, pos);
328:                            break handleChar;
329:                        }
330:                        // Not a space, so "return" to next state.
331:                        state -= SKIP_SPACES_MODIFIER;
332:                        continue mainLoop;
333:
334:                    case EXPECT_NAME_MODIFIER + BEGIN_ELEMENT_STATE:
335:                    case EXPECT_NAME_MODIFIER + END_ELEMENT_STATE:
336:                    case EXPECT_NAME_MODIFIER + ATTRIBUTE_SEEN_NAME_STATE:
337:                    case EXPECT_NAME_MODIFIER + SAW_ENTITY_REF:
338:                    case EXPECT_NAME_MODIFIER + DOCTYPE_NAME_SEEN_STATE:
339:                    case EXPECT_NAME_MODIFIER + SKIP_SPACES_MODIFIER
340:                            + SAW_LEFT_QUEST_STATE:
341:                    case EXPECT_NAME_MODIFIER + SKIP_SPACES_MODIFIER
342:                            + INIT_LEFT_QUEST_STATE:
343:                        length = start + 1;
344:                        // "Subroutine" for reading a Name.
345:                        for (;;) {
346:                            // XML 1.1 candidate recommendation:
347:                            // [2] Char    ::=    #x9 | #xA | #xD | [#x20-#x7E] | #x85
348:                            //   | [#xA0-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
349:                            // [4]  NameStartChar := ":" | [A-Z] | "_" | [a-z] |
350:                            //   [#xC0-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
351:                            //   [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
352:                            //   [#x3001-#xD7FF] | [#xF900-#xEFFFF]
353:                            // [4a] NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 |
354:                            //   [#x0300-#x036F] | [#x203F-#x2040]
355:                            if ((ch >= 'a' && ch <= 'z')
356:                                    || (ch >= 'A' && ch <= 'Z')
357:                                    || ch == '_'
358:                                    || ch == ':'
359:                                    || (ch >= 0xC0 && (ch <= 0x2FF || (ch >= 0x370 && ((ch <= 0x1FFF && ch != 0x37E) || (ch >= 0x200C && (ch <= 0x200D
360:                                            || (ch >= 0x2070 && ch <= 0x218F)
361:                                            || (ch >= 0x2C00 && ch <= 0x2FEF)
362:                                            || (ch >= 0x3001 && ch <= 0xD7FF) || (ch >= 0xF900 && ch <= 0xFFFD)))))))
363:                                    || (pos > length
364:                                            && (ch >= '0' && ch <= '9')
365:                                            || ch == '.' || ch == '-'
366:                                            || ch == 0xB7 || (ch > 0x300 && (ch <= 0x36F || (ch >= 0x203F && ch <= 0x2040))))) {
367:                            } else {
368:                                state -= EXPECT_NAME_MODIFIER;
369:                                length = pos - length;
370:                                if (length == 0) {
371:                                    if (state == ATTRIBUTE_SEEN_NAME_STATE)
372:                                        message = "missing or invalid attribute name";
373:                                    else if (state == BEGIN_ELEMENT_STATE
374:                                            || state == END_ELEMENT_STATE)
375:                                        message = "missing or invalid element name";
376:                                    else
377:                                        message = "missing or invalid name";
378:                                    state = SAW_ERROR;
379:                                }
380:                                continue mainLoop;
381:                            }
382:                            if (pos < limit)
383:                                ch = buffer[pos++];
384:                            else
385:                                break handleChar;
386:                        }
387:                    case SAW_AMP_SHARP_STATE:
388:                        for (;;) {
389:                            if (ch == ';') {
390:                                in.pos = pos;
391:                                out.emitCharacterReference(length, buffer,
392:                                        start, pos - 1 - start);
393:                                state = TEXT_STATE;
394:                                break handleChar;
395:                            }
396:                            if (ch == 'x' && dstart == 0)
397:                                dstart = 16;
398:                            else if (length >= 0x8000000)
399:                                break; // Overflow likely.
400:                            else {
401:                                int base = dstart == 0 ? 10 : dstart;
402:                                int digit = Character.digit((char) ch, base);
403:                                if (digit < 0)
404:                                    break;
405:                                length = length * base + digit;
406:                            }
407:                            if (pos < limit)
408:                                ch = buffer[pos++];
409:                            else
410:                                break handleChar;
411:                        }
412:                        in.pos = pos;
413:                        out.error('e', "invalid character reference");
414:                        state = TEXT_STATE;
415:                        break handleChar;
416:
417:                    case SAW_AMP_STATE:
418:                        if (ch == '#') {
419:                            state = SAW_AMP_SHARP_STATE;
420:                            start = pos;
421:                            length = 0; // accumulated value; -1 means error, -2 overflow
422:                            dstart = 0; // base - 0 means not seen yet
423:                            break handleChar;
424:                        }
425:                        start = pos - 1;
426:                        state = EXPECT_NAME_MODIFIER + SAW_ENTITY_REF;
427:                        continue mainLoop;
428:
429:                    case SAW_ENTITY_REF:
430:                        in.pos = pos;
431:                        if (ch != ';')
432:                            out.error('w', "missing ';'");
433:                        out.emitEntityReference(buffer, start, length);
434:                        start = limit;
435:                        state = TEXT_STATE;
436:                        break handleChar;
437:
438:                    case SAW_LEFT_STATE: // Saw '<'
439:                        if (ch == '/') {
440:                            state = SAW_LEFT_SLASH_STATE;
441:                            break handleChar;
442:                        }
443:                        if (ch == '?') {
444:                            start = pos;
445:                            state = EXPECT_NAME_MODIFIER + SKIP_SPACES_MODIFIER
446:                                    + SAW_LEFT_QUEST_STATE;
447:                            break handleChar;
448:                        }
449:                        if (ch == '!') {
450:                            state = SAW_LEFT_EXCL_STATE;
451:                            start = pos;
452:                            break handleChar;
453:                        }
454:                        // Read Name then goto BEGIN_ELEMENT_STATE.
455:                        start = pos - 1;
456:                        state = EXPECT_NAME_MODIFIER + BEGIN_ELEMENT_STATE;
457:                        continue mainLoop;
458:                    case BEGIN_ELEMENT_STATE:
459:                        in.pos = pos - length; // position of start of name, for errors.
460:                        out.emitStartElement(buffer, start, length);
461:                        state = SKIP_SPACES_MODIFIER + MAYBE_ATTRIBUTE_STATE;
462:                        start = limit;
463:                        continue mainLoop;
464:
465:                    case SAW_LEFT_QUEST_STATE: // Seen '<?' Name Spaces
466:                    case INIT_LEFT_QUEST_STATE: // Seen '<?' Name Spaces
467:                        if (dstart < 0)
468:                            dstart = pos - 1;
469:                        for (;;) {
470:                            int end;
471:                            if (ch == '>' && buffer[end = pos - 2] == '?'
472:                                    && end >= dstart) {
473:                                in.pos = pos;
474:                                if (length == 3 && buffer[start] == 'x'
475:                                        && buffer[start + 1] == 'm'
476:                                        && buffer[start + 2] == 'l') {
477:                                    if (state == INIT_LEFT_QUEST_STATE) {
478:                                        if (end <= dstart + 7
479:                                                || buffer[dstart] != 'v'
480:                                                || buffer[dstart + 1] != 'e'
481:                                                || buffer[dstart + 2] != 'r'
482:                                                || buffer[dstart + 3] != 's'
483:                                                || buffer[dstart + 4] != 'i'
484:                                                || buffer[dstart + 5] != 'o'
485:                                                || buffer[dstart + 6] != 'n') {
486:                                            pos = dstart;
487:                                            message = "xml declaration without version";
488:                                            state = SAW_ERROR;
489:                                            continue mainLoop;
490:                                        }
491:                                        dstart += 7;
492:                                        ch = buffer[dstart];
493:                                        while (Character.isWhitespace(ch)
494:                                                && ++dstart < end)
495:                                            ch = buffer[dstart];
496:                                        if (ch != '=') {
497:                                            state = INVALID_VERSION_DECL;
498:                                            continue mainLoop;
499:                                        }
500:                                        ch = buffer[++dstart];
501:                                        while (Character.isWhitespace(ch)
502:                                                && ++dstart < end)
503:                                            ch = buffer[dstart];
504:                                        if (ch != '\'' && ch != '\"') {
505:                                            state = INVALID_VERSION_DECL;
506:                                            continue mainLoop;
507:                                        }
508:                                        char quote = ch;
509:                                        int i = ++dstart;
510:                                        for (;; i++) {
511:                                            if (i == end) {
512:                                                state = INVALID_VERSION_DECL;
513:                                                continue mainLoop;
514:                                            }
515:                                            ch = buffer[i];
516:                                            if (ch == quote)
517:                                                break;
518:                                        }
519:                                        if (i == dstart + 3
520:                                                && buffer[dstart] == '1'
521:                                                && buffer[dstart + 1] == '.'
522:                                                && (ch = buffer[dstart + 2]) == '0'
523:                                                || ch == '1') {
524:                                            // Save version number, if that is useful.
525:                                        } else {
526:                                            state = INVALID_VERSION_DECL;
527:                                            continue mainLoop;
528:                                        }
529:                                        dstart = i + 1;
530:                                        while (dstart < end
531:                                                && Character
532:                                                        .isWhitespace(buffer[dstart]))
533:                                            dstart++;
534:                                        if (end > dstart + 7
535:                                                && buffer[dstart] == 'e'
536:                                                && buffer[dstart + 1] == 'n'
537:                                                && buffer[dstart + 2] == 'c'
538:                                                && buffer[dstart + 3] == 'o'
539:                                                && buffer[dstart + 4] == 'd'
540:                                                && buffer[dstart + 5] == 'i'
541:                                                && buffer[dstart + 6] == 'n'
542:                                                && buffer[dstart + 7] == 'g') {
543:                                            dstart += 8;
544:                                            ch = buffer[dstart];
545:                                            while (Character.isWhitespace(ch)
546:                                                    && ++dstart < end)
547:                                                ch = buffer[dstart];
548:                                            if (ch != '=') {
549:                                                message = BAD_ENCODING_SYNTAX;
550:                                                state = SAW_ERROR;
551:                                                continue mainLoop;
552:                                            }
553:                                            ch = buffer[++dstart];
554:                                            while (Character.isWhitespace(ch)
555:                                                    && ++dstart < end)
556:                                                ch = buffer[dstart];
557:                                            if (ch != '\'' && ch != '\"') {
558:                                                message = BAD_ENCODING_SYNTAX;
559:                                                state = SAW_ERROR;
560:                                                continue mainLoop;
561:                                            }
562:                                            quote = ch;
563:                                            i = ++dstart;
564:                                            for (;; i++) {
565:                                                if (i == end) {
566:                                                    message = BAD_ENCODING_SYNTAX;
567:                                                    state = SAW_ERROR;
568:                                                    continue mainLoop;
569:                                                }
570:                                                ch = buffer[i];
571:                                                if (ch == quote)
572:                                                    break;
573:                                            }
574:                                            String encoding = new String(
575:                                                    buffer, dstart, i - dstart);
576:                                            if (in instanceof  LineInputStreamReader)
577:                                                ((LineInputStreamReader) in)
578:                                                        .setCharset(encoding);
579:                                            dstart = i + 1;
580:                                            while (dstart < end
581:                                                    && Character
582:                                                            .isWhitespace(buffer[dstart]))
583:                                                dstart++;
584:                                        }
585:                                        if (end != dstart) {
586:                                            message = "junk at end of xml declaration";
587:                                            pos = dstart;
588:                                            state = SAW_ERROR;
589:                                            continue mainLoop;
590:                                        }
591:                                    } else {
592:                                        message = "<?xml must be at start of file";
593:                                        state = SAW_ERROR;
594:                                        continue mainLoop;
595:                                    }
596:                                } else
597:                                    out
598:                                            .processingInstructionFromParser(
599:                                                    buffer, start, length,
600:                                                    dstart, end - dstart);
601:                                start = limit;
602:                                dstart = -1;
603:                                state = TEXT_STATE;
604:                                break handleChar;
605:                            }
606:                            if (pos < limit)
607:                                ch = buffer[pos++];
608:                            else
609:                                break handleChar;
610:                        }
611:
612:                    case SAW_LEFT_EXCL_STATE: // Seen '<!'
613:                        exclLoop: for (;;) {
614:                            if (ch == '>') {
615:                                length = pos - 1 - start;
616:                                if (length >= 4 && buffer[start] == '-'
617:                                        && buffer[start + 1] == '-') {
618:                                    if (buffer[pos - 2] == '-'
619:                                            && buffer[pos - 3] == '-') {
620:                                        in.pos = pos;
621:                                        out.commentFromParser(buffer,
622:                                                start + 2, length - 4);
623:                                        break exclLoop;
624:                                    }
625:                                } else if (length >= 6 && buffer[start] == '['
626:                                        && buffer[start + 1] == 'C'
627:                                        && buffer[start + 2] == 'D'
628:                                        && buffer[start + 3] == 'A'
629:                                        && buffer[start + 4] == 'T'
630:                                        && buffer[start + 5] == 'A'
631:                                        && buffer[start + 6] == '[') {
632:                                    if (buffer[pos - 2] == ']'
633:                                            && buffer[pos - 3] == ']') {
634:                                        in.pos = pos;
635:                                        out.writeCDATA(buffer, start + 7, pos
636:                                                - 10 - start);
637:                                        break exclLoop;
638:                                    }
639:                                } else {
640:                                    // FIXME ignoring <!ELEMENT ... > etc.
641:                                    break exclLoop;
642:                                }
643:                            } else if (pos == start + 7 && buffer[start] == 'D'
644:                                    && buffer[start + 1] == 'O'
645:                                    && buffer[start + 2] == 'C'
646:                                    && buffer[start + 3] == 'T'
647:                                    && buffer[start + 4] == 'Y'
648:                                    && buffer[start + 5] == 'P' && ch == 'E') {
649:                                start = limit;
650:                                state = SKIP_SPACES_MODIFIER
651:                                        + DOCTYPE_SEEN_STATE;
652:                                break handleChar;
653:                            }
654:                            if (pos < limit)
655:                                ch = buffer[pos++];
656:                            else
657:                                break handleChar;
658:                        }
659:                        start = limit;
660:                        state = TEXT_STATE;
661:                        break handleChar;
662:
663:                    case DOCTYPE_SEEN_STATE: /* Seen '<!DOCTYPE' S* */
664:                        state = EXPECT_NAME_MODIFIER + DOCTYPE_NAME_SEEN_STATE;
665:                        start = pos - 1;
666:                        continue mainLoop;
667:
668:                    case DOCTYPE_NAME_SEEN_STATE: /* Seen '<!DOCTYPE' S* Name */
669:                        if (dstart < 0) {
670:                            // First time - i.e. not after a handleChar call.
671:                            dstart = pos - 1;
672:                            dstart -= start; // Make relative.
673:                            dstart <<= 1; // Add bit for whether in a '['.
674:                            terminator = 0;
675:                        }
676:                        for (;;) {
677:                            if (ch == '\'' || ch == '\"') {
678:                                if (terminator == 0)
679:                                    terminator = ch;
680:                                else if (terminator == ch)
681:                                    terminator = 0;
682:                            } else if (terminator == 0) // I.e. not inside a string.
683:                            {
684:                                // Low-order bit of dstart is 1 if we've seen a '['.
685:                                if (ch == '[')
686:                                    dstart |= 1;
687:                                else if (ch == ']')
688:                                    dstart &= ~1;
689:                                else if (ch == '>' && (dstart & 1) == 0) {
690:                                    in.pos = pos;
691:                                    dstart >>= 1;
692:                                    dstart += start;
693:                                    out.emitDoctypeDecl(buffer, start, length,
694:                                            dstart, pos - 1 - dstart);
695:                                    terminator = (char) '<';
696:                                    start = limit;
697:                                    dstart = -1;
698:                                    state = TEXT_STATE;
699:                                    break handleChar;
700:                                }
701:                            }
702:                            if (pos < limit)
703:                                ch = buffer[pos++];
704:                            else
705:                                break handleChar;
706:                        }
707:
708:                    case MAYBE_ATTRIBUTE_STATE:
709:                        terminator = '<';
710:                        continue_state = SAW_LEFT_STATE;
711:                        if (ch == '/') {
712:                            in.pos = pos;
713:                            out.emitEndAttributes();
714:                            out.emitEndElement(null, 0, 0);
715:                            state = EXPECT_RIGHT_STATE;
716:                            break handleChar;
717:                        }
718:                        if (ch == '>') {
719:                            in.pos = pos;
720:                            out.emitEndAttributes();
721:                            state = TEXT_STATE;
722:                            break handleChar;
723:                        }
724:                        start = pos - 1;
725:                        state = EXPECT_NAME_MODIFIER
726:                                + ATTRIBUTE_SEEN_NAME_STATE;
727:                        continue mainLoop;
728:                    case ATTRIBUTE_SEEN_NAME_STATE:
729:                        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'
730:                                || ch == '\u0085' || ch == '\u2028')
731:                            break handleChar;
732:                        in.pos = pos - length; // position of start of name, for errors.
733:                        out.emitStartAttribute(buffer, start, length);
734:                        start = limit;
735:                        if (ch == '=') {
736:                            state = ATTRIBUTE_SEEN_EQ_STATE;
737:                            break handleChar;
738:                        }
739:                        out.emitEndAttributes();
740:                        message = "missing or misplaced '=' after attribute name";
741:                        state = SAW_ERROR;
742:                        continue mainLoop;
743:                    case ATTRIBUTE_SEEN_EQ_STATE:
744:                        if (ch == '\'' || ch == '\"') {
745:                            terminator = ch;
746:                            continue_state = SKIP_SPACES_MODIFIER
747:                                    + MAYBE_ATTRIBUTE_STATE;
748:                            state = TEXT_STATE;
749:                            break handleChar;
750:                        }
751:                        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'
752:                                || ch == '\u0085' || ch == '\u2028')
753:                            break handleChar;
754:                        message = "missing or unquoted attribute value";
755:                        state = SAW_ERROR;
756:                        continue mainLoop;
757:
758:                    case SAW_LEFT_SLASH_STATE: // Seen '</'.
759:                        // Do "Name" subroutine, then goto END_ELEMENT_STATE.
760:                        start = pos - 1;
761:                        state = EXPECT_NAME_MODIFIER + END_ELEMENT_STATE;
762:                        continue mainLoop;
763:
764:                    case END_ELEMENT_STATE: // Seen '</' Name.
765:                        in.pos = pos;
766:                        out.emitEndElement(buffer, start, length);
767:                        start = limit;
768:                        // Skip spaces then goto EXPECT_RIGHT_STATE.
769:                        state = SKIP_SPACES_MODIFIER + EXPECT_RIGHT_STATE;
770:                        continue mainLoop;
771:
772:                    case EXPECT_RIGHT_STATE: // Looking for '>'.
773:                        if (ch != '>') {
774:                            message = "missing '>'";
775:                            state = SAW_ERROR;
776:                            continue mainLoop;
777:                        }
778:                        state = TEXT_STATE;
779:                        break handleChar;
780:                    }
781:
782:                    // After 'break handleChar', we get here.
783:                    if (pos < limit)
784:                        ch = buffer[pos++];
785:                    else {
786:                        int saved = pos - start;
787:                        try {
788:                            if (saved > 0) {
789:                                in.pos = start;
790:                                in.mark(saved + 1);
791:                            }
792:                            in.pos = pos;
793:                            int x = in.read();
794:                            if (x < 0) {
795:                                if (state == TEXT_STATE
796:                                        || state == PREV_WAS_CR_STATE)
797:                                    return;
798:                                state = SAW_EOF_ERROR;
799:                                continue;
800:                            }
801:                            if (saved > 0) {
802:                                in.reset();
803:                                in.skip(saved);
804:                            } else
805:                                in.unread_quick();
806:                        } catch (java.io.IOException ex) {
807:                            throw new RuntimeException(ex.getMessage());
808:                        }
809:                        pos = in.pos;
810:                        buffer = in.buffer;
811:
812:                        limit = in.limit;
813:                        start = saved > 0 ? pos - saved : limit;
814:                        ch = buffer[pos++];
815:                    }
816:                }
817:            }
818:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.