Source Code Cross Referenced for InputEntity.java in » 6.0-JDK-Modules » jaxb-xjc » com » sun » xml » dtdparser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » 6.0 JDK Modules » jaxb xjc » com.sun.xml.dtdparser
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * @(#)XmlChars.java    1.1 00/08/05
003:         *
004:         * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
005:         */
006:
007:        package com.sun.xml.dtdparser;
008:
009:        import org.xml.sax.InputSource;
010:        import org.xml.sax.SAXException;
011:        import org.xml.sax.SAXParseException;
012:
013:        import java.io.CharConversionException;
014:        import java.io.IOException;
015:        import java.io.InputStream;
016:        import java.io.InputStreamReader;
017:        import java.io.Reader;
018:        import java.io.UnsupportedEncodingException;
019:        import java.net.URL;
020:        import java.util.Locale;
021:
022:        /**
023:         * This is how the parser talks to its input entities, of all kinds.
024:         * The entities are in a stack.
025:         * <p/>
026:         * <P> For internal entities, the character arrays are referenced here,
027:         * and read from as needed (they're read-only).  External entities have
028:         * mutable buffers, that are read into as needed.
029:         * <p/>
030:         * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
031:         * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
032:         * is inconsistent in explaining EOL handling; this is the sensible way.
033:         *
034:         * @author David Brownell
035:         * @author Janet Koenig
036:         * @version 1.4 00/08/05
037:         */
038:        public class InputEntity {
039:            private int start, finish;
040:            private char buf[];
041:            private int lineNumber = 1;
042:            private boolean returnedFirstHalf = false;
043:            private boolean maybeInCRLF = false;
044:
045:            // name of entity (never main document or unnamed DTD PE)
046:            private String name;
047:
048:            private InputEntity next;
049:
050:            // for system and public IDs in diagnostics
051:            private InputSource input;
052:
053:            // this is a buffer; some buffers can be replenished.
054:            private Reader reader;
055:            private boolean isClosed;
056:
057:            private DTDEventListener errHandler;
058:            private Locale locale;
059:
060:            private StringBuffer rememberedText;
061:            private int startRemember;
062:
063:            // record if this is a PE, so endParsedEntity won't be called
064:            private boolean isPE;
065:
066:            // InputStreamReader throws an internal per-read exception, so
067:            // we minimize reads.  We also add a byte to compensate for the
068:            // "ungetc" byte we keep, so that our downstream reads are as
069:            // nicely sized as we can make them.
070:            final private static int BUFSIZ = 8 * 1024 + 1;
071:
072:            final private static char newline[] = { '\n' };
073:
074:            public static InputEntity getInputEntity(DTDEventListener h,
075:                    Locale l) {
076:                InputEntity retval = new InputEntity();
077:                retval.errHandler = h;
078:                retval.locale = l;
079:                return retval;
080:            }
081:
082:            private InputEntity() {
083:            }
084:
085:            //
086:            // predicate:  return true iff this is an internal entity reader,
087:            // and so may safely be "popped" as needed.  external entities have
088:            // syntax to uphold; internal parameter entities have at most validity
089:            // constraints to monitor.  also, only external entities get decent
090:            // location diagnostics.
091:            //
092:            public boolean isInternal() {
093:                return reader == null;
094:            }
095:
096:            //
097:            // predicate:  return true iff this is the toplevel document
098:            //
099:            public boolean isDocument() {
100:                return next == null;
101:            }
102:
103:            //
104:            // predicate:  return true iff this is a PE expansion (so that
105:            // LexicalEventListner.endParsedEntity won't be called)
106:            //
107:            public boolean isParameterEntity() {
108:                return isPE;
109:            }
110:
111:            //
112:            // return name of current entity
113:            //
114:            public String getName() {
115:                return name;
116:            }
117:
118:            //
119:            // use this for an external parsed entity
120:            //
121:            public void init(InputSource in, String name, InputEntity stack,
122:                    boolean isPE) throws IOException, SAXException {
123:
124:                input = in;
125:                this .isPE = isPE;
126:                reader = in.getCharacterStream();
127:
128:                if (reader == null) {
129:                    InputStream bytes = in.getByteStream();
130:
131:                    if (bytes == null)
132:                        reader = XmlReader.createReader(new URL(in
133:                                .getSystemId()).openStream());
134:                    else if (in.getEncoding() != null)
135:                        reader = XmlReader.createReader(in.getByteStream(), in
136:                                .getEncoding());
137:                    else
138:                        reader = XmlReader.createReader(in.getByteStream());
139:                }
140:                next = stack;
141:                buf = new char[BUFSIZ];
142:                this .name = name;
143:                checkRecursion(stack);
144:            }
145:
146:            //
147:            // use this for an internal parsed entity; buffer is readonly
148:            //
149:            public void init(char b[], String name, InputEntity stack,
150:                    boolean isPE) throws SAXException {
151:
152:                next = stack;
153:                buf = b;
154:                finish = b.length;
155:                this .name = name;
156:                this .isPE = isPE;
157:                checkRecursion(stack);
158:            }
159:
160:            private void checkRecursion(InputEntity stack) throws SAXException {
161:
162:                if (stack == null)
163:                    return;
164:                for (stack = stack.next; stack != null; stack = stack.next) {
165:                    if (stack.name != null && stack.name.equals(name))
166:                        fatal("P-069", new Object[] { name });
167:                }
168:            }
169:
170:            public InputEntity pop() throws IOException {
171:
172:                // caller has ensured there's nothing left to read
173:                close();
174:                return next;
175:            }
176:
177:            /**
178:             * returns true iff there's no more data to consume ...
179:             */
180:            public boolean isEOF() throws IOException, SAXException {
181:
182:                // called to ensure WF-ness of included entities and to pop
183:                // input entities appropriately ... EOF is not always legal.
184:                if (start >= finish) {
185:                    fillbuf();
186:                    return start >= finish;
187:                } else
188:                    return false;
189:            }
190:
191:            /**
192:             * Returns the name of the encoding in use, else null; the name
193:             * returned is in as standard a form as we can get.
194:             */
195:            public String getEncoding() {
196:
197:                if (reader == null)
198:                    return null;
199:                if (reader instanceof  XmlReader)
200:                    return ((XmlReader) reader).getEncoding();
201:
202:                // XXX prefer a java2javatd() call to normalize names...
203:
204:                if (reader instanceof  InputStreamReader)
205:                    return ((InputStreamReader) reader).getEncoding();
206:                return null;
207:            }
208:
209:            /**
210:             * returns the next name char, or NUL ... faster than getc(),
211:             * and the common "name or nmtoken must be next" case won't
212:             * need ungetc().
213:             */
214:            public char getNameChar() throws IOException, SAXException {
215:
216:                if (finish <= start)
217:                    fillbuf();
218:                if (finish > start) {
219:                    char c = buf[start++];
220:                    if (XmlChars.isNameChar(c))
221:                        return c;
222:                    start--;
223:                }
224:                return 0;
225:            }
226:
227:            /**
228:             * gets the next Java character -- might be part of an XML
229:             * text character represented by a surrogate pair, or be
230:             * the end of the entity.
231:             */
232:            public char getc() throws IOException, SAXException {
233:
234:                if (finish <= start)
235:                    fillbuf();
236:                if (finish > start) {
237:                    char c = buf[start++];
238:
239:                    // [2] Char ::= #x0009 | #x000A | #x000D
240:                    //            | [#x0020-#xD7FF]
241:                    //            | [#xE000-#xFFFD]
242:                    // plus surrogate _pairs_ representing [#x10000-#x10ffff]
243:                    if (returnedFirstHalf) {
244:                        if (c >= 0xdc00 && c <= 0xdfff) {
245:                            returnedFirstHalf = false;
246:                            return c;
247:                        } else
248:                            fatal("P-070", new Object[] { Integer
249:                                    .toHexString(c) });
250:                    }
251:                    if ((c >= 0x0020 && c <= 0xD7FF) || c == 0x0009
252:                    // no surrogates!
253:                            || (c >= 0xE000 && c <= 0xFFFD))
254:                        return c;
255:
256:                    //
257:                    // CRLF and CR are both line ends; map both to LF, and
258:                    // keep line count correct.
259:                    //
260:                    else if (c == '\r' && !isInternal()) {
261:                        maybeInCRLF = true;
262:                        c = getc();
263:                        if (c != '\n')
264:                            ungetc();
265:                        maybeInCRLF = false;
266:
267:                        lineNumber++;
268:                        return '\n';
269:
270:                    } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
271:                        if (!isInternal() && !maybeInCRLF)
272:                            lineNumber++;
273:                        return c;
274:                    }
275:
276:                    // surrogates...
277:                    if (c >= 0xd800 && c < 0xdc00) {
278:                        returnedFirstHalf = true;
279:                        return c;
280:                    }
281:
282:                    fatal("P-071", new Object[] { Integer.toHexString(c) });
283:                }
284:                throw new EndOfInputException();
285:            }
286:
287:            /**
288:             * lookahead one character
289:             */
290:            public boolean peekc(char c) throws IOException, SAXException {
291:
292:                if (finish <= start)
293:                    fillbuf();
294:                if (finish > start) {
295:                    if (buf[start] == c) {
296:                        start++;
297:                        return true;
298:                    } else
299:                        return false;
300:                }
301:                return false;
302:            }
303:
304:            /**
305:             * two character pushback is guaranteed
306:             */
307:            public void ungetc() {
308:
309:                if (start == 0)
310:                    throw new InternalError("ungetc");
311:                start--;
312:
313:                if (buf[start] == '\n' || buf[start] == '\r') {
314:                    if (!isInternal())
315:                        lineNumber--;
316:                } else if (returnedFirstHalf)
317:                    returnedFirstHalf = false;
318:            }
319:
320:            /**
321:             * optional grammatical whitespace (discarded)
322:             */
323:            public boolean maybeWhitespace() throws IOException, SAXException {
324:
325:                char c;
326:                boolean isSpace = false;
327:                boolean sawCR = false;
328:
329:                // [3] S ::= #20 | #09 | #0D | #0A
330:                for (;;) {
331:                    if (finish <= start)
332:                        fillbuf();
333:                    if (finish <= start)
334:                        return isSpace;
335:
336:                    c = buf[start++];
337:                    if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
338:                        isSpace = true;
339:
340:                        //
341:                        // CR, LF are line endings ... CLRF is one, not two!
342:                        //
343:                        if ((c == '\n' || c == '\r') && !isInternal()) {
344:                            if (!(c == '\n' && sawCR)) {
345:                                lineNumber++;
346:                                sawCR = false;
347:                            }
348:                            if (c == '\r')
349:                                sawCR = true;
350:                        }
351:                    } else {
352:                        start--;
353:                        return isSpace;
354:                    }
355:                }
356:            }
357:
358:            /**
359:             * normal content; whitespace in markup may be handled
360:             * specially if the parser uses the content model.
361:             * <p/>
362:             * <P> content terminates with markup delimiter characters,
363:             * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
364:             * <p/>
365:             * <P> the document handler's characters() method is called
366:             * on all the content found
367:             */
368:            public boolean parsedContent(DTDEventListener docHandler
369:            /*ElementValidator validator*/) throws IOException, SAXException {
370:
371:                // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
372:
373:                int first; // first char to return
374:                int last; // last char to return
375:                boolean sawContent; // sent any chars?
376:                char c;
377:
378:                // deliver right out of the buffer, until delimiter, EOF,
379:                // or error, refilling as we go
380:                for (first = last = start, sawContent = false;; last++) {
381:
382:                    // buffer empty?
383:                    if (last >= finish) {
384:                        if (last > first) {
385:                            //            validator.text ();
386:                            docHandler.characters(buf, first, last - first);
387:                            sawContent = true;
388:                            start = last;
389:                        }
390:                        if (isEOF()) // calls fillbuf
391:                            return sawContent;
392:                        first = start;
393:                        last = first - 1; // incremented in loop
394:                        continue;
395:                    }
396:
397:                    c = buf[last];
398:
399:                    //
400:                    // pass most chars through ASAP; this inlines the code of
401:                    // [2] !XmlChars.isChar(c) leaving only characters needing
402:                    // special treatment ... line ends, surrogates, and:
403:                    //    0x0026 == '&'
404:                    //    0x003C == '<'
405:                    //    0x005D == ']'
406:                    // Comparisons ordered for speed on 'typical' text
407:                    //
408:                    if ((c > 0x005D && c <= 0xD7FF) // a-z and more
409:                            || (c < 0x0026 && c >= 0x0020) // space & punct
410:                            || (c > 0x003C && c < 0x005D) // A-Z & punct
411:                            || (c > 0x0026 && c < 0x003C) // 0-9 & punct
412:                            || c == 0x0009 || (c >= 0xE000 && c <= 0xFFFD))
413:                        continue;
414:
415:                    // terminate on markup delimiters
416:                    if (c == '<' || c == '&')
417:                        break;
418:
419:                    // count lines
420:                    if (c == '\n') {
421:                        if (!isInternal())
422:                            lineNumber++;
423:                        continue;
424:                    }
425:
426:                    // External entities get CR, CRLF --> LF mapping
427:                    // Internal ones got it already, and we can't repeat
428:                    // else we break char ref handling!!
429:                    if (c == '\r') {
430:                        if (isInternal())
431:                            continue;
432:
433:                        docHandler.characters(buf, first, last - first);
434:                        docHandler.characters(newline, 0, 1);
435:                        sawContent = true;
436:                        lineNumber++;
437:                        if (finish > (last + 1)) {
438:                            if (buf[last + 1] == '\n')
439:                                last++;
440:                        } else { // CR at end of buffer
441:                        // XXX case not yet handled:  CRLF here will look like two lines
442:                        }
443:                        first = start = last + 1;
444:                        continue;
445:                    }
446:
447:                    // ']]>' is a WF error -- must fail if we see it
448:                    if (c == ']') {
449:                        switch (finish - last) {
450:                        // for suspicious end-of-buffer cases, get more data
451:                        // into the buffer to rule out this sequence.
452:                        case 2:
453:                            if (buf[last + 1] != ']')
454:                                continue;
455:                            // FALLTHROUGH
456:
457:                        case 1:
458:                            if (reader == null || isClosed)
459:                                continue;
460:                            if (last == first)
461:                                throw new InternalError("fillbuf");
462:                            last--;
463:                            if (last > first) {
464:                                //            validator.text ();
465:                                docHandler.characters(buf, first, last - first);
466:                                sawContent = true;
467:                                start = last;
468:                            }
469:                            fillbuf();
470:                            first = last = start;
471:                            continue;
472:
473:                            // otherwise any "]]>" would be buffered, and we can
474:                            // see right away if that's what we have
475:                        default:
476:                            if (buf[last + 1] == ']' && buf[last + 2] == '>')
477:                                fatal("P-072", null);
478:                            continue;
479:                        }
480:                    }
481:
482:                    // correctly paired surrogates are OK
483:                    if (c >= 0xd800 && c <= 0xdfff) {
484:                        if ((last + 1) >= finish) {
485:                            if (last > first) {
486:                                //            validator.text ();
487:                                docHandler.characters(buf, first, last - first);
488:                                sawContent = true;
489:                                start = last + 1;
490:                            }
491:                            if (isEOF()) { // calls fillbuf
492:                                fatal("P-081", new Object[] { Integer
493:                                        .toHexString(c) });
494:                            }
495:                            first = start;
496:                            last = first;
497:                            continue;
498:                        }
499:                        if (checkSurrogatePair(last))
500:                            last++;
501:                        else {
502:                            last--;
503:                            // also terminate on surrogate pair oddities
504:                            break;
505:                        }
506:                        continue;
507:                    }
508:
509:                    fatal("P-071", new Object[] { Integer.toHexString(c) });
510:                }
511:                if (last == first)
512:                    return sawContent;
513:                //    validator.text ();
514:                docHandler.characters(buf, first, last - first);
515:                start = last;
516:                return true;
517:            }
518:
519:            /**
520:             * CDATA -- character data, terminated by "]]>" and optionally
521:             * including unescaped markup delimiters (ampersand and left angle
522:             * bracket).  This should otherwise be exactly like character data,
523:             * modulo differences in error report details.
524:             * <p/>
525:             * <P> The document handler's characters() or ignorableWhitespace()
526:             * methods are invoked on all the character data found
527:             *
528:             * @param docHandler               gets callbacks for character data
529:             * @param ignorableWhitespace      if true, whitespace characters will
530:             *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
531:             *                                 non-whitespace characters will cause validation errors
532:             * @param whitespaceInvalidMessage if true, ignorable whitespace
533:             *                                 causes a validity error report as well as a callback
534:             */
535:            public boolean unparsedContent(DTDEventListener docHandler,
536:            /*ElementValidator validator,*/
537:            boolean ignorableWhitespace, String whitespaceInvalidMessage)
538:                    throws IOException, SAXException {
539:
540:                // [18] CDSect ::= CDStart CData CDEnd
541:                // [19] CDStart ::= '<![CDATA['
542:                // [20] CData ::= (Char* - (Char* ']]>' Char*))
543:                // [21] CDEnd ::= ']]>'
544:
545:                // caller peeked the leading '<' ...
546:                if (!peek("![CDATA[", null))
547:                    return false;
548:                docHandler.startCDATA();
549:
550:                // only a literal ']]>' stops this ...
551:                int last;
552:
553:                for (;;) { // until ']]>' seen
554:                    boolean done = false;
555:                    char c;
556:
557:                    // don't report ignorable whitespace as "text" for
558:                    // validation purposes.
559:                    boolean white = ignorableWhitespace;
560:
561:                    for (last = start; last < finish; last++) {
562:                        c = buf[last];
563:
564:                        //
565:                        // Reject illegal characters.
566:                        //
567:                        if (!XmlChars.isChar(c)) {
568:                            white = false;
569:                            if (c >= 0xd800 && c <= 0xdfff) {
570:                                if (checkSurrogatePair(last)) {
571:                                    last++;
572:                                    continue;
573:                                } else {
574:                                    last--;
575:                                    break;
576:                                }
577:                            }
578:                            fatal("P-071", new Object[] { Integer
579:                                    .toHexString(buf[last]) });
580:                        }
581:                        if (c == '\n') {
582:                            if (!isInternal())
583:                                lineNumber++;
584:                            continue;
585:                        }
586:                        if (c == '\r') {
587:                            // As above, we can't repeat CR/CRLF --> LF mapping
588:                            if (isInternal())
589:                                continue;
590:
591:                            if (white) {
592:                                if (whitespaceInvalidMessage != null)
593:                                    errHandler.error(new SAXParseException(
594:                                            DTDParser.messages.getMessage(
595:                                                    locale,
596:                                                    whitespaceInvalidMessage),
597:                                            null));
598:                                docHandler.ignorableWhitespace(buf, start, last
599:                                        - start);
600:                                docHandler.ignorableWhitespace(newline, 0, 1);
601:                            } else {
602:                                //            validator.text ();
603:                                docHandler.characters(buf, start, last - start);
604:                                docHandler.characters(newline, 0, 1);
605:                            }
606:                            lineNumber++;
607:                            if (finish > (last + 1)) {
608:                                if (buf[last + 1] == '\n')
609:                                    last++;
610:                            } else { // CR at end of buffer
611:                            // XXX case not yet handled ... as above
612:                            }
613:                            start = last + 1;
614:                            continue;
615:                        }
616:                        if (c != ']') {
617:                            if (c != ' ' && c != '\t')
618:                                white = false;
619:                            continue;
620:                        }
621:                        if ((last + 2) < finish) {
622:                            if (buf[last + 1] == ']' && buf[last + 2] == '>') {
623:                                done = true;
624:                                break;
625:                            }
626:                            white = false;
627:                            continue;
628:                        } else {
629:                            //last--;
630:                            break;
631:                        }
632:                    }
633:                    if (white) {
634:                        if (whitespaceInvalidMessage != null)
635:                            errHandler.error(new SAXParseException(
636:                                    DTDParser.messages.getMessage(locale,
637:                                            whitespaceInvalidMessage), null));
638:                        docHandler
639:                                .ignorableWhitespace(buf, start, last - start);
640:                    } else {
641:                        //        validator.text ();
642:                        docHandler.characters(buf, start, last - start);
643:                    }
644:                    if (done) {
645:                        start = last + 3;
646:                        break;
647:                    }
648:                    start = last;
649:                    if (isEOF())
650:                        fatal("P-073", null);
651:                }
652:                docHandler.endCDATA();
653:                return true;
654:            }
655:
656:            // return false to backstep at end of buffer)
657:            private boolean checkSurrogatePair(int offset) throws SAXException {
658:
659:                if ((offset + 1) >= finish)
660:                    return false;
661:
662:                char c1 = buf[offset++];
663:                char c2 = buf[offset];
664:
665:                if ((c1 >= 0xd800 && c1 < 0xdc00)
666:                        && (c2 >= 0xdc00 && c2 <= 0xdfff))
667:                    return true;
668:                fatal("P-074", new Object[] {
669:                        Integer.toHexString(c1 & 0x0ffff),
670:                        Integer.toHexString(c2 & 0x0ffff) });
671:                return false;
672:            }
673:
674:            /**
675:             * whitespace in markup (flagged to app, discardable)
676:             * <p/>
677:             * <P> the document handler's ignorableWhitespace() method
678:             * is called on all the whitespace found
679:             */
680:            public boolean ignorableWhitespace(DTDEventListener handler)
681:                    throws IOException, SAXException {
682:
683:                char c;
684:                boolean isSpace = false;
685:                int first;
686:
687:                // [3] S ::= #20 | #09 | #0D | #0A
688:                for (first = start;;) {
689:                    if (finish <= start) {
690:                        if (isSpace)
691:                            handler.ignorableWhitespace(buf, first, start
692:                                    - first);
693:                        fillbuf();
694:                        first = start;
695:                    }
696:                    if (finish <= start)
697:                        return isSpace;
698:
699:                    c = buf[start++];
700:                    switch (c) {
701:                    case '\n':
702:                        if (!isInternal())
703:                            lineNumber++;
704:                        // XXX handles Macintosh line endings wrong
705:                        // fallthrough
706:                    case 0x09:
707:                    case 0x20:
708:                        isSpace = true;
709:                        continue;
710:
711:                    case '\r':
712:                        isSpace = true;
713:                        if (!isInternal())
714:                            lineNumber++;
715:                        handler.ignorableWhitespace(buf, first, (start - 1)
716:                                - first);
717:                        handler.ignorableWhitespace(newline, 0, 1);
718:                        if (start < finish && buf[start] == '\n')
719:                            ++start;
720:                        first = start;
721:                        continue;
722:
723:                    default:
724:                        ungetc();
725:                        if (isSpace)
726:                            handler.ignorableWhitespace(buf, first, start
727:                                    - first);
728:                        return isSpace;
729:                    }
730:                }
731:            }
732:
733:            /**
734:             * returns false iff 'next' string isn't as provided,
735:             * else skips that text and returns true.
736:             * <p/>
737:             * <P> NOTE:  two alternative string representations are
738:             * both passed in, since one is faster.
739:             */
740:            public boolean peek(String next, char chars[]) throws IOException,
741:                    SAXException {
742:
743:                int len;
744:                int i;
745:
746:                if (chars != null)
747:                    len = chars.length;
748:                else
749:                    len = next.length();
750:
751:                // buffer should hold the whole thing ... give it a
752:                // chance for the end-of-buffer case and cope with EOF
753:                // by letting fillbuf compact and fill
754:                if (finish <= start || (finish - start) < len)
755:                    fillbuf();
756:
757:                // can't peek past EOF
758:                if (finish <= start)
759:                    return false;
760:
761:                // compare the string; consume iff it matches
762:                if (chars != null) {
763:                    for (i = 0; i < len && (start + i) < finish; i++) {
764:                        if (buf[start + i] != chars[i])
765:                            return false;
766:                    }
767:                } else {
768:                    for (i = 0; i < len && (start + i) < finish; i++) {
769:                        if (buf[start + i] != next.charAt(i))
770:                            return false;
771:                    }
772:                }
773:
774:                // if the first fillbuf didn't get enough data, give
775:                // fillbuf another chance to read
776:                if (i < len) {
777:                    if (reader == null || isClosed)
778:                        return false;
779:
780:                    //
781:                    // This diagnostic "knows" that the only way big strings would
782:                    // fail to be peeked is where it's a symbol ... e.g. for an
783:                    // </EndTag> construct.  That knowledge could also be applied
784:                    // to get rid of the symbol length constraint, since having
785:                    // the wrong symbol is a fatal error anyway ...
786:                    //
787:                    if (len > buf.length)
788:                        fatal("P-077", new Object[] { new Integer(buf.length) });
789:
790:                    fillbuf();
791:                    return peek(next, chars);
792:                }
793:
794:                start += len;
795:                return true;
796:            }
797:
798:            //
799:            // Support for reporting the internal DTD subset, so <!DOCTYPE...>
800:            // declarations can be recreated.  This is collected as a single
801:            // string; such subsets are normally small, and many applications
802:            // don't even care about this.
803:            //
804:            public void startRemembering() {
805:
806:                if (startRemember != 0)
807:                    throw new InternalError();
808:                startRemember = start;
809:            }
810:
811:            public String rememberText() {
812:
813:                String retval;
814:
815:                // If the internal subset crossed a buffer boundary, we
816:                // created a temporary buffer.
817:                if (rememberedText != null) {
818:                    rememberedText.append(buf, startRemember, start
819:                            - startRemember);
820:                    retval = rememberedText.toString();
821:                } else
822:                    retval = new String(buf, startRemember, start
823:                            - startRemember);
824:
825:                startRemember = 0;
826:                rememberedText = null;
827:                return retval;
828:            }
829:
830:            private InputEntity getTopEntity() {
831:
832:                InputEntity current = this ;
833:
834:                // don't report locations within internal entities!
835:
836:                while (current != null && current.input == null)
837:                    current = current.next;
838:                return current == null ? this  : current;
839:            }
840:
841:            /**
842:             * Returns the public ID of this input source, if known
843:             */
844:            public String getPublicId() {
845:
846:                InputEntity where = getTopEntity();
847:                if (where == this )
848:                    return input.getPublicId();
849:                return where.getPublicId();
850:            }
851:
852:            /**
853:             * Returns the system ID of this input source, if known
854:             */
855:            public String getSystemId() {
856:
857:                InputEntity where = getTopEntity();
858:                if (where == this )
859:                    return input.getSystemId();
860:                return where.getSystemId();
861:            }
862:
863:            /**
864:             * Returns the current line number in this input source
865:             */
866:            public int getLineNumber() {
867:
868:                InputEntity where = getTopEntity();
869:                if (where == this )
870:                    return lineNumber;
871:                return where.getLineNumber();
872:            }
873:
874:            /**
875:             * returns -1; maintaining column numbers hurts performance
876:             */
877:            public int getColumnNumber() {
878:
879:                return -1; // not maintained (speed)
880:            }
881:
882:            //
883:            // n.b. for non-EOF end-of-buffer cases, reader should return
884:            // at least a handful of bytes so various lookaheads behave.
885:            //
886:            // two character pushback exists except at first; characters
887:            // represented by surrogate pairs can't be pushed back (they'd
888:            // only be in character data anyway).
889:            //
890:            // DTD exception thrown on char conversion problems; line number
891:            // will be low, as a rule.
892:            //
893:            private void fillbuf() throws IOException, SAXException {
894:
895:                // don't touched fixed buffers, that'll usually
896:                // change entity values (and isn't needed anyway)
897:                // likewise, ignore closed streams
898:                if (reader == null || isClosed)
899:                    return;
900:
901:                // if remembering DTD text, copy!
902:                if (startRemember != 0) {
903:                    if (rememberedText == null)
904:                        rememberedText = new StringBuffer(buf.length);
905:                    rememberedText.append(buf, startRemember, start
906:                            - startRemember);
907:                }
908:
909:                boolean extra = (finish > 0) && (start > 0);
910:                int len;
911:
912:                if (extra) // extra pushback
913:                    start--;
914:                len = finish - start;
915:
916:                System.arraycopy(buf, start, buf, 0, len);
917:                start = 0;
918:                finish = len;
919:
920:                try {
921:                    len = buf.length - len;
922:                    len = reader.read(buf, finish, len);
923:                } catch (UnsupportedEncodingException e) {
924:                    fatal("P-075", new Object[] { e.getMessage() });
925:                } catch (CharConversionException e) {
926:                    fatal("P-076", new Object[] { e.getMessage() });
927:                }
928:                if (len >= 0)
929:                    finish += len;
930:                else
931:                    close();
932:                if (extra) // extra pushback
933:                    start++;
934:
935:                if (startRemember != 0)
936:                    // assert extra == true
937:                    startRemember = 1;
938:            }
939:
940:            public void close() {
941:
942:                try {
943:                    if (reader != null && !isClosed)
944:                        reader.close();
945:                    isClosed = true;
946:                } catch (IOException e) {
947:                    /* NOTHING */
948:                }
949:            }
950:
951:            private void fatal(String messageId, Object params[])
952:                    throws SAXException {
953:
954:                SAXParseException x = new SAXParseException(DTDParser.messages
955:                        .getMessage(locale, messageId, params), null);
956:
957:                // not continuable ... e.g. WF errors
958:                close();
959:                errHandler.fatalError(x);
960:                throw x;
961:            }
962:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.