001: /*
002: * @(#)XmlChars.java 1.1 00/08/05
003: *
004: * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
005: */
006:
007: package com.sun.xml.dtdparser;
008:
009: import org.xml.sax.InputSource;
010: import org.xml.sax.SAXException;
011: import org.xml.sax.SAXParseException;
012:
013: import java.io.CharConversionException;
014: import java.io.IOException;
015: import java.io.InputStream;
016: import java.io.InputStreamReader;
017: import java.io.Reader;
018: import java.io.UnsupportedEncodingException;
019: import java.net.URL;
020: import java.util.Locale;
021:
022: /**
023: * This is how the parser talks to its input entities, of all kinds.
024: * The entities are in a stack.
025: * <p/>
026: * <P> For internal entities, the character arrays are referenced here,
027: * and read from as needed (they're read-only). External entities have
028: * mutable buffers, that are read into as needed.
029: * <p/>
030: * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
031: * whether it's in an external (parsed) entity or not. The XML 1.0 spec
032: * is inconsistent in explaining EOL handling; this is the sensible way.
033: *
034: * @author David Brownell
035: * @author Janet Koenig
036: * @version 1.4 00/08/05
037: */
038: public class InputEntity {
// Unread input is buf[start..finish); readers consume buf[start] and
// advance start, and fillbuf() slides the window for external entities.
private int start, finish;
private char buf[];
// Current line number; only adjusted while reading external entities.
private int lineNumber = 1;
// Set by getc() after it hands out the first half of a surrogate pair.
private boolean returnedFirstHalf = false;
private boolean maybeInCRLF = false;

// name of entity (never main document or unnamed DTD PE)
private String name;

// next entity down the stack; null marks the top-level document
// (see isDocument())
private InputEntity next;

// for system and public IDs in diagnostics
private InputSource input;

// this is a buffer; some buffers can be replenished.
// reader stays null for internal entities, whose buffer is fixed.
private Reader reader;
private boolean isClosed;

// receives error reports; locale selects the message translations
private DTDEventListener errHandler;
private Locale locale;

// text captured between startRemembering() and rememberText();
// startRemember == 0 means nothing is being remembered
private StringBuffer rememberedText;
private int startRemember;

// record if this is a PE, so endParsedEntity won't be called
private boolean isPE;

// InputStreamReader throws an internal per-read exception, so
// we minimize reads. We also add a byte to compensate for the
// "ungetc" byte we keep, so that our downstream reads are as
// nicely sized as we can make them.
final private static int BUFSIZ = 8 * 1024 + 1;

// single LF reported to handlers in place of a CR or CRLF line end
final private static char newline[] = { '\n' };
073:
074: public static InputEntity getInputEntity(DTDEventListener h,
075: Locale l) {
076: InputEntity retval = new InputEntity();
077: retval.errHandler = h;
078: retval.locale = l;
079: return retval;
080: }
081:
// Not publicly constructible; instances come from getInputEntity().
private InputEntity() {
    super();
}
084:
085: //
086: // predicate: return true iff this is an internal entity reader,
087: // and so may safely be "popped" as needed. external entities have
088: // syntax to uphold; internal parameter entities have at most validity
089: // constraints to monitor. also, only external entities get decent
090: // location diagnostics.
091: //
092: public boolean isInternal() {
093: return reader == null;
094: }
095:
096: //
097: // predicate: return true iff this is the toplevel document
098: //
099: public boolean isDocument() {
100: return next == null;
101: }
102:
103: //
104: // predicate: return true iff this is a PE expansion (so that
105: // LexicalEventListner.endParsedEntity won't be called)
106: //
107: public boolean isParameterEntity() {
108: return isPE;
109: }
110:
111: //
112: // return name of current entity
113: //
114: public String getName() {
115: return name;
116: }
117:
118: //
119: // use this for an external parsed entity
120: //
121: public void init(InputSource in, String name, InputEntity stack,
122: boolean isPE) throws IOException, SAXException {
123:
124: input = in;
125: this .isPE = isPE;
126: reader = in.getCharacterStream();
127:
128: if (reader == null) {
129: InputStream bytes = in.getByteStream();
130:
131: if (bytes == null)
132: reader = XmlReader.createReader(new URL(in
133: .getSystemId()).openStream());
134: else if (in.getEncoding() != null)
135: reader = XmlReader.createReader(in.getByteStream(), in
136: .getEncoding());
137: else
138: reader = XmlReader.createReader(in.getByteStream());
139: }
140: next = stack;
141: buf = new char[BUFSIZ];
142: this .name = name;
143: checkRecursion(stack);
144: }
145:
146: //
147: // use this for an internal parsed entity; buffer is readonly
148: //
149: public void init(char b[], String name, InputEntity stack,
150: boolean isPE) throws SAXException {
151:
152: next = stack;
153: buf = b;
154: finish = b.length;
155: this .name = name;
156: this .isPE = isPE;
157: checkRecursion(stack);
158: }
159:
160: private void checkRecursion(InputEntity stack) throws SAXException {
161:
162: if (stack == null)
163: return;
164: for (stack = stack.next; stack != null; stack = stack.next) {
165: if (stack.name != null && stack.name.equals(name))
166: fatal("P-069", new Object[] { name });
167: }
168: }
169:
170: public InputEntity pop() throws IOException {
171:
172: // caller has ensured there's nothing left to read
173: close();
174: return next;
175: }
176:
177: /**
178: * returns true iff there's no more data to consume ...
179: */
180: public boolean isEOF() throws IOException, SAXException {
181:
182: // called to ensure WF-ness of included entities and to pop
183: // input entities appropriately ... EOF is not always legal.
184: if (start >= finish) {
185: fillbuf();
186: return start >= finish;
187: } else
188: return false;
189: }
190:
191: /**
192: * Returns the name of the encoding in use, else null; the name
193: * returned is in as standard a form as we can get.
194: */
195: public String getEncoding() {
196:
197: if (reader == null)
198: return null;
199: if (reader instanceof XmlReader)
200: return ((XmlReader) reader).getEncoding();
201:
202: // XXX prefer a java2javatd() call to normalize names...
203:
204: if (reader instanceof InputStreamReader)
205: return ((InputStreamReader) reader).getEncoding();
206: return null;
207: }
208:
209: /**
210: * returns the next name char, or NUL ... faster than getc(),
211: * and the common "name or nmtoken must be next" case won't
212: * need ungetc().
213: */
214: public char getNameChar() throws IOException, SAXException {
215:
216: if (finish <= start)
217: fillbuf();
218: if (finish > start) {
219: char c = buf[start++];
220: if (XmlChars.isNameChar(c))
221: return c;
222: start--;
223: }
224: return 0;
225: }
226:
227: /**
228: * gets the next Java character -- might be part of an XML
229: * text character represented by a surrogate pair, or be
230: * the end of the entity.
231: */
232: public char getc() throws IOException, SAXException {
233:
234: if (finish <= start)
235: fillbuf();
236: if (finish > start) {
237: char c = buf[start++];
238:
239: // [2] Char ::= #x0009 | #x000A | #x000D
240: // | [#x0020-#xD7FF]
241: // | [#xE000-#xFFFD]
242: // plus surrogate _pairs_ representing [#x10000-#x10ffff]
243: if (returnedFirstHalf) {
244: if (c >= 0xdc00 && c <= 0xdfff) {
245: returnedFirstHalf = false;
246: return c;
247: } else
248: fatal("P-070", new Object[] { Integer
249: .toHexString(c) });
250: }
251: if ((c >= 0x0020 && c <= 0xD7FF) || c == 0x0009
252: // no surrogates!
253: || (c >= 0xE000 && c <= 0xFFFD))
254: return c;
255:
256: //
257: // CRLF and CR are both line ends; map both to LF, and
258: // keep line count correct.
259: //
260: else if (c == '\r' && !isInternal()) {
261: maybeInCRLF = true;
262: c = getc();
263: if (c != '\n')
264: ungetc();
265: maybeInCRLF = false;
266:
267: lineNumber++;
268: return '\n';
269:
270: } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
271: if (!isInternal() && !maybeInCRLF)
272: lineNumber++;
273: return c;
274: }
275:
276: // surrogates...
277: if (c >= 0xd800 && c < 0xdc00) {
278: returnedFirstHalf = true;
279: return c;
280: }
281:
282: fatal("P-071", new Object[] { Integer.toHexString(c) });
283: }
284: throw new EndOfInputException();
285: }
286:
287: /**
288: * lookahead one character
289: */
290: public boolean peekc(char c) throws IOException, SAXException {
291:
292: if (finish <= start)
293: fillbuf();
294: if (finish > start) {
295: if (buf[start] == c) {
296: start++;
297: return true;
298: } else
299: return false;
300: }
301: return false;
302: }
303:
304: /**
305: * two character pushback is guaranteed
306: */
307: public void ungetc() {
308:
309: if (start == 0)
310: throw new InternalError("ungetc");
311: start--;
312:
313: if (buf[start] == '\n' || buf[start] == '\r') {
314: if (!isInternal())
315: lineNumber--;
316: } else if (returnedFirstHalf)
317: returnedFirstHalf = false;
318: }
319:
320: /**
321: * optional grammatical whitespace (discarded)
322: */
323: public boolean maybeWhitespace() throws IOException, SAXException {
324:
325: char c;
326: boolean isSpace = false;
327: boolean sawCR = false;
328:
329: // [3] S ::= #20 | #09 | #0D | #0A
330: for (;;) {
331: if (finish <= start)
332: fillbuf();
333: if (finish <= start)
334: return isSpace;
335:
336: c = buf[start++];
337: if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
338: isSpace = true;
339:
340: //
341: // CR, LF are line endings ... CLRF is one, not two!
342: //
343: if ((c == '\n' || c == '\r') && !isInternal()) {
344: if (!(c == '\n' && sawCR)) {
345: lineNumber++;
346: sawCR = false;
347: }
348: if (c == '\r')
349: sawCR = true;
350: }
351: } else {
352: start--;
353: return isSpace;
354: }
355: }
356: }
357:
358: /**
359: * normal content; whitespace in markup may be handled
360: * specially if the parser uses the content model.
361: * <p/>
362: * <P> content terminates with markup delimiter characters,
363: * namely ampersand (&amp;) and left angle bracket (&lt;).
364: * <p/>
365: * <P> the document handler's characters() method is called
366: * on all the content found
367: */
368: public boolean parsedContent(DTDEventListener docHandler
369: /*ElementValidator validator*/) throws IOException, SAXException {
370:
371: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
372:
373: int first; // first char to return
374: int last; // last char to return
375: boolean sawContent; // sent any chars?
376: char c;
377:
378: // deliver right out of the buffer, until delimiter, EOF,
379: // or error, refilling as we go
380: for (first = last = start, sawContent = false;; last++) {
381:
382: // buffer empty?
383: if (last >= finish) {
384: if (last > first) {
385: // validator.text ();
386: docHandler.characters(buf, first, last - first);
387: sawContent = true;
388: start = last;
389: }
390: if (isEOF()) // calls fillbuf
391: return sawContent;
392: first = start;
393: last = first - 1; // incremented in loop
394: continue;
395: }
396:
397: c = buf[last];
398:
399: //
400: // pass most chars through ASAP; this inlines the code of
401: // [2] !XmlChars.isChar(c) leaving only characters needing
402: // special treatment ... line ends, surrogates, and:
403: // 0x0026 == '&'
404: // 0x003C == '<'
405: // 0x005D == ']'
406: // Comparisons ordered for speed on 'typical' text
407: //
408: if ((c > 0x005D && c <= 0xD7FF) // a-z and more
409: || (c < 0x0026 && c >= 0x0020) // space & punct
410: || (c > 0x003C && c < 0x005D) // A-Z & punct
411: || (c > 0x0026 && c < 0x003C) // 0-9 & punct
412: || c == 0x0009 || (c >= 0xE000 && c <= 0xFFFD))
413: continue;
414:
415: // terminate on markup delimiters
416: if (c == '<' || c == '&')
417: break;
418:
419: // count lines
420: if (c == '\n') {
421: if (!isInternal())
422: lineNumber++;
423: continue;
424: }
425:
426: // External entities get CR, CRLF --> LF mapping
427: // Internal ones got it already, and we can't repeat
428: // else we break char ref handling!!
429: if (c == '\r') {
430: if (isInternal())
431: continue;
432:
433: docHandler.characters(buf, first, last - first);
434: docHandler.characters(newline, 0, 1);
435: sawContent = true;
436: lineNumber++;
437: if (finish > (last + 1)) {
438: if (buf[last + 1] == '\n')
439: last++;
440: } else { // CR at end of buffer
441: // XXX case not yet handled: CRLF here will look like two lines
442: }
443: first = start = last + 1;
444: continue;
445: }
446:
447: // ']]>' is a WF error -- must fail if we see it
448: if (c == ']') {
449: switch (finish - last) {
450: // for suspicious end-of-buffer cases, get more data
451: // into the buffer to rule out this sequence.
452: case 2:
453: if (buf[last + 1] != ']')
454: continue;
455: // FALLTHROUGH
456:
457: case 1:
458: if (reader == null || isClosed)
459: continue;
460: if (last == first)
461: throw new InternalError("fillbuf");
462: last--;
463: if (last > first) {
464: // validator.text ();
465: docHandler.characters(buf, first, last - first);
466: sawContent = true;
467: start = last;
468: }
469: fillbuf();
470: first = last = start;
471: continue;
472:
473: // otherwise any "]]>" would be buffered, and we can
474: // see right away if that's what we have
475: default:
476: if (buf[last + 1] == ']' && buf[last + 2] == '>')
477: fatal("P-072", null);
478: continue;
479: }
480: }
481:
482: // correctly paired surrogates are OK
483: if (c >= 0xd800 && c <= 0xdfff) {
484: if ((last + 1) >= finish) {
485: if (last > first) {
486: // validator.text ();
487: docHandler.characters(buf, first, last - first);
488: sawContent = true;
489: start = last + 1;
490: }
491: if (isEOF()) { // calls fillbuf
492: fatal("P-081", new Object[] { Integer
493: .toHexString(c) });
494: }
495: first = start;
496: last = first;
497: continue;
498: }
499: if (checkSurrogatePair(last))
500: last++;
501: else {
502: last--;
503: // also terminate on surrogate pair oddities
504: break;
505: }
506: continue;
507: }
508:
509: fatal("P-071", new Object[] { Integer.toHexString(c) });
510: }
511: if (last == first)
512: return sawContent;
513: // validator.text ();
514: docHandler.characters(buf, first, last - first);
515: start = last;
516: return true;
517: }
518:
519: /**
520: * CDATA -- character data, terminated by "]]>" and optionally
521: * including unescaped markup delimiters (ampersand and left angle
522: * bracket). This should otherwise be exactly like character data,
523: * modulo differences in error report details.
524: * <p/>
525: * <P> The document handler's characters() or ignorableWhitespace()
526: * methods are invoked on all the character data found
527: *
528: * @param docHandler gets callbacks for character data
529: * @param ignorableWhitespace if true, whitespace characters will
530: * be reported using docHandler.ignorableWhitespace(); implicitly,
531: * non-whitespace characters will cause validation errors
532: * @param whitespaceInvalidMessage if true, ignorable whitespace
533: * causes a validity error report as well as a callback
534: */
535: public boolean unparsedContent(DTDEventListener docHandler,
536: /*ElementValidator validator,*/
537: boolean ignorableWhitespace, String whitespaceInvalidMessage)
538: throws IOException, SAXException {
539:
540: // [18] CDSect ::= CDStart CData CDEnd
541: // [19] CDStart ::= '<![CDATA['
542: // [20] CData ::= (Char* - (Char* ']]>' Char*))
543: // [21] CDEnd ::= ']]>'
544:
545: // caller peeked the leading '<' ...
546: if (!peek("![CDATA[", null))
547: return false;
548: docHandler.startCDATA();
549:
550: // only a literal ']]>' stops this ...
551: int last;
552:
553: for (;;) { // until ']]>' seen
554: boolean done = false;
555: char c;
556:
557: // don't report ignorable whitespace as "text" for
558: // validation purposes.
559: boolean white = ignorableWhitespace;
560:
561: for (last = start; last < finish; last++) {
562: c = buf[last];
563:
564: //
565: // Reject illegal characters.
566: //
567: if (!XmlChars.isChar(c)) {
568: white = false;
569: if (c >= 0xd800 && c <= 0xdfff) {
570: if (checkSurrogatePair(last)) {
571: last++;
572: continue;
573: } else {
574: last--;
575: break;
576: }
577: }
578: fatal("P-071", new Object[] { Integer
579: .toHexString(buf[last]) });
580: }
581: if (c == '\n') {
582: if (!isInternal())
583: lineNumber++;
584: continue;
585: }
586: if (c == '\r') {
587: // As above, we can't repeat CR/CRLF --> LF mapping
588: if (isInternal())
589: continue;
590:
591: if (white) {
592: if (whitespaceInvalidMessage != null)
593: errHandler.error(new SAXParseException(
594: DTDParser.messages.getMessage(
595: locale,
596: whitespaceInvalidMessage),
597: null));
598: docHandler.ignorableWhitespace(buf, start, last
599: - start);
600: docHandler.ignorableWhitespace(newline, 0, 1);
601: } else {
602: // validator.text ();
603: docHandler.characters(buf, start, last - start);
604: docHandler.characters(newline, 0, 1);
605: }
606: lineNumber++;
607: if (finish > (last + 1)) {
608: if (buf[last + 1] == '\n')
609: last++;
610: } else { // CR at end of buffer
611: // XXX case not yet handled ... as above
612: }
613: start = last + 1;
614: continue;
615: }
616: if (c != ']') {
617: if (c != ' ' && c != '\t')
618: white = false;
619: continue;
620: }
621: if ((last + 2) < finish) {
622: if (buf[last + 1] == ']' && buf[last + 2] == '>') {
623: done = true;
624: break;
625: }
626: white = false;
627: continue;
628: } else {
629: //last--;
630: break;
631: }
632: }
633: if (white) {
634: if (whitespaceInvalidMessage != null)
635: errHandler.error(new SAXParseException(
636: DTDParser.messages.getMessage(locale,
637: whitespaceInvalidMessage), null));
638: docHandler
639: .ignorableWhitespace(buf, start, last - start);
640: } else {
641: // validator.text ();
642: docHandler.characters(buf, start, last - start);
643: }
644: if (done) {
645: start = last + 3;
646: break;
647: }
648: start = last;
649: if (isEOF())
650: fatal("P-073", null);
651: }
652: docHandler.endCDATA();
653: return true;
654: }
655:
656: // return false to backstep at end of buffer)
657: private boolean checkSurrogatePair(int offset) throws SAXException {
658:
659: if ((offset + 1) >= finish)
660: return false;
661:
662: char c1 = buf[offset++];
663: char c2 = buf[offset];
664:
665: if ((c1 >= 0xd800 && c1 < 0xdc00)
666: && (c2 >= 0xdc00 && c2 <= 0xdfff))
667: return true;
668: fatal("P-074", new Object[] {
669: Integer.toHexString(c1 & 0x0ffff),
670: Integer.toHexString(c2 & 0x0ffff) });
671: return false;
672: }
673:
674: /**
675: * whitespace in markup (flagged to app, discardable)
676: * <p/>
677: * <P> the document handler's ignorableWhitespace() method
678: * is called on all the whitespace found
679: */
680: public boolean ignorableWhitespace(DTDEventListener handler)
681: throws IOException, SAXException {
682:
683: char c;
684: boolean isSpace = false;
685: int first;
686:
687: // [3] S ::= #20 | #09 | #0D | #0A
688: for (first = start;;) {
689: if (finish <= start) {
690: if (isSpace)
691: handler.ignorableWhitespace(buf, first, start
692: - first);
693: fillbuf();
694: first = start;
695: }
696: if (finish <= start)
697: return isSpace;
698:
699: c = buf[start++];
700: switch (c) {
701: case '\n':
702: if (!isInternal())
703: lineNumber++;
704: // XXX handles Macintosh line endings wrong
705: // fallthrough
706: case 0x09:
707: case 0x20:
708: isSpace = true;
709: continue;
710:
711: case '\r':
712: isSpace = true;
713: if (!isInternal())
714: lineNumber++;
715: handler.ignorableWhitespace(buf, first, (start - 1)
716: - first);
717: handler.ignorableWhitespace(newline, 0, 1);
718: if (start < finish && buf[start] == '\n')
719: ++start;
720: first = start;
721: continue;
722:
723: default:
724: ungetc();
725: if (isSpace)
726: handler.ignorableWhitespace(buf, first, start
727: - first);
728: return isSpace;
729: }
730: }
731: }
732:
733: /**
734: * returns false iff 'next' string isn't as provided,
735: * else skips that text and returns true.
736: * <p/>
737: * <P> NOTE: two alternative string representations are
738: * both passed in, since one is faster.
739: */
740: public boolean peek(String next, char chars[]) throws IOException,
741: SAXException {
742:
743: int len;
744: int i;
745:
746: if (chars != null)
747: len = chars.length;
748: else
749: len = next.length();
750:
751: // buffer should hold the whole thing ... give it a
752: // chance for the end-of-buffer case and cope with EOF
753: // by letting fillbuf compact and fill
754: if (finish <= start || (finish - start) < len)
755: fillbuf();
756:
757: // can't peek past EOF
758: if (finish <= start)
759: return false;
760:
761: // compare the string; consume iff it matches
762: if (chars != null) {
763: for (i = 0; i < len && (start + i) < finish; i++) {
764: if (buf[start + i] != chars[i])
765: return false;
766: }
767: } else {
768: for (i = 0; i < len && (start + i) < finish; i++) {
769: if (buf[start + i] != next.charAt(i))
770: return false;
771: }
772: }
773:
774: // if the first fillbuf didn't get enough data, give
775: // fillbuf another chance to read
776: if (i < len) {
777: if (reader == null || isClosed)
778: return false;
779:
780: //
781: // This diagnostic "knows" that the only way big strings would
782: // fail to be peeked is where it's a symbol ... e.g. for an
783: // </EndTag> construct. That knowledge could also be applied
784: // to get rid of the symbol length constraint, since having
785: // the wrong symbol is a fatal error anyway ...
786: //
787: if (len > buf.length)
788: fatal("P-077", new Object[] { new Integer(buf.length) });
789:
790: fillbuf();
791: return peek(next, chars);
792: }
793:
794: start += len;
795: return true;
796: }
797:
798: //
799: // Support for reporting the internal DTD subset, so <!DOCTYPE...>
800: // declarations can be recreated. This is collected as a single
801: // string; such subsets are normally small, and many applications
802: // don't even care about this.
803: //
804: public void startRemembering() {
805:
806: if (startRemember != 0)
807: throw new InternalError();
808: startRemember = start;
809: }
810:
811: public String rememberText() {
812:
813: String retval;
814:
815: // If the internal subset crossed a buffer boundary, we
816: // created a temporary buffer.
817: if (rememberedText != null) {
818: rememberedText.append(buf, startRemember, start
819: - startRemember);
820: retval = rememberedText.toString();
821: } else
822: retval = new String(buf, startRemember, start
823: - startRemember);
824:
825: startRemember = 0;
826: rememberedText = null;
827: return retval;
828: }
829:
830: private InputEntity getTopEntity() {
831:
832: InputEntity current = this ;
833:
834: // don't report locations within internal entities!
835:
836: while (current != null && current.input == null)
837: current = current.next;
838: return current == null ? this : current;
839: }
840:
841: /**
842: * Returns the public ID of this input source, if known
843: */
844: public String getPublicId() {
845:
846: InputEntity where = getTopEntity();
847: if (where == this )
848: return input.getPublicId();
849: return where.getPublicId();
850: }
851:
852: /**
853: * Returns the system ID of this input source, if known
854: */
855: public String getSystemId() {
856:
857: InputEntity where = getTopEntity();
858: if (where == this )
859: return input.getSystemId();
860: return where.getSystemId();
861: }
862:
863: /**
864: * Returns the current line number in this input source
865: */
866: public int getLineNumber() {
867:
868: InputEntity where = getTopEntity();
869: if (where == this )
870: return lineNumber;
871: return where.getLineNumber();
872: }
873:
874: /**
875: * returns -1; maintaining column numbers hurts performance
876: */
877: public int getColumnNumber() {
878:
879: return -1; // not maintained (speed)
880: }
881:
882: //
883: // n.b. for non-EOF end-of-buffer cases, reader should return
884: // at least a handful of bytes so various lookaheads behave.
885: //
886: // two character pushback exists except at first; characters
887: // represented by surrogate pairs can't be pushed back (they'd
888: // only be in character data anyway).
889: //
890: // DTD exception thrown on char conversion problems; line number
891: // will be low, as a rule.
892: //
893: private void fillbuf() throws IOException, SAXException {
894:
895: // don't touched fixed buffers, that'll usually
896: // change entity values (and isn't needed anyway)
897: // likewise, ignore closed streams
898: if (reader == null || isClosed)
899: return;
900:
901: // if remembering DTD text, copy!
902: if (startRemember != 0) {
903: if (rememberedText == null)
904: rememberedText = new StringBuffer(buf.length);
905: rememberedText.append(buf, startRemember, start
906: - startRemember);
907: }
908:
909: boolean extra = (finish > 0) && (start > 0);
910: int len;
911:
912: if (extra) // extra pushback
913: start--;
914: len = finish - start;
915:
916: System.arraycopy(buf, start, buf, 0, len);
917: start = 0;
918: finish = len;
919:
920: try {
921: len = buf.length - len;
922: len = reader.read(buf, finish, len);
923: } catch (UnsupportedEncodingException e) {
924: fatal("P-075", new Object[] { e.getMessage() });
925: } catch (CharConversionException e) {
926: fatal("P-076", new Object[] { e.getMessage() });
927: }
928: if (len >= 0)
929: finish += len;
930: else
931: close();
932: if (extra) // extra pushback
933: start++;
934:
935: if (startRemember != 0)
936: // assert extra == true
937: startRemember = 1;
938: }
939:
940: public void close() {
941:
942: try {
943: if (reader != null && !isClosed)
944: reader.close();
945: isClosed = true;
946: } catch (IOException e) {
947: /* NOTHING */
948: }
949: }
950:
951: private void fatal(String messageId, Object params[])
952: throws SAXException {
953:
954: SAXParseException x = new SAXParseException(DTDParser.messages
955: .getMessage(locale, messageId, params), null);
956:
957: // not continuable ... e.g. WF errors
958: close();
959: errHandler.fatalError(x);
960: throw x;
961: }
962: }
|