001: /*
002: * $Id: XMLReaderReader.java,v 1.5 2004/07/11 09:37:37 yuvalo Exp $
003: *
004: * (C) Copyright 2002-2004 by Yuval Oren. All rights reserved.
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: package com.bluecast.xml;
020:
021: import java.io.*;
022: import com.bluecast.io.*;
023: import java.util.*;
024:
025: /**
026: * A Reader for XML documents and streams. This class prepares a Reader
027: * source for XML processing by converting CR/LF patterns to LF and by
028: * checking for illegal XML characters.
029: *
030: * @author Yuval Oren, yuval@bluecast.com
031: * @version $Revision: 1.5 $
032: */
033: final public class XMLReaderReader extends XMLInputReader {
034: private static final int BUFFER_SIZE = 8192;
035: private Reader in;
036: private boolean rewindDeclaration;
037: private char[] cbuf = new char[BUFFER_SIZE];
038: private int cbufPos = 0, cbufEnd = 0;
039: private boolean eofReached = false;
040: private boolean sawCR = false;
041:
042: private char[] oneCharBuf = new char[1];
043:
044: /**
045: * Create an XMLReaderReader without providing an input Reader yet.
046: * You must call reset() before using.
047: */
048: public XMLReaderReader() {
049: }
050:
051: /**
052: * Creates an XMLReaderReader and resets the reader position
053: * after reading the XML declaration.
054: *
055: * @param in the input source
056: */
057:
058: public XMLReaderReader(Reader in) throws IOException {
059: this (in, true);
060: }
061:
062: /**
063: * Creates an XMLReaderReader.
064: *
065: * @param in the input source
066: * @param rewindDeclaration a value of false will skip past any
067: * XML declaration. True will dish out the entire document.
068: */
069: public XMLReaderReader(Reader in, boolean rewindDeclaration)
070: throws IOException {
071: reset(in, rewindDeclaration);
072: }
073:
074: public void reset(Reader in, boolean rewindDeclaration)
075: throws IOException {
076:
077: super .resetInput();
078: this .in = in;
079: this .rewindDeclaration = rewindDeclaration;
080: cbufPos = cbufEnd = 0;
081: sawCR = false;
082: eofReached = false;
083: fillCharBuffer();
084: processXMLDecl();
085: }
086:
087: public void close() throws IOException {
088: eofReached = true;
089: cbufPos = cbufEnd = 0;
090: if (in != null)
091: in.close();
092: }
093:
094: public void mark(int readAheadLimit) throws IOException {
095: throw new UnsupportedOperationException("mark() not supported");
096: }
097:
098: public boolean markSupported() {
099: return false;
100: }
101:
102: public int read() throws IOException {
103: int n = read(oneCharBuf, 0, 1);
104: if (n <= 0)
105: return n;
106: else
107: return oneCharBuf[0];
108: }
109:
110: public int read(char[] destbuf) throws IOException {
111: return read(destbuf, 0, destbuf.length);
112: }
113:
114: public int read(char[] destbuf, int off, int len)
115: throws IOException {
116: int charsRead = 0;
117: char c;
118:
119: while (charsRead < len) {
120: if (cbufPos < cbufEnd)
121: c = cbuf[cbufPos++];
122: else if (eofReached)
123: break;
124: else
125: // Only fill the buffer once per call to avoid unintentional blocking
126: // However, always read at least one character
127: if (charsRead == 0 || in.ready()) {
128: fillCharBuffer();
129: continue;
130: } else {
131: // Reading any more would block.
132: break;
133: }
134:
135: if (c >= 0x20) {
136: if ((c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)
137: || (c >= 0x10000 && c <= 0x10FFFF)) {
138:
139: sawCR = false;
140: destbuf[off + (charsRead++)] = c;
141: } else
142: throw new IllegalCharException(
143: "Illegal XML Character: 0x"
144: + Integer.toHexString(c));
145: } else {
146: switch (c) {
147: case '\n':
148: if (sawCR) {
149: sawCR = false;
150: } else
151: destbuf[off + (charsRead++)] = '\n';
152: break;
153:
154: case '\r':
155: sawCR = true;
156: destbuf[off + (charsRead++)] = '\n';
157: break;
158:
159: case '\t':
160: destbuf[off + (charsRead++)] = '\t';
161: break;
162:
163: default:
164: throw new IllegalCharException(
165: "Illegal XML character: 0x"
166: + Integer.toHexString(c));
167: }
168: }
169: }
170: return ((charsRead == 0 && eofReached) ? -1 : charsRead);
171: }
172:
173: public boolean ready() throws IOException {
174: return ((cbufEnd - cbufPos > 0) || in.ready());
175: }
176:
177: public void reset() throws IOException {
178: super .resetInput();
179: in.reset();
180: cbufPos = cbufEnd = 0;
181: sawCR = false;
182: eofReached = false;
183: }
184:
185: public long skip(long n) throws IOException {
186: int charsRead = 0;
187: char c;
188:
189: while (charsRead < n) {
190: if (cbufPos < cbufEnd)
191: c = cbuf[cbufPos++];
192: else {
193: if (eofReached)
194: break;
195: fillCharBuffer();
196: continue;
197: }
198:
199: if (c >= 0x20) {
200: if ((c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)
201: || (c >= 0x10000 && c <= 0x10FFFF)) {
202:
203: sawCR = false;
204: charsRead++;
205: } else
206: throw new IllegalCharException(
207: "Illegal XML Character: 0x"
208: + Integer.toHexString(c));
209: } else {
210: switch (c) {
211: case '\n':
212: if (sawCR) {
213: sawCR = false;
214: } else
215: charsRead++;
216: break;
217:
218: case '\r':
219: sawCR = true;
220: charsRead++;
221: break;
222:
223: case '\t':
224: charsRead++;
225: break;
226:
227: default:
228: throw new IllegalCharException(
229: "Illegal XML character: 0x"
230: + Integer.toHexString(c));
231: }
232: }
233: }
234: return ((charsRead == 0 && eofReached) ? -1 : charsRead);
235: }
236:
237: private void fillCharBuffer() throws IOException {
238: cbufPos = 0;
239: cbufEnd = in.read(cbuf, 0, BUFFER_SIZE);
240: if (cbufEnd <= 0)
241: eofReached = true;
242: }
243:
244: /* Read [max] characters, parse the <?xml...?> tag
245: * push it back onto the stream. Create a reader. Then, if there was
246: * no error parsing the declaration, eat up the declaration.
247: */
248: private void processXMLDecl() throws IOException {
249: int numCharsParsed = parseXMLDeclaration(cbuf, 0, cbufEnd);
250:
251: if (numCharsParsed > 0) {
252: // Declaration found and parsed
253:
254: // Skip the XML declaration unless told otherwise
255: if (!rewindDeclaration)
256: cbufPos += numCharsParsed;
257: }
258: }
259: }
|