001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.readers;
059:
060: import org.apache.xerces.framework.XMLErrorReporter;
061: import org.apache.xerces.utils.CharDataChunk;
062: import org.apache.xerces.utils.StringPool;
063: import java.io.Reader;
064:
065: /**
066: * General purpose character stream reader.
067: *
068: * This class is used when the input source for the document entity is
069: * specified using a character stream, when the input source is specified
070: * using a byte stream with an explicit encoding, or when a recognizer
071: * scans the encoding decl from the byte stream and chooses to use this
072: * reader class for that encoding. For the latter two cases, the byte
073: * stream is wrapped in the appropriate InputStreamReader using the
074: * desired encoding.
075: *
076: * @version
077: */
078: final class CharReader extends AbstractCharReader {
079: //
080: //
081: //
082: CharReader(XMLEntityHandler entityHandler,
083: XMLErrorReporter errorReporter,
084: boolean sendCharDataAsCharArray, Reader reader,
085: StringPool stringPool) throws Exception {
086: super (entityHandler, errorReporter, sendCharDataAsCharArray,
087: stringPool);
088: fCharacterStream = reader;
089: fillCurrentChunk();
090: }
091:
092: //
093: //
094: //
095: private Reader fCharacterStream = null;
096: //
097: // When we fill a chunk there may be data that was read from the
098: // input stream that has not been "processed". We need to save
099: // that data, and any in-progress state, between the calls to
100: // fillCurrentChunk() in these instance variables.
101: //
102: private boolean fCheckOverflow = false;
103: private char[] fOverflow = null;
104: private int fOverflowOffset = 0;
105: private int fOverflowEnd = 0;
106: private int fOutputOffset = 0;
107: private boolean fSkipLinefeed = false;
108:
109: //
110: //
111: //
112: protected int fillCurrentChunk() throws Exception {
113: //
114: // See if we can find a way to reuse the buffer that may have been returned
115: // with a recyled data chunk.
116: //
117: char[] recycledData = fCurrentChunk.toCharArray();
118: //
119: // If we have overflow from the last call, normalize from where
120: // we left off, copying into the front of the output buffer.
121: //
122: fOutputOffset = 0;
123: if (fCheckOverflow) {
124: //
125: // The fOverflowEnd should always be equal to CHUNK_SIZE, unless we hit
126: // EOF during the previous call. Copy the remaining data to the front
127: // of the buffer and return it as the final chunk.
128: //
129: fMostRecentData = recycledData;
130: if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) {
131: recycledData = null;
132: if (fOverflowEnd > 0) {
133: if (fMostRecentData == null
134: || fMostRecentData.length < 1
135: + fOverflowEnd - fOverflowOffset)
136: fMostRecentData = new char[1 + fOverflowEnd
137: - fOverflowOffset];
138: copyNormalize(fOverflow, fOverflowOffset,
139: fMostRecentData, fOutputOffset);
140: } else {
141: if (fMostRecentData == null)
142: fMostRecentData = new char[1];
143: }
144: fMostRecentData[fOutputOffset] = 0;
145: //
146: // Update our instance variables
147: //
148: fOverflow = null;
149: fLength += fOutputOffset;
150: fCurrentIndex = 0;
151: fCurrentChunk.setCharArray(fMostRecentData);
152: return (fMostRecentChar = fMostRecentData[0]);
153: }
154: if (fMostRecentData == null
155: || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
156: fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
157: else
158: recycledData = null;
159: copyNormalize(fOverflow, fOverflowOffset, fMostRecentData,
160: fOutputOffset);
161: fCheckOverflow = false;
162: } else {
163: if (fOverflow == null) {
164: fOverflow = recycledData;
165: if (fOverflow == null
166: || fOverflow.length < CharDataChunk.CHUNK_SIZE)
167: fOverflow = new char[CharDataChunk.CHUNK_SIZE];
168: else
169: recycledData = null;
170: }
171: fMostRecentData = null;
172: }
173: while (true) {
174: fOverflowOffset = 0;
175: fOverflowEnd = 0;
176: int capacity = CharDataChunk.CHUNK_SIZE;
177: int result = 0;
178: do {
179: try {
180: result = fCharacterStream.read(fOverflow,
181: fOverflowEnd, capacity);
182: } catch (java.io.IOException ex) {
183: result = -1;
184: }
185: if (result == -1) {
186: //
187: // We have reached the end of the stream.
188: //
189: fCharacterStream.close();
190: fCharacterStream = null;
191: if (fMostRecentData == null) {
192: //
193: // There is no previous output data, so we know that all of the
194: // new input data will fit.
195: //
196: fMostRecentData = recycledData;
197: if (fMostRecentData == null
198: || fMostRecentData.length < 1 + fOverflowEnd)
199: fMostRecentData = new char[1 + fOverflowEnd];
200: else
201: recycledData = null;
202: copyNormalize(fOverflow, fOverflowOffset,
203: fMostRecentData, fOutputOffset);
204: fOverflow = null;
205: fMostRecentData[fOutputOffset] = 0;
206: } else {
207: //
208: // Copy the input data to the end of the output buffer.
209: //
210: boolean alldone = copyNormalize(fOverflow,
211: fOverflowOffset, fMostRecentData,
212: fOutputOffset);
213: if (alldone) {
214: if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
215: //
216: // Special case - everything fit into the overflow buffer,
217: // except that there is no room for the nul char we use to
218: // indicate EOF. Set the overflow buffer length to zero.
219: // On the next call to this method, we will detect this
220: // case and which we will handle above .
221: //
222: fCheckOverflow = true;
223: fOverflowOffset = 0;
224: fOverflowEnd = 0;
225: } else {
226: //
227: // It all fit into the output buffer.
228: //
229: fOverflow = null;
230: fMostRecentData[fOutputOffset] = 0;
231: }
232: } else {
233: //
234: // There is still input data left over, save the remaining data as
235: // the overflow buffer for the next call.
236: //
237: fCheckOverflow = true;
238: }
239: }
240: break;
241: }
242: if (result > 0) {
243: fOverflowEnd += result;
244: capacity -= result;
245: }
246: } while (capacity > 0);
247: //
248: //
249: //
250: if (result == -1)
251: break;
252: if (fMostRecentData != null) {
253: boolean alldone = copyNormalize(fOverflow,
254: fOverflowOffset, fMostRecentData, fOutputOffset);
255: if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
256: //
257: // We filled the output buffer.
258: //
259: if (!alldone) {
260: //
261: // The input buffer will become the next overflow buffer.
262: //
263: fCheckOverflow = true;
264: }
265: break;
266: }
267: } else {
268: //
269: // Now normalize the end-of-line characters and see if we need to read more
270: // chars to fill up the buffer.
271: //
272: fMostRecentData = recycledData;
273: if (fMostRecentData == null
274: || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
275: fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
276: else
277: recycledData = null;
278: copyNormalize(fOverflow, fOverflowOffset,
279: fMostRecentData, fOutputOffset);
280: if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
281: //
282: // The output buffer is full. We can return now.
283: //
284: break;
285: }
286: }
287: //
288: // We will need to get another intput buffer to be able to fill the
289: // overflow buffer completely.
290: //
291: }
292: //
293: // Update our instance variables
294: //
295: fLength += fOutputOffset;
296: fCurrentIndex = 0;
297: fCurrentChunk.setCharArray(fMostRecentData);
298: return (fMostRecentChar = fMostRecentData[0]);
299: }
300:
301: //
302: // Copy and normalize chars from the overflow buffer into chars in our data buffer.
303: //
304: private boolean copyNormalize(char[] in, int inOffset, char[] out,
305: int outOffset) throws Exception {
306: //
307: // Handle all edge cases before dropping into the inner loop.
308: //
309: int inEnd = fOverflowEnd;
310: int outEnd = out.length;
311: if (inOffset == inEnd)
312: return true;
313: char b = in[inOffset];
314: if (fSkipLinefeed) {
315: fSkipLinefeed = false;
316: if (b == 0x0A) {
317: if (++inOffset == inEnd)
318: return exitNormalize(inOffset, outOffset, true);
319: b = in[inOffset];
320: }
321: }
322: while (outOffset < outEnd) {
323: //
324: // Find the longest run that we can guarantee will not exceed the
325: // bounds of the outer loop.
326: //
327: int inCount = inEnd - inOffset;
328: int outCount = outEnd - outOffset;
329: if (inCount > outCount)
330: inCount = outCount;
331: inOffset++;
332: while (true) {
333: while (b == 0x0D) {
334: out[outOffset++] = 0x0A;
335: if (inOffset == inEnd) {
336: fSkipLinefeed = true;
337: return exitNormalize(inOffset, outOffset, true);
338: }
339: b = in[inOffset];
340: if (b == 0x0A) {
341: if (++inOffset == inEnd)
342: return exitNormalize(inOffset, outOffset,
343: true);
344: b = in[inOffset];
345: }
346: if (outOffset == outEnd)
347: return exitNormalize(inOffset, outOffset, false);
348: inCount = inEnd - inOffset;
349: outCount = outEnd - outOffset;
350: if (inCount > outCount)
351: inCount = outCount;
352: inOffset++;
353: }
354: while (true) {
355: out[outOffset++] = b;
356: if (--inCount == 0)
357: break;
358: b = in[inOffset++];
359: if (b == 0x0D)
360: break;
361: }
362: if (inCount == 0)
363: break;
364: }
365: if (inOffset == inEnd)
366: break;
367: }
368: return exitNormalize(inOffset, outOffset, inOffset == inEnd);
369: }
370:
371: //
372: //
373: //
374: private boolean exitNormalize(int inOffset, int outOffset,
375: boolean result) {
376: fOverflowOffset = inOffset;
377: fOutputOffset = outOffset;
378: return result;
379: }
380: }
|