001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils;
059:
060: import java.io.IOException;
061: import java.io.InputStream;
062:
/**
 * This class is used for accessing the data provided by an InputStream.
 *
 * <p>There are two ways in which this class is used. The first occurs
 * when we are prescanning the start of the stream to determine the
 * encoding being used. Since we do not require that the stream be
 * positionable, we wrap it with an instance of this class. The first
 * "chunk" of the file is read and the data may be accessed directly
 * using the byteAt(offset) method. After we have determined the
 * encoding of the byte stream, the instance of this class is passed
 * on to the EntityReader that will process the data for the scanner.
 *
 * <p>At this point, the reader may continue to access this instance using
 * the byteAt method, which will continue to read the contents into
 * chunks as required until end of input. An example of this is the
 * UCSReader.
 *
 * <p>Alternatively, the reader may access this instance as an InputStream
 * which will first return any data that has been read into the
 * chunks, and will then return the remaining data from the original
 * InputStream directly.
 *
 * <p>The two access styles are not meant to be interleaved: once the
 * stream-style <code>read</code> methods have started pulling bytes
 * directly from the wrapped stream, those bytes are not buffered, so
 * {@link #byteAt(int)} and {@link #rewind()} no longer describe a
 * contiguous view of the input.
 */
public final class ChunkyByteArray extends InputStream {

    //
    // Chunk size constants
    //

    /** log2 of the chunk size: 2^14 = 16k. */
    private static final int CHUNK_SHIFT = 14;
    /** Number of bytes held by one chunk. */
    private static final int CHUNK_SIZE = (1 << CHUNK_SHIFT);
    /** Mask that extracts the position within a chunk from a stream offset. */
    private static final int CHUNK_MASK = CHUNK_SIZE - 1;
    /** Initial number of slots in the chunk table: enough for 2^20 = 1m bytes. */
    private static final int INITIAL_CHUNK_COUNT = (1 << (20 - CHUNK_SHIFT));

    //
    // Instance variables
    //

    /** The wrapped stream; <code>null</code> once it has been exhausted or closed. */
    private InputStream fInputStream = null;
    /** Table of chunks; slot <code>i</code> holds bytes <code>i*CHUNK_SIZE ...</code> or is null if not loaded. */
    private byte[][] fData = new byte[INITIAL_CHUNK_COUNT][];
    /** Total number of bytes that have been read into the chunks. */
    private int fLength = 0;
    /** Position of the next buffered byte returned by the read methods. */
    private int fOffset = 0;

    /**
     * Constructor
     *
     * Reads the first chunk.
     *
     * @param is The input stream containing the data of the entity.
     * @exception IOException if reading the first chunk fails.
     * @exception NullPointerException if <code>is</code> is null.
     */
    public ChunkyByteArray(InputStream is) throws IOException {
        if (is == null) {
            throw new NullPointerException("is");
        }
        fInputStream = is;
        fill();
    }

    /**
     * Read a byte.
     *
     * Buffered bytes (from any loaded chunk) are returned first; after
     * that the call is forwarded to the wrapped stream.
     *
     * @return The next byte of the input data as a value in the range
     *         0 to 255, or -1 if there is no more data.
     */
    public int read() throws IOException {
        if (fOffset < fLength) {
            byte b = fData[fOffset >> CHUNK_SHIFT][fOffset & CHUNK_MASK];
            fOffset++;
            // Mask so that bytes >= 0x80 are not sign-extended; an
            // unmasked 0xFF would be indistinguishable from end of input.
            return b & 0xFF;
        }
        return fInputStream == null ? -1 : fInputStream.read();
    }

    /**
     * Read bytes.
     *
     * While buffered data remains, at most the rest of the current
     * chunk is returned per call; once the buffered data is used up the
     * call is forwarded to the wrapped stream.
     *
     * @param buffer The destination for the bytes returned. If null, then
     *               the data will be discarded instead of returned.
     * @param offset The offset within the buffer where the first returned
     *               byte should be placed.
     * @param length The maximum number of bytes to place in the buffer or discard.
     * @return The number of bytes actually placed in the buffer or discarded,
     *         or -1 if there is no more data.
     */
    public int read(byte[] buffer, int offset, int length)
        throws IOException {
        int bytesLeft = fLength - fOffset;
        if (bytesLeft <= 0) {
            if (fInputStream == null) {
                return -1;
            }
            if (buffer != null) {
                return fInputStream.read(buffer, offset, length);
            }
            // Discard mode: the wrapped stream cannot be handed a null
            // buffer, so read into a throw-away array instead.
            if (length <= 0) {
                return 0;
            }
            byte[] scratch = new byte[Math.min(length, CHUNK_SIZE)];
            return fInputStream.read(scratch, 0, scratch.length);
        }
        if (length <= 0) {
            return 0;
        }
        // Never cross a chunk boundary in a single copy; returning fewer
        // bytes than requested is allowed and the caller simply reads again.
        int position = fOffset & CHUNK_MASK;
        int count = Math.min(length, Math.min(bytesLeft, CHUNK_SIZE - position));
        if (buffer != null) {
            System.arraycopy(fData[fOffset >> CHUNK_SHIFT], position, buffer, offset, count);
        }
        fOffset += count;
        return count;
    }

    /**
     * Reset position within the data stream back to
     * the very beginning.
     *
     * Only the position used by the read methods is reset; bytes that
     * were already taken directly from the wrapped stream are not
     * buffered and cannot be replayed.
     */
    public void rewind() {
        fOffset = 0;
    }

    /**
     * Return a byte of input data at the given offset.
     *
     * Chunks are loaded from the wrapped stream as needed to reach the
     * requested offset. When the end of input has been reached, the
     * position immediately after the last data byte holds the marker
     * value <code>(byte) 0xff</code>; any position beyond that yields 0.
     * Use {@link #atEOF(int)} to tell the marker apart from real data.
     *
     * @param offset The offset in the data stream.
     * @return The byte at the specified position within the data stream.
     * @exception IndexOutOfBoundsException if <code>offset</code> is negative.
     */
    public byte byteAt(int offset) throws IOException {
        if (offset < 0) {
            throw new IndexOutOfBoundsException("negative offset: " + offset);
        }
        int chunk = offset >> CHUNK_SHIFT;
        int index = offset & CHUNK_MASK;
        // Each fill() either loads one more chunk or detects end of input
        // (clearing fInputStream), so this loop always terminates.
        while (fInputStream != null
               && (chunk >= fData.length || fData[chunk] == null)) {
            fill();
        }
        if (chunk < fData.length && fData[chunk] != null) {
            return fData[chunk][index];
        }
        return 0;
    }

    /**
     * Test to see if an offset is at the end of the input data.
     *
     * The comparison is against the number of bytes buffered so far, and
     * it is strict: the position equal to that count (where the end
     * marker is stored once the input is exhausted) is not reported as
     * end of input, only positions after it are.
     *
     * @param offset A position in the data stream.
     * @return <code>true</code> if the position is past the buffered data;
     *         <code>false</code> otherwise.
     */
    public boolean atEOF(int offset) {
        return (offset > fLength);
    }

    /**
     * Closes this input Stream
     *
     * Closes the wrapped stream if it is still open; calling this more
     * than once has no further effect.
     *
     * @exception IOException
     */
    public void close() throws IOException {
        InputStream in = fInputStream;
        if (in != null) {
            fInputStream = null; // forget it even if close() throws
            in.close();
        }
    }

    //
    // Fill in the next chunk with additional data.
    //
    // Reads until the chunk is full or the wrapped stream reports end of
    // input. At end of input a 0xff marker byte is stored after the last
    // data byte and the wrapped stream is closed and forgotten. Does
    // nothing if the wrapped stream is already gone.
    //
    private void fill() throws IOException {
        if (fInputStream == null) {
            return;
        }
        int bufnum = fLength >> CHUNK_SHIFT;
        if (bufnum >= fData.length) {
            // chunk table is not big enough; double it
            byte[][] newdata = new byte[fData.length * 2][];
            System.arraycopy(fData, 0, newdata, 0, fData.length);
            fData = newdata;
        }
        // Normally fLength is chunk-aligned here; resuming inside an
        // existing chunk only happens after an earlier read failed midway.
        int offset = fLength & CHUNK_MASK;
        byte[] data = fData[bufnum];
        if (data == null) {
            data = new byte[CHUNK_SIZE];
            fData[bufnum] = data;
        }
        int capacity = CHUNK_SIZE - offset;
        while (capacity > 0) {
            int result = fInputStream.read(data, offset, capacity);
            if (result == -1) {
                data[offset] = (byte) 0xff;
                try {
                    fInputStream.close();
                } finally {
                    fInputStream = null;
                }
                break;
            }
            if (result > 0) {
                fLength += result;
                offset += result;
                capacity -= result;
            }
        }
    }
}
|