001: /* ReplayInputStream
002: *
003: * $Id: ReplayInputStream.java 5026 2007-03-28 02:48:47Z gojomo $
004: *
005: * Created on Sep 24, 2003
006: *
007: * Copyright (C) 2003 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.io;
026:
027: import java.io.File;
028: import java.io.IOException;
029: import java.io.OutputStream;
030:
031: /**
032: * Replays the bytes recorded from a RecordingInputStream or
033: * RecordingOutputStream.
034: *
035: * This InputStream supports mark and reset.
036: *
037: * @author gojomo
038: */
039: public class ReplayInputStream extends SeekInputStream {
040: private BufferedSeekInputStream diskStream;
041: private byte[] buffer;
042: private long position;
043:
044: /**
045: * Total size of stream content.
046: *
047: * Size of data to replay.
048: */
049: private long size = -1;
050:
051: /**
052: * Where the response body starts, if marked
053: */
054: protected long responseBodyStart = -1;
055:
056: /**
057: * Constructor.
058: *
059: * @param buffer Buffer to read from.
060: * @param size Size of data to replay.
061: * @param responseBodyStart Start of the response body.
062: * @param backingFilename Backing file that sits behind the buffer. If
063: * <code>size<code> > than buffer then we go to backing file to read
064: * data that is beyond buffer.length.
065: *
066: * @throws IOException If we fail to open an input stream on
067: * backing file.
068: */
069: public ReplayInputStream(byte[] buffer, long size,
070: long responseBodyStart, String backingFilename)
071: throws IOException {
072: this (buffer, size, backingFilename);
073: this .responseBodyStart = responseBodyStart;
074: }
075:
076: /**
077: * Constructor.
078: *
079: * @param buffer Buffer to read from.
080: * @param size Size of data to replay.
081: * @param backingFilename Backing file that sits behind the buffer. If
082: * <code>size<code> > than buffer then we go to backing file to read
083: * data that is beyond buffer.length.
084: * @throws IOException If we fail to open an input stream on
085: * backing file.
086: */
087: public ReplayInputStream(byte[] buffer, long size,
088: String backingFilename) throws IOException {
089: this .buffer = buffer;
090: this .size = size;
091: if (size > buffer.length) {
092: RandomAccessInputStream rais = new RandomAccessInputStream(
093: new File(backingFilename));
094: diskStream = new BufferedSeekInputStream(rais, 4096);
095: }
096: }
097:
098: public long setToResponseBodyStart() throws IOException {
099: position(responseBodyStart);
100: return this .position;
101: }
102:
103: /* (non-Javadoc)
104: * @see java.io.InputStream#read()
105: */
106: public int read() throws IOException {
107: if (position == size) {
108: return -1; // EOF
109: }
110: if (position < buffer.length) {
111: // Convert to unsigned int.
112: int c = buffer[(int) position] & 0xFF;
113: position++;
114: return c;
115: }
116: int c = diskStream.read();
117: if (c >= 0) {
118: position++;
119: }
120: return c;
121: }
122:
123: /*
124: * (non-Javadoc)
125: *
126: * @see java.io.InputStream#read(byte[], int, int)
127: */
128: public int read(byte[] b, int off, int len) throws IOException {
129: if (position == size) {
130: return -1; // EOF
131: }
132: if (position < buffer.length) {
133: int toCopy = (int) Math.min(size - position, Math.min(len,
134: buffer.length - position));
135: System.arraycopy(buffer, (int) position, b, off, toCopy);
136: if (toCopy > 0) {
137: position += toCopy;
138: }
139: return toCopy;
140: }
141: // into disk zone
142: int read = diskStream.read(b, off, len);
143: if (read > 0) {
144: position += read;
145: }
146: return read;
147: }
148:
149: public void readFullyTo(OutputStream os) throws IOException {
150: byte[] buf = new byte[4096];
151: int c = read(buf);
152: while (c != -1) {
153: os.write(buf, 0, c);
154: c = read(buf);
155: }
156: }
157:
158: /*
159: * Like 'readFullyTo', but only reads the header-part.
160: * Starts from the beginning each time it is called.
161: */
162: public void readHeaderTo(OutputStream os) throws IOException {
163: position = 0;
164: byte[] buf = new byte[(int) responseBodyStart];
165: int c = read(buf, 0, buf.length);
166: if (c != -1) {
167: os.write(buf, 0, c);
168: }
169: }
170:
171: /*
172: * Like 'readFullyTo', but only reads the content-part.
173: */
174: public void readContentTo(OutputStream os) throws IOException {
175: setToResponseBodyStart();
176: byte[] buf = new byte[4096];
177: int c = read(buf);
178: while (c != -1) {
179: os.write(buf, 0, c);
180: c = read(buf);
181: }
182: }
183:
184: public void readContentTo(OutputStream os, int maxSize)
185: throws IOException {
186: setToResponseBodyStart();
187: byte[] buf = new byte[4096];
188: int c = read(buf);
189: int tot = 0;
190: while (c != -1 && tot < maxSize) {
191: os.write(buf, 0, c);
192: c = read(buf);
193: tot += c;
194: }
195: }
196:
197: /* (non-Javadoc)
198: * @see java.io.InputStream#close()
199: */
200: public void close() throws IOException {
201: super .close();
202: if (diskStream != null) {
203: diskStream.close();
204: }
205: }
206:
207: /**
208: * Total size of stream content.
209: * @return Returns the size.
210: */
211: public long getSize() {
212: return size;
213: }
214:
215: /**
216: * Total size of header.
217: * @return the size of the header.
218: */
219: public long getHeaderSize() {
220: return responseBodyStart;
221: }
222:
223: /**
224: * Total size of content.
225: * @return the size of the content.
226: */
227: public long getContentSize() {
228: return size - responseBodyStart;
229: }
230:
231: /**
232: * @return Amount THEORETICALLY remaining (TODO: Its not theoretical
233: * seemingly. The class implemetentation depends on it being exact).
234: */
235: public long remaining() {
236: return size - position;
237: }
238:
239: /**
240: * Reposition the stream.
241: *
242: * @param p the new position for this stream
243: * @throws IOException if an IO error occurs
244: */
245: public void position(long p) throws IOException {
246: if (p < 0) {
247: throw new IOException("Negative seek offset.");
248: }
249: if (p > size) {
250: throw new IOException("Desired position exceeds size.");
251: }
252: if (p < buffer.length) {
253: // Only seek file if necessary
254: if (position > buffer.length) {
255: diskStream.position(0);
256: }
257: } else {
258: diskStream.position(p - buffer.length);
259: }
260: this .position = p;
261: }
262:
263: public long position() throws IOException {
264: return position;
265: }
266: }
|