001: /* HTTPRecorder
002: *
003: * $Id: HttpRecorder.java 4498 2006-08-15 04:39:00Z gojomo $
004: *
005: * Created on Sep 22, 2003
006: *
007: * Copyright (C) 2003 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util;
026:
027: import java.io.BufferedInputStream;
028: import java.io.File;
029: import java.io.IOException;
030: import java.io.InputStream;
031: import java.io.OutputStream;
032: import java.util.logging.Level;
033: import java.util.logging.Logger;
034:
035: import org.archive.io.RecordingInputStream;
036: import org.archive.io.RecordingOutputStream;
037: import org.archive.io.ReplayCharSequence;
038: import org.archive.io.ReplayInputStream;
039:
040: /**
041: * Pairs together a RecordingInputStream and RecordingOutputStream
042: * to capture exactly a single HTTP transaction.
043: *
044: * Initially only supports HTTP/1.0 (one request, one response per stream)
045: *
046: * Call {@link #markContentBegin()} to demarc the transition between HTTP
047: * header and body.
048: *
049: * @author gojomo
050: */
051: public class HttpRecorder {
052: protected static Logger logger = Logger
053: .getLogger("org.archive.util.HttpRecorder");
054:
055: private static final int DEFAULT_OUTPUT_BUFFER_SIZE = 4096;
056: private static final int DEFAULT_INPUT_BUFFER_SIZE = 65536;
057:
058: private RecordingInputStream ris = null;
059: private RecordingOutputStream ros = null;
060:
061: /**
062: * Backing file basename.
063: *
064: * Keep it around so can clean up backing files left on disk.
065: */
066: private String backingFileBasename = null;
067:
068: /**
069: * Backing file output stream suffix.
070: */
071: private static final String RECORDING_OUTPUT_STREAM_SUFFIX = ".ros";
072:
073: /**
074: * Backing file input stream suffix.
075: */
076: private static final String RECORDING_INPUT_STREAM_SUFFIX = ".ris";
077:
078: /**
079: * Response character encoding.
080: */
081: private String characterEncoding = null;
082:
083: /**
084: * Constructor with limited access.
085: * Used internally for case where we're wrapping an already
086: * downloaded stream with a HttpRecorder.
087: */
088: protected HttpRecorder() {
089: super ();
090: }
091:
092: /**
093: * Create an HttpRecorder.
094: *
095: * @param tempDir Directory into which we drop backing files for
096: * recorded input and output.
097: * @param backingFilenameBase Backing filename base to which we'll append
098: * suffices <code>ris</code> for recorded input stream and
099: * <code>ros</code> for recorded output stream.
100: * @param outBufferSize Size of output buffer to use.
101: * @param inBufferSize Size of input buffer to use.
102: */
103: public HttpRecorder(File tempDir, String backingFilenameBase,
104: int outBufferSize, int inBufferSize) {
105: super ();
106: tempDir.mkdirs();
107: this .backingFileBasename = (new File(tempDir.getPath(),
108: backingFilenameBase)).getAbsolutePath();
109: this .ris = new RecordingInputStream(inBufferSize,
110: this .backingFileBasename
111: + RECORDING_INPUT_STREAM_SUFFIX);
112: this .ros = new RecordingOutputStream(outBufferSize,
113: this .backingFileBasename
114: + RECORDING_OUTPUT_STREAM_SUFFIX);
115: }
116:
117: /**
118: * Create an HttpRecorder.
119: *
120: * @param tempDir
121: * Directory into which we drop backing files for recorded input
122: * and output.
123: * @param backingFilenameBase
124: * Backing filename base to which we'll append suffices
125: * <code>ris</code> for recorded input stream and
126: * <code>ros</code> for recorded output stream.
127: */
128: public HttpRecorder(File tempDir, String backingFilenameBase) {
129: this (tempDir, backingFilenameBase, DEFAULT_INPUT_BUFFER_SIZE,
130: DEFAULT_OUTPUT_BUFFER_SIZE);
131: }
132:
133: /**
134: * Wrap the provided stream with the internal RecordingInputStream
135: *
136: * open() throws an exception if RecordingInputStream is already open.
137: *
138: * @param is InputStream to wrap.
139: *
140: * @return The input stream wrapper which itself is an input stream.
141: * Pass this in place of the passed stream so input can be recorded.
142: *
143: * @throws IOException
144: */
145: public InputStream inputWrap(InputStream is) throws IOException {
146: logger.fine(Thread.currentThread().getName()
147: + " wrapping input");
148: this .ris.open(is);
149: return this .ris;
150: }
151:
152: /**
153: * Wrap the provided stream with the internal RecordingOutputStream
154: *
155: * open() throws an exception if RecordingOutputStream is already open.
156: *
157: * @param os The output stream to wrap.
158: *
159: * @return The output stream wrapper which is itself an output stream.
160: * Pass this in place of the passed stream so output can be recorded.
161: *
162: * @throws IOException
163: */
164: public OutputStream outputWrap(OutputStream os) throws IOException {
165: this .ros.open(os);
166: return this .ros;
167: }
168:
169: /**
170: * Close all streams.
171: */
172: public void close() {
173: logger.fine(Thread.currentThread().getName() + " closing");
174: try {
175: this .ris.close();
176: } catch (IOException e) {
177: // TODO: Can we not let the exception out of here and report it
178: // higher up in the caller?
179: DevUtils.logger.log(Level.SEVERE, "close() ris"
180: + DevUtils.extraInfo(), e);
181: }
182: try {
183: this .ros.close();
184: } catch (IOException e) {
185: DevUtils.logger.log(Level.SEVERE, "close() ros"
186: + DevUtils.extraInfo(), e);
187: }
188: }
189:
190: /**
191: * Return the internal RecordingInputStream
192: *
193: * @return A RIS.
194: */
195: public RecordingInputStream getRecordedInput() {
196: return this .ris;
197: }
198:
199: /**
200: * @return The RecordingOutputStream.
201: */
202: public RecordingOutputStream getRecordedOutput() {
203: return this .ros;
204: }
205:
206: /**
207: * Mark current position as the point where the HTTP headers end.
208: */
209: public void markContentBegin() {
210: this .ris.markContentBegin();
211: }
212:
213: public long getResponseContentLength() {
214: return this .ris.getResponseContentLength();
215: }
216:
217: /**
218: * Close both input and output recorders.
219: *
220: * Recorders are the output streams to which we are recording.
221: * {@link #close()} closes the stream that is being recorded and the
222: * recorder. This method explicitly closes the recorder only.
223: */
224: public void closeRecorders() {
225: try {
226: this .ris.closeRecorder();
227: this .ros.closeRecorder();
228: } catch (IOException e) {
229: DevUtils.warnHandle(e, "Convert to runtime exception?");
230: }
231: }
232:
233: /**
234: * Cleanup backing files.
235: *
236: * Call when completely done w/ recorder. Removes any backing files that
237: * may have been dropped.
238: */
239: public void cleanup() {
240: this .close();
241: this .delete(this .backingFileBasename
242: + RECORDING_OUTPUT_STREAM_SUFFIX);
243: this .delete(this .backingFileBasename
244: + RECORDING_INPUT_STREAM_SUFFIX);
245: }
246:
247: /**
248: * Delete file if exists.
249: *
250: * @param name Filename to delete.
251: */
252: private void delete(String name) {
253: File f = new File(name);
254: if (f.exists()) {
255: f.delete();
256: }
257: }
258:
259: /**
260: * Get the current threads' HttpRecorder.
261: *
262: * @return This threads' HttpRecorder. Returns null if can't find a
263: * HttpRecorder in current instance.
264: */
265: public static HttpRecorder getHttpRecorder() {
266: HttpRecorder recorder = null;
267: Thread thread = Thread.currentThread();
268: if (thread instanceof HttpRecorderMarker) {
269: recorder = ((HttpRecorderMarker) thread).getHttpRecorder();
270: }
271: return recorder;
272: }
273:
274: /**
275: * @param characterEncoding Character encoding of recording.
276: */
277: public void setCharacterEncoding(String characterEncoding) {
278: this .characterEncoding = characterEncoding;
279: }
280:
281: /**
282: * @return Returns the characterEncoding.
283: */
284: public String getCharacterEncoding() {
285: return this .characterEncoding;
286: }
287:
288: /**
289: * @return A ReplayCharSequence. Call close on the RCS when done w/ it.
290: * Will return indeterminate results if the underlying recording streams
291: * have not been closed first.
292: * @throws IOException
293: * @throws IOException
294: */
295: public ReplayCharSequence getReplayCharSequence()
296: throws IOException {
297: return getRecordedInput().getReplayCharSequence(
298: this .characterEncoding);
299: }
300:
301: /**
302: * @return A replay input stream.
303: * @throws IOException
304: */
305: public ReplayInputStream getReplayInputStream() throws IOException {
306: return getRecordedInput().getReplayInputStream();
307: }
308:
309: /**
310: * Record the input stream for later playback by an extractor, etc.
311: * This is convenience method used to setup an artificial HttpRecorder
312: * scenario used in unit tests, etc.
313: * @param dir Directory to write backing file to.
314: * @param basename of what we're recording.
315: * @param in Stream to read.
316: * @param encoding Stream encoding.
317: * @throws IOException
318: * @return An {@link org.archive.util.HttpRecorder}.
319: */
320: public static HttpRecorder wrapInputStreamWithHttpRecord(File dir,
321: String basename, InputStream in, String encoding)
322: throws IOException {
323: HttpRecorder rec = new HttpRecorder(dir, basename);
324: if (encoding != null && encoding.length() > 0) {
325: rec.setCharacterEncoding(encoding);
326: }
327: // Do not use FastBufferedInputStream here. It does not
328: // support mark.
329: InputStream is = rec.inputWrap(new BufferedInputStream(in));
330: final int BUFFER_SIZE = 1024 * 4;
331: byte[] buffer = new byte[BUFFER_SIZE];
332: while (true) {
333: // Just read it all down.
334: int x = is.read(buffer);
335: if (x == -1) {
336: break;
337: }
338: }
339: is.close();
340: return rec;
341: }
342: }
|