001: /* HttpRecorderGetMethod
002: *
003: * Created on Feb 24, 2004
004: *
005: * Copyright (C) 2003 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.httpclient;
024:
025: import java.io.IOException;
026: import java.util.logging.Logger;
027:
028: import org.apache.commons.httpclient.HttpConnection;
029: import org.apache.commons.httpclient.HttpException;
030: import org.apache.commons.httpclient.HttpState;
031: import org.apache.commons.httpclient.methods.GetMethod;
032: import org.archive.util.HttpRecorder;
033:
034: /**
035: * Override of GetMethod that marks the passed HttpRecorder w/ the transition
036: * from HTTP head to body and that forces a close on the http connection.
037: *
038: * The actions done in this subclass used to be done by copying
039: * org.apache.commons.HttpMethodBase, overlaying our version in place of the
040: * one that came w/ httpclient. Here is the patch of the difference between
041: * shipped httpclient code and our mods:
042: * <pre>
043: * -- -1338,6 +1346,12 --
044: *
045: * public void releaseConnection() {
046: *
047: * + // HERITRIX always wants the streams closed.
048: * + if (responseConnection != null)
049: * + {
050: * + responseConnection.close();
051: * + }
052: * +
053: * if (responseStream != null) {
054: * try {
055: * // FYI - this may indirectly invoke responseBodyConsumed.
056: * -- -1959,6 +1973,11 --
057: * this.statusLine = null;
058: * }
059: * }
060: * + // HERITRIX mark transition from header to content.
061: * + if (this.httpRecorder != null)
062: * + {
063: * + this.httpRecorder.markContentBegin();
064: * + }
065: * readResponseBody(state, conn);
066: * processResponseBody(state, conn);
067: * } catch (IOException e) {
068: * </pre>
069: *
070: * <p>We're not supposed to have access to the underlying connection object;
071: * we violate that contract only because we have seen cases where httpclient
072: * skips out without cleaning up after itself.
073: *
074: * @author stack
075: * @version $Revision: 4646 $, $Date: 2006-09-22 17:23:04 +0000 (Fri, 22 Sep 2006) $
076: */
077: public class HttpRecorderGetMethod extends GetMethod {
078:
079: protected static Logger logger = Logger
080: .getLogger(HttpRecorderGetMethod.class.getName());
081:
082: /**
083: * Instance of http recorder method.
084: */
085: protected HttpRecorderMethod httpRecorderMethod = null;
086:
087: public HttpRecorderGetMethod(String uri, HttpRecorder recorder) {
088: super (uri);
089: this .httpRecorderMethod = new HttpRecorderMethod(recorder);
090: }
091:
092: protected void readResponseBody(HttpState state,
093: HttpConnection connection) throws IOException,
094: HttpException {
095: // We're about to read the body. Mark transition in http recorder.
096: this .httpRecorderMethod.markContentBegin(connection);
097: super .readResponseBody(state, connection);
098: }
099:
100: protected boolean shouldCloseConnection(HttpConnection conn) {
101: // Always close connection after each request. As best I can tell, this
102: // is superfluous -- we've set our client to be HTTP/1.0. Doing this
103: // out of paranoia.
104: return true;
105: }
106:
107: public int execute(HttpState state, HttpConnection conn)
108: throws HttpException, IOException {
109: // Save off the connection so we can close it on our way out in case
110: // httpclient fails to (We're not supposed to have access to the
111: // underlying connection object; am only violating contract because
112: // see cases where httpclient is skipping out w/o cleaning up
113: // after itself).
114: this .httpRecorderMethod.setConnection(conn);
115: return super .execute(state, conn);
116: }
117:
118: protected void addProxyConnectionHeader(HttpState state,
119: HttpConnection conn) throws IOException, HttpException {
120: super.addProxyConnectionHeader(state, conn);
121: this.httpRecorderMethod.handleAddProxyConnectionHeader(this);
122: }
123: }
|