01: /* HeritrixHttpMethodRetryHandler
02: *
03: * Created on Sep 29, 2004
04: *
05: * Copyright (C) 2003 Internet Archive.
06: *
07: * This file is part of the Heritrix web crawler (crawler.archive.org).
08: *
09: * Heritrix is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU Lesser Public License as published by
11: * the Free Software Foundation; either version 2.1 of the License, or
12: * any later version.
13: *
14: * Heritrix is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU Lesser Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser Public License
20: * along with Heritrix; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: */
23: package org.archive.crawler.fetcher;
24:
25: import java.io.IOException;
26: import java.net.SocketTimeoutException;
27:
28: import org.apache.commons.httpclient.HttpMethod;
29: import org.apache.commons.httpclient.HttpMethodRetryHandler;
30: import org.apache.commons.httpclient.NoHttpResponseException;
31: import org.apache.commons.httpclient.methods.PostMethod;
32:
33: /**
34: * Retry handler that tries ten times to establish connection and then once
35: * established, if a GET method, tries ten times to get response (If POST,
36: * it tries once only).
37: *
38: * Its unsafe retrying POSTs. See 'Rule of Thumb' under 'Method Recovery'
39: * here: <a href="http://jakarta.apache.org/commons/httpclient/tutorial.html">
40: * HttpClient Tutorial</a>.
41: *
42: * @author stack
43: * @version $Date: 2005-06-12 17:59:14 +0000 (Sun, 12 Jun 2005) $, $Revision: 3560 $
44: */
45: public class HeritrixHttpMethodRetryHandler implements
46: HttpMethodRetryHandler {
47: private static final int DEFAULT_RETRY_COUNT = 10;
48:
49: private final int maxRetryCount;
50:
51: /**
52: * Constructor.
53: */
54: public HeritrixHttpMethodRetryHandler() {
55: this (DEFAULT_RETRY_COUNT);
56: }
57:
58: /**
59: * Constructor.
60: * @param maxRetryCount Maximum amount of times to retry.
61: */
62: public HeritrixHttpMethodRetryHandler(int maxRetryCount) {
63: this .maxRetryCount = maxRetryCount;
64: }
65:
66: public boolean retryMethod(HttpMethod method,
67: IOException exception, int executionCount) {
68: if (exception instanceof SocketTimeoutException) {
69: // already waited for the configured amount of time with no reply;
70: // do not retry further until next go round
71: return false;
72: }
73: if (executionCount >= this .maxRetryCount) {
74: // Do not retry if over max retry count
75: return false;
76: }
77: if (exception instanceof NoHttpResponseException) {
78: // Retry if the server dropped connection on us
79: return true;
80: }
81: if (!method.isRequestSent()
82: && (!(method instanceof PostMethod))) {
83: // Retry if the request has not been sent fully or
84: // if it's OK to retry methods that have been sent
85: return true;
86: }
87: // otherwise do not retry
88: return false;
89: }
90: }
|