01: /* CrawlSubstats
02: *
03: * $Id: CrawlSubstats.java 4668 2006-09-26 21:49:01Z paul_jack $
04: *
05: * Created on Nov 4, 2005
06: *
07: * Copyright (C) 2005 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.datamodel;
26:
27: import java.io.Serializable;
28:
29: import org.apache.commons.httpclient.HttpStatus;
30:
31: /**
32: * Collector of statististics for a 'subset' of a crawl,
33: * such as a server (host:port), host, or frontier group
34: * (eg queue).
35: *
36: * @author gojomo
37: */
38: public class CrawlSubstats implements Serializable, FetchStatusCodes {
39:
40: private static final long serialVersionUID = 8624425657056569036L;
41:
42: public interface HasCrawlSubstats {
43: public CrawlSubstats getSubstats();
44: }
45:
46: long fetchSuccesses; // 2XX response codes
47: long fetchResponses; // all positive responses (incl. 3XX, 4XX, 5XX)
48: long successBytes; // total size of all success responses
49: long totalBytes; // total size of all responses
50: long fetchNonResponses; // processing attempts resulting in no response
51:
52: // (both failures and temp deferrals)
53:
54: public synchronized void tally(CrawlURI curi) {
55: if (curi.getFetchStatus() <= 0) {
56: fetchNonResponses++;
57: return;
58: }
59: fetchResponses++;
60: totalBytes += curi.getContentSize();
61: if (curi.getFetchStatus() >= HttpStatus.SC_OK
62: && curi.getFetchStatus() < 300) {
63: fetchSuccesses++;
64: successBytes += curi.getContentSize();
65: }
66: }
67:
68: public long getFetchSuccesses() {
69: return fetchSuccesses;
70: }
71:
72: public long getFetchResponses() {
73: return fetchResponses;
74: }
75:
76: public long getSuccessBytes() {
77: return successBytes;
78: }
79:
80: public long getTotalBytes() {
81: return totalBytes;
82: }
83:
84: public long getFetchNonResponses() {
85: return fetchNonResponses;
86: }
87: }
|