001: /* SeedRecord
002: *
003: * $Id: SeedRecord.java 4671 2006-09-26 23:47:15Z paul_jack $
004: *
005: * Created on June 12, 2005
006: *
007: * Copyright (C) 2005 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: *
025: */
026: package org.archive.crawler.admin;
027:
028: import java.io.Serializable;
029:
030: import org.archive.crawler.datamodel.CandidateURI;
031: import org.archive.crawler.datamodel.CoreAttributeConstants;
032: import org.archive.crawler.datamodel.CrawlURI;
033:
034: /**
035: * Record of all interesting info about the most-recent
036: * processing of a specific seed.
037: *
038: * @author gojomo
039: */
040: public class SeedRecord implements CoreAttributeConstants, Serializable {
041: private static final long serialVersionUID = -8455358640509744478L;
042: private final String uri;
043: private int statusCode;
044: private final String disposition;
045: private String redirectUri;
046:
047: /**
048: * Create a record from the given CrawlURI and disposition string
049: *
050: * @param curi CrawlURI, already processed as reported to StatisticsTracker
051: * @param disposition descriptive disposition string
052: *
053: */
054: public SeedRecord(CrawlURI curi, String disposition) {
055: super ();
056: this .uri = curi.toString();
057: this .statusCode = curi.getFetchStatus();
058: this .disposition = disposition;
059: if (statusCode == 301 || statusCode == 302) {
060: for (CandidateURI cauri : curi.getOutCandidates()) {
061: if ("location:".equalsIgnoreCase(cauri.getViaContext()
062: .toString())) {
063: redirectUri = cauri.toString();
064: }
065: }
066: }
067: }
068:
069: /**
070: * Constructor for when a CrawlURI is unavailable; such
071: * as when considering seeds not yet passed through as
072: * CrawlURIs.
073: *
074: * @param uri
075: * @param disposition
076: */
077: public SeedRecord(String uri, String disposition) {
078: this (uri, disposition, -1, null);
079: }
080:
081: /**
082: * Create a record from the given URI, disposition, HTTP status code,
083: * and redirect URI.
084: * @param uri
085: * @param disposition
086: * @param statusCode
087: * @param redirectUri
088: */
089: public SeedRecord(String uri, String disposition, int statusCode,
090: String redirectUri) {
091: super ();
092: this .uri = uri;
093: this .statusCode = statusCode;
094: this .disposition = disposition;
095: this .redirectUri = redirectUri;
096: }
097:
098: /**
099: * @return Returns the disposition.
100: */
101: public String getDisposition() {
102: return disposition;
103: }
104:
105: /**
106: * @return Returns the redirectUri.
107: */
108: public String getRedirectUri() {
109: return redirectUri;
110: }
111:
112: /**
113: * @return Returns the statusCode.
114: */
115: public int getStatusCode() {
116: return statusCode;
117: }
118:
119: /**
120: * @return Returns the uri.
121: */
122: public String getUri() {
123: return uri;
124: }
125: }
|