001: /* Copyright (C) 2003 Internet Archive.
002: *
003: * This file is part of the Heritrix web crawler (crawler.archive.org).
004: *
005: * Heritrix is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU Lesser Public License as published by
007: * the Free Software Foundation; either version 2.1 of the License, or
008: * any later version.
009: *
010: * Heritrix is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser Public License
016: * along with Heritrix; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: *
019: * FetchStatusCodes.java
020: * Created on Jun 19, 2003
021: *
022: * $Header$
023: */
024: package org.archive.crawler.datamodel;
025:
/**
 * Constant flag codes to be used, in lieu of per-protocol
 * codes (like HTTP's 200, 404, etc.), when network/internal/
 * out-of-band conditions occur.
 *
 * <p>The URISelector may use such codes, along with user-configured
 * options, to determine whether, when, and how many times
 * a CrawlURI might be reattempted.
 *
 * <p>Fields declared in an interface are implicitly
 * <code>public static final</code>, so those modifiers are not repeated
 * on the individual constants below.
 *
 * @author gojomo
 */
public interface FetchStatusCodes {

    // ---- Basic fetch failures -------------------------------------------

    /** Fetch never tried (perhaps protocol unsupported or illegal URI). */
    int S_UNATTEMPTED = 0;

    /** DNS lookup failed. */
    int S_DOMAIN_UNRESOLVABLE = -1;

    /** HTTP connect failed. */
    int S_CONNECT_FAILED = -2;

    /** HTTP connect broken. */
    int S_CONNECT_LOST = -3;

    /** HTTP timeout (before any meaningful response received). */
    int S_TIMEOUT = -4;

    /** Unexpected runtime exception; see runtime-errors.log. */
    int S_RUNTIME_EXCEPTION = -5;

    /** DNS prerequisite failed, precluding attempt. */
    int S_DOMAIN_PREREQUISITE_FAILURE = -6;

    /** URI recognized as unsupported or illegal. */
    int S_UNFETCHABLE_URI = -7;

    /** Multiple retries all failed. */
    int S_TOO_MANY_RETRIES = -8;

    // ---- Scheduling / prerequisite conditions ---------------------------

    /**
     * Temporary status assigned to URIs awaiting preconditions; appearance
     * in logs is a bug.
     */
    int S_DEFERRED = -50;

    /**
     * URI could not be queued in Frontier; when URIs are properly
     * filtered for format, should never occur.
     */
    int S_UNQUEUEABLE = -60;

    /** Robots prerequisite failed, precluding attempt. */
    int S_ROBOTS_PREREQUISITE_FAILURE = -61;

    /** Some other (non-DNS, non-robots) prerequisite failed, precluding attempt. */
    int S_OTHER_PREREQUISITE_FAILURE = -62;

    /** A prerequisite could not be scheduled, precluding attempt. */
    int S_PREREQUISITE_UNSCHEDULABLE_FAILURE = -63;

    // ---- Severe errors --------------------------------------------------

    /**
     * Severe java 'Error' conditions (OutOfMemoryError, StackOverflowError,
     * etc.) during URI processing.
     */
    int S_SERIOUS_ERROR = -3000;

    // ---- Policy / scope exclusions --------------------------------------

    /** 'Chaff' detection of traps/content of negligible value applied. */
    int S_DEEMED_CHAFF = -4000;

    /** Overstepped link hops. */
    int S_TOO_MANY_LINK_HOPS = -4001;

    /** Overstepped embed/trans hops. */
    int S_TOO_MANY_EMBED_HOPS = -4002;

    /**
     * Out-of-scope upon reexamination (only when scope changes during
     * crawl).
     */
    int S_OUT_OF_SCOPE = -5000;

    /** Blocked from fetch by user setting. */
    int S_BLOCKED_BY_USER = -5001;

    /**
     * Blocked by custom prefetcher processor.
     * A check against scope or against filters in a custom prefetch
     * processor rules CrawlURI should not be crawled.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_CUSTOM_PROCESSOR = -5002;

    /**
     * Blocked due to exceeding an established quota.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_QUOTA = -5003;

    /**
     * Blocked due to exceeding an established runtime.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_RUNTIME_LIMIT = -5004;

    /** Deleted from frontier by user. */
    int S_DELETED_BY_USER = -6000;

    /** Processing thread was killed. */
    int S_PROCESSING_THREAD_KILLED = -7000;

    /** Robots rules precluded fetch. */
    int S_ROBOTS_PRECLUDED = -9998;

    // ---- Non-HTTP success codes -----------------------------------------

    /** DNS success. */
    int S_DNS_SUCCESS = 1;

    /** InetAddress.getByName success. */
    int S_GETBYNAME_SUCCESS = 1001;
}
|