01: /* Constants
02: *
03: * $Id: AdaptiveRevisitAttributeConstants.java 3297 2005-04-01 14:57:45Z kristinn_sig $
04: *
05: * Created on 26.11.2004
06: *
07: * Copyright (C) 2004 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.frontier;
26:
27: import org.archive.crawler.datamodel.CoreAttributeConstants;
28:
29: /**
30: * Defines static constants for the Adaptive Revisiting module defining data
31: * keys in the CrawlURI AList.
32: *
33: * @author Kristinn Sigurdsson
34: *
35: * @see org.archive.crawler.datamodel.CoreAttributeConstants
36: */
37: public interface AdaptiveRevisitAttributeConstants extends
38: CoreAttributeConstants {
39:
40: /** Designates a field in the CrawlURIs AList for the content digest of
41: * an earlier visit. */
42: public static final String A_LAST_CONTENT_DIGEST = "last-content-digest";
43: public static final String A_TIME_OF_NEXT_PROCESSING = "time-of-next-processing";
44: public static final String A_WAIT_INTERVAL = "wait-interval";
45: public static final String A_NUMBER_OF_VISITS = "number-of-visits";
46: public static final String A_NUMBER_OF_VERSIONS = "number-of-versions";
47: public static final String A_FETCH_OVERDUE = "fetch-overdue";
48:
49: public static final String A_LAST_ETAG = "last-etag";
50: public static final String A_LAST_DATESTAMP = "last-datestamp";
51:
52: public static final String A_WAIT_REEVALUATED = "wait-reevaluated";
53:
54: /** No knowledge of URI content. Possibly not fetched yet, unable
55: * to check if different or an error occured on last fetch attempt. */
56: public static final int CONTENT_UNKNOWN = -1;
57:
58: /** URI content has not changed between the two latest, successfully
59: * completed fetches. */
60: public static final int CONTENT_UNCHANGED = 0;
61:
62: /** URI content had changed between the two latest, successfully completed
63: * fetches. By definition, content has changed if there has only been one
64: * successful fetch made. */
65: public static final int CONTENT_CHANGED = 1;
66:
67: /**
68: * Key to use getting state of crawluri from the CrawlURI alist.
69: */
70: public static final String A_CONTENT_STATE_KEY = "ar-state";
71: }
|