01: /* ImageWaitEvaluator
02: *
03: * $Id: ImageWaitEvaluator.java 4667 2006-09-26 20:38:48Z paul_jack $
04: *
05: * Created on 1.4.2005
06: *
07: * Copyright (C) 2005 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.postprocessor;
26:
27: /**
28: * A specialized ContentBasedWaitEvaluator. Comes preset with a regular
29: * expression that matches text documents. <code>^image/.*$</code>
30: *
31: * @author Kristinn Sigurdsson
32: *
33: * @see org.archive.crawler.postprocessor.ContentBasedWaitEvaluator
34: */
35: public class ImageWaitEvaluator extends ContentBasedWaitEvaluator {
36:
37: private static final long serialVersionUID = -2762377129860398333L;
38:
39: protected final static Long DEFAULT_INITIAL_WAIT_INTERVAL = new Long(
40: 172800); // 2 days
41:
42: protected final static String DEFAULT_CONTENT_REGEXPR = "^image/.*$"; //Text
43:
44: /**
45: * Constructor
46: *
47: * @param name The name of the module
48: */
49: public ImageWaitEvaluator(String name) {
50: super (
51: name,
52: "Evaluates how long to wait before fetching a URI again. "
53: + "Only handles CrawlURIs whose content type indicates a "
54: + "image document (^image/.*$). "
55: + "Typically, this processor should be in the post processing "
56: + "chain. It will pass if another wait evaluator has already "
57: + "processed the CrawlURI.",
58: DEFAULT_CONTENT_REGEXPR, DEFAULT_INITIAL_WAIT_INTERVAL,
59: DEFAULT_MAX_WAIT_INTERVAL, DEFAULT_MIN_WAIT_INTERVAL,
60: DEFAULT_UNCHANGED_FACTOR, DEFAULT_CHANGED_FACTOR);
61: }
62:
63: }
|