01: /* FrontierJournal
02: *
03: * Created on Oct 26, 2004
04: *
05: * Copyright (C) 2004 Internet Archive.
06: *
07: * This file is part of the Heritrix web crawler (crawler.archive.org).
08: *
09: * Heritrix is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU Lesser Public License as published by
11: * the Free Software Foundation; either version 2.1 of the License, or
12: * any later version.
13: *
14: * Heritrix is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU Lesser Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser Public License
20: * along with Heritrix; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: */
23: package org.archive.crawler.frontier;
24:
25: import java.io.File;
26: import java.io.IOException;
27:
28: import org.archive.crawler.datamodel.CrawlURI;
29: import org.archive.net.UURI;
30:
31: /**
32: * Record of key Frontier happenings.
33: * @author stack
34: * @version $Date: 2007-03-08 18:41:46 +0000 (Thu, 08 Mar 2007) $, $Revision: 4967 $
35: */
36: public interface FrontierJournal {
37: public static final String LOGNAME_RECOVER = "recover.gz";
38:
39: /**
40: * @param curi CrawlURI that has been scheduled to be added to the
41: * Frontier.
42: */
43: public abstract void added(CrawlURI curi);
44:
45: /**
46: * @param curi CrawlURI that finished successfully.
47: */
48: public abstract void finishedSuccess(CrawlURI curi);
49:
50: /**
51: * @param uuri UURI that finished successfully.
52: */
53: public abstract void finishedSuccess(UURI uuri);
54:
55: /**
56: * Note that a CrawlURI was emitted for processing.
57: * If not followed by a finished or rescheduled notation in
58: * the journal, the CrawlURI was still in-process when the journal ended.
59: *
60: * @param curi CrawlURI emitted.
61: */
62: public abstract void emitted(CrawlURI curi);
63:
64: /**
65: * @param u UURI that finished unsuccessfully
66: */
67: public abstract void finishedFailure(UURI u);
68:
69: /**
70: * @param curi CrawlURI finished unsuccessfully.
71: */
72: public abstract void finishedFailure(CrawlURI curi);
73:
74: /**
75: * @param curi CrawlURI that was returned to the Frontier for
76: * another try.
77: */
78: public abstract void rescheduled(CrawlURI curi);
79:
80: /**
81: * Flush and close any held objects.
82: */
83: public abstract void close();
84:
85: /**
86: * Checkpoint.
87: * @param checkpointDir Directory we're checkpointing into.
88: * @throws IOException
89: */
90: public abstract void checkpoint(final File checkpointDir)
91: throws IOException;
92:
93: /**
94: * Add a line noting a serious crawl error.
95: *
96: * @param string
97: */
98: public abstract void seriousError(String string);
99: }
|