/* PersistStoreProcessor
 *
 * Created on Feb 12, 2005
 *
 * Copyright (C) 2007 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package org.archive.crawler.processor.recrawl;

import java.io.File;

import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.event.CrawlStatusListener;

import com.sleepycat.je.DatabaseException;
/**
 * Store CrawlURI attributes from latest fetch to persistent storage for
 * consultation by a later recrawl.
 *
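 * <p>A PersistLoadProcessor in this same package can read these entries back
 * during a later crawl. The following is a hypothetical sketch of such a
 * consultation (variable names are illustrative; the actual lookup happens
 * inside PersistLoadProcessor, not here):
 * <pre>
 *   // hypothetical: consult the persist store for a URI's prior-fetch attributes
 *   AList prior = (AList) store.get(persistKeyFor(curi));
 *   if (prior != null) {
 *       // e.g. compare a prior content digest before deciding to refetch
 *   }
 * </pre>
 *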
 * @author gojomo
 * @version $Date: 2006-09-25 20:19:54 +0000 (Mon, 25 Sep 2006) $, $Revision: 4654 $
 */
public class PersistStoreProcessor extends PersistOnlineProcessor
        implements CrawlStatusListener {
    private static final long serialVersionUID = -8308356194337303758L;

    /**
     * Usual constructor.
     *
     * @param name the name of this processor
     */
    public PersistStoreProcessor(String name) {
        super(name,
                "PersistStoreProcessor. Stores CrawlURI attributes "
                + "from latest fetch for consultation by a later recrawl.");
    }

    protected void initialTasks() {
        super.initialTasks();
        // register as a crawl-status listener so checkpoints trigger a db sync
        getController().addCrawlStatusListener(this);
    }

    @Override
    protected void innerProcess(CrawlURI curi) throws InterruptedException {
        if (shouldStore(curi)) {
            // persist the URI's latest-fetch attributes, keyed by persistKeyFor()
            store.put(persistKeyFor(curi), curi.getPersistentAList());
        }
    }

    public void crawlCheckpoint(File checkpointDir) throws Exception {
        // flush the persist-history db so the checkpoint captures all entries
        try {
            historyDb.sync();
        } catch (DatabaseException e) {
            // wrap and rethrow so a failed sync surfaces as a checkpoint failure
            throw new RuntimeException(e);
        }
    }

    public void crawlEnded(String sExitMessage) {
        // ignored
    }

    public void crawlEnding(String sExitMessage) {
        // ignored
    }

    public void crawlPaused(String statusMessage) {
        // ignored
    }

    public void crawlPausing(String statusMessage) {
        // ignored
    }

    public void crawlResuming(String statusMessage) {
        // ignored
    }

    public void crawlStarted(String message) {
        // ignored
    }
}