001: /* $Id: CheckpointSelfTest.java 4931 2007-02-21 18:48:17Z gojomo $
002: *
003: * Created Aug 15, 2006
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.selftest;
024:
025: import java.io.File;
026: import java.io.FileNotFoundException;
027: import java.io.IOException;
028: import java.util.logging.Logger;
029:
030: import javax.management.Attribute;
031: import javax.management.AttributeNotFoundException;
032: import javax.management.InvalidAttributeValueException;
033: import javax.management.MBeanException;
034: import javax.management.ReflectionException;
035:
036: import org.archive.crawler.admin.CrawlJob.MBeanCrawlController;
037: import org.archive.crawler.datamodel.Checkpoint;
038: import org.archive.crawler.datamodel.CrawlOrder;
039: import org.archive.crawler.datamodel.CrawlURI;
040: import org.archive.crawler.event.CrawlStatusListener;
041: import org.archive.crawler.event.CrawlURIDispositionListener;
042: import org.archive.crawler.framework.Checkpointer;
043: import org.archive.crawler.framework.CrawlController;
044: import org.archive.crawler.framework.exceptions.InitializationException;
045: import org.archive.crawler.settings.XMLSettingsHandler;
046: import org.archive.crawler.util.CheckpointUtils;
047:
048: /**
049: * Assumes checkpoint was run during the SelfTest.
050: * @author stack
051: * @version $Date: 2007-02-21 18:48:17 +0000 (Wed, 21 Feb 2007) $ $Version$
052: */
053: public class CheckpointSelfTest extends SelfTestCase implements
054: CrawlStatusListener, CrawlURIDispositionListener {
055: private final Logger LOG = Logger.getLogger(this .getClass()
056: .getName());
057: private boolean crawlEnded = false;
058:
059: public CheckpointSelfTest() {
060: // TODO Auto-generated constructor stub
061: }
062:
063: public CheckpointSelfTest(String testName) {
064: super (testName);
065: // TODO Auto-generated constructor stub
066: }
067:
068: /**
069: * Recover from the checkpoint made during selftest.
070: * @throws InitializationException
071: * @throws IOException
072: * @throws InvalidAttributeValueException
073: * @throws ReflectionException
074: * @throws MBeanException
075: * @throws AttributeNotFoundException
076: * @throws ClassNotFoundException
077: * @throws InterruptedException
078: */
079: public void stestCheckpointRecover()
080: throws InitializationException, IOException,
081: InvalidAttributeValueException, AttributeNotFoundException,
082: MBeanException, ReflectionException,
083: ClassNotFoundException, InterruptedException {
084: assertInitialized();
085: // Check checkpoint dir is in place.
086: File f = getFile(getCrawlJobDir(), "checkpoints");
087: // Use the first checkpoint in the dir.
088: File cpdir = getFile(f, Checkpointer
089: .formatCheckpointName("", 1));
090: // Check valid checkpoint file is in place.
091: getFile(cpdir, Checkpoint.VALIDITY_STAMP_FILENAME);
092: // Get order file from checkpoint dir.
093: File order = getFile(cpdir, "order.xml");
094: XMLSettingsHandler handler = new XMLSettingsHandler(order);
095: handler.initialize();
096: // Set recover-path to be this checkpoint dir.
097: handler.getOrder().setAttribute(
098: new Attribute(CrawlOrder.ATTR_RECOVER_PATH, cpdir
099: .toString()));
100: Checkpoint cp = CrawlController.getCheckpointRecover(handler
101: .getOrder());
102: if (cp == null) {
103: throw new NullPointerException(
104: "Failed read of checkpoint object");
105: }
106: CrawlController c = (MBeanCrawlController) CheckpointUtils
107: .readObjectFromFile(MBeanCrawlController.class, cpdir);
108: c.initialize(handler);
109: c.addCrawlStatusListener(this );
110: c.addCrawlURIDispositionListener(this );
111: c.requestCrawlStart();
112: LOG.info("Recover from selftest crawl started using "
113: + order.toString() + ".");
114: // Wait here a while till its up and running?
115: while (!this .crawlEnded) {
116: LOG.info("Waiting on recovered crawl to finish");
117: Thread.sleep(1000);
118: }
119: }
120:
121: private File getFile(final File parent, final String name)
122: throws IOException {
123: File f = new File(parent, name);
124: if (!f.exists()) {
125: throw new FileNotFoundException(f.getAbsolutePath());
126: }
127: if (!f.canRead()) {
128: throw new IOException("Can't read " + f.getAbsolutePath());
129: }
130: return f;
131: }
132:
133: public void crawlCheckpoint(File checkpointDir) throws Exception {
134: // TODO Auto-generated method stub
135:
136: }
137:
138: public void crawlEnded(String sExitMessage) {
139: this .crawlEnded = true;
140: }
141:
142: public void crawlEnding(String sExitMessage) {
143: // TODO Auto-generated method stub
144:
145: }
146:
147: public void crawlPaused(String statusMessage) {
148: // TODO Auto-generated method stub
149:
150: }
151:
152: public void crawlPausing(String statusMessage) {
153: // TODO Auto-generated method stub
154:
155: }
156:
157: public void crawlResuming(String statusMessage) {
158: // TODO Auto-generated method stub
159:
160: }
161:
162: public void crawlStarted(String message) {
163: // TODO Auto-generated method stub
164:
165: }
166:
167: public void crawledURIDisregard(CrawlURI curi) {
168: // TODO Auto-generated method stub
169:
170: }
171:
172: public void crawledURIFailure(CrawlURI curi) {
173: // TODO Auto-generated method stub
174:
175: }
176:
177: public void crawledURINeedRetry(CrawlURI curi) {
178: // TODO Auto-generated method stub
179:
180: }
181:
182: public void crawledURISuccessful(CrawlURI curi) {
183: LOG.info(curi.toString());
184: }
185: }
|