001: /* SelftestCrawlJobHandler
002: *
003: * Created on Feb 4, 2004
004: *
005: * Copyright (C) 2004 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.selftest;
024:
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import junit.framework.Test;
import junit.framework.TestResult;

import org.archive.crawler.Heritrix;
import org.archive.crawler.admin.CrawlJob;
import org.archive.crawler.admin.CrawlJobHandler;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.event.CrawlURIDispositionListener;
039:
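/**
 * A {@link CrawlJobHandler} override that hooks the end-of-crawl-job
 * notification so the selftest analysis can be run against the job that
 * just completed.
 *
 * <p>When the crawl ends, the selftest suite (or a single named selftest)
 * is run and Heritrix is shut down with exit code 0 if the tests passed
 * and 1 otherwise. While the crawl is running this handler also listens
 * for URI dispositions and requests a crawl checkpoint whenever a
 * successfully crawled URI ends in <code>/Checkpoint/</code>.
 *
 * @author stack
 * @version $Id: SelfTestCrawlJobHandler.java 4667 2006-09-26 20:38:48Z paul_jack $
 */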
047:
048: public class SelfTestCrawlJobHandler extends CrawlJobHandler implements
049: CrawlURIDispositionListener {
050: /**
051: * Name of the selftest webapp.
052: */
053: private static final String SELFTEST_WEBAPP = "selftest";
054:
055: private static Logger logger = Logger
056: .getLogger("org.archive.crawler.admin.SelftestCrawlJobHandler");
057:
058: /**
059: * Name of selftest to run.
060: *
061: * If set, run this test only. Otherwise run them all.
062: */
063: private String selfTestName = null;
064:
065: private String selfTestUrl = null;
066:
067: private SelfTestCrawlJobHandler() {
068: this (null, null, null);
069: }
070:
071: public SelfTestCrawlJobHandler(final File jobsDir,
072: final String selfTestName, final String url) {
073: // No need to load jobs or profiles
074: super (jobsDir, false, false);
075: this .selfTestName = selfTestName;
076: this .selfTestUrl = url;
077: }
078:
079: @Override
080: public void crawlStarted(String message) {
081: super .crawlStarted(message);
082: this .getCurrentJob().getController()
083: .addCrawlURIDispositionListener(this );
084: }
085:
086: public void crawlEnded(String sExitMessage) {
087: TestResult result = null;
088: try {
089: super .crawlEnded(sExitMessage);
090:
091: // At crawlEnded time, there is no current job. Get the selftest
092: // job by pulling from the completedCrawlJobs queue.
093: List completedCrawlJobs = getCompletedJobs();
094: if (completedCrawlJobs == null
095: || completedCrawlJobs.size() <= 0) {
096: logger.severe("Selftest job did not complete.");
097: } else {
098: CrawlJob job = (CrawlJob) completedCrawlJobs
099: .get(completedCrawlJobs.size() - 1);
100: Test test = null;
101: if (this .selfTestName != null
102: && this .selfTestName.length() > 0) {
103: // Run single selftest only.
104: // Get class for the passed single selftest.
105: // Assume test to run is in this package.
106: String this ClassName = this .getClass().getName();
107: String pkg = this ClassName.substring(0,
108: this ClassName.lastIndexOf('.'));
109: // All selftests end in 'SelfTest'.
110: String selftestClass = pkg + '.'
111: + this .selfTestName + "SelfTest";
112: // Need to make a list. Make an array first.
113: List<Class<?>> classList = new ArrayList<Class<?>>();
114: classList.add(Class.forName(selftestClass));
115: test = AllSelfTestCases
116: .suite(this .selfTestUrl, job, job
117: .getDirectory(), Heritrix
118: .getHttpServer().getWebappPath(
119: SELFTEST_WEBAPP), classList);
120: } else {
121: // Run all tests.
122: test = AllSelfTestCases.suite(this .selfTestUrl,
123: job, job.getDirectory(), Heritrix
124: .getHttpServer().getWebappPath(
125: SELFTEST_WEBAPP));
126: }
127: result = junit.textui.TestRunner.run(test);
128: }
129: } catch (Exception e) {
130: logger.info("Failed running selftest analysis: "
131: + e.getMessage());
132: } finally {
133: // TODO: This technique where I'm calling shutdown directly means
134: // we bypass the running of other crawlended handlers. Means
135: // that such as the statistics tracker have no chance to run so
136: // reports are never generated. Fix -- but preserve 0 or 1 exit
137: // code.
138: logger
139: .info((new Date()).toString()
140: + " Selftest "
141: + (result != null && result.wasSuccessful() ? "PASSED"
142: : "FAILED"));
143: stop();
144: Heritrix.shutdown(((result != null) && result
145: .wasSuccessful()) ? 0 : 1);
146: }
147: }
148:
149: public void crawledURIDisregard(CrawlURI curi) {
150: // TODO Auto-generated method stub
151: }
152:
153: public void crawledURIFailure(CrawlURI curi) {
154: // TODO Auto-generated method stub
155: }
156:
157: public void crawledURINeedRetry(CrawlURI curi) {
158: // TODO Auto-generated method stub
159: }
160:
161: public void crawledURISuccessful(CrawlURI curi) {
162: // If curi ends in 'Checkpoint/index.html', then run a Checkpoint.
163: if (curi.toString().endsWith("/Checkpoint/")) {
164: this.getCurrentJob().getController()
165: .requestCrawlCheckpoint();
166: }
167: }
168: }
|