001: /* SelfTestCase
002: *
003: * Created on Feb 4, 2004
004: *
005: * Copyright (C) 2004 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.selftest;
024:
025: import java.io.File;
026: import java.io.FileNotFoundException;
027: import java.io.IOException;
028: import java.util.ArrayList;
029: import java.util.Iterator;
030: import java.util.List;
031:
032: import javax.management.AttributeNotFoundException;
033: import javax.management.MBeanException;
034: import javax.management.ReflectionException;
035:
036: import junit.framework.TestCase;
037:
038: import org.archive.crawler.admin.CrawlJob;
039: import org.archive.crawler.datamodel.CrawlOrder;
040: import org.archive.crawler.settings.ComplexType;
041: import org.archive.crawler.settings.StringList;
042: import org.archive.crawler.writer.ARCWriterProcessor;
043: import org.archive.io.arc.ARCReader;
044: import org.archive.io.arc.ARCReaderFactory;
045: import org.archive.io.arc.ARCRecordMetaData;
046: import org.archive.util.FileUtils;
047:
048: /**
049: * Base class for integrated selftest unit tests.
050: *
051: * Has utility for integrated selftest such as location of selftest generated
052: * arc file.
053: *
054: * @author stack
055: * @version $Id: SelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
056: */
057: public abstract class SelfTestCase extends TestCase {
058: /**
059: * Suffix for selftest classes.
060: */
061: protected static final String SELFTEST = "SelfTest";
062:
063: private static CrawlJob crawlJob = null;
064: private static File crawlJobDir = null;
065: private static File[] arcFile = null;
066: private static String selftestURL = null;
067:
068: /**
069: * Directory logs are kept in.
070: */
071: private static File logsDir = null;
072:
073: /**
074: * Has the static initializer for this class been run.
075: */
076: private static boolean initialized = false;
077:
078: /**
079: * The selftest webapp htdocs directory.
080: */
081: private static File htdocs = null;
082:
083: /**
084: * A reference to an ARCReader on which the validate method has been called.
085: * Can be used to walk the metadata.
086: *
087: * @see org.archive.io.arc.ARCReader#validate()
088: */
089: private static ARCReader[] readReader = null;
090:
091: /**
092: * Metadata list from the arc reader.
093: *
094: * Gotten as byproduct of calling validate on the arcreader.
095: */
096: private static List[] metaDatas;
097:
098: public SelfTestCase() {
099: super ();
100: }
101:
102: public SelfTestCase(String testName) {
103: super (testName);
104: }
105:
106: public void testNothing() {
107: // dummy test that always succeeds; prevents warning of no tests found
108: // when running 'all JUnit tests' in Heritrix project
109: }
110:
111: public void assertInitialized() {
112: assertTrue("SelfTestCase.initialize() not called "
113: + "before running selftest.", initialized);
114: }
115:
116: /**
117: * Test non null and not empty.
118: *
119: * @param str String to test.
120: * @return The passed string.
121: * @throws IllegalArgumentException if null or empty string.
122: */
123: protected static void assertNonEmpty(String str) {
124: assertTrue("String " + str + " is empty", str.length() > 0);
125: }
126:
127: /**
128: * Test nonull and exits.
129: *
130: * @param file File to test.
131: * @return Passed file.
132: * @throws FileNotFoundException passed file doesn't exist.
133: */
134: protected static void assertExists(File file) {
135: assertTrue("File " + file + " doesn't exist", file.exists());
136: }
137:
138: /**
139: * Static initializer.
140: *
141: * Must be called before instantiation of any tests based off this class.
142: *
143: * @param url URL to selftest webapp.
144: * @param job The selftest crawl job.
145: * @param jobDir Job output directory. Has the seed file, the order file
146: * and logs.
147: * @param docs Expanded webapp directory location.
148: *
149: * @throws IOException if nonexistent directories passed.
150: */
151: public static synchronized void initialize(final String url,
152: final CrawlJob job, final File jobDir, final File docs)
153: throws IOException, AttributeNotFoundException,
154: MBeanException, ReflectionException, InterruptedException {
155: assertNotNull(url);
156: assertNonEmpty(url);
157: SelfTestCase.selftestURL = url.endsWith("/") ? url : url + "/";
158:
159: assertNotNull(job);
160: SelfTestCase.crawlJob = job;
161:
162: assertNotNull(jobDir);
163: assertExists(jobDir);
164: SelfTestCase.crawlJobDir = jobDir;
165:
166: assertNotNull(docs);
167: assertExists(docs);
168: SelfTestCase.htdocs = docs;
169:
170: // Calculate the logs directory. If diskPath is not absolute, then logs
171: // are in the jobs directory under the diskPath subdirectory. Guard
172: // against case where diskPath is empty.
173: CrawlOrder crawlOrder = job.getSettingsHandler().getOrder();
174: assertNotNull(crawlOrder);
175:
176: String diskPath = (String) crawlOrder.getAttribute(null,
177: CrawlOrder.ATTR_DISK_PATH);
178: if (diskPath != null && diskPath.length() > 0
179: && diskPath.startsWith(File.separator)) {
180: SelfTestCase.logsDir = new File(diskPath);
181: } else {
182: SelfTestCase.logsDir = (diskPath != null && diskPath
183: .length() > 0) ? new File(jobDir, diskPath)
184: : jobDir;
185: }
186: assertNotNull(SelfTestCase.logsDir);
187: assertExists(SelfTestCase.logsDir);
188:
189: // Calculate the arcfile name. Find it in the arcDir. Should only be
190: // one. Then make an instance of ARCReader and call the validate on it.
191: ComplexType arcWriterProcessor = crawlOrder
192: .getSettingsHandler().getModule("Archiver");
193: String arcDirStr = (String) ((StringList) arcWriterProcessor
194: .getAttribute(ARCWriterProcessor.ATTR_PATH)).get(0);
195: File arcDir = null;
196: if (arcDirStr != null && arcDirStr.length() > 0
197: && arcDirStr.startsWith(File.separator)) {
198: arcDir = new File(arcDirStr);
199: } else {
200: arcDir = (arcDirStr != null && arcDirStr.length() > 0) ? new File(
201: SelfTestCase.logsDir, arcDirStr)
202: : SelfTestCase.logsDir;
203: }
204: assertNotNull(arcDir);
205: assertExists(arcDir);
206:
207: String prefix = ((String) arcWriterProcessor
208: .getAttribute(ARCWriterProcessor.ATTR_PREFIX));
209: assertNotNull(prefix);
210: assertNonEmpty(prefix);
211:
212: File[] arcs = FileUtils.getFilesWithPrefix(arcDir, prefix);
213: /*
214: if (arcs.length != 1) {
215: throw new IOException("Expected one only arc file. Found" +
216: " instead " + Integer.toString(arcs.length) + " files.");
217: }
218: */
219: SelfTestCase.readReader = new ARCReader[arcs.length];
220: SelfTestCase.arcFile = new File[arcs.length];
221: SelfTestCase.metaDatas = new List[arcs.length];
222: for (int i = 0; i < arcs.length; i++) {
223: File f = arcs[i];
224: SelfTestCase.arcFile[i] = f;
225: SelfTestCase.readReader[i] = ARCReaderFactory.get(f);
226: SelfTestCase.metaDatas[i] = SelfTestCase.readReader[i]
227: .validate();
228: }
229: SelfTestCase.initialized = true;
230: }
231:
232: /**
233: * @return Returns the arcDir.
234: */
235: protected static File[] getArcFiles() {
236: return arcFile;
237: }
238:
239: /**
240: * @return Returns the jobDir.
241: */
242: protected static File getCrawlJobDir() {
243: return SelfTestCase.crawlJobDir;
244: }
245:
246: /**
247: * @return Return the directory w/ logs in it.
248: */
249: protected static File getLogsDir() {
250: return SelfTestCase.logsDir;
251: }
252:
253: /**
254: * Returns the selftest read ARCReader.
255: *
256: * The returned ARCReader has been validated. Use it to get at metadata.
257: *
258: * @return Returns the readReader, an ARCReader that has been validated.
259: */
260: protected static ARCReader[] getReadReaders() {
261: return SelfTestCase.readReader;
262: }
263:
264: /**
265: * @return Returns list of ARCReader metadatas, the byproduct of calling
266: * validate.
267: */
268: protected static List[] getMetaDatas() {
269: return SelfTestCase.metaDatas;
270: }
271:
272: /**
273: * @return Returns the selftestURL.
274: */
275: public static String getSelftestURL() {
276: return SelfTestCase.selftestURL;
277: }
278:
279: /**
280: * @return Returns the selftestURL. URL returned is guaranteed to have
281: * a trailing '/'.
282: */
283: public static String getSelftestURLWithTrailingSlash() {
284: return selftestURL.endsWith("/") ? selftestURL : selftestURL
285: + "/";
286: }
287:
288: /**
289: * Calculates test name by stripping SelfTest from current class name.
290: *
291: * @return The name of the test.
292: */
293: public String getTestName() {
294: String classname = getClass().getName();
295: int selftestIndex = classname.indexOf(SELFTEST);
296: assertTrue("Class name ends with SelfTest", selftestIndex > 0);
297: int lastDotIndex = classname.lastIndexOf('.');
298: assertTrue("Package dot in unexpected location",
299: lastDotIndex + 1 < classname.length()
300: && lastDotIndex > 0);
301: return classname.substring(lastDotIndex + 1, selftestIndex);
302: }
303:
304: /**
305: * @return Returns the selftest webappDir.
306: */
307: public static File getHtdocs() {
308: return SelfTestCase.htdocs;
309: }
310:
311: /**
312: * @return Returns the crawlJob.
313: */
314: public static CrawlJob getCrawlJob() {
315: return crawlJob;
316: }
317:
318: /**
319: * Confirm passed files exist on disk under the test directory.
320: *
321: * @param files Files to test for existence under the test's directory.
322: * @return true if all files exist on disk.
323: */
324: public boolean filesExist(List files) {
325: boolean result = true;
326: for (Iterator i = files.iterator(); i.hasNext();) {
327: if (!fileExists((File) i.next())) {
328: result = false;
329: break;
330: }
331: }
332: return result;
333: }
334:
335: /**
336: * Confirm passed file exists on disk under the test directory.
337: *
338: * This method takes care of building up the file path under the selftest
339: * webapp. Just pass the file name.
340: *
341: * @param file Name of file to look for.
342: * @return True if file exists.
343: */
344: public boolean fileExists(File file) {
345: File testDir = new File(getHtdocs(), getTestName());
346: File fileOnDisk = new File(testDir, file.getPath());
347: return fileOnDisk.exists();
348: }
349:
350: /**
351: * Test passed list were all found in the arc.
352: *
353: * If more or less found, test fails.
354: *
355: * @param files List of files to find in the arc. No other files but these
356: * should be found in the arc.
357: */
358: public void testFilesInArc(List<File> files) {
359: testFilesInArc(files, filesFoundInArc());
360: }
361:
362: /**
363: * Test passed list were all found in the arc.
364: *
365: * If more or less found, test fails.
366: *
367: * @param files List of files to find in the arc. No other files but these
368: * should be found in the arc.
369: * @param foundFiles Files found in the arc.
370: */
371: public void testFilesInArc(List<File> files, List<File> foundFiles) {
372: assertTrue("All files are on disk: " + files, filesExist(files));
373: assertTrue("All found: " + files + ", " + foundFiles,
374: foundFiles.containsAll(files));
375: assertTrue("Same size: " + files + ", " + foundFiles,
376: foundFiles.size() == files.size());
377: }
378:
379: /**
380: * Find all files that belong to this test that are mentioned in the arc.
381: * @return List of unique found file File objects.
382: */
383: protected List<File> filesFoundInArc() {
384: String baseURL = getSelftestURLWithTrailingSlash();
385: if (baseURL.endsWith(getTestName() + '/')) {
386: // URL may already end in the test name for case where we're
387: // running one test only. If so, strip back the trailing '/'.
388: baseURL = baseURL.substring(0, baseURL.length() - 1);
389: } else {
390: baseURL += getTestName();
391: }
392: List[] metaDatas = getMetaDatas();
393: ARCRecordMetaData metaData = null;
394: List<File> filesFound = new ArrayList<File>();
395: for (int mdi = 0; mdi < metaDatas.length; mdi++) {
396: List list = metaDatas[mdi];
397: for (final Iterator i = list.iterator(); i.hasNext();) {
398: metaData = (ARCRecordMetaData) i.next();
399: String url = metaData.getUrl();
400: if (url.startsWith(baseURL)
401: && metaData.getMimetype().equalsIgnoreCase(
402: "text/html")) {
403: String fileName = url.substring(baseURL.length());
404: if (fileName.startsWith("/")) {
405: fileName = fileName.substring(1);
406: }
407: if (fileName != null && fileName.length() > 0) {
408: File f = new File(fileName);
409: if (!filesFound.contains(f)) {
410: // Don't add duplicates.
411: filesFound.add(new File(fileName));
412: }
413: }
414: }
415: }
416: }
417: return filesFound;
418: }
419: }
|