01: /* BackgroundImageExtractionSelfTest
02: *
03: * Created on Jan 29, 2004
04: *
05: * Copyright (C) 2004 Internet Archive.
06: *
07: * This file is part of the Heritrix web crawler (crawler.archive.org).
08: *
09: * Heritrix is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU Lesser Public License as published by
11: * the Free Software Foundation; either version 2.1 of the License, or
12: * any later version.
13: *
14: * Heritrix is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU Lesser Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser Public License
20: * along with Heritrix; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: */
23: package org.archive.crawler.selftest;
24:
25: import java.io.File;
26: import java.util.Iterator;
27: import java.util.List;
28:
29: import org.archive.io.arc.ARCRecordMetaData;
30:
31: /**
32: * Test the crawler can find background images in pages.
33: *
34: * @author stack
35: * @version $Id: BackgroundImageExtractionSelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
36: */
37: public class BackgroundImageExtractionSelfTestCase extends SelfTestCase {
38: /**
39: * The name of the background image the crawler is supposed to find.
40: */
41: private static final String IMAGE_NAME = "example-background-image.jpeg";
42:
43: private static final String JPEG = "image/jpeg";
44:
45: /**
46: * Read ARC file for the background image the file that contained it.
47: *
48: * Look that there is only one instance of the background image in the
49: * ARC and that it is of the same size as the image in the webapp dir.
50: */
51: public void stestBackgroundImageExtraction() {
52: assertInitialized();
53: String relativePath = getTestName() + '/' + IMAGE_NAME;
54: String url = getSelftestURLWithTrailingSlash() + relativePath;
55: File image = new File(getHtdocs(), relativePath);
56: assertTrue("Image exists", image.exists());
57: List[] metaDatas = getMetaDatas();
58: boolean found = false;
59: ARCRecordMetaData metaData = null;
60: for (int mi = 0; mi < metaDatas.length; mi++) {
61: List list = metaDatas[mi];
62: for (final Iterator i = list.iterator(); i.hasNext();) {
63: metaData = (ARCRecordMetaData) i.next();
64: if (metaData.getUrl().equals(url)
65: && metaData.getMimetype()
66: .equalsIgnoreCase(JPEG)) {
67: if (!found) {
68: found = true;
69: } else {
70: fail("Found a 2nd instance of " + url);
71: }
72: }
73: }
74: }
75: }
76: }
|