Source Code Cross Referenced for SelfTestCase.java in  » Web-Crawler » heritrix » org » archive » crawler » selftest » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Web Crawler » heritrix » org.archive.crawler.selftest 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /* SelfTestCase
002:         *
003:         * Created on Feb 4, 2004
004:         *
005:         * Copyright (C) 2004 Internet Archive.
006:         *
007:         * This file is part of the Heritrix web crawler (crawler.archive.org).
008:         *
009:         * Heritrix is free software; you can redistribute it and/or modify
010:         * it under the terms of the GNU Lesser Public License as published by
011:         * the Free Software Foundation; either version 2.1 of the License, or
012:         * any later version.
013:         *
014:         * Heritrix is distributed in the hope that it will be useful,
015:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
016:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017:         * GNU Lesser Public License for more details.
018:         *
019:         * You should have received a copy of the GNU Lesser Public License
020:         * along with Heritrix; if not, write to the Free Software
021:         * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022:         */
023:        package org.archive.crawler.selftest;
024:
025:        import java.io.File;
026:        import java.io.FileNotFoundException;
027:        import java.io.IOException;
028:        import java.util.ArrayList;
029:        import java.util.Iterator;
030:        import java.util.List;
031:
032:        import javax.management.AttributeNotFoundException;
033:        import javax.management.MBeanException;
034:        import javax.management.ReflectionException;
035:
036:        import junit.framework.TestCase;
037:
038:        import org.archive.crawler.admin.CrawlJob;
039:        import org.archive.crawler.datamodel.CrawlOrder;
040:        import org.archive.crawler.settings.ComplexType;
041:        import org.archive.crawler.settings.StringList;
042:        import org.archive.crawler.writer.ARCWriterProcessor;
043:        import org.archive.io.arc.ARCReader;
044:        import org.archive.io.arc.ARCReaderFactory;
045:        import org.archive.io.arc.ARCRecordMetaData;
046:        import org.archive.util.FileUtils;
047:
048:        /**
049:         * Base class for integrated selftest unit tests.
050:         *
051:         * Has utility for integrated selftest such as location of selftest generated
052:         * arc file.
053:         *
054:         * @author stack
055:         * @version $Id: SelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
056:         */
057:        public abstract class SelfTestCase extends TestCase {
058:            /**
059:             * Suffix for selftest classes.
060:             */
061:            protected static final String SELFTEST = "SelfTest";
062:
063:            private static CrawlJob crawlJob = null;
064:            private static File crawlJobDir = null;
065:            private static File[] arcFile = null;
066:            private static String selftestURL = null;
067:
068:            /**
069:             * Directory logs are kept in.
070:             */
071:            private static File logsDir = null;
072:
073:            /**
074:             * Has the static initializer for this class been run.
075:             */
076:            private static boolean initialized = false;
077:
078:            /**
079:             * The selftest webapp htdocs directory.
080:             */
081:            private static File htdocs = null;
082:
083:            /**
084:             * A reference to an ARCReader on which the validate method has been called.
085:             * Can be used to walk the metadata.
086:             *
087:             * @see org.archive.io.arc.ARCReader#validate()
088:             */
089:            private static ARCReader[] readReader = null;
090:
091:            /**
092:             * Metadata list from the arc reader.
093:             *
094:             * Gotten as byproduct of calling validate on the arcreader.
095:             */
096:            private static List[] metaDatas;
097:
098:            public SelfTestCase() {
099:                super ();
100:            }
101:
102:            public SelfTestCase(String testName) {
103:                super (testName);
104:            }
105:
106:            public void testNothing() {
107:                // dummy test that always succeeds; prevents warning of no tests found
108:                // when running 'all JUnit tests' in Heritrix project
109:            }
110:
111:            public void assertInitialized() {
112:                assertTrue("SelfTestCase.initialize() not called "
113:                        + "before running selftest.", initialized);
114:            }
115:
116:            /**
117:             * Test non null and not empty.
118:             *
119:             * @param str String to test.
120:             * @return The passed string.
121:             * @throws IllegalArgumentException if null or empty string.
122:             */
123:            protected static void assertNonEmpty(String str) {
124:                assertTrue("String " + str + " is empty", str.length() > 0);
125:            }
126:
127:            /**
128:             * Test nonull and exits.
129:             *
130:             * @param file File to test.
131:             * @return Passed file.
132:             * @throws FileNotFoundException passed file doesn't exist.
133:             */
134:            protected static void assertExists(File file) {
135:                assertTrue("File " + file + " doesn't exist", file.exists());
136:            }
137:
138:            /**
139:             * Static initializer.
140:             *
141:             * Must be called before instantiation of any tests based off this class.
142:             *
143:             * @param url URL to selftest webapp.
144:             * @param job The selftest crawl job.
145:             * @param jobDir Job output directory.  Has the seed file, the order file
146:             * and logs.
147:             * @param docs Expanded webapp directory location.
148:             *
149:             * @throws IOException if nonexistent directories passed.
150:             */
151:            public static synchronized void initialize(final String url,
152:                    final CrawlJob job, final File jobDir, final File docs)
153:                    throws IOException, AttributeNotFoundException,
154:                    MBeanException, ReflectionException, InterruptedException {
155:                assertNotNull(url);
156:                assertNonEmpty(url);
157:                SelfTestCase.selftestURL = url.endsWith("/") ? url : url + "/";
158:
159:                assertNotNull(job);
160:                SelfTestCase.crawlJob = job;
161:
162:                assertNotNull(jobDir);
163:                assertExists(jobDir);
164:                SelfTestCase.crawlJobDir = jobDir;
165:
166:                assertNotNull(docs);
167:                assertExists(docs);
168:                SelfTestCase.htdocs = docs;
169:
170:                // Calculate the logs directory.  If diskPath is not absolute, then logs
171:                // are in the jobs directory under the diskPath subdirectory.  Guard
172:                // against case where diskPath is empty.
173:                CrawlOrder crawlOrder = job.getSettingsHandler().getOrder();
174:                assertNotNull(crawlOrder);
175:
176:                String diskPath = (String) crawlOrder.getAttribute(null,
177:                        CrawlOrder.ATTR_DISK_PATH);
178:                if (diskPath != null && diskPath.length() > 0
179:                        && diskPath.startsWith(File.separator)) {
180:                    SelfTestCase.logsDir = new File(diskPath);
181:                } else {
182:                    SelfTestCase.logsDir = (diskPath != null && diskPath
183:                            .length() > 0) ? new File(jobDir, diskPath)
184:                            : jobDir;
185:                }
186:                assertNotNull(SelfTestCase.logsDir);
187:                assertExists(SelfTestCase.logsDir);
188:
189:                // Calculate the arcfile name.  Find it in the arcDir.  Should only be
190:                // one. Then make an instance of ARCReader and call the validate on it.
191:                ComplexType arcWriterProcessor = crawlOrder
192:                        .getSettingsHandler().getModule("Archiver");
193:                String arcDirStr = (String) ((StringList) arcWriterProcessor
194:                        .getAttribute(ARCWriterProcessor.ATTR_PATH)).get(0);
195:                File arcDir = null;
196:                if (arcDirStr != null && arcDirStr.length() > 0
197:                        && arcDirStr.startsWith(File.separator)) {
198:                    arcDir = new File(arcDirStr);
199:                } else {
200:                    arcDir = (arcDirStr != null && arcDirStr.length() > 0) ? new File(
201:                            SelfTestCase.logsDir, arcDirStr)
202:                            : SelfTestCase.logsDir;
203:                }
204:                assertNotNull(arcDir);
205:                assertExists(arcDir);
206:
207:                String prefix = ((String) arcWriterProcessor
208:                        .getAttribute(ARCWriterProcessor.ATTR_PREFIX));
209:                assertNotNull(prefix);
210:                assertNonEmpty(prefix);
211:
212:                File[] arcs = FileUtils.getFilesWithPrefix(arcDir, prefix);
213:                /*
214:                if (arcs.length != 1) {
215:                    throw new IOException("Expected one only arc file.  Found" +
216:                        " instead " + Integer.toString(arcs.length) + " files.");
217:                }
218:                 */
219:                SelfTestCase.readReader = new ARCReader[arcs.length];
220:                SelfTestCase.arcFile = new File[arcs.length];
221:                SelfTestCase.metaDatas = new List[arcs.length];
222:                for (int i = 0; i < arcs.length; i++) {
223:                    File f = arcs[i];
224:                    SelfTestCase.arcFile[i] = f;
225:                    SelfTestCase.readReader[i] = ARCReaderFactory.get(f);
226:                    SelfTestCase.metaDatas[i] = SelfTestCase.readReader[i]
227:                            .validate();
228:                }
229:                SelfTestCase.initialized = true;
230:            }
231:
232:            /**
233:             * @return Returns the arcDir.
234:             */
235:            protected static File[] getArcFiles() {
236:                return arcFile;
237:            }
238:
239:            /**
240:             * @return Returns the jobDir.
241:             */
242:            protected static File getCrawlJobDir() {
243:                return SelfTestCase.crawlJobDir;
244:            }
245:
246:            /**
247:             * @return Return the directory w/ logs in it.
248:             */
249:            protected static File getLogsDir() {
250:                return SelfTestCase.logsDir;
251:            }
252:
253:            /**
254:             * Returns the selftest read ARCReader.
255:             *
256:             * The returned ARCReader has been validated.  Use it to get at metadata.
257:             *
258:             * @return Returns the readReader, an ARCReader that has been validated.
259:             */
260:            protected static ARCReader[] getReadReaders() {
261:                return SelfTestCase.readReader;
262:            }
263:
264:            /**
265:             * @return Returns list of ARCReader metadatas, the byproduct of calling
266:             * validate.
267:             */
268:            protected static List[] getMetaDatas() {
269:                return SelfTestCase.metaDatas;
270:            }
271:
272:            /**
273:             * @return Returns the selftestURL.
274:             */
275:            public static String getSelftestURL() {
276:                return SelfTestCase.selftestURL;
277:            }
278:
279:            /**
280:             * @return Returns the selftestURL.  URL returned is guaranteed to have
281:             * a trailing '/'.
282:             */
283:            public static String getSelftestURLWithTrailingSlash() {
284:                return selftestURL.endsWith("/") ? selftestURL : selftestURL
285:                        + "/";
286:            }
287:
288:            /**
289:             * Calculates test name by stripping SelfTest from current class name.
290:             *
291:             * @return The name of the test.
292:             */
293:            public String getTestName() {
294:                String classname = getClass().getName();
295:                int selftestIndex = classname.indexOf(SELFTEST);
296:                assertTrue("Class name ends with SelfTest", selftestIndex > 0);
297:                int lastDotIndex = classname.lastIndexOf('.');
298:                assertTrue("Package dot in unexpected location",
299:                        lastDotIndex + 1 < classname.length()
300:                                && lastDotIndex > 0);
301:                return classname.substring(lastDotIndex + 1, selftestIndex);
302:            }
303:
304:            /**
305:             * @return Returns the selftest webappDir.
306:             */
307:            public static File getHtdocs() {
308:                return SelfTestCase.htdocs;
309:            }
310:
311:            /**
312:             * @return Returns the crawlJob.
313:             */
314:            public static CrawlJob getCrawlJob() {
315:                return crawlJob;
316:            }
317:
318:            /**
319:             * Confirm passed files exist on disk under the test directory.
320:             *
321:             * @param files Files to test for existence under the test's directory.
322:             * @return true if all files exist on disk.
323:             */
324:            public boolean filesExist(List files) {
325:                boolean result = true;
326:                for (Iterator i = files.iterator(); i.hasNext();) {
327:                    if (!fileExists((File) i.next())) {
328:                        result = false;
329:                        break;
330:                    }
331:                }
332:                return result;
333:            }
334:
335:            /**
336:             * Confirm passed file exists on disk under the test directory.
337:             *
338:             * This method takes care of building up the file path under the selftest
339:             * webapp.  Just pass the file name.
340:             *
341:             * @param file Name of file to look for.
342:             * @return True if file exists.
343:             */
344:            public boolean fileExists(File file) {
345:                File testDir = new File(getHtdocs(), getTestName());
346:                File fileOnDisk = new File(testDir, file.getPath());
347:                return fileOnDisk.exists();
348:            }
349:
350:            /**
351:             * Test passed list were all found in the arc.
352:             *
353:             * If more or less found, test fails.
354:             *
355:             * @param files List of files to find in the arc.  No other files but these
356:             * should be found in the arc.
357:             */
358:            public void testFilesInArc(List<File> files) {
359:                testFilesInArc(files, filesFoundInArc());
360:            }
361:
362:            /**
363:             * Test passed list were all found in the arc.
364:             *
365:             * If more or less found, test fails.
366:             *
367:             * @param files List of files to find in the arc.  No other files but these
368:             * should be found in the arc.
369:             * @param foundFiles Files found in the arc.
370:             */
371:            public void testFilesInArc(List<File> files, List<File> foundFiles) {
372:                assertTrue("All files are on disk: " + files, filesExist(files));
373:                assertTrue("All found: " + files + ", " + foundFiles,
374:                        foundFiles.containsAll(files));
375:                assertTrue("Same size: " + files + ", " + foundFiles,
376:                        foundFiles.size() == files.size());
377:            }
378:
379:            /**
380:             * Find all files that belong to this test that are mentioned in the arc.
381:             * @return List of unique found file File objects.
382:             */
383:            protected List<File> filesFoundInArc() {
384:                String baseURL = getSelftestURLWithTrailingSlash();
385:                if (baseURL.endsWith(getTestName() + '/')) {
386:                    // URL may already end in the test name for case where we're
387:                    // running one test only.  If so, strip back the trailing '/'.
388:                    baseURL = baseURL.substring(0, baseURL.length() - 1);
389:                } else {
390:                    baseURL += getTestName();
391:                }
392:                List[] metaDatas = getMetaDatas();
393:                ARCRecordMetaData metaData = null;
394:                List<File> filesFound = new ArrayList<File>();
395:                for (int mdi = 0; mdi < metaDatas.length; mdi++) {
396:                    List list = metaDatas[mdi];
397:                    for (final Iterator i = list.iterator(); i.hasNext();) {
398:                        metaData = (ARCRecordMetaData) i.next();
399:                        String url = metaData.getUrl();
400:                        if (url.startsWith(baseURL)
401:                                && metaData.getMimetype().equalsIgnoreCase(
402:                                        "text/html")) {
403:                            String fileName = url.substring(baseURL.length());
404:                            if (fileName.startsWith("/")) {
405:                                fileName = fileName.substring(1);
406:                            }
407:                            if (fileName != null && fileName.length() > 0) {
408:                                File f = new File(fileName);
409:                                if (!filesFound.contains(f)) {
410:                                    // Don't add duplicates.
411:                                    filesFound.add(new File(fileName));
412:                                }
413:                            }
414:                        }
415:                    }
416:                }
417:                return filesFound;
418:            }
419:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.