001: /* ARCReaderFactoryTest.java
002: *
003: * $Id: ARCReaderFactoryTest.java 4512 2006-08-19 00:22:10Z stack-sf $
004: *
005: * Created Jul 15, 2005
006: *
007: * Copyright (C) 2005 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.io.arc;
026:
027: import java.io.File;
028: import java.io.IOException;
029: import java.net.MalformedURLException;
030: import java.net.URL;
031: import java.util.Iterator;
032:
033: import org.archive.util.TmpDirTestCase;
034:
035: public class ARCReaderFactoryTest extends TmpDirTestCase {
036: // public void testGetHttpURL() throws MalformedURLException, IOException {
037: // ARCReader reader = null;
038: // try {
039: // // TODO: I can get a single ARCRecord but trying to iterate from
040: // // a certain point is getting an EOR when I go to read GZIP header.
041: // reader = ARCReaderFactory.
042: // get(new URL("http://localhost/test.arc.gz"), 0);
043: // for (final Iterator i = reader.iterator(); i.hasNext();) {
044: // ARCRecord ar = (ARCRecord)i.next();
045: // System.out.println(ar.getMetaData().getUrl());
046: // }
047: // } finally {
048: // if (reader != null) {
049: // reader.close();
050: // }
051: // }
052: // }
053:
054: /**
055: * Test File URL.
056: * If a file url, we just use the pointed to file. There is no
057: * copying down to a file in tmp that gets cleaned up after close.
058: * @throws MalformedURLException
059: * @throws IOException
060: */
061: public void testGetFileURL() throws MalformedURLException,
062: IOException {
063: File arc = ARCWriterTest.createARCFile(getTmpDir(), true);
064: doGetFileUrl(arc);
065: }
066:
067: protected void doGetFileUrl(File arc) throws MalformedURLException,
068: IOException {
069: ARCReader reader = null;
070: File tmpFile = null;
071: try {
072: reader = ARCReaderFactory.get(new URL("file:////"
073: + arc.getAbsolutePath()));
074: tmpFile = null;
075: for (Iterator i = reader.iterator(); i.hasNext();) {
076: ARCRecord r = (ARCRecord) i.next();
077: if (tmpFile == null) {
078: tmpFile = new File(r.getMetaData().getArc());
079: }
080: }
081: assertTrue(tmpFile.exists());
082: } finally {
083: if (reader != null) {
084: reader.close();
085: }
086: }
087: assertTrue(tmpFile.exists());
088: }
089:
090: /**
091: * Test path or url.
092: * @throws MalformedURLException
093: * @throws IOException
094: */
095: public void testGetPathOrURL() throws MalformedURLException,
096: IOException {
097: File arc = ARCWriterTest.createARCFile(getTmpDir(), true);
098: ARCReader reader = ARCReaderFactory.get(arc.getAbsoluteFile());
099: assertNotNull(reader);
100: reader.close();
101: doGetFileUrl(arc);
102: }
103: }
|