001: /* GzippedInputStreamTest
002: *
003: * Created on May 4, 2005
004: *
005: * Copyright (C) 2005 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.io;
024:
025: import it.unimi.dsi.fastutil.io.RepositionableStream;
026:
027: import java.io.BufferedOutputStream;
028: import java.io.ByteArrayInputStream;
029: import java.io.File;
030: import java.io.FileOutputStream;
031: import java.io.IOException;
032: import java.io.InputStream;
033: import java.io.OutputStream;
034: import java.util.Iterator;
035:
036: import org.archive.util.TmpDirTestCase;
037:
038: /**
039: * @author stack
040: * @version $Date: 2006-08-04 00:13:51 +0000 (Fri, 04 Aug 2006) $, $Revision: 4431 $
041: */
042: public class GzippedInputStreamTest extends TmpDirTestCase {
043: /**
044: * Number of records in gzip member file.
045: */
046: final static int GZIPMEMBER_COUNT = 4;
047: final static String TEXT = "Some old text to compress.";
048: // Create file to use in tests below.
049: private File compressedFile = null;
050:
051: protected void setUp() throws Exception {
052: super .setUp();
053: this .compressedFile = createMultiGzipMembers();
054: }
055:
056: protected void tearDown() throws Exception {
057: if (this .compressedFile != null) {
058: this .compressedFile.delete();
059: }
060: super .tearDown();
061: }
062:
063: public static void main(String[] args) {
064: junit.textui.TestRunner.run(GzippedInputStreamTest.class);
065: }
066:
067: protected class RepositionableRandomAccessInputStream extends
068: RandomAccessInputStream implements RepositionableStream {
069: public RepositionableRandomAccessInputStream(final File file)
070: throws IOException {
071: super (file);
072: }
073:
074: public RepositionableRandomAccessInputStream(final File file,
075: final long offset) throws IOException {
076: super (file, offset);
077: }
078: }
079:
080: protected File createMultiGzipMembers() throws IOException {
081: final File f = new File(getTmpDir(), this .getClass().getName()
082: + ".gz");
083: OutputStream os = new BufferedOutputStream(
084: new FileOutputStream(f));
085: for (int i = 0; i < GZIPMEMBER_COUNT; i++) {
086: os.write(GzippedInputStream.gzip(TEXT.getBytes()));
087: }
088: os.close();
089: return f;
090: }
091:
092: public void testCountOfMembers() throws IOException {
093: InputStream is = new RepositionableRandomAccessInputStream(
094: this .compressedFile);
095: GzippedInputStream gis = new GzippedInputStream(is);
096: int records = 0;
097: // Get offset of second record. Will use it later in tests below.
098: long offsetOfSecondRecord = -1;
099: for (Iterator i = gis.iterator(); i.hasNext();) {
100: long offset = gis.position();
101: if (records == 1) {
102: offsetOfSecondRecord = offset;
103: }
104: is = (InputStream) i.next();
105: records++;
106: }
107: assertTrue("Record count is off " + records,
108: records == GZIPMEMBER_COUNT);
109: gis.close();
110:
111: // Test random record read.
112: is = new RepositionableRandomAccessInputStream(
113: this .compressedFile);
114: gis = new GzippedInputStream(is);
115: byte[] buffer = new byte[TEXT.length()];
116: // Seek to second record, read in gzip header.
117: gis.gzipMemberSeek(offsetOfSecondRecord);
118: gis.read(buffer);
119: String readString = new String(buffer);
120: assertEquals("Failed read", TEXT, readString);
121: gis.close();
122:
123: // Test the count we get makes sense after iterating through
124: // starting at second record.
125: is = new RepositionableRandomAccessInputStream(
126: this .compressedFile, offsetOfSecondRecord);
127: gis = new GzippedInputStream(is);
128: records = 0;
129: for (final Iterator i = gis.iterator(); i.hasNext(); i.next()) {
130: records++;
131: }
132: assertEquals(records, GZIPMEMBER_COUNT - 1 /*We started at 2nd record*/);
133: gis.close();
134: }
135:
136: public void testCompressedStream() throws IOException {
137: byte[] bytes = "test".getBytes();
138: ByteArrayInputStream baos = new ByteArrayInputStream(bytes);
139: assertFalse(GzippedInputStream.isCompressedStream(baos));
140:
141: byte[] gzippedMetaData = GzippedInputStream.gzip(bytes);
142: baos = new ByteArrayInputStream(gzippedMetaData);
143: assertTrue(GzippedInputStream.isCompressedStream(baos));
144:
145: gzippedMetaData = GzippedInputStream.gzip(bytes);
146: final RepositionableByteArrayInputStream rbaos = new RepositionableByteArrayInputStream(
147: gzippedMetaData);
148: final int totalBytes = gzippedMetaData.length;
149: assertTrue(GzippedInputStream
150: .isCompressedRepositionableStream(rbaos));
151: long available = rbaos.available();
152: assertEquals(available, totalBytes);
153: assertEquals(rbaos.position(), 0);
154: }
155:
156: private class RepositionableByteArrayInputStream extends
157: ByteArrayInputStream implements RepositionableStream {
158: public RepositionableByteArrayInputStream(final byte[] bytes) {
159: super (bytes);
160: }
161:
162: public void position(long p) {
163: this .pos = (int) p;
164: }
165:
166: public long position() {
167: return this.pos;
168: }
169: }
170: }
|