001: /* RepositionableInputStream.java
002: *
003: * $Id: RepositionableInputStream.java 4826 2006-12-22 00:16:27Z stack-sf $
004: *
005: * Created Dec 20, 2005
006: *
007: * Copyright (C) 2005 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.io;
026:
027: import it.unimi.dsi.fastutil.io.RepositionableStream;
028:
029: import java.io.BufferedInputStream;
030: import java.io.IOException;
031: import java.io.InputStream;
032:
033: /**
034: * Wrapper around an {@link InputStream} to make a primitive Repositionable
035: * stream. Uses a {@link BufferedInputStream}. Calls mark on every read so
036: * we'll remember at least the last thing read (You can only backup on the
037: * last thing read -- not last 2 or 3 things read). Used by
038: * {@link GzippedInputStream} when reading streams over a network. Wraps a
039: * HTTP, etc., stream so we can back it up if needs be after the
040: * GZIP inflater has done a fill of its full buffer though it only needed
041: * the first few bytes to finish decompressing the current GZIP member.
042: *
043: * <p>TODO: More robust implementation. Tried to use the it.unimi.dsi.io
044: * FastBufferdInputStream but relies on FileChannel ByteBuffers and if not
045: * present -- as would be the case reading from a network stream, the main
046: * application for this instance -- then it expects the underlying stream
047: * implements RepositionableStream interface so chicken or egg problem.
048: * @author stack
049: */
050: public class RepositionableInputStream extends BufferedInputStream
051: implements RepositionableStream {
052: private long position = 0;
053: private long markPosition = -1;
054:
055: public RepositionableInputStream(InputStream in) {
056: super (in);
057: }
058:
059: public RepositionableInputStream(InputStream in, int size) {
060: super (in, size);
061: }
062:
063: public int read(byte[] b) throws IOException {
064: int read = super .read(b);
065: if (read != -1) {
066: position += read;
067: }
068: return read;
069: }
070:
071: public synchronized int read(byte[] b, int offset, int ct)
072: throws IOException {
073: // Mark the underlying stream so that we'll remember what we are about
074: // to read unless a mark has been set in this RepositionableStream
075: // (We have two levels of mark). In this latter case we want the
076: // underlying stream to preserve its mark position so aligns with
077: // this RS when eset is called.
078: if (!isMarked()) {
079: super .mark((ct > offset) ? ct - offset : ct);
080: }
081: int read = super .read(b, offset, ct);
082: if (read != -1) {
083: position += read;
084: }
085: return read;
086: }
087:
088: public int read() throws IOException {
089: // Mark the underlying stream so that we'll remember what we are about
090: // to read unless a mark has been set in this RepositionableStream
091: // (We have two levels of mark). In this latter case we want the
092: // underlying stream to preserve its mark position so aligns with
093: // this RS when eset is called.
094: if (!isMarked()) {
095: super .mark(1);
096: }
097: int c = super .read();
098: if (c != -1) {
099: position++;
100: }
101: return c;
102: }
103:
104: public void position(final long offset) {
105: if (this .position == offset) {
106: return;
107: }
108: int diff = (int) (offset - this .position);
109: long lowerBound = this .position - this .pos;
110: long upperBound = lowerBound + this .count;
111: if (offset < lowerBound || offset >= upperBound) {
112: throw new IllegalAccessError(
113: "Offset goes outside "
114: + "current this.buf (TODO: Do buffer fills if positive)");
115: }
116: this .position = offset;
117: this .pos += diff;
118: // Clear any mark.
119: this .markPosition = -1;
120: }
121:
122: public void mark(int readlimit) {
123: this .markPosition = this .position;
124: super .mark(readlimit);
125: }
126:
127: public void reset() throws IOException {
128: super .reset();
129: this .position = this .markPosition;
130: this .markPosition = -1;
131: }
132:
133: protected boolean isMarked() {
134: return this .markPosition != -1;
135: }
136:
137: public long position() {
138: return this.position;
139: }
140: }
|