001: /* BlockInputStream
002: *
003: * Created on September 12, 2006
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.util.ms;
024:
025: import java.io.IOException;
026:
027: import org.archive.io.SeekInputStream;
028: import static org.archive.util.ms.BlockFileSystem.BLOCK_SIZE;
029:
030: /**
031: * InputStream for a file contained in a BlockFileSystem.
032: */
033: public class BlockInputStream extends SeekInputStream {
034:
035: /**
036: * The starting block number.
037: */
038: private int start;
039:
040: /**
041: * The current block.
042: */
043: private int block;
044:
045: /**
046: * The BlockFileSystem that produced this stream.
047: */
048: private BlockFileSystem bfs;
049:
050: /**
051: * The raw input stream of the BlockFileSystem.
052: */
053: private SeekInputStream raw;
054:
055: /**
056: * The current logical position of this stream.
057: */
058: private long position;
059:
060: /**
061: * The current file pointer position of the raw input stream.
062: */
063: private long expectedRawPosition;
064:
065: /**
066: * The number of bytes read in the current block.
067: */
068: private int blockBytesRead;
069:
070: /**
071: * Constructor.
072: *
073: * @param bfs The block file system that owns this stream
074: * @param block The starting block number.
075: */
076: public BlockInputStream(BlockFileSystem bfs, int block)
077: throws IOException {
078: this .raw = bfs.getRawInput();
079: this .bfs = bfs;
080: this .start = block;
081: this .block = block;
082: this .position = 0;
083: seek(block, 0);
084: }
085:
086: private void seek(long block, long rem) throws IOException {
087: assert rem < BLOCK_SIZE;
088: long pos = (block + 1) * BLOCK_SIZE + rem;
089: blockBytesRead = (int) rem;
090: expectedRawPosition = pos;
091: raw.position(pos);
092: }
093:
094: private void ensureRawPosition() throws IOException {
095: if (raw.position() != expectedRawPosition) {
096: raw.position(expectedRawPosition);
097: }
098: }
099:
100: private boolean ensureBuffer() throws IOException {
101: if (block < 0) {
102: return false;
103: }
104: ensureRawPosition();
105: if (blockBytesRead < BLOCK_SIZE) {
106: return true;
107: }
108: block = bfs.getNextBlock(block);
109: if (block < 0) {
110: return false;
111: }
112: seek(block, 0);
113: return true;
114: }
115:
116: public long skip(long v) throws IOException {
117: // FIXME
118: int r = read();
119: return (r < 0) ? 0 : 1;
120: }
121:
122: public int read() throws IOException {
123: if (!ensureBuffer()) {
124: return -1;
125: }
126: int r = raw.read();
127: position++;
128: expectedRawPosition++;
129: blockBytesRead++;
130: return r;
131: }
132:
133: public int read(byte[] b, int ofs, int len) throws IOException {
134: if (!ensureBuffer()) {
135: return 0;
136: }
137: int rem = BLOCK_SIZE - (int) (position % BLOCK_SIZE);
138: len = Math.min(len, rem);
139: int c = raw.read(b, ofs, len);
140: position += c;
141: expectedRawPosition += c;
142: blockBytesRead++;
143: return len;
144: }
145:
146: public int read(byte[] b) throws IOException {
147: return read(b, 0, b.length);
148: }
149:
150: public long position() {
151: return position;
152: }
153:
154: public void position(long v) throws IOException {
155: ensureRawPosition();
156: if (v == position) {
157: return;
158: }
159:
160: // If new position is in same block, just seek.
161: if (v / BLOCK_SIZE == position / BLOCK_SIZE) {
162: long rem = v % BLOCK_SIZE;
163: seek(block, rem);
164: position = v;
165: return;
166: }
167:
168: if (v > position) {
169: seekAfter(v);
170: } else {
171: seekBefore(v);
172: }
173: }
174:
175: private void seekAfter(long v) throws IOException {
176: long currentBlock = position / BLOCK_SIZE;
177: long destBlock = v / BLOCK_SIZE;
178: long blockAdvance = destBlock - currentBlock;
179: for (int i = 0; i < blockAdvance; i++) {
180: block = bfs.getNextBlock(block);
181: }
182: seek(block, v % BLOCK_SIZE);
183: position = v;
184: }
185:
186: private void seekBefore(long v) throws IOException {
187: long blockAdvance = (v - 1) / BLOCK_SIZE;
188: block = start;
189: for (int i = 0; i < blockAdvance; i++) {
190: block = bfs.getNextBlock(block);
191: }
192: seek(block, v % BLOCK_SIZE);
193: }
194: }
|