01: /* BlockFileSystem
02: *
03: * Created on September 12, 2006
04: *
05: * Copyright (C) 2006 Internet Archive.
06: *
07: * This file is part of the Heritrix web crawler (crawler.archive.org).
08: *
09: * Heritrix is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU Lesser Public License as published by
11: * the Free Software Foundation; either version 2.1 of the License, or
12: * any later version.
13: *
14: * Heritrix is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU Lesser Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser Public License
20: * along with Heritrix; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: */
23: package org.archive.util.ms;
24:
25: import java.io.IOException;
26:
27: import org.archive.io.SeekInputStream;
28:
29: /**
30: * Describes the internal file system contained in .doc files.
31: */
32: public interface BlockFileSystem {
33:
34: /**
35: * The size of a block in bytes.
36: */
37: int BLOCK_SIZE = 512;
38:
39: /**
40: * Returns the root entry of the file system. Subfiles and directories
41: * can be found by searching the returned entry.
42: *
43: * @return the root entry
44: * @throws IOException if an IO error occurs
45: */
46: public abstract Entry getRoot() throws IOException;
47:
48: /**
49: * Returns the number of the block that follows the given block.
50: * The internal block allocation tables are consulted to determine the
51: * next block. A return value that is less than zero indicates that
52: * there is no next block.
53: *
54: * @param block the number of block whose successor to return
55: * @return the successor of that block
56: * @throws IOException if an IO error occurs
57: */
58: public abstract int getNextBlock(int block) throws IOException;
59:
60: /**
61: * Returns the raw input stream for this file system.
62: * Typically this will be the random access file containing the .doc.
63: *
64: * @return the raw input stream for this file system
65: */
66: public abstract SeekInputStream getRawInput();
67:
68: }
|