001: // kelondroEcoFS.java
002: // (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
003: // first published 14.01.2008 on http://yacy.net
004: //
005: // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
006: // $LastChangedRevision: 1986 $
007: // $LastChangedBy: orbiter $
008: //
009: // LICENSE
010: //
011: // This program is free software; you can redistribute it and/or modify
012: // it under the terms of the GNU General Public License as published by
013: // the Free Software Foundation; either version 2 of the License, or
014: // (at your option) any later version.
015: //
016: // This program is distributed in the hope that it will be useful,
017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: // GNU General Public License for more details.
020: //
021: // You should have received a copy of the GNU General Public License
022: // along with this program; if not, write to the Free Software
023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024:
025: package de.anomic.kelondro;
026:
027: import java.io.File;
028: import java.io.FileNotFoundException;
029: import java.io.FileOutputStream;
030: import java.io.IOException;
031: import java.io.RandomAccessFile;
032:
033: public class kelondroEcoFS {
034:
035: /*
036: * The EcoFS is a flat file with records of fixed length. The file does not contain
037: * any meta information and the first record starts right at file position 0
038: * The access rules are in such a way that a minimum of IO operations are necessary
039: * Two caches provide a mirror to content in the file: a read cache and a write buffer
040: * The read cache contains a number of entries from the file; a mirror that moves
041: * whenever information outside the mirror is requested.
042: * The write buffer always exists only at the end of the file. It contains only records
043: * that have never been written to the file before. When the write buffer is flushed,
044: * the file grows
045: * The record file may also shrink when the last entry of the file is removed.
046: * Removal of Entries inside the file is not possible, but such entries can be erased
047: * by overwriting the data with zero bytes
048: * All access to the file is made with byte[] that are generated outside of this class
049: * This class only references byte[] that are handed over to methods of this class.
050: */
051:
052: private RandomAccessFile raf;
053: private File tablefile;
054: protected int recordsize; // number of bytes in one record
055: private long cacheindex;
056: private int cachecount, buffercount; // number of entries in buffer
057: private byte[] cache, buffer, zero;
058:
059: private static final int maxBuffer = 4 * 1024; // stay below hard disc cache (is that necessary?)
060:
061: public kelondroEcoFS(File tablefile, int recordsize)
062: throws IOException {
063: this .tablefile = tablefile;
064: this .recordsize = recordsize;
065:
066: // initialize zero buffer
067: this .zero = new byte[recordsize];
068: for (int i = 0; i < recordsize; i++)
069: this .zero[i] = 0;
070:
071: // initialize table file
072: if (!tablefile.exists()) {
073: // make new file
074: FileOutputStream fos = null;
075: try {
076: fos = new FileOutputStream(tablefile);
077: } catch (FileNotFoundException e) {
078: // should not happen
079: e.printStackTrace();
080: }
081: try {
082: fos.close();
083: } catch (IOException e) {
084: }
085: }
086:
087: // open an existing table file
088: try {
089: raf = new RandomAccessFile(tablefile, "rw");
090: } catch (FileNotFoundException e) {
091: // should never happen
092: e.printStackTrace();
093: }
094:
095: // initialize cache and buffer
096: int maxrecords = Math.max(1, maxBuffer / recordsize);
097: cache = new byte[maxrecords * recordsize];
098: buffer = new byte[maxrecords * recordsize];
099: this .buffercount = 0;
100:
101: // first-time read of cache
102: fillCache(0);
103: }
104:
105: public static long tableSize(File tablefile, long recordsize) {
106: // returns number of records in table
107: if (!tablefile.exists())
108: return 0;
109: long size = tablefile.length();
110: assert size % recordsize == 0;
111: return size / (long) recordsize;
112: }
113:
114: public synchronized long size() throws IOException {
115: // return the number of records in file plus number of records in buffer
116: return filesize() + (long) this .buffercount;
117: }
118:
119: public File filename() {
120: return this .tablefile;
121: }
122:
123: private long filesize() throws IOException {
124: return raf.length() / (long) recordsize;
125: }
126:
127: private int inCache(long index) {
128: // checks if the index is inside the cache and returns the index offset inside
129: // the cache if the index is inside the cache
130: // returns -1 if the index is not in the cache
131: if ((index >= this .cacheindex)
132: && (index < this .cacheindex + this .cachecount)) {
133: return (int) (index - this .cacheindex);
134: }
135: return -1;
136: }
137:
138: private int inBuffer(long index) throws IOException {
139: // checks if the index is inside the buffer and returns the index offset inside
140: // the buffer if the index is inside the buffer
141: // returns -1 if the index is not in the buffer
142: long fs = filesize();
143: if ((index >= fs) && (index < fs + this .buffercount)) {
144: return (int) (index - fs);
145: }
146: return -1;
147: }
148:
149: private void fillCache(long index) throws IOException {
150: // load cache with copy of disc content; start with record at index
151: // if the record would overlap with the write buffer,
152: // its start is shifted forward until it fits
153:
154: // first check if the index is inside the current cache
155: assert inCache(index) < 0;
156: if (inCache(index) >= 0)
157: return;
158:
159: // calculate new start position
160: long fs = this .filesize();
161: if (index + this .cache.length / this .recordsize > fs) {
162: index = fs - this .cache.length / this .recordsize;
163: }
164: if (index < 0)
165: index = 0;
166:
167: // calculate number of records that shall be stored in the cache
168: this .cachecount = (int) Math.min(this .cache.length
169: / this .recordsize, this .filesize() - index);
170: assert this .cachecount >= 0;
171:
172: // check if we need to read 0 bytes from the file
173: this .cacheindex = index;
174: if (this .cachecount == 0)
175: return;
176:
177: // copy records from file to cache
178: raf.seek((long) this .recordsize * (long) index);
179: raf.read(this .cache, 0, this .recordsize * this .cachecount);
180: }
181:
182: private void flushBuffer() {
183: // write buffer to end of file
184: try {
185: raf.seek(raf.length());
186: raf.write(this .buffer, 0, this .recordsize
187: * this .buffercount);
188: } catch (IOException e) {
189: // TODO Auto-generated catch block
190: e.printStackTrace();
191: }
192: this .buffercount = 0;
193: }
194:
195: public synchronized void close() {
196: flushBuffer();
197:
198: // then close the file
199: try {
200: raf.close();
201: } catch (IOException e) {
202: e.printStackTrace();
203: }
204: raf = null;
205: buffer = null;
206: cache = null;
207: }
208:
209: public synchronized void get(long index, byte[] b, int start)
210: throws IOException {
211: assert b.length - start >= this .recordsize;
212: if (index >= size())
213: throw new IndexOutOfBoundsException("kelondroEcoFS.get("
214: + index + ") outside bounds (" + this .size() + ")");
215: // check if index is inside of cache
216: int p = inCache(index);
217: int q = (p >= 0) ? -1 : inBuffer(index);
218: if ((p < 0) && (q < 0)) {
219: // the index is outside of cache and buffer index. shift cache window
220: fillCache(index);
221: p = inCache(index);
222: assert p >= 0;
223: }
224: if (p >= 0) {
225: // read entry from the cache
226: System.arraycopy(this .cache, p * this .recordsize, b, start,
227: this .recordsize);
228: return;
229: }
230: if (q >= 0) {
231: // read entry from the buffer
232: System.arraycopy(this .buffer, q * this .recordsize, b,
233: start, this .recordsize);
234: return;
235: }
236: assert false;
237: }
238:
239: public synchronized void put(long index, byte[] b, int start)
240: throws IOException {
241: assert b.length - start >= this .recordsize;
242: if (index > size())
243: throw new IndexOutOfBoundsException("kelondroEcoFS.put("
244: + index + ") outside bounds (" + this .size() + ")");
245: // check if this is an empty entry
246:
247: if (isClean(b, start, this .recordsize)) {
248: clean(index);
249: return;
250: }
251:
252: // check if index is inside of cache
253: int p = inCache(index);
254: int q = (p >= 0) ? -1 : inBuffer(index);
255: if (p >= 0) {
256: // write entry to the cache and to the file
257: System.arraycopy(b, start, this .cache, p * this .recordsize,
258: this .recordsize);
259: raf.seek((long) index * (long) this .recordsize);
260: raf.write(b, start, this .recordsize);
261: return;
262: }
263: if (q >= 0) {
264: // write entry to the buffer
265: System.arraycopy(b, start, this .buffer,
266: q * this .recordsize, this .recordsize);
267: return;
268: }
269: if (index == size()) {
270: // append the record to the end of the file;
271:
272: // look if there is space in the buffer
273: int bufferpos = (int) (index - filesize());
274: if (bufferpos >= this .buffer.length / this .recordsize) {
275: assert this .buffercount == this .buffer.length
276: / this .recordsize;
277: // the record does not fit in current buffer
278: // write buffer
279: flushBuffer();
280: // write new entry to buffer
281: System.arraycopy(b, start, this .buffer, 0,
282: this .recordsize);
283: this .buffercount = 1;
284: } else {
285: System.arraycopy(b, start, this .buffer, bufferpos
286: * this .recordsize, this .recordsize);
287: this .buffercount++;
288: }
289: assert this .buffercount <= this .buffer.length
290: / this .recordsize;
291: } else {
292: // write the record directly to the file,
293: // do not care about the cache; this case was checked before
294: raf.seek((long) index * (long) this .recordsize);
295: raf.write(b, start, this .recordsize);
296: }
297: }
298:
299: public synchronized void add(byte[] b, int start)
300: throws IOException {
301: put(size(), b, start);
302: }
303:
304: private boolean isClean(byte[] b, int offset, int length) {
305: for (int i = 0; i < length; i++) {
306: if (b[i + offset] != 0)
307: return false;
308: }
309: return true;
310: }
311:
312: private boolean isClean(long index) throws IOException {
313: assert index < size();
314: // check if index is inside of cache
315: int p = inCache(index);
316: int q = (p >= 0) ? -1 : inBuffer(index);
317: if ((p < 0) && (q < 0)) {
318: // the index is outside of cache and buffer index. shift cache window
319: fillCache(index);
320: p = inCache(index);
321: assert p >= 0;
322: }
323: if (p >= 0) {
324: // check entry from the cache
325: return isClean(this .cache, p * this .recordsize,
326: this .recordsize);
327: }
328: if (q >= 0) {
329: // check entry from the buffer
330: return isClean(this .buffer, q * this .recordsize,
331: this .recordsize);
332: }
333: assert false;
334: return false;
335: }
336:
337: public synchronized void clean(long index, byte[] b, int start)
338: throws IOException {
339: // removes an entry by cleaning (writing zero bytes to the file)
340: // the entry that had been at the specific place before is copied to the given array b
341: // if the last entry in the file was cleaned, the file shrinks by the given record
342: // this is like
343: // get(index, b, start);
344: // put(index, zero, 0);
345: // plus an additional check if the file should shrink
346:
347: assert b.length - start >= this .recordsize;
348: if (index >= size())
349: throw new IndexOutOfBoundsException("kelondroEcoFS.clean("
350: + index + ") outside bounds (" + this .size() + ")");
351: if (index == size() - 1) {
352: cleanLast(b, start);
353: return;
354: }
355:
356: // check if index is inside of cache
357: int p = inCache(index);
358: int q = (p >= 0) ? -1 : inBuffer(index);
359: if ((p < 0) && (q < 0)) {
360: // the index is outside of cache and buffer index. shift cache window
361: fillCache(index);
362: p = inCache(index);
363: assert p >= 0;
364: }
365: if (p >= 0) {
366: // read entry from the cache
367: System.arraycopy(this .cache, p * this .recordsize, b, start,
368: this .recordsize);
369:
370: // write zero bytes to the cache and to the file
371: System.arraycopy(zero, 0, this .cache, p * this .recordsize,
372: this .recordsize);
373: this .raf.seek((long) index * (long) this .recordsize);
374: this .raf.write(zero, 0, this .recordsize);
375: return;
376: }
377: if (q >= 0) {
378: // read entry from the buffer
379: System.arraycopy(this .buffer, q * this .recordsize, b,
380: start, this .recordsize);
381: // write zero to the buffer
382: System.arraycopy(zero, 0, this .buffer, q * this .recordsize,
383: this .recordsize);
384: return;
385: }
386: assert false;
387: }
388:
389: public synchronized void clean(long index) throws IOException {
390: if (index >= size())
391: throw new IndexOutOfBoundsException("kelondroEcoFS.clean("
392: + index + ") outside bounds (" + this .size() + ")");
393: if (index == size() - 1) {
394: cleanLast();
395: return;
396: }
397:
398: // check if index is inside of cache
399: int p = inCache(index);
400: int q = (p >= 0) ? -1 : inBuffer(index);
401: if (p >= 0) {
402: // write zero bytes to the cache and to the file
403: System.arraycopy(zero, 0, this .cache, p * this .recordsize,
404: this .recordsize);
405: raf.seek((long) index * (long) this .recordsize);
406: raf.write(zero, 0, this .recordsize);
407: return;
408: }
409: if (q >= 0) {
410: // write zero to the buffer
411: System.arraycopy(zero, 0, this .buffer, q * this .recordsize,
412: this .recordsize);
413: return;
414: }
415:
416: raf.seek((long) index * (long) this .recordsize);
417: raf.write(zero, 0, this .recordsize);
418: }
419:
420: public synchronized void cleanLast(byte[] b, int start)
421: throws IOException {
422: cleanLast0(b, start);
423: long i;
424: while (((i = size()) > 0) && (isClean(i - 1))) {
425: //System.out.println("Extra clean/1: before size = " + size());
426: cleanLast0();
427: //System.out.println(" after size = " + size());
428: }
429: }
430:
431: private synchronized void cleanLast0(byte[] b, int start)
432: throws IOException {
433: // this is like
434: // clean(this.size() - 1, b, start);
435:
436: assert b.length - start >= this .recordsize;
437: // check if index is inside of cache
438: int p = inCache(this .size() - 1);
439: int q = (p >= 0) ? -1 : inBuffer(this .size() - 1);
440: if ((p < 0) && (q < 0)) {
441: // the index is outside of cache and buffer index. shift cache window
442: fillCache(this .size() - 1);
443: p = inCache(this .size() - 1);
444: assert p >= 0;
445: }
446: if (p >= 0) {
447: // read entry from the cache
448: System.arraycopy(this .cache, p * this .recordsize, b, start,
449: this .recordsize);
450: // shrink cache and file
451: assert this .buffercount == 0;
452: this .raf.setLength((long) (this .size() - 1)
453: * (long) this .recordsize);
454: this .cachecount--;
455: return;
456: }
457: if (q >= 0) {
458: // read entry from the buffer
459: System.arraycopy(this .buffer, q * this .recordsize, b,
460: start, this .recordsize);
461: // shrink buffer
462: assert this .buffercount > 0;
463: this .buffercount--;
464: return;
465: }
466: assert false;
467: }
468:
469: public synchronized void cleanLast() throws IOException {
470: cleanLast0();
471: long i;
472: while (((i = size()) > 0) && (isClean(i - 1))) {
473: //System.out.println("Extra clean/0: before size = " + size());
474: cleanLast0();
475: //System.out.println(" after size = " + size());
476: }
477: }
478:
479: private synchronized void cleanLast0() throws IOException {
480:
481: // check if index is inside of cache
482: long p = inCache(this .size() - 1);
483: long q = (p >= 0) ? -1 : inBuffer(this .size() - 1);
484: if (p >= 0) {
485: // shrink cache and file
486: assert this .buffercount == 0;
487: this .raf.setLength((long) (this .size() - 1)
488: * (long) this .recordsize);
489: this .cachecount--;
490: return;
491: }
492: if (q >= 0) {
493: // shrink buffer
494: assert this .buffercount > 0;
495: this .buffercount--;
496: return;
497: }
498: // check if file should shrink
499: assert this .buffercount == 0;
500: this .raf.setLength((long) (this .size() - 1)
501: * (long) this .recordsize);
502: }
503:
504: public static void main(String[] args) {
505: // open a file, add one entry and exit
506: File f = new File(args[0]);
507: if (f.exists())
508: f.delete();
509: try {
510: kelondroEcoFS t = new kelondroEcoFS(f, 8);
511: byte[] b = new byte[8];
512: t.add("01234567".getBytes(), 0);
513: t.add("ABCDEFGH".getBytes(), 0);
514: t.add("abcdefgh".getBytes(), 0);
515: t.add("--------".getBytes(), 0);
516: t.add("********".getBytes(), 0);
517: for (int i = 0; i < 1000; i++)
518: t.add("++++++++".getBytes(), 0);
519: t.add("=======0".getBytes(), 0);
520: t.add("=======1".getBytes(), 0);
521: t.add("=======2".getBytes(), 0);
522: t.cleanLast(b, 0);
523: System.out.println(new String(b));
524: //t.clean(2, b, 0);
525: System.out.println(new String(b));
526: t.get(1, b, 0);
527: System.out.println(new String(b));
528: t.put(1, "AbCdEfGh".getBytes(), 0);
529: t.get(1, b, 0);
530: System.out.println(new String(b));
531: t.get(3, b, 0);
532: System.out.println(new String(b));
533: t.get(4, b, 0);
534: System.out.println(new String(b));
535: System.out.println("size = " + t.size());
536: //t.clean(t.size() - 2);
537: t.cleanLast();
538: long start = System.currentTimeMillis();
539: long c = 0;
540: for (int i = 0; i < 100000; i++) {
541: c = t.size();
542: }
543: System.out.println("size() needs "
544: + ((System.currentTimeMillis() - start) / 100)
545: + " nanoseconds");
546: System.out.println("size = " + c);
547:
548: t.close();
549: } catch (IOException e) {
550: e.printStackTrace();
551: }
552: }
553:
554: }
|