001: // kelondroCache.java
002: // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
003: // first published 26.10.2006 on http://www.anomic.de
004: //
005: // This is a part of YaCy, a peer-to-peer based web search engine
006: //
007: // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
008: // $LastChangedRevision: 1986 $
009: // $LastChangedBy: orbiter $
010: //
011: // LICENSE
012: //
013: // This program is free software; you can redistribute it and/or modify
014: // it under the terms of the GNU General Public License as published by
015: // the Free Software Foundation; either version 2 of the License, or
016: // (at your option) any later version.
017: //
018: // This program is distributed in the hope that it will be useful,
019: // but WITHOUT ANY WARRANTY; without even the implied warranty of
020: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
021: // GNU General Public License for more details.
022: //
023: // You should have received a copy of the GNU General Public License
024: // along with this program; if not, write to the Free Software
025: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
026:
027: package de.anomic.kelondro;
028:
029: import java.io.IOException;
030: import java.util.ArrayList;
031: import java.util.Date;
032: import java.util.HashMap;
033: import java.util.Iterator;
034: import java.util.List;
035: import java.util.Map;
036: import java.util.TreeMap;
037:
038: import de.anomic.kelondro.kelondroRow.Entry;
039: import de.anomic.server.serverMemory;
040:
041: public class kelondroCache implements kelondroIndex {
042:
    // this is a combined read cache and write buffer
    // the design provides for four tables:
    // - a read-cache (hit cache)
    // - a miss-cache
    // - a write buffer for rows that are not contained in the target index
    // - a write buffer for rows that are known to be contained in the target index
    // in this implementation only the hit cache and the miss cache are maintained;
    // writes are passed directly to the backend index (writeBufferSize() returns 0)
    // furthermore, if we access a kelondroFlexTable, we can use the RAM index of the underlying index
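    //
    // usage sketch (illustrative; the construction of the backing index is assumed here
    // and depends on the concrete kelondroIndex implementation):
    //
    //   kelondroIndex backend = ...; // e.g. an opened kelondroFlexTable
    //   kelondroCache cache = new kelondroCache(backend);
    //   kelondroRow.Entry row = cache.row().newEntry(rowBytes); // rowBytes: a serialized row matching cache.row()
    //   cache.put(row);                                         // written through to the backend and learned by the hit cache
    //   kelondroRow.Entry hit = cache.get(row.getPrimaryKeyBytes()); // now answered from the hit cache
    //   cache.remove(row.getPrimaryKeyBytes(), true);           // the removal also seeds the miss cache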
050:
    // static object tracker; stores information about object cache usage
    private static final TreeMap<String, kelondroCache> objectTracker = new TreeMap<String, kelondroCache>();
    private static long memStopGrow = 10000000; // the cache stops growing if less than this amount of memory is available
    private static long memStartShrink = 6000000; // the cache starts shrinking if less than this amount of memory is available
055:
056: // class objects
057: private kelondroRowSet readHitCache;
058: private kelondroRowSet readMissCache;
059: private kelondroIndex index;
060: private kelondroRow keyrow;
061: private int readHit, readMiss, writeUnique, writeDouble,
062: cacheDelete, cacheFlush;
063: private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble,
064: hasnotDelete;
065:
    public kelondroCache(kelondroIndex backupIndex) {
        this.index = backupIndex;
        init();
        objectTracker.put(backupIndex.filename(), this);
    }
071:
    private void init() {
        this.keyrow = new kelondroRow(
                new kelondroColumn[] { index.row().column(index.row().primaryKeyIndex) },
                index.row().objectOrder, 0);
        this.readHitCache = new kelondroRowSet(index.row(), 0);
        this.readMissCache = new kelondroRowSet(this.keyrow, 0);
        this.readHit = 0;
        this.readMiss = 0;
        this.writeUnique = 0;
        this.writeDouble = 0;
        this.cacheDelete = 0;
        this.cacheFlush = 0;
        this.hasnotHit = 0;
        this.hasnotMiss = 0;
        this.hasnotUnique = 0;
        this.hasnotDouble = 0;
        this.hasnotDelete = 0;
    }
090:
091: public final int cacheObjectChunkSize() {
092: return index.row().objectsize;
093: }
094:
095: public int writeBufferSize() {
096: return 0;
097: }
098:
099: public kelondroProfile profile() {
100: return index.profile(); // TODO: implement own profile and merge with global
101: }
102:
103: public static void setCacheGrowStati(long memStopGrowNew,
104: long memStartShrinkNew) {
105: memStopGrow = memStopGrowNew;
106: memStartShrink = memStartShrinkNew;
107: }
108:
109: public static long getMemStopGrow() {
110: return memStopGrow;
111: }
112:
113: public static long getMemStartShrink() {
114: return memStartShrink;
115: }
116:
117: public static final Iterator<String> filenames() {
118: // iterates string objects; all file names from record tracker
119: return objectTracker.keySet().iterator();
120: }
121:
    public static final Map<String, String> memoryStats(String filename) {
        // returns a map for each file in the tracker;
        // the map contains properties of the record objects,
        // i.e. for cache memory allocation
        kelondroCache theObjectsCache = objectTracker.get(filename);
        return theObjectsCache.memoryStats();
    }
130:
    private final Map<String, String> memoryStats() {
        // returns statistical data about this object
        HashMap<String, String> map = new HashMap<String, String>();
        map.put("objectHitChunkSize", (readHitCache == null) ? "0"
                : Integer.toString(readHitCache.rowdef.objectsize));
        map.put("objectHitCacheCount", (readHitCache == null) ? "0"
                : Integer.toString(readHitCache.size()));
        map.put("objectHitMem", (readHitCache == null) ? "0"
                : Integer.toString((int) (readHitCache.rowdef.objectsize
                        * readHitCache.size() * kelondroRowCollection.growfactor)));
        map.put("objectHitCacheReadHit", Integer.toString(readHit));
        map.put("objectHitCacheReadMiss", Integer.toString(readMiss));
        map.put("objectHitCacheWriteUnique", Integer.toString(writeUnique));
        map.put("objectHitCacheWriteDouble", Integer.toString(writeDouble));
        map.put("objectHitCacheDeletes", Integer.toString(cacheDelete));
        map.put("objectHitCacheFlushes", Integer.toString(cacheFlush));

        map.put("objectMissChunkSize", (readMissCache == null) ? "0"
                : Integer.toString(readMissCache.rowdef.objectsize));
        map.put("objectMissCacheCount", (readMissCache == null) ? "0"
                : Integer.toString(readMissCache.size()));
        map.put("objectMissMem", (readMissCache == null) ? "0"
                : Integer.toString((int) (readMissCache.rowdef.objectsize
                        * readMissCache.size() * kelondroRowCollection.growfactor)));
        map.put("objectMissCacheReadHit", Integer.toString(hasnotHit));
        map.put("objectMissCacheReadMiss", Integer.toString(hasnotMiss));
        map.put("objectMissCacheWriteUnique", Integer.toString(hasnotUnique));
        map.put("objectMissCacheWriteDouble", Integer.toString(hasnotDouble));
        map.put("objectMissCacheDeletes", Integer.toString(hasnotDelete));
        map.put("objectMissCacheFlushes", "0"); // a miss cache flush can only happen if we have a deletion cache (which we don't have)

        // future feature .. map.put("objectElderTimeRead", index.profile().)
        return map;
    }
180:
    private int cacheGrowStatus() {
        return kelondroCachedRecords.cacheGrowStatus(serverMemory.available(), memStopGrow, memStartShrink);
    }
185:
186: private boolean checkMissSpace() {
187: // returns true if it is allowed to write into this cache
188: if (cacheGrowStatus() < 1) {
189: if (readMissCache != null) {
190: readMissCache.clear();
191: }
192: return false;
193: }
194: return true;
195: }
196:
197: private boolean checkHitSpace() throws IOException {
198: // returns true if it is allowed to write into this cache
199: int status = cacheGrowStatus();
200: if (status < 1) {
201: if (readHitCache != null) {
202: readHitCache.clear();
203: }
204: return false;
205: }
206: if (status < 2) {
207: if (readHitCache != null)
208: readHitCache.clear();
209: }
210: return true;
211: }
212:
213: public synchronized void close() {
214: index.close();
215: readHitCache = null;
216: readMissCache = null;
217: }
218:
219: public boolean has(byte[] key) throws IOException {
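        // answered via get(): the lookup first consults the miss cache and the hit cache
        // and, if the backend index must be asked, lets the caches learn from the result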
220: return (get(key) != null);
221: }
222:
223: public synchronized Entry get(byte[] key) throws IOException {
224: // first look into the miss cache
225: if (readMissCache != null) {
            if (readMissCache.get(key) != null) {
                this.hasnotHit++;
                return null;
            } else {
                this.hasnotMiss++;
            }
232: }
233:
234: Entry entry = null;
235:
236: // then try the hit cache and the buffers
237: if (readHitCache != null) {
238: entry = readHitCache.get(key);
239: if (entry != null) {
                this.readHit++;
241: return entry;
242: }
243: }
244:
245: // finally ask the backend index
        this.readMiss++;
247: entry = index.get(key);
248: // learn from result
249: if (entry == null) {
            if ((checkMissSpace()) && (readMissCache != null)) {
                kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(key));
                if (dummy == null)
                    this.hasnotUnique++;
                else
                    this.hasnotDouble++;
257: }
258: return null;
259: } else {
260: if ((checkHitSpace()) && (readHitCache != null)) {
261: kelondroRow.Entry dummy = readHitCache.put(entry);
                if (dummy == null)
                    this.writeUnique++;
                else
                    this.writeDouble++;
266: }
267: return entry;
268: }
269: }
270:
271: public synchronized void putMultiple(List<Entry> rows)
272: throws IOException {
273: Iterator<Entry> i = rows.iterator();
274: while (i.hasNext())
275: put(i.next());
276: }
277:
278: public synchronized void putMultiple(List<Entry> rows,
279: Date entryDate) throws IOException {
280: Iterator<Entry> i = rows.iterator();
281: while (i.hasNext())
282: put(i.next(), entryDate);
283: }
284:
285: public synchronized Entry put(Entry row) throws IOException {
286: assert (row != null);
287: assert (row.columns() == row().columns());
288: //assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
289:
290: byte[] key = row.getPrimaryKeyBytes();
291: checkHitSpace();
292:
293: // remove entry from miss- and hit-cache
294: if (readMissCache != null) {
            if (readMissCache.remove(key, true) != null) {
                this.hasnotHit++;
                // the entry did not exist before
                index.put(row); // write to backend
                if (readHitCache != null) {
                    kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
                    if (dummy == null)
                        this.writeUnique++;
                    else
                        this.writeDouble++;
                }
                return null;
307: }
308: }
309:
310: Entry entry;
311:
312: if (readHitCache != null) {
313: entry = readHitCache.get(key);
314: if (entry != null) {
315: // since we know that the entry was in the read cache, it cannot be in any write cache
316: // write directly to backend index
317: index.put(row);
318: // learn from situation
                kelondroRow.Entry dummy = readHitCache.put(row); // overwrite old entry
                if (dummy == null)
                    this.writeUnique++;
                else
                    this.writeDouble++;
324: return entry;
325: }
326: }
327:
328: // the worst case: we must write to the back-end directly
329: entry = index.put(row);
330: if (readHitCache != null) {
            kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
            if (dummy == null)
                this.writeUnique++;
            else
                this.writeDouble++;
336: }
337: return entry;
338: }
339:
340: public synchronized Entry put(Entry row, Date entryDate)
341: throws IOException {
        // a put with a date is bad for the cache: the date cannot be handled here.
        // The write buffer does not work in this case because it does not store dates.
        throw new UnsupportedOperationException("put with date is inefficient in kelondroCache");
347: }
348:
349: public synchronized void addUnique(Entry row) throws IOException {
350: assert (row != null);
351: assert (row.columns() == row().columns());
352: //assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
353:
354: byte[] key = row.getPrimaryKeyBytes();
355: checkHitSpace();
356:
357: // remove entry from miss- and hit-cache
        if (readMissCache != null) {
            this.readMissCache.remove(key, true);
            this.hasnotDelete++;
            // the entry did not exist before
            index.addUnique(row); // write to backend
            if (readHitCache != null) {
                kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
                if (dummy == null)
                    this.writeUnique++;
                else
                    this.writeDouble++;
            }
            return;
371: }
372:
373: // the worst case: we must write to the back-end directly
374: index.addUnique(row);
375: if (readHitCache != null) {
            kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
            if (dummy == null)
                this.writeUnique++;
            else
                this.writeDouble++;
381: }
382: }
383:
384: public synchronized void addUnique(Entry row, Date entryDate)
385: throws IOException {
386: if (entryDate == null) {
387: addUnique(row);
388: return;
389: }
390:
391: assert (row != null);
392: assert (row.columns() == row().columns());
393:
394: byte[] key = row.getPrimaryKeyBytes();
395: checkHitSpace();
396:
397: // remove entry from miss- and hit-cache
398: if (readMissCache != null) {
            this.readMissCache.remove(key, true);
            this.hasnotDelete++;
401: }
402:
403: // the worst case: we must write to the backend directly
404: index.addUnique(row);
405: if (readHitCache != null) {
            kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
            if (dummy == null)
                this.writeUnique++;
            else
                this.writeDouble++;
411: }
412: }
413:
414: public synchronized void addUniqueMultiple(List<Entry> rows)
415: throws IOException {
416: Iterator<Entry> i = rows.iterator();
417: while (i.hasNext())
            addUnique(i.next());
419: }
420:
421: public synchronized ArrayList<kelondroRowSet> removeDoubles()
422: throws IOException {
423: return index.removeDoubles();
424: // todo: remove reported entries from the cache!!!
425: }
426:
427: public synchronized Entry remove(byte[] key, boolean keepOrder)
428: throws IOException {
429: checkMissSpace();
430:
431: // add entry to miss-cache
432: if (readMissCache != null) {
433: // set the miss cache; if there was already an entry we know that the return value must be null
            kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(key));
            if (dummy == null) {
                this.hasnotUnique++;
            } else {
                this.hasnotHit++;
                this.hasnotDouble++;
            }
442: }
443:
444: // remove entry from hit-cache
445: if (readHitCache != null) {
446: Entry entry = readHitCache.remove(key, true);
            if (entry == null) {
                this.readMiss++;
            } else {
                this.readHit++;
                this.cacheDelete++;
452: }
453: }
454:
455: return index.remove(key, false);
456: }
457:
458: public synchronized Entry removeOne() throws IOException {
459:
460: checkMissSpace();
461:
462: Entry entry = index.removeOne();
463: if (entry == null)
464: return null;
465: byte[] key = entry.getPrimaryKeyBytes();
466: if (readMissCache != null) {
            kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(key));
            if (dummy == null)
                this.hasnotUnique++;
            else
                this.hasnotDouble++;
473: }
474: if (readHitCache != null) {
475: kelondroRow.Entry dummy = readHitCache.remove(key, true);
476: if (dummy != null)
                this.cacheDelete++;
478: }
479: return entry;
480: }
481:
482: public synchronized kelondroRow row() {
483: return index.row();
484: }
485:
486: public synchronized kelondroCloneableIterator<byte[]> keys(
487: boolean up, byte[] firstKey) throws IOException {
488: return index.keys(up, firstKey);
489: }
490:
491: public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(
492: boolean up, byte[] firstKey) throws IOException {
493: return index.rows(up, firstKey);
494: }
495:
496: public int size() {
497: return index.size();
498: }
499:
500: public String filename() {
501: return index.filename();
502: }
503:
504: public void reset() throws IOException {
505: this.index.reset();
506: init();
507: }
508:
509: }
|