001: /* CachedBdbMap
002: *
003: * $Id: CachedBdbMap.java 4926 2007-02-21 06:02:36Z gojomo $
004: *
005: * Created on Mar 24, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util;
026:
027: import java.io.File;
028: import java.io.IOException;
029: import java.io.Serializable;
030: import java.lang.ref.PhantomReference;
031: import java.lang.ref.Reference;
032: import java.lang.ref.ReferenceQueue;
033: import java.lang.ref.SoftReference;
034: import java.lang.reflect.Field;
035: import java.util.AbstractMap;
036: import java.util.HashMap;
037: import java.util.Iterator;
038: import java.util.LinkedList;
039: import java.util.Map;
040: import java.util.Set;
041: import java.util.logging.Level;
042: import java.util.logging.Logger;
043:
044: import com.sleepycat.bind.EntryBinding;
045: import com.sleepycat.bind.serial.SerialBinding;
046: import com.sleepycat.bind.serial.StoredClassCatalog;
047: import com.sleepycat.bind.tuple.TupleBinding;
048: import com.sleepycat.collections.StoredSortedMap;
049: import com.sleepycat.je.Database;
050: import com.sleepycat.je.DatabaseConfig;
051: import com.sleepycat.je.DatabaseException;
052: import com.sleepycat.je.Environment;
053: import com.sleepycat.je.EnvironmentConfig;
054:
/**
 * A BDB JE backed hashmap. It extends the normal BDB JE map implementation by
 * holding a cache of soft-referenced objects. That is, objects are not written
 * to disk until they are no longer referenced by any other object and can
 * therefore be garbage collected.
 *
 * @author John Erik Halse
 * @author stack
 * @author gojomo
 *
 */
066: public class CachedBdbMap<K, V> extends AbstractMap<K, V> implements
067: Map<K, V>, Serializable {
068:
069: private static final long serialVersionUID = -8655539411367047332L;
070:
071: private static final Logger logger = Logger
072: .getLogger(CachedBdbMap.class.getName());
073:
074: /** The database name of the class definition catalog.*/
075: private static final String CLASS_CATALOG = "java_class_catalog";
076:
077: /**
078: * A map of BDB JE Environments so that we reuse the Environment for
079: * databases in the same directory.
080: */
081: private static final Map<String, DbEnvironmentEntry> dbEnvironmentMap = new HashMap<String, DbEnvironmentEntry>();
082:
083: /** The BDB JE environment used for this instance.
084: */
085: private transient DbEnvironmentEntry dbEnvironment;
086:
087: /** The BDB JE database used for this instance. */
088: protected transient Database db;
089:
090: /** The Collection view of the BDB JE database used for this instance. */
091: protected transient StoredSortedMap diskMap;
092:
093: /** The softreferenced cache */
094: private transient Map<K, SoftEntry<V>> memMap;
095:
096: protected transient ReferenceQueue<V> refQueue;
097:
098: /** The number of objects in the diskMap StoredMap.
099: * (Package access for unit testing.) */
100: protected int diskMapSize = 0;
101:
102: /**
103: * Count of times we got an object from in-memory cache.
104: */
105: private long cacheHit = 0;
106:
107: /**
108: * Count of times the {@link CachedBdbMap#get(Object)} method was called.
109: */
110: private long countOfGets = 0;
111:
112: /**
113: * Count of every time we went to the disk-based map AND we found an
114: * object (Doesn't include accesses that came back null).
115: */
116: private long diskHit = 0;
117:
118: /**
119: * Name of bdbje db.
120: */
121: private String dbName = null;
122:
123: /**
124: * Reference to the Reference#referent Field.
125: */
126: protected static Field referentField;
127: static {
128: // We need access to the referent field in the PhantomReference.
129: // For more on this trick, see
130: // http://www.javaspecialists.co.za/archive/Issue098.html and for
131: // discussion:
132: // http://www.theserverside.com/tss?service=direct/0/NewsThread/threadViewer.markNoisy.link&sp=l29865&sp=l146901
133: try {
134: referentField = Reference.class
135: .getDeclaredField("referent");
136: referentField.setAccessible(true);
137: } catch (SecurityException e) {
138: throw new RuntimeException(e);
139: } catch (NoSuchFieldException e) {
140: throw new RuntimeException(e);
141: }
142: }
143:
144: /**
145: * Simple structure to keep needed information about a DB Environment.
146: */
147: protected static class DbEnvironmentEntry {
148: Environment environment;
149: StoredClassCatalog classCatalog;
150: int openDbCount = 0;
151: File dbDir;
152: }
153:
154: /**
155: * Shudown default constructor.
156: */
157: private CachedBdbMap() {
158: super ();
159: }
160:
/**
 * Creates an unconnected map that only remembers its database name.
 *
 * <p>Construction is two-stepped: call
 * {@link #initialize(Environment, Class, Class, StoredClassCatalog)}
 * afterwards to attach the backing bdbje database. The same second step
 * is what reconnects a deserialized CachedBdbMap with its database.
 *
 * @param dbName Name of the backing db this instance should use.
 */
public CachedBdbMap(final String dbName) {
    this();
    this.dbName = dbName;
}
176:
177: /**
178: * A constructor for creating a new CachedBdbMap.
179: *
180: * Even though the put and get methods conforms to the Collections interface
181: * taking any object as key or value, you have to submit the class of the
182: * allowed key and value objects here and will get an exception if you try
183: * to put anything else in the map.
184: *
185: * <p>This constructor internally calls
186: * {@link #initialize(Environment, Class, Class, StoredClassCatalog)}.
187: * Do not call initialize if you use this constructor.
188: *
189: * @param dbDir The directory where the database will be created.
190: * @param dbName The name of the database to back this map by.
191: * @param keyClass The class of the objects allowed as keys.
192: * @param valueClass The class of the objects allowed as values.
193: *
194: * @throws DatabaseException is thrown if the underlying BDB JE database
195: * throws an exception.
196: */
197: public CachedBdbMap(final File dbDir, final String dbName,
198: final Class<K> keyClass, final Class<V> valueClass)
199: throws DatabaseException {
200: this (dbName);
201: this .dbEnvironment = getDbEnvironment(dbDir);
202: this .dbEnvironment.openDbCount++;
203: initialize(dbEnvironment.environment, keyClass, valueClass,
204: dbEnvironment.classCatalog);
205: if (logger.isLoggable(Level.INFO)) {
206: // Write out the bdb configuration.
207: EnvironmentConfig cfg = this .dbEnvironment.environment
208: .getConfig();
209: logger.info("BdbConfiguration: Cache percentage "
210: + cfg.getCachePercent() + ", cache size "
211: + cfg.getCacheSize() + ", Map size: " + size());
212: }
213: }
214:
215: /**
216: * Call this method when you have an instance when you used the
217: * default constructor or when you have a deserialized instance that you
218: * want to reconnect with an extant bdbje environment. Do not
219: * call this method if you used the
220: * {@link #CachedBdbMap(File, String, Class, Class)} constructor.
221: * @param env
222: * @param keyClass
223: * @param valueClass
224: * @param classCatalog
225: * @throws DatabaseException
226: */
227: public synchronized void initialize(final Environment env,
228: final Class keyClass, final Class valueClass,
229: final StoredClassCatalog classCatalog)
230: throws DatabaseException {
231: initializeInstance();
232: this .db = openDatabase(env, this .dbName);
233: this .diskMap = createDiskMap(this .db, classCatalog, keyClass,
234: valueClass);
235: }
236:
237: /**
238: * Do any instance setup.
239: * This method is used by constructors and when deserializing an instance.
240: */
241: protected void initializeInstance() {
242: this .memMap = new HashMap<K, SoftEntry<V>>();
243: this .refQueue = new ReferenceQueue<V>();
244: }
245:
246: protected StoredSortedMap createDiskMap(Database database,
247: StoredClassCatalog classCatalog, Class keyClass,
248: Class valueClass) {
249: EntryBinding keyBinding = TupleBinding
250: .getPrimitiveBinding(keyClass);
251: if (keyBinding == null) {
252: keyBinding = new SerialBinding(classCatalog, keyClass);
253: }
254: EntryBinding valueBinding = TupleBinding
255: .getPrimitiveBinding(valueClass);
256: if (valueBinding == null) {
257: valueBinding = new SerialBinding(classCatalog, valueClass);
258: }
259: return new StoredSortedMap(database, keyBinding, valueBinding,
260: true);
261: }
262:
263: /**
264: * Get the database environment for a physical directory where data will be
265: * stored.
266: * <p>
267: * If the environment already exist it will be reused, else a new one will
268: * be created.
269: *
270: * @param dbDir The directory where BDB JE data will be stored.
271: * @return a datastructure containing the environment and a default database
272: * for storing class definitions.
273: */
274: private DbEnvironmentEntry getDbEnvironment(File dbDir) {
275: if (dbEnvironmentMap.containsKey(dbDir.getAbsolutePath())) {
276: return (DbEnvironmentEntry) dbEnvironmentMap.get(dbDir
277: .getAbsolutePath());
278: }
279: EnvironmentConfig envConfig = new EnvironmentConfig();
280: envConfig.setAllowCreate(true);
281: envConfig.setTransactional(false);
282:
283: // We're doing the caching ourselves so setting these at the lowest
284: // possible level.
285: envConfig.setCachePercent(1);
286: DbEnvironmentEntry env = new DbEnvironmentEntry();
287: try {
288: env.environment = new Environment(dbDir, envConfig);
289: env.dbDir = dbDir;
290: dbEnvironmentMap.put(dbDir.getAbsolutePath(), env);
291:
292: DatabaseConfig dbConfig = new DatabaseConfig();
293: dbConfig.setTransactional(false);
294: dbConfig.setAllowCreate(true);
295: dbConfig.setDeferredWrite(true);
296:
297: Database catalogDb = env.environment.openDatabase(null,
298: CLASS_CATALOG, dbConfig);
299:
300: env.classCatalog = new StoredClassCatalog(catalogDb);
301: } catch (DatabaseException e) {
302: e.printStackTrace();
303: //throw new FatalConfigurationException(e.getMessage());
304: }
305: return env;
306: }
307:
308: protected Database openDatabase(final Environment environment,
309: final String dbName) throws DatabaseException {
310: DatabaseConfig dbConfig = new DatabaseConfig();
311: dbConfig.setTransactional(false);
312: dbConfig.setAllowCreate(true);
313: dbConfig.setDeferredWrite(true);
314: return environment.openDatabase(null, dbName, dbConfig);
315: }
316:
317: public synchronized void close() throws DatabaseException {
318: // Close out my bdb db.
319: if (this .db != null) {
320: try {
321: this .db.sync();
322: this .db.close();
323: } catch (DatabaseException e) {
324: e.printStackTrace();
325: } finally {
326: this .db = null;
327: }
328: }
329: if (dbEnvironment != null) {
330: dbEnvironment.openDbCount--;
331: if (dbEnvironment.openDbCount <= 0) {
332: dbEnvironment.classCatalog.close();
333: dbEnvironment.environment.close();
334: dbEnvironmentMap.remove(dbEnvironment.dbDir
335: .getAbsolutePath());
336: dbEnvironment = null;
337: }
338: }
339: }
340:
341: protected void finalize() throws Throwable {
342: close();
343: super .finalize();
344: }
345:
346: /**
347: * The keySet of the diskMap is all relevant keys.
348: *
349: * @see java.util.Map#keySet()
350: */
351: @SuppressWarnings("unchecked")
352: public Set<K> keySet() {
353: return diskMap.keySet();
354: }
355:
356: public Set<Map.Entry<K, V>> entrySet() {
357: // Would require complicated implementation to
358: // maintain identity guarantees, so skipping
359: throw new UnsupportedOperationException();
360: }
361:
362: public synchronized V get(final Object object) {
363: K key = toKey(object);
364: countOfGets++;
365: expungeStaleEntries();
366: if (countOfGets % 10000 == 0) {
367: logCacheSummary();
368: }
369: SoftEntry<V> entry = memMap.get(key);
370: if (entry != null) {
371: V val = entry.get(); // get & hold, so not cleared pre-return
372: if (val != null) {
373: cacheHit++;
374: return val;
375: }
376: // Explicitly clear this entry from referencequeue since its
377: // value is null.
378: expungeStaleEntry(entry);
379: }
380:
381: // check backing diskMap
382: V v = diskMapGet(key);
383: if (v != null) {
384: diskHit++;
385: memMap.put(key, new SoftEntry<V>(key, v, refQueue));
386: }
387: return v;
388: }
389:
390: /**
391: * Info to log, if at FINE level, on every get()
392: */
393: private void logCacheSummary() {
394: if (!logger.isLoggable((Level.FINE))) {
395: return;
396: }
397: try {
398: long cacheHitPercent = (cacheHit * 100)
399: / (cacheHit + diskHit);
400: logger.fine("DB name: " + this .db.getDatabaseName()
401: + ", Cache Hit: " + cacheHitPercent
402: + "%, Not in map: "
403: + (countOfGets - (cacheHit + diskHit))
404: + ", Total number of gets: " + countOfGets);
405: } catch (DatabaseException e) {
406: // This is just for logging so ignore DB Exceptions
407: }
408: }
409:
410: public synchronized V put(K key, V value) {
411: V prevVal = get(key);
412: memMap.put(key, new SoftEntry<V>(key, value, refQueue));
413: diskMap.put(key, value); // dummy
414: if (prevVal == null) {
415: diskMapSize++;
416: }
417: return prevVal;
418: }
419:
420: /**
421: * Note that a call to this method CLOSEs the underlying bdbje.
422: * This instance is no longer of any use. It must be re-initialized.
423: * We close the db here because if this BigMap is being treated as a plain
424: * Map, this is only opportunity for cleanup.
425: */
426: public synchronized void clear() {
427: this .memMap.clear();
428: this .diskMap.clear();
429: this .diskMapSize = 0;
430: try {
431: close();
432: } catch (DatabaseException e) {
433: e.printStackTrace();
434: }
435: }
436:
437: public synchronized V remove(final Object key) {
438: V prevValue = get(key);
439: memMap.remove(key);
440: expungeStaleEntries();
441: diskMap.remove(key);
442: diskMapSize--;
443: return prevValue;
444: }
445:
446: public synchronized boolean containsKey(Object key) {
447: if (quickContainsKey(key)) {
448: return true;
449: }
450: return diskMap.containsKey(key);
451: }
452:
453: public synchronized boolean quickContainsKey(Object key) {
454: expungeStaleEntries();
455: return memMap.containsKey(key);
456: }
457:
458: public synchronized boolean containsValue(Object value) {
459: if (quickContainsValue(value)) {
460: return true;
461: }
462: return diskMap.containsValue(value);
463: }
464:
465: public synchronized boolean quickContainsValue(Object value) {
466: expungeStaleEntries();
467: // FIXME this isn't really right, as memMap is of SoftEntries
468: return memMap.containsValue(value);
469: }
470:
471: public int size() {
472: return diskMapSize;
473: }
474:
475: protected String getDatabaseName() {
476: String name = "DbName-Lookup-Failed";
477: try {
478: if (this .db != null) {
479: name = this .db.getDatabaseName();
480: }
481: } catch (DatabaseException e) {
482: // Ignore.
483: }
484: return name;
485: }
486:
487: /**
488: * Sync in-memory map entries to backing disk store.
489: * When done, the memory map will be cleared and all entries stored
490: * on disk.
491: */
492: public synchronized void sync() {
493: String dbName = null;
494: // Sync. memory and disk.
495: long startTime = 0;
496: if (logger.isLoggable(Level.INFO)) {
497: dbName = getDatabaseName();
498: startTime = System.currentTimeMillis();
499: logger.info(dbName + " start sizes: disk "
500: + this .diskMapSize + ", mem " + this .memMap.size());
501: }
502: expungeStaleEntries();
503: LinkedList<SoftEntry> stale = new LinkedList<SoftEntry>();
504: for (Iterator i = this .memMap.keySet().iterator(); i.hasNext();) {
505: Object key = i.next();
506: SoftEntry entry = (SoftEntry) memMap.get(key);
507: if (entry != null) {
508: // Get & hold so not cleared pre-return.
509: Object value = entry.get();
510: if (value != null) {
511: this .diskMap.put(key, value);
512: } else {
513: stale.add(entry);
514: }
515: }
516: }
517: // for any entries above that had been cleared, ensure expunged
518: for (SoftEntry entry : stale) {
519: expungeStaleEntry(entry);
520: }
521:
522: // force sync of deferred-writes
523: try {
524: this .db.sync();
525: } catch (DatabaseException e) {
526: // TODO Auto-generated catch block
527: throw new RuntimeException(e);
528: }
529:
530: if (logger.isLoggable(Level.INFO)) {
531: logger.info(dbName + " sync took "
532: + (System.currentTimeMillis() - startTime) + "ms. "
533: + "Finish sizes: disk " + this .diskMapSize
534: + ", mem " + this .memMap.size());
535: }
536: }
537:
538: private void expungeStaleEntries() {
539: int c = 0;
540: for (SoftEntry entry; (entry = refQueuePoll()) != null;) {
541: expungeStaleEntry(entry);
542: c++;
543: }
544: if (c > 0 && logger.isLoggable(Level.FINER)) {
545: try {
546: logger.finer("DB: " + db.getDatabaseName()
547: + ", Expunged: " + c + ", Diskmap size: "
548: + diskMapSize + ", Cache size: "
549: + memMap.size());
550: } catch (DatabaseException e) {
551: // Just for logging so ignore Exceptions
552: }
553: }
554: }
555:
556: private void expungeStaleEntry(SoftEntry entry) {
557: // If phantom already null, its already expunged -- probably
558: // because it was purged directly first from inside in
559: // {@link #get(String)} and then it went on the poll queue and
560: // when it came off inside in expungeStaleEntries, this method
561: // was called again.
562: if (entry.getPhantom() == null) {
563: return;
564: }
565: // If the object that is in memMap is not the one passed here, then
566: // memMap has been changed -- probably by a put on top of this entry.
567: if (memMap.get(entry.getPhantom().getKey()) == entry) {
568: memMap.remove(entry.getPhantom().getKey());
569: diskMap.put(entry.getPhantom().getKey(), entry.getPhantom()
570: .doctoredGet());
571: }
572: entry.clearPhantom();
573: }
574:
575: private class PhantomEntry<T> extends PhantomReference<T> {
576: private final Object key;
577:
578: public PhantomEntry(Object key, T referent) {
579: super (referent, null);
580: this .key = key;
581: }
582:
583: /**
584: * @return Return the referent. The contract for {@link #get()}
585: * always returns a null referent. We've cheated and doctored
586: * PhantomReference to return the actual referent value. See notes
587: * at {@link #referentField};
588: */
589: public Object doctoredGet() {
590: try {
591: // Here we use the referentField saved off on static
592: // initialization of this class to get at this References'
593: // private referent field.
594: return referentField.get(this );
595: } catch (IllegalAccessException e) {
596: throw new RuntimeException(e);
597: }
598: }
599:
600: /**
601: * @return Returns the key.
602: */
603: public Object getKey() {
604: return this .key;
605: }
606: }
607:
608: private class SoftEntry<T> extends SoftReference<T> {
609: private PhantomEntry<T> phantom;
610:
611: public SoftEntry(Object key, T referent, ReferenceQueue<T> q) {
612: super (referent, q);
613: this .phantom = new PhantomEntry<T>(key, referent);
614: }
615:
616: /**
617: * @return Returns the phantom reference.
618: */
619: public PhantomEntry getPhantom() {
620: return this .phantom;
621: }
622:
623: public void clearPhantom() {
624: this .phantom.clear();
625: this .phantom = null;
626: super .clear();
627: }
628: }
629:
630: private void readObject(java.io.ObjectInputStream stream)
631: throws IOException, ClassNotFoundException {
632: stream.defaultReadObject();
633: initializeInstance();
634: if (logger.isLoggable(Level.FINE)) {
635: logger.fine(getDatabaseName() + " diskMapSize: "
636: + diskMapSize);
637: }
638: }
639:
640: @SuppressWarnings("unchecked")
641: private K toKey(Object o) {
642: return (K) o;
643: }
644:
645: @SuppressWarnings("unchecked")
646: private V diskMapGet(K k) {
647: return (V) diskMap.get(k);
648: }
649:
650: @SuppressWarnings("unchecked")
651: private SoftEntry<V> refQueuePoll() {
652: return (SoftEntry) refQueue.poll();
653: }
654: }
|