001: /* SettingsCache
002: *
003: * $Id: SettingsCache.java 4448 2006-08-07 06:29:13Z gojomo $
004: *
005: * Created on Mar 17, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.settings;
026:
027: /** This class keeps a map of host names to settings objects.
028: *
029: * It is implemented with soft references which implies that the elements can
030: * be garbage collected when there's no strong references to the elements.
031: * Even if there's no strong references left elements will not be garbage
032: * collected unless the memory is needed.
033: *
034: * @author John Erik Halse
035: *
036: */
037: public class SettingsCache {
038: /** Cached CrawlerSettings objects */
039: private final SoftSettingsHash settingsCache = new SoftSettingsHash(
040: 16);
041:
042: /** Maps hostname to effective settings object */
043: private final SoftSettingsHash hostToSettings = new SoftSettingsHash(
044: 4000);
045:
046: private final CrawlerSettings globalSettings;
047:
048: /** Creates a new instance of the settings cache
049: */
050: public SettingsCache(CrawlerSettings globalSettings) {
051: this .globalSettings = globalSettings;
052: }
053:
054: /** Get the effective settings for a host.
055: *
056: * @param host the host to get settings for.
057: * @return the settings or null if not in cache.
058: */
059: public CrawlerSettings getSettings(String host, String refinement) {
060: String key = computeKey(host, refinement);
061: return (key == "") ? this .globalSettings : hostToSettings
062: .get(key);
063: }
064:
065: /** Get a settings object.
066: *
067: * @param scope the scope of the settings object to get.
068: * @return the settings object or null if not in cache.
069: */
070: public CrawlerSettings getSettingsObject(String scope,
071: String refinement) {
072: String key = computeKey(scope, refinement);
073: return (key == "") ? this .globalSettings : settingsCache
074: .get(key);
075: }
076:
077: /** Add a settings object to the cache.
078: *
079: * @param host the host for which the settings object is valid.
080: * @param settings the settings object.
081: */
082: public synchronized void putSettings(String host,
083: CrawlerSettings settings) {
084: String refinement = settings.isRefinement() ? settings
085: .getName() : null;
086: String key = computeKey(host, refinement);
087: hostToSettings.put(key, settings);
088: key = computeKey(settings.getScope(), refinement);
089: settingsCache.put(key, settings);
090: }
091:
092: /** Delete a settings object from the cache.
093: *
094: * @param settings the settings object to remove.
095: */
096: public synchronized void deleteSettingsObject(
097: CrawlerSettings settings) {
098: String refinement = settings.isRefinement() ? settings
099: .getName() : null;
100: settingsCache
101: .remove(computeKey(settings.getScope(), refinement));
102:
103: // Find all references to this settings object in the hostToSettings
104: // cache and remove them.
105: for (SoftSettingsHash.EntryIterator it = hostToSettings
106: .iterator(); it.hasNext();) {
107: if (it.nextEntry().getValue().equals(settings)) {
108: it.remove();
109: }
110: }
111: }
112:
113: /** Make sure that no host strings points to wrong settings.
114: *
115: * This method clears most of the host to settings mappings. Because of the
116: * performance penalty this should only used when really needed.
117: */
118: public synchronized void refreshHostToSettings() {
119: hostToSettings.clear();
120: SoftSettingsHash.EntryIterator it = settingsCache.iterator();
121: while (it.hasNext()) {
122: SoftSettingsHash.SettingsEntry entry = it.nextEntry();
123: hostToSettings.put(entry);
124: }
125: }
126:
127: /**
128: * Clear all cached settings.
129: */
130: public void clear() {
131: hostToSettings.clear();
132: settingsCache.clear();
133: }
134:
135: public CrawlerSettings getGlobalSettings() {
136: return globalSettings;
137: }
138:
139: private String computeKey(String host, String refinement) {
140: host = host == null ? "" : host;
141: return (refinement == null) || refinement.equals("") ? host
142: : host + '#' + refinement;
143: }
144: }
|