001: /* CrawlerSettings
002: *
003: * $Id: CrawlerSettings.java 4662 2006-09-25 23:45:21Z paul_jack $
004: *
005: * Created on Dec 16, 2003
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.settings;
026:
027: import java.util.ArrayList;
028: import java.util.Date;
029: import java.util.HashMap;
030: import java.util.Iterator;
031: import java.util.List;
032: import java.util.ListIterator;
033: import java.util.Map;
034:
035: import org.archive.crawler.settings.refinements.Refinement;
036: import org.archive.net.UURI;
037:
038: /**
039: * Class representing a settings file.
040: *
041: * More precisely it represents a collection of settings valid in a particular
042: * scope. The scope is either the global settings, or the settings to be used
043: * for a particular domain or host. For scopes other than global, the instance
044: * will only contain those settings that are different from the global.
045: *
046: * In the default implementation this is a one to one mapping from a file to
047: * an instance of this class, but in other implementations the information in
048: * an instance of this class might be stored in a different way (for example
049: * in a RDBMS).
050: *
051: * @author John Erik Halse
052: */
053: public class CrawlerSettings {
054: /** Registry of DataContainers for ComplexTypes in this settings object
055: * indexed on absolute name */
056: private final Map<String, DataContainer> localComplexTypes = new HashMap<String, DataContainer>();
057:
058: /** Registry of top level ModuleTypes in this settings object indexed on
059: * module name. These are modules that doesn't have parents in this
060: * settings object
061: */
062: private final Map<String, ModuleType> topLevelModules = new HashMap<String, ModuleType>();
063:
064: /** Registry of all ModuleTypes in this settings object indexed on
065: * module name.
066: */
067: private final Map<String, ComplexType> localModules = new HashMap<String, ComplexType>();
068:
069: /** Reference to the settings handler this settings object belongs to */
070: private final SettingsHandler settingsHandler;
071:
072: /** Scope for this collection of settings (hostname) */
073: private final String scope;
074:
075: /** List of refinements applied to this settings object */
076: private List<Refinement> refinements;
077:
078: /** True if this settings object is a refinement */
079: private boolean isRefinement = false;
080:
081: /** Name of this collection of settings */
082: private String name = "";
083:
084: /** Description of this collection of settings */
085: private String description = "";
086:
087: /**
088: * Operator of this crawl job.
089: */
090: private String operator = "Admin";
091:
092: /**
093: * Organization running this crawl job.
094: */
095: private String organization = "";
096:
097: /**
098: * Audience/recipient/customer on whose behalf this crawl is being run.
099: */
100: private String audience = "";
101:
102: /** Time when this collection was last saved to persistent storage */
103: private Date lastSaved = null;
104:
105: /**
106: * Constructs a new CrawlerSettings object.
107: *
108: * Application code should not call the constructor directly, but use the
109: * methods in SettingsHandler instead.
110: *
111: * @param handler The SettingsHandler this object belongs to.
112: * @param scope The scope of this settings object (ie. host or domain).
113: *
114: * @see SettingsHandler#getSettings(String)
115: * @see SettingsHandler#getSettingsObject(String)
116: */
117: public CrawlerSettings(SettingsHandler handler, String scope) {
118: this .settingsHandler = handler;
119: this .scope = scope;
120: }
121:
122: /**
123: * Constructs a new CrawlerSettings object which is a refinement of another
124: * settings object.
125: *
126: * Application code should not call the constructor directly, but use the
127: * methods in SettingsHandler instead.
128: *
129: * @param handler The SettingsHandler this object belongs to.
130: * @param scope The scope of this settings object (ie. host or domain).
131: * @param refinement the name or reference to the refinement.
132: *
133: * @see SettingsHandler#getSettings(String)
134: * @see SettingsHandler#getSettingsObject(String)
135: */
136: public CrawlerSettings(SettingsHandler handler, String scope,
137: String refinement) {
138: this (handler, scope);
139: if (refinement != null && !refinement.equals("")) {
140: this .isRefinement = true;
141: this .name = refinement;
142: }
143: }
144:
145: /** Get the description of this CrawlerSettings object.
146: *
147: * @return the description of this CrawlerSettings object.
148: */
149: public String getDescription() {
150: return description;
151: }
152:
153: /** Get the name of this CrawlerSettings object.
154: *
155: * @return the name of this CrawlerSettings object.
156: */
157: public String getName() {
158: return name;
159: }
160:
161: /**
162: * Get the name of operator of this crawl from this CrawlerSettings object.
163: *
164: * @return the name of this CrawlerSettings object.
165: */
166: public String getOperator() {
167: return operator;
168: }
169:
170: /**
171: * Get the name of the organization running this crawl from this
172: * CrawlerSettings object.
173: *
174: * @return the name of the organization running this crawl.
175: */
176: public String getOrganization() {
177: return organization;
178: }
179:
180: /**
181: * Get the audience/customer/recipient of the crawl job product from
182: * this CrawlerSettings object.
183: *
184: * @return the audience/customer/recipient of the crawl job product.
185: */
186: public String getAudience() {
187: return audience;
188: }
189:
190: /** Get the scope of this CrawlerSettings object.
191: *
192: * @return the scope of this CrawlerSettings object.
193: */
194: public String getScope() {
195: return scope;
196: }
197:
198: /** Set the description of this CrawlerSettings object.
199: *
200: * @param string the description to be set for this CrawlerSettings object.
201: */
202: public void setDescription(String string) {
203: description = string;
204: }
205:
206: /**
207: * Set the operator of this crawl job.
208: * @param name Operator running this crawl.
209: */
210: public void setOperator(String name) {
211: this .operator = name;
212: }
213:
214: /**
215: * Set the name of the organization who is running this crawl.
216: * @param name Name of organization running this crawl.
217: */
218: public void setOrganization(String name) {
219: this .organization = name;
220: }
221:
222: /**
223: * Set the recipient/customer for the crawl job product.
224: * @param name Recipient of crawl job product.
225: */
226: public void setAudience(String name) {
227: this .audience = name;
228: }
229:
230: /** Set the name of this CrawlerSettings object.
231: *
232: * @param string the name to be set for this CrawlerSettings object.
233: */
234: public void setName(String string) {
235: name = string;
236: }
237:
238: /**
239: * Get the time when this CrawlerSettings was last saved to persistent
240: * storage.
241: *
242: * @return the time when this CrawlerSettings was last saved to persistent
243: * storage. Null if it has not been saved.
244: */
245: public Date getLastSavedTime() {
246: return lastSaved;
247: }
248:
249: /**
250: * Set the time when this CrawlerSettings was last saved to persistent
251: * storage.
252: *
253: * @param lastSaved the time when this CrawlerSettings was last saved to
254: * persistent storage.
255: */
256: protected void setLastSavedTime(Date lastSaved) {
257: this .lastSaved = lastSaved;
258: }
259:
260: protected void addTopLevelModule(ModuleType module) {
261: // if (topLevelModules.containsKey(module.getName())) {
262: // throw new IllegalArgumentException(
263: // "Duplicate module name: " + module.getName());
264: // } else {
265: topLevelModules.put(module.getName(), module);
266: // }
267: }
268:
269: protected DataContainer addComplexType(ComplexType type) {
270: DataContainer data = new DataContainer(this , type);
271: localComplexTypes.put(type.getAbsoluteName(), data);
272: if (type instanceof ModuleType) {
273: localModules.put(type.getName(), type);
274: }
275: return data;
276: }
277:
278: protected DataContainer getData(ComplexType complex) {
279: return getData(complex.getAbsoluteName());
280: }
281:
282: protected DataContainer getData(String absoluteName) {
283: return (DataContainer) localComplexTypes.get(absoluteName);
284: }
285:
286: protected ModuleType getTopLevelModule(String name) {
287: return (ModuleType) topLevelModules.get(name);
288: }
289:
290: public ModuleType getModule(String name) {
291: return (ModuleType) localModules.get(name);
292: }
293:
294: protected Iterator topLevelModules() {
295: return topLevelModules.values().iterator();
296: }
297:
298: /** Get the parent of this CrawlerSettings object.
299: *
300: * @return the parent of this CrawlerSettings object.
301: */
302: public CrawlerSettings getParent() {
303: return getParent(null);
304: }
305:
306: /**
307: * Get the parent of this CrawlerSettings object.
308: * This method passes around a URI so that refinements could be checked.
309: *
310: * @param uri The uri for which parents of this object shoul be found.
311: * @return the parent of this CrawlerSettings object.
312: */
313: public CrawlerSettings getParent(UURI uri) {
314: return (isRefinement()) ? settingsHandler
315: .getSettingsForHost(scope) : (scope == null || scope
316: .equals("")) ? null : settingsHandler.getSettings(
317: settingsHandler.getParentScope(scope), uri);
318: }
319:
320: /** Get the SettingHandler this CrawlerSettings object belongs to.
321: *
322: * @return the SettingHandler this CrawlerSettings object belongs to.
323: */
324: public SettingsHandler getSettingsHandler() {
325: return settingsHandler;
326: }
327:
328: /**
329: * Get an <code>ListIterator</code> over the refinements for this
330: * settings object.
331: *
332: * @return Returns an iterator over the refinements.
333: */
334: public ListIterator refinementsIterator() {
335: if (refinements == null) {
336: refinements = new ArrayList<Refinement>();
337: }
338: return refinements.listIterator();
339: }
340:
341: /**
342: * Add a refinement to this settings object.
343: *
344: * @param refinement The refinements to set.
345: */
346: public void addRefinement(Refinement refinement) {
347: if (refinements == null) {
348: refinements = new ArrayList<Refinement>();
349: }
350: this .refinements.remove(refinement);
351: this .refinements.add(refinement);
352: }
353:
354: /**
355: * Remove a refinement from this settings object.
356: *
357: * @param reference the reference (name) to the refinement to be removed.
358: * @return true if something was removed, false if the refinement was not
359: * found.
360: */
361: public boolean removeRefinement(String reference) {
362: if (hasRefinements()) {
363: for (Iterator it = refinements.iterator(); it.hasNext();) {
364: if (((Refinement) it.next()).getReference().equals(
365: reference)) {
366: it.remove();
367: return true;
368: }
369: }
370: }
371: return false;
372: }
373:
374: /**
375: * Get a refinement with a given reference.
376: *
377: * @param reference the reference (name) to the refinement to get.
378: * @return the refinement having the specified reference or null if no
379: * refinement matches it.
380: */
381: public Refinement getRefinement(String reference) {
382: if (hasRefinements()) {
383: for (Iterator it = refinements.iterator(); it.hasNext();) {
384: Refinement tmp = (Refinement) it.next();
385: if (tmp.getReference().equals(reference)) {
386: return tmp;
387: }
388: }
389: }
390: return null;
391: }
392:
393: /**
394: * Returns true if this settings object has refinements attached to it.
395: *
396: * @return true if this settings object has refinements attached to it.
397: */
398: public boolean hasRefinements() {
399: return refinements != null && !refinements.isEmpty();
400: }
401:
402: /**
403: * Returns true if this settings object is a refinement.
404: *
405: * @return true if this settings object is a refinement.
406: */
407: public boolean isRefinement() {
408: return isRefinement;
409: }
410:
411: /**
412: * Mark this settings object as an refinement.
413: *
414: * @param isRefinement Set this to true if this settings object is a
415: * refinement.
416: */
417: public void setRefinement(boolean isRefinement) {
418: this.isRefinement = isRefinement;
419: }
420: }
|