001: /* Scoper
002: *
003: * Created on Jun 6, 2005
004: *
005: * Copyright (C) 2005 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.framework;
024:
025: import java.util.logging.Level;
026: import java.util.logging.Logger;
027:
028: import javax.management.AttributeNotFoundException;
029:
030: import org.archive.crawler.datamodel.CandidateURI;
031: import org.archive.crawler.settings.SimpleType;
032: import org.archive.crawler.settings.Type;
033: import org.archive.crawler.util.LogUtils;
034:
035: /**
036: * Base class for Scopers.
037: * Scopers test CandidateURIs against a scope.
038: * Scopers allow logging of rejected CandidateURIs.
039: * @author stack
040: * @version $Date: 2006-09-25 23:59:43 +0000 (Mon, 25 Sep 2006) $, $Revision: 4664 $
041: */
042: public abstract class Scoper extends Processor {
043: private static Logger LOGGER = Logger.getLogger(Scoper.class
044: .getName());
045:
046: /**
047: * Protected so avaiilable to subclasses.
048: */
049: protected static final String ATTR_OVERRIDE_LOGGER_ENABLED = "override-logger";
050:
051: /**
052: * Constructor.
053: * @param name
054: * @param description
055: */
056: public Scoper(String name, String description) {
057: super (name, description);
058: Type t = addElementToDefinition(new SimpleType(
059: ATTR_OVERRIDE_LOGGER_ENABLED,
060: "If enabled, override default logger for this class (Default "
061: + "logger writes the console). Override "
062: + "logger will instead send all logging to a file named for this "
063: + "class in the job log directory. Set the logging level and "
064: + "other "
065: + "characteristics of the override logger such as rotation size, "
066: + "suffix pattern, etc. in heritrix.properties. This attribute "
067: + "is only checked once, on startup of a job.",
068: new Boolean(false)));
069: t.setExpertSetting(true);
070: }
071:
072: protected void initialTasks() {
073: super .initialTasks();
074: if (!isOverrideLogger(null)) {
075: return;
076: }
077: // Set up logger for this instance. May have special directives
078: // since this class can log scope-rejected URLs.
079: LogUtils.createFileLogger(getController().getLogsDir(), this
080: .getClass().getName(), Logger.getLogger(this .getClass()
081: .getName()));
082: }
083:
084: /**
085: * @param context Context to use looking up attribute.
086: * @return True if we are to override default logger (default logs
087: * to console) with a logger that writes all loggings to a file
088: * named for this class.
089: */
090: protected boolean isOverrideLogger(Object context) {
091: boolean result = true;
092: try {
093: Boolean b = (Boolean) getAttribute(context,
094: ATTR_OVERRIDE_LOGGER_ENABLED);
095: if (b != null) {
096: result = b.booleanValue();
097: }
098: } catch (AttributeNotFoundException e) {
099: LOGGER.warning("Failed get of 'enabled' attribute.");
100: }
101:
102: return result;
103: }
104:
105: /**
106: * Schedule the given {@link CandidateURI CandidateURI} with the Frontier.
107: * @param caUri The CandidateURI to be scheduled.
108: * @return true if CandidateURI was accepted by crawl scope, false
109: * otherwise.
110: */
111: protected boolean isInScope(CandidateURI caUri) {
112: boolean result = false;
113: if (getController().getScope().accepts(caUri)) {
114: result = true;
115: if (LOGGER.isLoggable(Level.FINER)) {
116: LOGGER.finer("Accepted: " + caUri);
117: }
118: } else {
119: outOfScope(caUri);
120: }
121: return result;
122: }
123:
124: /**
125: * Called when a CandidateUri is ruled out of scope.
126: * Override if you don't want logs as coming from this class.
127: * @param caUri CandidateURI that is out of scope.
128: */
129: protected void outOfScope(CandidateURI caUri) {
130: if (!LOGGER.isLoggable(Level.INFO)) {
131: return;
132: }
133: LOGGER.info(caUri.getUURI().toString());
134: }
135: }
|