001: /* ProcessorChainList
002: *
003: * $Id: ProcessorChainList.java 4664 2006-09-25 23:59:43Z paul_jack $
004: *
005: * Created on Mar 3, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.framework;
026:
027: import java.util.ArrayList;
028: import java.util.HashMap;
029: import java.util.Iterator;
030: import java.util.List;
031: import java.util.Map;
032:
033: import javax.management.AttributeNotFoundException;
034: import javax.management.MBeanException;
035: import javax.management.ReflectionException;
036:
037: import org.archive.crawler.datamodel.CrawlOrder;
038: import org.archive.crawler.framework.exceptions.FatalConfigurationException;
039: import org.archive.crawler.settings.MapType;
040:
041: /** A list of all the ProcessorChains.
042: *
043: * @author John Erik Halse
044: */
045: public class ProcessorChainList {
046: private List<ProcessorChain> chainList = new ArrayList<ProcessorChain>();
047: private Map<String, ProcessorChain> chainMap = new HashMap<String, ProcessorChain>();
048:
049: /** Constructs a new ProcessorChainList.
050: *
051: * @param order the Crawl Order to get configuration from.
052: *
053: * @throws FatalConfigurationException is thrown if chains could not be
054: * set up properly.
055: */
056: public ProcessorChainList(CrawlOrder order)
057: throws FatalConfigurationException {
058: try {
059: addProcessorMap(
060: CrawlOrder.ATTR_PRE_FETCH_PROCESSORS,
061: (MapType) order
062: .getAttribute(CrawlOrder.ATTR_PRE_FETCH_PROCESSORS));
063: addProcessorMap(
064: CrawlOrder.ATTR_FETCH_PROCESSORS,
065: (MapType) order
066: .getAttribute(CrawlOrder.ATTR_FETCH_PROCESSORS));
067: addProcessorMap(
068: CrawlOrder.ATTR_EXTRACT_PROCESSORS,
069: (MapType) order
070: .getAttribute(CrawlOrder.ATTR_EXTRACT_PROCESSORS));
071: addProcessorMap(
072: CrawlOrder.ATTR_WRITE_PROCESSORS,
073: (MapType) order
074: .getAttribute(CrawlOrder.ATTR_WRITE_PROCESSORS));
075: addProcessorMap(
076: CrawlOrder.ATTR_POST_PROCESSORS,
077: (MapType) order
078: .getAttribute(CrawlOrder.ATTR_POST_PROCESSORS));
079: } catch (AttributeNotFoundException e) {
080: throw new FatalConfigurationException(
081: "Could not get processors" + " from crawl order: "
082: + e.getMessage());
083: } catch (MBeanException e) {
084: throw new FatalConfigurationException(
085: "Could not get processors" + " from crawl order: "
086: + e.getMessage());
087: } catch (ReflectionException e) {
088: throw new FatalConfigurationException(
089: "Could not get processors" + " from crawl order: "
090: + e.getMessage());
091: }
092:
093: if (processorCount() == 0) {
094: throw new FatalConfigurationException(
095: "No processors defined");
096: }
097: }
098:
099: /** Add a new chain of processors to the chain list.
100: *
101: * This method takes a map of processors and wraps it in a ProcessorChain
102: * object and adds it to the list of chains.
103: *
104: * @param processorMap the processor map to be added.
105: */
106: public void addProcessorMap(String name, MapType processorMap) {
107: ProcessorChain processorChain = new ProcessorChain(processorMap);
108: ProcessorChain previousChain = getLastChain();
109: if (previousChain != null) {
110: previousChain.setNextChain(processorChain);
111: }
112: chainList.add(processorChain);
113: chainMap.put(name, processorChain);
114: }
115:
116: /** Get the first processor chain.
117: *
118: * @return the first processor chain.
119: */
120: public ProcessorChain getFirstChain() {
121: return (ProcessorChain) chainList.get(0);
122: }
123:
124: /** Get the last processor chain.
125: *
126: * The last processor chain should contain processors that should always
127: * be run for a URI that has started its way through the processors.
128: *
129: * @return the last processor chain.
130: */
131: public ProcessorChain getLastChain() {
132: if (size() == 0) {
133: return null;
134: } else {
135: return (ProcessorChain) chainList.get(size() - 1);
136: }
137: }
138:
139: /** Get the total number of all processors in all the chains.
140: *
141: * @return the total number of all processors in all the chains.
142: */
143: public int processorCount() {
144: int processorCount = 0;
145: for (Iterator it = iterator(); it.hasNext();) {
146: processorCount += ((ProcessorChain) it.next()).size();
147: }
148: return processorCount;
149: }
150:
151: /** Get an iterator over the processor chains.
152: *
153: * @return an iterator over the processor chains.
154: */
155: public Iterator iterator() {
156: return chainList.iterator();
157: }
158:
159: /** Get the number of processor chains.
160: *
161: * @return the number of processor chains.
162: */
163: public int size() {
164: return chainList.size();
165: }
166:
167: /** Get a processor chain by its index in the list of chains.
168: *
169: * @param index the chains index in the list of chains.
170: * @return the requested processor chain.
171: */
172: public ProcessorChain getProcessorChain(int index) {
173: return (ProcessorChain) chainList.get(index);
174: }
175:
176: /** Get a processor chain by its name.
177: *
178: * @param name name of the processor chain to get.
179: * @return the requested processor chain.
180: */
181: public ProcessorChain getProcessorChain(String name) {
182: return (ProcessorChain) chainMap.get(name);
183: }
184:
185: public void kickUpdate() {
186: for (ProcessorChain chain : chainList) {
187: chain.kickUpdate();
188: }
189: }
190:
191: }
|