001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.bean.helpers;
018:
019: import java.util.HashMap;
020: import java.util.HashSet;
021: import java.util.Iterator;
022: import java.util.Map;
023: import java.util.Set;
024:
025: import org.apache.cocoon.bean.Target;
026: import org.apache.cocoon.ProcessingException;
027:
028: /**
029: * A simple Cocoon crawler
030: *
031: * @author <a href="mailto:uv@upaya.co.uk">Upayavira</a>
032: * @version CVS $Id: Crawler.java 433543 2006-08-22 06:22:54Z crossley $
033: */
034:
035: public class Crawler {
036:
037: private Map allTranslatedLinks;
038: private Map stillNotVisited;
039: private Set visitedAlready;
040:
041: public Crawler() {
042: visitedAlready = new HashSet();
043: stillNotVisited = new HashMap();
044: allTranslatedLinks = new HashMap();
045: }
046:
047: /**
048: * Add a target for future processing
049: */
050: public boolean addTarget(Target target) {
051: String targetString = target.toString();
052: if (!visitedAlready.contains(targetString)) {
053: if (!stillNotVisited.containsKey(targetString)) {
054: stillNotVisited.put(targetString, target);
055: return true;
056: }
057: }
058: return false;
059: }
060:
061: /**
062: * Returns the number of targets for processing
063: */
064: public int getRemainingCount() {
065: return stillNotVisited.size();
066: }
067:
068: public int getProcessedCount() {
069: return visitedAlready.size();
070: }
071:
072: public int getTranslatedCount() {
073: return allTranslatedLinks.size();
074: }
075:
076: public void addTranslatedLink(Target target)
077: throws ProcessingException {
078: allTranslatedLinks.put(target.getSourceURI(), target);
079: }
080:
081: public boolean hasTranslatedLink(Target link) {
082: return allTranslatedLinks.get(link.getSourceURI()) != null;
083: }
084:
085: public Target getTranslatedLink(Target link) {
086: return (Target) allTranslatedLinks.get(link.getSourceURI());
087: }
088:
089: /**
090: * Returns an iterator for reading targets
091: */
092: public CrawlingIterator iterator() {
093: return new CrawlingIterator(visitedAlready, stillNotVisited);
094: }
095:
096: public static class CrawlingIterator implements Iterator {
097:
098: private Map stillNotVisited;
099: private Set visitedAlready;
100:
101: public CrawlingIterator(Set visitedAlready, Map stillNotVisited) {
102: this .visitedAlready = visitedAlready;
103: this .stillNotVisited = stillNotVisited;
104: }
105:
106: /**
107: * Check if list of not visited URIs is empty
108: *
109: * @return boolean true iff list of not visited URIs is not empty
110: */
111: public boolean hasNext() {
112: return !stillNotVisited.isEmpty();
113: }
114:
115: /**
116: * Removing objects is not supported, and will always throw
117: * a <code>UnsupportedOperationException</code>.
118: */
119: public void remove() {
120: throw new UnsupportedOperationException();
121: }
122:
123: /**
124: * Get next not visited URIs
125: *
126: * @return object from list of not visited URIs, move it immediatly
127: * to set of visited URIs
128: */
129: public Object next() {
130: // could this be simpler:
131: Object nextKey = stillNotVisited.keySet().toArray()[0];
132: Object nextElement = stillNotVisited.remove(nextKey);
133: visitedAlready.add(nextKey);
134: return nextElement;
135: }
136: }
137: }
|