001: /*
002: * This is the MIT license, see also http://www.opensource.org/licenses/mit-license.html
003: *
004: * Copyright (c) 2001 Brian Pitcher
005: *
006: * Permission is hereby granted, free of charge, to any person obtaining a
007: * copy of this software and associated documentation files (the "Software"),
008: * to deal in the Software without restriction, including without limitation
009: * the rights to use, copy, modify, merge, publish, distribute, sublicense,
010: * and/or sell copies of the Software, and to permit persons to whom the
011: * Software is furnished to do so, subject to the following conditions:
012: *
013: * The above copyright notice and this permission notice shall be included in
014: * all copies or substantial portions of the Software.
015: *
016: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
017: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
018: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
019: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
020: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
021: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
022: * SOFTWARE.
023: */
024:
025: // $Header: /cvsroot/weblech/weblech/src/weblech/spider/DownloadQueue.java,v 1.3 2002/06/09 11:36:23 weblech Exp $
026: package weblech.spider;
027:
028: import java.util.*;
029: import java.net.URL;
030: import java.io.Serializable;
031:
032: public class DownloadQueue implements Serializable {
033: private SpiderConfig config;
034:
035: private List interestingURLsToDownload;
036: private List averageURLsToDownload;
037: private List boringURLsToDownload;
038: private Set urlsInQueue;
039:
040: public DownloadQueue(SpiderConfig config) {
041: this .config = config;
042: interestingURLsToDownload = new ArrayList();
043: averageURLsToDownload = new ArrayList();
044: boringURLsToDownload = new ArrayList();
045: urlsInQueue = new HashSet();
046: }
047:
048: public void queueURL(URLToDownload url) {
049: URL u = url.getURL();
050: if (urlsInQueue.contains(u)) {
051: return;
052: }
053:
054: if (config.isInteresting(u)) {
055: if (config.isDepthFirstSearch()) {
056: interestingURLsToDownload.add(0, url);
057: } else {
058: interestingURLsToDownload.add(url);
059: }
060: } else if (config.isBoring(u)) {
061: if (config.isDepthFirstSearch()) {
062: boringURLsToDownload.add(0, url);
063: } else {
064: boringURLsToDownload.add(url);
065: }
066: } else {
067: if (config.isDepthFirstSearch()) {
068: averageURLsToDownload.add(0, url);
069: } else {
070: averageURLsToDownload.add(url);
071: }
072: }
073:
074: urlsInQueue.add(u);
075: }
076:
077: public void queueURLs(Collection urls) {
078: for (Iterator i = urls.iterator(); i.hasNext();) {
079: URLToDownload u2d = (URLToDownload) i.next();
080: queueURL(u2d);
081: }
082: }
083:
084: public URLToDownload getNextInQueue() {
085: if (interestingURLsToDownload.size() > 0) {
086: return returnURLFrom(interestingURLsToDownload);
087: } else if (averageURLsToDownload.size() > 0) {
088: return returnURLFrom(averageURLsToDownload);
089: } else if (boringURLsToDownload.size() > 0) {
090: return returnURLFrom(boringURLsToDownload);
091: } else {
092: return null;
093: }
094: }
095:
096: private URLToDownload returnURLFrom(List urlList) {
097: URLToDownload u2d = (URLToDownload) urlList.get(0);
098: urlList.remove(0);
099: urlsInQueue.remove(u2d.getURL());
100: return u2d;
101: }
102:
103: public int size() {
104: return interestingURLsToDownload.size()
105: + averageURLsToDownload.size()
106: + boringURLsToDownload.size();
107: }
108:
109: public String toString() {
110: return size() + " URLs";
111: }
112:
113: } // End class DownloadQueue
|