001: /*
 * DomainScopeTest
003: *
004: * $Id: DomainScopeTest.java 4651 2006-09-25 18:31:13Z paul_jack $
005: *
006: * Created on May 17, 2004
007: *
008: * Copyright (C) 2004 Internet Archive.
009: *
010: * This file is part of the Heritrix web crawler (crawler.archive.org).
011: *
012: * Heritrix is free software; you can redistribute it and/or modify
013: * it under the terms of the GNU Lesser Public License as published by
014: * the Free Software Foundation; either version 2.1 of the License, or
015: * any later version.
016: *
017: * Heritrix is distributed in the hope that it will be useful,
018: * but WITHOUT ANY WARRANTY; without even the implied warranty of
019: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
020: * GNU Lesser Public License for more details.
021: *
022: * You should have received a copy of the GNU Lesser Public License
023: * along with Heritrix; if not, write to the Free Software
024: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
025: */
026:
027: package org.archive.crawler.scope;
028:
029: import java.util.ArrayList;
030: import java.util.Iterator;
031:
032: import junit.framework.TestCase;
033:
034: import org.apache.commons.httpclient.URIException;
035: import org.archive.net.UURI;
036: import org.archive.net.UURIFactory;
037:
038: /**
039: * Test the domain scope focus filter.
040: *
041: * @author Igor Ranitovic
042: */
043: public class DomainScopeTest extends TestCase {
044:
045: private ArrayList<UURI> testSeeds;
046: private ArrayList<UURI> urlsInScope;
047: private ArrayList<UURI> urlsOutOfScope;
048:
049: private TestUnitDomainScope dc;
050:
051: /**
052: * Since testing only focus filter overwrite all other filter to return
053: * false.
054: *
055: * Also override seedsIterator so the test seeds are used.
056: */
057: @SuppressWarnings("deprecation")
058: private class TestUnitDomainScope extends DomainScope {
059:
060: private static final long serialVersionUID = 2509499903112690451L;
061:
062: public TestUnitDomainScope(String name) {
063: super (name);
064: }
065:
066: /* Force test seeds to be used.
067: * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
068: */
069: public Iterator<UURI> seedsIterator() {
070: return testSeeds.iterator();
071: }
072:
073: protected boolean additionalFocusAccepts(Object o) {
074: return false;
075: }
076:
077: protected boolean transitiveAccepts(Object o) {
078: return false;
079: }
080:
081: protected boolean excludeAccepts(Object o) {
082: return false;
083: }
084: }
085:
086: public void setUp() throws URIException {
087: testSeeds = new ArrayList<UURI>();
088: urlsInScope = new ArrayList<UURI>();
089: urlsOutOfScope = new ArrayList<UURI>();
090: dc = new TestUnitDomainScope("TESTCASE");
091:
092: // Add seeds
093: addURL(testSeeds, "http://www.a.com/");
094: addURL(testSeeds, "http://b.com/");
095: addURL(testSeeds, "http://www11.c.com");
096: addURL(testSeeds, "http://www.x.y.z.com/index.html");
097: addURL(testSeeds, "http://www.1.com/index.html");
098: addURL(testSeeds, "http://www.a_b.com/index.html");
099:
100: // Add urls in domain scope
101: addURL(urlsInScope, "http://www.a.com/");
102: addURL(urlsInScope, "http://www1.a.com/");
103: addURL(urlsInScope, "http://a.com/");
104: addURL(urlsInScope, "http://a.a.com/");
105:
106: addURL(urlsInScope, "http://www.b.com/");
107: addURL(urlsInScope, "http://www1.b.com/");
108: addURL(urlsInScope, "http://b.com/");
109: addURL(urlsInScope, "http://b.b.com/");
110:
111: addURL(urlsInScope, "http://www.c.com/");
112: addURL(urlsInScope, "http://www1.c.com/");
113: addURL(urlsInScope, "http://c.com/");
114: addURL(urlsInScope, "http://c.c.com/");
115:
116: addURL(urlsInScope, "http://www.x.y.z.com/");
117: addURL(urlsInScope, "http://www1.x.y.z.com/");
118: addURL(urlsInScope, "http://x.y.z.com/");
119: addURL(urlsInScope, "http://xyz.x.y.z.com/");
120: addURL(urlsInScope, "http://1.com/index.html");
121: addURL(urlsInScope, "http://a_b.com/index.html");
122:
123: // Add urls out of scope
124: addURL(urlsOutOfScope, "http://a.co");
125: addURL(urlsOutOfScope, "http://a.comm");
126: addURL(urlsOutOfScope, "http://aa.com");
127: addURL(urlsOutOfScope, "http://z.com");
128: addURL(urlsOutOfScope, "http://y.z.com");
129: }
130:
131: public void addURL(ArrayList<UURI> list, String url)
132: throws URIException {
133: list.add(UURIFactory.getInstance(url));
134: }
135:
136: public void testInScope() throws URIException {
137: for (Iterator i = this .urlsInScope.iterator(); i.hasNext();) {
138: Object url = i.next();
139: assertTrue("Should be in domain scope: " + url, dc
140: .accepts(url));
141: }
142: }
143:
144: public void testOutOfScope() throws URIException {
145: for (Iterator i = this .urlsOutOfScope.iterator(); i.hasNext();) {
146: Object url = i.next();
147: assertFalse("Should not be in domain scope: " + url, dc
148: .accepts(url));
149: }
150: }
151: }
|