001: /* SurtPrefixSetTest
002: *
003: * $Id: SurtPrefixSetTest.java 3712 2005-07-19 00:27:50Z gojomo $
004: *
005: * Created on Jul 23, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util;
026:
027: import java.io.IOException;
028: import java.io.StringReader;
029: import java.util.Iterator;
030:
031: import junit.framework.Test;
032: import junit.framework.TestCase;
033: import junit.framework.TestSuite;
034:
035: /**
036: * @author gojomo
037: */
038: public class SurtPrefixSetTest extends TestCase {
039: private static final String ARCHIVE_ORG_DOMAIN_SURT = "http://(org,archive,";
040: private static final String WWW_EXAMPLE_ORG_HOST_SURT = "http://(org,example,www,)";
041: private static final String HOME_EXAMPLE_ORG_PATH_SURT = "http://(org,example,home,)/pages/";
042: private static final String BOK_IS_REDUNDANT_SURT = "http://(is,bok,";
043: private static final String IS_DOMAIN_SURT = "http://(is,";
044: private static final String WWW_BOK_IS_REDUNDANT_SURT = "http://(is,bok,www";
045:
046: private static final String TEST_SURT_LIST = "# a test set of surt prefixes \n"
047: + ARCHIVE_ORG_DOMAIN_SURT
048: + "\n"
049: + WWW_EXAMPLE_ORG_HOST_SURT
050: + "\n"
051: + HOME_EXAMPLE_ORG_PATH_SURT
052: + "\n"
053: + BOK_IS_REDUNDANT_SURT
054: + " # is redundant\n"
055: + IS_DOMAIN_SURT
056: + "\n"
057: + WWW_BOK_IS_REDUNDANT_SURT + " # is redundant\n";
058:
059: /**
060: * Create a new SurtPrefixSetTest object
061: *
062: * @param testName
063: * the name of the test
064: */
065: public SurtPrefixSetTest(final String testName) {
066: super (testName);
067: }
068:
069: /**
070: * run all the tests for SurtPrefixSetTest
071: *
072: * @param argv
073: * the command line arguments
074: */
075: public static void main(String argv[]) {
076: junit.textui.TestRunner.run(suite());
077: }
078:
079: /**
080: * return the suite of tests for SurtPrefixSetTest
081: *
082: * @return the suite of test
083: */
084: public static Test suite() {
085: return new TestSuite(SurtPrefixSetTest.class);
086: }
087:
088: public void testMisc() throws IOException {
089: SurtPrefixSet surts = new SurtPrefixSet();
090: StringReader sr = new StringReader(TEST_SURT_LIST);
091: surts.importFrom(sr);
092:
093: assertContains(surts, ARCHIVE_ORG_DOMAIN_SURT);
094: assertContains(surts, WWW_EXAMPLE_ORG_HOST_SURT);
095: assertContains(surts, HOME_EXAMPLE_ORG_PATH_SURT);
096: assertContains(surts, IS_DOMAIN_SURT);
097:
098: assertDoesntContain(surts, BOK_IS_REDUNDANT_SURT);
099: assertDoesntContain(surts, WWW_BOK_IS_REDUNDANT_SURT);
100:
101: assertContainsPrefix(surts, SURT
102: .fromURI("http://example.is/foo"));
103: assertDoesntContainPrefix(surts, SURT
104: .fromURI("http://home.example.org/foo"));
105: }
106:
107: /**
108: * @param surts
109: * @param string
110: */
111: private void assertDoesntContainPrefix(SurtPrefixSet surts, String s) {
112: assertEquals(s + " is prefixed", surts.containsPrefixOf(s),
113: false);
114: }
115:
116: /**
117: * @param surts
118: * @param string
119: */
120: private void assertContainsPrefix(SurtPrefixSet surts, String s) {
121: assertEquals(s + " isn't prefixed", surts.containsPrefixOf(s),
122: true);
123: }
124:
125: /**
126: * @param surts
127: * @param www_bok_is_redundant_surt2
128: */
129: private void assertDoesntContain(SurtPrefixSet surts, String s) {
130: assertEquals(s + " is present", surts.contains(s), false);
131: }
132:
133: /**
134: * @param archive_org_domain_surt2
135: */
136: private void assertContains(SurtPrefixSet surts, String s) {
137: assertEquals(s + " is missing", surts.contains(s), true);
138: }
139:
140: public void testImportFromUris() throws IOException {
141: String seed = "http://www.archive.org/index.html";
142: assertEquals("Convert failed " + seed,
143: "http://(org,archive,www,)/", makeSurtPrefix(seed));
144: seed = "http://timmknibbs4senate.blogspot.com/";
145: assertEquals("Convert failed " + seed,
146: "http://(com,blogspot,timmknibbs4senate,)/",
147: makeSurtPrefix(seed));
148: seed = "https://one.two.three";
149: assertEquals("Convert failed " + seed,
150: "http://(three,two,one,", makeSurtPrefix(seed));
151: seed = "https://xone.two.three/a/b/c/";
152: assertEquals("Convert failed " + seed,
153: "http://(three,two,xone,)/a/b/c/", makeSurtPrefix(seed));
154: seed = "https://yone.two.three/a/b/c";
155: assertEquals("Convert failed " + seed,
156: "http://(three,two,yone,)/a/b/", makeSurtPrefix(seed));
157: }
158:
159: private String makeSurtPrefix(String seed) {
160: SurtPrefixSet surts = new SurtPrefixSet();
161: StringReader sr = new StringReader(seed);
162: surts.importFromUris(sr);
163: String result = null;
164: for (Iterator i = surts.iterator(); i.hasNext();) {
165: result = (String) i.next();
166: }
167: return result;
168: }
169: }
|