01: /*
02: * ExtractorURITest
03: *
04: * $Id: ExtractorURITest.java 4595 2006-09-02 00:43:59Z gojomo $
05: *
06: * Created on August 30, 2006
07: *
08: * Copyright (C) 2006 Internet Archive.
09: *
10: * This file is part of the Heritrix web crawler (crawler.archive.org).
11: *
12: * Heritrix is free software; you can redistribute it and/or modify
13: * it under the terms of the GNU Lesser Public License as published by
14: * the Free Software Foundation; either version 2.1 of the License, or
15: * any later version.
16: *
17: * Heritrix is distributed in the hope that it will be useful,
18: * but WITHOUT ANY WARRANTY; without even the implied warranty of
19: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: * GNU Lesser Public License for more details.
21: *
22: * You should have received a copy of the GNU Lesser Public License
23: * along with Heritrix; if not, write to the Free Software
24: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: */
26: package org.archive.crawler.extractor;
27:
28: import java.util.List;
29:
30: import org.archive.net.UURI;
31:
32: import junit.framework.TestCase;
33:
34: /**
35: * Test ExtractorURI
36: *
37: * @author gojomo
38: */
39: public class ExtractorURITest extends TestCase {
40:
41: public void testFullQuery() {
42: String queryStringUri = "http://www.example2.com";
43: innerTestQueryString(queryStringUri, queryStringUri);
44: }
45:
46: public void testFullQueryEncoded() {
47: String queryStringUri = "http%3A//www.example2.com/";
48: String expectedUri = "http://www.example2.com/";
49: innerTestQueryString(queryStringUri, expectedUri);
50: }
51:
52: public void testFullQueryEncodedComplex() {
53: String queryStringUri = "http%3A//www.example2.com/foo%3Fbar%3Dbz%26red%3Dblue";
54: String expectedUri = "http://www.example2.com/foo?bar=bz&red=blue";
55: innerTestQueryString(queryStringUri, expectedUri);
56: }
57:
58: private void innerTestQueryString(String queryStringUri,
59: String expectedUri) {
60: UURI uuri = UURI.from("http://www.example.com/foo?"
61: + queryStringUri);
62: innerTestForPresence(uuri, expectedUri);
63: }
64:
65: private void innerTestForPresence(UURI uuri, String expectedUri) {
66: List<String> results = ExtractorURI
67: .extractQueryStringLinks(uuri);
68: assertTrue("URI not found: " + expectedUri, results
69: .contains(expectedUri));
70: }
71:
72: public void testParameterComplex() {
73: String parameterUri = "http%3A//www.example2.com/foo%3Fbar%3Dbz%26red%3Dblue";
74: String expectedUri = "http://www.example2.com/foo?bar=bz&red=blue";
75: UURI uuri = UURI.from("http://www.example.com/foo?uri="
76: + parameterUri + "&foo=bar");
77: innerTestForPresence(uuri, expectedUri);
78: }
79: }
|