001: /* StripSessionIDsTest
002: *
003: * Created on Oct 6, 2004
004: *
005: * Copyright (C) 2004 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.url.canonicalize;
024:
025: import org.apache.commons.httpclient.URIException;
026: import org.archive.net.UURIFactory;
027:
028: import junit.framework.TestCase;
029:
030: /**
031: * Test stripping of session ids.
032: * @author stack
033: * @version $Date: 2006-09-01 22:44:50 +0000 (Fri, 01 Sep 2006) $, $Revision: 4591 $
034: */
035: public class StripSessionIDsTest extends TestCase {
036: private static final String BASE = "http://www.archive.org/index.html";
037:
038: public void testCanonicalize() throws URIException {
039: String str32id = "0123456789abcdefghijklemopqrstuv";
040: String url = BASE + "?jsessionid=" + str32id;
041: String expectedResult = BASE + "?";
042: String result = (new StripSessionIDs("test")).canonicalize(url,
043: UURIFactory.getInstance(url));
044: assertTrue("Failed " + result, expectedResult.equals(result));
045:
046: // Test that we don't strip if not 32 chars only.
047: url = BASE + "?jsessionid=" + str32id + '0';
048: expectedResult = url;
049: result = (new StripSessionIDs("test")).canonicalize(url,
050: UURIFactory.getInstance(url));
051: assertTrue("Failed " + result, expectedResult.equals(result));
052:
053: // Test what happens when followed by another key/value pair.
054: url = BASE + "?jsessionid=" + str32id + "&x=y";
055: expectedResult = BASE + "?x=y";
056: result = (new StripSessionIDs("test")).canonicalize(url,
057: UURIFactory.getInstance(url));
058: assertTrue("Failed " + result, expectedResult.equals(result));
059:
060: // Test what happens when followed by another key/value pair and
061: // prefixed by a key/value pair.
062: url = BASE + "?one=two&jsessionid=" + str32id + "&x=y";
063: expectedResult = BASE + "?one=two&x=y";
064: result = (new StripSessionIDs("test")).canonicalize(url,
065: UURIFactory.getInstance(url));
066: assertTrue("Failed " + result, expectedResult.equals(result));
067:
068: // Test what happens when prefixed by a key/value pair.
069: url = BASE + "?one=two&jsessionid=" + str32id;
070: expectedResult = BASE + "?one=two&";
071: result = (new StripSessionIDs("test")).canonicalize(url,
072: UURIFactory.getInstance(url));
073: assertTrue("Failed " + result, expectedResult.equals(result));
074:
075: // Test aspsession.
076: url = BASE + "?aspsessionidABCDEFGH="
077: + "ABCDEFGHIJKLMNOPQRSTUVWX" + "&x=y";
078: expectedResult = BASE + "?x=y";
079: result = (new StripSessionIDs("test")).canonicalize(url,
080: UURIFactory.getInstance(url));
081: assertTrue("Failed " + result, expectedResult.equals(result));
082:
083: // Test archive phpsession.
084: url = BASE + "?phpsessid=" + str32id + "&x=y";
085: expectedResult = BASE + "?x=y";
086: result = (new StripSessionIDs("test")).canonicalize(url,
087: UURIFactory.getInstance(url));
088: assertTrue("Failed " + result, expectedResult.equals(result));
089:
090: // With prefix too.
091: url = BASE + "?one=two&phpsessid=" + str32id + "&x=y";
092: expectedResult = BASE + "?one=two&x=y";
093: result = (new StripSessionIDs("test")).canonicalize(url,
094: UURIFactory.getInstance(url));
095: assertTrue("Failed " + result, expectedResult.equals(result));
096:
097: // With only prefix
098: url = BASE + "?one=two&phpsessid=" + str32id;
099: expectedResult = BASE + "?one=two&";
100: result = (new StripSessionIDs("test")).canonicalize(url,
101: UURIFactory.getInstance(url));
102: assertTrue("Failed " + result, expectedResult.equals(result));
103:
104: // Test sid.
105: url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805"
106: + "&x=y";
107: expectedResult = BASE + "?x=y";
108: result = (new StripSessionIDs("test")).canonicalize(url,
109: UURIFactory.getInstance(url));
110: assertTrue("Failed " + result, expectedResult.equals(result));
111:
112: // Igor test.
113: url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&"
114: + "jsessionid=" + str32id;
115: expectedResult = BASE + "?";
116: result = (new StripSessionIDs("test")).canonicalize(url,
117: UURIFactory.getInstance(url));
118: assertTrue("Failed " + result, expectedResult.equals(result));
119: }
120: }
|