01: /*
02: * ExtractorImpliedURITest
03: *
04: * $Id: ExtractorImpliedURITest.java 4667 2006-09-26 20:38:48Z paul_jack $
05: *
06: * Created on August 30, 2006
07: *
08: * Copyright (C) 2006 Internet Archive.
09: *
10: * This file is part of the Heritrix web crawler (crawler.archive.org).
11: *
12: * Heritrix is free software; you can redistribute it and/or modify
13: * it under the terms of the GNU Lesser Public License as published by
14: * the Free Software Foundation; either version 2.1 of the License, or
15: * any later version.
16: *
17: * Heritrix is distributed in the hope that it will be useful,
18: * but WITHOUT ANY WARRANTY; without even the implied warranty of
19: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: * GNU Lesser Public License for more details.
21: *
22: * You should have received a copy of the GNU Lesser Public License
23: * along with Heritrix; if not, write to the Free Software
24: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: */
26: package org.archive.crawler.extractor;
27:
28: import junit.framework.TestCase;
29:
30: /**
31: * Test ExtractorImpliedURI
32: *
33: * @author gojomo
34: */
35: public class ExtractorImpliedURITest extends TestCase {
36:
37: public void testYouTubeExample() {
38: String startUri = "http://youtube.com/player2.swf?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
39: String expectedUri = "http://youtube.com/get_video?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
40: // without escaping: ^(http://[\w\.:@]*)/player2.swf\?(.*)$
41: String triggerPattern = "^(http://[\\w\\.:@]*)/player2.swf\\?(.*)$";
42: String buildPattern = "$1/get_video?$2";
43:
44: String implied = ExtractorImpliedURI.extractImplied(startUri,
45: triggerPattern, buildPattern);
46: assertEquals(expectedUri, implied);
47: }
48: }
|