001: /* TextUtilsTest.java
002: *
003: * $Id: TextUtilsTest.java 5107 2007-05-01 00:21:52Z gojomo $
004: *
005: * Created Tue Jan 20 14:17:59 PST 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025:
026: package org.archive.util;
027:
028: import java.util.regex.Matcher;
029:
030: import junit.framework.Test;
031: import junit.framework.TestCase;
032: import junit.framework.TestSuite;
033:
034: /**
035: * JUnit test suite for TextUtils
036: *
037: * @author gojomo
038: * @version $ Id$
039: */
040: public class TextUtilsTest extends TestCase {
041: /**
042: * Create a new TextUtilsTest object
043: *
044: * @param testName
045: * the name of the test
046: */
047: public TextUtilsTest(final String testName) {
048: super (testName);
049: }
050:
051: /**
052: * run all the tests for TextUtilsTest
053: *
054: * @param argv
055: * the command line arguments
056: */
057: public static void main(String argv[]) {
058: junit.textui.TestRunner.run(suite());
059: }
060:
061: /**
062: * return the suite of tests for MemQueueTest
063: *
064: * @return the suite of test
065: */
066: public static Test suite() {
067: return new TestSuite(TextUtilsTest.class);
068: }
069:
070: public void testMatcherRecycling() {
071: String pattern = "f.*";
072: Matcher m1 = TextUtils.getMatcher(pattern, "foo");
073: assertTrue("matcher against 'foo' problem", m1.matches());
074: TextUtils.recycleMatcher(m1);
075: Matcher m2 = TextUtils.getMatcher(pattern, "");
076: assertFalse("matcher against '' problem", m2.matches());
077: assertTrue("matcher not recycled", m1 == m2);
078: // now verify proper behavior without recycling
079: Matcher m3 = TextUtils.getMatcher(pattern, "fuggedaboutit");
080: assertTrue("matcher against 'fuggedaboutit' problem", m3
081: .matches());
082: assertFalse("matcher was recycled", m3 == m2);
083: }
084:
085: public void testGetFirstWord() {
086: final String firstWord = "one";
087: String tmpStr = TextUtils
088: .getFirstWord(firstWord + " two three");
089: assertTrue("Failed to get first word 1 " + tmpStr, tmpStr
090: .equals(firstWord));
091: tmpStr = TextUtils.getFirstWord(firstWord);
092: assertTrue("Failed to get first word 2 " + tmpStr, tmpStr
093: .equals(firstWord));
094: }
095:
096: public void testUnescapeHtml() {
097: final String abc = "abc";
098: CharSequence cs = TextUtils.unescapeHtml("abc");
099: assertEquals(cs, abc);
100: final String backwards = "aaa;lt&aaa";
101: cs = TextUtils.unescapeHtml(backwards);
102: assertEquals(cs, backwards);
103: final String ampersand = "aaa&aaa";
104: cs = TextUtils.unescapeHtml(ampersand);
105: assertEquals(cs, ampersand);
106: final String encodedAmpersand = "aaa&aaa";
107: cs = TextUtils.unescapeHtml(encodedAmpersand);
108: assertEquals(cs, ampersand);
109: final String encodedQuote = "aaa'aaa";
110: cs = TextUtils.unescapeHtml(encodedQuote);
111: assertEquals(cs, "aaa'aaa");
112: final String entityQuote = "aaa"aaa";
113: cs = TextUtils.unescapeHtml(entityQuote);
114: assertEquals(cs, "aaa\"aaa");
115: }
116:
117: public void testUnescapeHtmlWithDanglingAmpersand() {
118: final String mixedEncodedAmpersand1 = "aaa&aaa&aaa";
119: CharSequence cs = TextUtils
120: .unescapeHtml(mixedEncodedAmpersand1);
121: assertEquals(cs, "aaa&aaa&aaa");
122: final String mixedEncodedAmpersand2 = "aaa&aaa&aaa&aaa";
123: cs = TextUtils.unescapeHtml(mixedEncodedAmpersand2);
124: assertEquals(cs, "aaa&aaa&aaa&aaa");
125: }
126: }
|