001: /**********************************************************************************
002: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/test/org/sakai/search/index/impl/test/HTMLParserTest.java $
003: * $Id: HTMLParserTest.java 22588 2007-03-14 09:53:30Z ian@caret.cam.ac.uk $
004: ***********************************************************************************
005: *
006: * Copyright (c) 2003, 2004, 2005, 2006, 2007 The Sakai Foundation.
007: *
008: * Licensed under the Educational Community License, Version 1.0 (the "License");
009: * you may not use this file except in compliance with the License.
010: * You may obtain a copy of the License at
011: *
012: * http://www.opensource.org/licenses/ecl1.php
013: *
014: * Unless required by applicable law or agreed to in writing, software
015: * distributed under the License is distributed on an "AS IS" BASIS,
016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: * See the License for the specific language governing permissions and
018: * limitations under the License.
019: *
020: **********************************************************************************/package org.sakai.search.index.impl.test;
021:
022: import java.io.BufferedReader;
023: import java.io.IOException;
024: import java.io.InputStream;
025: import java.io.InputStreamReader;
026: import java.util.Iterator;
027: import java.util.Properties;
028:
029: import junit.framework.TestCase;
030:
031: import org.sakaiproject.search.api.SearchUtils;
032: import org.sakaiproject.search.component.adapter.contenthosting.HTMLParser;
033:
034: /**
035: * @author ieb
036: *
037: */
038: public class HTMLParserTest extends TestCase {
039:
040: /**
041: * @param arg0
042: */
043: public HTMLParserTest(String arg0) {
044: super (arg0);
045: }
046:
047: /* (non-Javadoc)
048: * @see junit.framework.TestCase#setUp()
049: */
050: protected void setUp() throws Exception {
051: super .setUp();
052: }
053:
054: /* (non-Javadoc)
055: * @see junit.framework.TestCase#tearDown()
056: */
057: protected void tearDown() throws Exception {
058: super .tearDown();
059: }
060:
061: public void testParser() throws Exception {
062: Properties p = new Properties();
063: InputStream inStream = getClass().getResourceAsStream(
064: "parsertest.config");
065: p.load(inStream);
066: inStream.close();
067: for (Iterator tests = p.keySet().iterator(); tests.hasNext();) {
068:
069: String tname = (String) tests.next();
070: StringBuilder sb = new StringBuilder();
071: for (Iterator<String> i = new HTMLParser(loadFile(p
072: .getProperty(tname))); i.hasNext();) {
073: String n = i.next();
074: SearchUtils.appendCleanString(n, sb);
075: }
076: String result = sb.toString();
077: System.err.println("Result is " + result);
078: if (p.containsKey(tname + ".result")) {
079: assertEquals("Tokens dont match ", loadFile(p
080: .getProperty(tname + ".result")), sb.toString());
081: }
082: }
083: }
084:
085: /**
086: * @param property
087: * @return
088: * @throws IOException
089: */
090: private String loadFile(String property) throws IOException {
091: System.err.println("Loading :" + property + ":");
092: BufferedReader br = new BufferedReader(new InputStreamReader(
093: getClass().getResourceAsStream(property), "UTF-8"));
094: StringBuffer sb = new StringBuffer();
095: for (String s = br.readLine(); s != null; s = br.readLine()) {
096: sb.append(s).append("\n");
097: }
098: br.close();
099: return sb.toString();
100: }
101:
102: }
|