01: package org.apache.lucene.benchmark.byTask.feeds;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.io.IOException;
21: import java.io.Reader;
22: import java.text.DateFormat;
23: import java.util.Date;
24:
25: /**
26: * HTML Parsing Interfacew for test purposes
27: */
28: public interface HTMLParser {
29:
30: /**
31: * Parse the input Reader and return DocData.
32: * A provided name or date is used for the result, otherwise an attempt is
33: * made to set them from the parsed data.
34: * @param dateFormat date formatter to use for extracting the date.
35: * @param name name of the result doc data. If null, attempt to set by parsed data.
36: * @param date date of the result doc data. If null, attempt to set by parsed data.
37: * @param reader of html text to parse.
38: * @return Parsed doc data.
39: * @throws IOException
40: * @throws InterruptedException
41: */
42: public DocData parse(String name, Date date, Reader reader,
43: DateFormat dateFormat) throws IOException,
44: InterruptedException;
45:
46: /**
47: * Parse the inputText and return DocData.
48: * @param inputText the html text to parse.
49: * @see #parse(String, Date, Reader, DateFormat)
50: */
51: public DocData parse(String name, Date date,
52: StringBuffer inputText, DateFormat dateFormat)
53: throws IOException, InterruptedException;
54:
55: }
|