001: package org.apache.lucene.xmlparser;
002:
003: import java.io.BufferedReader;
004: import java.io.IOException;
005: import java.io.InputStream;
006: import java.io.InputStreamReader;
007:
008: import junit.framework.TestCase;
009:
010: import org.apache.lucene.analysis.Analyzer;
011: import org.apache.lucene.analysis.standard.StandardAnalyzer;
012: import org.apache.lucene.document.Field;
013: import org.apache.lucene.index.IndexReader;
014: import org.apache.lucene.index.IndexWriter;
015: import org.apache.lucene.queryParser.QueryParser;
016: import org.apache.lucene.search.Hits;
017: import org.apache.lucene.search.IndexSearcher;
018: import org.apache.lucene.search.Query;
019: import org.apache.lucene.store.Directory;
020: import org.apache.lucene.store.RAMDirectory;
021:
022: /**
023: * Licensed to the Apache Software Foundation (ASF) under one or more
024: * contributor license agreements. See the NOTICE file distributed with
025: * this work for additional information regarding copyright ownership.
026: * The ASF licenses this file to You under the Apache License, Version 2.0
027: * (the "License"); you may not use this file except in compliance with
028: * the License. You may obtain a copy of the License at
029: *
030: * http://www.apache.org/licenses/LICENSE-2.0
031: *
032: * Unless required by applicable law or agreed to in writing, software
033: * distributed under the License is distributed on an "AS IS" BASIS,
034: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
035: * See the License for the specific language governing permissions and
036: * limitations under the License.
037: */
038: /**
039: * @author maharwood
040: */
041: public class TestParser extends TestCase {
042:
043: CoreParser builder;
044: static Directory dir;
045: Analyzer analyzer = new StandardAnalyzer();
046: IndexReader reader;
047: private IndexSearcher searcher;
048:
049: //CHANGE THIS TO SEE OUTPUT
050: boolean printResults = false;
051:
052: /*
053: * @see TestCase#setUp()
054: */
055: protected void setUp() throws Exception {
056: super .setUp();
057:
058: //initialize the parser
059: builder = new CorePlusExtensionsParser(analyzer,
060: new QueryParser("contents", analyzer));
061:
062: //initialize the index (done once, then cached in static data for use with ALL tests)
063: if (dir == null) {
064: BufferedReader d = new BufferedReader(
065: new InputStreamReader(TestParser.class
066: .getResourceAsStream("reuters21578.txt")));
067: dir = new RAMDirectory();
068: IndexWriter writer = new IndexWriter(dir, analyzer, true);
069: String line = d.readLine();
070: while (line != null) {
071: int endOfDate = line.indexOf('\t');
072: String date = line.substring(0, endOfDate).trim();
073: String content = line.substring(endOfDate).trim();
074: org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
075: doc.add(new Field("date", date, Field.Store.YES,
076: Field.Index.TOKENIZED));
077: doc.add(new Field("contents", content, Field.Store.YES,
078: Field.Index.TOKENIZED));
079: writer.addDocument(doc);
080: line = d.readLine();
081: }
082: d.close();
083: writer.close();
084: }
085: reader = IndexReader.open(dir);
086: searcher = new IndexSearcher(reader);
087:
088: }
089:
090: protected void tearDown() throws Exception {
091: reader.close();
092: searcher.close();
093: // dir.close();
094:
095: }
096:
097: public void testSimpleXML() throws ParserException, IOException {
098: Query q = parse("TermQuery.xml");
099: dumpResults("TermQuery", q, 5);
100: }
101:
102: public void testSimpleTermsQueryXML() throws ParserException,
103: IOException {
104: Query q = parse("TermsQuery.xml");
105: dumpResults("TermsQuery", q, 5);
106: }
107:
108: public void testBooleanQueryXML() throws ParserException,
109: IOException {
110: Query q = parse("BooleanQuery.xml");
111: dumpResults("BooleanQuery", q, 5);
112: }
113:
114: public void testRangeFilterQueryXML() throws ParserException,
115: IOException {
116: Query q = parse("RangeFilterQuery.xml");
117: dumpResults("RangeFilter", q, 5);
118: }
119:
120: public void testUserQueryXML() throws ParserException, IOException {
121: Query q = parse("UserInputQuery.xml");
122: dumpResults("UserInput with Filter", q, 5);
123: }
124:
125: public void testLikeThisQueryXML() throws Exception {
126: Query q = parse("LikeThisQuery.xml");
127: dumpResults("like this", q, 5);
128: }
129:
130: public void testBoostingQueryXML() throws Exception {
131: Query q = parse("BoostingQuery.xml");
132: dumpResults("boosting ", q, 5);
133: }
134:
135: public void testFuzzyLikeThisQueryXML() throws Exception {
136: Query q = parse("FuzzyLikeThisQuery.xml");
137: //show rewritten fuzzyLikeThisQuery - see what is being matched on
138: if (printResults) {
139: System.out.println(q.rewrite(reader));
140: }
141: dumpResults("FuzzyLikeThis", q, 5);
142: }
143:
144: public void testTermsFilterXML() throws Exception {
145: Query q = parse("TermsFilterQuery.xml");
146: dumpResults("Terms Filter", q, 5);
147: }
148:
149: public void testSpanTermXML() throws Exception {
150: Query q = parse("SpanQuery.xml");
151: dumpResults("Span Query", q, 5);
152: }
153:
154: public void testConstantScoreQueryXML() throws Exception {
155: Query q = parse("ConstantScoreQuery.xml");
156: dumpResults("ConstantScoreQuery", q, 5);
157: }
158:
159: public void testMatchAllDocsPlusFilterXML() throws ParserException,
160: IOException {
161: Query q = parse("MatchAllDocsQuery.xml");
162: dumpResults("MatchAllDocsQuery with range filter", q, 5);
163: }
164:
165: public void testBooleanFilterXML() throws ParserException,
166: IOException {
167: Query q = parse("BooleanFilter.xml");
168: dumpResults("Boolean filter", q, 5);
169: }
170:
171: public void testNestedBooleanQuery() throws ParserException,
172: IOException {
173: Query q = parse("NestedBooleanQuery.xml");
174: dumpResults("Nested Boolean query", q, 5);
175: }
176:
177: public void testCachedFilterXML() throws ParserException,
178: IOException {
179: Query q = parse("CachedFilter.xml");
180: dumpResults("Cached filter", q, 5);
181: }
182:
183: public void testDuplicateFilterQueryXML() throws ParserException,
184: IOException {
185: Query q = parse("DuplicateFilterQuery.xml");
186: Hits h = searcher.search(q);
187: assertEquals("DuplicateFilterQuery should produce 1 result ",
188: 1, h.length());
189: }
190:
191: //================= Helper methods ===================================
192: private Query parse(String xmlFileName) throws ParserException,
193: IOException {
194: InputStream xmlStream = TestParser.class
195: .getResourceAsStream(xmlFileName);
196: Query result = builder.parse(xmlStream);
197: xmlStream.close();
198: return result;
199: }
200:
201: private void dumpResults(String qType, Query q, int numDocs)
202: throws IOException {
203: Hits h = searcher.search(q);
204: assertTrue(qType + " should produce results ", h.length() > 0);
205: if (printResults) {
206: System.out.println("=========" + qType + "============");
207: for (int i = 0; i < Math.min(numDocs, h.length()); i++) {
208: org.apache.lucene.document.Document ldoc = h.doc(i);
209: System.out.println("[" + ldoc.get("date") + "]"
210: + ldoc.get("contents"));
211: }
212: System.out.println();
213: }
214: }
215:
216: }
|