001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr;
017:
018: import org.apache.solr.request.*;
019: import org.apache.solr.util.*;
020: import org.apache.solr.schema.*;
021:
022: import java.util.HashMap;
023:
024: /**
025: * Tests some basic functionality of Solr while demonstrating good
026: * Best Practices for using AbstractSolrTestCase
027: */
028: public class HighlighterTest extends AbstractSolrTestCase {
029:
030: public String getSchemaFile() {
031: return "schema.xml";
032: }
033:
034: public String getSolrConfigFile() {
035: return "solrconfig.xml";
036: }
037:
038: public void setUp() throws Exception {
039: // if you override setUp or tearDown, you better call
040: // the super classes version
041: super .setUp();
042: }
043:
044: public void tearDown() throws Exception {
045: // if you override setUp or tearDown, you better call
046: // the super classes version
047: super .tearDown();
048:
049: }
050:
051: public void testTermVecHighlight() {
052:
053: // do summarization using term vectors
054: HashMap<String, String> args = new HashMap<String, String>();
055: args.put("hl", "true");
056: args.put("hl.fl", "tv_text");
057: args.put("hl.snippets", "2");
058: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
059: "standard", 0, 200, args);
060:
061: assertU(adoc(
062: "tv_text",
063: "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all--we want two disjoint long fragments.",
064: "id", "1"));
065: assertU(commit());
066: assertU(optimize());
067: assertQ(
068: "Basic summarization",
069: sumLRF.makeRequest("tv_text:long"),
070: "//lst[@name='highlighting']/lst[@name='1']",
071: "//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
072: "//arr[@name='tv_text']/str[.=' <em>long</em> fragments.']");
073: }
074:
075: public void testDisMaxHighlight() {
076:
077: // same test run through dismax handler
078: HashMap<String, String> args = new HashMap<String, String>();
079: args.put("hl", "true");
080: args.put("hl.fl", "tv_text");
081: args.put("qf", "tv_text");
082: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
083: "dismax", 0, 200, args);
084:
085: assertU(adoc("tv_text", "a long day's night", "id", "1"));
086: assertU(commit());
087: assertU(optimize());
088: assertQ("Basic summarization", sumLRF.makeRequest("long"),
089: "//lst[@name='highlighting']/lst[@name='1']",
090: "//lst[@name='1']/arr[@name='tv_text']/str");
091:
092: }
093:
094: public void testMultiValueAnalysisHighlight() {
095:
096: // do summarization using re-analysis of the field
097: HashMap<String, String> args = new HashMap<String, String>();
098: args.put("hl", "true");
099: args.put("hl.fl", "textgap");
100: args.put("df", "textgap");
101: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
102: "standard", 0, 200, args);
103:
104: assertU(adoc("textgap", "first entry hasnt queryword",
105: "textgap", "second entry has queryword long", "id", "1"));
106: assertU(commit());
107: assertU(optimize());
108: assertQ("Basic summarization", sumLRF.makeRequest("long"),
109: "//lst[@name='highlighting']/lst[@name='1']",
110: "//lst[@name='1']/arr[@name='textgap']/str");
111:
112: }
113:
114: public void testDefaultFieldHighlight() {
115:
116: // do summarization using re-analysis of the field
117: HashMap<String, String> args = new HashMap<String, String>();
118: args.put("hl", "true");
119: args.put("df", "t_text");
120: args.put("hl.fl", "");
121: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
122: "standard", 0, 200, args);
123:
124: assertU(adoc("t_text", "a long day's night", "id", "1"));
125: assertU(commit());
126: assertU(optimize());
127: assertQ("Basic summarization", sumLRF.makeRequest("long"),
128: "//lst[@name='highlighting']/lst[@name='1']",
129: "//lst[@name='1']/arr[@name='t_text']/str");
130:
131: }
132:
133: public void testHighlightDisabled() {
134:
135: // ensure highlighting can be explicitly disabled
136: HashMap<String, String> args = new HashMap<String, String>();
137: args.put("hl", "false");
138: args.put("hl.fl", "t_text");
139: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
140: "standard", 0, 200, args);
141:
142: assertU(adoc("t_text", "a long day's night", "id", "1"));
143: assertU(commit());
144: assertU(optimize());
145: assertQ("Basic summarization", sumLRF
146: .makeRequest("t_text:long"),
147: "not(//lst[@name='highlighting'])");
148:
149: }
150:
151: public void testTwoFieldHighlight() {
152:
153: // do summarization using re-analysis of the field
154: HashMap<String, String> args = new HashMap<String, String>();
155: args.put("hl", "true");
156: args.put("hl.fl", "t_text tv_text");
157: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
158: "standard", 0, 200, args);
159:
160: assertU(adoc("t_text", "a long day's night", "id", "1",
161: "tv_text", "a long night's day"));
162: assertU(commit());
163: assertU(optimize());
164: assertQ("Basic summarization", sumLRF
165: .makeRequest("t_text:long"),
166: "//lst[@name='highlighting']/lst[@name='1']",
167: "//lst[@name='1']/arr[@name='t_text']/str",
168: "//lst[@name='1']/arr[@name='tv_text']/str");
169: }
170:
171: public void testFieldMatch() {
172: assertU(adoc("t_text1", "random words for highlighting tests",
173: "id", "1", "t_text2",
174: "more random words for second field"));
175: assertU(commit());
176: assertU(optimize());
177:
178: HashMap<String, String> args = new HashMap<String, String>();
179: args.put("hl", "true");
180: args.put("hl.fl", "t_text1 t_text2");
181:
182: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
183: "standard", 0, 200, args);
184: // default should highlight both random and words in both fields
185: assertQ(
186: "Test Default",
187: sumLRF.makeRequest("t_text1:random OR t_text2:words"),
188: "//lst[@name='highlighting']/lst[@name='1']",
189: "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> <em>words</em> for highlighting tests']",
190: "//lst[@name='1']/arr[@name='t_text2']/str[.='more <em>random</em> <em>words</em> for second field']");
191:
192: // requireFieldMatch=true - highlighting should only occur if term matched in that field
193: args.put("hl.requireFieldMatch", "true");
194: sumLRF = h.getRequestFactory("standard", 0, 200, args);
195: assertQ(
196: "Test RequireFieldMatch",
197: sumLRF.makeRequest("t_text1:random OR t_text2:words"),
198: "//lst[@name='highlighting']/lst[@name='1']",
199: "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
200: "//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']");
201: }
202:
203: public void testCustomSimpleFormatterHighlight() {
204:
205: // do summarization using a custom formatter
206: HashMap<String, String> args = new HashMap<String, String>();
207: args.put("hl", "true");
208: args.put("hl.fl", "t_text");
209: args.put("hl.simple.pre", "<B>");
210: args.put("hl.simple.post", "</B>");
211: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
212: "standard", 0, 200, args);
213:
214: assertU(adoc("t_text", "a long days night", "id", "1"));
215: assertU(commit());
216: assertU(optimize());
217: assertQ("Basic summarization", sumLRF
218: .makeRequest("t_text:long"),
219: "//lst[@name='highlighting']/lst[@name='1']",
220: "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']");
221:
222: // test a per-field override
223: args.put("f.t_text.hl.simple.pre", "<I>");
224: args.put("f.t_text.hl.simple.post", "</I>");
225: sumLRF = h.getRequestFactory("standard", 0, 200, args);
226: assertQ("Basic summarization", sumLRF
227: .makeRequest("t_text:long"),
228: "//lst[@name='highlighting']/lst[@name='1']",
229: "//lst[@name='1']/arr[@name='t_text']/str[.='a <I>long</I> days night']");
230:
231: }
232:
233: public void testLongFragment() {
234:
235: HashMap<String, String> args = new HashMap<String, String>();
236: args.put("hl", "true");
237: args.put("hl.fl", "tv_text");
238: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
239: "standard", 0, 200, args);
240:
241: String text = "junit: [mkdir] Created dir: /home/klaas/worio/backend/trunk/build-src/solr-nightly/build/test-results [junit] Running org.apache.solr.BasicFunctionalityTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 5.36 sec [junit] Running org.apache.solr.ConvertedLegacyTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 8.268 sec [junit] Running org.apache.solr.DisMaxRequestHandlerTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.56 sec [junit] Running org.apache.solr.HighlighterTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 4.979 sec [junit] Running org.apache.solr.OutputWriterTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.797 sec [junit] Running org.apache.solr.SampleTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 1.021 sec [junit] Running org.apache.solr.analysis.TestBufferedTokenStream [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.05 sec [junit] Running org.apache.solr.analysis.TestRemoveDuplicatesTokenFilter [junit] Tests run: 3, Failures: 0, Errors: 0, Time elapsed: 0.054 sec [junit] Running org.apache.solr.analysis.TestSynonymFilter [junit] Tests run: 6, Failures: 0, Errors: 0, Time elapsed: 0.081 sec [junit] Running org.apache.solr.analysis.TestWordDelimiterFilter [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.714 sec [junit] Running org.apache.solr.search.TestDocSet [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 0.788 sec [junit] Running org.apache.solr.util.SolrPluginUtilsTest [junit] Tests run: 5, Failures: 0, Errors: 0, Time elapsed: 3.519 sec [junit] Running org.apache.solr.util.TestOpenBitSet [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.533 sec";
242: assertU(adoc("tv_text", text, "id", "1"));
243: assertU(commit());
244: assertU(optimize());
245: assertQ("Basic summarization", sumLRF
246: .makeRequest("tv_text:dir"),
247: "//lst[@name='highlighting']/lst[@name='1']",
248: "//lst[@name='1']/arr[@name='tv_text']/str");
249: }
250:
251: public void testVariableFragsize() {
252: assertU(adoc(
253: "tv_text",
254: "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all",
255: "id", "1"));
256: assertU(commit());
257: assertU(optimize());
258:
259: // default length
260: HashMap<String, String> args = new HashMap<String, String>();
261: args.put("hl", "true");
262: args.put("hl.fl", "tv_text");
263: TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
264: "standard", 0, 200, args);
265: assertQ(
266: "Basic summarization",
267: sumLRF.makeRequest("tv_text:long"),
268: "//lst[@name='highlighting']/lst[@name='1']",
269: "//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which']");
270:
271: // 25
272: args.put("hl.fragsize", "25");
273: sumLRF = h.getRequestFactory("standard", 0, 200, args);
274: assertQ("Basic summarization", sumLRF
275: .makeRequest("tv_text:long"),
276: "//lst[@name='highlighting']/lst[@name='1']",
277: "//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night']");
278:
279: // 0 - NullFragmenter
280: args.put("hl.fragsize", "0");
281: sumLRF = h.getRequestFactory("standard", 0, 200, args);
282: assertQ(
283: "Basic summarization",
284: sumLRF.makeRequest("tv_text:long"),
285: "//lst[@name='highlighting']/lst[@name='1']",
286: "//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all']");
287: }
288: }
|