001: /*
002: * Copyright 2004-2006 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.compass.core.test.highlighter;
018:
019: import java.util.LinkedHashSet;
020: import java.util.Set;
021:
022: import org.compass.core.CompassDetachedHits;
023: import org.compass.core.CompassHighlighter;
024: import org.compass.core.CompassHits;
025: import org.compass.core.CompassSession;
026: import org.compass.core.CompassTransaction;
027: import org.compass.core.Resource;
028: import org.compass.core.config.CompassSettings;
029: import org.compass.core.engine.SearchEngineException;
030: import org.compass.core.lucene.LuceneEnvironment;
031: import org.compass.core.lucene.engine.queryparser.DefaultLuceneQueryParser;
032: import org.compass.core.test.AbstractTestCase;
033:
034: /**
035: * @author kimchy
036: */
037: public class HighlighterTests extends AbstractTestCase {
038:
039: private static String texts[] = {
040: "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
041: "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
042: "JFK has been shot", "John Kennedy has been shot",
043: "This text has a typo in referring to Keneddy" };
044:
045: protected String[] getMappings() {
046: return new String[] { "highlighter/highlighter.cpm.xml" };
047: }
048:
049: protected void addSettings(CompassSettings settings) {
050: super .addSettings(settings);
051: settings
052: .setGroupSettings(
053: LuceneEnvironment.Highlighter.PREFIX,
054: "smallFragmenter",
055: new String[] { LuceneEnvironment.Highlighter.Fragmenter.SIMPLE_SIZE },
056: new String[] { "20" });
057: settings
058: .setGroupSettings(
059: LuceneEnvironment.Highlighter.PREFIX,
060: "commaSeparator",
061: new String[] { LuceneEnvironment.Highlighter.SEPARATOR },
062: new String[] { "," });
063: settings
064: .setGroupSettings(
065: LuceneEnvironment.QueryParser.PREFIX,
066: "noConstantScorePrefix",
067: new String[] {
068: LuceneEnvironment.QueryParser.TYPE,
069: LuceneEnvironment.QueryParser.DEFAULT_PARSER_ALLOW_CONSTANT_SCORE_PREFIX_QUERY },
070: new String[] {
071: DefaultLuceneQueryParser.class
072: .getName(), "false" });
073: }
074:
075: public void testSimpleHighlighting() {
076: CompassSession session = openSession();
077: CompassTransaction tr = session.beginTransaction();
078:
079: setUpData(session);
080:
081: CompassHits hits = session.find("Kennedy");
082: String fragment = hits.highlighter(0).fragment("text");
083: assertEquals("John <b>Kennedy</b> has been shot", fragment);
084:
085: fragment = hits.highlighter(0).fragment("text", texts[3]);
086: assertEquals("John <b>Kennedy</b> has been shot", fragment);
087:
088: // test automatic storing of highlighted text
089: assertEquals(fragment, hits.highlightedText(0)
090: .getHighlightedText());
091: CompassDetachedHits detachedHits = hits.detach();
092: assertEquals(fragment, detachedHits.highlightedText(0)
093: .getHighlightedText());
094:
095: fragment = hits.highlighter(1)
096: .setHighlighter("smallFragmenter")
097: .setMaxNumFragments(3).fragmentsWithSeparator("text");
098: assertEquals(
099: "This piece of text refers to <b>Kennedy</b>... to <b>Kennedy</b>",
100: fragment);
101: assertEquals(fragment, hits.highlightedText(1)
102: .getHighlightedText());
103: detachedHits = hits.detach();
104: assertEquals(fragment, detachedHits.highlightedText(1)
105: .getHighlightedText());
106:
107: String fragments[] = hits.highlighter(1).setHighlighter(
108: "smallFragmenter").setMaxNumFragments(3).fragments(
109: "text");
110: assertEquals(2, fragments.length);
111: assertEquals("This piece of text refers to <b>Kennedy</b>",
112: fragments[0]);
113: assertEquals(" to <b>Kennedy</b>", fragments[1]);
114:
115: hits = session.find("Kenn*");
116: fragment = hits.highlighter(0).fragment("text");
117: assertNull(fragment);
118:
119: hits = session.queryBuilder().queryString("Kenn*")
120: .setQueryParser("noConstantScorePrefix").toQuery()
121: .hits();
122: fragment = hits.highlighter(0).fragment("text");
123: assertEquals("John <b>Kennedy</b> has been shot", fragment);
124:
125: tr.commit();
126: }
127:
128: public void testNoTermVectorException() {
129: CompassSession session = openSession();
130: CompassTransaction tr = session.beginTransaction();
131:
132: setUpData(session);
133:
134: CompassHits hits = session.find("Kennedy");
135:
136: String fragment = hits.highlighter(1).setTextTokenizer(
137: CompassHighlighter.TextTokenizer.ANALYZER)
138: .setHighlighter("smallFragmenter")
139: .setMaxNumFragments(3).fragmentsWithSeparator("text");
140: assertEquals(
141: "This piece of text refers to <b>Kennedy</b>... to <b>Kennedy</b>",
142: fragment);
143:
144: try {
145: hits.highlighter(0).setTextTokenizer(
146: CompassHighlighter.TextTokenizer.TERM_VECTOR)
147: .fragment("text");
148: fail();
149: } catch (SearchEngineException e) {
150:
151: }
152:
153: tr.commit();
154: }
155:
156: public void testWithTermVectorException() {
157: CompassSession session = openSession();
158: CompassTransaction tr = session.beginTransaction();
159:
160: setUpData(session, "a1");
161:
162: tr.commit();
163: session.close();
164:
165: session = openSession();
166: tr = session.beginTransaction();
167:
168: CompassHits hits = session.find("Kennedy");
169:
170: String fragment = hits.highlighter(1).setTextTokenizer(
171: CompassHighlighter.TextTokenizer.ANALYZER)
172: .setHighlighter("smallFragmenter")
173: .setMaxNumFragments(3).fragmentsWithSeparator("text");
174: assertEquals(
175: "This piece of text refers to <b>Kennedy</b>... to <b>Kennedy</b>",
176: fragment);
177:
178: fragment = hits.highlighter(1).setTextTokenizer(
179: CompassHighlighter.TextTokenizer.TERM_VECTOR)
180: .setHighlighter("smallFragmenter")
181: .setMaxNumFragments(3).fragmentsWithSeparator("text");
182: assertEquals(
183: "This piece of text refers to <b>Kennedy</b>... to <b>Kennedy</b>",
184: fragment);
185:
186: hits = session.find("Kenn*");
187: fragment = hits.highlighter(0).fragment("text");
188: assertNull(fragment);
189:
190: tr.commit();
191: }
192:
193: public void testMultiResourceHighlighter() {
194: CompassSession session = openSession();
195: CompassTransaction tr = session.beginTransaction();
196:
197: setUpMultiPropertyData(session, new String[] { "Lucene",
198: "Luke", "Lukas" });
199:
200: CompassHits hits = session.find("Lu*e");
201: assertEquals(1, hits.length());
202:
203: String[] fragments = hits.highlighter(0).multiResourceFragment(
204: "text");
205: assertEquals(2, fragments.length);
206:
207: String fragment = hits.highlighter(0).setHighlighter(
208: "commaSeparator").multiResourceFragmentWithSeparator(
209: "text");
210: assertEquals("<b>Lucene</b>,<b>Luke</b>", fragment);
211:
212: tr.commit();
213: }
214:
215: public void testSimpleHighlightingWithAlias() {
216: CompassSession session = openSession();
217: CompassTransaction tr = session.beginTransaction();
218:
219: setUpMultiPropertyData(session, new String[] { "parent bla" });
220:
221: CompassHits hits = session.queryBuilder().queryString("bla")
222: .setQueryParser("noConstantScorePrefix").toQuery()
223: .setAliases("parent").hits();
224: String fragment = hits.highlighter(0).fragment("text");
225: assertEquals("parent <b>bla</b>", fragment);
226:
227: tr.commit();
228: }
229:
230: private void setUpData(CompassSession session) {
231: setUpData(session, "a");
232: }
233:
234: private void setUpData(CompassSession session, String alias) {
235: for (int i = 0; i < texts.length; i++) {
236: Resource resource = getResourceFactory().createResource(
237: alias);
238: resource.addProperty("id", "" + i);
239: resource.addProperty("text", texts[i]);
240: session.save(resource);
241: }
242: }
243:
244: private void setUpMultiPropertyData(CompassSession session,
245: String[] words) {
246: A a = new A();
247: a.setId(Long.decode("1"));
248: Set setOfB = new LinkedHashSet();
249: for (int i = 0; i < words.length; i++) {
250: B b1 = new B();
251: b1.setText(words[i]);
252: setOfB.add(b1);
253: }
254: a.setB(setOfB);
255: session.save(a);
256: }
257:
258: }
|