Source Code Cross Referenced for TestMultiSearcher.java in » Net » lucene-connector » org » apache » lucene » search » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.search

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        package org.apache.lucene.search;
002:
003:        /**
004:         * Licensed to the Apache Software Foundation (ASF) under one or more
005:         * contributor license agreements.  See the NOTICE file distributed with
006:         * this work for additional information regarding copyright ownership.
007:         * The ASF licenses this file to You under the Apache License, Version 2.0
008:         * (the "License"); you may not use this file except in compliance with
009:         * the License.  You may obtain a copy of the License at
010:         *
011:         *     http://www.apache.org/licenses/LICENSE-2.0
012:         *
013:         * Unless required by applicable law or agreed to in writing, software
014:         * distributed under the License is distributed on an "AS IS" BASIS,
015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016:         * See the License for the specific language governing permissions and
017:         * limitations under the License.
018:         */
019:
020:        import org.apache.lucene.util.LuceneTestCase;
021:        import org.apache.lucene.analysis.KeywordAnalyzer;
022:        import org.apache.lucene.analysis.standard.StandardAnalyzer;
023:        import org.apache.lucene.document.Document;
024:        import org.apache.lucene.document.Field;
025:        import org.apache.lucene.document.SetBasedFieldSelector;
026:        import org.apache.lucene.index.IndexReader;
027:        import org.apache.lucene.index.IndexWriter;
028:        import org.apache.lucene.index.Term;
029:        import org.apache.lucene.queryParser.QueryParser;
030:        import org.apache.lucene.store.Directory;
031:        import org.apache.lucene.store.RAMDirectory;
032:        import org.apache.lucene.store.MockRAMDirectory;
033:
034:        import java.io.IOException;
035:        import java.util.Collections;
036:        import java.util.HashSet;
037:        import java.util.Set;
038:
039:        /**
040:         * Tests {@link MultiSearcher} class.
041:         *
042:         * @version $Id: TestMultiSearcher.java 583534 2007-10-10 16:46:35Z mikemccand $
043:         */
044:        public class TestMultiSearcher extends LuceneTestCase {
045:            public TestMultiSearcher(String name) { // the test name is handed straight to the superclass
046:                super(name);
047:            }
048:
049:            /**
050:             * Returns a new instance of the concrete MultiSearcher class
051:             * used in this test, built over the given searchers.
052:             */
053:            protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
054:                MultiSearcher multiSearcher = new MultiSearcher(searchers);
055:                return multiSearcher;
056:            }
057:
058:            public void testEmptyIndex() throws Exception { // searches through a MultiSearcher whose first index is empty, then populated, then emptied again
059:                // creating two directories for indices
060:                Directory indexStoreA = new MockRAMDirectory();
061:                Directory indexStoreB = new MockRAMDirectory();
062:
063:                // creating the first document to store
064:                Document lDoc = new Document();
065:                lDoc.add(new Field("fulltext", "Once upon a time.....",
066:                        Field.Store.YES, Field.Index.TOKENIZED));
067:                lDoc.add(new Field("id", "doc1", Field.Store.YES,
068:                        Field.Index.UN_TOKENIZED));
069:                lDoc.add(new Field("handle", "1", Field.Store.YES,
070:                        Field.Index.UN_TOKENIZED));
071:
072:                // creating the second document to store
073:                Document lDoc2 = new Document();
074:                lDoc2.add(new Field("fulltext",
075:                        "in a galaxy far far away.....", Field.Store.YES,
076:                        Field.Index.TOKENIZED));
077:                lDoc2.add(new Field("id", "doc2", Field.Store.YES,
078:                        Field.Index.UN_TOKENIZED));
079:                lDoc2.add(new Field("handle", "1", Field.Store.YES,
080:                        Field.Index.UN_TOKENIZED));
081:
082:                // creating the third document to store
083:                Document lDoc3 = new Document();
084:                lDoc3.add(new Field("fulltext",
085:                        "a bizarre bug manifested itself....", Field.Store.YES,
086:                        Field.Index.TOKENIZED));
087:                lDoc3.add(new Field("id", "doc3", Field.Store.YES,
088:                        Field.Index.UN_TOKENIZED));
089:                lDoc3.add(new Field("handle", "1", Field.Store.YES,
090:                        Field.Index.UN_TOKENIZED));
091:
092:                // creating an index writer for the first index
093:                IndexWriter writerA = new IndexWriter(indexStoreA,
094:                        new StandardAnalyzer(), true);
095:                // creating an index writer for the second index, but writing nothing
096:                IndexWriter writerB = new IndexWriter(indexStoreB,
097:                        new StandardAnalyzer(), true);
098:
099:                //--------------------------------------------------------------------
100:                // scenario 1: index B is empty, index A holds all three documents
101:                //--------------------------------------------------------------------
102:
103:                // writing the documents to the first index
104:                writerA.addDocument(lDoc);
105:                writerA.addDocument(lDoc2);
106:                writerA.addDocument(lDoc3);
107:                writerA.optimize();
108:                writerA.close();
109:
110:                // closing the second index without having added anything to it
111:                writerB.close();
112:
113:                // creating the query; every document above carries handle "1"
114:                QueryParser parser = new QueryParser("fulltext",
115:                        new StandardAnalyzer());
116:                Query query = parser.parse("handle:1");
117:
118:                // building the searchables
119:                Searcher[] searchers = new Searcher[2];
120:                // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
121:                searchers[0] = new IndexSearcher(indexStoreB);
122:                searchers[1] = new IndexSearcher(indexStoreA);
123:                // creating the multiSearcher
124:                Searcher mSearcher = getMultiSearcherInstance(searchers);
125:                // performing the search
126:                Hits hits = mSearcher.search(query);
127:
128:                assertEquals(3, hits.length()); // the three documents written to index A
129:
130:                // iterating over the hit documents
131:                for (int i = 0; i < hits.length(); i++) {
132:                    Document d = hits.doc(i); // d is never read; only loading the document is exercised
133:                }
134:                mSearcher.close();
135:
136:                //--------------------------------------------------------------------
137:                // scenario 2: index B now holds one document
138:                //--------------------------------------------------------------------
139:
140:                // adding one document to the empty index
141:                writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(),
142:                        false);
143:                writerB.addDocument(lDoc);
144:                writerB.optimize();
145:                writerB.close();
146:
147:                // building the searchables
148:                Searcher[] searchers2 = new Searcher[2];
149:                // VITAL STEP: adding the searcher for the formerly empty index (it now holds lDoc) first, before the searcher for the fully populated index
150:                searchers2[0] = new IndexSearcher(indexStoreB);
151:                searchers2[1] = new IndexSearcher(indexStoreA);
152:                // creating the multiSearcher
153:                MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
154:                // performing the same search
155:                Hits hits2 = mSearcher2.search(query);
156:
157:                assertEquals(4, hits2.length()); // three documents from index A plus the copy of lDoc in index B
158:
159:                // iterating over the hit documents
160:                for (int i = 0; i < hits2.length(); i++) {
161:                    // no exception should happen at this point
162:                    Document d = hits2.doc(i);
163:                }
164:
165:                // test the subSearcher() method:
166:                Query subSearcherQuery = parser.parse("id:doc1");
167:                hits2 = mSearcher2.search(subSearcherQuery);
168:                assertEquals(2, hits2.length()); // lDoc (id doc1) was written to both indices
169:                assertEquals(0, mSearcher2.subSearcher(hits2.id(0))); // hit from searchers2[0]
170:                assertEquals(1, mSearcher2.subSearcher(hits2.id(1))); // hit from searchers2[1]
171:                subSearcherQuery = parser.parse("id:doc2");
172:                hits2 = mSearcher2.search(subSearcherQuery);
173:                assertEquals(1, hits2.length()); // lDoc2 (id doc2) was written to index A only
174:                assertEquals(1, mSearcher2.subSearcher(hits2.id(0))); // hit from searchers2[1]
175:                mSearcher2.close();
176:
177:                //--------------------------------------------------------------------
178:                // scenario 3: the document in index B is deleted again
179:                //--------------------------------------------------------------------
180:
181:                // deleting the document just added; this will cause a different exception to take place
182:                Term term = new Term("id", "doc1");
183:                IndexReader readerB = IndexReader.open(indexStoreB);
184:                readerB.deleteDocuments(term);
185:                readerB.close();
186:
187:                // optimizing the index with the writer
188:                writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(),
189:                        false);
190:                writerB.optimize();
191:                writerB.close();
192:
193:                // building the searchables, again with the searcher for index B first
194:                Searcher[] searchers3 = new Searcher[2];
195:
196:                searchers3[0] = new IndexSearcher(indexStoreB);
197:                searchers3[1] = new IndexSearcher(indexStoreA);
198:                // creating the multiSearcher
199:                Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
200:                // performing the same search
201:                Hits hits3 = mSearcher3.search(query);
202:
203:                assertEquals(3, hits3.length()); // only the three documents in index A remain
204:
205:                // iterating over the hit documents
206:                for (int i = 0; i < hits3.length(); i++) {
207:                    Document d = hits3.doc(i);
208:                }
209:                mSearcher3.close();
210:                indexStoreA.close();
211:                indexStoreB.close();
212:            }
213:
214:            private static Document createDocument(String contents1, String contents2) {
215:                // Both fixed fields use Field.Store.YES and Field.Index.UN_TOKENIZED.
216:                Field primary = new Field("contents", contents1,
217:                        Field.Store.YES, Field.Index.UN_TOKENIZED);
218:                Field other = new Field("other", "other contents",
219:                        Field.Store.YES, Field.Index.UN_TOKENIZED);
220:                Document doc = new Document();
221:                doc.add(primary);
222:                doc.add(other);
223:                if (contents2 == null) {
224:                    return doc; // no second "contents" value requested
225:                }
226:                doc.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
227:                return doc;
228:            }
229:
230:            private static void initIndex(Directory directory, int nDocs,
231:                    boolean create, String contents2) throws IOException {
232:                // The writer is opened before the try block: if construction
233:                // fails there is nothing to close and the exception propagates.
234:                IndexWriter writer = new IndexWriter(directory,
235:                        new KeywordAnalyzer(), create);
236:                try {
237:                    int docNumber = 0;
238:                    while (docNumber < nDocs) {
239:                        String contents1 = "doc" + docNumber;
240:                        writer.addDocument(createDocument(contents1, contents2));
241:                        docNumber++;
242:                    }
243:                } finally {
244:                    // Always close the writer, even when adding a document fails.
245:                    writer.close();
246:                }
247:            }
248:
249:            public void testFieldSelector() throws Exception { // checks MultiSearcher.doc() with and without a field selector
250:                RAMDirectory ramDirectory1 = new RAMDirectory();
251:                RAMDirectory ramDirectory2 = new RAMDirectory();
252:                Query query = new TermQuery(new Term("contents", "doc0"));
253:
254:                // Put the documents in two different indexes
255:                initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
256:                initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
257:
258:                IndexSearcher indexSearcher1 = new IndexSearcher(ramDirectory1);
259:                IndexSearcher indexSearcher2 = new IndexSearcher(ramDirectory2);
260:                MultiSearcher searcher = getMultiSearcherInstance(new Searcher[] {
261:                        indexSearcher1, indexSearcher2 });
262:                assertNotNull("searcher is null and it shouldn't be", searcher);
263:
264:                // The searcher and both directories are closed in the finally block
265:                // so that they are released even when one of the assertions fails.
266:                try {
267:                    Hits hits = searcher.search(query);
268:                    assertNotNull("hits is null and it shouldn't be", hits);
269:                    assertTrue(hits.length() + " does not equal: " + 2,
270:                            hits.length() == 2);
271:                    Document document = searcher.doc(hits.id(0));
272:                    assertNotNull("document is null and it shouldn't be", document);
273:                    assertTrue("document.getFields() Size: "
274:                            + document.getFields().size() + " is not: " + 2,
275:                            document.getFields().size() == 2);
276:                    //Should be one document from each directory
277:                    //they both have two fields, contents and other
278:                    Set ftl = new HashSet();
279:                    ftl.add("other");
280:                    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl,
281:                            Collections.EMPTY_SET);
282:                    document = searcher.doc(hits.id(0), fs);
283:                    assertNotNull("document is null and it shouldn't be", document);
284:                    assertTrue("document.getFields() Size: "
285:                            + document.getFields().size() + " is not: " + 1,
286:                            document.getFields().size() == 1);
287:                    assertNull("value is not null and it should be", document.get("contents"));
288:                    assertNotNull("value is null and it shouldn't be", document.get("other"));
289:
290:                    // Now select only "contents" and load the second hit with that selector
291:                    ftl.clear();
292:                    ftl.add("contents");
293:                    fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
294:                    document = searcher.doc(hits.id(1), fs);
295:                    assertNotNull("value is null and it shouldn't be", document.get("contents"));
296:                    assertNull("value is not null and it should be", document.get("other"));
297:                } finally {
298:                    searcher.close();
299:                    ramDirectory1.close();
300:                    ramDirectory2.close();
301:                }
302:            }
303:
304:            /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
305:            public void testNormalization1() throws IOException {
306:               testNormalization(1, "Using 1 document per index:");
307:            }
308:             */
309:
310:            public void testNormalization10() throws IOException { // runs the normalization check with 10 documents per index
311:                this.testNormalization(10, "Using 10 documents per index:");
312:            }
313:
314:            /**
315:             * Verifies that searching two indexes through the MultiSearcher yields
316:             * the same scores as searching one index that holds all the documents.
317:             *
318:             * @param nDocs   number of documents written by each initIndex call
319:             * @param message label passed to every assertion to identify the run
320:             * @throws IOException if an index cannot be written, searched or closed
321:             */
322:            private void testNormalization(int nDocs, String message)
323:                    throws IOException {
324:                Query docZeroQuery = new TermQuery(new Term("contents", "doc0"));
325:
326:                // Phase 1: first put the documents in the same index
327:                RAMDirectory combinedDir = new MockRAMDirectory();
328:                initIndex(combinedDir, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
329:                // the second call passes create == false so that it writes to the same index
330:                initIndex(combinedDir, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
331:
332:                IndexSearcher combinedSearcher = new IndexSearcher(combinedDir);
333:                Hits singleIndexHits = combinedSearcher.search(docZeroQuery);
334:                assertEquals(message, 2, singleIndexHits.length());
335:                // hits.score(0) is 0.594535 if only a single document is in first index
336:                assertEquals(message, 1, singleIndexHits.score(0), 1e-6);
337:
338:                // Store the scores for use later
339:                float[] expectedScores = {
340:                        singleIndexHits.score(0), singleIndexHits.score(1) };
341:                assertTrue(message, expectedScores[0] > expectedScores[1]);
342:
343:                combinedSearcher.close();
344:                combinedDir.close();
345:
346:                // Phase 2: now put the documents in two different indexes and
347:                // search them together through the MultiSearcher under test
348:                RAMDirectory firstDir = new MockRAMDirectory();
349:                RAMDirectory secondDir = new MockRAMDirectory();
350:                initIndex(firstDir, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
351:                initIndex(secondDir, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
352:
353:                Searcher[] subSearchers = {
354:                        new IndexSearcher(firstDir), new IndexSearcher(secondDir) };
355:                Searcher multi = getMultiSearcherInstance(subSearchers);
356:
357:                // The scores should be the same (within reason)
358:                Hits multiHits = multi.search(docZeroQuery);
359:                assertEquals(message, 2, multiHits.length());
360:                // First hit: presumably the document from the first index
361:                assertEquals(message, expectedScores[0],
362:                        multiHits.score(0), 1e-6);
363:                // Second hit: presumably the document from the second index
364:                assertEquals(message, expectedScores[1],
365:                        multiHits.score(1), 1e-6);
366:
367:                // Adding a Sort.RELEVANCE object should not change anything
368:                Hits sortedHits = multi.search(docZeroQuery, Sort.RELEVANCE);
369:                assertEquals(message, 2, sortedHits.length());
370:                // First hit: presumably the document from the first index
371:                assertEquals(message, expectedScores[0],
372:                        sortedHits.score(0), 1e-6);
373:                // Second hit: presumably the document from the second index
374:                assertEquals(message, expectedScores[1],
375:                        sortedHits.score(1), 1e-6);
376:
377:                // Close the multi searcher first, then the directories it searched
378:                multi.close();
379:                firstDir.close();
380:                secondDir.close();
381:            }
382:
383:            /**
384:             * Tests that a custom similarity is in effect when using MultiSearcher (LUCENE-789).
385:             * @throws IOException if the index cannot be written, searched or closed
386:             */
387:            public void testCustomSimilarity() throws IOException {
388:                RAMDirectory dir = new RAMDirectory();
389:                initIndex(dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
390:                IndexSearcher srchr = new IndexSearcher(dir);
391:                MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[] { srchr });
392:
393:                // override all: every factor below returns a fixed constant
394:                Similarity customSimilarity = new DefaultSimilarity() {
395:                    public float idf(int docFreq, int numDocs) {
396:                        return 100.0f;
397:                    }
398:                    public float coord(int overlap, int maxOverlap) {
399:                        return 1.0f;
400:                    }
401:                    public float lengthNorm(String fieldName, int numTokens) {
402:                        return 1.0f;
403:                    }
404:                    public float queryNorm(float sumOfSquaredWeights) {
405:                        return 1.0f;
406:                    }
407:                    public float sloppyFreq(int distance) {
408:                        return 1.0f;
409:                    }
410:                    public float tf(float freq) {
411:                        return 1.0f;
412:                    }
413:                };
414:
415:                // The finally block closes the searcher and the directory even when the assertion fails.
416:                try {
417:                    srchr.setSimilarity(customSimilarity);
418:                    msrchr.setSimilarity(customSimilarity);
419:                    Query query = new TermQuery(new Term("contents", "doc0"));
420:
421:                    // Get a score from IndexSearcher
422:                    TopDocs topDocs = srchr.search(query, null, 1);
423:                    float score1 = topDocs.getMaxScore();
424:
425:                    // Get the score from MultiSearcher
426:                    topDocs = msrchr.search(query, null, 1);
427:                    float scoreN = topDocs.getMaxScore();
428:
429:                    // The scores from the IndexSearcher and MultiSearcher should be the same
430:                    // if the same similarity is used.
431:                    assertEquals(
432:                            "MultiSearcher score must be equal to single searcher score!",
433:                            score1, scoreN, 1e-6);
434:                } finally {
435:                    msrchr.close();
436:                    dir.close();
437:                }
438:            }
439:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.