/*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;
namespace Lucene.Net.Search
{
/// <summary>Implements search over a set of <code>Searchables</code>.
///
/// <p>Applications usually need only call the inherited {@link #Search(Query)}
/// or {@link #Search(Query,Filter)} methods.
/// </summary>
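///
/// <p>For example, a minimal usage sketch (the index directory paths shown
/// here are hypothetical placeholders):
/// <pre>
/// Lucene.Net.Search.Searchable[] searchables = new Lucene.Net.Search.Searchable[]
/// {
///     new IndexSearcher("/indexes/partA"), // hypothetical index path
///     new IndexSearcher("/indexes/partB")  // hypothetical index path
/// };
/// Searcher searcher = new MultiSearcher(searchables);
/// Hits hits = searcher.Search(new TermQuery(new Term("contents", "apache")));
/// searcher.Close();
/// </pre>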
public class MultiSearcher : Searcher
{
private class AnonymousClassHitCollector : HitCollector
{
public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
{
InitBlock(results, start, enclosingInstance);
}
private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
{
this.results = results;
this.start = start;
this.enclosingInstance = enclosingInstance;
}
private Lucene.Net.Search.HitCollector results;
private int start;
private MultiSearcher enclosingInstance;
public MultiSearcher Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
public override void Collect(int doc, float score)
{
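// shift the sub-searcher-relative doc number into the
// MultiSearcher's global document number space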
results.Collect(doc + start, score);
}
}
/// <summary> Document frequency cache acting as a dummy searcher.
/// This class is not a full-fledged Searcher; it only supports
/// the methods necessary to initialize Weights.
/// </summary>
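// CreateWeight() below uses this cache roughly as follows, so that every
// sub-searcher scores against the same aggregate statistics:
//   CachedDfSource cacheSim = new CachedDfSource(dfMap, MaxDoc());
//   Weight weight = rewrittenQuery.Weight(cacheSim);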
private class CachedDfSource : Searcher
{
private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
private int maxDoc; // document count
public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
{
this.dfMap = dfMap;
this.maxDoc = maxDoc;
}
public override int DocFreq(Term term)
{
int df;
try
{
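// unboxing a missing (null) map entry throws NullReferenceException,
// which is translated into an ArgumentException below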
df = ((System.Int32) dfMap[term]);
}
catch (System.NullReferenceException)
{
throw new System.ArgumentException("df for term " + term.Text() + " not available");
}
return df;
}
public override int[] DocFreqs(Term[] terms)
{
int[] result = new int[terms.Length];
for (int i = 0; i < terms.Length; i++)
{
result[i] = DocFreq(terms[i]);
}
return result;
}
public override int MaxDoc()
{
return maxDoc;
}
public override Query Rewrite(Query query)
{
// This is a bit of a hack: we know that a query which
// creates a Weight based on this dummy searcher has
// always already been rewritten (see CreateWeight()).
// Therefore we just return the unmodified query here.
return query;
}
public override void Close()
{
throw new System.NotSupportedException();
}
public override Document Doc(int i)
{
throw new System.NotSupportedException();
}
public override Explanation Explain(Weight weight, int doc)
{
throw new System.NotSupportedException();
}
public override void Search(Weight weight, Filter filter, HitCollector results)
{
throw new System.NotSupportedException();
}
public override TopDocs Search(Weight weight, Filter filter, int n)
{
throw new System.NotSupportedException();
}
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
throw new System.NotSupportedException();
}
}
private Lucene.Net.Search.Searchable[] searchables;
private int[] starts;
private int maxDoc = 0;
/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
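// starts[i] is the first global document number of sub-searcher i;
// the final entry holds the total document count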
this.searchables = searchables;
starts = new int[searchables.Length + 1]; // build starts array
for (int i = 0; i < searchables.Length; i++)
{
starts[i] = maxDoc;
maxDoc += searchables[i].MaxDoc(); // compute maxDoc
}
starts[searchables.Length] = maxDoc;
}
/// <summary>Return the array of {@link Searchable}s this searches. </summary>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
return searchables;
}
protected internal virtual int[] GetStarts()
{
return starts;
}
// inherit javadoc
public override void Close()
{
for (int i = 0; i < searchables.Length; i++)
searchables[i].Close();
}
public override int DocFreq(Term term)
{
int docFreq = 0;
for (int i = 0; i < searchables.Length; i++)
docFreq += searchables[i].DocFreq(term);
return docFreq;
}
// inherit javadoc
public override Document Doc(int n)
{
int i = SubSearcher(n); // find searcher index
return searchables[i].Doc(n - starts[i]); // dispatch to searcher
}
/// <summary>Call {@link #SubSearcher} instead.</summary>
/// <deprecated>
/// </deprecated>
public virtual int SearcherIndex(int n)
{
return SubSearcher(n);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
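/// <remarks>For example, with sub-searchers of 10, 15 and 5 documents the
/// starts array is {0, 10, 25, 30}; SubSearcher(12) returns 1, and the
/// document's number within that sub-index is 12 - starts[1] = 2.
/// </remarks>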
public virtual int SubSearcher(int n)
{
// find the searcher for doc n: binary-search the starts array
// for the last element less than or equal to n
int lo = 0;
int hi = searchables.Length - 1;
while (hi >= lo)
{
int mid = (lo + hi) >> 1;
int midValue = starts[mid];
if (n < midValue)
hi = mid - 1;
else if (n > midValue)
lo = mid + 1;
else
{
// found a match
while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
{
mid++; // scan to last match
}
return mid;
}
}
return hi;
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
return n - starts[SubSearcher(n)];
}
public override int MaxDoc()
{
return maxDoc;
}
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
for (int i = 0; i < searchables.Length; i++)
{
// search each searcher
TopDocs docs = searchables[i].Search(weight, filter, nDocs);
totalHits += docs.totalHits; // update totalHits
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.Length; j++)
{
// merge scoreDocs into hq
ScoreDoc scoreDoc = scoreDocs[j];
scoreDoc.doc += starts[i]; // convert to a global doc number
if (!hq.Insert(scoreDoc))
break; // no more scores > minScore
}
}
ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
for (int i = hq.Size() - 1; i >= 0; i--)
scoreDocs2[i] = (ScoreDoc) hq.Pop(); // put docs in array, best first
float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
return new TopDocs(totalHits, scoreDocs2, maxScore);
}
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
FieldDocSortedHitQueue hq = null;
int totalHits = 0;
float maxScore = System.Single.NegativeInfinity;
for (int i = 0; i < searchables.Length; i++)
{
// search each searcher
TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
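// the merge queue is created lazily because the actual sort fields
// are only known once the first sub-searcher has returned results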
if (hq == null)
hq = new FieldDocSortedHitQueue(docs.fields, n);
totalHits += docs.totalHits; // update totalHits
maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.Length; j++)
{
// merge scoreDocs into hq
ScoreDoc scoreDoc = scoreDocs[j];
scoreDoc.doc += starts[i]; // convert to a global doc number
if (!hq.Insert(scoreDoc))
break; // no more scores > minScore
}
}
ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
for (int i = hq.Size() - 1; i >= 0; i--)
scoreDocs2[i] = (ScoreDoc) hq.Pop(); // put docs in array, in sort order
return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
}
// inherit javadoc
public override void Search(Weight weight, Filter filter, HitCollector results)
{
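// forward the search to each sub-searcher, wrapping the collector
// so that collected doc numbers are shifted by the searcher's start offset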
for (int i = 0; i < searchables.Length; i++)
{
int start = starts[i];
searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
}
}
public override Query Rewrite(Query original)
{
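// rewrite the query against every sub-searcher, then combine
// the rewritten forms into a single query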
Query[] queries = new Query[searchables.Length];
for (int i = 0; i < searchables.Length; i++)
{
queries[i] = searchables[i].Rewrite(original);
}
return queries[0].Combine(queries);
}
public override Explanation Explain(Weight weight, int doc)
{
int i = SubSearcher(doc); // find searcher index
return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite the query
/// 2. extract the necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create the query weight using the aggregate dfs
/// 5. distribute that weight to the Searchables
/// 6. merge the results
///
/// Steps 1-4 are done here, steps 5 and 6 in the Search() methods.
///
/// </summary>
/// <returns> the Weight built from the rewritten query and the
/// aggregate document frequencies
/// </returns>
protected internal override Weight CreateWeight(Query original)
{
// step 1
Query rewrittenQuery = Rewrite(original);
// step 2
System.Collections.Hashtable terms = new System.Collections.Hashtable();
rewrittenQuery.ExtractTerms(terms);
// step 3
Term[] allTermsArray = new Term[terms.Count];
int index = 0;
// ExtractTerms stores each Term as a Hashtable key, so enumerate the keys;
// enumerating the Hashtable itself would yield DictionaryEntry objects
System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
while (e.MoveNext())
allTermsArray[index++] = e.Current as Term;
int[] aggregatedDfs = new int[terms.Count];
for (int i = 0; i < searchables.Length; i++)
{
int[] dfs = searchables[i].DocFreqs(allTermsArray);
for (int j = 0; j < aggregatedDfs.Length; j++)
{
aggregatedDfs[j] += dfs[j];
}
}
System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
for (int i = 0; i < allTermsArray.Length; i++)
{
dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
}
// step 4
int numDocs = MaxDoc();
CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
return rewrittenQuery.Weight(cacheSim);
}
}
}