MultiSearcher.cs (dotLucene » Lucene » Net » Search)
/*
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;

namespace Lucene.Net.Search
{
  
  /// <summary>Implements search over a set of <code>Searchables</code>.
  /// 
  /// <p>Applications usually need only call the inherited {@link #Search(Query)}
  /// or {@link #Search(Query,Filter)} methods.
  /// </summary>
  public class MultiSearcher : Searcher
  {
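    /// <summary>Forwards hits to a wrapped collector, shifting each doc id
    /// by the owning sub-searcher's start offset so the ids are global.</summary>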
    private class AnonymousClassHitCollector : HitCollector
    {
      public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
      {
        InitBlock(results, start, enclosingInstance);
      }
      private void  InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
      {
        this.results = results;
        this.start = start;
        this.enclosingInstance = enclosingInstance;
      }
      private Lucene.Net.Search.HitCollector results;
      private int start;
      private MultiSearcher enclosingInstance;
      public MultiSearcher Enclosing_Instance
      {
        get
        {
          return enclosingInstance;
        }
        
      }
      public override void  Collect(int doc, float score)
      {
        results.Collect(doc + start, score);
      }
    }
    /// <summary> Document Frequency cache acting as a Dummy-Searcher.
    /// This class is no full-fledged Searcher, but only supports
    /// the methods necessary to initialize Weights.
    /// </summary>
    private class CachedDfSource : Searcher
    {
      private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
      private int maxDoc; // document count
      
      public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
      {
        this.dfMap = dfMap;
        this.maxDoc = maxDoc;
      }
      
      public override int DocFreq(Term term)
      {
        int df;
        try
        {
          // unboxing a missing (null) map entry throws NullReferenceException
          df = ((System.Int32) dfMap[term]);
        }
        catch (System.NullReferenceException)
        {
          throw new System.ArgumentException("df for term " + term.Text() + " not available");
        }
        return df;
      }
      
      public override int[] DocFreqs(Term[] terms)
      {
        int[] result = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
          result[i] = DocFreq(terms[i]);
        }
        return result;
      }
      
      public override int MaxDoc()
      {
        return maxDoc;
      }
      
      public override Query Rewrite(Query query)
      {
        // This is a bit of a hack. We know that a query which
        // creates a Weight based on this Dummy-Searcher is
        // always already rewritten (see CreateWeight()).
        // Therefore we just return the unmodified query here.
        return query;
      }
      
      public override void  Close()
      {
        throw new System.NotSupportedException();
      }
      
      public override Document Doc(int i)
      {
        throw new System.NotSupportedException();
      }
      
      public override Explanation Explain(Weight weight, int doc)
      {
        throw new System.NotSupportedException();
      }
      
      public override void  Search(Weight weight, Filter filter, HitCollector results)
      {
        throw new System.NotSupportedException();
      }
      
      public override TopDocs Search(Weight weight, Filter filter, int n)
      {
        throw new System.NotSupportedException();
      }
      
      public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
      {
        throw new System.NotSupportedException();
      }
    }
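
    // Illustration (hypothetical numbers, not part of the original source): if
    // "apache" occurs in 3 documents of one sub-index and 5 of another,
    // CreateWeight below caches a combined df of 8 for that term, so every
    // sub-searcher scores against the same aggregate statistics.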
    private Lucene.Net.Search.Searchable[] searchables;
    private int[] starts;
    private int maxDoc = 0;
    
    /// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
    public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
    {
      this.searchables = searchables;
      
      starts = new int[searchables.Length + 1]; // build starts array
      for (int i = 0; i < searchables.Length; i++)
      {
        starts[i] = maxDoc;
        maxDoc += searchables[i].MaxDoc(); // compute maxDocs
      }
      starts[searchables.Length] = maxDoc;
    }
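
    // Example (hypothetical sizes): with two sub-searchers holding 100 and 50
    // documents, starts becomes {0, 100, 150}; the second searcher's documents
    // get the global id range [100, 150).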
    
    /// <summary>Return the array of {@link Searchable}s this searches. </summary>
    public virtual Lucene.Net.Search.Searchable[] GetSearchables()
    {
      return searchables;
    }
    
    protected internal virtual int[] GetStarts()
    {
      return starts;
    }
    
    // inherit javadoc
    public override void  Close()
    {
      for (int i = 0; i < searchables.Length; i++)
        searchables[i].Close();
    }
    
    public override int DocFreq(Term term)
    {
      int docFreq = 0;
      for (int i = 0; i < searchables.Length; i++)
        docFreq += searchables[i].DocFreq(term);
      return docFreq;
    }
    
    // inherit javadoc
    public override Document Doc(int n)
    {
      int i = SubSearcher(n); // find searcher index
      return searchables[i].Doc(n - starts[i]); // dispatch to searcher
    }
    
    /// <summary>Call {@link #SubSearcher} instead.</summary>
    /// <deprecated>
    /// </deprecated>
    public virtual int SearcherIndex(int n)
    {
      return SubSearcher(n);
    }
    
    /// <summary>Returns index of the searcher for document <code>n</code> in the array
    /// used to construct this searcher. 
    /// </summary>
    public virtual int SubSearcher(int n)
    {
      // find the searcher for doc n: binary-search the starts array for the
      // largest entry <= n and return its index (System.Array.BinarySearch
      // is the .NET counterpart of the Arrays.binarySearch the original
      // Java comment mentioned here)
      int lo = 0;
      int hi = searchables.Length - 1;
      while (hi >= lo)
      {
        int mid = (lo + hi) >> 1;
        int midValue = starts[mid];
        if (n < midValue)
          hi = mid - 1;
        else if (n > midValue)
          lo = mid + 1;
        else
        {
          // found a match
          while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
          {
            mid++; // scan to last match
          }
          return mid;
        }
      }
      return hi;
    }
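
    // Example (with the hypothetical starts = {0, 100, 150} above):
    // SubSearcher(120) returns 1, since starts[1] = 100 is the largest
    // entry <= 120.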
    
    /// <summary>Returns the document number of document <code>n</code> within its
    /// sub-index. 
    /// </summary>
    public virtual int SubDoc(int n)
    {
      return n - starts[SubSearcher(n)];
    }
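
    // Continuing the example: SubDoc(120) == 120 - starts[1] == 20, the
    // document's id within searchables[1].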
    
    public override int MaxDoc()
    {
      return maxDoc;
    }
    
    public override TopDocs Search(Weight weight, Filter filter, int nDocs)
    {
      
      HitQueue hq = new HitQueue(nDocs);
      int totalHits = 0;
      
      for (int i = 0; i < searchables.Length; i++)
      {
        // search each searcher
        TopDocs docs = searchables[i].Search(weight, filter, nDocs);
        totalHits += docs.totalHits; // update totalHits
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
          // merge scoreDocs into hq
          ScoreDoc scoreDoc = scoreDocs[j];
          scoreDoc.doc += starts[i]; // convert doc
          if (!hq.Insert(scoreDoc))
            break; // no more scores > minScore
        }
      }
      
      // pop lowest-scoring hits first, filling the array back to front
      ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
      for (int i = hq.Size() - 1; i >= 0; i--)
        scoreDocs2[i] = (ScoreDoc) hq.Pop();
      
      float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
      
      return new TopDocs(totalHits, scoreDocs2, maxScore);
    }
    
    public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
    {
      FieldDocSortedHitQueue hq = null;
      int totalHits = 0;
      
      float maxScore = System.Single.NegativeInfinity;
      
      for (int i = 0; i < searchables.Length; i++)
      {
        // search each searcher
        TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
        
        if (hq == null)
          hq = new FieldDocSortedHitQueue(docs.fields, n);
        totalHits += docs.totalHits; // update totalHits
        maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
          // merge scoreDocs into hq
          ScoreDoc scoreDoc = scoreDocs[j];
          scoreDoc.doc += starts[i]; // convert doc
          if (!hq.Insert(scoreDoc))
            break; // no more scores > minScore
        }
      }
      
      // pop lowest-scoring hits first, filling the array back to front
      ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
      for (int i = hq.Size() - 1; i >= 0; i--)
        scoreDocs2[i] = (ScoreDoc) hq.Pop();
      
      return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
    }
    
    
    // inherit javadoc
    public override void  Search(Weight weight, Filter filter, HitCollector results)
    {
      for (int i = 0; i < searchables.Length; i++)
      {
        
        int start = starts[i];
        
        searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
      }
    }
    
    public override Query Rewrite(Query original)
    {
      Query[] queries = new Query[searchables.Length];
      for (int i = 0; i < searchables.Length; i++)
      {
        queries[i] = searchables[i].Rewrite(original);
      }
      return queries[0].Combine(queries);
    }
    
    public override Explanation Explain(Weight weight, int doc)
    {
      int i = SubSearcher(doc); // find searcher index
      return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
    }
    
    /// <summary> Create weight in multiple index scenario.
    /// 
    /// Distributed query processing is done in the following steps:
    /// 1. rewrite query
    /// 2. extract necessary terms
    /// 3. collect dfs for these terms from the Searchables
    /// 4. create query weight using aggregate dfs.
    /// 5. distribute that weight to Searchables
    /// 6. merge results
    /// 
    /// Steps 1-4 are done here, 5+6 in the search() methods
    /// 
    /// </summary>
    /// <returns> rewritten queries
    /// </returns>
    protected internal override Weight CreateWeight(Query original)
    {
      // step 1
      Query rewrittenQuery = Rewrite(original);
      
      // step 2
      System.Collections.Hashtable terms = new System.Collections.Hashtable();
      rewrittenQuery.ExtractTerms(terms);
      
      // step 3
      Term[] allTermsArray = new Term[terms.Count];
      int index = 0;
      // enumerate the keys: ExtractTerms stores each Term as a Hashtable key
      System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
      while (e.MoveNext())
        allTermsArray[index++] = e.Current as Term;
      int[] aggregatedDfs = new int[terms.Count];
      for (int i = 0; i < searchables.Length; i++)
      {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
          aggregatedDfs[j] += dfs[j];
        }
      }
      
      System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
      for (int i = 0; i < allTermsArray.Length; i++)
      {
        dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
      }
      
      // step 4
      int numDocs = MaxDoc();
      CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
      
      return rewrittenQuery.Weight(cacheSim);
    }
  }
}
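
Usage sketch (not part of the original file): a minimal program that combines
two indexes under one MultiSearcher. The index paths are hypothetical, and the
IndexSearcher, QueryParser, and Hits calls assume the same dotLucene
(Lucene.Net 1.4/1.9-era) API as the class above.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;

public class MultiSearcherExample
{
  public static void Main(System.String[] args)
  {
    // one IndexSearcher per physical index (paths are made up)
    Lucene.Net.Search.Searchable[] subs = new Lucene.Net.Search.Searchable[]
    {
      new IndexSearcher("/data/index-a"),
      new IndexSearcher("/data/index-b")
    };
    
    // MultiSearcher aggregates document frequencies across the sub-indexes,
    // so scores stay comparable when the results are merged
    MultiSearcher searcher = new MultiSearcher(subs);
    
    Query query = QueryParser.Parse("apache", "contents", new StandardAnalyzer());
    Hits hits = searcher.Search(query);
    
    for (int i = 0; i < hits.Length(); i++)
    {
      // hit ids are global; Doc(i) dispatches to the right sub-index
      System.Console.WriteLine(hits.Doc(i).Get("path"));
    }
    
    searcher.Close(); // closes every sub-searcher
  }
}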