dotLucene » Lucene » Net » Index » SegmentReader.cs
/*
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;

namespace Lucene.Net.Index
{
  
  /// <version>  $Id: SegmentReader.java 329523 2005-10-30 05:37:11Z yonik $
  /// </version>
  class SegmentReader : IndexReader
  {
    private System.String segment;
    
    internal FieldInfos fieldInfos;
    private FieldsReader fieldsReader;
    
    internal TermInfosReader tis;
    internal TermVectorsReader termVectorsReaderOrig = null;
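    // per-thread clone of termVectorsReaderOrig, created lazily in GetTermVectorsReader()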
    internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
    
    internal BitVector deletedDocs = null;
    private bool deletedDocsDirty = false;
    private bool normsDirty = false;
    private bool undeleteAll = false;
    
    internal IndexInput freqStream;
    internal IndexInput proxStream;
    
    // Compound File Reader when based on a compound file segment
    internal CompoundFileReader cfsReader = null;
    
    private class Norm
    {
      private void  InitBlock(SegmentReader enclosingInstance)
      {
        this.enclosingInstance = enclosingInstance;
      }
      private SegmentReader enclosingInstance;
      public SegmentReader Enclosing_Instance
      {
        get
        {
          return enclosingInstance;
        }
        
      }
      public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
      {
        InitBlock(enclosingInstance);
        this.in_Renamed = in_Renamed;
        this.number = number;
      }
      
      public IndexInput in_Renamed;
      public byte[] bytes;
      public bool dirty;
      public int number;
      
      public void  ReWrite()
      {
        // NOTE: norms are re-written in regular directory, not cfs
        IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
        try
        {
          out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
        }
        finally
        {
          out_Renamed.Close();
        }
        System.String fileName;
        if (Enclosing_Instance.cfsReader == null)
          fileName = Enclosing_Instance.segment + ".f" + number;
        else
        {
          // use a different file name if we have compound format
          fileName = Enclosing_Instance.segment + ".s" + number;
        }
        Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
        this.dirty = false;
      }
    }
    
    private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
    
    /// <summary>The class which implements SegmentReader. </summary>
    private static System.Type IMPL;
    
    public SegmentReader() : base(null)
    {
    }
    
    public static SegmentReader Get(SegmentInfo si)
    {
      return Get(si.dir, si, null, false, false);
    }
    
    public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
    {
      return Get(si.dir, si, sis, closeDir, true);
    }
    
    public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
    {
      SegmentReader instance;
      try
      {
        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
      }
      catch (System.Exception e)
      {
        throw new System.SystemException("cannot load SegmentReader class: " + e);
      }
      instance.Init(dir, sis, closeDir, ownDir);
      instance.Initialize(si);
      return instance;
    }
    
    private void  Initialize(SegmentInfo si)
    {
      segment = si.name;
      
      // Use compound file directory for some files, if it exists
      Directory cfsDir = Directory();
      if (Directory().FileExists(segment + ".cfs"))
      {
        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
        cfsDir = cfsReader;
      }
      
      // No compound file exists - use the multi-file format
      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
      
      tis = new TermInfosReader(cfsDir, segment, fieldInfos);
      
      // NOTE: the bitvector is stored using the regular directory, not cfs
      if (HasDeletions(si))
        deletedDocs = new BitVector(Directory(), segment + ".del");
      
      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.OpenInput(segment + ".frq");
      proxStream = cfsDir.OpenInput(segment + ".prx");
      OpenNorms(cfsDir);
      
      if (fieldInfos.HasVectors())
      {
        // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
      }
    }
    
    ~SegmentReader()
    {
      // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
      System.Threading.Thread.SetData(termVectorsLocal, null);
    }
    
    protected internal override void  DoCommit()
    {
      if (deletedDocsDirty)
      {
        // re-write deleted
        deletedDocs.Write(Directory(), segment + ".tmp");
        Directory().RenameFile(segment + ".tmp", segment + ".del");
      }
      if (undeleteAll && Directory().FileExists(segment + ".del"))
      {
        Directory().DeleteFile(segment + ".del");
      }
      if (normsDirty)
      {
        // re-write norms
        System.Collections.IEnumerator values = norms.Values.GetEnumerator();
        while (values.MoveNext())
        {
          Norm norm = (Norm) values.Current;
          if (norm.dirty)
          {
            norm.ReWrite();
          }
        }
      }
      deletedDocsDirty = false;
      normsDirty = false;
      undeleteAll = false;
    }
    
    protected internal override void  DoClose()
    {
      fieldsReader.Close();
      tis.Close();
      
      if (freqStream != null)
        freqStream.Close();
      if (proxStream != null)
        proxStream.Close();
      
      CloseNorms();
      
      if (termVectorsReaderOrig != null)
        termVectorsReaderOrig.Close();
      
      if (cfsReader != null)
        cfsReader.Close();
    }
    
    internal static bool HasDeletions(SegmentInfo si)
    {
      return si.dir.FileExists(si.name + ".del");
    }
    
    public override bool HasDeletions()
    {
      return deletedDocs != null;
    }
    
    
    internal static bool UsesCompoundFile(SegmentInfo si)
    {
      return si.dir.FileExists(si.name + ".cfs");
    }
    
    internal static bool HasSeparateNorms(SegmentInfo si)
    {
      System.String[] result = si.dir.List();
      System.String pattern = si.name + ".s";
      int patternLength = pattern.Length;
      for (int i = 0; i < result.Length; i++)
      {
        if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
          return true;
      }
      return false;
    }
    
    protected internal override void  DoDelete(int docNum)
    {
      if (deletedDocs == null)
        deletedDocs = new BitVector(MaxDoc());
      deletedDocsDirty = true;
      undeleteAll = false;
      deletedDocs.Set(docNum);
    }
    
    protected internal override void  DoUndeleteAll()
    {
      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
    }
    
    internal virtual System.Collections.ArrayList Files()
    {
      System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
      
      for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
      {
        System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
        if (Directory().FileExists(name))
          files.Add(name);
      }
      
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
          System.String name;
          if (cfsReader == null)
            name = segment + ".f" + i;
          else
            name = segment + ".s" + i;
          if (Directory().FileExists(name))
            files.Add(name);
        }
      }
      return files;
    }
    
    public override TermEnum Terms()
    {
      return tis.Terms();
    }
    
    public override TermEnum Terms(Term t)
    {
      return tis.Terms(t);
    }
    
    public override Document Document(int n)
    {
      lock (this)
      {
        if (IsDeleted(n))
          throw new System.ArgumentException("attempt to access a deleted document");
        return fieldsReader.Doc(n);
      }
    }
    
    public override bool IsDeleted(int n)
    {
      lock (this)
      {
        return (deletedDocs != null && deletedDocs.Get(n));
      }
    }
    
    public override TermDocs TermDocs()
    {
      return new SegmentTermDocs(this);
    }
    
    public override TermPositions TermPositions()
    {
      return new SegmentTermPositions(this);
    }
    
    public override int DocFreq(Term t)
    {
      TermInfo ti = tis.Get(t);
      if (ti != null)
        return ti.docFreq;
      else
        return 0;
    }
    
    public override int NumDocs()
    {
      int n = MaxDoc();
      if (deletedDocs != null)
        n -= deletedDocs.Count();
      return n;
    }
    
    public override int MaxDoc()
    {
      return fieldsReader.Size();
    }
    
    /// <seealso cref="IndexReader.GetFieldNames()">
    /// </seealso>
    /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
    /// </deprecated>
    public override System.Collections.ICollection GetFieldNames()
    {
      // maintain a unique set of field names
      System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        fieldSet.Add(fi.name, fi.name);
      }
      return fieldSet;
    }
    
    /// <seealso cref="IndexReader.GetFieldNames(boolean)">
    /// </seealso>
    /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
    /// </deprecated>
    public override System.Collections.ICollection GetFieldNames(bool indexed)
    {
      // maintain a unique set of field names
      System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed == indexed)
          fieldSet.Add(fi.name, fi.name);
      }
      return fieldSet;
    }
    
    /// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector tvSpec)">
    /// </seealso>
    /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
    /// </deprecated>
    public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
    {
      bool storedTermVector;
      bool storePositionWithTermVector;
      bool storeOffsetWithTermVector;
      
      if (tvSpec == Field.TermVector.NO)
      {
        storedTermVector = false;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = false;
      }
      else if (tvSpec == Field.TermVector.YES)
      {
        storedTermVector = true;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = false;
      }
      else if (tvSpec == Field.TermVector.WITH_POSITIONS)
      {
        storedTermVector = true;
        storePositionWithTermVector = true;
        storeOffsetWithTermVector = false;
      }
      else if (tvSpec == Field.TermVector.WITH_OFFSETS)
      {                                                                           
        storedTermVector = true;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = true;
      }
      else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
      {
        storedTermVector = true;
        storePositionWithTermVector = true;
        storeOffsetWithTermVector = true;
      }
      else
      {
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
      }
      
      // maintain a unique set of field names
      System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
        {
          fieldSet.Add(fi.name, fi.name);
        }
      }
      return fieldSet;
    }
    
    /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
    /// </seealso>
    public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
    {
      System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fieldOption == IndexReader.FieldOption.ALL)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
        {
          fieldSet.Add(fi.name, fi.name);
        }
        else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
        {
          fieldSet.Add(fi.name, fi.name);
        }
      }
      return fieldSet;
    }
    
    
    public override bool HasNorms(System.String field)
    {
      lock (this)
      {
        return norms.ContainsKey(field);
      }
    }
    
    internal static byte[] CreateFakeNorms(int size)
    {
      // fill the whole array with the norm byte that encodes a boost of 1.0
      byte[] ones = new byte[size];
      byte val = DefaultSimilarity.EncodeNorm(1.0f);
      for (int index = 0; index < size; index++)
        ones[index] = val;
      return ones;
    }
    
    private byte[] ones;
    private byte[] FakeNorms()
    {
      if (ones == null)
        ones = CreateFakeNorms(MaxDoc());
      return ones;
    }
    
    // can return null if norms aren't stored
    protected internal virtual byte[] GetNorms(System.String field)
    {
      lock (this)
      {
        Norm norm = (Norm) norms[field];
        if (norm == null)
          return null; // not indexed, or norms not stored
        
        if (norm.bytes == null)
        {
          // value not yet read
          byte[] bytes = new byte[MaxDoc()];
          Norms(field, bytes, 0);
          norm.bytes = bytes; // cache it
        }
        return norm.bytes;
      }
    }
    
    // returns fake norms if norms aren't available
    public override byte[] Norms(System.String field)
    {
      lock (this)
      {
        byte[] bytes = GetNorms(field);
        if (bytes == null)
          bytes = FakeNorms();
        return bytes;
      }
    }
    
    protected internal override void  DoSetNorm(int doc, System.String field, byte value_Renamed)
    {
      Norm norm = (Norm) norms[field];
      if (norm == null)
      // not an indexed field
        return ;
      norm.dirty = true; // mark it dirty
      normsDirty = true;
      
      Norms(field)[doc] = value_Renamed; // set the value
    }
    
    /// <summary>Read norms into a pre-allocated array. </summary>
    public override void  Norms(System.String field, byte[] bytes, int offset)
    {
      lock (this)
      {
        
        Norm norm = (Norm) norms[field];
        if (norm == null)
        {
          Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
          return ;
        }
        
        if (norm.bytes != null)
        {
          // can copy from cache
          Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
          return ;
        }
        
        IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
        try
        {
          // read from disk
          normStream.Seek(0);
          normStream.ReadBytes(bytes, offset, MaxDoc());
        }
        finally
        {
          normStream.Close();
        }
      }
    }
    
    
    private void  OpenNorms(Directory cfsDir)
    {
      for (int i = 0; i < fieldInfos.Size(); i++)
      {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
          // look first if there are separate norms in compound format
          System.String fileName = segment + ".s" + fi.number;
          Directory d = Directory();
          if (!d.FileExists(fileName))
          {
            fileName = segment + ".f" + fi.number;
            d = cfsDir;
          }
          norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number);
        }
      }
    }
    
    private void  CloseNorms()
    {
      lock (norms.SyncRoot)
      {
        System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
        while (enumerator.MoveNext())
        {
          Norm norm = (Norm) enumerator.Current;
          norm.in_Renamed.Close();
        }
      }
    }
    
    /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
    /// <returns> TermVectorsReader
    /// </returns>
    private TermVectorsReader GetTermVectorsReader()
    {
      TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
      if (tvReader == null)
      {
        tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
        System.Threading.Thread.SetData(termVectorsLocal, tvReader);
      }
      return tvReader;
    }
    
    /// <summary>Return a term frequency vector for the specified document and field. The
    /// vector returned contains term numbers and frequencies for all terms in
    /// the specified field of this document, if the field had storeTermVector
    /// flag set.  If the flag was not set, the method returns null.
    /// </summary>
    /// <throws>  IOException </throws>
    public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
    {
      // Check if this field is invalid or has no stored term vector
      FieldInfo fi = fieldInfos.FieldInfo(field);
      if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
        return null;
      
      TermVectorsReader termVectorsReader = GetTermVectorsReader();
      if (termVectorsReader == null)
        return null;
      
      return termVectorsReader.Get(docNumber, field);
    }
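    // Illustrative use of GetTermFreqVector above (the document number and field name
    // are assumptions, not taken from this file):
    //   TermFreqVector tfv = reader.GetTermFreqVector(5, "contents");
    //   if (tfv != null) { System.String[] terms = tfv.GetTerms(); int[] freqs = tfv.GetTermFrequencies(); }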
    
    
    /// <summary>Return an array of term frequency vectors for the specified document.
    /// The array contains a vector for each vectorized field in the document.
    /// Each vector contains term numbers and frequencies for all terms
    /// in a given vectorized field.
    /// If no such fields existed, the method returns null.
    /// </summary>
    /// <throws>  IOException </throws>
    public override TermFreqVector[] GetTermFreqVectors(int docNumber)
    {
      if (termVectorsReaderOrig == null)
        return null;
      
      TermVectorsReader termVectorsReader = GetTermVectorsReader();
      if (termVectorsReader == null)
        return null;
      
      return termVectorsReader.Get(docNumber);
    }

    static SegmentReader()
    {
      try
      {
        System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
        IMPL = System.Type.GetType(name);
      }
      catch (System.Security.SecurityException)
      {
        try
        {
          IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
        }
        catch (System.Exception e)
        {
          throw new System.SystemException("cannot load default SegmentReader class: " + e);
        }
      }
      catch (System.Exception e)
      {
        throw new System.SystemException("cannot load SegmentReader class: " + e);
      }
    }
  }
}
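
Below is a minimal usage sketch, not part of the original file. It assumes an existing index under "indexDir" and the Lucene.Net APIs of the same vintage (FSDirectory, SegmentInfos). Since SegmentReader and SegmentInfos are internal to the Lucene.Net assembly, code like this only compiles inside the library itself; applications would normally obtain a reader through IndexReader.Open instead.

using Lucene.Net.Index;
using Lucene.Net.Store;

class SegmentReaderSketch
{
  static void Main()
  {
    // open an existing index directory (the path is an assumption for this example)
    Directory dir = FSDirectory.GetDirectory("indexDir", false);

    // read the segments file and open a reader on the first segment
    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    SegmentReader reader = SegmentReader.Get(infos.Info(0));

    System.Console.WriteLine("live docs: " + reader.NumDocs() + " / " + reader.MaxDoc());

    reader.Close();
    dir.Close();
  }
}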