IFilter.cs :  » Content-Management-Systems-CMS » Kooboo » EPocalipse » IFilter » C# / CSharp Open Source

Home
C# / CSharp Open Source
1.2.6.4 mono .net core
2.2.6.4 mono core
3.Aspect Oriented Frameworks
4.Bloggers
5.Build Systems
6.Business Application
7.Charting Reporting Tools
8.Chat Servers
9.Code Coverage Tools
10.Content Management Systems CMS
11.CRM ERP
12.Database
13.Development
14.Email
15.Forum
16.Game
17.GIS
18.GUI
19.IDEs
20.Installers Generators
21.Inversion of Control Dependency Injection
22.Issue Tracking
23.Logging Tools
24.Message
25.Mobile
26.Network Clients
27.Network Servers
28.Office
29.PDF
30.Persistence Frameworks
31.Portals
32.Profilers
33.Project Management
34.RSS RDF
35.Rule Engines
36.Script
37.Search Engines
38.Sound Audio
39.Source Control
40.SQL Clients
41.Template Engines
42.Testing
43.UML
44.Web Frameworks
45.Web Service
46.Web Testing
47.Wiki Engines
48.Windows Presentation Foundation
49.Workflows
50.XML Parsers
C# / C Sharp
C# / C Sharp by API
C# / CSharp Tutorial
C# / CSharp Open Source » Content Management Systems CMS » Kooboo 
Kooboo » EPocalipse » IFilter » IFilter.cs
using System;
using System.Text;
using System.Runtime.InteropServices;

//Contains IFilter interface translation
//Most translations are from PInvoke.net

namespace EPocalipse.IFilter{
  /// <summary>
  /// Managed mirror of the native FULLPROPSPEC structure: a property-set
  /// GUID paired with the PROPSPEC that selects one property in that set.
  /// Fields are laid out sequentially, in declaration order, for marshaling.
  /// </summary>
  [StructLayoutAttribute(LayoutKind.Sequential)]
  public struct FULLPROPSPEC
  {
    /// <summary>
    /// GUID of the property set the property belongs to.
    /// </summary>
    public Guid guidPropSet;

    /// <summary>
    /// Selector (string name or numeric PROPID) of the property inside
    /// <see cref="guidPropSet"/>.
    /// </summary>
    public PROPSPEC psProperty;
  }

  /// <summary>
  /// Managed mirror of the native FILTERREGION structure, which describes a
  /// span of text inside a chunk. Three consecutive 32-bit fields, marshaled
  /// in declaration order.
  /// </summary>
  [StructLayoutAttribute(LayoutKind.Sequential)]
  internal struct FILTERREGION
  {
    /// <summary>Identifier of the chunk the region belongs to.</summary>
    public int idChunk;

    /// <summary>Offset, in characters, of the start of the region within the chunk.</summary>
    public int cwcStart;

    /// <summary>Extent of the region, in characters.</summary>
    public int cwcExtent;
  }

  /// <summary>
  /// Managed mirror of the native PROPSPEC structure:
  /// <c>struct { ULONG ulKind; union { PROPID propid; LPOLESTR lpwstr; }; }</c>.
  /// </summary>
  /// <remarks>
  /// The native union contains a pointer, so it is pointer-aligned: it sits at
  /// offset 4 in a 32-bit process but at offset 8 in a 64-bit process. A fixed
  /// <c>FieldOffset(4)</c> is therefore only correct on 32-bit. This version
  /// uses sequential layout with a single pointer-sized backing field so the
  /// runtime picks the right offset on both platforms. <see cref="propid"/> and
  /// <see cref="lpwstr"/> are two views of that same storage (they alias each
  /// other, exactly like the native union). They are accessors rather than
  /// fields, so they cannot be passed by <c>ref</c>/<c>out</c>.
  /// </remarks>
  [StructLayout(LayoutKind.Sequential)]
  public struct PROPSPEC
  {
    /// <summary>
    /// Discriminator for the union: 0 - string used (<see cref="lpwstr"/>);
    /// 1 - PROPID (<see cref="propid"/>).
    /// </summary>
    public int ulKind;

    // Pointer-sized storage for the native union. Sequential layout aligns
    // it to IntPtr.Size, matching the native structure on x86 and x64.
    private IntPtr _union;

    /// <summary>
    /// Numeric property identifier; meaningful when <see cref="ulKind"/> is 1.
    /// </summary>
    public int propid
    {
      // The PROPID occupies the low 32 bits of the union (little-endian).
      // The upper half of a 64-bit slot may hold garbage written by native
      // code, so truncate without overflow checking instead of ToInt32().
      get { return unchecked((int)_union.ToInt64()); }
      set { _union = new IntPtr(value); }
    }

    /// <summary>
    /// Pointer to a null-terminated Unicode property name; meaningful when
    /// <see cref="ulKind"/> is 0.
    /// </summary>
    public IntPtr lpwstr
    {
      get { return _union; }
      set { _union = value; }
    }
  }

  /// <summary>
  /// Flags reported back by IFilter::Init describing additional properties
  /// available to the caller.
  /// </summary>
  [FlagsAttribute]
  internal enum IFILTER_FLAGS
  {
    /// <summary>
    /// Additional properties should be located by the caller through the
    /// IPropertySetStorage and IPropertyStorage interfaces.
    /// When this flag is set, properties available through COM
    /// enumerators should not be returned from IFilter.
    /// </summary>
    IFILTER_FLAGS_OLE_PROPERTIES = 0x1
  }

  /// <summary>
  /// Bit flags passed to IFilter::Init that control text normalisation,
  /// which properties are emitted, and how the filter will be accessed.
  /// Members are listed in ascending bit order.
  /// </summary>
  [FlagsAttribute]
  internal enum IFILTER_INIT
  {
    /// <summary>No flags set.</summary>
    NONE = 0,

    /// <summary>
    /// Mark paragraph breaks with the Unicode PARAGRAPH SEPARATOR (0x2029).
    /// </summary>
    CANON_PARAGRAPHS = 1 << 0,

    /// <summary>
    /// Replace soft returns (such as the newline character in Microsoft Word)
    /// with hard returns - LINE SEPARATOR (0x2028). Existing hard returns can
    /// be doubled. A carriage return (0x000D), a line feed (0x000A), or the
    /// two in combination count as a hard return. The intent is to enable
    /// pattern-expression matches against observed line breaks.
    /// </summary>
    HARD_LINE_BREAKS = 1 << 1,

    /// <summary>
    /// Normalise hyphens that are not represented in the host character set:
    /// optional hyphens (appearing only at the end of a line) are converted
    /// to nulls, and non-breaking hyphens are converted to normal hyphens
    /// (0x2010) or HYPHEN-MINUSES (0x002D).
    /// </summary>
    CANON_HYPHENS = 1 << 2,

    /// <summary>
    /// Normalise spaces the way CANON_HYPHENS normalises hyphens: all special
    /// space characters, such as non-breaking spaces, are converted to the
    /// standard space character (0x0020).
    /// </summary>
    CANON_SPACES = 1 << 3,

    /// <summary>
    /// The client wants text split into chunks representing internal
    /// value-type properties.
    /// </summary>
    APPLY_INDEX_ATTRIBUTES = 1 << 4,

    /// <summary>
    /// Emit any properties not covered by the APPLY_INDEX_ATTRIBUTES and
    /// APPLY_CRAWL_ATTRIBUTES flags.
    /// </summary>
    APPLY_OTHER_ATTRIBUTES = 1 << 5,

    /// <summary>
    /// Optimise the filter for indexing: the client calls IFilter::Init only
    /// once and never calls IFilter::BindRegion, so a chunk is never accessed
    /// both before and after another chunk.
    /// </summary>
    INDEXING_ONLY = 1 << 6,

    /// <summary>
    /// Recursively search all linked objects within the document. If a link
    /// is unavailable, the IFilter::GetChunk call that would have obtained
    /// the first chunk of the link should return FILTER_E_LINK_UNAVAILABLE.
    /// </summary>
    SEARCH_LINKS = 1 << 7,

    /// <summary>
    /// The client wants text split into chunks representing properties
    /// determined during the indexing process.
    /// </summary>
    APPLY_CRAWL_ATTRIBUTES = 1 << 8,

    /// <summary>
    /// The content indexing process can return property values set by the
    /// filter.
    /// </summary>
    FILTER_OWNED_VALUE_OK = 1 << 9
  }

  /// <summary>
  /// Managed mirror of the native STAT_CHUNK structure that IFilter::GetChunk
  /// fills in to describe the current chunk. Fields are marshaled in
  /// declaration order (sequential layout, which is also the C# default for
  /// structs).
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct STAT_CHUNK
  {
    /// <summary>
    /// Identifier of the chunk. Identifiers must be unique for the current
    /// instance of the IFilter interface and must be in ascending order,
    /// following the order in which chunks appear in the source document.
    /// Some search engines exploit the proximity of chunks of various
    /// properties, in which case the order in which chunks with different
    /// properties are emitted matters to the search engine.
    /// </summary>
    public int idChunk;

    /// <summary>
    /// Kind of break separating the previous chunk from this one; a value
    /// from the CHUNK_BREAKTYPE enumeration. Marshaled as an unsigned
    /// 32-bit integer.
    /// </summary>
    [MarshalAs(UnmanagedType.U4)]
    public CHUNK_BREAKTYPE breakType;

    /// <summary>
    /// Flags, taken from the CHUNKSTATE enumeration, telling whether the
    /// chunk holds a text-type or a value-type property. With CHUNK_TEXT set,
    /// IFilter::GetText retrieves the contents as a series of words. With
    /// CHUNK_VALUE set, IFilter::GetValue retrieves the value, which is
    /// treated as a single property value. When the filter wants the same
    /// content treated as both text and value, it emits it twice, in two
    /// different chunks, each with one flag set. Marshaled as an unsigned
    /// 32-bit integer.
    /// </summary>
    [MarshalAs(UnmanagedType.U4)]
    public CHUNKSTATE flags;

    /// <summary>
    /// Language and sublanguage of the chunk's text, used by document
    /// indexers for proper word breaking. Ignored when the chunk is neither
    /// text-type nor a value-type with data type VT_LPWSTR, VT_LPSTR or
    /// VT_BSTR.
    /// </summary>
    public int locale;

    /// <summary>
    /// Property applied to the chunk. A filter that needs the same text to
    /// carry more than one property emits the text once per property, in
    /// separate chunks.
    /// </summary>
    public FULLPROPSPEC attribute;

    /// <summary>
    /// Identifier of the chunk this chunk was derived from:
    /// for a text-type property it must equal <see cref="idChunk"/>;
    /// for an internal value-type property derived from textual content it
    /// is the ID of the text-type chunk the value was derived from;
    /// when the filter attributes ask for internal value-type properties
    /// only, there is no content chunk to derive from and the value must be
    /// zero, which is an invalid chunk ID.
    /// </summary>
    public int idChunkSource;

    /// <summary>
    /// Offset within the source chunk at which the source text of a derived
    /// chunk starts.
    /// </summary>
    public int cwcStartSource;

    /// <summary>
    /// Length, in characters, of the source text the current chunk was
    /// derived from. Zero signifies character-by-character correspondence
    /// between source text and derived text; a nonzero value means no such
    /// direct correspondence exists.
    /// </summary>
    public int cwcLenSource;
  }

  /// <summary>
  /// Kinds of break that can separate the current chunk from the previous
  /// chunk read out by the filter.
  /// </summary>
  public enum CHUNK_BREAKTYPE : int
  {
    /// <summary>
    /// No break between this chunk and the previous one; the two chunks are
    /// glued together.
    /// </summary>
    CHUNK_NO_BREAK = 0,

    /// <summary>
    /// Word break between this chunk and the previous chunk that had the
    /// same attribute. Use of CHUNK_EOW should be minimised: the choice of
    /// word breaks is language-dependent, so it is best left to the search
    /// engine.
    /// </summary>
    CHUNK_EOW = 1,

    /// <summary>
    /// Sentence break between this chunk and the previous chunk that had the
    /// same attribute.
    /// </summary>
    CHUNK_EOS = 2,

    /// <summary>
    /// Paragraph break between this chunk and the previous chunk that had
    /// the same attribute.
    /// </summary>
    CHUNK_EOP = 3,

    /// <summary>
    /// Chapter break between this chunk and the previous chunk that had the
    /// same attribute.
    /// </summary>
    CHUNK_EOC = 4
  }


  /// <summary>
  /// Bit flags describing what kind of property the current chunk holds.
  /// The values are distinct bits, so the enumeration is marked
  /// <see cref="FlagsAttribute"/>: combined values then format and parse as
  /// a list of member names instead of a bare number.
  /// </summary>
  [Flags]
  public enum CHUNKSTATE
  {
    /// <summary>
    /// The current chunk is a text-type property.
    /// </summary>
    CHUNK_TEXT = 0x1,
    /// <summary>
    /// The current chunk is a value-type property.
    /// </summary>
    CHUNK_VALUE = 0x2,
    /// <summary>
    /// Reserved
    /// </summary>
    CHUNK_FILTER_OWNED_VALUE = 0x4
  }

  /// <summary>
  /// HRESULT values returned by the IFilter methods. The underlying type is
  /// <c>uint</c> so the failure codes (high bit set) can be written as plain
  /// hexadecimal literals.
  /// </summary>
  internal enum IFilterReturnCode : uint
  {
    /// <summary>
    /// Success
    /// </summary>
    S_OK = 0,
    /// <summary>
    /// The function was denied access to the filter file.
    /// </summary>
    E_ACCESSDENIED = 0x80070005,
    /// <summary>
    /// The function encountered an invalid handle,
    /// probably due to a low-memory situation.
    /// </summary>
    E_HANDLE = 0x80070006,
    /// <summary>
    /// The function received an invalid parameter.
    /// </summary>
    E_INVALIDARG = 0x80070057,
    /// <summary>
    /// Out of memory
    /// </summary>
    E_OUTOFMEMORY = 0x8007000E,
    /// <summary>
    /// Not implemented
    /// </summary>
    E_NOTIMPL = 0x80004001,
    /// <summary>
    /// Unspecified failure. This is the Win32 value (0x80004005); the value
    /// 0x80000008 is the legacy 16-bit definition and is never returned by
    /// Win32 COM components, so comparing against it would never match.
    /// </summary>
    E_FAIL = 0x80004005,
    /// <summary>
    /// File not filtered due to password protection
    /// </summary>
    FILTER_E_PASSWORD = 0x8004170B,
    /// <summary>
    /// The document format is not recognised by the filter
    /// </summary>
    FILTER_E_UNKNOWNFORMAT = 0x8004170C,
    /// <summary>
    /// No text in current chunk
    /// </summary>
    FILTER_E_NO_TEXT = 0x80041705,
    /// <summary>
    /// No more chunks of text available in object
    /// </summary>
    FILTER_E_END_OF_CHUNKS = 0x80041700,
    /// <summary>
    /// No more text available in chunk
    /// </summary>
    FILTER_E_NO_MORE_TEXT = 0x80041701,
    /// <summary>
    /// No more property values available in chunk
    /// </summary>
    FILTER_E_NO_MORE_VALUES = 0x80041702,
    /// <summary>
    /// Unable to access object
    /// </summary>
    FILTER_E_ACCESS = 0x80041703,
    /// <summary>
    /// Moniker doesn't cover entire region (success code with information)
    /// </summary>
    FILTER_W_MONIKER_CLIPPED = 0x00041704,
    /// <summary>
    /// Unable to bind IFilter for embedded object
    /// </summary>
    FILTER_E_EMBEDDING_UNAVAILABLE = 0x80041707,
    /// <summary>
    /// Unable to bind IFilter for linked object
    /// </summary>
    FILTER_E_LINK_UNAVAILABLE = 0x80041708,
    /// <summary>
    /// This is the last text in the current chunk (success code)
    /// </summary>
    FILTER_S_LAST_TEXT = 0x00041709,
    /// <summary>
    /// This is the last value in the current chunk (success code)
    /// </summary>
    FILTER_S_LAST_VALUES = 0x0004170A
  }

  /// <summary>
  /// COM import declaration of the native IFilter interface
  /// (IID 89BCB740-6119-101A-BCB7-00DD010655AF), derived directly from
  /// IUnknown. Every method is marked PreserveSig, so the raw HRESULT is
  /// returned to the caller instead of being converted into an exception.
  /// The members must stay in this declaration order: COM interop maps
  /// them to vtable slots by position.
  /// </summary>
  [ComImport, Guid("89BCB740-6119-101A-BCB7-00DD010655AF")]
  [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
  internal interface IFilter
  {
    /// <summary>
    /// The IFilter::Init method initializes a filtering session.
    /// </summary>
    /// <returns>The HRESULT of the call, as an IFilterReturnCode.</returns>
    [PreserveSig]
    IFilterReturnCode Init(
      // [in] Flag settings from the IFILTER_INIT enumeration controlling
      // text standardization, property output, embedding scope, and
      // IFilter access patterns.
      IFILTER_INIT grfFlags,

      // [in] The size of the aAttributes array. When nonzero, cAttributes
      // takes precedence over attributes specified in grfFlags. If no
      // attribute flags are specified and cAttributes is zero, the default
      // is given by the PSGUID_STORAGE storage property set (date and time
      // of the last write to the file, size, and so on) and by the
      // PID_STG_CONTENTS 'contents' property, which maps to the main
      // contents of the file.
      int cAttributes,

      // [in] Pointer to the array of FULLPROPSPEC structures for the
      // requested properties, passed as a raw pointer. When cAttributes
      // is nonzero, only the properties in aAttributes are returned.
      IntPtr aAttributes,

      // [out] Information about additional properties available to the
      // caller; from the IFILTER_FLAGS enumeration.
      out IFILTER_FLAGS pdwFlags);

    /// <summary>
    /// The IFilter::GetChunk method positions the filter at the beginning
    /// of the next chunk, or at the first chunk if this is the first call
    /// to GetChunk, and returns a description of the current chunk in
    /// <paramref name="pStat"/>.
    /// </summary>
    /// <returns>The HRESULT of the call, as an IFilterReturnCode.</returns>
    [PreserveSig]
    IFilterReturnCode GetChunk(out STAT_CHUNK pStat);

    /// <summary>
    /// The IFilter::GetText method retrieves text (text-type properties)
    /// from the current chunk, which must have a CHUNKSTATE enumeration
    /// value of CHUNK_TEXT.
    /// </summary>
    /// <returns>The HRESULT of the call, as an IFilterReturnCode.</returns>
    [PreserveSig]
    IFilterReturnCode GetText(
      // [in/out] On entry, the size of the awcBuffer array in wide/Unicode
      // characters. On exit, the number of Unicode characters written to
      // awcBuffer. Note that this value is a character count, not the
      // number of bytes in the buffer.
      ref uint pcwcBuffer,

      // [out] Text retrieved from the current chunk. The buffer is not
      // terminated with a null character, so use pcwcBuffer to find the
      // end of the returned text.
      [Out(), MarshalAs(UnmanagedType.LPArray)] 
      char[] awcBuffer);

    /// <summary>
    /// The IFilter::GetValue method retrieves a value (internal
    /// value-type property) from a chunk, which must have a CHUNKSTATE
    /// enumeration value of CHUNK_VALUE.
    /// </summary>
    /// <returns>
    /// The raw HRESULT as an int (unlike the methods above, it is not
    /// typed as IFilterReturnCode).
    /// </returns>
    [PreserveSig]
    int GetValue(
      // Receives a pointer to a PROPVARIANT structure allocated with
      // CoTaskMemAlloc. Some PROPVARIANT structures contain pointers,
      // which can be freed by calling the PropVariantClear function.
      // It is up to the caller of GetValue to call PropVariantClear.
      // NOTE(review): presumably the PROPVARIANT block itself must also be
      // released by the caller (CoTaskMemFree) - confirm against the
      // IFilter::GetValue documentation.
      ref IntPtr PropVal);

    /// <summary>
    /// The IFilter::BindRegion method retrieves an interface representing
    /// the specified portion of the object.
    /// Currently reserved for future use.
    /// </summary>
    /// <returns>The raw HRESULT as an int.</returns>
    // NOTE(review): the native prototype appears to take FILTERREGION by
    // value and to return the interface through an out pointer (void**);
    // the by-ref parameters declared here may not match it. Verify against
    // the IFilter::BindRegion documentation before calling this method.
    [PreserveSig]
    int BindRegion(ref FILTERREGION origPos,
      ref Guid riid, ref object ppunk);
  }


}
www.java2v.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.