01: /*
02: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/documents/I_CmsSearchExtractor.java,v $
03: * Date : $Date: 2008-02-27 12:05:21 $
04: * Version: $Revision: 1.8 $
05: *
06: * This library is part of OpenCms -
07: * the Open Source Content Management System
08: *
09: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
10: *
11: * This library is free software; you can redistribute it and/or
12: * modify it under the terms of the GNU Lesser General Public
13: * License as published by the Free Software Foundation; either
14: * version 2.1 of the License, or (at your option) any later version.
15: *
16: * This library is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19: * Lesser General Public License for more details.
20: *
21: * For further information about Alkacon Software GmbH, please see the
22: * company website: http://www.alkacon.com
23: *
24: * For further information about OpenCms, please see the
25: * project website: http://www.opencms.org
26: *
27: * You should have received a copy of the GNU Lesser General Public
28: * License along with this library; if not, write to the Free Software
29: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30: */
31:
32: package org.opencms.search.documents;
33:
34: import org.opencms.file.CmsObject;
35: import org.opencms.file.CmsResource;
36: import org.opencms.main.CmsException;
37: import org.opencms.search.CmsSearchIndex;
38: import org.opencms.search.extractors.I_CmsExtractionResult;
39:
40: /**
41: * Defines a text extractor for the integrated search engine.<p>
42: *
43: * The job of a search extractor is to extract indexable plain text from
44: * a resource in the OpenCms VFS. This may be from the resource content, for example
45: * from a PDF file, or from the resource properties, for example the Title, Keywords and
46: * Description properties.<p>
47: *
48: * @author Carsten Weinholz
49: *
50: * @version $Revision: 1.8 $
51: *
52: * @since 6.0.0
53: */
54: public interface I_CmsSearchExtractor {
55:
56: /**
57: * Extracts the content of a given index resource according to the resource file type and the
58: * configuration of the given index.<p>
59: *
60: * @param cms the cms object
61: * @param resource the resource to extract the content from
62: * @param index the index to extract the content for
63: *
64: * @return the extracted content of the resource
65: *
66: * @throws CmsException if something goes wrong
67: */
68: I_CmsExtractionResult extractContent(CmsObject cms,
69: CmsResource resource, CmsSearchIndex index)
70: throws CmsException;
71:
72: }
|