001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/documents/I_CmsDocumentFactory.java,v $
003: * Date : $Date: 2008-02-27 12:05:21 $
004: * Version: $Revision: 1.27 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search.documents;
033:
034: import org.opencms.file.CmsObject;
035: import org.opencms.file.CmsResource;
036: import org.opencms.main.CmsException;
037: import org.opencms.search.CmsSearchIndex;
038:
039: import java.util.List;
040:
041: import org.apache.lucene.document.Document;
042:
043: /**
044: * Used to create index Lucene Documents for OpenCms resources,
045: * controls the text extraction algorithm used for a specific OpenCms resource type / MIME type combination.<p>
046: *
047: * The configuration of the search index is defined in <code>opencms-search.xml</code>.
048: * There you can associate a combintion of OpenCms resource types and MIME types to an instance
049: * of this factory. This rather complex configuration is required because only the combination of
050: * OpenCms resource type and MIME type can decide what to use for search indexing.
051: * For example, if the OpenCms resource type is <code>plain</code>,
052: * the extraction algorithm for MIME types <code>.html</code> and <code>.txt</code> must be different.
053: * On the other hand, the MIME type <code>.html</code> in OpenCms can be almost any resource type,
054: * like <code>xmlpage</code>, <code>xmlcontent</code> or even <code>jsp</code>.<p>
055: *
056: * @author Carsten Weinholz
057: * @author Thomas Weckert
058: * @author Alexander Kandzior
059: *
060: * @version $Revision: 1.27 $
061: *
062: * @since 6.0.0
063: */
064: public interface I_CmsDocumentFactory extends I_CmsSearchExtractor {
065:
066: /**
067: * Creates the Lucene Document for the given index resource and the given search index.<p>
068: *
069: * This triggers the indexing process for the given index resource accoring to the configuration
070: * of the provided index.<p>
071: *
072: * The provided index resource contains the basic contents to index.
073: * The provided search index contains the configuration what to index, such as the locale and
074: * possible special field mappings.<p>
075: *
076: * @param cms the cms object used to access the OpenCms VFS
077: * @param resource the search index resource to create the Lucene document from
078: * @param index the search index to create the Document for
079: *
080: * @return the Lucene Document for the given index resource and the given search index
081: *
082: * @throws CmsException if something goes wrong
083: */
084: Document createDocument(CmsObject cms, CmsResource resource,
085: CmsSearchIndex index) throws CmsException;
086:
087: /**
088: * Returns the disk based cache used to store the raw extraction results.<p>
089: *
090: * In case <code>null</code> is returned, then result caching is not supported for this factory.<p>
091: *
092: * @return the disk based cache used to store the raw extraction results
093: */
094: CmsExtractionResultCache getCache();
095:
096: /**
097: * Returns the list of accepted keys for the resource types that can be indexed using this document factory.<p>
098: *
099: * The result List contains String objects.
100: * This String is later matched against {@link A_CmsVfsDocument#getDocumentKey(String, String)} to find
101: * the corrospondig {@link I_CmsDocumentFactory} for a resource to index.<p>
102: *
103: * The list of accepted resource types may contain a catch-all entry "*";
104: * in this case, a list for all possible resource types is returned,
105: * calculated by a logic depending on the document handler class.<p>
106: *
107: * @param resourceTypes list of accepted resource types
108: * @param mimeTypes list of accepted mime types
109: *
110: * @return the list of accepted keys for the resource types that can be indexed using this document factory (String objects)
111: *
112: * @throws CmsException if something goes wrong
113: */
114: List getDocumentKeys(List resourceTypes, List mimeTypes)
115: throws CmsException;
116:
117: /**
118: * Returns the name of this document type factory.<p>
119: *
120: * @return the name of this document type factory
121: */
122: String getName();
123:
124: /**
125: * Returns <code>true</code> if this document factory is locale depended.<p>
126: *
127: * @return <code>true</code> if this document factory is locale depended
128: */
129: boolean isLocaleDependend();
130:
131: /**
132: * Returns <code>true</code> if result caching is supported for this factory.<p>
133: *
134: * @return <code>true</code> if result caching is supported for this factory
135: */
136: boolean isUsingCache();
137:
138: /**
139: * Sets the disk based cache used to store the raw extraction results.<p>
140: *
141: * This should only be used for factories where {@link #isUsingCache()} returns <code>true</code>.<p>
142: *
143: * @param cache the disk based cache used to store the raw extraction results
144: */
145: void setCache(CmsExtractionResultCache cache);
146: }
|