001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/documents/CmsDocumentOpenOffice.java,v $
003: * Date : $Date: 2008-02-27 12:05:21 $
004: * Version: $Revision: 1.2 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search.documents;
033:
034: import org.opencms.file.CmsFile;
035: import org.opencms.file.CmsObject;
036: import org.opencms.file.CmsResource;
037: import org.opencms.main.CmsException;
038: import org.opencms.search.CmsIndexException;
039: import org.opencms.search.CmsSearchIndex;
040: import org.opencms.search.extractors.CmsExtractorOpenOffice;
041: import org.opencms.search.extractors.I_CmsExtractionResult;
042:
043: /**
044: * Lucene document factory class to extract index data from a cms resource
045: * containing Open Document Format data.<p>
046: *
047: * @author Dirk Oelkers
048: *
049: * @version $Revision: 1.2 $
050: *
051: * @since 7.0.4
052: */
053: public class CmsDocumentOpenOffice extends A_CmsVfsDocument {
054:
055: /**
056: * Creates a new instance of this lucene document factory.<p>
057: *
058: * @param name name of the document type
059: */
060: public CmsDocumentOpenOffice(String name) {
061:
062: super (name);
063: }
064:
065: /**
066: * Returns the raw text content of a given vfs resource containing MS Word data.<p>
067: *
068: * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
069: */
070: public I_CmsExtractionResult extractContent(CmsObject cms,
071: CmsResource resource, CmsSearchIndex index)
072: throws CmsIndexException, CmsException {
073:
074: CmsFile file = readFile(cms, resource);
075:
076: try {
077: return CmsExtractorOpenOffice.getExtractor().extractText(
078: file.getContents());
079: } catch (Exception e) {
080: throw new CmsIndexException(Messages.get().container(
081: Messages.ERR_TEXT_EXTRACTION_1,
082: resource.getRootPath()), e);
083: }
084: }
085:
086: /**
087: * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
088: */
089: public boolean isLocaleDependend() {
090:
091: return false;
092: }
093:
094: /**
095: * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
096: */
097: public boolean isUsingCache() {
098:
099: return true;
100: }
101: }
|