001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/extractors/I_CmsExtractionResult.java,v $
003: * Date : $Date: 2008-02-27 12:05:30 $
004: * Version: $Revision: 1.10 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search.extractors;
033:
034: import java.util.Map;
035:
036: /**
037: * The result of a document text extraction.<p>
038: *
039: * This data structure contains the extracted text as well as (optional)
040: * meta information extracted from the document.<p>
041: *
042: * @author Alexander Kandzior
043: *
044: * @version $Revision: 1.10 $
045: *
046: * @since 6.0.0
047: */
048: public interface I_CmsExtractionResult {
049:
050: /** Key to access the document author name in the item map. */
051: String ITEM_AUTHOR = "author";
052:
053: /** Key to access the document category in the item map. */
054: String ITEM_CATEGORY = "category";
055:
056: /** Key to access the document comments in the item map. */
057: String ITEM_COMMENTS = "comments";
058:
059: /** Key to access the document company name in the item map. */
060: String ITEM_COMPANY = "company";
061:
062: /** Key for accessing the default (combined) content in {@link #getContentItems()}. */
063: String ITEM_CONTENT = "__content";
064:
065: /** Key to access the document creator name in the item map. */
066: String ITEM_CREATOR = "creator";
067:
068: /** Key to access the document keywords in the item map. */
069: String ITEM_KEYWORDS = "keywords";
070:
071: /** Key to access the document manager name in the item map. */
072: String ITEM_MANAGER = "manager";
073:
074: /** Key to access the document producer name in the item map. */
075: String ITEM_PRODUCER = "producer";
076:
077: /** Key for accessing the raw content in {@link #getContentItems()}. */
078: String ITEM_RAW = "__raw";
079:
080: /** Key to access the document subject in the item map. */
081: String ITEM_SUBJECT = "subject";
082:
083: /** Key to access the document title in the item map. */
084: String ITEM_TITLE = "title";
085:
086: /**
087: * Returns the extracted content combined as a String.<p>
088: *
089: * @return the extracted content combined as a String
090: */
091: String getContent();
092:
093: /**
094: * Returns the extracted content as individual items.<p>
095: *
096: * The result Map contains all content items extracted
097: * by the extractor. The key is always a String, and contains the name of the item.
098: * The value is also a String and contains the extracted text.<p>
099: *
100: * The detailed form will depend on the resource type indexed:
101: * <ul>
102: * <li>For an <code>xmlpage</code>, the key will be the element name, and the value
103: * will be the text of the element.
104: * <li>For an <code>xmlcontent</code>, the key will be the xpath of the XML node,
105: * and the value will be the text of that XML node.
106: * <li>In case the document contains meta information (for example PDF or MS Office documents),
107: * the meta information is stored with the name of the meta field as key and the content as value.
108: * <li>For all other resource types, there will be only one key {@link #ITEM_CONTENT},
109: * which will contain the value of the complete content.
110: * </ul>
111: *
112: * @return the extracted content as individual items (a Map with String item names as keys and String texts as values)
113: */
114: Map getContentItems();
115:
116: /**
117: * Releases the information stored in this extraction result, to free up the memory used.<p>
118: */
119: void release();
120: }
|