001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.search;
018:
019: import org.apache.avalon.framework.component.Component;
020: import org.apache.cocoon.ProcessingException;
021:
022: import java.net.URL;
023: import java.util.List;
024:
025: /**
026: * The avalon behavioural component interface for generating
027: * lucene documents from xml content.
028: *
029: * <p>
030: * The well-known fields of a lucene document are defined as
031: * <code>*_FIELD</code> constants.
032: * </p>
033: * <p>
034: * You trigger the generating of lucene documents via
035: * <code>build()</code>.
036: * </p>
037: * <p>
038: * NOTE(review): the <code>List</code> returned by <code>build()</code> presumably
039: * holds the generated lucene documents; the element type is not declared here.
040: * </p>
041: *
042: * @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
043: * @version CVS $Id: LuceneXMLIndexer.java 433543 2006-08-22 06:22:54Z crossley $
044: */
045: public interface LuceneXMLIndexer extends Component {
046:
047: /**
048: * The ROLE name of this avalon component.
049: *
050: * <p>
051: * Its value is the fully qualified name (FQN) of this interface,
052: * i.e. <code>org.apache.cocoon.components.search.LuceneXMLIndexer</code>.
053: * Keep the two in sync if the interface is ever moved or renamed.
054: * </p>
055: */
056: String ROLE = "org.apache.cocoon.components.search.LuceneXMLIndexer";
057:
058: /**
059: * A Lucene document field name, containing the xml content text of all xml elements.
060: *
061: * <p>
062: * A concrete implementation of this interface SHOULD
063: * provide a field named <code>body</code>,
064: * the value of this constant.
065: * </p>
066: * <p>
067: * A concrete implementation MAY provide additional lucene
068: * document fields.
069: * </p>
070: */
071: String BODY_FIELD = "body";
072:
073: /**
074: * A Lucene document field name, containing the URI/URL of the indexed
075: * document.
076: *
077: * <p>
078: * A concrete implementation of this interface SHOULD
079: * provide a field named <code>url</code>,
080: * the value of this constant.
081: * </p>
082: */
083: String URL_FIELD = "url";
084:
085: /**
086: * A Lucene document field name, containing a unique key of the indexed
087: * document.
088: *
089: * <p>
090: * This document field is used internally to track document
091: * changes and updates.
092: * </p>
093: * <p>
094: * A concrete implementation of this interface SHOULD
095: * provide a field named <code>uid</code>,
096: * the value of this constant.
097: * </p>
098: */
099: String UID_FIELD = "uid";
100:
101: /**
102: * Build lucene documents from a URL.
103: * <p>
104: * This method reads the content of the URL and generates
105: * one or more lucene documents.
106: * </p>
107: *
108: * @param url the content of this url gets indexed.
109: * @return a <code>List</code>, presumably of the generated lucene documents
110: *         -- TODO confirm the element type against implementations.
111: * @exception ProcessingException presumably when reading or indexing fails -- TODO confirm.
112: */
113: List build(URL url) throws ProcessingException;
114: }
|