Source Code Cross Referenced for SimpleLuceneCocoonIndexerImpl.java in » Content-Management-System » apache-lenya-2.0 » org » apache » cocoon » components » search » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Content Management System » apache lenya 2.0 » org.apache.cocoon.components.search

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /*
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         * 
009:         *      http://www.apache.org/licenses/LICENSE-2.0
010:         * 
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */
017:        package org.apache.cocoon.components.search;
018:
019:        import org.apache.avalon.framework.activity.Disposable;
020:        import org.apache.avalon.framework.configuration.Configurable;
021:        import org.apache.avalon.framework.configuration.Configuration;
022:        import org.apache.avalon.framework.configuration.ConfigurationException;
023:        import org.apache.avalon.framework.logger.AbstractLogEnabled;
024:        import org.apache.avalon.framework.service.ServiceException;
025:        import org.apache.avalon.framework.service.ServiceManager;
026:        import org.apache.avalon.framework.service.Serviceable;
027:        import org.apache.cocoon.ProcessingException;
028:        import org.apache.cocoon.components.crawler.CocoonCrawler;
029:        import org.apache.lucene.analysis.Analyzer;
030:        import org.apache.lucene.document.Document;
031:        import org.apache.lucene.index.IndexReader;
032:        import org.apache.lucene.index.IndexWriter;
033:        import org.apache.lucene.index.Term;
034:        import org.apache.lucene.index.TermEnum;
035:        import org.apache.lucene.store.Directory;
036:
037:        import java.io.IOException;
038:        import java.net.URL;
039:        import java.util.Iterator;
040:
041:        /**
042:         * A lucene indexer.
043:         *
044:         * <p>
045:         *  XML documents are indexed using lucene.
046:         *  Links to XML documents are supplied by
047:         *  a crawler, requesting links of documents by specifying a cocoon-view, and
048:         *  HTTP protocol.
049:         * </p>
050:         *
051:         * @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
052:         * @version CVS $Id: SimpleLuceneCocoonIndexerImpl.java 433543 2006-08-22 06:22:54Z crossley $
053:         */
054:        public class SimpleLuceneCocoonIndexerImpl extends AbstractLogEnabled
055:                implements  LuceneCocoonIndexer, Configurable, Serviceable,
056:                Disposable {
057:
058:            /**
059:             * configuration tagname for specifying the analyzer class
060:             */
061:            public final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
062:
063:            /**
064:             * configuration default analyzer class
065:             */
066:            public final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
067:
068:            /**
069:             * configuration tagname for specifying lucene's index directory
070:             */
071:            public final static String DIRECTORY_CONFIG = "directory";
072:
073:            /**
074:             * configuration default directory, ie. no default.
075:             */
076:            public final static String DIRECTORY_DEFAULT = null;
077:
078:            /**
079:             * configuration tagname for specifying lucene's merge factor.
080:             */
081:            public final static String MERGE_FACTOR_CONFIG = "merge-factor";
082:
083:            /**
084:             * configuration default value for
085:             * <a href="http://www.mail-archive.com/lucene-user@jakarta.apache.org/msg00373.html">lucene's merge factor</a>.
086:             */
087:            public final static int MERGE_FACTOR_DEFAULT = 10;
088:
089:            /**
090:             * The service manager for looking up components used.
091:             */
092:            protected ServiceManager manager = null;
093:
094:            protected Analyzer analyzer;
095:            //    private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;
096:            private int mergeFactor = MERGE_FACTOR_DEFAULT;
097:
098:            /**
099:             *Sets the analyzer attribute of the SimpleLuceneCocoonIndexerImpl object
100:             *
101:             * @param  analyzer  The new analyzer value
102:             */
103:            public void setAnalyzer(Analyzer analyzer) {
104:                this .analyzer = analyzer;
105:            }
106:
107:            /**
108:             * Configure this component.
109:             *
110:             * @param  conf                        is the configuration
111:             * @exception  ConfigurationException  is thrown if configuring fails
112:             */
113:            public void configure(Configuration conf)
114:                    throws ConfigurationException {
115:                Configuration child;
116:
117:                /*        child = conf.getChild(ANALYZER_CLASSNAME_CONFIG, false);
118:                 if (child != null) {
119:                 // fix Bugzilla Bug 25277, use child.getValue
120:                 // and in all following blocks
121:                 String value = child.getValue(ANALYZER_CLASSNAME_DEFAULT);
122:                 if (value != null) {
123:                 analyzerClassnameDefault = value;
124:                 }
125:                 }
126:                 */
127:                child = conf.getChild(MERGE_FACTOR_CONFIG, false);
128:                if (child != null) {
129:                    // fix Bugzilla Bug 25277, use child instead of conf
130:                    int int_value = child
131:                            .getValueAsInteger(MERGE_FACTOR_DEFAULT);
132:                    mergeFactor = int_value;
133:                }
134:            }
135:
136:            /**
137:             * Set the current <code>ServiceManager</code> instance used by this
138:             * <code>Serviceable</code>.
139:             *
140:             * @param  manager                 used by this component
141:             * @exception  ServiceException  is never thrown
142:             */
143:            public void service(ServiceManager manager) throws ServiceException {
144:                this .manager = manager;
145:            }
146:
147:            /**
148:             * Dispose this component.
149:             */
150:            public void dispose() {
151:            }
152:
153:            /**
154:             * index content of base_url, index content of links from base_url.
155:             *
156:             * @param  index                    the lucene store to write the index to
157:             * @param  create                   if true create, or overwrite existing index, else
158:             *   update existing index.
159:             * @param  base_url                 index content of base_url, and crawl through all its
160:             *   links recursivly.
161:             * @exception  ProcessingException  is thrown if indexing fails
162:             */
163:            public void index(Directory index, boolean create, URL base_url)
164:                    throws ProcessingException {
165:
166:                IndexWriter writer = null;
167:                LuceneXMLIndexer lxi = null;
168:                CocoonCrawler cocoonCrawler = null;
169:
170:                try {
171:                    lxi = (LuceneXMLIndexer) manager
172:                            .lookup(LuceneXMLIndexer.ROLE);
173:
174:                    writer = new IndexWriter(index, analyzer, create);
175:                    writer.mergeFactor = this .mergeFactor;
176:
177:                    cocoonCrawler = (CocoonCrawler) manager
178:                            .lookup(CocoonCrawler.ROLE);
179:                    cocoonCrawler.crawl(base_url);
180:
181:                    Iterator cocoonCrawlerIterator = cocoonCrawler.iterator();
182:                    while (cocoonCrawlerIterator.hasNext()) {
183:                        URL crawl_url = (URL) cocoonCrawlerIterator.next();
184:                        // result of fix Bugzilla Bug 25270, in SimpleCocoonCrawlerImpl
185:                        // check if crawl_url is null
186:                        if (crawl_url == null) {
187:                            continue;
188:                        } else if (!crawl_url.getHost().equals(
189:                                base_url.getHost())
190:                                || crawl_url.getPort() != base_url.getPort()) {
191:
192:                            // skip urls using different host, or port than host,
193:                            // or port of base url
194:                            if (getLogger().isDebugEnabled()) {
195:                                getLogger().debug(
196:                                        "Skipping crawling URL "
197:                                                + crawl_url.toString()
198:                                                + " as base_url is "
199:                                                + base_url.toString());
200:                            }
201:                            continue;
202:                        }
203:
204:                        // build lucene documents from the content of the crawl_url
205:                        Iterator i = lxi.build(crawl_url).iterator();
206:
207:                        // add all built lucene documents
208:                        while (i.hasNext()) {
209:                            writer.addDocument((Document) i.next());
210:                        }
211:                    }
212:                    // optimize it
213:                    writer.optimize();
214:                } catch (IOException ioe) {
215:                    throw new ProcessingException("IOException in index()", ioe);
216:                } catch (ServiceException se) {
217:                    throw new ProcessingException(
218:                            "Could not lookup service in index()", se);
219:                } finally {
220:                    if (writer != null) {
221:                        try {
222:                            writer.close();
223:                        } catch (IOException ioe) {
224:                        }
225:                        writer = null;
226:                    }
227:
228:                    if (lxi != null) {
229:                        manager.release(lxi);
230:                        lxi = null;
231:                    }
232:                    if (cocoonCrawler != null) {
233:                        manager.release(cocoonCrawler);
234:                        cocoonCrawler = null;
235:                    }
236:                }
237:            }
238:
239:            /**
240:             * A document iterator deleting "old" documents form the index.
241:             * 
242:             * TODO: use this class before indexing, in non-creating mode.
243:             */
244:            static class DocumentDeletableIterator {
245:                private IndexReader reader;
246:                // existing index
247:                private TermEnum uidIter;
248:
249:                // document id iterator
250:
251:                /**
252:                 *Constructor for the DocumentDeletableIterator object
253:                 *
254:                 * @param  directory        Description of Parameter
255:                 * @exception  IOException  Description of Exception
256:                 */
257:                public DocumentDeletableIterator(Directory directory)
258:                        throws IOException {
259:                    reader = IndexReader.open(directory);
260:                    // open existing index
261:                    uidIter = reader.terms(new Term("uid", ""));
262:                    // init uid iterator
263:                }
264:
265:                /**
266:                 *Description of the Method
267:                 *
268:                 * @exception  IOException  Description of Exception
269:                 */
270:                public void deleteAllStaleDocuments() throws IOException {
271:                    while (uidIter.term() != null
272:                            && uidIter.term().field().equals("uid")) {
273:                        reader.delete(uidIter.term());
274:                        uidIter.next();
275:                    }
276:                }
277:
278:                /**
279:                 *Description of the Method
280:                 *
281:                 * @param  uid              Description of Parameter
282:                 * @exception  IOException  Description of Exception
283:                 */
284:                public void deleteModifiedDocuments(String uid)
285:                        throws IOException {
286:                    while (documentHasBeenModified(uidIter.term(), uid)) {
287:                        reader.delete(uidIter.term());
288:                        uidIter.next();
289:                    }
290:                    if (documentHasNotBeenModified(uidIter.term(), uid)) {
291:                        uidIter.next();
292:                    }
293:                }
294:
295:                /**
296:                 *Description of the Method
297:                 *
298:                 * @exception  Throwable  Description of Exception
299:                 */
300:                protected void finalize() throws Throwable {
301:                    super .finalize();
302:                    if (uidIter != null) {
303:                        uidIter.close();
304:                        // close uid iterator
305:                        uidIter = null;
306:                    }
307:                    if (reader != null) {
308:                        reader.close();
309:                        // close existing index
310:                        reader = null;
311:                    }
312:                }
313:
314:                /**
315:                 *Description of the Method
316:                 *
317:                 * @param  term  Description of Parameter
318:                 * @return       Description of the Returned Value
319:                 */
320:                boolean documentIsDeletable(Term term) {
321:                    return term != null && term.field() == "uid";
322:                }
323:
324:                /**
325:                 *Description of the Method
326:                 *
327:                 * @param  term  Description of Parameter
328:                 * @param  uid   Description of Parameter
329:                 * @return       Description of the Returned Value
330:                 */
331:                boolean documentHasBeenModified(Term term, String uid) {
332:                    return documentIsDeletable(term)
333:                            && term.text().compareTo(uid) < 0;
334:                }
335:
336:                /**
337:                 *Description of the Method
338:                 *
339:                 * @param  term  Description of Parameter
340:                 * @param  uid   Description of Parameter
341:                 * @return       Description of the Returned Value
342:                 */
343:                boolean documentHasNotBeenModified(Term term, String uid) {
344:                    return documentIsDeletable(term)
345:                            && term.text().compareTo(uid) == 0;
346:                }
347:            }
348:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.