001: /**
002: * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE, version 2.1, dated February 1999.
003: *
004: * This program is free software; you can redistribute it and/or modify
005: * it under the terms of the latest version of the GNU Lesser General
006: * Public License as published by the Free Software Foundation;
007: *
008: * This program is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
011: * GNU Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public License
014: * along with this program (LICENSE.txt); if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.jamwiki.search;
017:
018: import java.io.File;
019: import java.io.StringReader;
020: import java.util.Collection;
021: import java.util.Iterator;
022: import java.util.Vector;
023: import org.apache.commons.lang.StringEscapeUtils;
024: import org.apache.commons.lang.StringUtils;
025: import org.apache.lucene.analysis.TokenStream;
026: import org.apache.lucene.analysis.KeywordAnalyzer;
027: import org.apache.lucene.analysis.standard.StandardAnalyzer;
028: import org.apache.lucene.document.Document;
029: import org.apache.lucene.document.Field;
030: import org.apache.lucene.index.IndexWriter;
031: import org.apache.lucene.index.Term;
032: import org.apache.lucene.queryParser.QueryParser;
033: import org.apache.lucene.search.BooleanQuery;
034: import org.apache.lucene.search.Hits;
035: import org.apache.lucene.search.IndexSearcher;
036: import org.apache.lucene.search.PhraseQuery;
037: import org.apache.lucene.search.Query;
038: import org.apache.lucene.search.BooleanClause.Occur;
039: import org.apache.lucene.search.highlight.Highlighter;
040: import org.apache.lucene.search.highlight.QueryScorer;
041: import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
042: import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
043: import org.apache.lucene.store.FSDirectory;
044: import org.jamwiki.Environment;
045: import org.jamwiki.SearchEngine;
046: import org.jamwiki.WikiBase;
047: import org.jamwiki.model.Topic;
048: import org.jamwiki.model.VirtualWiki;
049: import org.jamwiki.parser.ParserOutput;
050: import org.jamwiki.parser.ParserUtil;
051: import org.jamwiki.utils.WikiLogger;
052:
053: /**
054: * An implementation of {@link org.jamwiki.search.SearchEngine} that uses
055: * <a href="http://lucene.apache.org/java/">Lucene</a> to perform searches of
056: * Wiki content.
057: */
058: public class LuceneSearchEngine implements SearchEngine {
059:
060: /** Where to log to */
061: private static final WikiLogger logger = WikiLogger
062: .getLogger(LuceneSearchEngine.class.getName());
063: /** Directory for search index files */
064: private static final String SEARCH_DIR = "search";
065: /** Id stored with documents to indicate the searchable topic name */
066: private static final String ITYPE_TOPIC = "topic";
067: /** Id stored with documents to indicate the searchable content. */
068: private static final String ITYPE_CONTENT = "content";
069: /** Id stored with documents to indicate the raw Wiki markup */
070: private static final String ITYPE_CONTENT_PLAIN = "content_plain";
071: /** Id stored with documents to indicate the topic name. */
072: private static final String ITYPE_TOPIC_PLAIN = "topic_plain";
073: /** Id stored with the document to indicate the search names of topics linked from the page. */
074: private static final String ITYPE_TOPIC_LINK = "topic_link";
075:
076: /**
077: * Add a topic to the search index.
078: *
079: * @param topic The Topic object that is to be added to the index.
080: * @param links A collection containing the topic names for all topics that link
081: * to the current topic.
082: */
083: public synchronized void addToIndex(Topic topic, Collection links) {
084: String virtualWiki = topic.getVirtualWiki();
085: String topicName = topic.getName();
086: IndexWriter writer = null;
087: try {
088: FSDirectory directory = FSDirectory
089: .getDirectory(getSearchIndexPath(virtualWiki));
090: // FIXME - move synchronization to the writer instance for this directory
091: try {
092: writer = new IndexWriter(directory,
093: new StandardAnalyzer(), false);
094: KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
095: writer.optimize();
096: Document standardDocument = createStandardDocument(topic);
097: writer.addDocument(standardDocument);
098: Document keywordDocument = createKeywordDocument(topic,
099: links);
100: writer.addDocument(keywordDocument, keywordAnalyzer);
101: } finally {
102: try {
103: if (writer != null) {
104: writer.optimize();
105: }
106: } catch (Exception e) {
107: }
108: try {
109: if (writer != null) {
110: writer.close();
111: }
112: } catch (Exception e) {
113: }
114: }
115: directory.close();
116: } catch (Exception e) {
117: logger.severe("Exception while adding topic " + topicName,
118: e);
119: }
120: }
121:
122: /**
123: * Create a basic Lucene document to add to the index that does treats
124: * the topic content as a single keyword and does not tokenize it.
125: */
126: private Document createKeywordDocument(Topic topic, Collection links)
127: throws Exception {
128: String topicContent = topic.getTopicContent();
129: if (topicContent == null) {
130: topicContent = "";
131: }
132: Document doc = new Document();
133: // store topic name for later retrieval
134: doc.add(new Field(ITYPE_TOPIC_PLAIN, topic.getName(),
135: Field.Store.YES, Field.Index.UN_TOKENIZED));
136: if (links == null) {
137: links = new Vector();
138: }
139: // index topic links for search purposes
140: for (Iterator iter = links.iterator(); iter.hasNext();) {
141: String linkTopic = (String) iter.next();
142: doc.add(new Field(ITYPE_TOPIC_LINK, linkTopic,
143: Field.Store.NO, Field.Index.UN_TOKENIZED));
144: }
145: return doc;
146: }
147:
148: /**
149: * Create a basic Lucene document to add to the index. This document
150: * is suitable to be parsed with the StandardAnalyzer.
151: */
152: private Document createStandardDocument(Topic topic)
153: throws Exception {
154: String topicContent = topic.getTopicContent();
155: if (topicContent == null) {
156: topicContent = "";
157: }
158: Document doc = new Document();
159: // store topic name and content for later retrieval
160: doc.add(new Field(ITYPE_TOPIC_PLAIN, topic.getName(),
161: Field.Store.YES, Field.Index.UN_TOKENIZED));
162: doc.add(new Field(ITYPE_CONTENT_PLAIN, topicContent,
163: Field.Store.YES, Field.Index.NO));
164: // index topic name and content for search purposes
165: doc.add(new Field(ITYPE_TOPIC,
166: new StringReader(topic.getName())));
167: doc
168: .add(new Field(ITYPE_CONTENT, new StringReader(
169: topicContent)));
170: return doc;
171: }
172:
173: /**
174: * Remove a topic from the search index.
175: *
176: * @param topic The topic object that is to be removed from the index.
177: */
178: public synchronized void deleteFromIndex(Topic topic) {
179: String virtualWiki = topic.getVirtualWiki();
180: String topicName = topic.getName();
181: IndexWriter writer = null;
182: try {
183: FSDirectory directory = FSDirectory
184: .getDirectory(getSearchIndexPath(virtualWiki));
185: // delete the current document
186: // FIXME - move synchronization to the writer instance for this directory
187: try {
188: writer = new IndexWriter(directory,
189: new StandardAnalyzer(), false);
190: writer.deleteDocuments(new Term(ITYPE_TOPIC_PLAIN,
191: topicName));
192: } finally {
193: if (writer != null) {
194: try {
195: writer.close();
196: } catch (Exception e) {
197: }
198: }
199: }
200: directory.close();
201: } catch (Exception e) {
202: logger.severe("Exception while adding topic " + topicName,
203: e);
204: }
205: }
206:
207: /**
208: * Find all documents that link to a specified topic.
209: *
210: * @param virtualWiki The virtual wiki for the topic.
211: * @param topicName The name of the topic.
212: * @return A collection of SearchResultEntry objects for all documents that
213: * link to the topic.
214: */
215: public Collection findLinkedTo(String virtualWiki, String topicName) {
216: Collection results = new Vector();
217: IndexSearcher searcher = null;
218: try {
219: PhraseQuery query = new PhraseQuery();
220: Term term = new Term(ITYPE_TOPIC_LINK, topicName);
221: query.add(term);
222: searcher = new IndexSearcher(FSDirectory
223: .getDirectory(getSearchIndexPath(virtualWiki)));
224: // actually perform the search
225: Hits hits = searcher.search(query);
226: for (int i = 0; i < hits.length(); i++) {
227: SearchResultEntry result = new SearchResultEntry();
228: result.setRanking(hits.score(i));
229: result.setTopic(hits.doc(i).get(ITYPE_TOPIC_PLAIN));
230: results.add(result);
231: }
232: } catch (Exception e) {
233: logger.severe("Exception while searching for " + topicName,
234: e);
235: } finally {
236: if (searcher != null) {
237: try {
238: searcher.close();
239: } catch (Exception e) {
240: }
241: }
242: }
243: return results;
244: }
245:
246: /**
247: * Find all documents that contain a specific search term, ordered by relevance.
248: * This method supports all Lucene search query syntax.
249: *
250: * @param virtualWiki The virtual wiki for the topic.
251: * @param text The search term being searched for.
252: * @return A collection of SearchResultEntry objects for all documents that
253: * contain the search term.
254: */
255: public Collection findResults(String virtualWiki, String text) {
256: StandardAnalyzer analyzer = new StandardAnalyzer();
257: Collection results = new Vector();
258: logger.fine("search text: " + text);
259: IndexSearcher searcher = null;
260: try {
261: BooleanQuery query = new BooleanQuery();
262: QueryParser qp;
263: qp = new QueryParser(ITYPE_TOPIC, analyzer);
264: query.add(qp.parse(text), Occur.SHOULD);
265: qp = new QueryParser(ITYPE_CONTENT, analyzer);
266: query.add(qp.parse(text), Occur.SHOULD);
267: searcher = new IndexSearcher(FSDirectory
268: .getDirectory(getSearchIndexPath(virtualWiki)));
269: // rewrite the query to expand it - required for wildcards to work with highlighter
270: Query rewrittenQuery = searcher.rewrite(query);
271: // actually perform the search
272: Hits hits = searcher.search(rewrittenQuery);
273: Highlighter highlighter = new Highlighter(
274: new SimpleHTMLFormatter(
275: "<span class=\"highlight\">", "</span>"),
276: new SimpleHTMLEncoder(), new QueryScorer(
277: rewrittenQuery));
278: for (int i = 0; i < hits.length(); i++) {
279: String summary = retrieveResultSummary(hits.doc(i),
280: highlighter, analyzer);
281: SearchResultEntry result = new SearchResultEntry();
282: result.setRanking(hits.score(i));
283: result.setTopic(hits.doc(i).get(ITYPE_TOPIC_PLAIN));
284: result.setSummary(summary);
285: results.add(result);
286: }
287: } catch (Exception e) {
288: logger.severe("Exception while searching for " + text, e);
289: } finally {
290: if (searcher != null) {
291: try {
292: searcher.close();
293: } catch (Exception e) {
294: }
295: }
296: }
297: return results;
298: }
299:
300: /**
301: * Get the path, which holds all index files
302: */
303: private String getSearchIndexPath(String virtualWiki) {
304: File parent = new File(Environment
305: .getValue(Environment.PROP_BASE_FILE_DIR), SEARCH_DIR);
306: try {
307: if (System.getProperty("org.apache.lucene.lockdir") == null) {
308: // set the Lucene lock directory. this defaults to java.io.tmpdir,
309: // which may not be writable on some systems.
310: System.setProperty("org.apache.lucene.lockdir", parent
311: .getPath());
312: }
313: } catch (Exception e) {
314: // probably a security exception
315: logger
316: .warning("Unable to specify Lucene lock directory, default will be used: "
317: + e.getMessage());
318: }
319: File child = new File(parent.getPath(), "index" + virtualWiki
320: + File.separator);
321: if (!child.exists()) {
322: child.mkdirs();
323: IndexWriter writer = null;
324: try {
325: // create the search instance
326: FSDirectory directory = FSDirectory
327: .getDirectory(getSearchIndexPath(virtualWiki));
328: writer = new IndexWriter(directory,
329: new StandardAnalyzer(), true);
330: directory.close();
331: } catch (Exception e) {
332: logger.severe("Unable to create search instance "
333: + child.getPath(), e);
334: } finally {
335: try {
336: if (writer != null) {
337: writer.close();
338: }
339: } catch (Exception e) {
340: logger.severe("Exception during close", e);
341: }
342: }
343: }
344: return child.getPath();
345: }
346:
347: /**
348: * Refresh the current search index by re-visiting all topic pages.
349: *
350: * @throws Exception Thrown if any error occurs while re-indexing the Wiki.
351: */
352: public synchronized void refreshIndex() throws Exception {
353: Collection allWikis = WikiBase.getDataHandler()
354: .getVirtualWikiList(null);
355: Topic topic;
356: for (Iterator iterator = allWikis.iterator(); iterator
357: .hasNext();) {
358: long start = System.currentTimeMillis();
359: int count = 0;
360: VirtualWiki virtualWiki = (VirtualWiki) iterator.next();
361: FSDirectory directory = FSDirectory.getDirectory(this
362: .getSearchIndexPath(virtualWiki.getName()));
363: KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
364: IndexWriter writer = null;
365: // FIXME - move synchronization to the writer instance for this directory
366: try {
367: writer = new IndexWriter(directory,
368: new StandardAnalyzer(), true);
369: Collection topicNames = WikiBase.getDataHandler()
370: .getAllTopicNames(virtualWiki.getName());
371: for (Iterator iter = topicNames.iterator(); iter
372: .hasNext();) {
373: String topicName = (String) iter.next();
374: topic = WikiBase.getDataHandler().lookupTopic(
375: virtualWiki.getName(), topicName, false,
376: null);
377: Document standardDocument = createStandardDocument(topic);
378: writer.addDocument(standardDocument);
379: // FIXME - parsing all documents will be intolerably slow with even a
380: // moderately large Wiki
381: ParserOutput parserOutput = ParserUtil
382: .parserOutput(topic.getTopicContent(),
383: virtualWiki.getName(), topicName);
384: Document keywordDocument = createKeywordDocument(
385: topic, parserOutput.getLinks());
386: writer
387: .addDocument(keywordDocument,
388: keywordAnalyzer);
389: count++;
390: }
391: } catch (Exception ex) {
392: logger.severe("Failure while refreshing search index",
393: ex);
394: } finally {
395: try {
396: if (writer != null) {
397: writer.optimize();
398: }
399: } catch (Exception e) {
400: logger.severe("Exception during optimize", e);
401: }
402: try {
403: if (writer != null) {
404: writer.close();
405: }
406: } catch (Exception e) {
407: logger.severe("Exception during close", e);
408: }
409: }
410: directory.close();
411: logger.info("Rebuilt search index for "
412: + virtualWiki.getName() + " (" + count
413: + " documents) in "
414: + ((System.currentTimeMillis() - start) / 1000.000)
415: + " seconds");
416: }
417: }
418:
419: /**
420: *
421: */
422: private String retrieveResultSummary(Document document,
423: Highlighter highlighter, StandardAnalyzer analyzer)
424: throws Exception {
425: String content = document.get(ITYPE_CONTENT_PLAIN);
426: TokenStream tokenStream = analyzer.tokenStream(
427: ITYPE_CONTENT_PLAIN, new StringReader(content));
428: String summary = highlighter.getBestFragments(tokenStream,
429: content, 3, "...");
430: if (StringUtils.isBlank(summary)
431: && !StringUtils.isBlank(content)) {
432: summary = StringEscapeUtils.escapeHtml(content.substring(0,
433: Math.min(200, content.length())));
434: if (Math.min(200, content.length()) == 200) {
435: summary += "...";
436: }
437: }
438: return summary;
439: }
440: }
|