Source Code Cross Referenced for LuceneSearchProvider.java in  » Wiki-Engine » JSPWiki » com » ecyrd » jspwiki » search » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Wiki Engine » JSPWiki » com.ecyrd.jspwiki.search 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:        JSPWiki - a JSP-based WikiWiki clone.
003:
004:        Copyright (C) 2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)
005:
006:        This program is free software; you can redistribute it and/or modify
007:        it under the terms of the GNU Lesser General Public License as published by
008:        the Free Software Foundation; either version 2.1 of the License, or
009:        (at your option) any later version.
010:
011:        This program is distributed in the hope that it will be useful,
012:        but WITHOUT ANY WARRANTY; without even the implied warranty of
013:        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014:        GNU Lesser General Public License for more details.
015:
016:        You should have received a copy of the GNU Lesser General Public License
017:        along with this program; if not, write to the Free Software
018:        Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
019:         */
020:        package com.ecyrd.jspwiki.search;
021:
022:        import java.io.*;
023:        import java.util.*;
024:
025:        import org.apache.commons.lang.StringUtils;
026:        import org.apache.log4j.Logger;
027:        import org.apache.lucene.analysis.Analyzer;
028:        import org.apache.lucene.analysis.TokenStream;
029:        import org.apache.lucene.document.Document;
030:        import org.apache.lucene.document.Field;
031:        import org.apache.lucene.index.IndexReader;
032:        import org.apache.lucene.index.IndexWriter;
033:        import org.apache.lucene.index.Term;
034:        import org.apache.lucene.queryParser.MultiFieldQueryParser;
035:        import org.apache.lucene.queryParser.ParseException;
036:        import org.apache.lucene.queryParser.QueryParser;
037:        import org.apache.lucene.search.Hits;
038:        import org.apache.lucene.search.IndexSearcher;
039:        import org.apache.lucene.search.Query;
040:        import org.apache.lucene.search.Searcher;
041:        import org.apache.lucene.search.highlight.Highlighter;
042:        import org.apache.lucene.search.highlight.QueryScorer;
043:        import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
044:        import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
045:        import org.apache.lucene.store.Directory;
046:        import org.apache.lucene.store.FSDirectory;
047:
048:        import com.ecyrd.jspwiki.*;
049:        import com.ecyrd.jspwiki.attachment.Attachment;
050:        import com.ecyrd.jspwiki.attachment.AttachmentManager;
051:        import com.ecyrd.jspwiki.parser.MarkupParser;
052:        import com.ecyrd.jspwiki.providers.ProviderException;
053:        import com.ecyrd.jspwiki.providers.WikiPageProvider;
054:        import com.ecyrd.jspwiki.util.ClassUtil;
055:        import com.ecyrd.jspwiki.util.WatchDog;
056:        import com.ecyrd.jspwiki.util.WikiBackgroundThread;
057:
058:        /**
059:         *  Search provider implementation that uses a Lucene index to search the Wiki
060:         *
061:         *  @author Arent-Jan Banck
062:         *  @since 2.2.21.
063:         */
064:        public class LuceneSearchProvider implements  SearchProvider {
065:            protected static final Logger log = Logger
066:                    .getLogger(LuceneSearchProvider.class);
067:
068:            private WikiEngine m_engine;
069:
070:            // Lucene properties.
071:
072:            /** Which analyzer to use.  Default is StandardAnalyzer. */
073:            public static final String PROP_LUCENE_ANALYZER = "jspwiki.lucene.analyzer";
074:
075:            private static final String PROP_LUCENE_INDEXDELAY = "jspwiki.lucene.indexdelay";
076:            private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";
077:
078:            private String m_analyzerClass = "org.apache.lucene.analysis.standard.StandardAnalyzer";
079:
080:            private static final String LUCENE_DIR = "lucene";
081:
082:            /**
083:             *  Number of page updates before we optimize the index.
084:             */
085:            public static final int LUCENE_OPTIMIZE_COUNT = 10;
086:            protected static final String LUCENE_ID = "id";
087:            protected static final String LUCENE_PAGE_CONTENTS = "contents";
088:            protected static final String LUCENE_AUTHOR = "author";
089:            protected static final String LUCENE_ATTACHMENTS = "attachment";
090:            protected static final String LUCENE_PAGE_NAME = "name";
091:
092:            private String m_luceneDirectory = null;
093:            private int m_updateCount = 0;
094:            protected Vector m_updates = new Vector(); // Vector because multi-threaded.
095:
096:            /** Maximum number of fragments from search matches. */
097:            private static final int MAX_FRAGMENTS = 3;
098:
099:            private static String c_punctuationSpaces = StringUtils.repeat(" ",
100:                    MarkupParser.PUNCTUATION_CHARS_ALLOWED.length());
101:
102:            /**
103:             *  {@inheritDoc}
104:             */
105:            public void initialize(WikiEngine engine, Properties props)
106:                    throws NoRequiredPropertyException, IOException {
107:                m_engine = engine;
108:
109:                m_luceneDirectory = engine.getWorkDir() + File.separator
110:                        + LUCENE_DIR;
111:
112:                int initialDelay = TextUtil.getIntegerProperty(props,
113:                        PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY);
114:                int indexDelay = TextUtil.getIntegerProperty(props,
115:                        PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY);
116:
117:                m_analyzerClass = TextUtil.getStringProperty(props,
118:                        PROP_LUCENE_ANALYZER, m_analyzerClass);
119:                // FIXME: Just to be simple for now, we will do full reindex
120:                // only if no files are in lucene directory.
121:
122:                File dir = new File(m_luceneDirectory);
123:
124:                log.info("Lucene enabled, cache will be in: "
125:                        + dir.getAbsolutePath());
126:
127:                try {
128:                    if (!dir.exists()) {
129:                        dir.mkdirs();
130:                    }
131:
132:                    if (!dir.exists() || !dir.canWrite() || !dir.canRead()) {
133:                        log
134:                                .error("Cannot write to Lucene directory, disabling Lucene: "
135:                                        + dir.getAbsolutePath());
136:                        throw new IOException("Invalid Lucene directory.");
137:                    }
138:
139:                    String[] filelist = dir.list();
140:
141:                    if (filelist == null) {
142:                        throw new IOException(
143:                                "Invalid Lucene directory: cannot produce listing: "
144:                                        + dir.getAbsolutePath());
145:                    }
146:                } catch (IOException e) {
147:                    log
148:                            .error(
149:                                    "Problem while creating Lucene index - not using Lucene.",
150:                                    e);
151:                }
152:
153:                // Start the Lucene update thread, which waits first
154:                // for a little while before starting to go through
155:                // the Lucene "pages that need updating".
156:                LuceneUpdater updater = new LuceneUpdater(m_engine, this ,
157:                        initialDelay, indexDelay);
158:                updater.start();
159:            }
160:
161:            /**
162:             *  Returns the handling engine.
163:             *
164:             *  @return Current WikiEngine
165:             */
166:            protected WikiEngine getEngine() {
167:                return m_engine;
168:            }
169:
170:            /**
171:             *  Performs a full Lucene reindex, if necessary.
172:             *
173:             *  @throws IOException If there's a problem during indexing
174:             */
175:            protected void doFullLuceneReindex() throws IOException {
176:                File dir = new File(m_luceneDirectory);
177:
178:                String[] filelist = dir.list();
179:
180:                if (filelist == null) {
181:                    throw new IOException(
182:                            "Invalid Lucene directory: cannot produce listing: "
183:                                    + dir.getAbsolutePath());
184:                }
185:
186:                try {
187:                    if (filelist.length == 0) {
188:                        //
189:                        //  No files? Reindex!
190:                        //
191:                        Date start = new Date();
192:                        IndexWriter writer = null;
193:
194:                        log
195:                                .info("Starting Lucene reindexing, this can take a couple minutes...");
196:
197:                        //
198:                        //  Do lock recovery, in case JSPWiki was shut down forcibly
199:                        //
200:                        Directory luceneDir = FSDirectory.getDirectory(dir,
201:                                false);
202:
203:                        if (IndexReader.isLocked(luceneDir)) {
204:                            log
205:                                    .info("JSPWiki was shut down while Lucene was indexing - unlocking now.");
206:                            IndexReader.unlock(luceneDir);
207:                        }
208:
209:                        try {
210:                            writer = new IndexWriter(m_luceneDirectory,
211:                                    getLuceneAnalyzer(), true);
212:                            Collection allPages = m_engine.getPageManager()
213:                                    .getAllPages();
214:
215:                            for (Iterator iterator = allPages.iterator(); iterator
216:                                    .hasNext();) {
217:                                WikiPage page = (WikiPage) iterator.next();
218:                                String text = m_engine.getPageManager()
219:                                        .getPageText(page.getName(),
220:                                                WikiProvider.LATEST_VERSION);
221:                                luceneIndexPage(page, text, writer);
222:                            }
223:
224:                            Collection allAttachments = m_engine
225:                                    .getAttachmentManager().getAllAttachments();
226:                            for (Iterator iterator = allAttachments.iterator(); iterator
227:                                    .hasNext();) {
228:                                Attachment att = (Attachment) iterator.next();
229:                                String text = getAttachmentContent(att
230:                                        .getName(), WikiProvider.LATEST_VERSION);
231:                                luceneIndexPage(att, text, writer);
232:                            }
233:
234:                            writer.optimize();
235:                        } finally {
236:                            try {
237:                                if (writer != null)
238:                                    writer.close();
239:                            } catch (IOException e) {
240:                            }
241:                        }
242:
243:                        Date end = new Date();
244:                        log.info("Full Lucene index finished in "
245:                                + (end.getTime() - start.getTime())
246:                                + " milliseconds.");
247:                    } else {
248:                        log
249:                                .info("Files found in Lucene directory, not reindexing.");
250:                    }
251:                } catch (NoClassDefFoundError e) {
252:                    log
253:                            .info("Lucene libraries do not exist - not using Lucene.");
254:                } catch (IOException e) {
255:                    log
256:                            .error(
257:                                    "Problem while creating Lucene index - not using Lucene.",
258:                                    e);
259:                } catch (ProviderException e) {
260:                    log
261:                            .error(
262:                                    "Problem reading pages while creating Lucene index (JSPWiki won't start.)",
263:                                    e);
264:                    throw new IllegalArgumentException(
265:                            "unable to create Lucene index");
266:                } catch (ClassNotFoundException e) {
267:                    log.error("Illegal Analyzer specified:", e);
268:                } catch (Exception e) {
269:                    log.error("Unable to start lucene", e);
270:                }
271:
272:            }
273:
274:            /**
275:             *  Fetches the attachment content from the repository.
276:             *  Content is flat text that can be used for indexing/searching or display
277:             */
278:            protected String getAttachmentContent(String attachmentName,
279:                    int version) {
280:                AttachmentManager mgr = m_engine.getAttachmentManager();
281:
282:                try {
283:                    Attachment att = mgr.getAttachmentInfo(attachmentName,
284:                            version);
285:                    //FIXME: Find out why sometimes att is null
286:                    if (att != null) {
287:                        return getAttachmentContent(att);
288:                    }
289:                } catch (ProviderException e) {
290:                    log.error("Attachment cannot be loaded", e);
291:                }
292:                // Something was wrong, no result is returned.
293:                return null;
294:            }
295:
296:            /**
297:             * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment.
298:             * @return String representing the content of the file.
299:             * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.
300:             * This should be replaced /moved to Attachment search providers or some other 'plugable' wat to search attachments
301:             */
302:            protected String getAttachmentContent(Attachment att) {
303:                AttachmentManager mgr = m_engine.getAttachmentManager();
304:                //FIXME: Add attachment plugin structure
305:
306:                String filename = att.getFileName();
307:
308:                if (filename.endsWith(".txt") || filename.endsWith(".xml")
309:                        || filename.endsWith(".ini")
310:                        || filename.endsWith(".html")) {
311:                    InputStream attStream;
312:
313:                    try {
314:                        attStream = mgr.getAttachmentStream(att);
315:
316:                        StringWriter sout = new StringWriter();
317:                        FileUtil.copyContents(new InputStreamReader(attStream),
318:                                sout);
319:
320:                        attStream.close();
321:                        sout.close();
322:
323:                        return sout.toString();
324:                    } catch (ProviderException e) {
325:                        log.error("Attachment cannot be loaded", e);
326:                        return null;
327:                    } catch (IOException e) {
328:                        log.error("Attachment cannot be loaded", e);
329:                        return null;
330:                    }
331:                }
332:
333:                return null;
334:            }
335:
336:            /**
337:             *  Updates the lucene index for a single page.
338:             *
339:             *  @param page The WikiPage to check
340:             *  @param text The page text to index.
341:             */
342:            protected synchronized void updateLuceneIndex(WikiPage page,
343:                    String text) {
344:                IndexWriter writer = null;
345:
346:                log.debug("Updating Lucene index for page '" + page.getName()
347:                        + "'...");
348:
349:                try {
350:                    pageRemoved(page);
351:
352:                    // Now add back the new version.
353:                    writer = new IndexWriter(m_luceneDirectory,
354:                            getLuceneAnalyzer(), false);
355:                    luceneIndexPage(page, text, writer);
356:                    m_updateCount++;
357:                    if (m_updateCount >= LUCENE_OPTIMIZE_COUNT) {
358:                        writer.optimize();
359:                        m_updateCount = 0;
360:                    }
361:                } catch (IOException e) {
362:                    log.error("Unable to update page '" + page.getName()
363:                            + "' from Lucene index", e);
364:                } catch (Exception e) {
365:                    log
366:                            .error(
367:                                    "Unexpected Lucene exception - please check configuration!",
368:                                    e);
369:                } finally {
370:                    try {
371:                        if (writer != null)
372:                            writer.close();
373:                    } catch (IOException e) {
374:                    }
375:                }
376:
377:                log.debug("Done updating Lucene index for page '"
378:                        + page.getName() + "'.");
379:            }
380:
381:            private Analyzer getLuceneAnalyzer() throws ClassNotFoundException,
382:                    InstantiationException, IllegalAccessException {
383:                Class clazz = ClassUtil.findClass("", m_analyzerClass);
384:                Analyzer analyzer = (Analyzer) clazz.newInstance();
385:                return analyzer;
386:            }
387:
388:            /**
389:             *  Indexes page using the given IndexWriter.
390:             *
391:             *  @param page WikiPage
392:             *  @param text Page text to index
393:             *  @param writer The Lucene IndexWriter to use for indexing
394:             *  @return the created index Document
395:             *  @throws IOException If there's an indexing problem
396:             */
397:            protected Document luceneIndexPage(WikiPage page, String text,
398:                    IndexWriter writer) throws IOException {
399:                // make a new, empty document
400:                Document doc = new Document();
401:
402:                if (text == null)
403:                    return doc;
404:
405:                // Raw name is the keyword we'll use to refer to this document for updates.
406:                Field field = new Field(LUCENE_ID, page.getName(),
407:                        Field.Store.YES, Field.Index.UN_TOKENIZED);
408:                doc.add(field);
409:
410:                // Body text.  It is stored in the doc for search contexts.
411:                field = new Field(LUCENE_PAGE_CONTENTS, text, Field.Store.YES,
412:                        Field.Index.TOKENIZED, Field.TermVector.NO);
413:                doc.add(field);
414:
415:                // Allow searching by page name. Both beautified and raw
416:                String unTokenizedTitle = StringUtils.replaceChars(page
417:                        .getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED,
418:                        c_punctuationSpaces);
419:
420:                field = new Field(LUCENE_PAGE_NAME, TextUtil
421:                        .beautifyString(page.getName())
422:                        + " " + unTokenizedTitle, Field.Store.YES,
423:                        Field.Index.TOKENIZED, Field.TermVector.NO);
424:                doc.add(field);
425:
426:                // Allow searching by authorname
427:
428:                if (page.getAuthor() != null) {
429:                    field = new Field(LUCENE_AUTHOR, page.getAuthor(),
430:                            Field.Store.YES, Field.Index.TOKENIZED,
431:                            Field.TermVector.NO);
432:                    doc.add(field);
433:                }
434:
435:                // Now add the names of the attachments of this page
436:                try {
437:                    Collection attachments = m_engine.getAttachmentManager()
438:                            .listAttachments(page);
439:                    String attachmentNames = "";
440:
441:                    for (Iterator it = attachments.iterator(); it.hasNext();) {
442:                        Attachment att = (Attachment) it.next();
443:                        attachmentNames += att.getName() + ";";
444:                    }
445:                    field = new Field(LUCENE_ATTACHMENTS, attachmentNames,
446:                            Field.Store.YES, Field.Index.TOKENIZED,
447:                            Field.TermVector.NO);
448:                    doc.add(field);
449:
450:                } catch (ProviderException e) {
451:                    // Unable to read attachments
452:                    log.error("Failed to get attachments for page", e);
453:                }
454:                writer.addDocument(doc);
455:
456:                return doc;
457:            }
458:
459:            /**
460:             *  {@inheritDoc}
461:             */
462:            public void pageRemoved(WikiPage page) {
463:                try {
464:                    // Must first remove existing version of page.
465:                    IndexReader reader = IndexReader.open(m_luceneDirectory);
466:                    reader.deleteDocuments(new Term(LUCENE_ID, page.getName()));
467:                    reader.close();
468:                } catch (IOException e) {
469:                    log.error("Unable to update page '" + page.getName()
470:                            + "' from Lucene index", e);
471:                }
472:            }
473:
474:            /**
475:             *  Adds a page-text pair to the lucene update queue.  Safe to call always
476:             *
477:             *  @param page WikiPage to add to the update queue.
478:             */
479:            public void reindexPage(WikiPage page) {
480:                if (page != null) {
481:                    String text;
482:
483:                    // TODO: Think if this was better done in the thread itself?
484:
485:                    if (page instanceof  Attachment) {
486:                        text = getAttachmentContent((Attachment) page);
487:                    } else {
488:                        text = m_engine.getPureText(page);
489:                    }
490:
491:                    if (text != null) {
492:                        // Add work item to m_updates queue.
493:                        Object[] pair = new Object[2];
494:                        pair[0] = page;
495:                        pair[1] = text;
496:                        m_updates.add(pair);
497:                        log.debug("Scheduling page " + page.getName()
498:                                + " for index update");
499:                    }
500:                }
501:            }
502:
503:            /**
504:             *  {@inheritDoc}
505:             */
506:            public Collection findPages(String query) throws ProviderException {
507:                return findPages(query, FLAG_CONTEXTS);
508:            }
509:
510:            /**
511:             *  Create contexts also.  Generating contexts can be expensive,
512:             *  so they're not on by default.
513:             */
514:            public static final int FLAG_CONTEXTS = 0x01;
515:
516:            /**
517:             *  Searches pages using a particular combination of flags.
518:             *
519:             *  @param query The query to perform in Lucene query language
520:             *  @param flags A set of flags
521:             *  @return A Collection of SearchResult instances
522:             *  @throws ProviderException if there is a problem with the backend
523:             */
524:            public Collection findPages(String query, int flags)
525:                    throws ProviderException {
526:                Searcher searcher = null;
527:                ArrayList list = null;
528:                Highlighter highlighter = null;
529:
530:                try {
531:                    String[] queryfields = { LUCENE_PAGE_CONTENTS,
532:                            LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
533:                    QueryParser qp = new MultiFieldQueryParser(queryfields,
534:                            getLuceneAnalyzer());
535:
536:                    //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
537:                    Query luceneQuery = qp.parse(query);
538:
539:                    if ((flags & FLAG_CONTEXTS) != 0) {
540:                        highlighter = new Highlighter(new SimpleHTMLFormatter(
541:                                "<span class=\"searchmatch\">", "</span>"),
542:                                new SimpleHTMLEncoder(), new QueryScorer(
543:                                        luceneQuery));
544:                    }
545:
546:                    try {
547:                        searcher = new IndexSearcher(m_luceneDirectory);
548:                    } catch (Exception ex) {
549:                        log.info("Lucene not yet ready; indexing not started",
550:                                ex);
551:                        return null;
552:                    }
553:
554:                    Hits hits = searcher.search(luceneQuery);
555:
556:                    list = new ArrayList(hits.length());
557:                    for (int curr = 0; curr < hits.length(); curr++) {
558:                        Document doc = hits.doc(curr);
559:                        String pageName = doc.get(LUCENE_ID);
560:                        WikiPage page = m_engine.getPage(pageName,
561:                                WikiPageProvider.LATEST_VERSION);
562:
563:                        if (page != null) {
564:                            if (page instanceof  Attachment) {
565:                                // Currently attachments don't look nice on the search-results page
566:                                // When the search-results are cleaned up this can be enabled again.
567:                            }
568:
569:                            int score = (int) (hits.score(curr) * 100);
570:
571:                            // Get highlighted search contexts
572:                            String text = doc.get(LUCENE_PAGE_CONTENTS);
573:
574:                            String[] fragments = new String[0];
575:                            if (text != null && highlighter != null) {
576:                                TokenStream tokenStream = getLuceneAnalyzer()
577:                                        .tokenStream(LUCENE_PAGE_CONTENTS,
578:                                                new StringReader(text));
579:                                fragments = highlighter.getBestFragments(
580:                                        tokenStream, text, MAX_FRAGMENTS);
581:
582:                            }
583:
584:                            SearchResult result = new SearchResultImpl(page,
585:                                    score, fragments);
586:                            list.add(result);
587:                        } else {
588:                            log
589:                                    .error("Lucene found a result page '"
590:                                            + pageName
591:                                            + "' that could not be loaded, removing from Lucene cache");
592:                            pageRemoved(new WikiPage(m_engine, pageName));
593:                        }
594:                    }
595:                } catch (IOException e) {
596:                    log.error("Failed during lucene search", e);
597:                } catch (InstantiationException e) {
598:                    log.error("Unable to get a Lucene analyzer", e);
599:                } catch (IllegalAccessException e) {
600:                    log.error("Unable to get a Lucene analyzer", e);
601:                } catch (ClassNotFoundException e) {
602:                    log.error("Specified Lucene analyzer does not exist", e);
603:                } catch (ParseException e) {
604:                    log.info("Broken query; cannot parse", e);
605:
606:                    throw new ProviderException(
607:                            "You have entered a query Lucene cannot process: "
608:                                    + e.getMessage());
609:                } finally {
610:                    if (searcher != null) {
611:                        try {
612:                            searcher.close();
613:                        } catch (IOException e) {
614:                        }
615:                    }
616:                }
617:
618:                return list;
619:            }
620:
621:            /**
622:             *  {@inheritDoc}
623:             */
624:            public String getProviderInfo() {
625:                return "LuceneSearchProvider";
626:            }
627:
628:            /**
629:             * Updater thread that updates Lucene indexes.
630:             */
631:            private static final class LuceneUpdater extends
632:                    WikiBackgroundThread {
633:                protected static final int INDEX_DELAY = 1;
634:                protected static final int INITIAL_DELAY = 60;
635:                private final LuceneSearchProvider m_provider;
636:
637:                private int m_initialDelay;
638:
639:                private WatchDog m_watchdog;
640:
641:                private LuceneUpdater(WikiEngine engine,
642:                        LuceneSearchProvider provider, int initialDelay,
643:                        int indexDelay) {
644:                    super (engine, indexDelay);
645:                    m_provider = provider;
646:                    setName("JSPWiki Lucene Indexer");
647:                }
648:
649:                public void startupTask() throws Exception {
650:                    m_watchdog = getEngine().getCurrentWatchDog();
651:
652:                    // Sleep initially...
653:                    try {
654:                        Thread.sleep(m_initialDelay * 1000L);
655:                    } catch (InterruptedException e) {
656:                        throw new InternalWikiException(
657:                                "Interrupted while waiting to start.");
658:                    }
659:
660:                    m_watchdog.enterState("Full reindex");
661:                    // Reindex everything
662:                    m_provider.doFullLuceneReindex();
663:                    m_watchdog.exitState();
664:                }
665:
666:                public void backgroundTask() throws Exception {
667:                    m_watchdog.enterState("Emptying index queue", 60);
668:
669:                    synchronized (m_provider.m_updates) {
670:                        while (m_provider.m_updates.size() > 0) {
671:                            Object[] pair = (Object[]) m_provider.m_updates
672:                                    .remove(0);
673:                            WikiPage page = (WikiPage) pair[0];
674:                            String text = (String) pair[1];
675:                            m_provider.updateLuceneIndex(page, text);
676:                        }
677:                    }
678:
679:                    m_watchdog.exitState();
680:                }
681:
682:            }
683:
684:            // FIXME: This class is dumb; needs to have a better implementation
685:            private static class SearchResultImpl implements  SearchResult {
686:                private WikiPage m_page;
687:                private int m_score;
688:                private String[] m_contexts;
689:
690:                public SearchResultImpl(WikiPage page, int score,
691:                        String[] contexts) {
692:                    m_page = page;
693:                    m_score = score;
694:                    m_contexts = contexts;
695:                }
696:
697:                public WikiPage getPage() {
698:                    return m_page;
699:                }
700:
701:                /* (non-Javadoc)
702:                 * @see com.ecyrd.jspwiki.SearchResult#getScore()
703:                 */
704:                public int getScore() {
705:                    return m_score;
706:                }
707:
708:                public String[] getContexts() {
709:                    return m_contexts;
710:                }
711:            }
712:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.