Source Code Cross Referenced for LuceneSearchProviderEx.java in  » Project-Management » EmForce » ru » emdev » EmForge » wiki » providers » searchprovider » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Project Management » EmForce » ru.emdev.EmForge.wiki.providers.searchprovider 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package ru.emdev.EmForge.wiki.providers.searchprovider;
002:
003:        import java.io.File;
004:        import java.io.IOException;
005:        import java.io.InputStream;
006:        import java.io.InputStreamReader;
007:        import java.io.StringReader;
008:        import java.io.StringWriter;
009:        import java.util.Collection;
010:        import java.util.Date;
011:        import java.util.LinkedList;
012:        import java.util.Properties;
013:        import java.util.Vector;
014:
015:        import org.acegisecurity.Authentication;
016:        import org.acegisecurity.context.SecurityContextHolder;
017:        import org.acegisecurity.providers.UsernamePasswordAuthenticationToken;
018:        import org.apache.commons.lang.StringUtils;
019:        import org.apache.commons.logging.Log;
020:        import org.apache.commons.logging.LogFactory;
021:        import org.apache.lucene.analysis.Analyzer;
022:        import org.apache.lucene.analysis.TokenStream;
023:        import org.apache.lucene.document.Document;
024:        import org.apache.lucene.document.Field;
025:        import org.apache.lucene.index.IndexReader;
026:        import org.apache.lucene.index.IndexWriter;
027:        import org.apache.lucene.index.Term;
028:        import org.apache.lucene.queryParser.MultiFieldQueryParser;
029:        import org.apache.lucene.queryParser.ParseException;
030:        import org.apache.lucene.queryParser.QueryParser;
031:        import org.apache.lucene.search.Hits;
032:        import org.apache.lucene.search.IndexSearcher;
033:        import org.apache.lucene.search.Query;
034:        import org.apache.lucene.search.Searcher;
035:        import org.apache.lucene.search.highlight.Highlighter;
036:        import org.apache.lucene.search.highlight.QueryScorer;
037:        import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
038:        import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
039:        import org.apache.lucene.store.Directory;
040:        import org.apache.lucene.store.FSDirectory;
041:        import org.emforge.BpmService;
042:        import org.emforge.xfer.CommentTO;
043:        import org.emforge.xfer.TaskTO;
044:        import org.springframework.transaction.PlatformTransactionManager;
045:        import org.springframework.transaction.TransactionStatus;
046:        import org.springframework.transaction.support.TransactionCallbackWithoutResult;
047:        import org.springframework.transaction.support.TransactionTemplate;
048:
049:        import ru.emdev.EmForge.security.EmForgeUserImpl;
050:        import ru.emdev.EmForge.util.Helper;
051:
052:        import com.ecyrd.jspwiki.FileUtil;
053:        import com.ecyrd.jspwiki.InternalWikiException;
054:        import com.ecyrd.jspwiki.NoRequiredPropertyException;
055:        import com.ecyrd.jspwiki.SearchResult;
056:        import com.ecyrd.jspwiki.TextUtil;
057:        import com.ecyrd.jspwiki.WikiEngine;
058:        import com.ecyrd.jspwiki.WikiPage;
059:        import com.ecyrd.jspwiki.WikiProvider;
060:        import com.ecyrd.jspwiki.attachment.Attachment;
061:        import com.ecyrd.jspwiki.attachment.AttachmentManager;
062:        import com.ecyrd.jspwiki.providers.ProviderException;
063:        import com.ecyrd.jspwiki.providers.WikiPageProvider;
064:        import com.ecyrd.jspwiki.search.SearchProvider;
065:        import com.ecyrd.jspwiki.util.ClassUtil;
066:        import com.ecyrd.jspwiki.util.WatchDog;
067:        import com.ecyrd.jspwiki.util.WikiBackgroundThread;
068:
069:        /** Extends the abilities of "com.ecyrd.jspwiki.search.LuceneSearchProvider" (2.2.21)
070:         * in order to search attachment contents, process names and task comments
071:         * along with searching the Wiki.
072:         * (Lucene source since 2.2.21)
073:         * 
074:         * @author spopov
075:         * @since 0.20
076:         * 
077:         */
078:        public class LuceneSearchProviderEx implements  SearchProvider {
079:            protected final Log log = LogFactory.getLog(getClass());
080:
081:            private WikiEngine m_engine;
082:
083:            private BpmService m_bpmService;
084:            private TransactionTemplate transactionTemplate;
085:
086:            private String userName;
087:
088:            private String userPassword;
089:
090:            // Lucene properties.
091:
092:            /** Which analyzer to use.  Default is StandardAnalyzer. */
093:            public static final String PROP_LUCENE_ANALYZER = "jspwiki.lucene.analyzer";
094:
095:            private static final String PROP_LUCENE_INDEXDELAY = "jspwiki.lucene.indexdelay";
096:            private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";
097:
098:            private String m_analyzerClass = "org.apache.lucene.analysis.standard.StandardAnalyzer";
099:
100:            private static final String LUCENE_DIR = "lucene";
101:
102:            // Number of page updates before we optimize the index.
103:            public static final int LUCENE_OPTIMIZE_COUNT = 10;
104:            protected static final String LUCENE_ID = "id";
105:            protected static final String LUCENE_PAGE_CONTENTS = "contents";
106:            protected static final String LUCENE_AUTHOR = "author";
107:            protected static final String LUCENE_ATTACHMENTS = "attachment";
108:            protected static final String LUCENE_PAGE_NAME = "name";
109:            protected static final String LUCENE_PROCESS_TITLE = "processtitle";
110:            protected static final String LUCENE_PROCESS_COMMENTS = "processcomments";
111:
112:            private String m_luceneDirectory = null;
113:            private int m_updateCount = 0;
114:            protected Vector<Object[]> m_updates = new Vector<Object[]>(); // Vector because multi-threaded.
115:
116:            /** Maximum number of fragments from search matches. */
117:            private static final int MAX_FRAGMENTS = 3;
118:
119:            public void setBpmService(BpmService service) {
120:                m_bpmService = service;
121:            }
122:
123:            public void setTxManager(PlatformTransactionManager i_txManager) {
124:                transactionTemplate = new TransactionTemplate(i_txManager);
125:            }
126:
127:            public void initialize(WikiEngine engine, Properties props)
128:                    throws NoRequiredPropertyException, IOException {
129:                m_engine = engine;
130:
131:                m_luceneDirectory = engine.getWorkDir() + File.separator
132:                        + LUCENE_DIR;
133:
134:                int initialDelay = TextUtil.getIntegerProperty(props,
135:                        PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY);
136:                int indexDelay = TextUtil.getIntegerProperty(props,
137:                        PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY);
138:
139:                m_analyzerClass = TextUtil.getStringProperty(props,
140:                        PROP_LUCENE_ANALYZER, m_analyzerClass);
141:                // FIXME: Just to be simple for now, we will do full reindex
142:                // only if no files are in lucene directory.
143:
144:                // since this method may be called in separate thread and acegi security context may be not initialized
145:                // we should initialize it here: since it may be used during processing.
146:                // We do not need initialy it is default user is anonymous (null)
147:                // as well as we do not need initialize it if it is already initialized
148:                // related bug is: ^66026
149:                if (!((userName.equals("")) || (userPassword.equals("")))
150:                        && SecurityContextHolder.getContext()
151:                                .getAuthentication() == null) {
152:                    EmForgeUserImpl emForgeUser = new EmForgeUserImpl(userName,
153:                            userPassword, "", null, null, true, null);
154:                    Authentication auth = new UsernamePasswordAuthenticationToken(
155:                            emForgeUser, emForgeUser);
156:                    SecurityContextHolder.getContext().setAuthentication(auth);
157:                }
158:
159:                File dir = new File(m_luceneDirectory);
160:
161:                log.info("Lucene enabled, cache will be in: "
162:                        + dir.getAbsolutePath());
163:
164:                try {
165:                    if (!dir.exists()) {
166:                        dir.mkdirs();
167:                    }
168:
169:                    if (!dir.exists() || !dir.canWrite() || !dir.canRead()) {
170:                        log
171:                                .error("Cannot write to Lucene directory, disabling Lucene: "
172:                                        + dir.getAbsolutePath());
173:                        throw new IOException("Invalid Lucene directory.");
174:                    }
175:
176:                    String[] filelist = dir.list();
177:
178:                    if (filelist == null) {
179:                        throw new IOException(
180:                                "Invalid Lucene directory: cannot produce listing: "
181:                                        + dir.getAbsolutePath());
182:                    }
183:                } catch (IOException e) {
184:                    log
185:                            .error(
186:                                    "Problem while creating Lucene index - not using Lucene.",
187:                                    e);
188:                }
189:
190:                // Start the Lucene update thread, which waits first
191:                // for a little while before starting to go through
192:                // the Lucene "pages that need updating".
193:                LuceneUpdater updater = new LuceneUpdater(m_engine, this ,
194:                        initialDelay, indexDelay);
195:                updater.start();
196:            }
197:
198:            /**
199:             *  Returns the handling engine. 
200:             */
201:            protected WikiEngine getEngine() {
202:                return m_engine;
203:            }
204:
205:            /**
206:             *  Performs a full Lucene reindex, if necessary.
207:             *  @throws IOException
208:             */
209:            @SuppressWarnings("unchecked")
210:            protected void doFullLuceneReindex() throws IOException {
211:                File dir = new File(m_luceneDirectory);
212:
213:                String[] filelist = dir.list();
214:
215:                if (filelist == null) {
216:                    throw new IOException(
217:                            "Invalid Lucene directory: cannot produce listing: "
218:                                    + dir.getAbsolutePath());
219:                }
220:
221:                try {
222:                    if (filelist.length == 0) {
223:                        //
224:                        //  No files? Reindex!
225:                        //
226:                        Date start = new Date();
227:                        IndexWriter writer = null;
228:
229:                        log
230:                                .info("Starting Lucene reindexing, this can take a couple minutes...");
231:
232:                        //
233:                        //  Do lock recovery, in case JSPWiki was shut down forcibly
234:                        //
235:                        Directory luceneDir = FSDirectory.getDirectory(dir,
236:                                false);
237:
238:                        if (IndexReader.isLocked(luceneDir)) {
239:                            log
240:                                    .info("JSPWiki was shut down while Lucene was indexing - unlocking now.");
241:                            IndexReader.unlock(luceneDir);
242:                        }
243:
244:                        try {
245:                            writer = new IndexWriter(m_luceneDirectory,
246:                                    getLuceneAnalyzer(), true);
247:                            Collection<WikiPage> allPages = m_engine
248:                                    .getPageManager().getAllPages();
249:
250:                            for (WikiPage page : allPages) {
251:                                String text = m_engine.getPageManager()
252:                                        .getPageText(page.getName(),
253:                                                WikiProvider.LATEST_VERSION);
254:                                luceneIndexPage(page, text, writer);
255:                            }
256:
257:                            Collection<Attachment> allAttachments = m_engine
258:                                    .getAttachmentManager().getAllAttachments();
259:                            for (Attachment att : allAttachments) {
260:                                String text = getAttachmentContent(att
261:                                        .getName(), WikiProvider.LATEST_VERSION);
262:                                luceneIndexPage(att, text, writer);
263:                            }
264:
265:                            writer.optimize();
266:                        } finally {
267:                            try {
268:                                if (writer != null)
269:                                    writer.close();
270:                            } catch (IOException e) {
271:                            }
272:                        }
273:
274:                        Date end = new Date();
275:                        log.info("Full Lucene index finished in "
276:                                + (end.getTime() - start.getTime())
277:                                + " milliseconds.");
278:                    } else {
279:                        log
280:                                .info("Files found in Lucene directory, not reindexing.");
281:                    }
282:                } catch (NoClassDefFoundError e) {
283:                    log
284:                            .info("Lucene libraries do not exist - not using Lucene.");
285:                } catch (IOException e) {
286:                    log
287:                            .error(
288:                                    "Problem while creating Lucene index - not using Lucene.",
289:                                    e);
290:                } catch (ProviderException e) {
291:                    log
292:                            .error(
293:                                    "Problem reading pages while creating Lucene index (JSPWiki won't start.)",
294:                                    e);
295:                    throw new IllegalArgumentException(
296:                            "unable to create Lucene index");
297:                } catch (ClassNotFoundException e) {
298:                    log.error("Illegal Analyzer specified:", e);
299:                } catch (Exception e) {
300:                    log.error("Unable to start lucene", e);
301:                }
302:
303:            }
304:
305:            /**
306:             *  Fetches the attachment content from the repository.
307:             *  Content is flat text that can be used for indexing/searching or display
308:             */
309:            private String getAttachmentContent(String attachmentName,
310:                    int version) {
311:                AttachmentManager mgr = m_engine.getAttachmentManager();
312:
313:                try {
314:                    Attachment att = mgr.getAttachmentInfo(attachmentName,
315:                            version);
316:                    //FIXME: Find out why sometimes att is null
317:                    if (att != null) {
318:                        return getAttachmentContent(att);
319:                    }
320:                } catch (ProviderException e) {
321:                    log.error("Attachment cannot be loaded", e);
322:                }
323:                // Something was wrong, no result is returned.
324:                return null;
325:            }
326:
327:            /**
328:             * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment.
329:             * @return String representing the content of the file.
330:             * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.
331:             * This should be replaced /moved to Attachment search providers or some other 'plugable' wat to search attachments  
332:             */
333:            private String getAttachmentContent(Attachment att) {
334:                AttachmentManager mgr = m_engine.getAttachmentManager();
335:                //FIXME: Add attachment plugin structure
336:
337:                String filename = att.getFileName();
338:
339:                if (filename.endsWith(".txt") || filename.endsWith(".xml")
340:                        || filename.endsWith(".ini")
341:                        || filename.endsWith(".html")) {
342:                    InputStream attStream;
343:
344:                    try {
345:                        attStream = mgr.getAttachmentStream(att);
346:
347:                        StringWriter sout = new StringWriter();
348:                        FileUtil.copyContents(new InputStreamReader(attStream),
349:                                sout);
350:
351:                        attStream.close();
352:                        sout.close();
353:
354:                        return sout.toString();
355:                    } catch (ProviderException e) {
356:                        log.error("Attachment cannot be loaded", e);
357:                        return null;
358:                    } catch (IOException e) {
359:                        log.error("Attachment cannot be loaded", e);
360:                        return null;
361:                    }
362:                }
363:
364:                return null;
365:            }
366:
367:            protected synchronized void updateLuceneIndex(WikiPage page,
368:                    String text) {
369:                IndexWriter writer = null;
370:
371:                log.debug("Updating Lucene index for page '" + page.getName()
372:                        + "'...");
373:
374:                try {
375:                    pageRemoved(page);
376:
377:                    // Now add back the new version.
378:                    writer = new IndexWriter(m_luceneDirectory,
379:                            getLuceneAnalyzer(), false);
380:                    luceneIndexPage(page, text, writer);
381:                    m_updateCount++;
382:                    if (m_updateCount >= LUCENE_OPTIMIZE_COUNT) {
383:                        writer.optimize();
384:                        m_updateCount = 0;
385:                    }
386:                } catch (IOException e) {
387:                    log.error("Unable to update page '" + page.getName()
388:                            + "' from Lucene index", e);
389:                } catch (Exception e) {
390:                    log
391:                            .error(
392:                                    "Unexpected Lucene exception - please check configuration!",
393:                                    e);
394:                } finally {
395:                    try {
396:                        if (writer != null)
397:                            writer.close();
398:                    } catch (IOException e) {
399:                    }
400:                }
401:
402:                log.debug("Done updating Lucene index for page '"
403:                        + page.getName() + "'.");
404:            }
405:
406:            @SuppressWarnings("unchecked")
407:            private Analyzer getLuceneAnalyzer() throws ClassNotFoundException,
408:                    InstantiationException, IllegalAccessException {
409:                Class<Analyzer> clazz = ClassUtil
410:                        .findClass("", m_analyzerClass);
411:                Analyzer analyzer = clazz.newInstance();
412:                return analyzer;
413:            }
414:
415:            @SuppressWarnings("unchecked")
416:            protected void luceneIndexPage(final WikiPage page,
417:                    final String text, final IndexWriter writer)
418:                    throws IOException {
419:                log.info("Build index for page: " + page.getName());
420:
421:                transactionTemplate
422:                        .execute(new TransactionCallbackWithoutResult() {
423:
424:                            // the code in this method executes in a transactional context
425:                            public void doInTransactionWithoutResult(
426:                                    TransactionStatus status) {
427:                                // make a new, empty document
428:                                Document doc = new Document();
429:
430:                                if (text == null)
431:                                    return;
432:
433:                                // Raw name is the keyword we'll use to refer to this document for updates.
434:                                Field field = new Field(LUCENE_ID, page
435:                                        .getName(), Field.Store.YES,
436:                                        Field.Index.UN_TOKENIZED);
437:                                doc.add(field);
438:
439:                                // Body text.  It is stored in the doc for search contexts.
440:                                field = new Field(LUCENE_PAGE_CONTENTS, text,
441:                                        Field.Store.YES, Field.Index.TOKENIZED,
442:                                        Field.TermVector.NO);
443:                                doc.add(field);
444:
445:                                // Allow searching by page name. Both beautified and raw
446:                                field = new Field(LUCENE_PAGE_NAME, TextUtil
447:                                        .beautifyString(page.getName())
448:                                        + " " + page.getName(),
449:                                        Field.Store.YES, Field.Index.TOKENIZED,
450:                                        Field.TermVector.NO);
451:                                doc.add(field);
452:
453:                                // Allow searching by authorname
454:
455:                                if (page.getAuthor() != null) {
456:                                    field = new Field(LUCENE_AUTHOR, page
457:                                            .getAuthor(), Field.Store.YES,
458:                                            Field.Index.TOKENIZED,
459:                                            Field.TermVector.NO);
460:                                    doc.add(field);
461:                                }
462:
463:                                // Now add the names of the attachments of this page
464:                                try {
465:                                    Collection<Attachment> attachments = m_engine
466:                                            .getAttachmentManager()
467:                                            .listAttachments(page);
468:                                    if (attachments != null) {
469:                                        String attachmentNames = "";
470:
471:                                        for (Attachment att : attachments) {
472:                                            attachmentNames += att.getName()
473:                                                    + ";";
474:                                        }
475:                                        field = new Field(LUCENE_ATTACHMENTS,
476:                                                attachmentNames,
477:                                                Field.Store.YES,
478:                                                Field.Index.TOKENIZED,
479:                                                Field.TermVector.NO);
480:                                        doc.add(field);
481:                                    }
482:                                } catch (ProviderException e) {
483:                                    // Unable to read attachments
484:                                    log
485:                                            .error(
486:                                                    "Failed to get attachments for page",
487:                                                    e);
488:                                }
489:
490:                                try {
491:                                    luceneIndexProcessData(page, doc);
492:
493:                                    writer.addDocument(doc);
494:                                } catch (IOException ex) {
495:                                    log.error("Cannot reindex page "
496:                                            + page.getName(), ex);
497:                                }
498:                            }
499:                        });
500:            }
501:
502:            /**
503:             * index process data (title, comments) if page represents a description
504:             *  
505:             * @param wiki  
506:             * @param io_doc
507:             */
508:            protected void luceneIndexProcessData(WikiPage i_page,
509:                    Document io_doc) throws IOException {
510:                // don't do it if the page is attachment
511:                if (i_page instanceof  Attachment) {
512:                    return;
513:                }
514:                // check if the page represents a process description
515:                String taskId = i_page.getName();
516:                if (!Helper.isNumber(taskId)) {
517:                    return;
518:                }
519:                log.debug("indexing process title...");
520:
521:                try {
522:
523:                    TaskTO task = m_bpmService.getTask(Long.valueOf(taskId));
524:                    if (task != null) {
525:                        // allow to find in a process title
526:                        Field field = new Field(LUCENE_PROCESS_TITLE, task
527:                                .getTitle(), Field.Store.YES,
528:                                Field.Index.TOKENIZED, Field.TermVector.NO);
529:                        io_doc.add(field);
530:
531:                        // allow to find in comments
532:                        CommentTO[] comments = m_bpmService
533:                                .getTaskComments(task.getId());
534:
535:                        if (comments != null && comments.length > 0) {
536:                            StringBuffer commentMessages = new StringBuffer();
537:                            for (CommentTO comment : comments) {
538:                                commentMessages.append(comment.getMessage()
539:                                        + " ");
540:                            }
541:                            field = new Field(LUCENE_PROCESS_COMMENTS,
542:                                    commentMessages.toString(),
543:                                    Field.Store.YES, Field.Index.TOKENIZED,
544:                                    Field.TermVector.NO);
545:                            io_doc.add(field);
546:                        }
547:
548:                    }
549:                } catch (Exception e) {
550:                    log.warn("Process data indexing failed: processId "
551:                            + taskId + ": " + e.getMessage());
552:                    return;
553:                }
554:            }
555:
556:            public void pageRemoved(WikiPage page) {
557:                try {
558:                    // Must first remove existing version of page.
559:                    IndexReader reader = IndexReader.open(m_luceneDirectory);
560:                    reader.deleteDocuments(new Term(LUCENE_ID, page.getName()));
561:                    reader.close();
562:                } catch (IOException e) {
563:                    log.error("Unable to update page '" + page.getName()
564:                            + "' from Lucene index", e);
565:                }
566:            }
567:
568:            /**
569:             *  Adds a page-text pair to the lucene update queue.  Safe to call always
570:             */
571:            public void reindexPage(WikiPage page) {
572:                if (page != null) {
573:                    String text;
574:
575:                    // TODO: Think if this was better done in the thread itself?
576:
577:                    if (page instanceof  Attachment) {
578:                        text = getAttachmentContent((Attachment) page);
579:                    } else {
580:                        text = m_engine.getPureText(page);
581:                    }
582:
583:                    if (text != null) {
584:                        // Add work item to m_updates queue.
585:                        Object[] pair = new Object[2];
586:                        pair[0] = page;
587:                        pair[1] = text;
588:                        m_updates.add(pair);
589:                        log.debug("Scheduling page " + page.getName()
590:                                + " for index update");
591:                    }
592:                }
593:            }
594:
595:            public Collection<SearchResult> findPages(String query)
596:                    throws ProviderException {
597:                Searcher searcher = null;
598:                LinkedList<SearchResult> list = null;
599:
600:                try {
601:                    String[] queryfields = { LUCENE_PAGE_CONTENTS,
602:                            LUCENE_PAGE_NAME, LUCENE_AUTHOR,
603:                            LUCENE_ATTACHMENTS, LUCENE_PROCESS_TITLE,
604:                            LUCENE_PROCESS_COMMENTS };
605:                    QueryParser qp = new MultiFieldQueryParser(queryfields,
606:                            getLuceneAnalyzer());
607:
608:                    //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
609:
610:                    Query luceneQuery = null;
611:
612:                    try {
613:                        luceneQuery = qp.parse(query);
614:                    } catch (ParseException ex) {
615:                        // seems query is incorrect - let try to simplify query
616:                        query = simplifyQuery(query);
617:
618:                        if (!StringUtils.isEmpty(query)) {
619:                            luceneQuery = qp.parse(query);
620:                        }
621:                    }
622:
623:                    if (luceneQuery == null) {
624:                        return new LinkedList<SearchResult>();
625:                    }
626:
627:                    Highlighter highlighter = new Highlighter(
628:                            new SimpleHTMLFormatter(
629:                                    "<span class=\"searchmatch\">", "</span>"),
630:                            new SimpleHTMLEncoder(), new QueryScorer(
631:                                    luceneQuery));
632:
633:                    try {
634:                        searcher = new IndexSearcher(m_luceneDirectory);
635:                    } catch (Exception ex) {
636:                        log.info("Lucene not yet ready; indexing not started",
637:                                ex);
638:                        return null;
639:                    }
640:
641:                    Hits hits = searcher.search(luceneQuery);
642:
643:                    list = new LinkedList<SearchResult>();
644:                    for (int curr = 0; curr < hits.length(); curr++) {
645:                        Document doc = hits.doc(curr);
646:                        String pageName = doc.get(LUCENE_ID);
647:                        WikiPage page = m_engine.getPage(pageName,
648:                                WikiPageProvider.LATEST_VERSION);
649:
650:                        if (page != null) {
651:                            if (page instanceof  Attachment) {
652:                                // Currently attachments don't look nice on the search-results page
653:                                // When the search-results are cleaned up this can be enabled again.
654:                                String text2 = doc.get(LUCENE_ATTACHMENTS);
655:                                log.debug(text2);
656:                            }
657:
658:                            int score = (int) (hits.score(curr) * 100);
659:
660:                            // Get highlighted search contexts
661:                            String text = doc.get(LUCENE_PAGE_CONTENTS);
662:
663:                            String fragments[] = new String[0];
664:                            if (text != null) {
665:                                TokenStream tokenStream = getLuceneAnalyzer()
666:                                        .tokenStream(LUCENE_PAGE_CONTENTS,
667:                                                new StringReader(text));
668:                                fragments = highlighter.getBestFragments(
669:                                        tokenStream, text, MAX_FRAGMENTS);
670:
671:                            }
672:
673:                            SearchResult result = new SearchResultImpl(page,
674:                                    score, fragments);
675:                            list.add(result);
676:                        } else {
677:                            log
678:                                    .error("Lucene found a result page '"
679:                                            + pageName
680:                                            + "' that could not be loaded, removing from Lucene cache");
681:                            pageRemoved(new WikiPage(m_engine, pageName));
682:                        }
683:                    }
684:                } catch (IOException e) {
685:                    log.error("Failed during lucene search", e);
686:                } catch (InstantiationException e) {
687:                    log.error("Unable to get a Lucene analyzer", e);
688:                } catch (IllegalAccessException e) {
689:                    log.error("Unable to get a Lucene analyzer", e);
690:                } catch (ClassNotFoundException e) {
691:                    log.error("Specified Lucene analyzer does not exist", e);
692:                } catch (ParseException e) {
693:                    log.info("Broken query; cannot parse", e);
694:
695:                    throw new ProviderException(
696:                            "You have entered a query Lucene cannot process: "
697:                                    + e.getMessage());
698:                } finally {
699:                    if (searcher != null)
700:                        try {
701:                            searcher.close();
702:                        } catch (IOException e) {
703:                        }
704:                }
705:
706:                return list;
707:            }
708:
709:            private String simplifyQuery(String i_query) {
710:                String newQuery = i_query;
711:
712:                newQuery = newQuery.replace('!', ' ');
713:                newQuery = newQuery.replace('?', ' ');
714:                newQuery = newQuery.replace('*', ' ');
715:                newQuery = newQuery.trim();
716:
717:                return newQuery;
718:
719:            }
720:
721:            public String getProviderInfo() {
722:                return "LuceneSearchProviderEx";
723:            }
724:
725:            /**
726:             * Updater thread that updates Lucene indexes.
727:             */
728:            private static class LuceneUpdater extends WikiBackgroundThread {
729:                protected static final int INDEX_DELAY = 1;
730:                protected static final int INITIAL_DELAY = 60;
731:                private final LuceneSearchProviderEx m_provider;
732:
733:                private int m_initialDelay;
734:
735:                private WatchDog m_watchdog;
736:
737:                private LuceneUpdater(WikiEngine engine,
738:                        LuceneSearchProviderEx provider, int initialDelay,
739:                        int indexDelay) {
740:                    super (engine, indexDelay);
741:                    m_provider = provider;
742:                    setName("JSPWiki Lucene Indexer");
743:                }
744:
745:                public void startupTask() throws Exception {
746:                    m_watchdog = getEngine().getCurrentWatchDog();
747:
748:                    // Sleep initially...
749:                    try {
750:                        Thread.sleep(m_initialDelay * 1000L);
751:                    } catch (InterruptedException e) {
752:                        throw new InternalWikiException(
753:                                "Interrupted while waiting to start.");
754:                    }
755:
756:                    m_watchdog.enterState("Full reindex");
757:                    // Reindex everything
758:                    m_provider.doFullLuceneReindex();
759:                    m_watchdog.exitState();
760:                }
761:
762:                public void backgroundTask() throws Exception {
763:                    m_watchdog.enterState("Emptying index queue", 60);
764:
765:                    synchronized (m_provider.m_updates) {
766:                        while (m_provider.m_updates.size() > 0) {
767:                            Object[] pair = m_provider.m_updates.remove(0);
768:                            WikiPage page = (WikiPage) pair[0];
769:                            String text = (String) pair[1];
770:                            m_provider.updateLuceneIndex(page, text);
771:                        }
772:                    }
773:
774:                    m_watchdog.exitState();
775:                }
776:
777:            }
778:
779:            // FIXME: This class is dumb; needs to have a better implementation
780:            private static class SearchResultImpl implements  SearchResult {
781:                private WikiPage m_page;
782:                private int m_score;
783:                private String[] m_contexts;
784:
785:                public SearchResultImpl(WikiPage page, int score,
786:                        String[] contexts) {
787:                    m_page = page;
788:                    m_score = score;
789:                    m_contexts = contexts;
790:                }
791:
792:                public WikiPage getPage() {
793:                    return m_page;
794:                }
795:
796:                /* (non-Javadoc)
797:                 * @see com.ecyrd.jspwiki.SearchResult#getScore()
798:                 */
799:                public int getScore() {
800:                    return m_score;
801:                }
802:
803:                public String[] getContexts() {
804:                    return m_contexts;
805:                }
806:            }
807:
808:            public String getUserName() {
809:                return userName;
810:            }
811:
812:            public void setUserName(String userName) {
813:                this .userName = userName;
814:            }
815:
816:            public String getUserPassword() {
817:                return userPassword;
818:            }
819:
820:            public void setUserPassword(String userPassword) {
821:                this.userPassword = userPassword;
822:            }
823:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.