001: /*
002: * Copyright (c) JForum Team
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms,
006: * with or without modification, are permitted provided
007: * that the following conditions are met:
008: *
009: * 1) Redistributions of source code must retain the above
010: * copyright notice, this list of conditions and the
011: * following disclaimer.
012: * 2) Redistributions in binary form must reproduce the
013: * above copyright notice, this list of conditions and
014: * the following disclaimer in the documentation and/or
015: * other materials provided with the distribution.
016: * 3) Neither the name of "Rafael Steil" nor
017: * the names of its contributors may be used to endorse
018: * or promote products derived from this software without
019: * specific prior written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
022: * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
023: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
024: * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
025: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
026: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
027: * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
028: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
029: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
030: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
031: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
032: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
033: * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
034: * IN CONTRACT, STRICT LIABILITY, OR TORT
035: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
036: * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
037: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
038: *
039: * Created on 27/07/2007 15:10:51
040: *
041: * The JForum Project
042: * http://www.jforum.net
043: */
044: package net.jforum.search;
045:
046: import java.io.IOException;
047: import java.io.StringReader;
048: import java.util.Iterator;
049: import java.util.List;
050:
051: import net.jforum.dao.DataAccessDriver;
052: import net.jforum.entities.Post;
053: import net.jforum.exceptions.ForumException;
054:
055: import org.apache.lucene.analysis.TokenStream;
056: import org.apache.lucene.document.Document;
057: import org.apache.lucene.search.Hits;
058: import org.apache.lucene.search.Query;
059: import org.apache.lucene.search.highlight.Highlighter;
060: import org.apache.lucene.search.highlight.QueryScorer;
061: import org.apache.lucene.search.highlight.Scorer;
062:
063: /**
064: * @author Rafael Steil
065: * @version $Id: LuceneContentCollector.java,v 1.8 2007/07/30 14:06:44 rafaelsteil Exp $
066: */
067: public class LuceneContentCollector implements LuceneResultCollector {
068: private LuceneSettings settings;
069:
070: public LuceneContentCollector(LuceneSettings settings) {
071: this .settings = settings;
072: }
073:
074: /**
075: * @see net.jforum.search.LuceneResultCollector#collect(SearchArgs, org.apache.lucene.search.Hits, org.apache.lucene.search.Query)
076: */
077: public List collect(SearchArgs args, Hits hits, Query query) {
078: try {
079: int[] postIds = new int[Math.min(args.fetchCount(), hits
080: .length())];
081:
082: for (int docIndex = args.startFrom(), i = 0; docIndex < args
083: .startFrom()
084: + args.fetchCount()
085: && docIndex < hits.length(); docIndex++, i++) {
086: Document doc = hits.doc(docIndex);
087: postIds[i] = Integer.parseInt(doc
088: .get(SearchFields.Keyword.POST_ID));
089: }
090:
091: return this .retrieveRealPosts(postIds, query);
092: } catch (Exception e) {
093: throw new ForumException(e.toString(), e);
094: }
095: }
096:
097: private List retrieveRealPosts(int[] postIds, Query query)
098: throws IOException {
099: List posts = DataAccessDriver.getInstance().newLuceneDAO()
100: .getPostsData(postIds);
101:
102: for (Iterator iter = posts.iterator(); iter.hasNext();) {
103: Post post = (Post) iter.next();
104:
105: Scorer scorer = new QueryScorer(query);
106: Highlighter highlighter = new Highlighter(scorer);
107:
108: TokenStream tokenStream = this .settings.analyzer()
109: .tokenStream(SearchFields.Indexed.CONTENTS,
110: new StringReader(post.getText()));
111:
112: String fragment = highlighter.getBestFragment(tokenStream,
113: post.getText());
114: post.setText(fragment != null ? fragment : post.getText());
115: }
116:
117: return posts;
118: }
119: }
|