001: package prefuse.data.search;
002:
003: import java.io.IOException;
004: import java.util.HashMap;
005:
006: import org.apache.lucene.analysis.Analyzer;
007: import org.apache.lucene.analysis.standard.StandardAnalyzer;
008: import org.apache.lucene.document.Document;
009: import org.apache.lucene.index.IndexReader;
010: import org.apache.lucene.index.IndexWriter;
011: import org.apache.lucene.queryParser.MultiFieldQueryParser;
012: import org.apache.lucene.queryParser.ParseException;
013: import org.apache.lucene.queryParser.QueryParser;
014: import org.apache.lucene.search.Hits;
015: import org.apache.lucene.search.IndexSearcher;
016: import org.apache.lucene.search.Query;
017: import org.apache.lucene.search.Searcher;
018: import org.apache.lucene.store.Directory;
019: import org.apache.lucene.store.RAMDirectory;
020:
021: /**
022: * Adapter class for interfacing with the Lucene search engine. By default,
023: * instances of this class use an in-memory search index for English language
024: * text, for use within a single application session. The class can, however,
025: * be parameterized for any number of other configurations, including accessing
026: * persistent search indices.
027: *
028: * @version 1.0
029: * @author <a href="http://jheer.org">jeffrey heer</a>
030: */
031: public class LuceneSearcher {
032:
033: /** Default Document field used to index text. */
034: public static final String FIELD = "prefuse-text";
035: /** Document field used to store the document ID number. */
036: public static final String ID = "prefuse-id";
037:
038: private Directory directory;
039: private Analyzer analyzer;
040: private String[] fields;
041:
042: private Searcher searcher;
043: private IndexReader reader;
044: private IndexWriter writer;
045: private boolean m_readMode = true;
046: private boolean m_readOnly = false;
047:
048: private HashMap m_hitCountCache;
049:
050: /**
051: * Create a new LuceneSearcher using an in-memory search index.
052: */
053: public LuceneSearcher() {
054: this (new RAMDirectory(), FIELD, false);
055: }
056:
057: /**
058: * Create a new LuceneSearcher using the specified search index location.
059: * @param dir the Lucene Directory indicating the search index to use.
060: */
061: public LuceneSearcher(Directory dir) {
062: this (dir, FIELD, false);
063: }
064:
065: /**
066: * Create a new LuceneSearcher using a specified search index location,
067: * a particular Document field to index, and given read/write status.
068: * @param dir the Lucene Directory indicating the search index to use.
069: * @param field the Lucene Document field that should be indexed.
070: * @param readOnly if this index is read-only or is writable.
071: */
072: public LuceneSearcher(Directory dir, String field, boolean readOnly) {
073: this (dir, new String[] { field }, readOnly);
074: }
075:
076: /**
077: * Create a new LuceneSearcher using a specified search index location,
078: * a particular Document fields to index, and given read/write status.
079: * @param dir the Lucene Directory indicating the search index to use.
080: * @param fields the Lucene Document fields that should be indexed.
081: * @param readOnly if this index is read-only or is writable.
082: */
083: public LuceneSearcher(Directory dir, String[] fields,
084: boolean readOnly) {
085: m_hitCountCache = new HashMap();
086: directory = dir;
087: analyzer = new StandardAnalyzer();
088: this .fields = (String[]) fields.clone();
089: try {
090: writer = new IndexWriter(directory, analyzer, !readOnly);
091: writer.close();
092: writer = null;
093: } catch (IOException e1) {
094: e1.printStackTrace();
095: }
096: m_readOnly = readOnly;
097: if (!readOnly) {
098: setReadMode(false);
099: } else {
100: m_readMode = false;
101: setReadMode(true);
102: }
103: }
104:
105: // ------------------------------------------------------------------------
106:
107: /**
108: * Sets if this LuceneSearcher is in read mode or write mode. In read more
109: * searches can be issued, in write mode new Documents can be indexed.
110: * Read-only LuceneSearcher instances can not be put into write mode.
111: * @param mode true for read mode, false for write mode.
112: * @return true if the mode was successfully set, false otherwise.
113: */
114: public boolean setReadMode(boolean mode) {
115: // return false if this is read-only
116: if (m_readOnly && mode == false)
117: return false;
118: // do nothing if already in the mode
119: if (m_readMode == mode)
120: return true;
121: // otherwise switch modes
122: if (!mode) {
123: // close any open searcher and reader
124: try {
125: if (searcher != null)
126: searcher.close();
127: if (reader != null)
128: reader.close();
129: } catch (Exception e) {
130: e.printStackTrace();
131: return false;
132: }
133: // open the writer
134: try {
135: writer = new IndexWriter(directory, analyzer, false);
136: } catch (IOException e1) {
137: e1.printStackTrace();
138: return false;
139: }
140: } else {
141: // optimize index and close writer
142: try {
143: if (writer != null) {
144: writer.optimize();
145: writer.close();
146: }
147: } catch (IOException e1) {
148: e1.printStackTrace();
149: return false;
150: }
151: // open the reader and searcher
152: try {
153: reader = IndexReader.open(directory);
154: searcher = new IndexSearcher(reader);
155: } catch (Exception e) {
156: e.printStackTrace();
157: return false;
158: }
159: }
160: m_readMode = mode;
161: return true;
162: }
163:
164: /**
165: * Searches the Lucene index using the given query String, returns an object
166: * which provides access to the search results.
167: * @param query the search query
168: * @return the search Hits
169: * @throws ParseException if the query is not parsed successfully
170: * @throws IOException if an input/ouput error occurs
171: * @throws IllegalStateException if the searcher is in write mode
172: */
173: public Hits search(String query) throws ParseException, IOException {
174: if (m_readMode) {
175: Query q;
176: if (fields.length == 1) {
177: q = QueryParser.parse(query, fields[0], analyzer);
178: } else {
179: q = MultiFieldQueryParser
180: .parse(query, fields, analyzer);
181: }
182: return searcher.search(q);
183: } else {
184: throw new IllegalStateException(
185: "Searches can only be performed when "
186: + "the LuceneSearcher is in read mode");
187: }
188: }
189:
190: /**
191: * Return the result count for the given search query. To allow quick
192: * repeated look ups, the hit count is cached (this cache is cleared
193: * whenever a change to the search index occurs).
194: * @param query the search query
195: * @return the number of matches to the query
196: * @throws ParseException if the query is not parsed successfully
197: * @throws IOException if an input/ouput error occurs
198: * @throws IllegalStateException if the searcher is in write mode
199: */
200: public int numHits(String query) throws ParseException, IOException {
201: Integer count;
202: if ((count = (Integer) m_hitCountCache.get(query)) == null) {
203: Hits hits = search(query);
204: count = new Integer(hits.length());
205: m_hitCountCache.put(query, count);
206: }
207: return count.intValue();
208: }
209:
210: /**
211: * Add a document to the Lucene search index.
212: * @param d the Document to add
213: * @throws IllegalStateException if the searcher is not in write mode
214: */
215: public void addDocument(Document d) {
216: if (!m_readMode) {
217: try {
218: writer.addDocument(d);
219: m_hitCountCache.clear();
220: } catch (IOException e) {
221: e.printStackTrace();
222: }
223: } else {
224: throw new IllegalStateException(
225: "Documents can not be added to the index unless"
226: + "the LuceneSearcher is not in read mode");
227: }
228: }
229:
230: /**
231: * Returns the Analyzer used to process text. See Lucene documentation
232: * for more details.
233: * @return returns the analyzer.
234: */
235: public Analyzer getAnalyzer() {
236: return analyzer;
237: }
238:
239: /**
240: * Sets the Analyzer used to process text. See Lucene documentation
241: * for more details.
242: * @param analyzer the analyzer to set
243: */
244: public void setAnalyzer(Analyzer analyzer) {
245: this .analyzer = analyzer;
246: }
247:
248: /**
249: * Returns the indexed Document fields. These fields determine which
250: * fields are indexed as Documents are added and which fields are
251: * queried over when searches are issued.
252: * @return returns the indexed Document fields
253: */
254: public String[] getFields() {
255: return (String[]) fields.clone();
256: }
257:
258: /**
259: * Sets the indexed Document fields. These fields determine which
260: * fields are indexed as Documents are added and which fields are
261: * queried over when searches are issued.
262: * param fields the indexed Document fields to use
263: */
264: public void setFields(String[] fields) {
265: this .fields = (String[]) fields.clone();
266: }
267:
268: /**
269: * Returns the Lucene IndexReader. See Lucene documentation
270: * for more details.
271: * @return teturns the IndexReader.
272: */
273: public IndexReader getIndexReader() {
274: return reader;
275: }
276:
277: /**
278: * Returns the Lucene IndexSearcher. See Lucene documentation
279: * for more details.
280: * @return returns the IndexSearcher.
281: */
282: public Searcher getIndexSearcher() {
283: return searcher;
284: }
285:
286: /**
287: * Indicates if ths LuceneSearcher is read-only.
288: * @return true if read-only, false if writes are allowed
289: */
290: public boolean isReadOnly() {
291: return m_readOnly;
292: }
293:
294: } // end of class LuceneSearcher
|