001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.handler;
017:
018: import org.apache.lucene.index.IndexReader;
019: import org.apache.lucene.search.IndexSearcher;
020: import org.apache.lucene.search.spell.Dictionary;
021: import org.apache.lucene.search.spell.LuceneDictionary;
022: import org.apache.lucene.search.spell.SpellChecker;
023: import org.apache.lucene.store.Directory;
024: import org.apache.lucene.store.FSDirectory;
025: import org.apache.lucene.store.RAMDirectory;
026: import org.apache.solr.request.SolrParams;
027: import org.apache.solr.request.SolrQueryRequest;
028: import org.apache.solr.request.SolrQueryResponse;
029: import org.apache.solr.util.NamedList;
030: import org.apache.solr.core.SolrCore;
031: import org.apache.solr.core.SolrException;
032:
033: import java.io.File;
034: import java.io.IOException;
035: import java.net.URL;
036: import java.util.Arrays;
037: import java.util.logging.Logger;
038:
039: /**
040: * Takes a string (e.g. a query string) as the value of the "q" parameter
041: * and looks up alternative spelling suggestions in the spellchecker.
042: * The spellchecker used by this handler is the Lucene contrib SpellChecker.
043: * @see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">The Lucene Spellchecker documentation</a>
044: *
045: * @author Otis Gospodnetic
046: */
047: public class SpellCheckerRequestHandler extends RequestHandlerBase {
048:
049: private static Logger log = Logger
050: .getLogger(SpellCheckerRequestHandler.class.getName());
051:
052: private SpellChecker spellChecker;
053:
054: /*
055: * From http://wiki.apache.org/jakarta-lucene/SpellChecker
056: * If reader and restrictToField are both not null:
057: * 1. The returned words are restricted only to the words presents in the field
058: * "restrictToField "of the Lucene Index "reader".
059: *
060: * 2. The list is also sorted with a second criterium: the popularity (the
061: * frequence) of the word in the user field.
062: *
063: * 3. If "onlyMorePopular" is true and the mispelled word exist in the user field,
064: * return only the words more frequent than this.
065: *
066: */
067: private static IndexReader nullReader = null;
068: private String restrictToField = null;
069: private boolean onlyMorePopular = false;
070:
071: private Directory spellcheckerIndexDir = new RAMDirectory();
072: private String dirDescription = "(ramdir)";
073: private String termSourceField;
074: private static final float DEFAULT_ACCURACY = 0.5f;
075: private static final int DEFAULT_NUM_SUGGESTIONS = 1;
076:
077: public void init(NamedList args) {
078: super .init(args);
079: SolrParams p = SolrParams.toSolrParams(args);
080: termSourceField = p.get("termSourceField");
081:
082: try {
083: String dir = p.get("spellcheckerIndexDir");
084: if (null != dir) {
085: File f = new File(dir);
086: if (!f.isAbsolute()) {
087: f = new File(SolrCore.getSolrCore().getDataDir(),
088: dir);
089: }
090: dirDescription = f.getAbsolutePath();
091: log.info("using spell directory: " + dirDescription);
092: spellcheckerIndexDir = FSDirectory.getDirectory(f);
093: } else {
094: log.info("using RAM based spell directory");
095: }
096: spellChecker = new SpellChecker(spellcheckerIndexDir);
097: } catch (IOException e) {
098: throw new RuntimeException(
099: "Cannot open SpellChecker index", e);
100: }
101: }
102:
103: public void handleRequestBody(SolrQueryRequest req,
104: SolrQueryResponse rsp) throws Exception {
105: SolrParams p = req.getParams();
106: String words = p.get("q");
107: String cmd = p.get("cmd");
108: if (cmd != null) {
109: cmd = cmd.trim();
110: if (cmd.equals("rebuild")) {
111: rebuild(req);
112: rsp.add("cmdExecuted", "rebuild");
113: } else if (cmd.equals("reopen")) {
114: reopen();
115: rsp.add("cmdExecuted", "reopen");
116: } else {
117: throw new SolrException(
118: SolrException.ErrorCode.BAD_REQUEST,
119: "Unrecognized Command: " + cmd);
120: }
121: }
122:
123: Float accuracy;
124: int numSug;
125: try {
126: accuracy = p.getFloat("accuracy", DEFAULT_ACCURACY);
127: spellChecker.setAccuracy(accuracy);
128: } catch (NumberFormatException e) {
129: throw new RuntimeException(
130: "Accuracy must be a valid positive float", e);
131: }
132: try {
133: numSug = p.getInt("suggestionCount",
134: DEFAULT_NUM_SUGGESTIONS);
135: } catch (NumberFormatException e) {
136: throw new RuntimeException(
137: "Spelling suggestion count must be a valid positive integer",
138: e);
139: }
140:
141: if (null != words && !"".equals(words.trim())) {
142: String[] suggestions = spellChecker.suggestSimilar(words,
143: numSug, nullReader, restrictToField,
144: onlyMorePopular);
145:
146: rsp.add("suggestions", Arrays.asList(suggestions));
147: }
148: }
149:
150: /** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
151: * index pointed to by the current {@link IndexSearcher}.
152: */
153: private void rebuild(SolrQueryRequest req) throws IOException,
154: SolrException {
155: if (null == termSourceField) {
156: throw new SolrException(
157: SolrException.ErrorCode.SERVER_ERROR,
158: "can't rebuild spellchecker index without termSourceField configured");
159: }
160:
161: IndexReader indexReader = req.getSearcher().getReader();
162: Dictionary dictionary = new LuceneDictionary(indexReader,
163: termSourceField);
164: spellChecker.clearIndex();
165: spellChecker.indexDictionary(dictionary);
166: reopen();
167: }
168:
169: /**
170: * Reopens the SpellChecker index directory.
171: * Useful if an external process is responsible for building
172: * the spell checker index.
173: */
174: private void reopen() throws IOException {
175: spellChecker.setSpellIndex(spellcheckerIndexDir);
176: }
177:
178: //////////////////////// SolrInfoMBeans methods //////////////////////
179:
180: public String getVersion() {
181: return "$Revision: 542679 $";
182: }
183:
184: public String getDescription() {
185: return "The SpellChecker Solr request handler for SpellChecker index: "
186: + dirDescription;
187: }
188:
189: public String getSourceId() {
190: return "$Id: SpellCheckerRequestHandler.java 542679 2007-05-29 22:28:21Z ryan $";
191: }
192:
193: public String getSource() {
194: return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.2/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java $";
195: }
196:
197: public URL[] getDocs() {
198: return null;
199: }
200: }
|