package org.apache.lucene.wordnet;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.*;
import org.apache.lucene.search.*;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import java.util.*;

/**
 * Test program to look up synonyms in a WordNet index built by {@link Syns2Index},
 * where each document stores a word in the {@link Syns2Index#F_WORD} field and its
 * synonyms as {@link Syns2Index#F_SYN} values.
 * <p>
 * Usage: <code>java org.apache.lucene.wordnet.SynLookup &lt;index path&gt; &lt;word&gt;</code>
 */
public class SynLookup {

  public static void main(String[] args) throws IOException {
    if (args.length != 2) {
      System.out.println("java org.apache.lucene.wordnet.SynLookup <index path> <word>");
      return;
    }

    FSDirectory directory = FSDirectory.getDirectory(args[0], false);
    IndexSearcher searcher = new IndexSearcher(directory);

    String word = args[1];

    // Look the word up in the F_WORD field of the synonym index.
    Hits hits = searcher.search(new TermQuery(new Term(Syns2Index.F_WORD, word)));

    if (hits.length() == 0) {
      System.out.println("No synonyms found for " + word);
    } else {
      System.out.println("Synonyms found for \"" + word + "\":");
    }

    // Each matching document lists its synonyms as multiple F_SYN values.
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);

      String[] values = doc.getValues(Syns2Index.F_SYN);

      for (int j = 0; j < values.length; j++) {
        System.out.println(values[j]);
      }
    }

    searcher.close();
    directory.close();
  }

  /**
   * Perform synonym expansion on a query. A brief usage sketch follows this method.
   *
   * @param query the user's query, which is tokenized with the given analyzer
   * @param syns a searcher over the synonym index built by {@link Syns2Index}
   * @param a analyzer used to tokenize the user's query
   * @param field field name against which the expanded query is built
   * @param boost boost applied to each synonym clause; a value of 0 or less leaves
   *        the default boost of 1.0
   * @return a {@link BooleanQuery} combining the original terms and their synonyms
   *         as optional (SHOULD) clauses
   * @throws IOException if the synonym index cannot be read
   */
  public static Query expand(String query, Searcher syns, Analyzer a,
      String field, float boost) throws IOException {
    Set<String> already = new HashSet<String>(); // avoid dups
    List<String> top = new LinkedList<String>(); // unique words of the query, in order

    // [1] Parse the query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream(field, new StringReader(query));
    org.apache.lucene.analysis.Token t;
    while ((t = ts.next()) != null) {
      String word = t.termText();
      if (already.add(word))
        top.add(word);
    }
    ts.close();
    BooleanQuery tmp = new BooleanQuery();

    // [2] form query
    for (String word : top) {
      // [2a] add the original word as a top-level clause
      TermQuery tq = new TermQuery(new Term(field, word));
      tmp.add(tq, BooleanClause.Occur.SHOULD);

      // [2b] add in unique synonyms
      Hits hits = syns.search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
      for (int i = 0; i < hits.length(); i++) {
        Document doc = hits.doc(i);
        String[] values = doc.getValues(Syns2Index.F_SYN);
        for (int j = 0; j < values.length; j++) {
          String syn = values[j];
          if (already.add(syn)) {
            tq = new TermQuery(new Term(field, syn));
            if (boost > 0) // else keep the default boost of 1.0
              tq.setBoost(boost);
            tmp.add(tq, BooleanClause.Occur.SHOULD);
          }
        }
      }
    }

    return tmp;
  }
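
  /*
   * Illustrative sketch of how expand() might be wired into a search: expand the
   * user's query against a WordNet synonym index, then run the expanded query
   * against a separate content index. The index paths, the "contents" field name,
   * the SimpleAnalyzer, and the 0.9f boost are assumptions for this example only.
   */
  private static void expandExample(String userQuery) throws IOException {
    FSDirectory synDirectory = FSDirectory.getDirectory("/path/to/wordnet_index", false);     // assumed path
    FSDirectory contentDirectory = FSDirectory.getDirectory("/path/to/content_index", false); // assumed path
    IndexSearcher synSearcher = new IndexSearcher(synDirectory);
    IndexSearcher contentSearcher = new IndexSearcher(contentDirectory);

    // Expand the user's query with synonyms from the WordNet index, giving the
    // synonym clauses a lower weight than the original terms.
    Query expanded = expand(userQuery, synSearcher, new SimpleAnalyzer(), "contents", 0.9f);

    Hits hits = contentSearcher.search(expanded);
    System.out.println(hits.length() + " hits for: " + expanded.toString("contents"));

    contentSearcher.close();
    synSearcher.close();
    contentDirectory.close();
    synDirectory.close();
  }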

}