01: /*
02: * Original from: lucene source code package
03: * Changes as described in http://www.geocrawler.com/archives/3/2624/2000/11/0/4746798/
04: */
05:
06: package vqwiki.utils.lucene;
07:
08: /* ====================================================================
09: * The Apache Software License, Version 1.1
10: *
11: * Copyright (c) 2001 The Apache Software Foundation. All rights
12: * reserved.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: *
18: * 1. Redistributions of source code must retain the above copyright
19: * notice, this list of conditions and the following disclaimer.
20: *
21: * 2. Redistributions in binary form must reproduce the above copyright
22: * notice, this list of conditions and the following disclaimer in
23: * the documentation and/or other materials provided with the
24: * distribution.
25: *
26: * 3. The end-user documentation included with the redistribution,
27: * if any, must include the following acknowledgment:
28: * "This product includes software developed by the
29: * Apache Software Foundation (http://www.apache.org/)."
30: * Alternately, this acknowledgment may appear in the software itself,
31: * if and wherever such third-party acknowledgments normally appear.
32: *
33: * 4. The names "Apache" and "Apache Software Foundation" and
34: * "Apache Lucene" must not be used to endorse or promote products
35: * derived from this software without prior written permission. For
36: * written permission, please contact apache@apache.org.
37: *
38: * 5. Products derived from this software may not be called "Apache",
39: * "Apache Lucene", nor may "Apache" appear in their name, without
40: * prior written permission of the Apache Software Foundation.
41: *
42: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
43: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
45: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
46: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
49: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
50: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
51: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
52: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53: * SUCH DAMAGE.
54: * ====================================================================
55: *
56: * This software consists of voluntary contributions made by many
57: * individuals on behalf of the Apache Software Foundation. For more
58: * information on the Apache Software Foundation, please see
59: * <http://www.apache.org/>.
60: */
61:
62: import java.io.Reader;
63: import java.util.Hashtable;
64:
65: import org.apache.lucene.analysis.Analyzer;
66: import org.apache.lucene.analysis.StopFilter;
67: import org.apache.lucene.analysis.TokenStream;
68:
69: /** Filters LetterTokenizer with LowerCaseFilter and StopFilter. */
70:
71: public final class StopKeepNumbersAnalyzer extends Analyzer {
72: private Hashtable stopTable;
73:
74: /** An array containing some common English words that are not usually useful
75: for searching. */
76: public static final String[] ENGLISH_STOP_WORDS = { "a", "and",
77: "are", "as", "at", "be", "but", "by", "for", "if", "in",
78: "into", "is", "it", "no", "not", "of", "on", "or", "s",
79: "such", "t", "that", "the", "their", "then", "there",
80: "these", "they", "this", "to", "was", "will", "with" };
81:
82: /** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */
83: public StopKeepNumbersAnalyzer() {
84: stopTable = StopFilter.makeStopTable(ENGLISH_STOP_WORDS);
85: }
86:
87: /** Builds an analyzer which removes words in the provided array. */
88: public StopKeepNumbersAnalyzer(String[] stopWords) {
89: stopTable = StopFilter.makeStopTable(stopWords);
90: }
91:
92: /** Filters LowerCaseTokenizer with StopFilter. */
93: public final TokenStream tokenStream(String fieldName, Reader reader) {
94: return new StopFilter(
95: new LowerCaseKeepNumbersTokenizer(reader), stopTable);
96: }
97: }
|