001: package org.apache.lucene.analysis.nl;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.File;
021: import java.io.FileReader;
022: import java.io.IOException;
023: import java.io.LineNumberReader;
024: import java.util.HashMap;
025:
/**
 * Loads a text file and adds every line as an entry to a HashMap. Every line
 * should contain only one word. If the file is not found, or if any other
 * error occurs while reading it, an empty table is returned.
 *
 * @author Gerhard Schwarz
 * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader} instead
 */
public class WordlistLoader {

  /**
   * Loads the wordlist <code>wordfile</code> located in directory <code>path</code>.
   *
   * @param path Path to the wordlist
   * @param wordfile Name of the wordlist
   * @return a HashMap mapping each word to itself; empty if either argument is
   *         <code>null</code> or on any I/O error
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(String path, String wordfile) {
    if (path == null || wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(path, wordfile));
  }

  /**
   * Loads the wordlist named by the complete path <code>wordfile</code>.
   *
   * @param wordfile Complete path to the wordlist
   * @return a HashMap mapping each word to itself; empty on a <code>null</code>
   *         argument or on any I/O error
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(String wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(wordfile));
  }

  /**
   * Reads a stem dictionary. Each line contains:
   * <pre>word<b>\t</b>stem</pre>
   * i.e. two tab separated entries per line. Lines without a tab are skipped.
   *
   * @param wordstemfile the dictionary file to read
   * @return Stem dictionary that overrules the stemming algorithm; on an I/O
   *         error it contains whatever entries were read before the failure
   *         (possibly none)
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getStemDict(File)} instead
   */
  public static HashMap getStemDict(File wordstemfile) {
    HashMap result = new HashMap();
    if (wordstemfile == null) {
      return result;
    }
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordstemfile));
      String line;
      while ((line = lnr.readLine()) != null) {
        String[] wordstem = line.split("\t", 2);
        // Guard against malformed lines (no tab): the previous version failed
        // with an uncaught ArrayIndexOutOfBoundsException here.
        if (wordstem.length == 2) {
          result.put(wordstem[0], wordstem[1]);
        }
      }
    } catch (IOException e) {
      // Documented contract: on any error return what was read so far
      // (possibly an empty table).
    } finally {
      closeQuietly(lnr);
    }
    return result;
  }

  /**
   * Loads the wordlist from <code>wordfile</code>; every line is expected to
   * contain exactly one word.
   *
   * @param wordfile File containing the wordlist
   * @return a HashMap mapping each word to itself; empty on a <code>null</code>
   *         argument or on any I/O error
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(File wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    // Put each line straight into the table; the previous version first
    // buffered the words in a manually grown array for no benefit.
    HashMap result = new HashMap();
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordfile));
      String word;
      while ((word = lnr.readLine()) != null) {
        result.put(word, word);
      }
    } catch (IOException e) {
      // Documented contract: on any error an empty table is returned,
      // discarding anything read before the failure.
      result = new HashMap();
    } finally {
      closeQuietly(lnr);
    }
    return result;
  }

  /**
   * Closes <code>reader</code>, ignoring any error during close; a no-op for
   * <code>null</code>. Fixes the reader leak in the previous version, which
   * never closed its streams.
   */
  private static void closeQuietly(LineNumberReader reader) {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException e) {
        // nothing sensible to do if close itself fails
      }
    }
  }
}
|