001: /*
002:
003: * LIUS - Lucene Index Update and Search
004: * http://sourceforge.net/projects/lius/
005: *
006: * Copyright (c) 2005, Laval University Library. All rights reserved.
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
021: */
022:
023: package ca.ulaval.bibl.lius.index.TXT;
024:
025: import java.io.BufferedReader;
026: import java.io.FileInputStream;
027: import java.io.FileNotFoundException;
028: import java.io.IOException;
029: import java.io.InputStream;
030: import java.io.InputStreamReader;
031: import java.util.ArrayList;
032: import java.util.Collection;
033: import java.util.Iterator;
034:
035: import org.apache.log4j.Logger;
036: import org.apache.lucene.document.Document;
037:
038: import ca.ulaval.bibl.lius.Lucene.LuceneActions;
039: import ca.ulaval.bibl.lius.config.LiusConfig;
040: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
041: import ca.ulaval.bibl.lius.config.LiusField;
042: import ca.ulaval.bibl.lius.index.Indexer;
043:
044: /**
045: *
046: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
047: *
048: */
049:
050: public class TXTIndexer extends Indexer {
051:
052: static Logger logger = Logger.getRootLogger();
053:
054: public static Object parse(Object file) {
055:
056: StringBuffer sb = new StringBuffer();
057:
058: try {
059:
060: InputStream stream = new FileInputStream((String) file);
061:
062: BufferedReader br = new BufferedReader(
063: new InputStreamReader(stream));
064:
065: String line = null;
066:
067: while ((line = br.readLine()) != null) {
068:
069: sb.append(line);
070:
071: sb.append(" ");
072:
073: }
074:
075: }
076:
077: catch (FileNotFoundException ex) {
078:
079: logger.error(ex.getMessage());
080:
081: }
082:
083: catch (IOException ex1) {
084:
085: logger.error(ex1.getMessage());
086:
087: }
088:
089: return sb.toString();
090:
091: }
092:
093: public Document createLuceneDocument(String file, LiusConfig lc) {
094:
095: Document doc = createLuceneDocument(file, lc.getTxtFields());
096:
097: return doc;
098:
099: }
100:
101: public Collection getLiusFields(LiusConfig lc) {
102:
103: return lc.getTxtFields();
104:
105: }
106:
107: public Collection getPopulatedCollection(Object file,
108: Collection liusFields) {
109:
110: LuceneActions la = LuceneActions.getSingletonInstance();
111:
112: Collection coll = new ArrayList();
113:
114: Iterator it = liusFields.iterator();
115:
116: while (it.hasNext()) {
117:
118: Object field = it.next();
119:
120: if (field instanceof LiusField) {
121:
122: LiusField lf = (LiusField) field;
123:
124: if (lf.getGet() != null) {
125:
126: if (lf.getGet().equalsIgnoreCase("content")) {
127:
128: String content = (String) parse((String) file);
129:
130: lf.setValue(content);
131:
132: coll.add(lf);
133:
134: }
135:
136: }
137:
138: }
139:
140: else {
141:
142: coll.add(field);
143:
144: }
145:
146: }
147:
148: return coll;
149:
150: }
151:
152: public Collection getPopulatedCollection(Object file,
153: String liusConfig) {
154:
155: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
156: .getLiusConfig(
157:
158: liusConfig);
159:
160: return getPopulatedCollection(file, lc);
161:
162: }
163:
164: public Collection getPopulatedCollection(Object file, LiusConfig lc) {
165:
166: return getPopulatedCollection(file, lc.getTxtFields());
167:
168: }
169:
170: }
|