001: /*
002:
003: * LIUS - Lucene Index Update and Search
004: * http://sourceforge.net/projects/lius/
005: *
006: * Copyright (c) 2005, Laval University Library. All rights reserved.
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
021: */
022:
023: package ca.ulaval.bibl.lius.index.RTF;
024:
025: import java.io.FileInputStream;
026: import java.io.IOException;
027: import java.util.ArrayList;
028: import java.util.Collection;
029: import java.util.Iterator;
030:
031: import javax.swing.text.BadLocationException;
032: import javax.swing.text.DefaultStyledDocument;
033: import javax.swing.text.rtf.RTFEditorKit;
034:
035: import org.apache.log4j.Logger;
036: import org.apache.lucene.document.Document;
037:
038: import ca.ulaval.bibl.lius.Lucene.LuceneActions;
039: import ca.ulaval.bibl.lius.config.LiusConfig;
040: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
041: import ca.ulaval.bibl.lius.config.LiusField;
042: import ca.ulaval.bibl.lius.index.Indexer;
043:
044: /**
045: *
046: * Classe permettant d'indexer des fichiers RTF.
047: *
048: * <br/><br/>
049: *
050: * Class that indexes RTF files.
051: *
052: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
053: *
054: */
055:
056: public class RTFIndexer
057:
058: extends Indexer {
059:
060: static Logger logger = Logger.getRootLogger();
061:
062: public Object parse(Object file) {
063:
064: String content = "";
065:
066: try {
067:
068: FileInputStream stream = new FileInputStream((String) file);
069:
070: DefaultStyledDocument sd = new DefaultStyledDocument();
071:
072: RTFEditorKit kit = new RTFEditorKit();
073:
074: kit.read(stream, sd, 0);
075:
076: content = sd.getText(0, sd.getLength());
077:
078: }
079:
080: catch (IOException e) {
081:
082: logger.error(e.getMessage());
083:
084: }
085:
086: catch (BadLocationException j) {
087:
088: logger.error(j.getMessage());
089:
090: }
091:
092: return content;
093:
094: }
095:
096: /**
097: *
098: * Méthode retournant un objet de type Lucene document à partir du fichier à
099: *
100: * indexer et du fichier de configuration de Lius exprimé sous forme d'objet
101: *
102: * de type LiusConfig.
103: *
104: * <br/><br/>
105: *
106: * Method that returns an Lucene Document object from the file to index and
107: *
108: * the configuration file as a LiusConfig object.
109: *
110: */
111:
112: public Document createLuceneDocument(String file, LiusConfig lc) {
113:
114: Document doc = createLuceneDocument(file, lc.getRtfFields());
115:
116: return doc;
117:
118: }
119:
120: /**
121: *
122: * Retourne une collection contenant les champs avec les valeurs à indexer
123: *
124: * comme par exemple : le texte integral, titre etc.
125: *
126: * <br/><br/>
127: *
128: * Returns a collection containing the fields with the values to index
129: *
130: * like : full text, title, etc.
131: *
132: */
133:
134: public Collection getPopulatedCollection(Object file,
135: Collection liusFields) {
136:
137: LuceneActions la = LuceneActions.getSingletonInstance();
138:
139: Collection coll = new ArrayList();
140:
141: Iterator it = liusFields.iterator();
142:
143: while (it.hasNext()) {
144:
145: Object field = it.next();
146:
147: if (field instanceof LiusField) {
148:
149: LiusField lf = (LiusField) field;
150:
151: if (lf.getGet() != null) {
152:
153: if (lf.getGet().equalsIgnoreCase("content")) {
154:
155: String text = (String) parse(file);
156:
157: lf.setValue(text);
158:
159: coll.add(lf);
160:
161: }
162:
163: }
164:
165: }
166:
167: else {
168:
169: coll.add(field);
170:
171: }
172:
173: }
174:
175: return coll;
176:
177: }
178:
179: public Collection getPopulatedCollection(Object file,
180: String liusConfig) {
181:
182: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
183: .getLiusConfig(
184:
185: liusConfig);
186:
187: return getPopulatedCollection(file, lc);
188:
189: }
190:
191: public Collection getPopulatedCollection(Object file, LiusConfig lc) {
192:
193: return getPopulatedCollection(file, lc.getRtfFields());
194:
195: }
196:
197: /**
198: *
199: * Permet de récupérer les champs de Lius à partir du fichier de
200: * configuration
201: *
202: * pour effectuer l'indexation.
203: *
204: * <br/><br/>
205: *
206: * Gets the Lius field from the configuration file for indexation.
207: *
208: */
209:
210: public Collection getLiusFields(LiusConfig lc) {
211:
212: return lc.getRtfFields();
213:
214: }
215:
216: }
|