001: /*
002:
003: * LIUS - Lucene Index Update and Search
004: * http://sourceforge.net/projects/lius/
005: *
006: * Copyright (c) 2005, Laval University Library. All rights reserved.
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
021: */
022:
023: package ca.ulaval.bibl.lius.index.MixteIndexing;
024:
025: import java.io.File;
026: import java.lang.reflect.InvocationTargetException;
027: import java.util.ArrayList;
028: import java.util.Collection;
029: import java.util.Iterator;
030: import java.util.List;
031:
032: import org.apache.commons.beanutils.BeanUtils;
033: import org.apache.lucene.document.Document;
034:
035: import ca.ulaval.bibl.lius.Lucene.LuceneActions;
036: import ca.ulaval.bibl.lius.config.LiusConfig;
037: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
038: import ca.ulaval.bibl.lius.config.LiusField;
039: import ca.ulaval.bibl.lius.index.Indexer;
040: import ca.ulaval.bibl.lius.index.IndexerFactory;
041:
042: /**
043: *
044: * Classe permettant d'effectuer une indexation mixte. Cette
045: *
046: * indexation permet d'integrer dans le même "Lucene Document"
047: *
048: * des méta-données dans format XML et le texte integral dans
049: *
050: * un fichier PDF Word etc.
051: *
052: * <br/><br/>
053: *
054: * Class for mixed indexation. This indexation allows for integrating in the
055: *
056: * same Lucene document XML metadata and full text from a PDF file, Word file,
057: * etc.
058: *
059: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
060: *
061: */
062:
063: public class MixteIndexer
064:
065: extends Indexer {
066:
067: private LiusConfig lcP = null;
068:
069: public Object parse(Object file) {
070:
071: File f = new File((String) file);
072:
073: String[] lf = null;
074:
075: if (f.isDirectory()) {
076:
077: lf = f.list();
078:
079: }
080:
081: return lf;
082:
083: }
084:
085: /**
086: *
087: * Méthode retournant un objet de type Lucene document à partir du fichier à
088: *
089: * indexer et du fichier de configuration de Lius.
090: *
091: * <br/><br/>
092: *
093: * Method that returns a Lucene Document object from the file to index and
094: *
095: * the Lius configuration file.
096: *
097: */
098:
099: public Document createLuceneDocument(String file,
100:
101: String liusXmlConfigFilePath) {
102:
103: LiusConfig lc = LiusConfigBuilder.getSingletonInstance().
104:
105: getLiusConfig(liusXmlConfigFilePath);
106:
107: lcP = lc;
108:
109: Document doc = createLuceneDocument(file, lc);
110:
111: return doc;
112:
113: }
114:
115: /**
116: *
117: * Méthode retournant un objet de type Lucene document à partir du fichier
118: *
119: * à indexer et du fichier de configuration de Lius exprimé sous forme
120: *
121: * d'objet de type LiusConfig.
122: *
123: * <br/><br/>
124: *
125: * Method that returns a Lucene document object from a file to index and
126: *
127: * the Lius Configuration as a LiusConfig object.
128: *
129: */
130:
131: public Document createLuceneDocument(String file, LiusConfig lc) {
132:
133: lcP = lc;
134:
135: Document doc = createLuceneDocument(file, lc
136: .getMixteIndexingElements());
137:
138: return doc;
139:
140: }
141:
142: /**
143: *
144: * Méthode retournant un objet de type Lucene document à partir du fichier
145: *
146: * à indexer et d'une collection d'objets de type LiusField. Chaque
147: *
148: * objet LiusField contient de l'information sur le nom du champs Lucene,
149: *
150: * le type, etc.
151: *
152: * <br/><br/>
153: *
154: * Method that returns a Lucene document object from the file to index
155: *
156: * and a collection of LiusField objects. Each LiusField object contains
157: *
158: * information about the name of the Lucene Field, the type, etc.
159: *
160: */
161:
162: public Document createLuceneDocument(String file,
163:
164: Collection mixteIndexingElements) {
165:
166: List populatedList = (List) getPopulatedCollection(file,
167:
168: mixteIndexingElements);
169:
170: Document doc = LuceneActions.getSingletonInstance()
171: .populateLuceneDoc(populatedList);
172:
173: return doc;
174:
175: }
176:
177: /**
178: *
179: * Retourne une collection contenant les champs avec les valeurs à
180: *
181: * indexer comme par exemple: le texte integrale, titre etc.
182: *
183: * <br/><br/>
184: *
185: * Returns a collection containing the fieds with the values to index,
186: *
187: * like : full text, title, etc.
188: *
189: */
190:
191: public Collection getPopulatedCollection(Object file,
192:
193: Collection mixteIndexingElements) {
194:
195: String sep = System.getProperty("file.separator");
196:
197: String[] lf = (String[]) parse((String) file);
198:
199: List populatedList = new ArrayList();
200:
201: for (int i = 0; i < lf.length; i++) {
202:
203: Indexer indexer = null;
204:
205: String fileName = lf[i];
206:
207: File fileNameF = new File(fileName);
208:
209: String filePath = (String) file + sep + fileName;
210:
211: indexer = IndexerFactory.getIndexer(filePath,
212:
213: (List) mixteIndexingElements);
214:
215: if (indexer != null) {
216:
217: if (indexer.getLiusFields(lcP).size() > 0) {
218:
219: Collection populCollFile = indexer
220: .getPopulatedCollection(filePath,
221:
222: indexer.getLiusFields(lcP));
223:
224: Iterator it = populCollFile.iterator();
225:
226: while (it.hasNext()) {
227:
228: LiusField f = new LiusField();
229:
230: f = (LiusField) it.next();
231:
232: LiusField newLF = new LiusField();
233:
234: try {
235:
236: BeanUtils.copyProperties(newLF, f);
237:
238: }
239:
240: catch (InvocationTargetException ex) {
241:
242: ex.printStackTrace();
243:
244: }
245:
246: catch (IllegalAccessException ex) {
247:
248: ex.printStackTrace();
249:
250: }
251:
252: populatedList.add(newLF);
253:
254: }
255:
256: }
257:
258: }
259:
260: }
261:
262: return populatedList;
263:
264: }
265:
266: /**
267: *
268: * Méthode retournant un objet de type Lucene document à partir du fichier à
269: *
270: * indexer et d'une collection d'objets de type LiusField. Chaque objet
271: *
272: * LiusField contient de l'information sur le nom du champs Lucene, le type,
273: *
274: * etc.
275: *
276: * <br/><br/>
277: *
278: * Method that return a Lucene object from the configuration file and a
279: * collection
280: *
281: * of LiusField objects. Each LiusField object contains information about
282: * the Lucene
283: *
284: * field, the type, etc.
285: *
286: */
287:
288: public Collection getLiusFields(LiusConfig lc) {
289:
290: return null;
291:
292: }
293:
294: public Collection getPopulatedCollection(Object file,
295: String liusConfig) {
296:
297: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
298: .getLiusConfig(
299:
300: liusConfig);
301:
302: return getPopulatedCollection(file, lc);
303:
304: }
305:
306: public Collection getPopulatedCollection(Object file, LiusConfig lc) {
307:
308: return getPopulatedCollection(file, lc
309: .getMixteIndexingElements());
310:
311: }
312:
313: }
|