001: /*
002: * LIUS - Lucene Index Update and Search
003: * http://sourceforge.net/projects/lius/
004: *
005: * Copyright (c) 2005, Laval University Library. All rights reserved.
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2.1 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public
018: * License along with this library; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020: */
021:
022: package ca.ulaval.bibl.lius.index;
023:
import java.util.List;
import java.util.Locale;

import org.apache.log4j.Logger;

import ca.ulaval.bibl.lius.config.LiusConfig;
import ca.ulaval.bibl.lius.index.Excel.ExcelIndexer;
import ca.ulaval.bibl.lius.index.HTML.HtmlIndexer;
import ca.ulaval.bibl.lius.index.MSWord.WordIndexer;
import ca.ulaval.bibl.lius.index.OpenOffice.OOIndexer;
import ca.ulaval.bibl.lius.index.OpenOffice.OOIndexer2;
import ca.ulaval.bibl.lius.index.PDF.PdfIndexer;
import ca.ulaval.bibl.lius.index.PowerPoint.PPTIndexer;
import ca.ulaval.bibl.lius.index.RTF.RTFIndexer;
import ca.ulaval.bibl.lius.index.TXT.TXTIndexer;
import ca.ulaval.bibl.lius.index.XML.XmlFileIndexer;
import de.teamskill.lius.index.application.TexIndexer;
import de.teamskill.lius.index.application.VCardIndexer;
import de.teamskill.lius.index.audio.MP3Indexer;
042:
043: /**
044: *
045: * Changelog: <br/>
046: *
047: * 2005/06/02: added support for VCardIndexer and TexIndexer (jf)
048: *
049: * 2005/06/03: added support for FontIndexer (jf)
050: *
051: *
052: *
053: * @author Rida Benjelloun
054: *
055: *
056: *
057: */
058:
059: public class IndexerFactory {
060:
061: static Logger logger = Logger.getRootLogger();
062:
063: public static Indexer getIndexer(String fileName) {
064:
065: Indexer indexer = null;
066:
067: if (fileName.toLowerCase().endsWith(".xml")) {
068:
069: indexer = new XmlFileIndexer();
070:
071: } else if (fileName.toLowerCase().endsWith(".pdf")) {
072:
073: indexer = new PdfIndexer();
074:
075: } else if (fileName.toLowerCase().endsWith(".html")
076:
077: || fileName.toLowerCase().endsWith(".htm")
078:
079: || fileName.toLowerCase().endsWith(".jsp")
080:
081: || fileName.toLowerCase().endsWith(".asp")
082:
083: || fileName.toLowerCase().endsWith(".aspx")) {
084:
085: indexer = new HtmlIndexer();
086:
087: } else if (fileName.toLowerCase().endsWith(".doc")) {
088:
089: indexer = new WordIndexer();
090:
091: } else if (fileName.toLowerCase().endsWith(".xls")) {
092:
093: indexer = new ExcelIndexer();
094:
095: } else if (fileName.toLowerCase().endsWith(".rtf")) {
096:
097: indexer = new RTFIndexer();
098:
099: } else if (fileName.toLowerCase().endsWith(".txt")) {
100:
101: indexer = new TXTIndexer();
102:
103: } else if (fileName.toLowerCase().endsWith(".sxw")
104:
105: || fileName.toLowerCase().endsWith(".sxc")
106:
107: || fileName.toLowerCase().endsWith(".sxi")
108:
109: || fileName.toLowerCase().endsWith(".stw")) {
110:
111: indexer = new OOIndexer();
112:
113: } else if (fileName.toLowerCase().endsWith(".odt")) {
114: indexer = new OOIndexer2();
115: } else if (fileName.toLowerCase().endsWith(".ppt")) {
116:
117: indexer = new PPTIndexer();
118:
119: } else if (fileName.toLowerCase().endsWith(".mp3")) {
120:
121: indexer = new MP3Indexer();
122:
123: } else if (fileName.toLowerCase().endsWith(".vcf")
124:
125: || fileName.toLowerCase().endsWith(".vcard")) {
126:
127: indexer = new VCardIndexer();
128:
129: } else if (fileName.toLowerCase().endsWith(".tex")) {
130:
131: indexer = new TexIndexer();
132:
133: } else if (fileName.toLowerCase().endsWith(".gif")
134:
135: || fileName.toLowerCase().endsWith(".jpg")
136:
137: || fileName.toLowerCase().endsWith(".png")
138:
139: || fileName.toLowerCase().endsWith(".tif")
140:
141: || fileName.toLowerCase().endsWith(".eps")
142:
143: || fileName.toLowerCase().endsWith(".svg")
144:
145: || fileName.toLowerCase().endsWith(".ps")) {
146:
147: indexer = null;
148:
149: }
150:
151: /*
152: *
153: * else { throw new LiusException("Le fichier : " + "\"" + fileName +
154: *
155: * "\" n'est pas supporté par Lius"); }
156: *
157: */
158:
159: return indexer;
160:
161: }
162:
163: public static Indexer getIndexer(String fileName, List elemToIndex) {
164:
165: Indexer indexer = null;
166:
167: for (int i = 0; i < elemToIndex.size(); i++) {
168:
169: if (fileName.toLowerCase().endsWith(".xml")
170:
171: && elemToIndex.get(i).toString().equalsIgnoreCase("xml")) {
172:
173: indexer = new XmlFileIndexer();
174:
175: break;
176:
177: } else if (fileName.toLowerCase().endsWith(".pdf")
178:
179: && elemToIndex.get(i).toString().equalsIgnoreCase("pdf")) {
180:
181: indexer = new PdfIndexer();
182:
183: break;
184:
185: } else if ((fileName.toLowerCase().endsWith(".html")
186:
187: || fileName.toLowerCase().endsWith(".htm")
188:
189: || fileName.toLowerCase().endsWith(".jsp")
190:
191: || fileName.toLowerCase().endsWith(".asp") || fileName
192:
193: .toLowerCase().endsWith(".aspx"))
194:
195: && elemToIndex.get(i).toString()
196:
197: .equalsIgnoreCase("html")) {
198:
199: indexer = new HtmlIndexer();
200:
201: break;
202:
203: } else if (fileName.toLowerCase().endsWith(".doc")
204:
205: && elemToIndex.get(i).toString().equalsIgnoreCase(
206:
207: "msword")) {
208:
209: indexer = new WordIndexer();
210:
211: break;
212:
213: } else if (fileName.toLowerCase().endsWith(".rtf")
214:
215: && elemToIndex.get(i).toString().equalsIgnoreCase("rtf")) {
216:
217: indexer = new RTFIndexer();
218:
219: break;
220:
221: } else if (fileName.toLowerCase().endsWith(".xls")
222:
223: && elemToIndex.get(i).toString().equalsIgnoreCase(
224:
225: "msexcel")) {
226:
227: indexer = new ExcelIndexer();
228:
229: break;
230:
231: } else if (fileName.toLowerCase().endsWith(".mp3")
232:
233: && elemToIndex.get(i).toString().equalsIgnoreCase("mp3")) {
234:
235: indexer = new MP3Indexer();
236:
237: break;
238:
239: } else if ((fileName.toLowerCase().endsWith(".vcf") || fileName
240:
241: .toLowerCase().endsWith(".vcard"))
242:
243: && elemToIndex.get(i).toString().equalsIgnoreCase(
244:
245: "vcard")) {
246:
247: indexer = new VCardIndexer();
248:
249: break;
250:
251: } else if ((fileName.toLowerCase().endsWith(".ttf") || fileName
252:
253: .toLowerCase().endsWith(".pfb"))
254:
255: && elemToIndex.get(i).toString().equalsIgnoreCase(
256:
257: "font")) {
258:
259: indexer = new VCardIndexer();
260:
261: break;
262:
263: } else if (fileName.toLowerCase().endsWith(".tex")
264:
265: && elemToIndex.get(i).toString().equalsIgnoreCase("tex")) {
266:
267: indexer = new TexIndexer();
268:
269: break;
270:
271: }
272:
273: else if (fileName.toLowerCase().endsWith(".gif")
274:
275: || fileName.toLowerCase().endsWith(".jpg")
276:
277: || fileName.toLowerCase().endsWith(".png")
278:
279: || fileName.toLowerCase().endsWith(".tif")
280:
281: || fileName.toLowerCase().endsWith(".eps")
282:
283: || fileName.toLowerCase().endsWith(".svg")
284:
285: || fileName.toLowerCase().endsWith(".ps")) {
286:
287: indexer = null;
288:
289: break;
290:
291: } else if (fileName.toLowerCase().endsWith(".txt")
292:
293: && elemToIndex.get(i).toString().equalsIgnoreCase("txt")) {
294:
295: indexer = new TXTIndexer();
296:
297: break;
298:
299: } else if ((fileName.toLowerCase().endsWith(".sxw")
300:
301: || fileName.toLowerCase().endsWith(".sxc")
302:
303: || fileName.toLowerCase().endsWith(".sxi") || fileName
304:
305: .toLowerCase().endsWith(".stw"))
306:
307: && elemToIndex.get(i).toString().equalsIgnoreCase(
308:
309: "openoffice")) {
310:
311: indexer = new OOIndexer();
312:
313: break;
314:
315: } else if (fileName.toLowerCase().endsWith(".odt")
316: && elemToIndex.get(i).toString().equalsIgnoreCase(
317:
318: "openoffice")) {
319: indexer = new OOIndexer2();
320: break;
321: }
322:
323: else if (fileName.toLowerCase().endsWith(".ppt")
324:
325: && elemToIndex.get(i).toString().equalsIgnoreCase(
326:
327: "mspowerpoint")) {
328:
329: indexer = new PPTIndexer();
330:
331: break;
332:
333: }
334:
335: }
336:
337: return indexer;
338:
339: }
340:
341: public static Indexer getIndexer(String fileName, LiusConfig lc) {
342:
343: Indexer indexer = null;
344:
345: if ((lc.getXmlFileFields() != null && fileName.toLowerCase()
346:
347: .endsWith(".xml"))
348:
349: || (lc.getPdfFields().size() > 0 && fileName
350: .toLowerCase()
351:
352: .endsWith(".pdf"))
353:
354: || (lc.getMsWordFields().size() > 0 && fileName
355: .toLowerCase()
356:
357: .endsWith(".doc"))
358:
359: || (lc.getHtmlFields().size() > 0 && (fileName
360: .toLowerCase()
361:
362: .endsWith(".html")
363:
364: || fileName.toLowerCase().endsWith(".htm")
365:
366: || fileName.toLowerCase().endsWith(".jsp")
367:
368: || fileName.toLowerCase().endsWith(".asp") || fileName
369:
370: .toLowerCase().endsWith(".aspx")))
371:
372: || (lc.getRtfFields().size() > 0 && fileName
373: .toLowerCase()
374:
375: .endsWith(".rtf"))
376:
377: || (lc.getExcelFields().size() > 0 && fileName
378: .toLowerCase()
379:
380: .endsWith(".xls"))
381:
382: || (lc.getTxtFields().size() > 0 && fileName
383: .toLowerCase()
384:
385: .endsWith(".txt"))
386:
387: || (lc.getOOFields().size() > 0
388:
389: && fileName.toLowerCase().endsWith(".sxw")
390:
391: || fileName.toLowerCase().endsWith(".sxc")
392:
393: || fileName.toLowerCase().endsWith(".sxi") || fileName
394:
395: .toLowerCase().endsWith(".stw") || fileName
396: .toLowerCase().endsWith(".odt"))
397:
398: || (lc.getPPTFields().size() > 0 && fileName
399: .toLowerCase()
400:
401: .endsWith(".ppt"))
402:
403: || (lc.getMP3Fields().size() > 0 && fileName
404: .toLowerCase()
405:
406: .endsWith(".mp3"))
407:
408: || (lc.getTexFields().size() > 0 && fileName
409: .toLowerCase()
410:
411: .endsWith(".tex"))
412:
413: || (lc.getVCardFields().size() > 0
414:
415: && fileName.toLowerCase().endsWith(".vcf") || fileName
416:
417: .toLowerCase().endsWith(".vcard"))) {
418:
419: indexer = IndexerFactory.getIndexer(fileName);
420:
421: }
422:
423: return indexer;
424:
425: }
426:
427: public static Indexer flush() {
428:
429: return null;
430:
431: }
432:
433: //org.apache.turbine.services.mimetype.util.MimeTypeMap
434:
435: }
|