001: /*
002:
003: * LIUS - Lucene Index Update and Search
004: * http://sourceforge.net/projects/lius/
005: *
006: * Copyright (c) 2005, Laval University Library. All rights reserved.
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
021: */
022:
023: package ca.ulaval.bibl.lius.index.PowerPoint;
024:
025: import java.io.ByteArrayOutputStream;
026: import java.io.FileInputStream;
027: import java.io.FileNotFoundException;
028: import java.io.InputStream;
029: import java.util.ArrayList;
030: import java.util.Collection;
031: import java.util.Iterator;
032:
033: import org.apache.log4j.Logger;
034: import org.apache.lucene.document.Document;
035: import org.apache.poi.poifs.eventfilesystem.POIFSReader;
036: import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
037: import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
038: import org.apache.poi.poifs.filesystem.DocumentInputStream;
039: import org.apache.poi.util.LittleEndian;
040:
041: import ca.ulaval.bibl.lius.Lucene.LuceneActions;
042: import ca.ulaval.bibl.lius.config.LiusConfig;
043: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
044: import ca.ulaval.bibl.lius.config.LiusField;
045: import ca.ulaval.bibl.lius.index.Indexer;
046:
047: /**
048: *
049: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
050: *
051: */
052:
053: public class PPTIndexer
054:
055: extends Indexer implements POIFSReaderListener {
056:
057: private ByteArrayOutputStream writer;
058:
059: static Logger logger = Logger.getRootLogger();
060:
061: public Object parse(Object file) {
062:
063: InputStream input = null;
064:
065: try {
066:
067: input = new FileInputStream((String) file);
068:
069: }
070:
071: catch (FileNotFoundException ex1) {
072:
073: logger.error(ex1.getMessage());
074:
075: }
076:
077: String contents = "";
078:
079: try {
080:
081: POIFSReader reader = new POIFSReader();
082:
083: writer = new ByteArrayOutputStream();
084:
085: reader.registerListener(this );
086:
087: reader.read(input);
088:
089: contents = writer.toString();
090:
091: } catch (Exception ex) {
092:
093: logger.error(ex.getMessage());
094:
095: }
096:
097: return contents;
098:
099: }
100:
101: public Document createLuceneDocument(String file, LiusConfig lc) {
102:
103: Document doc = createLuceneDocument(file, lc.getPPTFields());
104:
105: return doc;
106:
107: }
108:
109: public Collection getLiusFields(LiusConfig lc) {
110:
111: return lc.getPPTFields();
112:
113: }
114:
115: public Collection getPopulatedCollection(Object file,
116: Collection liusFields) {
117:
118: LuceneActions la = LuceneActions.getSingletonInstance();
119:
120: Collection coll = new ArrayList();
121:
122: Iterator it = liusFields.iterator();
123:
124: while (it.hasNext()) {
125:
126: Object field = it.next();
127:
128: if (field instanceof LiusField) {
129:
130: LiusField lf = (LiusField) field;
131:
132: if (lf.getGet() != null) {
133:
134: if (lf.getGet().equalsIgnoreCase("content")) {
135:
136: String content = (String) parse((String) file);
137:
138: lf.setValue(content);
139:
140: coll.add(lf);
141:
142: }
143:
144: }
145:
146: }
147:
148: else {
149:
150: coll.add(field);
151:
152: }
153:
154: }
155:
156: return coll;
157:
158: }
159:
160: public Collection getPopulatedCollection(Object file,
161: String liusConfig) {
162:
163: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
164: .getLiusConfig(
165:
166: liusConfig);
167:
168: return getPopulatedCollection(file, lc);
169:
170: }
171:
172: public Collection getPopulatedCollection(Object file, LiusConfig lc) {
173:
174: return getPopulatedCollection(file, lc.getPPTFields());
175:
176: }
177:
178: public void processPOIFSReaderEvent(POIFSReaderEvent event) {
179:
180: try {
181:
182: if (!event.getName()
183: .equalsIgnoreCase("PowerPoint Document"))
184:
185: return;
186:
187: DocumentInputStream input = event.getStream();
188:
189: byte[] buffer = new byte[input.available()];
190:
191: input.read(buffer, 0, input.available());
192:
193: for (int i = 0; i < buffer.length - 20; i++) {
194:
195: long type = LittleEndian.getUShort(buffer, i + 2);
196:
197: long size = LittleEndian.getUInt(buffer, i + 4);
198:
199: if (type == 4008L) {
200:
201: writer.write(buffer, i + 4 + 1, (int) size + 3);
202:
203: i = i + 4 + 1 + (int) size - 1;
204:
205: }
206:
207: }
208:
209: } catch (Exception ex) {
210:
211: logger.error(ex.getMessage());
212:
213: }
214:
215: }
216:
217: }
|