001: /***************************************************************
002: * This file is part of the [fleXive](R) project.
003: *
004: * Copyright (c) 1999-2008
005: * UCS - unique computing solutions gmbh (http://www.ucs.at)
006: * All rights reserved
007: *
008: * The [fleXive](R) project is free software; you can redistribute
009: * it and/or modify it under the terms of the GNU General Public
010: * License as published by the Free Software Foundation;
011: * either version 2 of the License, or (at your option) any
012: * later version.
013: *
014: * The GNU General Public License can be found at
015: * http://www.gnu.org/copyleft/gpl.html.
016: * A copy is found in the textfile GPL.txt and important notices to the
017: * license from the author are found in LICENSE.txt distributed with
018: * these libraries.
019: *
020: * This library is distributed in the hope that it will be useful,
021: * but WITHOUT ANY WARRANTY; without even the implied warranty of
022: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023: * GNU General Public License for more details.
024: *
025: * For further information about UCS - unique computing solutions gmbh,
026: * please see the company website: http://www.ucs.at
027: *
028: * For further information about [fleXive](R), please see the
029: * project website: http://www.flexive.org
030: *
031: *
032: * This copyright notice MUST APPEAR in all copies of the file!
033: ***************************************************************/package com.flexive.extractor;
034:
035: import com.flexive.shared.FxSharedUtils;
036: import org.apache.poi.hpsf.PropertySetFactory;
037: import org.apache.poi.hpsf.SummaryInformation;
038: import org.apache.poi.poifs.eventfilesystem.POIFSReader;
039: import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
040: import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
041: import org.apache.poi.poifs.filesystem.DocumentInputStream;
042: import org.apache.poi.util.LittleEndian;
043:
044: import java.io.ByteArrayOutputStream;
045: import java.io.InputStream;
046:
047: class PowerpointExtractor implements POIFSReaderListener {
048: private FxSummaryInformation fxsi;
049: private ByteArrayOutputStream writer;
050:
051: /**
052: * Extracts the text informations from the powerpoint file.
053: *
054: * @param in the input stream to read from
055: * @return the extraxted informations, or null if no text extraction was possible
056: */
057: public ExtractedData extract(final InputStream in) {
058: try {
059: writer = new ByteArrayOutputStream();
060: POIFSReader reader = new POIFSReader();
061: reader.registerListener(this );
062: //FxSummaryInformation.getSummaryInformation(fileName);
063: reader.read(in);
064: if (fxsi != null) {
065: writer.write(FxSharedUtils.getBytes(fxsi
066: .getFTIndexInformations()));
067: }
068: writer.flush();
069: return new ExtractedData(fxsi, writer.toString());
070: } catch (Exception ex) {
071: return null;
072: } finally {
073: try {
074: writer.close();
075: } catch (Exception exc) {/*ignore*/
076: }
077: }
078: }
079:
080: private void processContent(byte[] buffer, int beginIndex,
081: int endIndex) {
082: while (beginIndex < endIndex) {
083: int containerFlag = LittleEndian.getUShort(buffer,
084: beginIndex);
085: int recordType = LittleEndian.getUShort(buffer,
086: beginIndex + 2);
087: long recordLength = LittleEndian.getUInt(buffer,
088: beginIndex + 4);
089: beginIndex += 8;
090: if ((containerFlag & 0x0f) == 0x0f) {
091: processContent(buffer, beginIndex, beginIndex
092: + (int) recordLength);
093: } else if (recordType == 4008) {
094: writer.write(buffer, beginIndex, (int) recordLength);
095: writer.write(' ');
096: }
097: beginIndex += (int) recordLength;
098: }
099: }
100:
101: public void processPOIFSReaderEvent(POIFSReaderEvent event) {
102: try {
103: if (event.getName().equalsIgnoreCase("PowerPoint Document")) {
104: DocumentInputStream input = event.getStream();
105: byte[] buffer = new byte[input.available()];
106: //noinspection ResultOfMethodCallIgnored
107: input.read(buffer, 0, input.available());
108: processContent(buffer, 0, buffer.length);
109: } else if (event.getName().equals("\005SummaryInformation")) {
110: SummaryInformation si = (SummaryInformation) PropertySetFactory
111: .create(event.getStream());
112: fxsi = new FxSummaryInformation(si);
113: }
114: } catch (Exception ex) {
115: ex.printStackTrace();
116: }
117: }
118:
119: }
|