001: /***************************************************************
002: * This file is part of the [fleXive](R) project.
003: *
004: * Copyright (c) 1999-2008
005: * UCS - unique computing solutions gmbh (http://www.ucs.at)
006: * All rights reserved
007: *
008: * The [fleXive](R) project is free software; you can redistribute
009: * it and/or modify it under the terms of the GNU General Public
010: * License as published by the Free Software Foundation;
011: * either version 2 of the License, or (at your option) any
012: * later version.
013: *
014: * The GNU General Public License can be found at
015: * http://www.gnu.org/copyleft/gpl.html.
016: * A copy is found in the textfile GPL.txt and important notices to the
017: * license from the author are found in LICENSE.txt distributed with
018: * these libraries.
019: *
020: * This library is distributed in the hope that it will be useful,
021: * but WITHOUT ANY WARRANTY; without even the implied warranty of
022: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023: * GNU General Public License for more details.
024: *
025: * For further information about UCS - unique computing solutions gmbh,
026: * please see the company website: http://www.ucs.at
027: *
028: * For further information about [fleXive](R), please see the
029: * project website: http://www.flexive.org
030: *
031: *
032: * This copyright notice MUST APPEAR in all copies of the file!
033: ***************************************************************/package com.flexive.extractor;
034:
035: import com.flexive.shared.FxSharedUtils;
036: import org.apache.poi.hpsf.PropertySetFactory;
037: import org.apache.poi.hpsf.SummaryInformation;
038: import org.apache.poi.hssf.usermodel.HSSFCell;
039: import org.apache.poi.hssf.usermodel.HSSFRow;
040: import org.apache.poi.hssf.usermodel.HSSFSheet;
041: import org.apache.poi.hssf.usermodel.HSSFWorkbook;
042: import org.apache.poi.poifs.eventfilesystem.POIFSReader;
043: import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
044: import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
045:
046: import java.io.BufferedInputStream;
047: import java.io.ByteArrayOutputStream;
048: import java.io.InputStream;
049:
050: class ExcelExtractor implements POIFSReaderListener {
051:
052: private FxSummaryInformation fxsi = null;
053: ByteArrayOutputStream writer = null;
054:
055: /**
056: * Proccesses the Summary section.
057: *
058: * @param event the summary section event.
059: */
060: public void processPOIFSReaderEvent(POIFSReaderEvent event) {
061:
062: try {
063: SummaryInformation si = (SummaryInformation) PropertySetFactory
064: .create(event.getStream());
065: fxsi = new FxSummaryInformation(si);
066: } catch (Exception ex) {
067: //
068: }
069: }
070:
071: /**
072: * Extracts the text informations from the excel file.
073: *
074: * @param in the input stream to read from
075: * @return the extraxted informations, or null if no text extraction was possible
076: */
077: public ExtractedData extract(final InputStream in) {
078:
079: BufferedInputStream bis = null;
080: try {
081:
082: writer = new ByteArrayOutputStream();
083:
084: // We need to read the stream 2 times, so we use a buffered input stream and mark the
085: // beginning
086: bis = new BufferedInputStream(in);
087: bis.mark(Integer.MAX_VALUE);
088:
089: // Retrieve summary information
090: POIFSReader r = new POIFSReader();
091: r.registerListener(this , "\005SummaryInformation");
092: r.read(bis);
093: bis.reset();
094:
095: // Retrieve text by processing all sheets
096: HSSFWorkbook wb = new HSSFWorkbook(bis);
097: for (int i = 0; i < wb.getNumberOfSheets(); i++) {
098: HSSFSheet sheet = wb.getSheetAt(i);
099: processSheet(sheet);
100: }
101:
102: // Append summary info to text
103: if (fxsi != null) {
104: writer.write(FxSharedUtils.getBytes(fxsi
105: .getFTIndexInformations()));
106: }
107: writer.flush();
108:
109: return new ExtractedData(fxsi, writer.toString());
110: } catch (Exception exc) {
111: exc.printStackTrace();
112: return null;
113: } finally {
114: try {
115: if (writer != null)
116: writer.close();
117: } catch (Exception exc) {/*ignore*/
118: }
119: try {
120: if (bis != null)
121: bis.close();
122: } catch (Exception exc) {/*ignore*/
123: }
124: }
125: }
126:
127: private void processSheet(HSSFSheet sheet) {
128: try {
129: // Use the HFFS functions for the number of rows & columns
130: int rowCount = sheet.getPhysicalNumberOfRows();
131: int colCount = sheet.getRow(0).getPhysicalNumberOfCells();
132: HSSFRow row;
133: HSSFCell cell;
134: String cellValue;
135: for (int i = 0; i < rowCount; i++) {
136: row = sheet.getRow(i);
137: for (short j = 0; j < colCount; j++) {
138: cell = row.getCell(j);
139: if (cell != null) {
140: try {
141: switch (cell.getCellType()) {
142: case HSSFCell.CELL_TYPE_BOOLEAN:
143: cellValue = String.valueOf(cell
144: .getBooleanCellValue());
145: break;
146: case HSSFCell.CELL_TYPE_NUMERIC:
147: cellValue = String.valueOf(cell
148: .getNumericCellValue());
149: break;
150: case HSSFCell.CELL_TYPE_FORMULA:
151: // Doesnt make much sense to index a cell formula
152: cellValue = "";
153: break;
154: case HSSFCell.CELL_TYPE_ERROR:
155: cellValue = String.valueOf(cell
156: .getErrorCellValue());
157: break;
158: case HSSFCell.CELL_TYPE_BLANK:
159: cellValue = "";
160: break;
161: default:
162: cellValue = cell.getStringCellValue();
163: }
164: } catch (Exception exc) {
165: cellValue = "";
166: }
167: writer.write(FxSharedUtils.getBytes(cellValue));
168: }
169: }
170: }
171: } catch (Exception eN) {
172: System.out.println("Error reading sheet:" + eN.toString());
173: }
174: }
175:
176: }
|