001: package org.enhydra.snapper.parsers.fileparsers.parsers;
002:
003: import java.io.File;
004: import java.io.FileInputStream;
005: import java.io.InputStream;
006: import java.lang.reflect.Constructor;
007: import java.lang.reflect.InvocationTargetException;
008: import java.util.Iterator;
009: import java.util.Map;
010:
011: import javax.swing.text.Document;
012: import javax.swing.text.rtf.RTFEditorKit;
013: import javax.xml.transform.Transformer;
014:
015: import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
016: import org.apache.poi.hssf.eventusermodel.HSSFRequest;
017: import org.apache.poi.poifs.eventfilesystem.POIFSReader;
018: import org.apache.poi.poifs.filesystem.POIFSFileSystem;
019: import org.enhydra.snapper.parsers.fileparsers.FileParserException;
020: import org.enhydra.snapper.parsers.fileparsers.api.Compound;
021: import org.enhydra.snapper.parsers.fileparsers.api.Converter;
022: import org.enhydra.snapper.parsers.fileparsers.parsers.excelparser.ExcelConverter;
023: import org.enhydra.snapper.parsers.fileparsers.utils.TextFilter;
024:
025: /**
026: * @author Milin Radivoj
027: *
028: */
029: public class ExcelParser implements Converter, Compound {
030:
031: private File originalFile;
032:
033: private String parsedText = "";
034:
035: private String convertedText = "";
036:
037: private int characterLimit = 0;
038:
039: private String extractorClassName = "org.enhydra.snapper.parsers.fileparsers.parsers.excelparser.ExcelToText";
040:
041: private String extension = "txt";
042:
043: private String author;
044:
045: private String lastAuthor;
046:
047: private String properties = "";
048:
049: public Map customproperties;
050:
051: private int fileSizeLimit = 0;
052:
053: private long timeLimit = 0;
054:
055: public void parse(File originalFile, String tempDir)
056: throws FileParserException {
057: this .originalFile = originalFile;
058: parse();
059: }
060:
061: public void setLimit(int limit) {
062: characterLimit = limit;
063: }
064:
065: public String getTitle() {
066: return originalFile.getName();
067: }
068:
069: public String getParsedText() {
070: return (parsedText != null) ? parsedText : "";
071: }
072:
073: public String getFileProperties() {
074: return properties;
075: }
076:
077: public String getConvertedText() {
078: return (convertedText != null) ? convertedText : "";
079: }
080:
081: public String getConvertedExtension() {
082:
083: return extension;
084: }
085:
086: public String getAuthor() {
087: return author;
088: }
089:
090: public String getLastSavedBy() {
091: return lastAuthor;
092: }
093:
094: public void parse() throws FileParserException {
095:
096: InputStream din = null;
097: POIFSFileSystem fs = null;
098: FileInputStream in = null, fis = null;
099: try {
100: in = new FileInputStream(originalFile);
101: fs = new POIFSFileSystem(in);
102: din = fs.createDocumentInputStream("Workbook");
103: HSSFRequest req = new HSSFRequest();
104:
105: ExcelConverter listener;
106:
107: if (extractorClassName == null) {
108: extractorClassName = "org.enhydra.snapper.parsers.fileparsers.parsers.excelparser.ExcelToText";
109: }
110:
111: Constructor c;
112: try {
113: c = Class.forName(extractorClassName).getConstructor(
114: new Class[] {});
115: } catch (SecurityException e) {
116: throw new FileParserException(e);
117: } catch (NoSuchMethodException e) {
118: throw new FileParserException(e);
119: } catch (ClassNotFoundException e) {
120: throw new FileParserException(e);
121: }
122:
123: try {
124: listener = (ExcelConverter) c
125: .newInstance(new Object[] {});
126: } catch (IllegalArgumentException e) {
127: throw new FileParserException(e);
128: } catch (InstantiationException e) {
129: throw new FileParserException(e);
130: } catch (IllegalAccessException e) {
131: throw new FileParserException(e);
132: } catch (InvocationTargetException e) {
133: throw new FileParserException(e);
134: }
135:
136: listener.init(originalFile, characterLimit, timeLimit);
137:
138: req.addListenerForAllRecords(listener);
139: HSSFEventFactory factory = new HSSFEventFactory();
140: factory.processEvents(req, din);
141:
142: din.close();
143: din = null;
144:
145: parsedText = listener.getPureText();
146: convertedText = listener.getConvertedContent();
147: extension = listener.getExtension();
148:
149: if (parsedText == null)
150: parsedText = "";
151: listener.close();
152: if (parsedText != null)
153: parsedText = TextFilter.filterForbiddenCharacters(
154: parsedText,
155: TextFilter.DEFAULT_FORBIDDEN_CHARACTERS);
156:
157: fs = null;
158:
159: in.close();
160: in = null;
161:
162: try {
163: POIFSReader r = new POIFSReader();
164: MyPOIFSReaderListener mypoi = new MyPOIFSReaderListener();
165: r.registerListener(mypoi, "\005SummaryInformation");
166: DocumentPOIFSReaderListener docpoi = new DocumentPOIFSReaderListener();
167: r.registerListener(docpoi,
168: "\005DocumentSummaryInformation");
169: fis = new FileInputStream(originalFile);
170: r.read(fis);
171: // title = mypoi.getTitle();
172: author = mypoi.getAuthor();
173: lastAuthor = mypoi.getLastAuthor();
174: properties = mypoi.getProperties();
175: fis.close();
176: fis = null;
177:
178: if (properties == null) {
179: properties = "";
180: }
181:
182: if (docpoi.getCustomProperties() != null) {
183: customproperties = docpoi.getCustomProperties();
184: createString(customproperties);
185: }
186: } catch (Exception e) {
187: }
188:
189: } catch (Exception e) {
190: if (e
191: .getClass()
192: .getName()
193: .equals(
194: "org.apache.poi.poifs.storage.RTFSignatureException")) {
195:
196: FileInputStream inf = null;
197: try {
198: inf = new FileInputStream(originalFile);
199: RTFEditorKit kit = new RTFEditorKit();
200: Document doc = kit.createDefaultDocument();
201: kit.read(inf, doc, 0);
202: parsedText = doc.getText(0, doc.getLength());
203: inf.close();
204: inf = null;
205: } catch (Exception ex) {
206: try {
207: inf.close();
208: inf = null;
209: } catch (Exception exx) {
210: }
211: throw new FileParserException(originalFile
212: .getPath(), ex);
213: }
214:
215: } else
216: throw new FileParserException(originalFile.getPath(), e);
217: } catch (Throwable e) {
218: FileParserException exx = new FileParserException(e
219: .getMessage());
220: exx.setFileName(originalFile.getPath());
221: throw exx;
222: } finally {
223: if (fis != null) {
224: try {
225: fis.close();
226: } catch (Exception ex) {
227: }
228: fis = null;
229: }
230: if (in != null) {
231: try {
232: in.close();
233: } catch (Exception ex) {
234: }
235: in = null;
236: }
237: if (din != null) {
238: try {
239: din.close();
240: } catch (Exception ex) {
241: }
242: din = null;
243: }
244: }
245:
246: }
247:
248: private void createString(Map customproperties) {
249:
250: for (Iterator it = customproperties.entrySet().iterator(); it
251: .hasNext();) {
252: Map.Entry entry = (Map.Entry) it.next();
253: properties += entry.getKey().toString() + " = "
254: + entry.getValue() + " \n ";
255: }
256: }
257:
258: public void close() {
259: originalFile = null;
260: parsedText = null;
261: }
262:
263: public void setConverter(String converterClassName,
264: String conversionPath) {
265: extractorClassName = converterClassName;
266:
267: }
268:
269: public void setFileSizeLimit(int limit) {
270: fileSizeLimit = limit;
271: }
272:
273: public void setTimeLimit(long limit) {
274: timeLimit = limit;
275: }
276:
277: public void setHTMLTransformer(Transformer transformer) {
278: // TODO Auto-generated method stub
279:
280: }
281:
282: public void setTextTransformer(Transformer transformer) {
283: // TODO Auto-generated method stub
284:
285: }
286: }
|