001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017: package org.apache.poi.hssf.eventusermodel.examples;
018:
019: import java.io.FileInputStream;
020: import java.io.FileNotFoundException;
021: import java.io.IOException;
022: import java.io.PrintStream;
023: import java.text.DateFormat;
024: import java.text.DecimalFormat;
025: import java.text.SimpleDateFormat;
026: import java.util.ArrayList;
027: import java.util.Date;
028: import java.util.Hashtable;
029: import java.util.List;
030: import java.util.Map;
031:
032: import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
033: import org.apache.poi.hssf.eventusermodel.HSSFListener;
034: import org.apache.poi.hssf.eventusermodel.HSSFRequest;
035: import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
036: import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
037: import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
038: import org.apache.poi.hssf.record.BlankRecord;
039: import org.apache.poi.hssf.record.BoolErrRecord;
040: import org.apache.poi.hssf.record.CellValueRecordInterface;
041: import org.apache.poi.hssf.record.ExtendedFormatRecord;
042: import org.apache.poi.hssf.record.FormatRecord;
043: import org.apache.poi.hssf.record.FormulaRecord;
044: import org.apache.poi.hssf.record.LabelRecord;
045: import org.apache.poi.hssf.record.LabelSSTRecord;
046: import org.apache.poi.hssf.record.NoteRecord;
047: import org.apache.poi.hssf.record.NumberRecord;
048: import org.apache.poi.hssf.record.RKRecord;
049: import org.apache.poi.hssf.record.Record;
050: import org.apache.poi.hssf.record.SSTRecord;
051: import org.apache.poi.hssf.usermodel.HSSFDataFormat;
052: import org.apache.poi.hssf.usermodel.HSSFDateUtil;
053: import org.apache.poi.poifs.filesystem.POIFSFileSystem;
054:
055: /**
056: * A XLS -> CSV processor, that uses the MissingRecordAware
057: * EventModel code to ensure it outputs all columns and rows.
058: * @author Nick Burch
059: */
060: public class XLS2CSVmra implements HSSFListener {
061: private int minColumns;
062: private POIFSFileSystem fs;
063: private PrintStream output;
064:
065: private int lastRowNumber;
066: private int lastColumnNumber;
067:
068: /** Should we output the formula, or the value it has? */
069: private boolean outputFormulaValues = true;
070:
071: // Records we pick up as we process
072: private SSTRecord sstRecord;
073: private Map customFormatRecords = new Hashtable();
074: private List xfRecords = new ArrayList();
075:
076: /**
077: * Creates a new XLS -> CSV converter
078: * @param fs The POIFSFileSystem to process
079: * @param output The PrintStream to output the CSV to
080: * @param minColumns The minimum number of columns to output, or -1 for no minimum
081: */
082: public XLS2CSVmra(POIFSFileSystem fs, PrintStream output,
083: int minColumns) {
084: this .fs = fs;
085: this .output = output;
086: this .minColumns = minColumns;
087: }
088:
089: /**
090: * Creates a new XLS -> CSV converter
091: * @param filename The file to process
092: * @param minColumns The minimum number of columns to output, or -1 for no minimum
093: * @throws IOException
094: * @throws FileNotFoundException
095: */
096: public XLS2CSVmra(String filename, int minColumns)
097: throws IOException, FileNotFoundException {
098: this (new POIFSFileSystem(new FileInputStream(filename)),
099: System.out, minColumns);
100: }
101:
102: /**
103: * Initiates the processing of the XLS file to CSV
104: */
105: public void process() throws IOException {
106: MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
107: this );
108: HSSFEventFactory factory = new HSSFEventFactory();
109: HSSFRequest request = new HSSFRequest();
110: request.addListenerForAllRecords(listener);
111:
112: factory.processWorkbookEvents(request, fs);
113: }
114:
115: /**
116: * Main HSSFListener method, processes events, and outputs the
117: * CSV as the file is processed.
118: */
119: public void processRecord(Record record) {
120: int this Row = -1;
121: int this Column = -1;
122: String this Str = null;
123:
124: switch (record.getSid()) {
125: case SSTRecord.sid:
126: sstRecord = (SSTRecord) record;
127: break;
128: case FormatRecord.sid:
129: FormatRecord fr = (FormatRecord) record;
130: customFormatRecords.put(new Integer(fr.getIndexCode()), fr);
131: break;
132: case ExtendedFormatRecord.sid:
133: ExtendedFormatRecord xr = (ExtendedFormatRecord) record;
134: xfRecords.add(xr);
135: break;
136:
137: case BlankRecord.sid:
138: BlankRecord brec = (BlankRecord) record;
139:
140: this Row = brec.getRow();
141: this Column = brec.getColumn();
142: this Str = "";
143: break;
144: case BoolErrRecord.sid:
145: BoolErrRecord berec = (BoolErrRecord) record;
146:
147: this Row = berec.getRow();
148: this Column = berec.getColumn();
149: this Str = "";
150: break;
151: case FormulaRecord.sid:
152: FormulaRecord frec = (FormulaRecord) record;
153:
154: this Row = frec.getRow();
155: this Column = frec.getColumn();
156:
157: if (outputFormulaValues) {
158: this Str = formatNumberDateCell(frec, frec.getValue());
159: } else {
160: // TODO: Output the formula string
161: this Str = '"' + frec.toString() + '"';
162: }
163: break;
164: case LabelRecord.sid:
165: LabelRecord lrec = (LabelRecord) record;
166:
167: this Row = lrec.getRow();
168: this Column = lrec.getColumn();
169: this Str = '"' + lrec.getValue() + '"';
170: break;
171: case LabelSSTRecord.sid:
172: LabelSSTRecord lsrec = (LabelSSTRecord) record;
173:
174: this Row = lsrec.getRow();
175: this Column = lsrec.getColumn();
176: if (sstRecord == null) {
177: this Str = '"' + "(No SST Record, can't identify string)" + '"';
178: } else {
179: this Str = '"' + sstRecord
180: .getString(lsrec.getSSTIndex()).toString() + '"';
181: }
182: break;
183: case NoteRecord.sid:
184: NoteRecord nrec = (NoteRecord) record;
185:
186: this Row = nrec.getRow();
187: this Column = nrec.getColumn();
188: // TODO: Find object to match nrec.getShapeId()
189: this Str = '"' + "(TODO)" + '"';
190: break;
191: case NumberRecord.sid:
192: NumberRecord numrec = (NumberRecord) record;
193:
194: this Row = numrec.getRow();
195: this Column = numrec.getColumn();
196:
197: // Format
198: this Str = formatNumberDateCell(numrec, numrec.getValue());
199: break;
200: case RKRecord.sid:
201: RKRecord rkrec = (RKRecord) record;
202:
203: this Row = rkrec.getRow();
204: this Column = rkrec.getColumn();
205: this Str = '"' + "(TODO)" + '"';
206: break;
207: default:
208: break;
209: }
210:
211: // Handle new row
212: if (this Row != -1 && this Row != lastRowNumber) {
213: lastColumnNumber = -1;
214: }
215:
216: // Handle missing column
217: if (record instanceof MissingCellDummyRecord) {
218: MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
219: this Row = mc.getRow();
220: this Column = mc.getColumn();
221: this Str = "";
222: }
223:
224: // If we got something to print out, do so
225: if (this Str != null) {
226: if (this Column > 0) {
227: output.print(',');
228: }
229: output.print(this Str);
230: }
231:
232: // Update column and row count
233: if (this Row > -1)
234: lastRowNumber = this Row;
235: if (this Column > -1)
236: lastColumnNumber = this Column;
237:
238: // Handle end of row
239: if (record instanceof LastCellOfRowDummyRecord) {
240: // Print out any missing commas if needed
241: if (minColumns > 0) {
242: // Columns are 0 based
243: if (lastColumnNumber == -1) {
244: lastColumnNumber = 0;
245: }
246: for (int i = lastColumnNumber; i < (minColumns); i++) {
247: output.print(',');
248: }
249: }
250:
251: // We're onto a new row
252: lastColumnNumber = -1;
253:
254: // End the row
255: output.println();
256: }
257: }
258:
259: /**
260: * Formats a number or date cell, be that a real number, or the
261: * answer to a formula
262: */
263: private String formatNumberDateCell(CellValueRecordInterface cell,
264: double value) {
265: // Get the built in format, if there is one
266: ExtendedFormatRecord xfr = (ExtendedFormatRecord) xfRecords
267: .get(cell.getXFIndex());
268: if (xfr == null) {
269: System.err.println("Cell " + cell.getRow() + ","
270: + cell.getColumn() + " uses XF with index "
271: + cell.getXFIndex() + ", but we don't have that");
272: return Double.toString(value);
273: } else {
274: int formatIndex = xfr.getFormatIndex();
275: String format;
276: if (formatIndex >= HSSFDataFormat
277: .getNumberOfBuiltinBuiltinFormats()) {
278: FormatRecord tfr = (FormatRecord) customFormatRecords
279: .get(new Integer(formatIndex));
280: format = tfr.getFormatString();
281: } else {
282: format = HSSFDataFormat.getBuiltinFormat(xfr
283: .getFormatIndex());
284: }
285:
286: // Is it a date?
287: if (HSSFDateUtil.isADateFormat(formatIndex, format)
288: && HSSFDateUtil.isValidExcelDate(value)) {
289: // Java wants M not m for month
290: format = format.replace('m', 'M');
291: // Change \- into -, if it's there
292: format = format.replaceAll("\\\\-", "-");
293:
294: // Format as a date
295: Date d = HSSFDateUtil.getJavaDate(value, false);
296: DateFormat df = new SimpleDateFormat(format);
297: return df.format(d);
298: } else {
299: if (format == "General") {
300: // Some sort of wierd default
301: return Double.toString(value);
302: }
303:
304: // Format as a number
305: DecimalFormat df = new DecimalFormat(format);
306: return df.format(value);
307: }
308: }
309: }
310:
311: public static void main(String[] args) throws Exception {
312: if (args.length < 1) {
313: System.err.println("Use:");
314: System.err.println(" XLS2CSVmra <xls file> [min columns]");
315: System.exit(1);
316: }
317:
318: int minColumns = -1;
319: if (args.length >= 2) {
320: minColumns = Integer.parseInt(args[1]);
321: }
322:
323: XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
324: xls2csv.process();
325: }
326: }
|