001: /*
002: * ============================================================================
003: * GNU Lesser General Public License
004: * ============================================================================
005: *
006: * JasperReports - Free Java report-generating library.
007: * Copyright (C) 2001-2006 JasperSoft Corporation http://www.jaspersoft.com
008: *
009: * This library is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public
011: * License as published by the Free Software Foundation; either
012: * version 2.1 of the License, or (at your option) any later version.
013: *
014: * This library is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017: * Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public
020: * License along with this library; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
022: *
023: * JasperSoft Corporation
024: * 303 Second Street, Suite 450 North
025: * San Francisco, CA 94107
026: * http://www.jaspersoft.com
027: */
028: package net.sf.jasperreports.engine.data;
029:
030: import java.io.BufferedReader;
031: import java.io.File;
032: import java.io.FileInputStream;
033: import java.io.FileNotFoundException;
034: import java.io.IOException;
035: import java.io.InputStream;
036: import java.io.InputStreamReader;
037: import java.io.Reader;
038: import java.io.UnsupportedEncodingException;
039: import java.math.BigDecimal;
040: import java.math.BigInteger;
041: import java.text.DateFormat;
042: import java.text.DecimalFormat;
043: import java.text.NumberFormat;
044: import java.text.SimpleDateFormat;
045: import java.util.HashMap;
046: import java.util.Vector;
047:
048: import net.sf.jasperreports.engine.JRDataSource;
049: import net.sf.jasperreports.engine.JRException;
050: import net.sf.jasperreports.engine.JRField;
051: import net.sf.jasperreports.engine.JRRuntimeException;
052:
053: /**
054: * This datasource implementation reads a CSV stream. Datasource rows are separated by a record delimiter string and
055: * fields inside a row are separated by a field delimiter character. Fields containing delimiter characters can be
056: * placed inside quotes. If fields contain quotes themselves, these are duplicated (example: <i>"John ""Doe"""</i> will be
057: * displayed as <i>John "Doe"</i>).
058: * <p>
059: * Since CSV does not specify column names, the default naming convention is to name report fields COLUMN_x and map each
060: * column with the field found at index x in each row (these indices start with 0). To avoid this situation, users can
061: * either specify a collection of column names or set a flag to read the column names from the first row of the CSV file.
062: *
063: * @author Ionut Nedelcu (ionutned@users.sourceforge.net)
064: * @version $Id: JRCsvDataSource.java 1529 2006-12-21 14:05:08Z teodord $
065: */
066: public class JRCsvDataSource implements JRDataSource {
067: private DateFormat dateFormat = new SimpleDateFormat();
068: private NumberFormat numberFormat = new DecimalFormat();
069: private char fieldDelimiter = ',';
070: private String recordDelimiter = "\n";
071: private HashMap columnNames = new HashMap();
072: private boolean useFirstRowAsHeader;
073:
074: private Vector fields;
075: private Reader reader;
076: private char buffer[] = new char[1024];
077: private int position;
078: private int bufSize;
079: private boolean processingStarted;
080:
081: /**
082: * Creates a datasource instance from a CSV data input stream, using the default encoding.
083: * @param stream an input stream containing CSV data
084: */
085: public JRCsvDataSource(InputStream stream) {
086: this (new BufferedReader(new InputStreamReader(stream)));
087: }
088:
089: /**
090: * Creates a datasource instance from a CSV data input stream, using the specified encoding.
091: * @param stream an input stream containing CSV data
092: * @param charsetName the encoding to use
093: */
094: public JRCsvDataSource(InputStream stream, String charsetName)
095: throws UnsupportedEncodingException {
096: this (new BufferedReader(new InputStreamReader(stream,
097: charsetName)));
098: }
099:
100: /**
101: * Creates a datasource instance from a CSV file, using the default encoding.
102: * @param file a file containing CSV data
103: */
104: public JRCsvDataSource(File file) throws FileNotFoundException {
105: this (new FileInputStream(file));
106: }
107:
108: /**
109: * Creates a datasource instance from a CSV file, using the specified encoding.
110: * @param file a file containing CSV data
111: * @param charsetName the encoding to use
112: */
113: public JRCsvDataSource(File file, String charsetName)
114: throws FileNotFoundException, UnsupportedEncodingException {
115: this (new FileInputStream(file), charsetName);
116: }
117:
118: /**
119: * Creates a datasource instance from a CSV data reader.
120: * @param reader a <tt>Reader</tt> instance, for reading the stream
121: */
122: public JRCsvDataSource(Reader reader) {
123: this .reader = reader;
124: }
125:
126: /**
127: *
128: */
129: public boolean next() throws JRException {
130: try {
131: if (!processingStarted) {
132: if (useFirstRowAsHeader) {
133: parseRow();
134: for (int i = 0; i < fields.size(); i++) {
135: String name = (String) fields.get(i);
136: this .columnNames.put(name, new Integer(i));
137: }
138: }
139: processingStarted = true;
140: }
141:
142: return parseRow();
143: } catch (IOException e) {
144: throw new JRException(e);
145: }
146: }
147:
148: /**
149: *
150: */
151: public Object getFieldValue(JRField jrField) throws JRException {
152: String fieldName = jrField.getName();
153:
154: Integer columnIndex = (Integer) columnNames.get(fieldName);
155: if (columnIndex == null && fieldName.startsWith("COLUMN_")) {
156: columnIndex = Integer.valueOf(fieldName.substring(7));
157: }
158: if (columnIndex == null)
159: throw new JRException("Unknown column name : " + fieldName);
160:
161: if (fields.size() > columnIndex.intValue()) {
162: String fieldValue = (String) fields.get(columnIndex
163: .intValue());
164: Class valueClass = jrField.getValueClass();
165:
166: if (valueClass.equals(String.class))
167: return fieldValue;
168:
169: fieldValue = fieldValue.trim();
170:
171: if (fieldValue.length() == 0)
172: return null;
173:
174: try {
175: if (valueClass.equals(Boolean.class)) {
176: return fieldValue.equalsIgnoreCase("true") ? Boolean.TRUE
177: : Boolean.FALSE;
178: } else if (valueClass.equals(Byte.class)) {
179: return new Byte((numberFormat.parse(fieldValue))
180: .byteValue());
181: } else if (valueClass.equals(Integer.class)) {
182: return new Integer((numberFormat.parse(fieldValue))
183: .intValue());
184: } else if (valueClass.equals(Long.class)) {
185: return new Long((numberFormat.parse(fieldValue))
186: .longValue());
187: } else if (valueClass.equals(Short.class)) {
188: return new Short((numberFormat.parse(fieldValue))
189: .shortValue());
190: } else if (valueClass.equals(Double.class)) {
191: return new Double((numberFormat.parse(fieldValue))
192: .doubleValue());
193: } else if (valueClass.equals(Float.class)) {
194: return new Float((numberFormat.parse(fieldValue))
195: .floatValue());
196: } else if (valueClass.equals(BigDecimal.class)) {
197: return new BigDecimal((numberFormat
198: .parse(fieldValue)).toString());
199: } else if (valueClass.equals(BigInteger.class)) {
200: return new BigInteger(String.valueOf(numberFormat
201: .parse(fieldValue).longValue()));
202: } else if (valueClass.equals(java.lang.Number.class)) {
203: return numberFormat.parse(fieldValue);
204: } else if (valueClass.equals(java.util.Date.class)) {
205: return dateFormat.parse(fieldValue);
206: } else if (valueClass.equals(java.sql.Timestamp.class)) {
207: return new java.sql.Timestamp(dateFormat.parse(
208: fieldValue).getTime());
209: } else if (valueClass.equals(java.sql.Time.class)) {
210: return new java.sql.Time(dateFormat.parse(
211: fieldValue).getTime());
212: } else
213: throw new JRException("Field '" + jrField.getName()
214: + "' is of class '" + valueClass.getName()
215: + "' and can not be converted");
216: } catch (Exception e) {
217: throw new JRException("Unable to get value for field '"
218: + jrField.getName() + "' of class '"
219: + valueClass.getName() + "'", e);
220: }
221: }
222:
223: throw new JRException("Unknown column name : " + fieldName);
224: }
225:
226: /**
227: * Parses a row of CSV data and extracts the fields it contains
228: */
229: private boolean parseRow() throws IOException {
230: int pos = 0;
231: int startFieldPos = 0;
232: boolean insideQuotes = false;
233: boolean hadQuotes = false;
234: boolean misplacedQuote = false;
235: char c;
236: fields = new Vector();
237:
238: String row = getRow();
239: if (row == null || row.length() == 0)
240: return false;
241:
242: while (pos < row.length()) {
243: c = row.charAt(pos);
244:
245: if (c == '"') {
246: // already inside a text containing quotes
247: if (!insideQuotes) {
248: if (!hadQuotes) {
249: insideQuotes = true;
250: hadQuotes = true;
251: } else
252: // the field contains a bad string, like "fo"o", instead of "fo""o"
253: misplacedQuote = true;
254: }
255: // found a quote when already inside quotes, expecting two consecutive quotes, otherwise it means
256: // it's a closing quote
257: else {
258: if (pos + 1 < row.length()
259: && row.charAt(pos + 1) == '"')
260: pos++;
261: else
262: insideQuotes = false;
263: }
264: }
265: // field delimiter found, copy the field contents to the field array
266: if (c == fieldDelimiter && !insideQuotes) {
267: String field = row.substring(startFieldPos, pos);
268: // if an illegal quote was found, the entire field is considered illegal
269: if (misplacedQuote) {
270: misplacedQuote = false;
271: hadQuotes = false;
272: field = "";
273: }
274: // if the field was between quotes, remove them and turn any escaped quotes inside the text into normal quotes
275: else if (hadQuotes) {
276: field = field.trim();
277: if (field.startsWith("\"") && field.endsWith("\"")) {
278: field = field.substring(1, field.length() - 1);
279: field = replaceAll(field, "\"\"", "\"");
280: } else
281: field = "";
282: hadQuotes = false;
283: }
284:
285: fields.add(field);
286: startFieldPos = pos + 1;
287: }
288:
289: pos++;
290: // if the record delimiter was found inside a quoted field, it is not an actual record delimiter,
291: // so another line should be read
292: if ((pos == row.length()) && insideQuotes) {
293: row = row + recordDelimiter + getRow();
294: }
295: }
296:
297: // end of row was reached, so the final characters form the last field in the record
298: String field = row.substring(startFieldPos, pos);
299: if (field == null)
300: return true;
301:
302: if (misplacedQuote)
303: field = "";
304: else if (hadQuotes) {
305: field = field.trim();
306: if (field.startsWith("\"") && field.endsWith("\"")) {
307: field = field.substring(1, field.length() - 1);
308: field = replaceAll(field, "\"\"", "\"");
309: } else
310: field = "";
311: }
312: fields.add(field);
313:
314: return true;
315: }
316:
317: /**
318: * Reads a row from the stream. A row is a sequence of characters separated by the record delimiter.
319: */
320: private String getRow() throws IOException {
321: StringBuffer row = new StringBuffer();
322: char c;
323:
324: while (true) {
325: try {
326: c = getChar();
327:
328: // searches for the first character of the record delimiter
329: if (c == recordDelimiter.charAt(0)) {
330: int i;
331: char[] temp = new char[recordDelimiter.length()];
332: temp[0] = c;
333: boolean isDelimiter = true;
334: // checks if the following characters in the stream form the record delimiter
335: for (i = 1; i < recordDelimiter.length()
336: && isDelimiter; i++) {
337: temp[i] = getChar();
338: if (temp[i] != recordDelimiter.charAt(i))
339: isDelimiter = false;
340: }
341:
342: if (isDelimiter)
343: return row.toString();
344:
345: row.append(temp, 0, i);
346: }
347:
348: row.append(c);
349: } catch (JRException e) {
350: return row.toString();
351: }
352:
353: } // end while
354: }
355:
356: /**
357: * Reads a character from the stream.
358: * @throws IOException if any I/O error occurs
359: * @throws JRException if end of stream has been reached
360: */
361: private char getChar() throws IOException, JRException {
362: // end of buffer, fill a new buffer
363: if (position + 1 > bufSize) {
364: bufSize = reader.read(buffer);
365: position = 0;
366: if (bufSize == -1)
367: throw new JRException("No more chars");
368: }
369:
370: return buffer[position++];
371: }
372:
373: /**
374: * Gets the date format that will be used to parse date fields
375: */
376: public DateFormat getDateFormat() {
377: return dateFormat;
378: }
379:
380: /**
381: * Sets the desired date format to be used for parsing date fields
382: */
383: public void setDateFormat(DateFormat dateFormat) {
384: if (processingStarted)
385: throw new JRRuntimeException(
386: "Cannot modify data source properties after data reading has started");
387: this .dateFormat = dateFormat;
388: }
389:
390: /**
391: * Returns the field delimiter character.
392: */
393: public char getFieldDelimiter() {
394: return fieldDelimiter;
395: }
396:
397: /**
398: * Sets the field delimiter character. The default is comma. If characters such as comma or quotes are specified,
399: * the results can be unpredictable.
400: * @param fieldDelimiter
401: */
402: public void setFieldDelimiter(char fieldDelimiter) {
403: if (processingStarted)
404: throw new JRRuntimeException(
405: "Cannot modify data source properties after data reading has started");
406: this .fieldDelimiter = fieldDelimiter;
407: }
408:
409: /**
410: * Returns the record delimiter string.
411: */
412: public String getRecordDelimiter() {
413: return recordDelimiter;
414: }
415:
416: /**
417: * Sets the record delimiter string. The default is line feed (\n).
418: * @param recordDelimiter
419: */
420: public void setRecordDelimiter(String recordDelimiter) {
421: if (processingStarted)
422: throw new JRRuntimeException(
423: "Cannot modify data source properties after data reading has started");
424: this .recordDelimiter = recordDelimiter;
425: }
426:
427: /**
428: * Specifies an array of strings representing column names matching field names in the report template
429: */
430: public void setColumnNames(String[] columnNames) {
431: if (processingStarted)
432: throw new JRRuntimeException(
433: "Cannot modify data source properties after data reading has started");
434: for (int i = 0; i < columnNames.length; i++)
435: this .columnNames.put(columnNames[i], new Integer(i));
436: }
437:
438: /**
439: * Specifies whether the first line of the CSV file should be considered a table
440: * header, containing column names matching field names in the report template
441: */
442: public void setUseFirstRowAsHeader(boolean useFirstRowAsHeader) {
443: if (processingStarted)
444: throw new JRRuntimeException(
445: "Cannot modify data source properties after data reading has started");
446: this .useFirstRowAsHeader = useFirstRowAsHeader;
447: }
448:
449: /**
450: * Closes the reader. Users of this data source should close it after usage.
451: */
452: public void close() {
453: try {
454: reader.close();
455: } catch (IOException e) {
456: //nothing to do
457: }
458: }
459:
460: private String replaceAll(String string, String substring,
461: String replacement) {
462: StringBuffer result = new StringBuffer();
463: int index = string.indexOf(substring);
464: int oldIndex = 0;
465: while (index >= 0) {
466: result.append(string.substring(oldIndex, index));
467: result.append(replacement);
468: index += substring.length();
469: oldIndex = index;
470:
471: index = string.indexOf(substring, index);
472: }
473:
474: if (oldIndex < string.length())
475: result.append(string.substring(oldIndex, string.length()));
476:
477: return result.toString();
478: }
479:
480: public NumberFormat getNumberFormat() {
481: return numberFormat;
482: }
483:
484: public void setNumberFormat(NumberFormat numberFormat) {
485: this.numberFormat = numberFormat;
486: }
487: }
|