001: /*
002: * Copyright (C) 2005 - 2008 JasperSoft Corporation. All rights reserved.
003: * http://www.jaspersoft.com.
004: *
005: * Unless you have purchased a commercial license agreement from JasperSoft,
006: * the following license terms apply:
007: *
008: * This program is free software; you can redistribute it and/or modify
009: * it under the terms of the GNU General Public License version 2 as published by
010: * the Free Software Foundation.
011: *
012: * This program is distributed WITHOUT ANY WARRANTY; and without the
013: * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
014: * See the GNU General Public License for more details.
015: *
016: * You should have received a copy of the GNU General Public License
017: * along with this program; if not, see http://www.gnu.org/licenses/gpl.txt
018: * or write to:
019: *
020: * Free Software Foundation, Inc.,
021: * 59 Temple Place - Suite 330,
022: * Boston, MA USA 02111-1307
023: *
024: *
025: *
026: *
027: * JRCsvDataSourceInspector.java
028: *
029: * Created on February 16, 2006, 1:27 PM
030: *
031: */
032:
033: package it.businesslogic.ireport.connection;
034:
035: import it.businesslogic.ireport.JRField;
036: import java.io.File;
037: import java.io.FileNotFoundException;
038: import java.io.FileReader;
039: import java.io.IOException;
040: import java.io.InputStream;
041: import java.io.InputStreamReader;
042: import java.io.Reader;
043: import java.math.BigDecimal;
044: import java.text.DateFormat;
045: import java.text.SimpleDateFormat;
046: import java.util.HashMap;
047: import java.util.Vector;
048: import net.sf.jasperreports.engine.JRException;
049: import net.sf.jasperreports.engine.JRRuntimeException;
050:
/**
 * Reads records from a CSV character stream so that the values of a record
 * (typically the header line) can be offered as column names.
 * <p>
 * According to the original author note the parsing code derives from
 * <tt>JRCsvDataSource</tt>; this class does not extend it.
 *
 * @author gtoffoli
 */
public class JRCsvDataSourceInspector {

    /** Value returned by {@link #readChar()} once the reader is exhausted. */
    private static final int END_OF_STREAM = -1;

    // Exposed through getter/setter only; the parsing code in this class does not use it.
    private DateFormat dateFormat = new SimpleDateFormat();
    private char fieldDelimiter = ',';
    private String recordDelimiter = "\n";
    // Column name -> position, filled by setColumnNames; not consulted by the parsing code here.
    private HashMap columnNames = new HashMap();

    // Fields of the most recently parsed record (a fresh Vector for every parseRow call).
    private Vector fields;
    private Reader reader;
    // Read-ahead buffer: "position" is the next unread index, "bufSize" the number of valid chars.
    private char[] buffer = new char[1024];
    private int position;
    private int bufSize;

    /**
     * Builds an inspector that reads from a byte stream. The bytes are decoded
     * with the platform default charset (plain <tt>InputStreamReader</tt>).
     *
     * @param stream an input stream containing CSV data
     */
    public JRCsvDataSourceInspector(InputStream stream) {
        this(new InputStreamReader(stream));
    }

    /**
     * Builds an inspector that reads from a file. The reader opened here can be
     * released with {@link #close()}.
     *
     * @param file a file containing CSV data
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public JRCsvDataSourceInspector(File file) throws FileNotFoundException {
        this(new FileReader(file));
    }

    /**
     * Builds an inspector that reads from the given reader.
     *
     * @param reader a <tt>Reader</tt> instance, for reading the stream
     */
    public JRCsvDataSourceInspector(Reader reader) {
        this.reader = reader;
    }

    /**
     * Closes the underlying reader. Does nothing if the inspector was built
     * with a <tt>null</tt> reader.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * Parses the next record of CSV data and stores its fields in {@link #fields}.
     * <p>
     * A record that ends while a quoted field is still open is continued with the
     * following line(s). A trailing empty field (record ending with the field
     * delimiter) is not added.
     *
     * @return <tt>false</tt> if there was no record or the record was empty,
     *         <tt>true</tt> otherwise
     * @throws IOException if reading from the stream fails
     */
    private boolean parseRow() throws IOException {
        fields = new Vector();

        String row = getRow();
        if (row == null || row.length() == 0) {
            return false;
        }

        int pos = 0;
        int startFieldPos = 0;
        boolean insideQuotes = false;
        boolean hadQuotes = false;
        boolean misplacedQuote = false;

        while (pos < row.length()) {
            char c = row.charAt(pos);

            if (c == '"') {
                if (insideQuotes) {
                    // two consecutive quotes are an escaped quote, a single one closes the text
                    if (pos + 1 < row.length() && row.charAt(pos + 1) == '"') {
                        pos++;
                    } else {
                        insideQuotes = false;
                    }
                } else if (hadQuotes) {
                    // the field contains a bad string, like "fo"o", instead of "fo""o"
                    misplacedQuote = true;
                } else {
                    insideQuotes = true;
                    hadQuotes = true;
                }
            }

            // field delimiter found outside quotes: the field is complete
            if (c == fieldDelimiter && !insideQuotes) {
                fields.add(cleanField(row.substring(startFieldPos, pos), hadQuotes, misplacedQuote));
                hadQuotes = false;
                misplacedQuote = false;
                startFieldPos = pos + 1;
            }

            pos++;

            // the record delimiter was found inside a quoted field, so it is not an
            // actual record delimiter and another line has to be read
            if (pos == row.length() && insideQuotes) {
                String continuation = getRow();
                if (continuation == null) {
                    // End of stream with the quote still open: without this check the
                    // row would grow by one delimiter per iteration forever.
                    misplacedQuote = true;
                    break;
                }
                row = row + recordDelimiter + continuation;
            }
        }

        // end of row was reached, so the final characters form the last field in the record
        String lastField = row.substring(startFieldPos, pos);
        if (lastField.length() > 0) {
            fields.add(cleanField(lastField, hadQuotes, misplacedQuote));
        }
        return true;
    }

    /**
     * Turns the raw text of one field into its value.
     *
     * @param raw the characters between two field delimiters
     * @param quoted whether a quote was seen while scanning the field
     * @param malformed whether an illegal quote was seen while scanning the field
     * @return the empty string for a malformed field; the raw text for an unquoted
     *         field; otherwise the trimmed text without the enclosing quotes and
     *         with doubled quotes collapsed, or the empty string when the trimmed
     *         text is not enclosed in quotes
     */
    private String cleanField(String raw, boolean quoted, boolean malformed) {
        if (malformed) {
            return "";
        }
        if (!quoted) {
            return raw;
        }
        String trimmed = raw.trim();
        if (trimmed.length() >= 2 && trimmed.startsWith("\"") && trimmed.endsWith("\"")) {
            return replaceAll(trimmed.substring(1, trimmed.length() - 1), "\"\"", "\"");
        }
        return "";
    }

    /**
     * Reads a row from the stream. A row is a sequence of characters terminated by
     * the record delimiter or by the end of the stream.
     *
     * @return the row without its delimiter (possibly empty), or <tt>null</tt> when
     *         the end of the stream was reached before any character could be read
     * @throws IOException if reading from the stream fails
     */
    private String getRow() throws IOException {
        String delimiter = recordDelimiter;
        char lastDelimiterChar = delimiter.charAt(delimiter.length() - 1);
        StringBuffer row = new StringBuffer();

        while (true) {
            int next = readChar();
            if (next == END_OF_STREAM) {
                return row.length() == 0 ? null : row.toString();
            }

            row.append((char) next);
            // Checking the tail of what was read so far finds the first occurrence of a
            // delimiter of any length without needing to push characters back.
            if (next == lastDelimiterChar && endsWith(row, delimiter)) {
                row.setLength(row.length() - delimiter.length());
                return row.toString();
            }
        }
    }

    /**
     * Tells whether the last characters of <tt>text</tt> are exactly <tt>suffix</tt>.
     */
    private static boolean endsWith(StringBuffer text, String suffix) {
        int offset = text.length() - suffix.length();
        if (offset < 0) {
            return false;
        }
        for (int i = 0; i < suffix.length(); i++) {
            if (text.charAt(offset + i) != suffix.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads a character from the stream, refilling the buffer when it is used up.
     *
     * @return the character as an <tt>int</tt>, or {@link #END_OF_STREAM} if there
     *         are no more characters
     * @throws IOException if any I/O error occurs
     */
    private int readChar() throws IOException {
        if (position >= bufSize) {
            bufSize = reader.read(buffer);
            position = 0;
            if (bufSize <= 0) {
                // keep the buffer marked as empty so a later call asks the reader again
                bufSize = 0;
                return END_OF_STREAM;
            }
        }
        return buffer[position++];
    }

    /**
     * Gets the date format held by this inspector.
     */
    public DateFormat getDateFormat() {
        return dateFormat;
    }

    /**
     * Sets the date format held by this inspector.
     */
    public void setDateFormat(DateFormat dateFormat) {
        this.dateFormat = dateFormat;
    }

    /**
     * Returns the field delimiter character.
     */
    public char getFieldDelimiter() {
        return fieldDelimiter;
    }

    /**
     * Sets the field delimiter character. The default is comma. The double quote is
     * also the quoting character, so using it as the delimiter gives unpredictable
     * results.
     *
     * @param fieldDelimiter the character that separates fields
     */
    public void setFieldDelimiter(char fieldDelimiter) {
        this.fieldDelimiter = fieldDelimiter;
    }

    /**
     * Returns the record delimiter string.
     */
    public String getRecordDelimiter() {
        return recordDelimiter;
    }

    /**
     * Sets the record delimiter string. The default is line feed (\n). The value
     * must be a non-empty string, because reading a row inspects its characters.
     *
     * @param recordDelimiter the string that separates records
     */
    public void setRecordDelimiter(String recordDelimiter) {
        this.recordDelimiter = recordDelimiter;
    }

    /**
     * Specifies an array of strings representing column names matching field names
     * in the report template. Each name is stored together with its array index,
     * replacing any names set before.
     */
    public void setColumnNames(String[] columnNames) {
        this.columnNames.clear();
        for (int i = 0; i < columnNames.length; i++) {
            this.columnNames.put(columnNames[i], new Integer(i));
        }
    }

    /**
     * Parses the next record of the stream and returns its fields. Every call
     * consumes one record, so the first call yields the header line.
     *
     * @return the fields as strings; empty when there was no record or the record
     *         was empty
     * @throws java.io.IOException if reading from the stream fails
     */
    public Vector getColumnNames() throws java.io.IOException {
        parseRow();
        return fields;
    }

    /**
     * Replaces every non-overlapping occurrence of <tt>substring</tt> in
     * <tt>string</tt>, scanning from the left, with <tt>replacement</tt>.
     */
    private static String replaceAll(String string, String substring, String replacement) {
        StringBuffer result = new StringBuffer();
        int copiedUpTo = 0;
        int match = string.indexOf(substring);
        while (match >= 0) {
            result.append(string.substring(copiedUpTo, match));
            result.append(replacement);
            copiedUpTo = match + substring.length();
            match = string.indexOf(substring, copiedUpTo);
        }
        if (copiedUpTo < string.length()) {
            result.append(string.substring(copiedUpTo));
        }
        return result.toString();
    }

}
|