001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (http://h2database.com/html/license.html).
004: * Initial Developer: H2 Group
005: */
006: package org.h2.tools;
007:
008: import java.io.BufferedInputStream;
009: import java.io.BufferedOutputStream;
010: import java.io.BufferedReader;
011: import java.io.BufferedWriter;
012: import java.io.FileOutputStream;
013: import java.io.IOException;
014: import java.io.InputStream;
015: import java.io.InputStreamReader;
016: import java.io.OutputStream;
017: import java.io.OutputStreamWriter;
018: import java.io.Reader;
019: import java.io.Writer;
020: import java.sql.Connection;
021: import java.sql.ResultSet;
022: import java.sql.ResultSetMetaData;
023: import java.sql.SQLException;
024: import java.sql.Statement;
025: import java.sql.Types;
026: import java.util.ArrayList;
027:
028: import org.h2.constant.SysProperties;
029: import org.h2.message.Message;
030: import org.h2.util.FileUtils;
031: import org.h2.util.IOUtils;
032: import org.h2.util.JdbcUtils;
033: import org.h2.util.StringCache;
034: import org.h2.util.StringUtils;
035:
036: /**
037: * A facility to read from and write to CSV (comma separated values) files.
038: *
039: * @author Thomas Mueller, Sylvain Cuaz
040: */
041: public class Csv implements SimpleRowSource {
042:
043: private String charset = StringUtils.getDefaultCharset();
044: private int bufferSize = 8 * 1024;
045: private String[] columnNames;
046: private char fieldSeparatorRead = ',';
047: private char commentLineStart = '#';
048: private String fieldSeparatorWrite = ",";
049: private String rowSeparatorWrite;
050: private char fieldDelimiter = '\"';
051: private char escapeCharacter = '\"';
052: private String lineSeparator = SysProperties.LINE_SEPARATOR;
053: private String nullString = "";
054: private String fileName;
055: private Reader reader;
056: private Writer writer;
057: private int back;
058: private boolean endOfLine, endOfFile;
059:
060: /**
061: * Get a new object of this class.
062: *
063: * @return the new instance
064: */
065: public static Csv getInstance() {
066: return new Csv();
067: }
068:
069: private int writeResultSet(ResultSet rs) throws SQLException {
070: try {
071: ResultSetMetaData meta = rs.getMetaData();
072: int rows = 0;
073: int columnCount = meta.getColumnCount();
074: String[] row = new String[columnCount];
075: for (int i = 0; i < columnCount; i++) {
076: row[i] = meta.getColumnLabel(i + 1);
077: }
078: writeRow(row);
079: while (rs.next()) {
080: for (int i = 0; i < columnCount; i++) {
081: row[i] = rs.getString(i + 1);
082: }
083: writeRow(row);
084: rows++;
085: }
086: writer.close();
087: return rows;
088: } catch (IOException e) {
089: throw Message.convertIOException(e, null);
090: } finally {
091: close();
092: JdbcUtils.closeSilently(rs);
093: }
094: }
095:
096: /**
097: * Writes the result set to a file in the CSV format.
098: *
099: * @param writer
100: * the writer
101: * @param rs
102: * the result set
103: * @return the number of rows written
104: * @throws SQLException,
105: * IOException
106: */
107: public int write(Writer writer, ResultSet rs) throws SQLException,
108: IOException {
109: this .writer = writer;
110: return writeResultSet(rs);
111: }
112:
113: /**
114: * Writes the result set to a file in the CSV format.
115: *
116: * @param fileName
117: * the name of the csv file
118: * @param rs
119: * the result set
120: * @param charset
121: * the charset or null to use UTF-8
122: * @return the number of rows written
123: * @throws SQLException
124: */
125: public int write(String fileName, ResultSet rs, String charset)
126: throws SQLException {
127: init(fileName, charset);
128: try {
129: initWrite();
130: return writeResultSet(rs);
131: } catch (IOException e) {
132: throw convertException("IOException writing " + fileName, e);
133: }
134: }
135:
136: /**
137: * Writes the result set of a query to a file in the CSV format.
138: *
139: * @param conn
140: * the connection
141: * @param fileName
142: * the file name
143: * @param sql
144: * the query
145: * @param charset
146: * the charset or null to use UTF-8
147: * @return the number of rows written
148: * @throws SQLException
149: */
150: public int write(Connection conn, String fileName, String sql,
151: String charset) throws SQLException {
152: Statement stat = conn.createStatement();
153: ResultSet rs = stat.executeQuery(sql);
154: int rows = write(fileName, rs, charset);
155: stat.close();
156: return rows;
157: }
158:
159: /**
160: * Reads from the CSV file and returns a result set. The rows in the result
161: * set are created on demand, that means the file is kept open until all
162: * rows are read or the result set is closed.
163: *
164: * @param fileName the file name
165: * @param colNames or null if the column names should be read from the CSV file
166: * @param charset the charset or null to use UTF-8
167: * @return the result set
168: * @throws SQLException
169: */
170: public ResultSet read(String fileName, String[] colNames,
171: String charset) throws SQLException {
172: init(fileName, charset);
173: try {
174: return readResultSet(colNames);
175: } catch (IOException e) {
176: throw convertException("IOException reading " + fileName, e);
177: }
178: }
179:
180: /**
181: * Reads CSV data from a reader and returns a result set. The rows in the
182: * result set are created on demand, that means the reader is kept open
183: * until all rows are read or the result set is closed.
184: *
185: * @param reader the reader
186: * @param colNames or null if the column names should be read from the CSV file
187: * @return the result set
188: * @throws SQLException, IOException
189: */
190: public ResultSet read(Reader reader, String[] colNames)
191: throws SQLException, IOException {
192: init(null, null);
193: this .reader = reader;
194: return readResultSet(colNames);
195: }
196:
197: private ResultSet readResultSet(String[] colNames)
198: throws SQLException, IOException {
199: this .columnNames = colNames;
200: initRead();
201: SimpleResultSet result = new SimpleResultSet(this );
202: makeColumnNamesUnique();
203: for (int i = 0; i < columnNames.length; i++) {
204: result.addColumn(columnNames[i], Types.VARCHAR,
205: Integer.MAX_VALUE, 0);
206: }
207: return result;
208: }
209:
210: private void makeColumnNamesUnique() {
211: for (int i = 0; i < columnNames.length; i++) {
212: String x = columnNames[i];
213: if (x == null || x.length() == 0) {
214: x = "C" + (i + 1);
215: }
216: for (int j = 0; j < i; j++) {
217: String y = columnNames[j];
218: if (x.equals(y)) {
219: x = x + "1";
220: j = -1;
221: }
222: }
223: columnNames[i] = x;
224: }
225: }
226:
227: private Csv() {
228: }
229:
230: private void init(String fileName, String charset) {
231: this .fileName = fileName;
232: if (charset != null) {
233: this .charset = charset;
234: }
235: }
236:
237: private void initWrite() throws IOException {
238: if (writer == null) {
239: try {
240: OutputStream out = new FileOutputStream(fileName);
241: out = new BufferedOutputStream(out, bufferSize);
242: writer = new BufferedWriter(new OutputStreamWriter(out,
243: charset));
244: } catch (IOException e) {
245: close();
246: throw e;
247: }
248: }
249: }
250:
251: private void writeRow(String[] values) throws IOException {
252: for (int i = 0; i < values.length; i++) {
253: if (i > 0) {
254: if (fieldSeparatorWrite != null) {
255: writer.write(fieldSeparatorWrite);
256: }
257: }
258: String s = values[i];
259: if (s != null) {
260: if (escapeCharacter != 0) {
261: if (fieldDelimiter != 0) {
262: writer.write(fieldDelimiter);
263:
264: }
265: writer.write(escape(s));
266: if (fieldDelimiter != 0) {
267: writer.write(fieldDelimiter);
268: }
269: } else {
270: writer.write(s);
271: }
272: } else if (nullString.length() > 0) {
273: writer.write(nullString);
274: }
275: }
276: if (rowSeparatorWrite != null) {
277: writer.write(rowSeparatorWrite);
278: }
279: writer.write(lineSeparator);
280: }
281:
282: private String escape(String data) {
283: if (data.indexOf(fieldDelimiter) < 0) {
284: if (escapeCharacter == fieldDelimiter
285: || data.indexOf(escapeCharacter) < 0) {
286: return data;
287: }
288: }
289: StringBuffer buff = new StringBuffer(data.length());
290: for (int i = 0; i < data.length(); i++) {
291: char ch = data.charAt(i);
292: if (ch == fieldDelimiter || ch == escapeCharacter) {
293: buff.append(escapeCharacter);
294: }
295: buff.append(ch);
296: }
297: return buff.toString();
298: }
299:
300: private void initRead() throws IOException {
301: if (reader == null) {
302: try {
303: InputStream in = FileUtils
304: .openFileInputStream(fileName);
305: in = new BufferedInputStream(in, bufferSize);
306: reader = new InputStreamReader(in, charset);
307: reader = new BufferedReader(reader);
308: } catch (IOException e) {
309: close();
310: throw e;
311: }
312: }
313: if (columnNames == null) {
314: readHeader();
315: }
316: }
317:
318: private void readHeader() throws IOException {
319: ArrayList list = new ArrayList();
320: while (true) {
321: String v = readValue();
322: if (v == null) {
323: if (endOfLine) {
324: if (endOfFile || list.size() > 0) {
325: break;
326: }
327: } else {
328: list.add("COLUMN" + list.size());
329: }
330: } else {
331: list.add(v);
332: }
333: }
334: columnNames = new String[list.size()];
335: list.toArray(columnNames);
336: }
337:
338: private void pushBack(int ch) {
339: back = ch;
340: }
341:
342: private int readChar() throws IOException {
343: int ch = back;
344: if (ch != -1) {
345: back = -1;
346: return ch;
347: } else if (endOfFile) {
348: return -1;
349: }
350: ch = reader.read();
351: if (ch < 0) {
352: endOfFile = true;
353: close();
354: }
355: return ch;
356: }
357:
358: private String readValue() throws IOException {
359: endOfLine = false;
360: String value = null;
361: outer: while (true) {
362: int ch = readChar();
363: if (ch < 0 || ch == '\r' || ch == '\n') {
364: endOfLine = true;
365: break;
366: } else if (ch <= ' ') {
367: // ignore spaces
368: continue;
369: } else if (ch == fieldSeparatorRead) {
370: // null
371: break;
372: } else if (ch == fieldDelimiter) {
373: // delimited value
374: StringBuffer buff = new StringBuffer();
375: boolean containsEscape = false;
376: while (true) {
377: ch = readChar();
378: if (ch < 0) {
379: value = buff.toString();
380: break outer;
381: } else if (ch == fieldDelimiter) {
382: ch = readChar();
383: if (ch == fieldDelimiter) {
384: buff.append((char) ch);
385: } else {
386: pushBack(ch);
387: break;
388: }
389: } else if (ch == escapeCharacter) {
390: buff.append((char) ch);
391: ch = readChar();
392: if (ch < 0) {
393: break;
394: }
395: containsEscape = true;
396: buff.append((char) ch);
397: } else {
398: buff.append((char) ch);
399: }
400: }
401: value = buff.toString();
402: if (containsEscape) {
403: value = unEscape(value);
404: }
405: while (true) {
406: ch = readChar();
407: if (ch < 0) {
408: break;
409: } else if (ch == ' ' || ch == '\t') {
410: // ignore
411: } else if (ch == fieldSeparatorRead) {
412: break;
413: } else if (ch == '\r' || ch == '\n') {
414: pushBack(ch);
415: endOfLine = true;
416: break;
417: } else {
418: pushBack(ch);
419: break;
420: }
421: }
422: break;
423: } else if (ch == commentLineStart) {
424: // comment until end of line
425: while (true) {
426: ch = readChar();
427: if (ch < 0 || ch == '\r' || ch == '\n') {
428: break;
429: }
430: }
431: endOfLine = true;
432: break;
433: } else {
434: // un-delimited value
435: StringBuffer buff = new StringBuffer();
436: buff.append((char) ch);
437: while (true) {
438: ch = readChar();
439: if (ch == fieldSeparatorRead) {
440: break;
441: } else if (ch == '\r' || ch == '\n') {
442: pushBack(ch);
443: endOfLine = true;
444: break;
445: } else if (ch < 0) {
446: break;
447: }
448: buff.append((char) ch);
449: }
450: // check un-delimited value for nullString
451: value = readNull(buff.toString().trim());
452: break;
453: }
454: }
455: // save memory
456: return StringCache.get(value);
457: }
458:
459: private String readNull(String s) {
460: return s.equals(nullString) ? null : s;
461: }
462:
463: private String unEscape(String s) {
464: StringBuffer buff = new StringBuffer(s.length());
465: int start = 0;
466: char[] chars = null;
467: while (true) {
468: int idx = s.indexOf(escapeCharacter, start);
469: if (idx < 0) {
470: break;
471: }
472: if (chars == null) {
473: chars = s.toCharArray();
474: }
475: buff.append(chars, start, idx - start);
476: if (idx == s.length() - 1) {
477: start = s.length();
478: break;
479: }
480: buff.append(chars[idx + 1]);
481: start = idx + 2;
482: }
483: buff.append(s.substring(start));
484: return buff.toString();
485: }
486:
487: /**
488: * INTERNAL
489: */
490: public Object[] readRow() throws SQLException {
491: if (reader == null) {
492: return null;
493: }
494: String[] row = new String[columnNames.length];
495: try {
496: for (int i = 0;; i++) {
497: String v = readValue();
498: if (v == null) {
499: if (endOfFile && i == 0) {
500: return null;
501: }
502: if (endOfLine) {
503: if (i == 0) {
504: // empty line
505: i--;
506: continue;
507: }
508: break;
509: }
510: }
511: if (i < row.length) {
512: row[i] = v;
513: }
514: }
515: } catch (IOException e) {
516: throw convertException("IOException reading from "
517: + fileName, e);
518: }
519: return row;
520: }
521:
522: private SQLException convertException(String message, Exception e) {
523: SQLException s = new SQLException(message, "CSV");
524: //#ifdef JDK14
525: s.initCause(e);
526: //#endif
527: return s;
528: }
529:
530: /**
531: * INTERNAL
532: */
533: public void close() {
534: IOUtils.closeSilently(reader);
535: reader = null;
536: IOUtils.closeSilently(writer);
537: writer = null;
538: }
539:
540: /**
541: * INTERNAL
542: */
543: public void reset() throws SQLException {
544: throw new SQLException("Method is not supported", "CSV");
545: }
546:
547: /**
548: * Override the field separator for writing. The default is ",".
549: *
550: * @param fieldSeparatorWrite the field separator
551: */
552: public void setFieldSeparatorWrite(String fieldSeparatorWrite) {
553: this .fieldSeparatorWrite = fieldSeparatorWrite;
554: }
555:
556: /**
557: * Get the current field separator for writing.
558: *
559: * @return the field separator
560: */
561: public String getFieldSeparatorWrite() {
562: return fieldSeparatorWrite;
563: }
564:
565: /**
566: * Override the field separator for reading. The default is ','.
567: *
568: * @param fieldSeparatorRead the field separator
569: */
570: public void setFieldSeparatorRead(char fieldSeparatorRead) {
571: this .fieldSeparatorRead = fieldSeparatorRead;
572: }
573:
574: /**
575: * Get the current field separator for reading.
576: *
577: * @return the field separator
578: */
579: public char getFieldSeparatorRead() {
580: return fieldSeparatorRead;
581: }
582:
583: /**
584: * Get the current row separator for writing.
585: *
586: * @return the row separator
587: */
588: public String getRowSeparatorWrite() {
589: return rowSeparatorWrite;
590: }
591:
592: /**
593: * Override the end-of-row marker for writing. The default is null. After
594: * writing the end-of-row marker, a line feed is written (\n or \r\n
595: * depending on the system settings).
596: *
597: * @param rowSeparatorWrite the row separator
598: */
599: public void setRowSeparatorWrite(String rowSeparatorWrite) {
600: this .rowSeparatorWrite = rowSeparatorWrite;
601: }
602:
603: /**
604: * Set the field delimiter. The default is " (a double quote).
605: * 0 means no field delimiter is used.
606: *
607: * @param fieldDelimiter the field delimiter
608: */
609: public void setFieldDelimiter(char fieldDelimiter) {
610: this .fieldDelimiter = fieldDelimiter;
611: }
612:
613: /**
614: * Get the current field delimiter.
615: * 0 means no field delimiter is used.
616: *
617: * @return the field delimiter
618: */
619: public char getFieldDelimiter() {
620: return fieldDelimiter;
621: }
622:
623: /**
624: * Set the escape character (used to escape the field delimiter). The
625: * default is " (a double quote). 0 means no escape character is used.
626: *
627: * @param escapeCharacter the escape character
628: */
629: public void setEscapeCharacter(char escapeCharacter) {
630: this .escapeCharacter = escapeCharacter;
631: }
632:
633: /**
634: * Get the current escape character.
635: * 0 means no escape character is used.
636: *
637: * @return the escape character
638: */
639: public char getEscapeCharacter() {
640: return escapeCharacter;
641: }
642:
643: /**
644: * Set the line separator.
645: *
646: * @param lineSeparator the line separator
647: */
648: public void setLineSeparator(String lineSeparator) {
649: this .lineSeparator = lineSeparator;
650: }
651:
652: /**
653: * Set the value that represents NULL.
654: *
655: * @param nullString the null
656: */
657: public void setNullString(String nullString) {
658: this.nullString = nullString;
659: }
660:
661: }
|