001: /**
002: Copyright (C) 2002-2003 Together
003:
004: This library is free software; you can redistribute it and/or
005: modify it under the terms of the GNU Lesser General Public
006: License as published by the Free Software Foundation; either
007: version 2.1 of the License, or (at your option) any later version.
008:
009: This library is distributed in the hope that it will be useful,
010: but WITHOUT ANY WARRANTY; without even the implied warranty of
011: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: Lesser General Public License for more details.
013:
014: You should have received a copy of the GNU Lesser General Public
015: License along with this library; if not, write to the Free Software
016: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
017:
018: */package org.relique.jdbc.csv;
019:
020: import java.io.BufferedReader;
021: import java.io.File;
022: import java.io.FileInputStream;
023: import java.io.IOException;
024: import java.io.InputStreamReader;
025: import java.sql.SQLException;
026: import java.util.ArrayList;
027: import java.util.HashMap;
028: import java.util.Map;
029: import java.util.Vector;
030:
031: /**
032: * This class is a helper class that handles the reading and parsing of data
033: * from a .csv file.
034: *
035: * @author Zoran Milakovic
036: */
037:
038: public class CsvReader {
039: private BufferedReader bufReader;
040: private CsvRandomAccessFile randomReader;
041: private String[] columnNames;
042: private Vector colTypes = new Vector();
043: private Map columnTypes1 = new KeyInsensitiveHashMap();
044: private Map columnTypes = new KeyInsensitiveHashMap();
045: private String[] columns;
046: private java.lang.String buf = null;
047: private char separator = CsvDriver.DEFAULT_SEPARATOR;
048: private long maxFileSize = CsvDriver.DEFAULT_FILE_MAXSIZE;
049: private String extension = CsvDriver.DEFAULT_EXTENSION;
050: private boolean suppressHeaders = false;
051: private String lineBreakEscape = CsvDriver.DEFAULT_LINE_BREAK_ESCAPE;
052: private String doubleQuoteEscape = CsvDriver.DEFAULT_DOUBLE_QUOTE_ESCAPE;
053: private String carriageReturnEscape = CsvDriver.DEFAULT_CARRIAGE_RETURN_ESCAPE;
054: private String tableName;
055: private String fileName;
056: private String charset = null;
057: private boolean trimString = false;
058:
059: /**
060: *
061: * @param fileName
062: * @param separator
063: * @param suppressHeaders
064: * @param charset
065: * @param extension
066: * @throws java.lang.Exception
067: */
068: public CsvReader(String fileName, char separator,
069: boolean suppressHeaders, String charset, String extension,
070: String lineBreakEscape, String carriageReturnEscape,
071: boolean trimString) throws java.lang.Exception {
072: this .separator = separator;
073: this .suppressHeaders = suppressHeaders;
074: this .fileName = fileName;
075: this .charset = charset;
076: this .lineBreakEscape = lineBreakEscape;
077: this .carriageReturnEscape = carriageReturnEscape;
078: this .trimString = trimString;
079: if (extension != null)
080: this .extension = extension;
081:
082: if (charset != null) {
083: if (Utils.isUTF16(charset))
084: randomReader = new CsvRandomAccessFile(fileName,
085: charset);
086: else
087: bufReader = new BufferedReader(new InputStreamReader(
088: new FileInputStream(fileName), charset));
089: } else {
090: bufReader = new BufferedReader(new InputStreamReader(
091: new FileInputStream(fileName)));
092: }
093: if (this .suppressHeaders) {
094: // No column names available.
095: // Read first data line and determine number of colums.
096: buf = this .readLine();
097: String[] data = parseCsvLineAsHeader(buf);
098: columnNames = new String[data.length];
099: String[] columnType = new String[colTypes.size()];
100: colTypes.copyInto(columnType);
101: for (int i = 0; i < data.length; i++) {
102: columnNames[i] = "COLUMN" + String.valueOf(i + 1);
103: columnTypes.put(columnNames[i], columnType[i]);
104: }
105: data = null;
106: } else {
107: String headerLine = this .readLine();
108: columnNames = parseCsvLineAsHeader(headerLine);
109: columnTypes = columnTypes1;
110: }
111: }
112:
113: /**
114: * Gets the columnNames attribute of the CsvReader object
115: *
116: * @return The columnNames value
117: */
118: public String[] getColumnNames() {
119: return columnNames;
120: }
121:
122: /**
123: *
124: * @return array with column types
125: */
126: public Map getColumnTypes() {
127: return columnTypes;
128: }
129:
130: public String getTableName() {
131: if (tableName != null)
132: return tableName;
133:
134: int lastSlash = 0;
135: for (int i = fileName.length() - 1; i >= 0; i--)
136: if (fileName.charAt(i) == '/' || fileName.charAt(i) == '\\') {
137: lastSlash = i;
138: break;
139: }
140: tableName = fileName.substring(lastSlash + 1,
141: fileName.length() - 4);
142: return tableName;
143: }
144:
145: /**
146: * Get the value of the column at the specified index.
147: *
148: * @param columnIndex Description of Parameter
149: * @return The column value
150: * @since
151: */
152:
153: public String getColumn(int columnIndex) throws SQLException {
154: if (columnIndex >= columns.length) {
155: return null;
156: }
157: return formatString(columns[columnIndex]);
158: }
159:
160: /**
161: * Get value from column at specified name.
162: * If the column name is not found, throw an error.
163: *
164: * @param columnName Description of Parameter
165: * @return The column value
166: * @exception SQLException Description of Exception
167: * @since
168: */
169:
170: public String getColumn(String columnName) throws SQLException {
171: for (int loop = 0; loop < columnNames.length; loop++) {
172: if (columnName.equalsIgnoreCase(columnNames[loop])
173: || columnName.equalsIgnoreCase(getTableName() + "."
174: + columnNames[loop])) {
175: return getColumn(loop);
176: }
177: }
178: throw new SQLException("Column '" + columnName + "' not found.");
179: }
180:
181: /**
182: *Description of the Method
183: *
184: * @return Description of the Returned Value
185: * @exception SQLException Description of Exception
186: * @since
187: */
188: public boolean next() throws SQLException {
189: columns = new String[columnNames.length];
190: String dataLine = null;
191: try {
192: if (suppressHeaders && (buf != null)) {
193: // The buffer is not empty yet, so use this first.
194: dataLine = buf;
195: buf = null;
196: } else {
197: // read new line of data from input.
198: dataLine = this .readLine();
199: }
200: if (dataLine == null) {
201: String nextFileName = getNextFileName();
202: if (new File(nextFileName).exists()) {
203: this .fileName = nextFileName;
204: if (charset != null) {
205: if (Utils.isUTF16(charset))
206: randomReader = new CsvRandomAccessFile(
207: fileName, charset);
208: else
209: bufReader = new BufferedReader(
210: new InputStreamReader(
211: new FileInputStream(
212: fileName), charset));
213: } else {
214: bufReader = new BufferedReader(
215: new InputStreamReader(
216: new FileInputStream(fileName)));
217: }
218: //skip header
219: dataLine = this .readLine();
220: dataLine = this .readLine();
221: } else {
222: this .closeInputs();
223: return false;
224: }
225: }
226:
227: } catch (IOException e) {
228: throw new SQLException(e.toString());
229: }
230: columns = parseCsvLine(dataLine);
231: return true;
232: }
233:
234: private String getNextFileName() {
235: String currentFileName = this .fileName;
236: String newName = "";
237: String number = "";
238: //name without extension
239: String currentFileExtension = currentFileName.substring(
240: currentFileName.lastIndexOf("."), currentFileName
241: .length());
242: currentFileName = currentFileName.substring(0, currentFileName
243: .lastIndexOf("."));
244: if (currentFileExtension.endsWith(CsvDriver.FILE_NAME_EXT)) {
245: number += currentFileName.substring(currentFileName
246: .length() - 3, currentFileName.length());
247: long num = Long.valueOf(number).longValue() + 1;
248: if (num >= 100 && num < 1000)
249: number = String.valueOf(num);
250: else if (num >= 10 && num < 100)
251: number = "0" + String.valueOf(num);
252: else if (num > 1 && num < 10)
253: number = "00" + String.valueOf(num);
254: currentFileName = currentFileName.substring(0,
255: currentFileName.length() - 3);
256: newName = currentFileName + number + currentFileExtension;
257: } else {
258: newName = currentFileName.toUpperCase() + "001"
259: + this .extension + CsvDriver.FILE_NAME_EXT;
260: }
261: return newName;
262: }
263:
264: /**
265: *Description of the Method
266: *
267: * @since
268: */
269: public void close() {
270: try {
271: this .closeInputs();
272: buf = null;
273: } catch (Exception e) {
274: }
275: }
276:
277: /**
278: *
279: * Parse csv line with columnTypes.
280: *
281: * @param line
282: * @return array with values or column names.
283: * @throws SQLException
284: */
285: protected String[] parseCsvLine(String line) throws SQLException {
286: ArrayList values = new ArrayList();
287: boolean inQuotedString = false;
288: String value = "";
289: String orgLine = line;
290: int currentPos = 0;
291: int fullLine = 0;
292: int currentColumn = 0;
293: int indexOfBinaryObject = 0;
294: char currentChar;
295: line += separator;
296: long lineLength = line.length();
297: while (fullLine == 0) {
298: currentPos = 0;
299: while (currentPos < lineLength) {
300:
301: //handle BINARY columns
302: if (!(this .columnTypes.size() <= currentColumn)) {
303: if (this .columnTypes
304: .get(columnNames[currentColumn]).equals(
305: CsvDriver.BINARY_TYPE)) {
306: String binaryValue = "";
307: currentChar = line.charAt(currentPos);
308: if (currentChar == ',') {
309: values.add(binaryValue); //binary value is null;
310: currentPos++;
311: } else if (currentChar == '"') {
312: if (line.charAt(currentPos + 1) == '"') {
313: values.add(binaryValue); //binary value is null
314: currentPos = currentPos + 3;
315: } else {
316: // take all until next separator, and that is value
317: // do not insert BinaryObject+index into line, just set right currentPos
318: // and insert value into vector
319: // binary value is always beteween quotes (")
320: binaryValue = line
321: .substring(currentPos);
322: binaryValue = binaryValue
323: .substring(1, binaryValue
324: .indexOf(separator) - 1);
325: values.add(binaryValue);
326: currentPos += binaryValue.length() + 3;
327: }
328: }
329: //set currentColumn++
330: currentColumn++;
331: continue;
332: }
333: } else {
334: throw new SQLException(
335: "Invalid csv format : file = "
336: + new File(fileName)
337: .getAbsolutePath()
338: + ", line = " + line);
339: }
340:
341: //parse one by one character
342: currentChar = line.charAt(currentPos);
343: if (value.length() == 0 && currentChar == '"'
344: && !inQuotedString) {
345: //enter here if we are at start of column value
346: currentPos++;
347: inQuotedString = true;
348: continue;
349: }
350:
351: if (currentChar == '"') {
352: //get next character
353: char nextChar = line.charAt(currentPos + 1);
354: //if we have "", consider it as ", and add it to value
355: if (nextChar == '"') {
356: value += currentChar;
357: currentPos++;
358: } else {
359: //enter here if we are at end of column value
360: // if (!inQuotedString) {
361: // throw new SQLException("Unexpected '\"' in position " +
362: // currentPos + ". Line=" + orgLine);
363: // }
364: if (inQuotedString && nextChar == separator) {
365: //throw new SQLException("Expecting " + separator +
366: // " in position " + (currentPos + 1) +
367: // ". Line=" + orgLine);
368:
369: //set currentPos to comma after value
370: currentPos++;
371: //if value is empty string between double quotes consider it as empty string
372: //else if value is empty string between commas consider it as null value
373: if (this .trimString)
374: value = value.trim();
375: values.add(value);
376: currentColumn++;
377: value = "";
378: inQuotedString = false;
379: } else {
380: if (nextChar != separator) {
381: value += currentChar;
382: }
383: }
384: }
385: }
386:
387: else {
388: //when we are at end of column value, and value is not inside of double quotes
389: if (currentChar == separator) {
390: //when have separator in data
391: if (inQuotedString) {
392: value += currentChar;
393: } else {
394: //if value is empty string between double quotes consider it as empty string
395: //else if value is empty string between commas consider it as null value
396: if (this .trimString)
397: value = value.trim();
398: if (value.equals(""))
399: value = null;
400:
401: values.add(value);
402: currentColumn++;
403: value = "";
404: }
405: } else {
406: value += currentChar;
407: }
408: }
409:
410: currentPos++;
411: } //end while
412:
413: if (inQuotedString) {
414: // Remove extra , added at start
415: value = value.substring(0, value.length() - 1);
416: try {
417: line = this .readLine();
418: } catch (IOException e) {
419: throw new SQLException(e.toString());
420: }
421: } else {
422: fullLine = 1;
423: }
424:
425: }// end while( fullLine == 0 )
426: String[] retVal = new String[values.size()];
427: values.toArray(retVal);
428:
429: return retVal;
430: }
431:
432: /**
433: *
434: * Parse csv line, whithout columnTypes.
435: *
436: * @param line
437: * @return array with values or column names.
438: * @throws SQLException
439: */
440: protected String[] parseCsvLineAsHeader(String line)
441: throws SQLException {
442: Vector values = new Vector();
443: // ArrayList columnTypesList = new ArrayList();
444: boolean inQuotedString = false;
445: String value = "";
446: String orgLine = line;
447: int currentPos = 0;
448: int fullLine = 0;
449:
450: while (fullLine == 0) {
451: currentPos = 0;
452: line += separator;
453: while (currentPos < line.length()) {
454: char currentChar = line.charAt(currentPos);
455: if (value.length() == 0 && currentChar == '"'
456: && !inQuotedString) {
457: currentPos++;
458: inQuotedString = true;
459: continue;
460: }
461: if (currentChar == '"') {
462: char nextChar = line.charAt(currentPos + 1);
463: if (nextChar == '"') {
464: value += currentChar;
465: currentPos++;
466: } else {
467: // if (!inQuotedString) {
468: // throw new SQLException("Unexpected '\"' in position " +
469: // currentPos + ". Line=" + orgLine);
470: // }
471: if (inQuotedString && nextChar == separator) {
472: // throw new SQLException("Expecting " + separator + " in position " +
473: // (currentPos + 1) + ". Line=" + orgLine);
474: // }
475: if (this .trimString)
476: value = value.trim();
477: if (value.endsWith("-"
478: + CsvDriver.BINARY_TYPE)) {
479: value = value
480: .substring(
481: 0,
482: value
483: .indexOf("-"
484: + CsvDriver.BINARY_TYPE));
485: columnTypes1.put(value,
486: CsvDriver.BINARY_TYPE);
487: colTypes.add(CsvDriver.BINARY_TYPE);
488: } else {
489: columnTypes1.put(value,
490: CsvDriver.VARCHAR_TYPE);
491: colTypes.add(CsvDriver.VARCHAR_TYPE);
492: }
493: values.add(value);
494: value = "";
495: inQuotedString = false;
496: currentPos++;
497: } else {
498: if (nextChar != separator) {
499: value += currentChar;
500: }
501: }
502: }
503: } else {
504: if (currentChar == separator) {
505: if (inQuotedString) {
506: value += currentChar;
507: } else {
508: if (this .trimString)
509: value = value.trim();
510: if (value.endsWith("-"
511: + CsvDriver.BINARY_TYPE)) {
512: value = value
513: .substring(
514: 0,
515: value
516: .indexOf("-"
517: + CsvDriver.BINARY_TYPE));
518: columnTypes1.put(value,
519: CsvDriver.BINARY_TYPE);
520: colTypes.add(CsvDriver.BINARY_TYPE);
521: } else {
522: columnTypes1.put(value,
523: CsvDriver.VARCHAR_TYPE);
524: colTypes.add(CsvDriver.VARCHAR_TYPE);
525: }
526: values.add(value);
527: value = "";
528: }
529: } else {
530: value += currentChar;
531: }
532: }
533: currentPos++;
534: }
535: if (inQuotedString) {
536: value = value.substring(0, value.length() - 1);
537: try {
538: line = this .readLine();
539: } catch (IOException e) {
540: throw new SQLException(e.toString());
541: }
542: } else {
543: fullLine = 1;
544: }
545: }
546: String[] retVal = new String[values.size()];
547: values.copyInto(retVal);
548:
549: return retVal;
550:
551: }
552:
553: private String formatString(String str) throws SQLException {
554: String retValue = str;
555: try {
556: //replace spec. characters
557: retValue = Utils.replaceAll(retValue, this .lineBreakEscape,
558: "\n");
559: retValue = Utils.replaceAll(retValue,
560: this .carriageReturnEscape, "\r");
561: } catch (Exception e) {
562: throw new SQLException("Error while reformat string ! : "
563: + str);
564: }
565: return retValue;
566: }
567:
568: private String readLine() throws IOException {
569: String retVal = "";
570: if (Utils.isUTF16(this .charset)) {
571: retVal = this .randomReader.readCsvLine();
572: } else {
573: retVal = bufReader.readLine();
574: }
575: return retVal;
576: }
577:
578: private void closeInputs() throws IOException {
579: if (!Utils.isUTF16(this.charset)) {
580: if (this.bufReader != null)
581: bufReader.close();
582: } else {
583: if (this.randomReader != null)
584: randomReader.close();
585: }
586: }
587:
588: }
|