001: /*
002: * CsvLineParser.java
003: *
004: * This file is part of SQL Workbench/J, http://www.sql-workbench.net
005: *
006: * Copyright 2002-2008, Thomas Kellerer
007: * No part of this code maybe reused without the permission of the author
008: *
009: * To contact the author please send an email to: support@sql-workbench.net
010: *
011: */
012: package workbench.util;
013:
014: /**
015: * A class to efficiently parse a delimited line of data.
016: *
017: * A quoted delimiter is recognized, line data spanning multiple lines (i.e.
018: * data with embedded \n) is not recognized.
019: *
020: * @author support@sql-workbench.net
021: */
022: public class CsvLineParser implements LineParser {
023: private String lineData = null;
024: private int len = 0;
025: private int current = 0;
026: private char delimiter;
027: private char quoteChar = 0;
028: private boolean returnEmptyStrings = false;
029: private boolean trimValues = false;
030: private boolean oneMore = false;
031: private QuoteEscapeType escapeType = QuoteEscapeType.none;
032:
033: public CsvLineParser(char delimit) {
034: this .delimiter = delimit;
035: }
036:
037: public CsvLineParser(char delimit, char quote) {
038: this .delimiter = delimit;
039: this .quoteChar = quote;
040: }
041:
042: public CsvLineParser(String line, char delimit, char quote) {
043: this .setLine(line);
044: this .delimiter = delimit;
045: this .quoteChar = quote;
046: }
047:
048: public void setLine(String line) {
049: this .lineData = line;
050: this .len = this .lineData.length();
051: this .current = 0;
052: }
053:
054: /**
055: * Controls how empty strings are returned. If this is set to
056: * true, than an empty element is returned as an empty string
057: * otherwise an empty element is returend as null
058: */
059: public void setReturnEmptyStrings(boolean flag) {
060: this .returnEmptyStrings = flag;
061: }
062:
063: public void setQuoteEscaping(QuoteEscapeType type) {
064: this .escapeType = type;
065: }
066:
067: public boolean hasNext() {
068: return oneMore || current < len;
069: }
070:
071: public String getNext() {
072: // The line ends with the delimiter
073: // so we have to return an empty string
074: if (oneMore) {
075: oneMore = false;
076: if (returnEmptyStrings)
077: return "";
078: else
079: return null;
080: }
081:
082: int beginField = current;
083: boolean inQuotes = false;
084: int endOffset = 0;
085: while (current < len) {
086: char c = this .lineData.charAt(current);
087: if (!inQuotes && (c == delimiter)) {
088: break;
089: }
090: if (c == this .quoteChar) {
091: // don't return the quote at the end
092: if (inQuotes)
093: endOffset = 1;
094:
095: // don't return the quote at the beginning
096: if (current == beginField)
097: beginField++;
098:
099: if (this .escapeType == QuoteEscapeType.escape) {
100: char last = 0;
101: if (current > 1)
102: last = this .lineData.charAt(current - 1);
103: if (last != '\\') {
104: inQuotes = !inQuotes;
105: }
106: } else if (this .escapeType == QuoteEscapeType.duplicate) {
107: char next = 0;
108: if (current < lineData.length() - 1)
109: next = this .lineData.charAt(current + 1);
110: if (next == '"') {
111: current++;
112: } else {
113: inQuotes = !inQuotes;
114: }
115: } else {
116: inQuotes = !inQuotes;
117: }
118: }
119:
120: current++;
121: }
122:
123: String next = null;
124: if (current - endOffset > beginField) {
125: next = this .lineData.substring(beginField, current
126: - endOffset);
127: }
128:
129: this .current++; // skip the delimiter
130: if (current == len && lineData.charAt(current - 1) == delimiter) {
131: // if the line ends with the delimiter, we have one more
132: // (empty) element
133: oneMore = true;
134: }
135:
136: if (next != null) {
137: if (this .escapeType == QuoteEscapeType.escape) {
138: next = StringUtil.replace(next, "\\", "");
139: } else if (this .escapeType == QuoteEscapeType.duplicate) {
140: next = StringUtil.replace(next, "\"\"", "\"");
141: }
142: }
143:
144: if (this .returnEmptyStrings && next == null)
145: next = StringUtil.EMPTY_STRING;
146: if (trimValues && next != null)
147: return next.trim();
148: else
149: return next;
150: }
151:
152: public void setTrimValues(boolean trimValues) {
153: this.trimValues = trimValues;
154: }
155:
156: }
|