001: /*
002: * CSVReader.java
003: *
004: * Copyright (C) 2005 Anupam Sengupta (anupamsg@users.sourceforge.net)
005: *
006: * This program is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU General Public License
008: * as published by the Free Software Foundation; either version 2
009: * of the License, or (at your option) any later version.
010: *
011: * This program is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: * GNU General Public License for more details.
015: *
016: * You should have received a copy of the GNU General Public License
017: * along with this program; if not, write to the Free Software
018: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
019: *
020: * Version $Revision: 1.3 $
021: */
022: package net.sf.anupam.csv;
023:
024: import com.Ostermiller.util.CSVParse;
025: import com.Ostermiller.util.ExcelCSVParser;
026: import org.apache.commons.collections.CollectionUtils;
027: import org.apache.commons.logging.Log;
028: import org.apache.commons.logging.LogFactory;
029:
030: import java.io.IOException;
031: import java.io.Reader;
032: import java.util.ArrayList;
033: import java.util.Iterator;
034: import java.util.List;
035: import java.util.NoSuchElementException;
036:
037: /**
038: * Reads a CSV file and parses the individual fields for each CSV record in the
039: * file. The default delimiter is assumed to be the <code>,</code> (comma).
040: * <p/>
041: * <p/>
042: * The class uses the CSV Parser engines from <a
043: * href="http://ostermiller.org/utils/" target="_blank">Steven Ostermiller's
044: * site</a>.
045: * </p>
046: *
047: * @author Anupam Sengupta
048: * @version $Revision: 1.3 $
049: * @see com.Ostermiller.util.CSVParse
050: * @since 1.5
051: */
052: class CSVReader implements Iterable<List<String>> {
053:
054: /**
055: * Logger to use.
056: */
057: private static final Log LOG = LogFactory.getLog(CSVReader.class);
058:
059: /**
060: * The CSV parser engine.
061: */
062: private CSVParse parser;
063:
064: /**
065: * Flag which indicates whether the reader has read all the records.
066: */
067: private boolean readingComplete;
068:
069: /**
070: * Flag which indicates whether the CSV file has a header row.
071: */
072: private boolean headerPresent;
073:
074: /**
075: * Constructor which accepts a reader on the CSV stream to parse. The
076: * presence of a CSV header row is also specified. If present, the header
077: * row will be skipped.
078: *
079: * @param csvReader the CSV stream reader from which to parse
080: * @param headerPresent indicates whether the CSV stream has a header record
081: */
082: public CSVReader(final Reader csvReader, final boolean headerPresent) {
083: super ();
084: this .headerPresent = headerPresent;
085:
086: parser = new ExcelCSVParser(csvReader);
087:
088: }
089:
090: /**
091: * Releases all system resources.
092: */
093: public void close() {
094: try {
095: if (parser != null) {
096: parser.close();
097: LOG.debug("Closed the CSV Reader");
098: }
099: } catch (final IOException e) {
100: // Do nothing
101: } finally {
102: parser = null;
103: }
104: }
105:
106: /**
107: * Finalizes this CSV reader and closes the IO connections.
108: *
109: * @throws Throwable thrown if the finalization fails.
110: * @see Object#finalize()
111: */
112: @Override
113: protected void finalize() throws Throwable {
114: super .finalize();
115: close();
116: }
117:
118: /**
119: * Returns an iterator over the parsed lines. The iterator returns a list of
120: * the CSV field values as a single value over each iteration.
121: *
122: * @return an iterator over the lines.
123: */
124: public Iterator<List<String>> iterator() {
125: return new LineIterator();
126: }
127:
128: // ~ Inner Classes
129: // ----------------------------------------------------------
130:
131: /**
132: * Inner iterator class to provide the Iterable interface to the reader.
133: */
134: private class LineIterator implements Iterator<List<String>> {
135: // ~ Methods
136: // ------------------------------------------------------------
137:
138: /**
139: * The parsed CSV field values.
140: */
141: private String[] parsedValues;
142:
143: /**
144: * Flag indicating whether the previous line was read.
145: */
146: private boolean haveReadPreviousLine;
147:
148: /**
149: * Default Constructor.
150: */
151: public LineIterator() {
152: super ();
153: if (isHeaderPresent()) {
154: readOneLine();
155: }
156: }
157:
158: /**
159: * Returns <code>true</code> if there is at least one more parsed CSV line.
160: *
161: * @return <code>true></code> if there is at least one more parsed line
162: * @see java.util.Iterator#hasNext()
163: */
164: public boolean hasNext() {
165: if (isReadingComplete()) {
166: return false;
167: }
168:
169: if (!haveReadPreviousLine) {
170: readOneLine();
171: haveReadPreviousLine = true;
172: }
173: return !isReadingComplete();
174: }
175:
176: /**
177: * Returns a list of the CSV field values for the current line.
178: *
179: * @return the next list of parsed CSV field values
180: * @see java.util.Iterator#next()
181: */
182: public List<String> next() {
183:
184: if (!haveReadPreviousLine) {
185: readOneLine();
186: } else {
187: haveReadPreviousLine = false;
188: }
189:
190: if (isReadingComplete()) {
191: throw new NoSuchElementException();
192: }
193:
194: final List<String> valueList = new ArrayList<String>(
195: parsedValues.length);
196: CollectionUtils.addAll(valueList, parsedValues);
197:
198: return valueList;
199:
200: }
201:
202: /**
203: * Reads one CSV line using the CSV parser engine and stores the parsed
204: * line fields.
205: */
206: private void readOneLine() {
207: try {
208: parsedValues = getParser().getLine();
209: if (parsedValues == null) {
210: readingIsComplete();
211: }
212: } catch (final IOException e) {
213: LOG.warn(
214: "Error in reading a line from the CSV stream ",
215: e);
216: readingIsComplete();
217: }
218:
219: }
220:
221: /**
222: * This method is not supported.
223: *
224: * @see java.util.Iterator#remove()
225: */
226: public void remove() {
227: LOG
228: .debug("Invalid call to the unsupported remove() method on the iterator");
229: throw new UnsupportedOperationException(
230: "This method is not supported");
231: }
232: }
233:
234: /**
235: * Indicates whether the header row is present or not.
236: *
237: * @return Returns <code>true</code> if the header row is present
238: */
239: public boolean isHeaderPresent() {
240: return this .headerPresent;
241: }
242:
243: /**
244: * Indicates whether the reader has read all CSV lines.
245: *
246: * @return Returns <code>true</code> if all CSV lines have been read
247: */
248: public boolean isReadingComplete() {
249: return this .readingComplete;
250: }
251:
252: /**
253: * Sets the flag to denote that all lines have been read.
254: */
255: protected void readingIsComplete() {
256: this .readingComplete = true;
257: }
258:
259: /**
260: * Returns the internal CSV parser engine instance for this reader.
261: *
262: * @return Returns the parser instance
263: */
264: protected CSVParse getParser() {
265: return this.parser;
266: }
267: }
|