001: /*
002: * Read files in comma separated value format with a first line of labels.
003: *
004: * Copyright (C) 2004 Campbell, Allen T. <allenc28@yahoo.com>
005: *
006: * Copyright (C) 2004 Stephen Ostermiller
007: * http://ostermiller.org/contact.pl?regarding=Java+Utilities
008: *
009: * This program is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * This program is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU General Public License for more details.
018: *
019: * See COPYING.TXT for details.
020: */
021: package com.Ostermiller.util;
022:
023: import java.io.IOException;
024: import java.util.*;
025:
026: /**
027: * Decorate a CSVParse object to provide an index of field names. Many (most?)
028: * CSV files have a list of field names (labels) as the first line. A
029: * LabeledCSVParser will consume this line automatically. The methods
030: * {@link #getLabels()}, {@link #getLabelIndex(String)} and
031: * {@link #getValueByLabel(String)} allow these labels to be discovered and
032: * used while parsing CSV data. This class can also be used to conveniently
033: * ignore field labels if they happen to be present in a CSV file and are not
034: * desired.
035: *
036: * @author Campbell, Allen T. <allenc28@yahoo.com>
037: * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
038: * @since ostermillerutils 1.03.00
039: */
040: public class LabeledCSVParser implements CSVParse {
041:
042: /**
043: * Class which actually does the parsing. Called for most methods.
044: *
045: * @since ostermillerutils 1.03.00
046: */
047: private CSVParse parse;
048:
049: /**
050: * The first line of the CSV file - treated specially as labels.
051: * Set by setLabels.
052: *
053: * @since ostermillerutils 1.03.00
054: */
055: private String[] labels;
056:
057: /**
058: * Hash of the labels (String) to column number (Integer).
059: * Set by setLabels.
060: *
061: * @since ostermillerutils 1.03.00
062: */
063: private Map<String, Integer> labelMap;
064:
065: /**
066: * The last line read from the CSV file. Saved for getValueByLabel().
067: *
068: * @since ostermillerutils 1.03.00
069: */
070: private String[] lastLine;
071:
072: /**
073: * Set whenever nextValue is called and checked when getValueByLabel() is
074: * called to enforce incompatibility between the methods.
075: *
076: * @since ostermillerutils 1.03.00
077: */
078: private int nextValueLine = -2;
079:
080: /**
081: * Construct a LabeledCSVParser on a CSVParse implementation.
082: *
083: * @param parse CSVParse implementation
084: * @throws IOException if an error occurs while reading.
085: *
086: * @since ostermillerutils 1.03.00
087: */
088: @SuppressWarnings("unused")
089: public LabeledCSVParser(CSVParse parse) throws IOException {
090: this .parse = parse;
091: }
092:
093: /**
094: * Change this parser so that it uses a new delimiter.
095: * <p>
096: * The initial character is a comma, the delimiter cannot be changed
097: * to a quote or other character that has special meaning in CSV.
098: *
099: * @param newDelim delimiter to which to switch.
100: * @throws BadDelimiterException if the character cannot be used as a delimiter.
101: *
102: * @since ostermillerutils 1.03.00
103: */
104: public void changeDelimiter(char newDelim)
105: throws BadDelimiterException {
106: parse.changeDelimiter(newDelim);
107: }
108:
109: /**
110: * Change this parser so that it uses a new character for quoting.
111: * <p>
112: * The initial character is a double quote ("), the delimiter cannot be changed
113: * to a comma or other character that has special meaning in CSV.
114: *
115: * @param newQuote character to use for quoting.
116: * @throws BadQuoteException if the character cannot be used as a quote.
117: *
118: * @since ostermillerutils 1.03.00
119: */
120: public void changeQuote(char newQuote) throws BadQuoteException {
121: parse.changeQuote(newQuote);
122: }
123:
124: /**
125: * Get all the values from the file.
126: * <p>
127: * If the file has already been partially read, only the
128: * values that have not already been read will be included.
129: * <p>
130: * Each line of the file that has at least one value will be
131: * represented. Comments and empty lines are ignored.
132: * <p>
133: * The resulting double array may be jagged.
134: * <p>
135: * The last line of the values is saved and may be accessed
136: * by getValueByLabel().
137: *
138: * @return all the values from the file or null if there are no more values.
139: * @throws IOException if an error occurs while reading.
140: *
141: * @since ostermillerutils 1.03.00
142: */
143: public String[][] getAllValues() throws IOException {
144: if (labels == null)
145: setLabels();
146: String[][] allValues = parse.getAllValues();
147: if (allValues == null) {
148: lastLine = null;
149: } else {
150: lastLine = allValues[allValues.length - 1];
151: }
152: return allValues;
153: }
154:
155: /**
156: * Get the line number that the last token came from.
157: * <p>
158: * New line breaks that occur in the middle of a token are not
159: * counted in the line number count.
160: * <p>
161: * The first line of labels does not count towards the line number.
162: *
163: * @return line number or -1 if no tokens have been returned yet.
164: *
165: * @since ostermillerutils 1.03.00
166: */
167: public int getLastLineNumber() {
168: return lastLineNumber();
169: }
170:
171: /**
172: * Get the line number that the last token came from.
173: * <p>
174: * New line breaks that occur in the middle of a token are not
175: * counted in the line number count.
176: * <p>
177: * The first line of labels does not count towards the line number.
178: *
179: * @return line number or -1 if no tokens have been returned yet.
180: *
181: * @since ostermillerutils 1.03.00
182: */
183: public int lastLineNumber() {
184: int lineNum = parse.getLastLineNumber();
185: if (lineNum <= -1)
186: return -1; // Nothing has been read yet
187: if (lineNum == 1)
188: return -1; // only labels have been read
189: return lineNum - 1; // adjust line number to account for the label line
190: }
191:
192: /**
193: * Get all the values from a line.
194: * <p>
195: * If the line has already been partially read, only the values that have not
196: * already been read will be included.
197: * <p>
198: * In addition to returning all the values from a line, LabeledCSVParser
199: * maintains a buffer of the values. This feature allows
200: * {@link #getValueByLabel(String)} to function. In this case
201: * {@link #getLine()} is used simply to iterate CSV data. The iteration ends
202: * when null is returned.
203: * <p>
204: * <b>Note:</b> The methods {@link #nextValue()} and {@link #getAllValues()}
205: * are incompatible with {@link #getValueByLabel(String)} because the former
206: * methods cause the offset of field values to shift and corrupt the internal
207: * buffer maintained by {@link #getLine}.
208: *
209: * @return all the values from the line or null if there are no more values.
210: * @throws IOException if an error occurs while reading.
211: *
212: * @since ostermillerutils 1.03.00
213: */
214: public String[] getLine() throws IOException {
215: if (labels == null)
216: setLabels();
217: lastLine = parse.getLine();
218: return lastLine;
219: }
220:
221: /**
222: * Read the next value from the file. The line number from
223: * which this value was taken can be obtained from getLastLineNumber().
224: * <p>
225: * This method is not compatible with getValueByLabel(). Using this
226: * method will make getValueByLabel() throw an IllegalStateException
227: * for the rest of the line.
228: *
229: * @return the next value or null if there are no more values.
230: * @throws IOException if an error occurs while reading.
231: *
232: * @since ostermillerutils 1.03.00
233: */
234: public String nextValue() throws IOException {
235: if (labels == null)
236: setLabels();
237: String nextValue = parse.nextValue();
238: nextValueLine = getLastLineNumber();
239: return nextValue;
240: }
241:
242: /**
243: * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap
244: * member.
245: *
246: * @throws IOException if an IO error occurs
247: *
248: * @since ostermillerutils 1.03.00
249: */
250: private void setLabels() throws IOException {
251: labels = parse.getLine();
252: if (labels == null)
253: return;
254: labelMap = new HashMap<String, Integer>();
255: for (int i = 0; i < labels.length; i++) {
256: labelMap.put(labels[i], new Integer(i));
257: }
258: }
259:
260: /**
261: * Return an array of all field names from the top
262: * of the CSV file.
263: *
264: * @return Field names.
265: * @throws IOException if an IO error occurs
266: *
267: * @since ostermillerutils 1.03.00
268: */
269: public String[] getLabels() throws IOException {
270: if (labels == null)
271: setLabels();
272: return labels;
273: }
274:
275: /**
276: * Get the index of the column having the given label.
277: * The {@link #getLine()} method returns an
278: * array of field values for a single record of data. This method returns
279: * the index of a member of that array based on the specified field name.
280: * The first field has the index 0.
281: *
282: * @param label The field name.
283: * @return The index of the field name, or -1 if the label does not exist.
284: * @deprecated may swallow an IOException while reading the labels - please use getLabelIdx()
285: *
286: * @since ostermillerutils 1.03.00
287: */
288: @Deprecated
289: public int getLabelIndex(String label) {
290: try {
291: return getLabelIdx(label);
292: } catch (IOException iox) {
293: return -1;
294: }
295: }
296:
297: /**
298: * Get the index of the column having the given label.
299: * The {@link #getLine()} method returns an
300: * array of field values for a single record of data. This method returns
301: * the index of a member of that array based on the specified field name.
302: * The first field has the index 0.
303: *
304: * @param label The field name.
305: * @return The index of the field name, or -1 if the label does not exist.
306: * @throws IOException if an IO error occurs
307: *
308: * @since ostermillerutils 1.04.02
309: */
310: public int getLabelIdx(String label) throws IOException {
311: if (labels == null)
312: setLabels();
313: if (labelMap == null)
314: return -1;
315: if (!labelMap.containsKey(label))
316: return -1;
317: return (labelMap.get(label)).intValue();
318: }
319:
320: /**
321: * Given the label for the column, get the column from the last line that
322: * was read. If the column cannot be found in the line, null is returned.
323: *
324: * @param label The field name.
325: * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method.
326: * @return the value from the last line read or null if there is no such value
327: *
328: * @since ostermillerutils 1.03.00
329: */
330: public String getValueByLabel(String label)
331: throws IllegalStateException {
332: if (nextValueLine == getLastLineNumber())
333: throw new IllegalStateException(
334: "nextValue() was used to get values from this line.");
335: if (lastLine == null)
336: return null;
337: int fieldIndex;
338: try {
339: fieldIndex = getLabelIdx(label);
340: } catch (IOException iox) {
341: // Can't happen here because the labels have been read before the first line.
342: throw new RuntimeException(iox);
343: }
344: if (fieldIndex == -1)
345: return null;
346: if (fieldIndex >= lastLine.length)
347: return null;
348: return lastLine[fieldIndex];
349: }
350:
351: /**
352: * Close any stream upon which this parser is based.
353: *
354: * @throws IOException if an error occurs while closing the stream.
355: *
356: * @since ostermillerutils 1.03.00
357: */
358: public void close() throws IOException {
359: parse.close();
360: }
361: }
|