// Csv Reader : CSV File « Development Class


 

/*

 * Java CSV is a stream based library for reading and writing

 * CSV and other delimited data.

 *   

 * Copyright (C) Bruce Dunwiddie bruce@csvreader.com

 *

 * This library is free software; you can redistribute it and/or

 * modify it under the terms of the GNU Lesser General Public

 * License as published by the Free Software Foundation; either

 * version 2.1 of the License, or (at your option) any later version.

 *

 * This library is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 * Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with this library; if not, write to the Free Software

 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

 */



import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.io.Reader;

import java.io.StringReader;

import java.nio.charset.Charset;

import java.text.NumberFormat;

import java.util.HashMap;



/**

 * A stream based parser for parsing delimited text data from a file or a

 * stream.

 */

public class CsvReader {

  // Character source to parse. Assigned by the Reader-based constructor;
  // the file-name constructor leaves it null and records fileName/charset
  // instead.
  private Reader inputStream = null;

  // Path of the data file when constructed from a file name, otherwise null.
  private String fileName = null;

  // this holds all the values for switches that the user is allowed to set
  private UserSettings userSettings = new UserSettings();

  // Charset given to the file-name constructor; stays null when the reader
  // was constructed from a Reader.
  private Charset charset = null;

  // Becomes true once setRecordDelimiter() has been called. readRecord() then
  // ends records on userSettings.RecordDelimiter instead of on CR / LF.
  private boolean useCustomRecordDelimiter = false;

  // this will be our working buffer to hold data chunks
  // read in from the data file
  private DataBuffer dataBuffer = new DataBuffer();

  // NOTE(review): presumably accumulates the pieces of the column currently
  // being parsed; its Position takes part in the column-length safety check
  // in readRecord(). Confirm against ColumnBuffer.
  private ColumnBuffer columnBuffer = new ColumnBuffer();

  // readRecord() resets rawBuffer.Position to 0 at the start of every record.
  // NOTE(review): presumably backs getRawRecord() - confirm.
  private RawRecordBuffer rawBuffer = new RawRecordBuffer();

  // Allocated in the constructors with the same length as values.
  // NOTE(review): presumably one flag per column telling whether that column
  // was text qualified - confirm against endColumn().
  private boolean[] isQualified = null;

  // Value handed back by getRawRecord(); starts out as the empty string.
  private String rawRecord = "";

  // Header names, their count and the name-to-index lookup (see setHeaders()).
  private HeadersHolder headersHolder = new HeadersHolder();

  // these are all more or less global loop variables
  // to keep from needing to pass them all into various
  // methods during parsing

  // Set to true by readRecord() as soon as a column has been entered.
  private boolean startedColumn = false;

  // Set to true by readRecord() when the column opened with the text
  // qualifier; cleared at the top of each outer loop pass.
  private boolean startedWithQualifier = false;

  // readRecord() only parses while this is true.
  // NOTE(review): presumably cleared once the source is exhausted - confirm.
  private boolean hasMoreData = true;

  // Most recent character examined by readRecord().
  private char lastLetter = '\0';

  // Reset to false at the start of every readRecord() call.
  private boolean hasReadNextLine = false;

  // Number of columns in the current record; reset to 0 by readRecord() and
  // reported by getColumnCount().
  private int columnsCount = 0;

  // Record counter; getCurrentRecord() reports this value minus one.
  private long currentRecord = 0;

  // Column values of the current record. Only the first columnsCount entries
  // are meaningful (see getValues()).
  private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];

  // Set to true by the Reader-based constructor; the file-name constructor
  // leaves it false.
  // NOTE(review): presumably the file is opened lazily on first read - confirm.
  private boolean initialized = false;

  // NOTE(review): presumably set by close() and tested by checkClosed();
  // neither is visible in this part of the file.
  private boolean closed = false;



  /**
   * Escape mode accepted by {@link #setEscapeMode(int)}: double up the text
   * qualifier to represent an occurrence of the text qualifier.
   */
  public static final int ESCAPE_MODE_DOUBLED = 1;

  /**
   * Escape mode accepted by {@link #setEscapeMode(int)}: use a backslash
   * character before the text qualifier to represent an occurrence of the
   * text qualifier.
   */
  public static final int ESCAPE_MODE_BACKSLASH = 2;



  /**

   * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file

   * as the data source.

   * 

   * @param fileName

   *            The path to the file to use as the data source.

   * @param delimiter

   *            The character to use as the column delimiter.

   * @param charset

   *            The {@link java.nio.charset.Charset Charset} to use while

   *            parsing the data.

   */

  public CsvReader(String fileName, char delimiter, Charset charset)

      throws FileNotFoundException {

    if (fileName == null) {

      throw new IllegalArgumentException(

          "Parameter fileName can not be null.");

    }



    if (charset == null) {

      throw new IllegalArgumentException(

          "Parameter charset can not be null.");

    }



    if (!new File(fileName).exists()) {

      throw new FileNotFoundException("File " + fileName

          + " does not exist.");

    }



    this.fileName = fileName;

    this.userSettings.Delimiter = delimiter;

    this.charset = charset;



    isQualified = new boolean[values.length];

  }



  /**
   * Creates a CsvReader that takes its data from a file, decoding it as
   * ISO-8859-1.
   *
   * <p>Delegates to the (fileName, delimiter, charset) constructor, so the
   * same argument and file-existence checks apply.
   *
   * @param fileName
   *            The path to the file to use as the data source.
   * @param delimiter
   *            The character to use as the column delimiter.
   * @exception IllegalArgumentException
   *                If fileName is null.
   * @exception FileNotFoundException
   *                If nothing exists at the given path.
   */
  public CsvReader(String fileName, char delimiter)
      throws FileNotFoundException {
    this(fileName, delimiter, Charset.forName("ISO-8859-1"));
  }



  /**
   * Creates a CsvReader that takes its data from a file, using a comma as
   * the column delimiter and ISO-8859-1 as the
   * {@link java.nio.charset.Charset Charset}.
   *
   * <p>Delegates to the (fileName, delimiter) constructor with
   * Letters.COMMA.
   *
   * @param fileName
   *            The path to the file to use as the data source.
   * @exception IllegalArgumentException
   *                If fileName is null.
   * @exception FileNotFoundException
   *                If nothing exists at the given path.
   */
  public CsvReader(String fileName) throws FileNotFoundException {
    this(fileName, Letters.COMMA);
  }



  /**

   * Constructs a {@link com.csvreader.CsvReader CsvReader} object using a

   * {@link java.io.Reader Reader} object as the data source.

   * 

   * @param inputStream

   *            The stream to use as the data source.

   * @param delimiter

   *            The character to use as the column delimiter.

   */

  public CsvReader(Reader inputStream, char delimiter) {

    if (inputStream == null) {

      throw new IllegalArgumentException(

          "Parameter inputStream can not be null.");

    }



    this.inputStream = inputStream;

    this.userSettings.Delimiter = delimiter;

    initialized = true;



    isQualified = new boolean[values.length];

  }



  /**
   * Creates a CsvReader that takes its data from a
   * {@link java.io.Reader Reader}, using a comma as the column delimiter.
   *
   * <p>Delegates to the (Reader, delimiter) constructor with Letters.COMMA.
   *
   * @param inputStream
   *            The stream to use as the data source.
   * @exception IllegalArgumentException
   *                If inputStream is null.
   */
  public CsvReader(Reader inputStream) {
    this(inputStream, Letters.COMMA);
  }



  /**
   * Creates a CsvReader that takes its data from an
   * {@link java.io.InputStream InputStream}.
   *
   * <p>The stream is wrapped in an {@link java.io.InputStreamReader
   * InputStreamReader} that decodes it with the given charset, and the result
   * is handed to the (Reader, delimiter) constructor. Neither inputStream nor
   * charset is validated here; both are passed straight to the
   * InputStreamReader constructor.
   *
   * @param inputStream
   *            The stream to use as the data source.
   * @param delimiter
   *            The character to use as the column delimiter.
   * @param charset
   *            The {@link java.nio.charset.Charset Charset} to use while
   *            parsing the data.
   */
  public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
    this(new InputStreamReader(inputStream, charset), delimiter);
  }



  /**
   * Creates a CsvReader that takes its data from an
   * {@link java.io.InputStream InputStream}, using a comma as the column
   * delimiter.
   *
   * <p>The stream is wrapped in an {@link java.io.InputStreamReader
   * InputStreamReader} that decodes it with the given charset, and the result
   * is handed to the single-argument Reader constructor. Neither argument is
   * validated here.
   *
   * @param inputStream
   *            The stream to use as the data source.
   * @param charset
   *            The {@link java.nio.charset.Charset Charset} to use while
   *            parsing the data.
   */
  public CsvReader(InputStream inputStream, Charset charset) {
    this(new InputStreamReader(inputStream, charset));
  }



  public boolean getCaptureRawRecord() {

    return userSettings.CaptureRawRecord;

  }



  public void setCaptureRawRecord(boolean captureRawRecord) {

    userSettings.CaptureRawRecord = captureRawRecord;

  }



  public String getRawRecord() {

    return rawRecord;

  }



  /**

   * Gets whether leading and trailing whitespace characters are being trimmed

   * from non-textqualified column data. Default is true.

   * 

   * @return Whether leading and trailing whitespace characters are being

   *         trimmed from non-textqualified column data.

   */

  public boolean getTrimWhitespace() {

    return userSettings.TrimWhitespace;

  }



  /**

   * Sets whether leading and trailing whitespace characters should be trimmed

   * from non-textqualified column data or not. Default is true.

   * 

   * @param trimWhitespace

   *            Whether leading and trailing whitespace characters should be

   *            trimmed from non-textqualified column data or not.

   */

  public void setTrimWhitespace(boolean trimWhitespace) {

    userSettings.TrimWhitespace = trimWhitespace;

  }



  /**

   * Gets the character being used as the column delimiter. Default is comma,

   * ','.

   * 

   * @return The character being used as the column delimiter.

   */

  public char getDelimiter() {

    return userSettings.Delimiter;

  }



  /**

   * Sets the character to use as the column delimiter. Default is comma, ','.

   * 

   * @param delimiter

   *            The character to use as the column delimiter.

   */

  public void setDelimiter(char delimiter) {

    userSettings.Delimiter = delimiter;

  }



  public char getRecordDelimiter() {

    return userSettings.RecordDelimiter;

  }



  /**

   * Sets the character to use as the record delimiter.

   * 

   * @param recordDelimiter

   *            The character to use as the record delimiter. Default is

   *            combination of standard end of line characters for Windows,

   *            Unix, or Mac.

   */

  public void setRecordDelimiter(char recordDelimiter) {

    useCustomRecordDelimiter = true;

    userSettings.RecordDelimiter = recordDelimiter;

  }



  /**

   * Gets the character to use as a text qualifier in the data.

   * 

   * @return The character to use as a text qualifier in the data.

   */

  public char getTextQualifier() {

    return userSettings.TextQualifier;

  }



  /**

   * Sets the character to use as a text qualifier in the data.

   * 

   * @param textQualifier

   *            The character to use as a text qualifier in the data.

   */

  public void setTextQualifier(char textQualifier) {

    userSettings.TextQualifier = textQualifier;

  }



  /**

   * Whether text qualifiers will be used while parsing or not.

   * 

   * @return Whether text qualifiers will be used while parsing or not.

   */

  public boolean getUseTextQualifier() {

    return userSettings.UseTextQualifier;

  }



  /**

   * Sets whether text qualifiers will be used while parsing or not.

   * 

   * @param useTextQualifier

   *            Whether to use a text qualifier while parsing or not.

   */

  public void setUseTextQualifier(boolean useTextQualifier) {

    userSettings.UseTextQualifier = useTextQualifier;

  }



  /**

   * Gets the character being used as a comment signal.

   * 

   * @return The character being used as a comment signal.

   */

  public char getComment() {

    return userSettings.Comment;

  }



  /**

   * Sets the character to use as a comment signal.

   * 

   * @param comment

   *            The character to use as a comment signal.

   */

  public void setComment(char comment) {

    userSettings.Comment = comment;

  }



  /**

   * Gets whether comments are being looked for while parsing or not.

   * 

   * @return Whether comments are being looked for while parsing or not.

   */

  public boolean getUseComments() {

    return userSettings.UseComments;

  }



  /**

   * Sets whether comments are being looked for while parsing or not.

   * 

   * @param useComments

   *            Whether comments are being looked for while parsing or not.

   */

  public void setUseComments(boolean useComments) {

    userSettings.UseComments = useComments;

  }



  /**

   * Gets the current way to escape an occurance of the text qualifier inside

   * qualified data.

   * 

   * @return The current way to escape an occurance of the text qualifier

   *         inside qualified data.

   */

  public int getEscapeMode() {

    return userSettings.EscapeMode;

  }



  /**

   * Sets the current way to escape an occurance of the text qualifier inside

   * qualified data.

   * 

   * @param escapeMode

   *            The way to escape an occurance of the text qualifier inside

   *            qualified data.

   * @exception IllegalArgumentException

   *                When an illegal value is specified for escapeMode.

   */

  public void setEscapeMode(int escapeMode) throws IllegalArgumentException {

    if (escapeMode != ESCAPE_MODE_DOUBLED

        && escapeMode != ESCAPE_MODE_BACKSLASH) {

      throw new IllegalArgumentException(

          "Parameter escapeMode must be a valid value.");

    }



    userSettings.EscapeMode = escapeMode;

  }



  public boolean getSkipEmptyRecords() {

    return userSettings.SkipEmptyRecords;

  }



  public void setSkipEmptyRecords(boolean skipEmptyRecords) {

    userSettings.SkipEmptyRecords = skipEmptyRecords;

  }



  /**

   * Safety caution to prevent the parser from using large amounts of memory

   * in the case where parsing settings like file encodings don't end up

   * matching the actual format of a file. This switch can be turned off if

   * the file format is known and tested. With the switch off, the max column

   * lengths and max column count per record supported by the parser will

   * greatly increase. Default is true.

   * 

   * @return The current setting of the safety switch.

   */

  public boolean getSafetySwitch() {

    return userSettings.SafetySwitch;

  }



  /**

   * Safety caution to prevent the parser from using large amounts of memory

   * in the case where parsing settings like file encodings don't end up

   * matching the actual format of a file. This switch can be turned off if

   * the file format is known and tested. With the switch off, the max column

   * lengths and max column count per record supported by the parser will

   * greatly increase. Default is true.

   * 

   * @param safetySwitch

   */

  public void setSafetySwitch(boolean safetySwitch) {

    userSettings.SafetySwitch = safetySwitch;

  }



  /**

   * Gets the count of columns found in this record.

   * 

   * @return The count of columns found in this record.

   */

  public int getColumnCount() {

    return columnsCount;

  }



  /**

   * Gets the index of the current record.

   * 

   * @return The index of the current record.

   */

  public long getCurrentRecord() {

    return currentRecord - 1;

  }



  /**

   * Gets the count of headers read in by a previous call to

   * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.

   * 

   * @return The count of headers read in by a previous call to

   *         {@link com.csvreader.CsvReader#readHeaders readHeaders()}.

   */

  public int getHeaderCount() {

    return headersHolder.Length;

  }



  /**

   * Returns the header values as a string array.

   * 

   * @return The header values as a String array.

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  public String[] getHeaders() throws IOException {

    checkClosed();



    if (headersHolder.Headers == null) {

      return null;

    } else {

      // use clone here to prevent the outside code from

      // setting values on the array directly, which would

      // throw off the index lookup based on header name

      String[] clone = new String[headersHolder.Length];

      System.arraycopy(headersHolder.Headers, 0, clone, 0,

          headersHolder.Length);

      return clone;

    }

  }



  public void setHeaders(String[] headers) {

    headersHolder.Headers = headers;



    headersHolder.IndexByName.clear();



    if (headers != null) {

      headersHolder.Length = headers.length;

    } else {

      headersHolder.Length = 0;

    }



    // use headersHolder.Length here in case headers is null

    for (int i = 0; i < headersHolder.Length; i++) {

      headersHolder.IndexByName.put(headers[i], Integer.valueOf(i));

    }

  }



  public String[] getValues() throws IOException {

    checkClosed();



    // need to return a clone, and can't use clone because values.Length

    // might be greater than columnsCount

    String[] clone = new String[columnsCount];

    System.arraycopy(values, 0, clone, 0, columnsCount);

    return clone;

  }



  /**

   * Returns the current column value for a given column index.

   * 

   * @param columnIndex

   *            The index of the column.

   * @return The current column value.

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  public String get(int columnIndex) throws IOException {

    checkClosed();



    if (columnIndex > -1 && columnIndex < columnsCount) {

      return values[columnIndex];

    } else {

      return "";

    }

  }



  /**

   * Returns the current column value for a given column header name.

   * 

   * @param headerName

   *            The header name of the column.

   * @return The current column value.

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  public String get(String headerName) throws IOException {

    checkClosed();



    return get(getIndex(headerName));

  }



  /**

   * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string

   * of data as the source.&nbsp;Uses ISO-8859-1 as the

   * {@link java.nio.charset.Charset Charset}.

   * 

   * @param data

   *            The String of data to use as the source.

   * @return A {@link com.csvreader.CsvReader CsvReader} object using the

   *         String of data as the source.

   */

  public static CsvReader parse(String data) {

    if (data == null) {

      throw new IllegalArgumentException(

          "Parameter data can not be null.");

    }



    return new CsvReader(new StringReader(data));

  }



  /**

   * Reads another record.

   * 

   * @return Whether another record was successfully read or not.

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  public boolean readRecord() throws IOException {

    checkClosed();



    columnsCount = 0;

    rawBuffer.Position = 0;



    dataBuffer.LineStart = dataBuffer.Position;



    hasReadNextLine = false;



    // check to see if we've already found the end of data



    if (hasMoreData) {

      // loop over the data stream until the end of data is found

      // or the end of the record is found



      do {

        if (dataBuffer.Position == dataBuffer.Count) {

          checkDataLength();

        } else {

          startedWithQualifier = false;



          // grab the current letter as a char



          char currentLetter = dataBuffer.Buffer[dataBuffer.Position];



          if (userSettings.UseTextQualifier

              && currentLetter == userSettings.TextQualifier) {

            // this will be a text qualified column, so

            // we need to set startedWithQualifier to make it

            // enter the seperate branch to handle text

            // qualified columns



            lastLetter = currentLetter;



            // read qualified

            startedColumn = true;

            dataBuffer.ColumnStart = dataBuffer.Position + 1;

            startedWithQualifier = true;

            boolean lastLetterWasQualifier = false;



            char escapeChar = userSettings.TextQualifier;



            if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {

              escapeChar = Letters.BACKSLASH;

            }



            boolean eatingTrailingJunk = false;

            boolean lastLetterWasEscape = false;

            boolean readingComplexEscape = false;

            int escape = ComplexEscape.UNICODE;

            int escapeLength = 0;

            char escapeValue = (char) 0;



            dataBuffer.Position++;



            do {

              if (dataBuffer.Position == dataBuffer.Count) {

                checkDataLength();

              } else {

                // grab the current letter as a char



                currentLetter = dataBuffer.Buffer[dataBuffer.Position];



                if (eatingTrailingJunk) {

                  dataBuffer.ColumnStart = dataBuffer.Position + 1;



                  if (currentLetter == userSettings.Delimiter) {

                    endColumn();

                  } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))

                      || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {

                    endColumn();



                    endRecord();

                  }

                } else if (readingComplexEscape) {

                  escapeLength++;



                  switch (escape) {

                  case ComplexEscape.UNICODE:

                    escapeValue *= (char) 16;

                    escapeValue += hexToDec(currentLetter);



                    if (escapeLength == 4) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.OCTAL:

                    escapeValue *= (char) 8;

                    escapeValue += (char) (currentLetter - '0');



                    if (escapeLength == 3) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.DECIMAL:

                    escapeValue *= (char) 10;

                    escapeValue += (char) (currentLetter - '0');



                    if (escapeLength == 3) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.HEX:

                    escapeValue *= (char) 16;

                    escapeValue += hexToDec(currentLetter);



                    if (escapeLength == 2) {

                      readingComplexEscape = false;

                    }



                    break;

                  }



                  if (!readingComplexEscape) {

                    appendLetter(escapeValue);

                  } else {

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;

                  }

                } else if (currentLetter == userSettings.TextQualifier) {

                  if (lastLetterWasEscape) {

                    lastLetterWasEscape = false;

                    lastLetterWasQualifier = false;

                  } else {

                    updateCurrentValue();



                    if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {

                      lastLetterWasEscape = true;

                    }



                    lastLetterWasQualifier = true;

                  }

                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH

                    && lastLetterWasEscape) {

                  switch (currentLetter) {

                  case 'n':

                    appendLetter(Letters.LF);

                    break;

                  case 'r':

                    appendLetter(Letters.CR);

                    break;

                  case 't':

                    appendLetter(Letters.TAB);

                    break;

                  case 'b':

                    appendLetter(Letters.BACKSPACE);

                    break;

                  case 'f':

                    appendLetter(Letters.FORM_FEED);

                    break;

                  case 'e':

                    appendLetter(Letters.ESCAPE);

                    break;

                  case 'v':

                    appendLetter(Letters.VERTICAL_TAB);

                    break;

                  case 'a':

                    appendLetter(Letters.ALERT);

                    break;

                  case '0':

                  case '1':

                  case '2':

                  case '3':

                  case '4':

                  case '5':

                  case '6':

                  case '7':

                    escape = ComplexEscape.OCTAL;

                    readingComplexEscape = true;

                    escapeLength = 1;

                    escapeValue = (char) (currentLetter - '0');

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;

                    break;

                  case 'u':

                  case 'x':

                  case 'o':

                  case 'd':

                  case 'U':

                  case 'X':

                  case 'O':

                  case 'D':

                    switch (currentLetter) {

                    case 'u':

                    case 'U':

                      escape = ComplexEscape.UNICODE;

                      break;

                    case 'x':

                    case 'X':

                      escape = ComplexEscape.HEX;

                      break;

                    case 'o':

                    case 'O':

                      escape = ComplexEscape.OCTAL;

                      break;

                    case 'd':

                    case 'D':

                      escape = ComplexEscape.DECIMAL;

                      break;

                    }



                    readingComplexEscape = true;

                    escapeLength = 0;

                    escapeValue = (char) 0;

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;



                    break;

                  default:

                    break;

                  }



                  lastLetterWasEscape = false;



                  // can only happen for ESCAPE_MODE_BACKSLASH

                } else if (currentLetter == escapeChar) {

                  updateCurrentValue();

                  lastLetterWasEscape = true;

                } else {

                  if (lastLetterWasQualifier) {

                    if (currentLetter == userSettings.Delimiter) {

                      endColumn();

                    } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))

                        || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {

                      endColumn();



                      endRecord();

                    } else {

                      dataBuffer.ColumnStart = dataBuffer.Position + 1;



                      eatingTrailingJunk = true;

                    }



                    // make sure to clear the flag for next

                    // run of the loop



                    lastLetterWasQualifier = false;

                  }

                }



                // keep track of the last letter because we need

                // it for several key decisions



                lastLetter = currentLetter;



                if (startedColumn) {

                  dataBuffer.Position++;



                  if (userSettings.SafetySwitch

                      && dataBuffer.Position

                          - dataBuffer.ColumnStart

                          + columnBuffer.Position > 100000) {

                    close();



                    throw new IOException(

                        "Maximum column length of 100,000 exceeded in column "

                            + NumberFormat

                                .getIntegerInstance()

                                .format(

                                    columnsCount)

                            + " in record "

                            + NumberFormat

                                .getIntegerInstance()

                                .format(

                                    currentRecord)

                            + ". Set the SafetySwitch property to false"

                            + " if you're expecting column lengths greater than 100,000 characters to"

                            + " avoid this error.");

                  }

                }

              } // end else



            } while (hasMoreData && startedColumn);

          } else if (currentLetter == userSettings.Delimiter) {

            // we encountered a column with no data, so

            // just send the end column



            lastLetter = currentLetter;



            endColumn();

          } else if (useCustomRecordDelimiter

              && currentLetter == userSettings.RecordDelimiter) {

            // this will skip blank lines

            if (startedColumn || columnsCount > 0

                || !userSettings.SkipEmptyRecords) {

              endColumn();



              endRecord();

            } else {

              dataBuffer.LineStart = dataBuffer.Position + 1;

            }



            lastLetter = currentLetter;

          } else if (!useCustomRecordDelimiter

              && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {

            // this will skip blank lines

            if (startedColumn

                || columnsCount > 0

                || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {

              endColumn();



              endRecord();

            } else {

              dataBuffer.LineStart = dataBuffer.Position + 1;

            }



            lastLetter = currentLetter;

          } else if (userSettings.UseComments && columnsCount == 0

              && currentLetter == userSettings.Comment) {

            // encountered a comment character at the beginning of

            // the line so just ignore the rest of the line



            lastLetter = currentLetter;



            skipLine();

          } else if (userSettings.TrimWhitespace

              && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {

            // do nothing, this will trim leading whitespace

            // for both text qualified columns and non



            startedColumn = true;

            dataBuffer.ColumnStart = dataBuffer.Position + 1;

          } else {

            // since the letter wasn't a special letter, this

            // will be the first letter of our current column



            startedColumn = true;

            dataBuffer.ColumnStart = dataBuffer.Position;

            boolean lastLetterWasBackslash = false;

            boolean readingComplexEscape = false;

            int escape = ComplexEscape.UNICODE;

            int escapeLength = 0;

            char escapeValue = (char) 0;



            boolean firstLoop = true;



            do {

              if (!firstLoop

                  && dataBuffer.Position == dataBuffer.Count) {

                checkDataLength();

              } else {

                if (!firstLoop) {

                  // grab the current letter as a char

                  currentLetter = dataBuffer.Buffer[dataBuffer.Position];

                }



                if (!userSettings.UseTextQualifier

                    && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH

                    && currentLetter == Letters.BACKSLASH) {

                  if (lastLetterWasBackslash) {

                    lastLetterWasBackslash = false;

                  } else {

                    updateCurrentValue();

                    lastLetterWasBackslash = true;

                  }

                } else if (readingComplexEscape) {

                  escapeLength++;



                  switch (escape) {

                  case ComplexEscape.UNICODE:

                    escapeValue *= (char) 16;

                    escapeValue += hexToDec(currentLetter);



                    if (escapeLength == 4) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.OCTAL:

                    escapeValue *= (char) 8;

                    escapeValue += (char) (currentLetter - '0');



                    if (escapeLength == 3) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.DECIMAL:

                    escapeValue *= (char) 10;

                    escapeValue += (char) (currentLetter - '0');



                    if (escapeLength == 3) {

                      readingComplexEscape = false;

                    }



                    break;

                  case ComplexEscape.HEX:

                    escapeValue *= (char) 16;

                    escapeValue += hexToDec(currentLetter);



                    if (escapeLength == 2) {

                      readingComplexEscape = false;

                    }



                    break;

                  }



                  if (!readingComplexEscape) {

                    appendLetter(escapeValue);

                  } else {

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;

                  }

                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH

                    && lastLetterWasBackslash) {

                  switch (currentLetter) {

                  case 'n':

                    appendLetter(Letters.LF);

                    break;

                  case 'r':

                    appendLetter(Letters.CR);

                    break;

                  case 't':

                    appendLetter(Letters.TAB);

                    break;

                  case 'b':

                    appendLetter(Letters.BACKSPACE);

                    break;

                  case 'f':

                    appendLetter(Letters.FORM_FEED);

                    break;

                  case 'e':

                    appendLetter(Letters.ESCAPE);

                    break;

                  case 'v':

                    appendLetter(Letters.VERTICAL_TAB);

                    break;

                  case 'a':

                    appendLetter(Letters.ALERT);

                    break;

                  case '0':

                  case '1':

                  case '2':

                  case '3':

                  case '4':

                  case '5':

                  case '6':

                  case '7':

                    escape = ComplexEscape.OCTAL;

                    readingComplexEscape = true;

                    escapeLength = 1;

                    escapeValue = (char) (currentLetter - '0');

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;

                    break;

                  case 'u':

                  case 'x':

                  case 'o':

                  case 'd':

                  case 'U':

                  case 'X':

                  case 'O':

                  case 'D':

                    switch (currentLetter) {

                    case 'u':

                    case 'U':

                      escape = ComplexEscape.UNICODE;

                      break;

                    case 'x':

                    case 'X':

                      escape = ComplexEscape.HEX;

                      break;

                    case 'o':

                    case 'O':

                      escape = ComplexEscape.OCTAL;

                      break;

                    case 'd':

                    case 'D':

                      escape = ComplexEscape.DECIMAL;

                      break;

                    }



                    readingComplexEscape = true;

                    escapeLength = 0;

                    escapeValue = (char) 0;

                    dataBuffer.ColumnStart = dataBuffer.Position + 1;



                    break;

                  default:

                    break;

                  }



                  lastLetterWasBackslash = false;

                } else {

                  if (currentLetter == userSettings.Delimiter) {

                    endColumn();

                  } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))

                      || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {

                    endColumn();



                    endRecord();

                  }

                }



                // keep track of the last letter because we need

                // it for several key decisions



                lastLetter = currentLetter;

                firstLoop = false;



                if (startedColumn) {

                  dataBuffer.Position++;



                  if (userSettings.SafetySwitch

                      && dataBuffer.Position

                          - dataBuffer.ColumnStart

                          + columnBuffer.Position > 100000) {

                    close();



                    throw new IOException(

                        "Maximum column length of 100,000 exceeded in column "

                            + NumberFormat

                                .getIntegerInstance()

                                .format(

                                    columnsCount)

                            + " in record "

                            + NumberFormat

                                .getIntegerInstance()

                                .format(

                                    currentRecord)

                            + ". Set the SafetySwitch property to false"

                            + " if you're expecting column lengths greater than 100,000 characters to"

                            + " avoid this error.");

                  }

                }

              } // end else

            } while (hasMoreData && startedColumn);

          }



          if (hasMoreData) {

            dataBuffer.Position++;

          }

        } // end else

      } while (hasMoreData && !hasReadNextLine);



      // check to see if we hit the end of the file

      // without processing the current record



      if (startedColumn || lastLetter == userSettings.Delimiter) {

        endColumn();



        endRecord();

      }

    }



    if (userSettings.CaptureRawRecord) {

      if (hasMoreData) {

        if (rawBuffer.Position == 0) {

          rawRecord = new String(dataBuffer.Buffer,

              dataBuffer.LineStart, dataBuffer.Position

                  - dataBuffer.LineStart - 1);

        } else {

          rawRecord = new String(rawBuffer.Buffer, 0,

              rawBuffer.Position)

              + new String(dataBuffer.Buffer,

                  dataBuffer.LineStart, dataBuffer.Position

                      - dataBuffer.LineStart - 1);

        }

      } else {

        // for hasMoreData to ever be false, all data would have had to

        // have been

        // copied to the raw buffer

        rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);

      }

    } else {

      rawRecord = "";

    }



    return hasReadNextLine;

  }



  /**

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  private void checkDataLength() throws IOException {

    if (!initialized) {

      if (fileName != null) {

        inputStream = new BufferedReader(new InputStreamReader(

            new FileInputStream(fileName), charset),

            StaticSettings.MAX_FILE_BUFFER_SIZE);

      }



      charset = null;

      initialized = true;

    }



    updateCurrentValue();



    if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {

      if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count

          - dataBuffer.LineStart) {

        int newLength = rawBuffer.Buffer.length

            + Math.max(dataBuffer.Count - dataBuffer.LineStart,

                rawBuffer.Buffer.length);



        char[] holder = new char[newLength];



        System.arraycopy(rawBuffer.Buffer, 0, holder, 0,

            rawBuffer.Position);



        rawBuffer.Buffer = holder;

      }



      System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,

          rawBuffer.Buffer, rawBuffer.Position, dataBuffer.Count

              - dataBuffer.LineStart);



      rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart;

    }



    try {

      dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,

          dataBuffer.Buffer.length);

    } catch (IOException ex) {

      close();



      throw ex;

    }



    // if no more data could be found, set flag stating that

    // the end of the data was found



    if (dataBuffer.Count == -1) {

      hasMoreData = false;

    }



    dataBuffer.Position = 0;

    dataBuffer.LineStart = 0;

    dataBuffer.ColumnStart = 0;

  }



  /**

   * Read the first record of data as column headers.

   * 

   * @return Whether the header record was successfully read or not.

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  public boolean readHeaders() throws IOException {

    boolean result = readRecord();



    // copy the header data from the column array

    // to the header string array



    headersHolder.Length = columnsCount;



    headersHolder.Headers = new String[columnsCount];



    for (int i = 0; i < headersHolder.Length; i++) {

      String columnValue = get(i);



      headersHolder.Headers[i] = columnValue;



      // if there are duplicate header names, we will save the last one

      headersHolder.IndexByName.put(columnValue, Integer.valueOf(i));

    }



    if (result) {

      currentRecord--;

    }



    columnsCount = 0;



    return result;

  }



  /**

   * Returns the column header value for a given column index.

   * 

   * @param columnIndex

   *            The index of the header column being requested.

   * @return The value of the column header at the given column index.

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  public String getHeader(int columnIndex) throws IOException {

    checkClosed();



    // check to see if we have read the header record yet



    // check to see if the column index is within the bounds

    // of our header array



    if (columnIndex > -1 && columnIndex < headersHolder.Length) {

      // return the processed header data for this column



      return headersHolder.Headers[columnIndex];

    } else {

      return "";

    }

  }



  public boolean isQualified(int columnIndex) throws IOException {

    checkClosed();



    if (columnIndex < columnsCount && columnIndex > -1) {

      return isQualified[columnIndex];

    } else {

      return false;

    }

  }



  /**

   * @exception IOException

   *                Thrown if a very rare extreme exception occurs during

   *                parsing, normally resulting from improper data format.

   */

  private void endColumn() throws IOException {

    String currentValue = "";



    // must be called before setting startedColumn = false

    if (startedColumn) {

      if (columnBuffer.Position == 0) {

        if (dataBuffer.ColumnStart < dataBuffer.Position) {

          int lastLetter = dataBuffer.Position - 1;



          if (userSettings.TrimWhitespace && !startedWithQualifier) {

            while (lastLetter >= dataBuffer.ColumnStart

                && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {

              lastLetter--;

            }

          }



          currentValue = new String(dataBuffer.Buffer,

              dataBuffer.ColumnStart, lastLetter

                  - dataBuffer.ColumnStart + 1);

        }

      } else {

        updateCurrentValue();



        int lastLetter = columnBuffer.Position - 1;



        if (userSettings.TrimWhitespace && !startedWithQualifier) {

          while (lastLetter >= 0

              && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {

            lastLetter--;

          }

        }



        currentValue = new String(columnBuffer.Buffer, 0,

            lastLetter + 1);

      }

    }



    columnBuffer.Position = 0;



    startedColumn = false;



    if (columnsCount >= 100000 && userSettings.SafetySwitch) {

      close();



      throw new IOException(

          "Maximum column count of 100,000 exceeded in record "

              + NumberFormat.getIntegerInstance().format(

                  currentRecord)

              + ". Set the SafetySwitch property to false"

              + " if you're expecting more than 100,000 columns per record to"

              + " avoid this error.");

    }



    // check to see if our current holder array for

    // column chunks is still big enough to handle another

    // column chunk



    if (columnsCount == values.length) {

      // holder array needs to grow to be able to hold another column

      int newLength = values.length * 2;



      String[] holder = new String[newLength];



      System.arraycopy(values, 0, holder, 0, values.length);



      values = holder;



      boolean[] qualifiedHolder = new boolean[newLength];



      System.arraycopy(isQualified, 0, qualifiedHolder, 0,

          isQualified.length);



      isQualified = qualifiedHolder;

    }



    values[columnsCount] = currentValue;



    isQualified[columnsCount] = startedWithQualifier;



    currentValue = "";



    columnsCount++;

  }



  private void appendLetter(char letter) {

    if (columnBuffer.Position == columnBuffer.Buffer.length) {

      int newLength = columnBuffer.Buffer.length * 2;



      char[] holder = new char[newLength];



      System.arraycopy(columnBuffer.Buffer, 0, holder, 0,

          columnBuffer.Position);



      columnBuffer.Buffer = holder;

    }

    columnBuffer.Buffer[columnBuffer.Position++] = letter;

    dataBuffer.ColumnStart = dataBuffer.Position + 1;

  }



  private void updateCurrentValue() {

    if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) {

      if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position

          - dataBuffer.ColumnStart) {

        int newLength = columnBuffer.Buffer.length

            + Math.max(

                dataBuffer.Position - dataBuffer.ColumnStart,

                columnBuffer.Buffer.length);



        char[] holder = new char[newLength];



        System.arraycopy(columnBuffer.Buffer, 0, holder, 0,

            columnBuffer.Position);



        columnBuffer.Buffer = holder;

      }



      System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,

          columnBuffer.Buffer, columnBuffer.Position,

          dataBuffer.Position - dataBuffer.ColumnStart);



      columnBuffer.Position += dataBuffer.Position

          - dataBuffer.ColumnStart;

    }



    dataBuffer.ColumnStart = dataBuffer.Position + 1;

  }



  /**

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  private void endRecord() throws IOException {

    // this flag is used as a loop exit condition

    // during parsing



    hasReadNextLine = true;



    currentRecord++;

  }



  /**

   * Gets the corresponding column index for a given column header name.

   * 

   * @param headerName

   *            The header name of the column.

   * @return The column index for the given column header name.&nbsp;Returns

   *         -1 if not found.

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  public int getIndex(String headerName) throws IOException {

    checkClosed();



    Integer indexValue = headersHolder.IndexByName.get(headerName);



    if (indexValue != null) {

      return indexValue.intValue();

    } else {

      return -1;

    }

  }



  /**

   * Skips the next record of data by parsing each column.&nbsp;Does not

   * increment

   * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.

   * 

   * @return Whether another record was successfully skipped or not.

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  public boolean skipRecord() throws IOException {

    checkClosed();



    boolean recordRead = false;



    if (hasMoreData) {

      recordRead = readRecord();



      if (recordRead) {

        currentRecord--;

      }

    }



    return recordRead;

  }



  /**

   * Skips the next line of data using the standard end of line characters and

   * does not do any column delimited parsing.

   * 

   * @return Whether a line was successfully skipped or not.

   * @exception IOException

   *                Thrown if an error occurs while reading data from the

   *                source stream.

   */

  public boolean skipLine() throws IOException {

    checkClosed();



    // clear public column values for current line



    columnsCount = 0;



    boolean skippedLine = false;



    if (hasMoreData) {

      boolean foundEol = false;



      do {

        if (dataBuffer.Position == dataBuffer.Count) {

          checkDataLength();

        } else {

          skippedLine = true;



          // grab the current letter as a char



          char currentLetter = dataBuffer.Buffer[dataBuffer.Position];



          if (currentLetter == Letters.CR

              || currentLetter == Letters.LF) {

            foundEol = true;

          }



          // keep track of the last letter because we need

          // it for several key decisions



          lastLetter = currentLetter;



          if (!foundEol) {

            dataBuffer.Position++;

          }



        } // end else

      } while (hasMoreData && !foundEol);



      columnBuffer.Position = 0;



      dataBuffer.LineStart = dataBuffer.Position + 1;

    }



    rawBuffer.Position = 0;

    rawRecord = "";



    return skippedLine;

  }



  /**

   * Closes and releases all related resources.

   */

  public void close() {

    if (!closed) {

      close(true);



      closed = true;

    }

  }



  /**

   * 

   */

  private void close(boolean closing) {

    if (!closed) {

      if (closing) {

        charset = null;

        headersHolder.Headers = null;

        headersHolder.IndexByName = null;

        dataBuffer.Buffer = null;

        columnBuffer.Buffer = null;

        rawBuffer.Buffer = null;

      }



      try {

        if (initialized) {

          inputStream.close();

        }

      } catch (Exception e) {

        // just eat the exception

      }



      inputStream = null;



      closed = true;

    }

  }



  /**

   * @exception IOException

   *                Thrown if this object has already been closed.

   */

  private void checkClosed() throws IOException {

    if (closed) {

      throw new IOException(

          "This instance of the CsvReader class has already been closed.");

    }

  }



  /**

   * 

   */

  protected void finalize() {

    close(false);

  }



  private class ComplexEscape {

    private static final int UNICODE = 1;



    private static final int OCTAL = 2;



    private static final int DECIMAL = 3;



    private static final int HEX = 4;

  }



  private static char hexToDec(char hex) {

    char result;



    if (hex >= 'a') {

      result = (char) (hex - 'a' + 10);

    } else if (hex >= 'A') {

      result = (char) (hex - 'A' + 10);

    } else {

      result = (char) (hex - '0');

    }



    return result;

  }



  private class DataBuffer {

    public char[] Buffer;



    public int Position;



    // / <summary>

    // / How much usable data has been read into the stream,

    // / which will not always be as long as Buffer.Length.

    // / </summary>

    public int Count;



    // / <summary>

    // / The position of the cursor in the buffer when the

    // / current column was started or the last time data

    // / was moved out to the column buffer.

    // / </summary>

    public int ColumnStart;



    public int LineStart;



    public DataBuffer() {

      Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];

      Position = 0;

      Count = 0;

      ColumnStart = 0;

      LineStart = 0;

    }

  }



  private class ColumnBuffer {

    public char[] Buffer;



    public int Position;



    public ColumnBuffer() {

      Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];

      Position = 0;

    }

  }



  private class RawRecordBuffer {

    public char[] Buffer;



    public int Position;



    public RawRecordBuffer() {

      Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE

          * StaticSettings.INITIAL_COLUMN_COUNT];

      Position = 0;

    }

  }



  private class Letters {

    public static final char LF = '\n';



    public static final char CR = '\r';



    public static final char QUOTE = '"';



    public static final char COMMA = ',';



    public static final char SPACE = ' ';



    public static final char TAB = '\t';



    public static final char POUND = '#';



    public static final char BACKSLASH = '\\';



    public static final char NULL = '\0';



    public static final char BACKSPACE = '\b';



    public static final char FORM_FEED = '\f';



    public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape



    public static final char VERTICAL_TAB = '\u000B';



    public static final char ALERT = '\u0007';

  }



  private class UserSettings {

    // having these as publicly accessible members will prevent

    // the overhead of the method call that exists on properties

    public boolean CaseSensitive;



    public char TextQualifier;



    public boolean TrimWhitespace;



    public boolean UseTextQualifier;



    public char Delimiter;



    public char RecordDelimiter;



    public char Comment;



    public boolean UseComments;



    public int EscapeMode;



    public boolean SafetySwitch;



    public boolean SkipEmptyRecords;



    public boolean CaptureRawRecord;



    public UserSettings() {

      CaseSensitive = true;

      TextQualifier = Letters.QUOTE;

      TrimWhitespace = true;

      UseTextQualifier = true;

      Delimiter = Letters.COMMA;

      RecordDelimiter = Letters.NULL;

      Comment = Letters.POUND;

      UseComments = false;

      EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;

      SafetySwitch = true;

      SkipEmptyRecords = true;

      CaptureRawRecord = true;

    }

  }



  private class HeadersHolder {

    public String[] Headers;



    public int Length;



    public HashMap<String, Integer> IndexByName;



    public HeadersHolder() {

      Headers = null;

      Length = 0;

      IndexByName = new HashMap<String, Integer>();

    }

  }



  private class StaticSettings {

    // these are static instead of final so they can be changed in unit test

    // isn't visible outside this class and is only accessed once during

    // CsvReader construction

    public static final int MAX_BUFFER_SIZE = 1024;



    public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;



    public static final int INITIAL_COLUMN_COUNT = 10;



    public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;

  }

}
Csv Reader : CSV File « Development Class « Java