0001: /*
0002: * Java CSV is a stream based library for reading and writing
0003: * CSV and other delimited data.
0004: *
0005: * Copyright (C) Bruce Dunwiddie bruce@csvreader.com
0006: *
0007: * This library is free software; you can redistribute it and/or
0008: * modify it under the terms of the GNU Lesser General Public
0009: * License as published by the Free Software Foundation; either
0010: * version 2.1 of the License, or (at your option) any later version.
0011: *
0012: * This library is distributed in the hope that it will be useful,
0013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0015: * Lesser General Public License for more details.
0016: *
0017: * You should have received a copy of the GNU Lesser General Public
0018: * License along with this library; if not, write to the Free Software
0019: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
0020: */
0021: package com.csvreader;
0022:
0023: import java.io.BufferedReader;
0024: import java.io.File;
0025: import java.io.FileInputStream;
0026: import java.io.FileNotFoundException;
0027: import java.io.IOException;
0028: import java.io.InputStream;
0029: import java.io.InputStreamReader;
0030: import java.io.Reader;
0031: import java.io.StringReader;
0032: import java.nio.charset.Charset;
0033: import java.text.NumberFormat;
0034: import java.util.HashMap;
0035:
0036: /**
0037: * A stream based parser for parsing delimited text data from a file or a
0038: * stream.
0039: */
0040: public class CsvReader {
// Character source being parsed. Assigned directly by the Reader-based
// constructor; left null by the file-name constructor.
// NOTE(review): presumably opened lazily from fileName/charset — confirm.
private Reader inputStream = null;

// Path of the data file; only assigned by the file-name constructor.
private String fileName = null;

// this holds all the values for switches that the user is allowed to set
// (delimiter, text qualifier, escape mode, ...), exposed through the
// public getters and setters of this class
private UserSettings userSettings = new UserSettings();

// Charset supplied to the file-name constructor; stays null for
// Reader-based instances.
private Charset charset = null;

// Set to true by setRecordDelimiter(). While true, readRecord() ends a
// record on userSettings.RecordDelimiter instead of on CR / LF.
private boolean useCustomRecordDelimiter = false;

// this will be our working buffer to hold data chunks
// read in from the data file

private DataBuffer dataBuffer = new DataBuffer();

// NOTE(review): presumably accumulates the text of the column currently
// being parsed; its Position takes part in the maximum column length
// check in readRecord() — confirm against ColumnBuffer's users.
private ColumnBuffer columnBuffer = new ColumnBuffer();

// Raw record text; readRecord() combines it with the data buffer to build
// rawRecord when raw record capture is enabled.
private RawRecordBuffer rawBuffer = new RawRecordBuffer();

// Allocated by the constructors with the same length as values.
// NOTE(review): presumably one flag per column recording whether it was
// text qualified — confirm.
private boolean[] isQualified = null;

// Raw text of the last record, as returned by getRawRecord(); readRecord()
// only refreshes it when userSettings.CaptureRawRecord is true.
private String rawRecord = "";

// Header names, their count and the name-to-index lookup; see
// getHeaders() / setHeaders().
private HeadersHolder headersHolder = new HeadersHolder();

// these are all more or less global loop variables
// to keep from needing to pass them all into various
// methods during parsing

private boolean startedColumn = false;

private boolean startedWithQualifier = false;

private boolean hasMoreData = true;

private char lastLetter = '\0';

private boolean hasReadNextLine = false;

// Number of columns in the current record; reset to 0 at the start of
// every readRecord() call.
private int columnsCount = 0;

// getCurrentRecord() reports this value minus one.
// NOTE(review): presumably incremented once per completed record — confirm.
private long currentRecord = 0;

// Column values of the current record. Its length may be greater than
// columnsCount, so only the first columnsCount entries are meaningful.
private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];

// Set to true by the Reader-based constructor; the file-name constructor
// leaves it false.
private boolean initialized = false;

// NOTE(review): presumably set by close() and tested by checkClosed() —
// confirm.
private boolean closed = false;
0090:
/**
 * Double up the text qualifier to represent an occurrence of the text
 * qualifier.
 */
public static final int ESCAPE_MODE_DOUBLED = 1;

/**
 * Use a backslash character before the text qualifier to represent an
 * occurrence of the text qualifier.
 */
public static final int ESCAPE_MODE_BACKSLASH = 2;
0102:
0103: /**
0104: * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
0105: * as the data source.
0106: *
0107: * @param fileName
0108: * The path to the file to use as the data source.
0109: * @param delimiter
0110: * The character to use as the column delimiter.
0111: * @param charset
0112: * The {@link java.nio.charset.Charset Charset} to use while
0113: * parsing the data.
0114: */
0115: public CsvReader(String fileName, char delimiter, Charset charset)
0116: throws FileNotFoundException {
0117: if (fileName == null) {
0118: throw new IllegalArgumentException(
0119: "Parameter fileName can not be null.");
0120: }
0121:
0122: if (charset == null) {
0123: throw new IllegalArgumentException(
0124: "Parameter charset can not be null.");
0125: }
0126:
0127: if (!new File(fileName).exists()) {
0128: throw new FileNotFoundException("File " + fileName
0129: + " does not exist.");
0130: }
0131:
0132: this .fileName = fileName;
0133: this .userSettings.Delimiter = delimiter;
0134: this .charset = charset;
0135:
0136: isQualified = new boolean[values.length];
0137: }
0138:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} that takes its data
 * from a file on disk, decoding it as ISO-8859-1.
 *
 * @param fileName
 *            Path of the file to read.
 * @param delimiter
 *            Character that separates columns.
 * @exception FileNotFoundException
 *                When nothing exists at the given path.
 */
public CsvReader(String fileName, char delimiter)
        throws FileNotFoundException {
    // delegate to the full constructor with the default charset
    this(fileName, delimiter, Charset.forName("ISO-8859-1"));
}
0153:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} that takes its data
 * from a file on disk, with a comma as the column delimiter and
 * ISO-8859-1 as the {@link java.nio.charset.Charset Charset}.
 *
 * @param fileName
 *            Path of the file to read.
 * @exception FileNotFoundException
 *                When nothing exists at the given path.
 */
public CsvReader(String fileName) throws FileNotFoundException {
    // delegate with the default column delimiter
    this(fileName, Letters.COMMA);
}
0165:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an existing
 * {@link java.io.Reader Reader}.
 *
 * @param inputStream
 *            Reader that supplies the delimited data.
 * @param delimiter
 *            Character that separates columns.
 * @exception IllegalArgumentException
 *                When inputStream is null.
 */
public CsvReader(Reader inputStream, char delimiter) {
    if (null == inputStream) {
        throw new IllegalArgumentException(
                "Parameter inputStream can not be null.");
    }

    userSettings.Delimiter = delimiter;
    this.inputStream = inputStream;
    this.isQualified = new boolean[this.values.length];

    // the caller handed over a ready-to-use reader, so this instance is
    // flagged as initialized right away
    this.initialized = true;
}
0187:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an existing
 * {@link java.io.Reader Reader}, with a comma as the column delimiter.
 *
 * @param inputStream
 *            Reader that supplies the delimited data.
 */
public CsvReader(Reader inputStream) {
    // delegate with the default column delimiter
    this(inputStream, Letters.COMMA);
}
0199:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an
 * {@link java.io.InputStream InputStream}.
 *
 * @param inputStream
 *            Byte stream that supplies the delimited data.
 * @param delimiter
 *            Character that separates columns.
 * @param charset
 *            {@link java.nio.charset.Charset Charset} used to decode the
 *            bytes of the stream.
 */
public CsvReader(InputStream inputStream, char delimiter,
        Charset charset) {
    // wrap the byte stream in a decoding reader and delegate
    this(new InputStreamReader(inputStream, charset), delimiter);
}
0216:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an
 * {@link java.io.InputStream InputStream}, with a comma as the column
 * delimiter.
 *
 * @param inputStream
 *            Byte stream that supplies the delimited data.
 * @param charset
 *            {@link java.nio.charset.Charset Charset} used to decode the
 *            bytes of the stream.
 */
public CsvReader(InputStream inputStream, Charset charset) {
    // wrap the byte stream in a decoding reader and delegate
    this(new InputStreamReader(inputStream, charset));
}
0231:
0232: public boolean getCaptureRawRecord() {
0233: return userSettings.CaptureRawRecord;
0234: }
0235:
0236: public void setCaptureRawRecord(boolean captureRawRecord) {
0237: userSettings.CaptureRawRecord = captureRawRecord;
0238: }
0239:
0240: public String getRawRecord() {
0241: return rawRecord;
0242: }
0243:
0244: /**
0245: * Gets whether leading and trailing whitespace characters are being trimmed
0246: * from non-textqualified column data. Default is true.
0247: *
0248: * @return Whether leading and trailing whitespace characters are being
0249: * trimmed from non-textqualified column data.
0250: */
0251: public boolean getTrimWhitespace() {
0252: return userSettings.TrimWhitespace;
0253: }
0254:
0255: /**
0256: * Sets whether leading and trailing whitespace characters should be trimmed
0257: * from non-textqualified column data or not. Default is true.
0258: *
0259: * @param trimWhitespace
0260: * Whether leading and trailing whitespace characters should be
0261: * trimmed from non-textqualified column data or not.
0262: */
0263: public void setTrimWhitespace(boolean trimWhitespace) {
0264: userSettings.TrimWhitespace = trimWhitespace;
0265: }
0266:
0267: /**
0268: * Gets the character being used as the column delimiter. Default is comma,
0269: * ','.
0270: *
0271: * @return The character being used as the column delimiter.
0272: */
0273: public char getDelimiter() {
0274: return userSettings.Delimiter;
0275: }
0276:
0277: /**
0278: * Sets the character to use as the column delimiter. Default is comma, ','.
0279: *
0280: * @param delimiter
0281: * The character to use as the column delimiter.
0282: */
0283: public void setDelimiter(char delimiter) {
0284: userSettings.Delimiter = delimiter;
0285: }
0286:
0287: public char getRecordDelimiter() {
0288: return userSettings.RecordDelimiter;
0289: }
0290:
0291: /**
0292: * Sets the character to use as the record delimiter.
0293: *
0294: * @param recordDelimiter
0295: * The character to use as the record delimiter. Default is
0296: * combination of standard end of line characters for Windows,
0297: * Unix, or Mac.
0298: */
0299: public void setRecordDelimiter(char recordDelimiter) {
0300: useCustomRecordDelimiter = true;
0301: userSettings.RecordDelimiter = recordDelimiter;
0302: }
0303:
0304: /**
0305: * Gets the character to use as a text qualifier in the data.
0306: *
0307: * @return The character to use as a text qualifier in the data.
0308: */
0309: public char getTextQualifier() {
0310: return userSettings.TextQualifier;
0311: }
0312:
0313: /**
0314: * Sets the character to use as a text qualifier in the data.
0315: *
0316: * @param textQualifier
0317: * The character to use as a text qualifier in the data.
0318: */
0319: public void setTextQualifier(char textQualifier) {
0320: userSettings.TextQualifier = textQualifier;
0321: }
0322:
0323: /**
0324: * Whether text qualifiers will be used while parsing or not.
0325: *
0326: * @return Whether text qualifiers will be used while parsing or not.
0327: */
0328: public boolean getUseTextQualifier() {
0329: return userSettings.UseTextQualifier;
0330: }
0331:
0332: /**
0333: * Sets whether text qualifiers will be used while parsing or not.
0334: *
0335: * @param useTextQualifier
0336: * Whether to use a text qualifier while parsing or not.
0337: */
0338: public void setUseTextQualifier(boolean useTextQualifier) {
0339: userSettings.UseTextQualifier = useTextQualifier;
0340: }
0341:
0342: /**
0343: * Gets the character being used as a comment signal.
0344: *
0345: * @return The character being used as a comment signal.
0346: */
0347: public char getComment() {
0348: return userSettings.Comment;
0349: }
0350:
0351: /**
0352: * Sets the character to use as a comment signal.
0353: *
0354: * @param comment
0355: * The character to use as a comment signal.
0356: */
0357: public void setComment(char comment) {
0358: userSettings.Comment = comment;
0359: }
0360:
0361: /**
0362: * Gets whether comments are being looked for while parsing or not.
0363: *
0364: * @return Whether comments are being looked for while parsing or not.
0365: */
0366: public boolean getUseComments() {
0367: return userSettings.UseComments;
0368: }
0369:
0370: /**
0371: * Sets whether comments are being looked for while parsing or not.
0372: *
0373: * @param useComments
0374: * Whether comments are being looked for while parsing or not.
0375: */
0376: public void setUseComments(boolean useComments) {
0377: userSettings.UseComments = useComments;
0378: }
0379:
0380: /**
0381: * Gets the current way to escape an occurance of the text qualifier inside
0382: * qualified data.
0383: *
0384: * @return The current way to escape an occurance of the text qualifier
0385: * inside qualified data.
0386: */
0387: public int getEscapeMode() {
0388: return userSettings.EscapeMode;
0389: }
0390:
0391: /**
0392: * Sets the current way to escape an occurance of the text qualifier inside
0393: * qualified data.
0394: *
0395: * @param escapeMode
0396: * The way to escape an occurance of the text qualifier inside
0397: * qualified data.
0398: * @exception IllegalArgumentException
0399: * When an illegal value is specified for escapeMode.
0400: */
0401: public void setEscapeMode(int escapeMode)
0402: throws IllegalArgumentException {
0403: if (escapeMode != ESCAPE_MODE_DOUBLED
0404: && escapeMode != ESCAPE_MODE_BACKSLASH) {
0405: throw new IllegalArgumentException(
0406: "Parameter escapeMode must be a valid value.");
0407: }
0408:
0409: userSettings.EscapeMode = escapeMode;
0410: }
0411:
0412: public boolean getSkipEmptyRecords() {
0413: return userSettings.SkipEmptyRecords;
0414: }
0415:
0416: public void setSkipEmptyRecords(boolean skipEmptyRecords) {
0417: userSettings.SkipEmptyRecords = skipEmptyRecords;
0418: }
0419:
0420: /**
0421: * Safety caution to prevent the parser from using large amounts of memory
0422: * in the case where parsing settings like file encodings don't end up
0423: * matching the actual format of a file. This switch can be turned off if
0424: * the file format is known and tested. With the switch off, the max column
0425: * lengths and max column count per record supported by the parser will
0426: * greatly increase. Default is true.
0427: *
0428: * @return The current setting of the safety switch.
0429: */
0430: public boolean getSafetySwitch() {
0431: return userSettings.SafetySwitch;
0432: }
0433:
0434: /**
0435: * Safety caution to prevent the parser from using large amounts of memory
0436: * in the case where parsing settings like file encodings don't end up
0437: * matching the actual format of a file. This switch can be turned off if
0438: * the file format is known and tested. With the switch off, the max column
0439: * lengths and max column count per record supported by the parser will
0440: * greatly increase. Default is true.
0441: *
0442: * @param safetySwitch
0443: */
0444: public void setSafetySwitch(boolean safetySwitch) {
0445: userSettings.SafetySwitch = safetySwitch;
0446: }
0447:
0448: /**
0449: * Gets the count of columns found in this record.
0450: *
0451: * @return The count of columns found in this record.
0452: */
0453: public int getColumnCount() {
0454: return columnsCount;
0455: }
0456:
0457: /**
0458: * Gets the index of the current record.
0459: *
0460: * @return The index of the current record.
0461: */
0462: public long getCurrentRecord() {
0463: return currentRecord - 1;
0464: }
0465:
0466: /**
0467: * Gets the count of headers read in by a previous call to
0468: * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
0469: *
0470: * @return The count of headers read in by a previous call to
0471: * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
0472: */
0473: public int getHeaderCount() {
0474: return headersHolder.Length;
0475: }
0476:
0477: /**
0478: * Returns the header values as a string array.
0479: *
0480: * @return The header values as a String array.
0481: * @exception IOException
0482: * Thrown if this object has already been closed.
0483: */
0484: public String[] getHeaders() throws IOException {
0485: checkClosed();
0486:
0487: if (headersHolder.Headers == null) {
0488: return null;
0489: } else {
0490: // use clone here to prevent the outside code from
0491: // setting values on the array directly, which would
0492: // throw off the index lookup based on header name
0493: String[] clone = new String[headersHolder.Length];
0494: System.arraycopy(headersHolder.Headers, 0, clone, 0,
0495: headersHolder.Length);
0496: return clone;
0497: }
0498: }
0499:
0500: public void setHeaders(String[] headers) {
0501: headersHolder.Headers = headers;
0502:
0503: headersHolder.IndexByName.clear();
0504:
0505: if (headers != null) {
0506: headersHolder.Length = headers.length;
0507: } else {
0508: headersHolder.Length = 0;
0509: }
0510:
0511: // use headersHolder.Length here in case headers is null
0512: for (int i = 0; i < headersHolder.Length; i++) {
0513: headersHolder.IndexByName.put(headers[i], Integer
0514: .valueOf(i));
0515: }
0516: }
0517:
0518: public String[] getValues() throws IOException {
0519: checkClosed();
0520:
0521: // need to return a clone, and can't use clone because values.Length
0522: // might be greater than columnsCount
0523: String[] clone = new String[columnsCount];
0524: System.arraycopy(values, 0, clone, 0, columnsCount);
0525: return clone;
0526: }
0527:
0528: /**
0529: * Returns the current column value for a given column index.
0530: *
0531: * @param columnIndex
0532: * The index of the column.
0533: * @return The current column value.
0534: * @exception IOException
0535: * Thrown if this object has already been closed.
0536: */
0537: public String get(int columnIndex) throws IOException {
0538: checkClosed();
0539:
0540: if (columnIndex > -1 && columnIndex < columnsCount) {
0541: return values[columnIndex];
0542: } else {
0543: return "";
0544: }
0545: }
0546:
0547: /**
0548: * Returns the current column value for a given column header name.
0549: *
0550: * @param headerName
0551: * The header name of the column.
0552: * @return The current column value.
0553: * @exception IOException
0554: * Thrown if this object has already been closed.
0555: */
0556: public String get(String headerName) throws IOException {
0557: checkClosed();
0558:
0559: return get(getIndex(headerName));
0560: }
0561:
0562: /**
0563: * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string
0564: * of data as the source. Uses ISO-8859-1 as the
0565: * {@link java.nio.charset.Charset Charset}.
0566: *
0567: * @param data
0568: * The String of data to use as the source.
0569: * @return A {@link com.csvreader.CsvReader CsvReader} object using the
0570: * String of data as the source.
0571: */
0572: public static CsvReader parse(String data) {
0573: if (data == null) {
0574: throw new IllegalArgumentException(
0575: "Parameter data can not be null.");
0576: }
0577:
0578: return new CsvReader(new StringReader(data));
0579: }
0580:
0581: /**
0582: * Reads another record.
0583: *
0584: * @return Whether another record was successfully read or not.
0585: * @exception IOException
0586: * Thrown if an error occurs while reading data from the
0587: * source stream.
0588: */
0589: public boolean readRecord() throws IOException {
0590: checkClosed();
0591:
0592: columnsCount = 0;
0593: rawBuffer.Position = 0;
0594:
0595: dataBuffer.LineStart = dataBuffer.Position;
0596:
0597: hasReadNextLine = false;
0598:
0599: // check to see if we've already found the end of data
0600:
0601: if (hasMoreData) {
0602: // loop over the data stream until the end of data is found
0603: // or the end of the record is found
0604:
0605: do {
0606: if (dataBuffer.Position == dataBuffer.Count) {
0607: checkDataLength();
0608: } else {
0609: startedWithQualifier = false;
0610:
0611: // grab the current letter as a char
0612:
0613: char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0614:
0615: if (userSettings.UseTextQualifier
0616: && currentLetter == userSettings.TextQualifier) {
0617: // this will be a text qualified column, so
0618: // we need to set startedWithQualifier to make it
0619: // enter the seperate branch to handle text
0620: // qualified columns
0621:
0622: lastLetter = currentLetter;
0623:
0624: // read qualified
0625: startedColumn = true;
0626: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0627: startedWithQualifier = true;
0628: boolean lastLetterWasQualifier = false;
0629:
0630: char escapeChar = userSettings.TextQualifier;
0631:
0632: if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
0633: escapeChar = Letters.BACKSLASH;
0634: }
0635:
0636: boolean eatingTrailingJunk = false;
0637: boolean lastLetterWasEscape = false;
0638: boolean readingComplexEscape = false;
0639: int escape = ComplexEscape.UNICODE;
0640: int escapeLength = 0;
0641: char escapeValue = (char) 0;
0642:
0643: dataBuffer.Position++;
0644:
0645: do {
0646: if (dataBuffer.Position == dataBuffer.Count) {
0647: checkDataLength();
0648: } else {
0649: // grab the current letter as a char
0650:
0651: currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0652:
0653: if (eatingTrailingJunk) {
0654: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0655:
0656: if (currentLetter == userSettings.Delimiter) {
0657: endColumn();
0658: } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
0659: || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
0660: endColumn();
0661:
0662: endRecord();
0663: }
0664: } else if (readingComplexEscape) {
0665: escapeLength++;
0666:
0667: switch (escape) {
0668: case ComplexEscape.UNICODE:
0669: escapeValue *= (char) 16;
0670: escapeValue += hexToDec(currentLetter);
0671:
0672: if (escapeLength == 4) {
0673: readingComplexEscape = false;
0674: }
0675:
0676: break;
0677: case ComplexEscape.OCTAL:
0678: escapeValue *= (char) 8;
0679: escapeValue += (char) (currentLetter - '0');
0680:
0681: if (escapeLength == 3) {
0682: readingComplexEscape = false;
0683: }
0684:
0685: break;
0686: case ComplexEscape.DECIMAL:
0687: escapeValue *= (char) 10;
0688: escapeValue += (char) (currentLetter - '0');
0689:
0690: if (escapeLength == 3) {
0691: readingComplexEscape = false;
0692: }
0693:
0694: break;
0695: case ComplexEscape.HEX:
0696: escapeValue *= (char) 16;
0697: escapeValue += hexToDec(currentLetter);
0698:
0699: if (escapeLength == 2) {
0700: readingComplexEscape = false;
0701: }
0702:
0703: break;
0704: }
0705:
0706: if (!readingComplexEscape) {
0707: appendLetter(escapeValue);
0708: } else {
0709: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0710: }
0711: } else if (currentLetter == userSettings.TextQualifier) {
0712: if (lastLetterWasEscape) {
0713: lastLetterWasEscape = false;
0714: lastLetterWasQualifier = false;
0715: } else {
0716: updateCurrentValue();
0717:
0718: if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
0719: lastLetterWasEscape = true;
0720: }
0721:
0722: lastLetterWasQualifier = true;
0723: }
0724: } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0725: && lastLetterWasEscape) {
0726: switch (currentLetter) {
0727: case 'n':
0728: appendLetter(Letters.LF);
0729: break;
0730: case 'r':
0731: appendLetter(Letters.CR);
0732: break;
0733: case 't':
0734: appendLetter(Letters.TAB);
0735: break;
0736: case 'b':
0737: appendLetter(Letters.BACKSPACE);
0738: break;
0739: case 'f':
0740: appendLetter(Letters.FORM_FEED);
0741: break;
0742: case 'e':
0743: appendLetter(Letters.ESCAPE);
0744: break;
0745: case 'v':
0746: appendLetter(Letters.VERTICAL_TAB);
0747: break;
0748: case 'a':
0749: appendLetter(Letters.ALERT);
0750: break;
0751: case '0':
0752: case '1':
0753: case '2':
0754: case '3':
0755: case '4':
0756: case '5':
0757: case '6':
0758: case '7':
0759: escape = ComplexEscape.OCTAL;
0760: readingComplexEscape = true;
0761: escapeLength = 1;
0762: escapeValue = (char) (currentLetter - '0');
0763: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0764: break;
0765: case 'u':
0766: case 'x':
0767: case 'o':
0768: case 'd':
0769: case 'U':
0770: case 'X':
0771: case 'O':
0772: case 'D':
0773: switch (currentLetter) {
0774: case 'u':
0775: case 'U':
0776: escape = ComplexEscape.UNICODE;
0777: break;
0778: case 'x':
0779: case 'X':
0780: escape = ComplexEscape.HEX;
0781: break;
0782: case 'o':
0783: case 'O':
0784: escape = ComplexEscape.OCTAL;
0785: break;
0786: case 'd':
0787: case 'D':
0788: escape = ComplexEscape.DECIMAL;
0789: break;
0790: }
0791:
0792: readingComplexEscape = true;
0793: escapeLength = 0;
0794: escapeValue = (char) 0;
0795: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0796:
0797: break;
0798: default:
0799: break;
0800: }
0801:
0802: lastLetterWasEscape = false;
0803:
0804: // can only happen for ESCAPE_MODE_BACKSLASH
0805: } else if (currentLetter == escapeChar) {
0806: updateCurrentValue();
0807: lastLetterWasEscape = true;
0808: } else {
0809: if (lastLetterWasQualifier) {
0810: if (currentLetter == userSettings.Delimiter) {
0811: endColumn();
0812: } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
0813: || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
0814: endColumn();
0815:
0816: endRecord();
0817: } else {
0818: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0819:
0820: eatingTrailingJunk = true;
0821: }
0822:
0823: // make sure to clear the flag for next
0824: // run of the loop
0825:
0826: lastLetterWasQualifier = false;
0827: }
0828: }
0829:
0830: // keep track of the last letter because we need
0831: // it for several key decisions
0832:
0833: lastLetter = currentLetter;
0834:
0835: if (startedColumn) {
0836: dataBuffer.Position++;
0837:
0838: if (userSettings.SafetySwitch
0839: && dataBuffer.Position
0840: - dataBuffer.ColumnStart
0841: + columnBuffer.Position > 100000) {
0842: close();
0843:
0844: throw new IOException(
0845: "Maximum column length of 100,000 exceeded in column "
0846: + NumberFormat
0847: .getIntegerInstance()
0848: .format(
0849: columnsCount)
0850: + " in record "
0851: + NumberFormat
0852: .getIntegerInstance()
0853: .format(
0854: currentRecord)
0855: + ". Set the SafetySwitch property to false"
0856: + " if you're expecting column lengths greater than 100,000 characters to"
0857: + " avoid this error.");
0858: }
0859: }
0860: } // end else
0861:
0862: } while (hasMoreData && startedColumn);
0863: } else if (currentLetter == userSettings.Delimiter) {
0864: // we encountered a column with no data, so
0865: // just send the end column
0866:
0867: lastLetter = currentLetter;
0868:
0869: endColumn();
0870: } else if (useCustomRecordDelimiter
0871: && currentLetter == userSettings.RecordDelimiter) {
0872: // this will skip blank lines
0873: if (startedColumn || columnsCount > 0
0874: || !userSettings.SkipEmptyRecords) {
0875: endColumn();
0876:
0877: endRecord();
0878: } else {
0879: dataBuffer.LineStart = dataBuffer.Position + 1;
0880: }
0881:
0882: lastLetter = currentLetter;
0883: } else if (!useCustomRecordDelimiter
0884: && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
0885: // this will skip blank lines
0886: if (startedColumn
0887: || columnsCount > 0
0888: || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
0889: endColumn();
0890:
0891: endRecord();
0892: } else {
0893: dataBuffer.LineStart = dataBuffer.Position + 1;
0894: }
0895:
0896: lastLetter = currentLetter;
0897: } else if (userSettings.UseComments
0898: && columnsCount == 0
0899: && currentLetter == userSettings.Comment) {
0900: // encountered a comment character at the beginning of
0901: // the line so just ignore the rest of the line
0902:
0903: lastLetter = currentLetter;
0904:
0905: skipLine();
0906: } else if (userSettings.TrimWhitespace
0907: && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
0908: // do nothing, this will trim leading whitespace
0909: // for both text qualified columns and non
0910:
0911: startedColumn = true;
0912: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0913: } else {
0914: // since the letter wasn't a special letter, this
0915: // will be the first letter of our current column
0916:
0917: startedColumn = true;
0918: dataBuffer.ColumnStart = dataBuffer.Position;
0919: boolean lastLetterWasBackslash = false;
0920: boolean readingComplexEscape = false;
0921: int escape = ComplexEscape.UNICODE;
0922: int escapeLength = 0;
0923: char escapeValue = (char) 0;
0924:
0925: boolean firstLoop = true;
0926:
0927: do {
0928: if (!firstLoop
0929: && dataBuffer.Position == dataBuffer.Count) {
0930: checkDataLength();
0931: } else {
0932: if (!firstLoop) {
0933: // grab the current letter as a char
0934: currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0935: }
0936:
0937: if (!userSettings.UseTextQualifier
0938: && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0939: && currentLetter == Letters.BACKSLASH) {
0940: if (lastLetterWasBackslash) {
0941: lastLetterWasBackslash = false;
0942: } else {
0943: updateCurrentValue();
0944: lastLetterWasBackslash = true;
0945: }
0946: } else if (readingComplexEscape) {
0947: escapeLength++;
0948:
0949: switch (escape) {
0950: case ComplexEscape.UNICODE:
0951: escapeValue *= (char) 16;
0952: escapeValue += hexToDec(currentLetter);
0953:
0954: if (escapeLength == 4) {
0955: readingComplexEscape = false;
0956: }
0957:
0958: break;
0959: case ComplexEscape.OCTAL:
0960: escapeValue *= (char) 8;
0961: escapeValue += (char) (currentLetter - '0');
0962:
0963: if (escapeLength == 3) {
0964: readingComplexEscape = false;
0965: }
0966:
0967: break;
0968: case ComplexEscape.DECIMAL:
0969: escapeValue *= (char) 10;
0970: escapeValue += (char) (currentLetter - '0');
0971:
0972: if (escapeLength == 3) {
0973: readingComplexEscape = false;
0974: }
0975:
0976: break;
0977: case ComplexEscape.HEX:
0978: escapeValue *= (char) 16;
0979: escapeValue += hexToDec(currentLetter);
0980:
0981: if (escapeLength == 2) {
0982: readingComplexEscape = false;
0983: }
0984:
0985: break;
0986: }
0987:
0988: if (!readingComplexEscape) {
0989: appendLetter(escapeValue);
0990: } else {
0991: dataBuffer.ColumnStart = dataBuffer.Position + 1;
0992: }
0993: } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0994: && lastLetterWasBackslash) {
0995: switch (currentLetter) {
0996: case 'n':
0997: appendLetter(Letters.LF);
0998: break;
0999: case 'r':
1000: appendLetter(Letters.CR);
1001: break;
1002: case 't':
1003: appendLetter(Letters.TAB);
1004: break;
1005: case 'b':
1006: appendLetter(Letters.BACKSPACE);
1007: break;
1008: case 'f':
1009: appendLetter(Letters.FORM_FEED);
1010: break;
1011: case 'e':
1012: appendLetter(Letters.ESCAPE);
1013: break;
1014: case 'v':
1015: appendLetter(Letters.VERTICAL_TAB);
1016: break;
1017: case 'a':
1018: appendLetter(Letters.ALERT);
1019: break;
1020: case '0':
1021: case '1':
1022: case '2':
1023: case '3':
1024: case '4':
1025: case '5':
1026: case '6':
1027: case '7':
1028: escape = ComplexEscape.OCTAL;
1029: readingComplexEscape = true;
1030: escapeLength = 1;
1031: escapeValue = (char) (currentLetter - '0');
1032: dataBuffer.ColumnStart = dataBuffer.Position + 1;
1033: break;
1034: case 'u':
1035: case 'x':
1036: case 'o':
1037: case 'd':
1038: case 'U':
1039: case 'X':
1040: case 'O':
1041: case 'D':
1042: switch (currentLetter) {
1043: case 'u':
1044: case 'U':
1045: escape = ComplexEscape.UNICODE;
1046: break;
1047: case 'x':
1048: case 'X':
1049: escape = ComplexEscape.HEX;
1050: break;
1051: case 'o':
1052: case 'O':
1053: escape = ComplexEscape.OCTAL;
1054: break;
1055: case 'd':
1056: case 'D':
1057: escape = ComplexEscape.DECIMAL;
1058: break;
1059: }
1060:
1061: readingComplexEscape = true;
1062: escapeLength = 0;
1063: escapeValue = (char) 0;
1064: dataBuffer.ColumnStart = dataBuffer.Position + 1;
1065:
1066: break;
1067: default:
1068: break;
1069: }
1070:
1071: lastLetterWasBackslash = false;
1072: } else {
1073: if (currentLetter == userSettings.Delimiter) {
1074: endColumn();
1075: } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
1076: || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
1077: endColumn();
1078:
1079: endRecord();
1080: }
1081: }
1082:
1083: // keep track of the last letter because we need
1084: // it for several key decisions
1085:
1086: lastLetter = currentLetter;
1087: firstLoop = false;
1088:
1089: if (startedColumn) {
1090: dataBuffer.Position++;
1091:
1092: if (userSettings.SafetySwitch
1093: && dataBuffer.Position
1094: - dataBuffer.ColumnStart
1095: + columnBuffer.Position > 100000) {
1096: close();
1097:
1098: throw new IOException(
1099: "Maximum column length of 100,000 exceeded in column "
1100: + NumberFormat
1101: .getIntegerInstance()
1102: .format(
1103: columnsCount)
1104: + " in record "
1105: + NumberFormat
1106: .getIntegerInstance()
1107: .format(
1108: currentRecord)
1109: + ". Set the SafetySwitch property to false"
1110: + " if you're expecting column lengths greater than 100,000 characters to"
1111: + " avoid this error.");
1112: }
1113: }
1114: } // end else
1115: } while (hasMoreData && startedColumn);
1116: }
1117:
1118: if (hasMoreData) {
1119: dataBuffer.Position++;
1120: }
1121: } // end else
1122: } while (hasMoreData && !hasReadNextLine);
1123:
1124: // check to see if we hit the end of the file
1125: // without processing the current record
1126:
1127: if (startedColumn || lastLetter == userSettings.Delimiter) {
1128: endColumn();
1129:
1130: endRecord();
1131: }
1132: }
1133:
1134: if (userSettings.CaptureRawRecord) {
1135: if (hasMoreData) {
1136: if (rawBuffer.Position == 0) {
1137: rawRecord = new String(dataBuffer.Buffer,
1138: dataBuffer.LineStart, dataBuffer.Position
1139: - dataBuffer.LineStart - 1);
1140: } else {
1141: rawRecord = new String(rawBuffer.Buffer, 0,
1142: rawBuffer.Position)
1143: + new String(dataBuffer.Buffer,
1144: dataBuffer.LineStart,
1145: dataBuffer.Position
1146: - dataBuffer.LineStart - 1);
1147: }
1148: } else {
1149: // for hasMoreData to ever be false, all data would have had to
1150: // have been
1151: // copied to the raw buffer
1152: rawRecord = new String(rawBuffer.Buffer, 0,
1153: rawBuffer.Position);
1154: }
1155: } else {
1156: rawRecord = "";
1157: }
1158:
1159: return hasReadNextLine;
1160: }
1161:
1162: /**
1163: * @exception IOException
1164: * Thrown if an error occurs while reading data from the
1165: * source stream.
1166: */
1167: private void checkDataLength() throws IOException {
1168: if (!initialized) {
1169: if (fileName != null) {
1170: inputStream = new BufferedReader(new InputStreamReader(
1171: new FileInputStream(fileName), charset),
1172: StaticSettings.MAX_FILE_BUFFER_SIZE);
1173: }
1174:
1175: charset = null;
1176: initialized = true;
1177: }
1178:
1179: updateCurrentValue();
1180:
1181: if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
1182: if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count
1183: - dataBuffer.LineStart) {
1184: int newLength = rawBuffer.Buffer.length
1185: + Math.max(dataBuffer.Count
1186: - dataBuffer.LineStart,
1187: rawBuffer.Buffer.length);
1188:
1189: char[] holder = new char[newLength];
1190:
1191: System.arraycopy(rawBuffer.Buffer, 0, holder, 0,
1192: rawBuffer.Position);
1193:
1194: rawBuffer.Buffer = holder;
1195: }
1196:
1197: System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,
1198: rawBuffer.Buffer, rawBuffer.Position,
1199: dataBuffer.Count - dataBuffer.LineStart);
1200:
1201: rawBuffer.Position += dataBuffer.Count
1202: - dataBuffer.LineStart;
1203: }
1204:
1205: try {
1206: dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,
1207: dataBuffer.Buffer.length);
1208: } catch (IOException ex) {
1209: close();
1210:
1211: throw ex;
1212: }
1213:
1214: // if no more data could be found, set flag stating that
1215: // the end of the data was found
1216:
1217: if (dataBuffer.Count == -1) {
1218: hasMoreData = false;
1219: }
1220:
1221: dataBuffer.Position = 0;
1222: dataBuffer.LineStart = 0;
1223: dataBuffer.ColumnStart = 0;
1224: }
1225:
1226: /**
1227: * Read the first record of data as column headers.
1228: *
1229: * @return Whether the header record was successfully read or not.
1230: * @exception IOException
1231: * Thrown if an error occurs while reading data from the
1232: * source stream.
1233: */
1234: public boolean readHeaders() throws IOException {
1235: boolean result = readRecord();
1236:
1237: // copy the header data from the column array
1238: // to the header string array
1239:
1240: headersHolder.Length = columnsCount;
1241:
1242: headersHolder.Headers = new String[columnsCount];
1243:
1244: for (int i = 0; i < headersHolder.Length; i++) {
1245: String columnValue = get(i);
1246:
1247: headersHolder.Headers[i] = columnValue;
1248:
1249: // if there are duplicate header names, we will save the last one
1250: headersHolder.IndexByName.put(columnValue, Integer
1251: .valueOf(i));
1252: }
1253:
1254: if (result) {
1255: currentRecord--;
1256: }
1257:
1258: columnsCount = 0;
1259:
1260: return result;
1261: }
1262:
1263: /**
1264: * Returns the column header value for a given column index.
1265: *
1266: * @param columnIndex
1267: * The index of the header column being requested.
1268: * @return The value of the column header at the given column index.
1269: * @exception IOException
1270: * Thrown if this object has already been closed.
1271: */
1272: public String getHeader(int columnIndex) throws IOException {
1273: checkClosed();
1274:
1275: // check to see if we have read the header record yet
1276:
1277: // check to see if the column index is within the bounds
1278: // of our header array
1279:
1280: if (columnIndex > -1 && columnIndex < headersHolder.Length) {
1281: // return the processed header data for this column
1282:
1283: return headersHolder.Headers[columnIndex];
1284: } else {
1285: return "";
1286: }
1287: }
1288:
1289: public boolean isQualified(int columnIndex) throws IOException {
1290: checkClosed();
1291:
1292: if (columnIndex < columnsCount && columnIndex > -1) {
1293: return isQualified[columnIndex];
1294: } else {
1295: return false;
1296: }
1297: }
1298:
1299: /**
1300: * @exception IOException
1301: * Thrown if a very rare extreme exception occurs during
1302: * parsing, normally resulting from improper data format.
1303: */
1304: private void endColumn() throws IOException {
1305: String currentValue = "";
1306:
1307: // must be called before setting startedColumn = false
1308: if (startedColumn) {
1309: if (columnBuffer.Position == 0) {
1310: if (dataBuffer.ColumnStart < dataBuffer.Position) {
1311: int lastLetter = dataBuffer.Position - 1;
1312:
1313: if (userSettings.TrimWhitespace
1314: && !startedWithQualifier) {
1315: while (lastLetter >= dataBuffer.ColumnStart
1316: && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
1317: lastLetter--;
1318: }
1319: }
1320:
1321: currentValue = new String(dataBuffer.Buffer,
1322: dataBuffer.ColumnStart, lastLetter
1323: - dataBuffer.ColumnStart + 1);
1324: }
1325: } else {
1326: updateCurrentValue();
1327:
1328: int lastLetter = columnBuffer.Position - 1;
1329:
1330: if (userSettings.TrimWhitespace
1331: && !startedWithQualifier) {
1332: while (lastLetter >= 0
1333: && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
1334: lastLetter--;
1335: }
1336: }
1337:
1338: currentValue = new String(columnBuffer.Buffer, 0,
1339: lastLetter + 1);
1340: }
1341: }
1342:
1343: columnBuffer.Position = 0;
1344:
1345: startedColumn = false;
1346:
1347: if (columnsCount >= 100000 && userSettings.SafetySwitch) {
1348: close();
1349:
1350: throw new IOException(
1351: "Maximum column count of 100,000 exceeded in record "
1352: + NumberFormat.getIntegerInstance().format(
1353: currentRecord)
1354: + ". Set the SafetySwitch property to false"
1355: + " if you're expecting more than 100,000 columns per record to"
1356: + " avoid this error.");
1357: }
1358:
1359: // check to see if our current holder array for
1360: // column chunks is still big enough to handle another
1361: // column chunk
1362:
1363: if (columnsCount == values.length) {
1364: // holder array needs to grow to be able to hold another column
1365: int newLength = values.length * 2;
1366:
1367: String[] holder = new String[newLength];
1368:
1369: System.arraycopy(values, 0, holder, 0, values.length);
1370:
1371: values = holder;
1372:
1373: boolean[] qualifiedHolder = new boolean[newLength];
1374:
1375: System.arraycopy(isQualified, 0, qualifiedHolder, 0,
1376: isQualified.length);
1377:
1378: isQualified = qualifiedHolder;
1379: }
1380:
1381: values[columnsCount] = currentValue;
1382:
1383: isQualified[columnsCount] = startedWithQualifier;
1384:
1385: currentValue = "";
1386:
1387: columnsCount++;
1388: }
1389:
1390: private void appendLetter(char letter) {
1391: if (columnBuffer.Position == columnBuffer.Buffer.length) {
1392: int newLength = columnBuffer.Buffer.length * 2;
1393:
1394: char[] holder = new char[newLength];
1395:
1396: System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1397: columnBuffer.Position);
1398:
1399: columnBuffer.Buffer = holder;
1400: }
1401: columnBuffer.Buffer[columnBuffer.Position++] = letter;
1402: dataBuffer.ColumnStart = dataBuffer.Position + 1;
1403: }
1404:
1405: private void updateCurrentValue() {
1406: if (startedColumn
1407: && dataBuffer.ColumnStart < dataBuffer.Position) {
1408: if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position
1409: - dataBuffer.ColumnStart) {
1410: int newLength = columnBuffer.Buffer.length
1411: + Math.max(dataBuffer.Position
1412: - dataBuffer.ColumnStart,
1413: columnBuffer.Buffer.length);
1414:
1415: char[] holder = new char[newLength];
1416:
1417: System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1418: columnBuffer.Position);
1419:
1420: columnBuffer.Buffer = holder;
1421: }
1422:
1423: System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,
1424: columnBuffer.Buffer, columnBuffer.Position,
1425: dataBuffer.Position - dataBuffer.ColumnStart);
1426:
1427: columnBuffer.Position += dataBuffer.Position
1428: - dataBuffer.ColumnStart;
1429: }
1430:
1431: dataBuffer.ColumnStart = dataBuffer.Position + 1;
1432: }
1433:
1434: /**
1435: * @exception IOException
1436: * Thrown if an error occurs while reading data from the
1437: * source stream.
1438: */
1439: private void endRecord() throws IOException {
1440: // this flag is used as a loop exit condition
1441: // during parsing
1442:
1443: hasReadNextLine = true;
1444:
1445: currentRecord++;
1446: }
1447:
1448: /**
1449: * Gets the corresponding column index for a given column header name.
1450: *
1451: * @param headerName
1452: * The header name of the column.
1453: * @return The column index for the given column header name. Returns
1454: * -1 if not found.
1455: * @exception IOException
1456: * Thrown if this object has already been closed.
1457: */
1458: public int getIndex(String headerName) throws IOException {
1459: checkClosed();
1460:
1461: Integer indexValue = headersHolder.IndexByName.get(headerName);
1462:
1463: if (indexValue != null) {
1464: return indexValue.intValue();
1465: } else {
1466: return -1;
1467: }
1468: }
1469:
1470: /**
1471: * Skips the next record of data by parsing each column. Does not
1472: * increment
1473: * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.
1474: *
1475: * @return Whether another record was successfully skipped or not.
1476: * @exception IOException
1477: * Thrown if an error occurs while reading data from the
1478: * source stream.
1479: */
1480: public boolean skipRecord() throws IOException {
1481: checkClosed();
1482:
1483: boolean recordRead = false;
1484:
1485: if (hasMoreData) {
1486: recordRead = readRecord();
1487:
1488: if (recordRead) {
1489: currentRecord--;
1490: }
1491: }
1492:
1493: return recordRead;
1494: }
1495:
1496: /**
1497: * Skips the next line of data using the standard end of line characters and
1498: * does not do any column delimited parsing.
1499: *
1500: * @return Whether a line was successfully skipped or not.
1501: * @exception IOException
1502: * Thrown if an error occurs while reading data from the
1503: * source stream.
1504: */
1505: public boolean skipLine() throws IOException {
1506: checkClosed();
1507:
1508: // clear public column values for current line
1509:
1510: columnsCount = 0;
1511:
1512: boolean skippedLine = false;
1513:
1514: if (hasMoreData) {
1515: boolean foundEol = false;
1516:
1517: do {
1518: if (dataBuffer.Position == dataBuffer.Count) {
1519: checkDataLength();
1520: } else {
1521: skippedLine = true;
1522:
1523: // grab the current letter as a char
1524:
1525: char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
1526:
1527: if (currentLetter == Letters.CR
1528: || currentLetter == Letters.LF) {
1529: foundEol = true;
1530: }
1531:
1532: // keep track of the last letter because we need
1533: // it for several key decisions
1534:
1535: lastLetter = currentLetter;
1536:
1537: if (!foundEol) {
1538: dataBuffer.Position++;
1539: }
1540:
1541: } // end else
1542: } while (hasMoreData && !foundEol);
1543:
1544: columnBuffer.Position = 0;
1545:
1546: dataBuffer.LineStart = dataBuffer.Position + 1;
1547: }
1548:
1549: rawBuffer.Position = 0;
1550: rawRecord = "";
1551:
1552: return skippedLine;
1553: }
1554:
1555: /**
1556: * Closes and releases all related resources.
1557: */
1558: public void close() {
1559: if (!closed) {
1560: close(true);
1561:
1562: closed = true;
1563: }
1564: }
1565:
1566: /**
1567: *
1568: */
1569: private void close(boolean closing) {
1570: if (!closed) {
1571: if (closing) {
1572: charset = null;
1573: headersHolder.Headers = null;
1574: headersHolder.IndexByName = null;
1575: dataBuffer.Buffer = null;
1576: columnBuffer.Buffer = null;
1577: rawBuffer.Buffer = null;
1578: }
1579:
1580: try {
1581: if (initialized) {
1582: inputStream.close();
1583: }
1584: } catch (Exception e) {
1585: // just eat the exception
1586: }
1587:
1588: inputStream = null;
1589:
1590: closed = true;
1591: }
1592: }
1593:
1594: /**
1595: * @exception IOException
1596: * Thrown if this object has already been closed.
1597: */
1598: private void checkClosed() throws IOException {
1599: if (closed) {
1600: throw new IOException(
1601: "This instance of the CsvReader class has already been closed.");
1602: }
1603: }
1604:
1605: /**
1606: *
1607: */
1608: protected void finalize() {
1609: close(false);
1610: }
1611:
1612: private class ComplexEscape {
1613: private static final int UNICODE = 1;
1614:
1615: private static final int OCTAL = 2;
1616:
1617: private static final int DECIMAL = 3;
1618:
1619: private static final int HEX = 4;
1620: }
1621:
1622: private static char hexToDec(char hex) {
1623: char result;
1624:
1625: if (hex >= 'a') {
1626: result = (char) (hex - 'a' + 10);
1627: } else if (hex >= 'A') {
1628: result = (char) (hex - 'A' + 10);
1629: } else {
1630: result = (char) (hex - '0');
1631: }
1632:
1633: return result;
1634: }
1635:
1636: private class DataBuffer {
1637: public char[] Buffer;
1638:
1639: public int Position;
1640:
1641: // / <summary>
1642: // / How much usable data has been read into the stream,
1643: // / which will not always be as long as Buffer.Length.
1644: // / </summary>
1645: public int Count;
1646:
1647: // / <summary>
1648: // / The position of the cursor in the buffer when the
1649: // / current column was started or the last time data
1650: // / was moved out to the column buffer.
1651: // / </summary>
1652: public int ColumnStart;
1653:
1654: public int LineStart;
1655:
1656: public DataBuffer() {
1657: Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
1658: Position = 0;
1659: Count = 0;
1660: ColumnStart = 0;
1661: LineStart = 0;
1662: }
1663: }
1664:
1665: private class ColumnBuffer {
1666: public char[] Buffer;
1667:
1668: public int Position;
1669:
1670: public ColumnBuffer() {
1671: Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
1672: Position = 0;
1673: }
1674: }
1675:
1676: private class RawRecordBuffer {
1677: public char[] Buffer;
1678:
1679: public int Position;
1680:
1681: public RawRecordBuffer() {
1682: Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
1683: * StaticSettings.INITIAL_COLUMN_COUNT];
1684: Position = 0;
1685: }
1686: }
1687:
1688: private class Letters {
1689: public static final char LF = '\n';
1690:
1691: public static final char CR = '\r';
1692:
1693: public static final char QUOTE = '"';
1694:
1695: public static final char COMMA = ',';
1696:
1697: public static final char SPACE = ' ';
1698:
1699: public static final char TAB = '\t';
1700:
1701: public static final char POUND = '#';
1702:
1703: public static final char BACKSLASH = '\\';
1704:
1705: public static final char NULL = '\0';
1706:
1707: public static final char BACKSPACE = '\b';
1708:
1709: public static final char FORM_FEED = '\f';
1710:
1711: public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
1712:
1713: public static final char VERTICAL_TAB = '\u000B';
1714:
1715: public static final char ALERT = '\u0007';
1716: }
1717:
1718: private class UserSettings {
1719: // having these as publicly accessible members will prevent
1720: // the overhead of the method call that exists on properties
1721: public boolean CaseSensitive;
1722:
1723: public char TextQualifier;
1724:
1725: public boolean TrimWhitespace;
1726:
1727: public boolean UseTextQualifier;
1728:
1729: public char Delimiter;
1730:
1731: public char RecordDelimiter;
1732:
1733: public char Comment;
1734:
1735: public boolean UseComments;
1736:
1737: public int EscapeMode;
1738:
1739: public boolean SafetySwitch;
1740:
1741: public boolean SkipEmptyRecords;
1742:
1743: public boolean CaptureRawRecord;
1744:
1745: public UserSettings() {
1746: CaseSensitive = true;
1747: TextQualifier = Letters.QUOTE;
1748: TrimWhitespace = true;
1749: UseTextQualifier = true;
1750: Delimiter = Letters.COMMA;
1751: RecordDelimiter = Letters.NULL;
1752: Comment = Letters.POUND;
1753: UseComments = false;
1754: EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
1755: SafetySwitch = true;
1756: SkipEmptyRecords = true;
1757: CaptureRawRecord = true;
1758: }
1759: }
1760:
1761: private class HeadersHolder {
1762: public String[] Headers;
1763:
1764: public int Length;
1765:
1766: public HashMap<String, Integer> IndexByName;
1767:
1768: public HeadersHolder() {
1769: Headers = null;
1770: Length = 0;
1771: IndexByName = new HashMap<String, Integer>();
1772: }
1773: }
1774:
1775: private class StaticSettings {
1776: // these are static instead of final so they can be changed in unit test
1777: // isn't visible outside this class and is only accessed once during
1778: // CsvReader construction
1779: public static final int MAX_BUFFER_SIZE = 1024;
1780:
1781: public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
1782:
1783: public static final int INITIAL_COLUMN_COUNT = 10;
1784:
1785: public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
1786: }
1787: }
|