Source Code Cross Referenced for CsvReader.java in » Database-Client » squirrel-sql-2.6.5a » com » csvreader » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Database Client » squirrel sql 2.6.5a » com.csvreader
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * Java CSV is a stream based library for reading and writing
0003:         * CSV and other delimited data.
0004:         *   
0005:         * Copyright (C) Bruce Dunwiddie bruce@csvreader.com
0006:         *
0007:         * This library is free software; you can redistribute it and/or
0008:         * modify it under the terms of the GNU Lesser General Public
0009:         * License as published by the Free Software Foundation; either
0010:         * version 2.1 of the License, or (at your option) any later version.
0011:         *
0012:         * This library is distributed in the hope that it will be useful,
0013:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015:         * Lesser General Public License for more details.
0016:         *
0017:         * You should have received a copy of the GNU Lesser General Public
0018:         * License along with this library; if not, write to the Free Software
0019:         * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
0020:         */
0021:        package com.csvreader;
0022:
0023:        import java.io.BufferedReader;
0024:        import java.io.File;
0025:        import java.io.FileInputStream;
0026:        import java.io.FileNotFoundException;
0027:        import java.io.IOException;
0028:        import java.io.InputStream;
0029:        import java.io.InputStreamReader;
0030:        import java.io.Reader;
0031:        import java.io.StringReader;
0032:        import java.nio.charset.Charset;
0033:        import java.text.NumberFormat;
0034:        import java.util.HashMap;
0035:
0036:        /**
0037:         * A stream based parser for parsing delimited text data from a file or a
0038:         * stream.
0039:         */
0040:        public class CsvReader {
0041:            private Reader inputStream = null;
0042:
0043:            private String fileName = null;
0044:
0045:            // this holds all the values for switches that the user is allowed to set
0046:            private UserSettings userSettings = new UserSettings();
0047:
0048:            private Charset charset = null;
0049:
0050:            private boolean useCustomRecordDelimiter = false;
0051:
0052:            // this will be our working buffer to hold data chunks
0053:            // read in from the data file
0054:
0055:            private DataBuffer dataBuffer = new DataBuffer();
0056:
0057:            private ColumnBuffer columnBuffer = new ColumnBuffer();
0058:
0059:            private RawRecordBuffer rawBuffer = new RawRecordBuffer();
0060:
0061:            private boolean[] isQualified = null;
0062:
0063:            private String rawRecord = "";
0064:
0065:            private HeadersHolder headersHolder = new HeadersHolder();
0066:
0067:            // these are all more or less global loop variables
0068:            // to keep from needing to pass them all into various
0069:            // methods during parsing
0070:
0071:            private boolean startedColumn = false;
0072:
0073:            private boolean startedWithQualifier = false;
0074:
0075:            private boolean hasMoreData = true;
0076:
0077:            private char lastLetter = '\0';
0078:
0079:            private boolean hasReadNextLine = false;
0080:
0081:            private int columnsCount = 0;
0082:
0083:            private long currentRecord = 0;
0084:
0085:            private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];
0086:
0087:            private boolean initialized = false;
0088:
0089:            private boolean closed = false;
0090:
0091:            /**
0092:             * Double up the text qualifier to represent an occurrence of the text
0093:             * qualifier.
0094:             */
0095:            public static final int ESCAPE_MODE_DOUBLED = 1;
0096:
0097:            /**
0098:             * Use a backslash character before the text qualifier to represent an
0099:             * occurrence of the text qualifier.
0100:             */
0101:            public static final int ESCAPE_MODE_BACKSLASH = 2;
0102:
0103:            /**
0104:             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
0105:             * as the data source.
0106:             * 
0107:             * @param fileName
0108:             *            The path to the file to use as the data source.
0109:             * @param delimiter
0110:             *            The character to use as the column delimiter.
0111:             * @param charset
0112:             *            The {@link java.nio.charset.Charset Charset} to use while
0113:             *            parsing the data.
0114:             */
0115:            public CsvReader(String fileName, char delimiter, Charset charset)
0116:                    throws FileNotFoundException {
0117:                if (fileName == null) {
0118:                    throw new IllegalArgumentException(
0119:                            "Parameter fileName can not be null.");
0120:                }
0121:
0122:                if (charset == null) {
0123:                    throw new IllegalArgumentException(
0124:                            "Parameter charset can not be null.");
0125:                }
0126:
0127:                if (!new File(fileName).exists()) {
0128:                    throw new FileNotFoundException("File " + fileName
0129:                            + " does not exist.");
0130:                }
0131:
0132:                this .fileName = fileName;
0133:                this .userSettings.Delimiter = delimiter;
0134:                this .charset = charset;
0135:
0136:                isQualified = new boolean[values.length];
0137:            }
0138:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} that takes its data
 * from a file, decoding it as ISO-8859-1. Delegates to the
 * (fileName, delimiter, charset) constructor.
 *
 * @param fileName
 *            Path of the file to read from.
 * @param delimiter
 *            Character that separates columns.
 * @exception FileNotFoundException
 *                Thrown if nothing exists at the given path.
 */
public CsvReader(String fileName, char delimiter)
        throws FileNotFoundException {
    this(fileName, delimiter, Charset.forName("ISO-8859-1"));
}
0153:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} that takes its data
 * from a file, using Letters.COMMA as the column delimiter. Delegates to
 * the (fileName, delimiter) constructor, which selects ISO-8859-1.
 *
 * @param fileName
 *            Path of the file to read from.
 * @exception FileNotFoundException
 *                Thrown if nothing exists at the given path.
 */
public CsvReader(String fileName) throws FileNotFoundException {
    this(fileName, Letters.COMMA);
}
0165:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an already
 * open {@link java.io.Reader Reader}. Because the source is supplied
 * directly, the instance is flagged as initialized right away.
 *
 * @param inputStream
 *            The reader that supplies the data.
 * @param delimiter
 *            Character that separates columns.
 * @exception IllegalArgumentException
 *                Thrown if inputStream is null.
 */
public CsvReader(Reader inputStream, char delimiter) {
    if (inputStream == null) {
        throw new IllegalArgumentException(
                "Parameter inputStream can not be null.");
    }

    // one qualified-flag slot per initially allocated column value
    this.isQualified = new boolean[this.values.length];

    this.userSettings.Delimiter = delimiter;
    this.inputStream = inputStream;
    this.initialized = true;
}
0187:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an already
 * open {@link java.io.Reader Reader}, using Letters.COMMA as the column
 * delimiter.
 *
 * @param inputStream
 *            The reader that supplies the data.
 */
public CsvReader(Reader inputStream) {
    this(inputStream, Letters.COMMA);
}
0199:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an
 * {@link java.io.InputStream InputStream}. The stream is wrapped in an
 * {@link java.io.InputStreamReader InputStreamReader} with the given
 * charset and handed to the (Reader, delimiter) constructor.
 *
 * @param inputStream
 *            The byte stream that supplies the data.
 * @param delimiter
 *            Character that separates columns.
 * @param charset
 *            The {@link java.nio.charset.Charset Charset} used to decode
 *            the stream.
 */
public CsvReader(InputStream inputStream, char delimiter,
        Charset charset) {
    this(new InputStreamReader(inputStream, charset), delimiter);
}
0216:
/**
 * Builds a {@link com.csvreader.CsvReader CsvReader} on top of an
 * {@link java.io.InputStream InputStream}. The stream is wrapped in an
 * {@link java.io.InputStreamReader InputStreamReader} with the given
 * charset and handed to the single-argument Reader constructor, which
 * selects Letters.COMMA as the column delimiter.
 *
 * @param inputStream
 *            The byte stream that supplies the data.
 * @param charset
 *            The {@link java.nio.charset.Charset Charset} used to decode
 *            the stream.
 */
public CsvReader(InputStream inputStream, Charset charset) {
    this(new InputStreamReader(inputStream, charset));
}
0231:
0232:            public boolean getCaptureRawRecord() {
0233:                return userSettings.CaptureRawRecord;
0234:            }
0235:
0236:            public void setCaptureRawRecord(boolean captureRawRecord) {
0237:                userSettings.CaptureRawRecord = captureRawRecord;
0238:            }
0239:
0240:            public String getRawRecord() {
0241:                return rawRecord;
0242:            }
0243:
0244:            /**
0245:             * Gets whether leading and trailing whitespace characters are being trimmed
0246:             * from non-textqualified column data. Default is true.
0247:             * 
0248:             * @return Whether leading and trailing whitespace characters are being
0249:             *         trimmed from non-textqualified column data.
0250:             */
0251:            public boolean getTrimWhitespace() {
0252:                return userSettings.TrimWhitespace;
0253:            }
0254:
0255:            /**
0256:             * Sets whether leading and trailing whitespace characters should be trimmed
0257:             * from non-textqualified column data or not. Default is true.
0258:             * 
0259:             * @param trimWhitespace
0260:             *            Whether leading and trailing whitespace characters should be
0261:             *            trimmed from non-textqualified column data or not.
0262:             */
0263:            public void setTrimWhitespace(boolean trimWhitespace) {
0264:                userSettings.TrimWhitespace = trimWhitespace;
0265:            }
0266:
0267:            /**
0268:             * Gets the character being used as the column delimiter. Default is comma,
0269:             * ','.
0270:             * 
0271:             * @return The character being used as the column delimiter.
0272:             */
0273:            public char getDelimiter() {
0274:                return userSettings.Delimiter;
0275:            }
0276:
0277:            /**
0278:             * Sets the character to use as the column delimiter. Default is comma, ','.
0279:             * 
0280:             * @param delimiter
0281:             *            The character to use as the column delimiter.
0282:             */
0283:            public void setDelimiter(char delimiter) {
0284:                userSettings.Delimiter = delimiter;
0285:            }
0286:
0287:            public char getRecordDelimiter() {
0288:                return userSettings.RecordDelimiter;
0289:            }
0290:
0291:            /**
0292:             * Sets the character to use as the record delimiter.
0293:             * 
0294:             * @param recordDelimiter
0295:             *            The character to use as the record delimiter. Default is
0296:             *            combination of standard end of line characters for Windows,
0297:             *            Unix, or Mac.
0298:             */
0299:            public void setRecordDelimiter(char recordDelimiter) {
0300:                useCustomRecordDelimiter = true;
0301:                userSettings.RecordDelimiter = recordDelimiter;
0302:            }
0303:
0304:            /**
0305:             * Gets the character to use as a text qualifier in the data.
0306:             * 
0307:             * @return The character to use as a text qualifier in the data.
0308:             */
0309:            public char getTextQualifier() {
0310:                return userSettings.TextQualifier;
0311:            }
0312:
0313:            /**
0314:             * Sets the character to use as a text qualifier in the data.
0315:             * 
0316:             * @param textQualifier
0317:             *            The character to use as a text qualifier in the data.
0318:             */
0319:            public void setTextQualifier(char textQualifier) {
0320:                userSettings.TextQualifier = textQualifier;
0321:            }
0322:
0323:            /**
0324:             * Whether text qualifiers will be used while parsing or not.
0325:             * 
0326:             * @return Whether text qualifiers will be used while parsing or not.
0327:             */
0328:            public boolean getUseTextQualifier() {
0329:                return userSettings.UseTextQualifier;
0330:            }
0331:
0332:            /**
0333:             * Sets whether text qualifiers will be used while parsing or not.
0334:             * 
0335:             * @param useTextQualifier
0336:             *            Whether to use a text qualifier while parsing or not.
0337:             */
0338:            public void setUseTextQualifier(boolean useTextQualifier) {
0339:                userSettings.UseTextQualifier = useTextQualifier;
0340:            }
0341:
0342:            /**
0343:             * Gets the character being used as a comment signal.
0344:             * 
0345:             * @return The character being used as a comment signal.
0346:             */
0347:            public char getComment() {
0348:                return userSettings.Comment;
0349:            }
0350:
0351:            /**
0352:             * Sets the character to use as a comment signal.
0353:             * 
0354:             * @param comment
0355:             *            The character to use as a comment signal.
0356:             */
0357:            public void setComment(char comment) {
0358:                userSettings.Comment = comment;
0359:            }
0360:
0361:            /**
0362:             * Gets whether comments are being looked for while parsing or not.
0363:             * 
0364:             * @return Whether comments are being looked for while parsing or not.
0365:             */
0366:            public boolean getUseComments() {
0367:                return userSettings.UseComments;
0368:            }
0369:
0370:            /**
0371:             * Sets whether comments are being looked for while parsing or not.
0372:             * 
0373:             * @param useComments
0374:             *            Whether comments are being looked for while parsing or not.
0375:             */
0376:            public void setUseComments(boolean useComments) {
0377:                userSettings.UseComments = useComments;
0378:            }
0379:
0380:            /**
0381:             * Gets the current way to escape an occurance of the text qualifier inside
0382:             * qualified data.
0383:             * 
0384:             * @return The current way to escape an occurance of the text qualifier
0385:             *         inside qualified data.
0386:             */
0387:            public int getEscapeMode() {
0388:                return userSettings.EscapeMode;
0389:            }
0390:
0391:            /**
0392:             * Sets the current way to escape an occurance of the text qualifier inside
0393:             * qualified data.
0394:             * 
0395:             * @param escapeMode
0396:             *            The way to escape an occurance of the text qualifier inside
0397:             *            qualified data.
0398:             * @exception IllegalArgumentException
0399:             *                When an illegal value is specified for escapeMode.
0400:             */
0401:            public void setEscapeMode(int escapeMode)
0402:                    throws IllegalArgumentException {
0403:                if (escapeMode != ESCAPE_MODE_DOUBLED
0404:                        && escapeMode != ESCAPE_MODE_BACKSLASH) {
0405:                    throw new IllegalArgumentException(
0406:                            "Parameter escapeMode must be a valid value.");
0407:                }
0408:
0409:                userSettings.EscapeMode = escapeMode;
0410:            }
0411:
0412:            public boolean getSkipEmptyRecords() {
0413:                return userSettings.SkipEmptyRecords;
0414:            }
0415:
0416:            public void setSkipEmptyRecords(boolean skipEmptyRecords) {
0417:                userSettings.SkipEmptyRecords = skipEmptyRecords;
0418:            }
0419:
0420:            /**
0421:             * Safety caution to prevent the parser from using large amounts of memory
0422:             * in the case where parsing settings like file encodings don't end up
0423:             * matching the actual format of a file. This switch can be turned off if
0424:             * the file format is known and tested. With the switch off, the max column
0425:             * lengths and max column count per record supported by the parser will
0426:             * greatly increase. Default is true.
0427:             * 
0428:             * @return The current setting of the safety switch.
0429:             */
0430:            public boolean getSafetySwitch() {
0431:                return userSettings.SafetySwitch;
0432:            }
0433:
0434:            /**
0435:             * Safety caution to prevent the parser from using large amounts of memory
0436:             * in the case where parsing settings like file encodings don't end up
0437:             * matching the actual format of a file. This switch can be turned off if
0438:             * the file format is known and tested. With the switch off, the max column
0439:             * lengths and max column count per record supported by the parser will
0440:             * greatly increase. Default is true.
0441:             * 
0442:             * @param safetySwitch
0443:             */
0444:            public void setSafetySwitch(boolean safetySwitch) {
0445:                userSettings.SafetySwitch = safetySwitch;
0446:            }
0447:
0448:            /**
0449:             * Gets the count of columns found in this record.
0450:             * 
0451:             * @return The count of columns found in this record.
0452:             */
0453:            public int getColumnCount() {
0454:                return columnsCount;
0455:            }
0456:
0457:            /**
0458:             * Gets the index of the current record.
0459:             * 
0460:             * @return The index of the current record.
0461:             */
0462:            public long getCurrentRecord() {
0463:                return currentRecord - 1;
0464:            }
0465:
0466:            /**
0467:             * Gets the count of headers read in by a previous call to
0468:             * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
0469:             * 
0470:             * @return The count of headers read in by a previous call to
0471:             *         {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
0472:             */
0473:            public int getHeaderCount() {
0474:                return headersHolder.Length;
0475:            }
0476:
0477:            /**
0478:             * Returns the header values as a string array.
0479:             * 
0480:             * @return The header values as a String array.
0481:             * @exception IOException
0482:             *                Thrown if this object has already been closed.
0483:             */
0484:            public String[] getHeaders() throws IOException {
0485:                checkClosed();
0486:
0487:                if (headersHolder.Headers == null) {
0488:                    return null;
0489:                } else {
0490:                    // use clone here to prevent the outside code from
0491:                    // setting values on the array directly, which would
0492:                    // throw off the index lookup based on header name
0493:                    String[] clone = new String[headersHolder.Length];
0494:                    System.arraycopy(headersHolder.Headers, 0, clone, 0,
0495:                            headersHolder.Length);
0496:                    return clone;
0497:                }
0498:            }
0499:
0500:            public void setHeaders(String[] headers) {
0501:                headersHolder.Headers = headers;
0502:
0503:                headersHolder.IndexByName.clear();
0504:
0505:                if (headers != null) {
0506:                    headersHolder.Length = headers.length;
0507:                } else {
0508:                    headersHolder.Length = 0;
0509:                }
0510:
0511:                // use headersHolder.Length here in case headers is null
0512:                for (int i = 0; i < headersHolder.Length; i++) {
0513:                    headersHolder.IndexByName.put(headers[i], Integer
0514:                            .valueOf(i));
0515:                }
0516:            }
0517:
0518:            public String[] getValues() throws IOException {
0519:                checkClosed();
0520:
0521:                // need to return a clone, and can't use clone because values.Length
0522:                // might be greater than columnsCount
0523:                String[] clone = new String[columnsCount];
0524:                System.arraycopy(values, 0, clone, 0, columnsCount);
0525:                return clone;
0526:            }
0527:
0528:            /**
0529:             * Returns the current column value for a given column index.
0530:             * 
0531:             * @param columnIndex
0532:             *            The index of the column.
0533:             * @return The current column value.
0534:             * @exception IOException
0535:             *                Thrown if this object has already been closed.
0536:             */
0537:            public String get(int columnIndex) throws IOException {
0538:                checkClosed();
0539:
0540:                if (columnIndex > -1 && columnIndex < columnsCount) {
0541:                    return values[columnIndex];
0542:                } else {
0543:                    return "";
0544:                }
0545:            }
0546:
0547:            /**
0548:             * Returns the current column value for a given column header name.
0549:             * 
0550:             * @param headerName
0551:             *            The header name of the column.
0552:             * @return The current column value.
0553:             * @exception IOException
0554:             *                Thrown if this object has already been closed.
0555:             */
0556:            public String get(String headerName) throws IOException {
0557:                checkClosed();
0558:
0559:                return get(getIndex(headerName));
0560:            }
0561:
0562:            /**
0563:             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string
0564:             * of data as the source.&nbsp;Uses ISO-8859-1 as the
0565:             * {@link java.nio.charset.Charset Charset}.
0566:             * 
0567:             * @param data
0568:             *            The String of data to use as the source.
0569:             * @return A {@link com.csvreader.CsvReader CsvReader} object using the
0570:             *         String of data as the source.
0571:             */
0572:            public static CsvReader parse(String data) {
0573:                if (data == null) {
0574:                    throw new IllegalArgumentException(
0575:                            "Parameter data can not be null.");
0576:                }
0577:
0578:                return new CsvReader(new StringReader(data));
0579:            }
0580:
0581:            /**
0582:             * Reads another record.
0583:             * 
0584:             * @return Whether another record was successfully read or not.
0585:             * @exception IOException
0586:             *                Thrown if an error occurs while reading data from the
0587:             *                source stream.
0588:             */
0589:            public boolean readRecord() throws IOException {
0590:                checkClosed();
0591:
0592:                columnsCount = 0;
0593:                rawBuffer.Position = 0;
0594:
0595:                dataBuffer.LineStart = dataBuffer.Position;
0596:
0597:                hasReadNextLine = false;
0598:
0599:                // check to see if we've already found the end of data
0600:
0601:                if (hasMoreData) {
0602:                    // loop over the data stream until the end of data is found
0603:                    // or the end of the record is found
0604:
0605:                    do {
0606:                        if (dataBuffer.Position == dataBuffer.Count) {
0607:                            checkDataLength();
0608:                        } else {
0609:                            startedWithQualifier = false;
0610:
0611:                            // grab the current letter as a char
0612:
0613:                            char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0614:
0615:                            if (userSettings.UseTextQualifier
0616:                                    && currentLetter == userSettings.TextQualifier) {
0617:                                // this will be a text qualified column, so
0618:                                // we need to set startedWithQualifier to make it
0619:                                // enter the seperate branch to handle text
0620:                                // qualified columns
0621:
0622:                                lastLetter = currentLetter;
0623:
0624:                                // read qualified
0625:                                startedColumn = true;
0626:                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0627:                                startedWithQualifier = true;
0628:                                boolean lastLetterWasQualifier = false;
0629:
0630:                                char escapeChar = userSettings.TextQualifier;
0631:
0632:                                if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
0633:                                    escapeChar = Letters.BACKSLASH;
0634:                                }
0635:
0636:                                boolean eatingTrailingJunk = false;
0637:                                boolean lastLetterWasEscape = false;
0638:                                boolean readingComplexEscape = false;
0639:                                int escape = ComplexEscape.UNICODE;
0640:                                int escapeLength = 0;
0641:                                char escapeValue = (char) 0;
0642:
0643:                                dataBuffer.Position++;
0644:
0645:                                do {
0646:                                    if (dataBuffer.Position == dataBuffer.Count) {
0647:                                        checkDataLength();
0648:                                    } else {
0649:                                        // grab the current letter as a char
0650:
0651:                                        currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0652:
0653:                                        if (eatingTrailingJunk) {
0654:                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
0655:
0656:                                            if (currentLetter == userSettings.Delimiter) {
0657:                                                endColumn();
0658:                                            } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
0659:                                                    || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
0660:                                                endColumn();
0661:
0662:                                                endRecord();
0663:                                            }
0664:                                        } else if (readingComplexEscape) {
0665:                                            escapeLength++;
0666:
0667:                                            switch (escape) {
0668:                                            case ComplexEscape.UNICODE:
0669:                                                escapeValue *= (char) 16;
0670:                                                escapeValue += hexToDec(currentLetter);
0671:
0672:                                                if (escapeLength == 4) {
0673:                                                    readingComplexEscape = false;
0674:                                                }
0675:
0676:                                                break;
0677:                                            case ComplexEscape.OCTAL:
0678:                                                escapeValue *= (char) 8;
0679:                                                escapeValue += (char) (currentLetter - '0');
0680:
0681:                                                if (escapeLength == 3) {
0682:                                                    readingComplexEscape = false;
0683:                                                }
0684:
0685:                                                break;
0686:                                            case ComplexEscape.DECIMAL:
0687:                                                escapeValue *= (char) 10;
0688:                                                escapeValue += (char) (currentLetter - '0');
0689:
0690:                                                if (escapeLength == 3) {
0691:                                                    readingComplexEscape = false;
0692:                                                }
0693:
0694:                                                break;
0695:                                            case ComplexEscape.HEX:
0696:                                                escapeValue *= (char) 16;
0697:                                                escapeValue += hexToDec(currentLetter);
0698:
0699:                                                if (escapeLength == 2) {
0700:                                                    readingComplexEscape = false;
0701:                                                }
0702:
0703:                                                break;
0704:                                            }
0705:
0706:                                            if (!readingComplexEscape) {
0707:                                                appendLetter(escapeValue);
0708:                                            } else {
0709:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0710:                                            }
0711:                                        } else if (currentLetter == userSettings.TextQualifier) {
0712:                                            if (lastLetterWasEscape) {
0713:                                                lastLetterWasEscape = false;
0714:                                                lastLetterWasQualifier = false;
0715:                                            } else {
0716:                                                updateCurrentValue();
0717:
0718:                                                if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
0719:                                                    lastLetterWasEscape = true;
0720:                                                }
0721:
0722:                                                lastLetterWasQualifier = true;
0723:                                            }
0724:                                        } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0725:                                                && lastLetterWasEscape) {
0726:                                            switch (currentLetter) {
0727:                                            case 'n':
0728:                                                appendLetter(Letters.LF);
0729:                                                break;
0730:                                            case 'r':
0731:                                                appendLetter(Letters.CR);
0732:                                                break;
0733:                                            case 't':
0734:                                                appendLetter(Letters.TAB);
0735:                                                break;
0736:                                            case 'b':
0737:                                                appendLetter(Letters.BACKSPACE);
0738:                                                break;
0739:                                            case 'f':
0740:                                                appendLetter(Letters.FORM_FEED);
0741:                                                break;
0742:                                            case 'e':
0743:                                                appendLetter(Letters.ESCAPE);
0744:                                                break;
0745:                                            case 'v':
0746:                                                appendLetter(Letters.VERTICAL_TAB);
0747:                                                break;
0748:                                            case 'a':
0749:                                                appendLetter(Letters.ALERT);
0750:                                                break;
0751:                                            case '0':
0752:                                            case '1':
0753:                                            case '2':
0754:                                            case '3':
0755:                                            case '4':
0756:                                            case '5':
0757:                                            case '6':
0758:                                            case '7':
0759:                                                escape = ComplexEscape.OCTAL;
0760:                                                readingComplexEscape = true;
0761:                                                escapeLength = 1;
0762:                                                escapeValue = (char) (currentLetter - '0');
0763:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0764:                                                break;
0765:                                            case 'u':
0766:                                            case 'x':
0767:                                            case 'o':
0768:                                            case 'd':
0769:                                            case 'U':
0770:                                            case 'X':
0771:                                            case 'O':
0772:                                            case 'D':
0773:                                                switch (currentLetter) {
0774:                                                case 'u':
0775:                                                case 'U':
0776:                                                    escape = ComplexEscape.UNICODE;
0777:                                                    break;
0778:                                                case 'x':
0779:                                                case 'X':
0780:                                                    escape = ComplexEscape.HEX;
0781:                                                    break;
0782:                                                case 'o':
0783:                                                case 'O':
0784:                                                    escape = ComplexEscape.OCTAL;
0785:                                                    break;
0786:                                                case 'd':
0787:                                                case 'D':
0788:                                                    escape = ComplexEscape.DECIMAL;
0789:                                                    break;
0790:                                                }
0791:
0792:                                                readingComplexEscape = true;
0793:                                                escapeLength = 0;
0794:                                                escapeValue = (char) 0;
0795:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0796:
0797:                                                break;
0798:                                            default:
0799:                                                break;
0800:                                            }
0801:
0802:                                            lastLetterWasEscape = false;
0803:
0804:                                            // can only happen for ESCAPE_MODE_BACKSLASH
0805:                                        } else if (currentLetter == escapeChar) {
0806:                                            updateCurrentValue();
0807:                                            lastLetterWasEscape = true;
0808:                                        } else {
0809:                                            if (lastLetterWasQualifier) {
0810:                                                if (currentLetter == userSettings.Delimiter) {
0811:                                                    endColumn();
0812:                                                } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
0813:                                                        || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
0814:                                                    endColumn();
0815:
0816:                                                    endRecord();
0817:                                                } else {
0818:                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
0819:
0820:                                                    eatingTrailingJunk = true;
0821:                                                }
0822:
0823:                                                // make sure to clear the flag for next
0824:                                                // run of the loop
0825:
0826:                                                lastLetterWasQualifier = false;
0827:                                            }
0828:                                        }
0829:
0830:                                        // keep track of the last letter because we need
0831:                                        // it for several key decisions
0832:
0833:                                        lastLetter = currentLetter;
0834:
0835:                                        if (startedColumn) {
0836:                                            dataBuffer.Position++;
0837:
0838:                                            if (userSettings.SafetySwitch
0839:                                                    && dataBuffer.Position
0840:                                                            - dataBuffer.ColumnStart
0841:                                                            + columnBuffer.Position > 100000) {
0842:                                                close();
0843:
0844:                                                throw new IOException(
0845:                                                        "Maximum column length of 100,000 exceeded in column "
0846:                                                                + NumberFormat
0847:                                                                        .getIntegerInstance()
0848:                                                                        .format(
0849:                                                                                columnsCount)
0850:                                                                + " in record "
0851:                                                                + NumberFormat
0852:                                                                        .getIntegerInstance()
0853:                                                                        .format(
0854:                                                                                currentRecord)
0855:                                                                + ". Set the SafetySwitch property to false"
0856:                                                                + " if you're expecting column lengths greater than 100,000 characters to"
0857:                                                                + " avoid this error.");
0858:                                            }
0859:                                        }
0860:                                    } // end else
0861:
0862:                                } while (hasMoreData && startedColumn);
0863:                            } else if (currentLetter == userSettings.Delimiter) {
0864:                                // we encountered a column with no data, so
0865:                                // just send the end column
0866:
0867:                                lastLetter = currentLetter;
0868:
0869:                                endColumn();
0870:                            } else if (useCustomRecordDelimiter
0871:                                    && currentLetter == userSettings.RecordDelimiter) {
0872:                                // this will skip blank lines
0873:                                if (startedColumn || columnsCount > 0
0874:                                        || !userSettings.SkipEmptyRecords) {
0875:                                    endColumn();
0876:
0877:                                    endRecord();
0878:                                } else {
0879:                                    dataBuffer.LineStart = dataBuffer.Position + 1;
0880:                                }
0881:
0882:                                lastLetter = currentLetter;
0883:                            } else if (!useCustomRecordDelimiter
0884:                                    && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
0885:                                // this will skip blank lines
0886:                                if (startedColumn
0887:                                        || columnsCount > 0
0888:                                        || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
0889:                                    endColumn();
0890:
0891:                                    endRecord();
0892:                                } else {
0893:                                    dataBuffer.LineStart = dataBuffer.Position + 1;
0894:                                }
0895:
0896:                                lastLetter = currentLetter;
0897:                            } else if (userSettings.UseComments
0898:                                    && columnsCount == 0
0899:                                    && currentLetter == userSettings.Comment) {
0900:                                // encountered a comment character at the beginning of
0901:                                // the line so just ignore the rest of the line
0902:
0903:                                lastLetter = currentLetter;
0904:
0905:                                skipLine();
0906:                            } else if (userSettings.TrimWhitespace
0907:                                    && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
0908:                                // do nothing, this will trim leading whitespace
0909:                                // for both text qualified columns and non
0910:
0911:                                startedColumn = true;
0912:                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0913:                            } else {
0914:                                // since the letter wasn't a special letter, this
0915:                                // will be the first letter of our current column
0916:
0917:                                startedColumn = true;
0918:                                dataBuffer.ColumnStart = dataBuffer.Position;
0919:                                boolean lastLetterWasBackslash = false;
0920:                                boolean readingComplexEscape = false;
0921:                                int escape = ComplexEscape.UNICODE;
0922:                                int escapeLength = 0;
0923:                                char escapeValue = (char) 0;
0924:
0925:                                boolean firstLoop = true;
0926:
0927:                                do {
0928:                                    if (!firstLoop
0929:                                            && dataBuffer.Position == dataBuffer.Count) {
0930:                                        checkDataLength();
0931:                                    } else {
0932:                                        if (!firstLoop) {
0933:                                            // grab the current letter as a char
0934:                                            currentLetter = dataBuffer.Buffer[dataBuffer.Position];
0935:                                        }
0936:
0937:                                        if (!userSettings.UseTextQualifier
0938:                                                && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0939:                                                && currentLetter == Letters.BACKSLASH) {
0940:                                            if (lastLetterWasBackslash) {
0941:                                                lastLetterWasBackslash = false;
0942:                                            } else {
0943:                                                updateCurrentValue();
0944:                                                lastLetterWasBackslash = true;
0945:                                            }
0946:                                        } else if (readingComplexEscape) {
0947:                                            escapeLength++;
0948:
0949:                                            switch (escape) {
0950:                                            case ComplexEscape.UNICODE:
0951:                                                escapeValue *= (char) 16;
0952:                                                escapeValue += hexToDec(currentLetter);
0953:
0954:                                                if (escapeLength == 4) {
0955:                                                    readingComplexEscape = false;
0956:                                                }
0957:
0958:                                                break;
0959:                                            case ComplexEscape.OCTAL:
0960:                                                escapeValue *= (char) 8;
0961:                                                escapeValue += (char) (currentLetter - '0');
0962:
0963:                                                if (escapeLength == 3) {
0964:                                                    readingComplexEscape = false;
0965:                                                }
0966:
0967:                                                break;
0968:                                            case ComplexEscape.DECIMAL:
0969:                                                escapeValue *= (char) 10;
0970:                                                escapeValue += (char) (currentLetter - '0');
0971:
0972:                                                if (escapeLength == 3) {
0973:                                                    readingComplexEscape = false;
0974:                                                }
0975:
0976:                                                break;
0977:                                            case ComplexEscape.HEX:
0978:                                                escapeValue *= (char) 16;
0979:                                                escapeValue += hexToDec(currentLetter);
0980:
0981:                                                if (escapeLength == 2) {
0982:                                                    readingComplexEscape = false;
0983:                                                }
0984:
0985:                                                break;
0986:                                            }
0987:
0988:                                            if (!readingComplexEscape) {
0989:                                                appendLetter(escapeValue);
0990:                                            } else {
0991:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
0992:                                            }
0993:                                        } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
0994:                                                && lastLetterWasBackslash) {
0995:                                            switch (currentLetter) {
0996:                                            case 'n':
0997:                                                appendLetter(Letters.LF);
0998:                                                break;
0999:                                            case 'r':
1000:                                                appendLetter(Letters.CR);
1001:                                                break;
1002:                                            case 't':
1003:                                                appendLetter(Letters.TAB);
1004:                                                break;
1005:                                            case 'b':
1006:                                                appendLetter(Letters.BACKSPACE);
1007:                                                break;
1008:                                            case 'f':
1009:                                                appendLetter(Letters.FORM_FEED);
1010:                                                break;
1011:                                            case 'e':
1012:                                                appendLetter(Letters.ESCAPE);
1013:                                                break;
1014:                                            case 'v':
1015:                                                appendLetter(Letters.VERTICAL_TAB);
1016:                                                break;
1017:                                            case 'a':
1018:                                                appendLetter(Letters.ALERT);
1019:                                                break;
1020:                                            case '0':
1021:                                            case '1':
1022:                                            case '2':
1023:                                            case '3':
1024:                                            case '4':
1025:                                            case '5':
1026:                                            case '6':
1027:                                            case '7':
1028:                                                escape = ComplexEscape.OCTAL;
1029:                                                readingComplexEscape = true;
1030:                                                escapeLength = 1;
1031:                                                escapeValue = (char) (currentLetter - '0');
1032:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
1033:                                                break;
1034:                                            case 'u':
1035:                                            case 'x':
1036:                                            case 'o':
1037:                                            case 'd':
1038:                                            case 'U':
1039:                                            case 'X':
1040:                                            case 'O':
1041:                                            case 'D':
1042:                                                switch (currentLetter) {
1043:                                                case 'u':
1044:                                                case 'U':
1045:                                                    escape = ComplexEscape.UNICODE;
1046:                                                    break;
1047:                                                case 'x':
1048:                                                case 'X':
1049:                                                    escape = ComplexEscape.HEX;
1050:                                                    break;
1051:                                                case 'o':
1052:                                                case 'O':
1053:                                                    escape = ComplexEscape.OCTAL;
1054:                                                    break;
1055:                                                case 'd':
1056:                                                case 'D':
1057:                                                    escape = ComplexEscape.DECIMAL;
1058:                                                    break;
1059:                                                }
1060:
1061:                                                readingComplexEscape = true;
1062:                                                escapeLength = 0;
1063:                                                escapeValue = (char) 0;
1064:                                                dataBuffer.ColumnStart = dataBuffer.Position + 1;
1065:
1066:                                                break;
1067:                                            default:
1068:                                                break;
1069:                                            }
1070:
1071:                                            lastLetterWasBackslash = false;
1072:                                        } else {
1073:                                            if (currentLetter == userSettings.Delimiter) {
1074:                                                endColumn();
1075:                                            } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
1076:                                                    || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
1077:                                                endColumn();
1078:
1079:                                                endRecord();
1080:                                            }
1081:                                        }
1082:
1083:                                        // keep track of the last letter because we need
1084:                                        // it for several key decisions
1085:
1086:                                        lastLetter = currentLetter;
1087:                                        firstLoop = false;
1088:
1089:                                        if (startedColumn) {
1090:                                            dataBuffer.Position++;
1091:
1092:                                            if (userSettings.SafetySwitch
1093:                                                    && dataBuffer.Position
1094:                                                            - dataBuffer.ColumnStart
1095:                                                            + columnBuffer.Position > 100000) {
1096:                                                close();
1097:
1098:                                                throw new IOException(
1099:                                                        "Maximum column length of 100,000 exceeded in column "
1100:                                                                + NumberFormat
1101:                                                                        .getIntegerInstance()
1102:                                                                        .format(
1103:                                                                                columnsCount)
1104:                                                                + " in record "
1105:                                                                + NumberFormat
1106:                                                                        .getIntegerInstance()
1107:                                                                        .format(
1108:                                                                                currentRecord)
1109:                                                                + ". Set the SafetySwitch property to false"
1110:                                                                + " if you're expecting column lengths greater than 100,000 characters to"
1111:                                                                + " avoid this error.");
1112:                                            }
1113:                                        }
1114:                                    } // end else
1115:                                } while (hasMoreData && startedColumn);
1116:                            }
1117:
1118:                            if (hasMoreData) {
1119:                                dataBuffer.Position++;
1120:                            }
1121:                        } // end else
1122:                    } while (hasMoreData && !hasReadNextLine);
1123:
1124:                    // check to see if we hit the end of the file
1125:                    // without processing the current record
1126:
1127:                    if (startedColumn || lastLetter == userSettings.Delimiter) {
1128:                        endColumn();
1129:
1130:                        endRecord();
1131:                    }
1132:                }
1133:
1134:                if (userSettings.CaptureRawRecord) {
1135:                    if (hasMoreData) {
1136:                        if (rawBuffer.Position == 0) {
1137:                            rawRecord = new String(dataBuffer.Buffer,
1138:                                    dataBuffer.LineStart, dataBuffer.Position
1139:                                            - dataBuffer.LineStart - 1);
1140:                        } else {
1141:                            rawRecord = new String(rawBuffer.Buffer, 0,
1142:                                    rawBuffer.Position)
1143:                                    + new String(dataBuffer.Buffer,
1144:                                            dataBuffer.LineStart,
1145:                                            dataBuffer.Position
1146:                                                    - dataBuffer.LineStart - 1);
1147:                        }
1148:                    } else {
1149:                        // for hasMoreData to ever be false, all data would have had to
1150:                        // have been
1151:                        // copied to the raw buffer
1152:                        rawRecord = new String(rawBuffer.Buffer, 0,
1153:                                rawBuffer.Position);
1154:                    }
1155:                } else {
1156:                    rawRecord = "";
1157:                }
1158:
1159:                return hasReadNextLine;
1160:            }
1161:
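1162:            /**
1163:             * Refills the data buffer with the next chunk read from the source stream and clears hasMoreData once the stream is exhausted.
1164:             *
1165:             * @exception IOException Thrown if an error occurs while reading data from the source stream.
1166:             */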
1167:            private void checkDataLength() throws IOException {
1168:                if (!initialized) {
1169:                    if (fileName != null) {
1170:                        inputStream = new BufferedReader(new InputStreamReader(
1171:                                new FileInputStream(fileName), charset),
1172:                                StaticSettings.MAX_FILE_BUFFER_SIZE);
1173:                    }
1174:
1175:                    charset = null;
1176:                    initialized = true;
1177:                }
1178:
1179:                updateCurrentValue();
1180:
1181:                if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
1182:                    if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count
1183:                            - dataBuffer.LineStart) {
1184:                        int newLength = rawBuffer.Buffer.length
1185:                                + Math.max(dataBuffer.Count
1186:                                        - dataBuffer.LineStart,
1187:                                        rawBuffer.Buffer.length);
1188:
1189:                        char[] holder = new char[newLength];
1190:
1191:                        System.arraycopy(rawBuffer.Buffer, 0, holder, 0,
1192:                                rawBuffer.Position);
1193:
1194:                        rawBuffer.Buffer = holder;
1195:                    }
1196:
1197:                    System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,
1198:                            rawBuffer.Buffer, rawBuffer.Position,
1199:                            dataBuffer.Count - dataBuffer.LineStart);
1200:
1201:                    rawBuffer.Position += dataBuffer.Count
1202:                            - dataBuffer.LineStart;
1203:                }
1204:
1205:                try {
1206:                    dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,
1207:                            dataBuffer.Buffer.length);
1208:                } catch (IOException ex) {
1209:                    close();
1210:
1211:                    throw ex;
1212:                }
1213:
1214:                // if no more data could be found, set flag stating that
1215:                // the end of the data was found
1216:
1217:                if (dataBuffer.Count == -1) {
1218:                    hasMoreData = false;
1219:                }
1220:
1221:                dataBuffer.Position = 0;
1222:                dataBuffer.LineStart = 0;
1223:                dataBuffer.ColumnStart = 0;
1224:            }
1225:
1226:            /**
1227:             * Read the first record of data as column headers.
1228:             * 
1229:             * @return Whether the header record was successfully read or not.
1230:             * @exception IOException
1231:             *                Thrown if an error occurs while reading data from the
1232:             *                source stream.
1233:             */
1234:            public boolean readHeaders() throws IOException {
1235:                boolean result = readRecord();
1236:
1237:                // copy the header data from the column array
1238:                // to the header string array
1239:
1240:                headersHolder.Length = columnsCount;
1241:
1242:                headersHolder.Headers = new String[columnsCount];
1243:
1244:                for (int i = 0; i < headersHolder.Length; i++) {
1245:                    String columnValue = get(i);
1246:
1247:                    headersHolder.Headers[i] = columnValue;
1248:
1249:                    // if there are duplicate header names, we will save the last one
1250:                    headersHolder.IndexByName.put(columnValue, Integer
1251:                            .valueOf(i));
1252:                }
1253:
1254:                if (result) {
1255:                    currentRecord--;
1256:                }
1257:
1258:                columnsCount = 0;
1259:
1260:                return result;
1261:            }
1262:
1263:            /**
1264:             * Returns the column header value for a given column index.
1265:             * 
1266:             * @param columnIndex
1267:             *            The index of the header column being requested.
1268:             * @return The value of the column header at the given column index.
1269:             * @exception IOException
1270:             *                Thrown if this object has already been closed.
1271:             */
1272:            public String getHeader(int columnIndex) throws IOException {
1273:                checkClosed();
1274:
1275:                // check to see if we have read the header record yet
1276:
1277:                // check to see if the column index is within the bounds
1278:                // of our header array
1279:
1280:                if (columnIndex > -1 && columnIndex < headersHolder.Length) {
1281:                    // return the processed header data for this column
1282:
1283:                    return headersHolder.Headers[columnIndex];
1284:                } else {
1285:                    return "";
1286:                }
1287:            }
1288:
1289:            public boolean isQualified(int columnIndex) throws IOException {
1290:                checkClosed();
1291:
1292:                if (columnIndex < columnsCount && columnIndex > -1) {
1293:                    return isQualified[columnIndex];
1294:                } else {
1295:                    return false;
1296:                }
1297:            }
1298:
1299:            /**
1300:             * @exception IOException
1301:             *                Thrown if a very rare extreme exception occurs during
1302:             *                parsing, normally resulting from improper data format.
1303:             */
1304:            private void endColumn() throws IOException {
1305:                String currentValue = "";
1306:
1307:                // must be called before setting startedColumn = false
1308:                if (startedColumn) {
1309:                    if (columnBuffer.Position == 0) {
1310:                        if (dataBuffer.ColumnStart < dataBuffer.Position) {
1311:                            int lastLetter = dataBuffer.Position - 1;
1312:
1313:                            if (userSettings.TrimWhitespace
1314:                                    && !startedWithQualifier) {
1315:                                while (lastLetter >= dataBuffer.ColumnStart
1316:                                        && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
1317:                                    lastLetter--;
1318:                                }
1319:                            }
1320:
1321:                            currentValue = new String(dataBuffer.Buffer,
1322:                                    dataBuffer.ColumnStart, lastLetter
1323:                                            - dataBuffer.ColumnStart + 1);
1324:                        }
1325:                    } else {
1326:                        updateCurrentValue();
1327:
1328:                        int lastLetter = columnBuffer.Position - 1;
1329:
1330:                        if (userSettings.TrimWhitespace
1331:                                && !startedWithQualifier) {
1332:                            while (lastLetter >= 0
1333:                                    && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
1334:                                lastLetter--;
1335:                            }
1336:                        }
1337:
1338:                        currentValue = new String(columnBuffer.Buffer, 0,
1339:                                lastLetter + 1);
1340:                    }
1341:                }
1342:
1343:                columnBuffer.Position = 0;
1344:
1345:                startedColumn = false;
1346:
1347:                if (columnsCount >= 100000 && userSettings.SafetySwitch) {
1348:                    close();
1349:
1350:                    throw new IOException(
1351:                            "Maximum column count of 100,000 exceeded in record "
1352:                                    + NumberFormat.getIntegerInstance().format(
1353:                                            currentRecord)
1354:                                    + ". Set the SafetySwitch property to false"
1355:                                    + " if you're expecting more than 100,000 columns per record to"
1356:                                    + " avoid this error.");
1357:                }
1358:
1359:                // check to see if our current holder array for
1360:                // column chunks is still big enough to handle another
1361:                // column chunk
1362:
1363:                if (columnsCount == values.length) {
1364:                    // holder array needs to grow to be able to hold another column
1365:                    int newLength = values.length * 2;
1366:
1367:                    String[] holder = new String[newLength];
1368:
1369:                    System.arraycopy(values, 0, holder, 0, values.length);
1370:
1371:                    values = holder;
1372:
1373:                    boolean[] qualifiedHolder = new boolean[newLength];
1374:
1375:                    System.arraycopy(isQualified, 0, qualifiedHolder, 0,
1376:                            isQualified.length);
1377:
1378:                    isQualified = qualifiedHolder;
1379:                }
1380:
1381:                values[columnsCount] = currentValue;
1382:
1383:                isQualified[columnsCount] = startedWithQualifier;
1384:
1385:                currentValue = "";
1386:
1387:                columnsCount++;
1388:            }
1389:
1390:            private void appendLetter(char letter) {
1391:                if (columnBuffer.Position == columnBuffer.Buffer.length) {
1392:                    int newLength = columnBuffer.Buffer.length * 2;
1393:
1394:                    char[] holder = new char[newLength];
1395:
1396:                    System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1397:                            columnBuffer.Position);
1398:
1399:                    columnBuffer.Buffer = holder;
1400:                }
1401:                columnBuffer.Buffer[columnBuffer.Position++] = letter;
1402:                dataBuffer.ColumnStart = dataBuffer.Position + 1;
1403:            }
1404:
1405:            private void updateCurrentValue() {
1406:                if (startedColumn
1407:                        && dataBuffer.ColumnStart < dataBuffer.Position) {
1408:                    if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position
1409:                            - dataBuffer.ColumnStart) {
1410:                        int newLength = columnBuffer.Buffer.length
1411:                                + Math.max(dataBuffer.Position
1412:                                        - dataBuffer.ColumnStart,
1413:                                        columnBuffer.Buffer.length);
1414:
1415:                        char[] holder = new char[newLength];
1416:
1417:                        System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1418:                                columnBuffer.Position);
1419:
1420:                        columnBuffer.Buffer = holder;
1421:                    }
1422:
1423:                    System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,
1424:                            columnBuffer.Buffer, columnBuffer.Position,
1425:                            dataBuffer.Position - dataBuffer.ColumnStart);
1426:
1427:                    columnBuffer.Position += dataBuffer.Position
1428:                            - dataBuffer.ColumnStart;
1429:                }
1430:
1431:                dataBuffer.ColumnStart = dataBuffer.Position + 1;
1432:            }
1433:
1434:            /**
1435:             * @exception IOException
1436:             *                Thrown if an error occurs while reading data from the
1437:             *                source stream.
1438:             */
1439:            private void endRecord() throws IOException {
1440:                // this flag is used as a loop exit condition
1441:                // during parsing
1442:
1443:                hasReadNextLine = true;
1444:
1445:                currentRecord++;
1446:            }
1447:
1448:            /**
1449:             * Gets the corresponding column index for a given column header name.
1450:             * 
1451:             * @param headerName
1452:             *            The header name of the column.
1453:             * @return The column index for the given column header name.&nbsp;Returns
1454:             *         -1 if not found.
1455:             * @exception IOException
1456:             *                Thrown if this object has already been closed.
1457:             */
1458:            public int getIndex(String headerName) throws IOException {
1459:                checkClosed();
1460:
1461:                Integer indexValue = headersHolder.IndexByName.get(headerName);
1462:
1463:                if (indexValue != null) {
1464:                    return indexValue.intValue();
1465:                } else {
1466:                    return -1;
1467:                }
1468:            }
1469:
1470:            /**
1471:             * Skips the next record of data by parsing each column.&nbsp;Does not
1472:             * increment
1473:             * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.
1474:             * 
1475:             * @return Whether another record was successfully skipped or not.
1476:             * @exception IOException
1477:             *                Thrown if an error occurs while reading data from the
1478:             *                source stream.
1479:             */
1480:            public boolean skipRecord() throws IOException {
1481:                checkClosed();
1482:
1483:                boolean recordRead = false;
1484:
1485:                if (hasMoreData) {
1486:                    recordRead = readRecord();
1487:
1488:                    if (recordRead) {
1489:                        currentRecord--;
1490:                    }
1491:                }
1492:
1493:                return recordRead;
1494:            }
1495:
1496:            /**
1497:             * Skips the next line of data using the standard end of line characters and
1498:             * does not do any column delimited parsing.
1499:             * 
1500:             * @return Whether a line was successfully skipped or not.
1501:             * @exception IOException
1502:             *                Thrown if an error occurs while reading data from the
1503:             *                source stream.
1504:             */
1505:            public boolean skipLine() throws IOException {
1506:                checkClosed();
1507:
1508:                // clear public column values for current line
1509:
1510:                columnsCount = 0;
1511:
1512:                boolean skippedLine = false;
1513:
1514:                if (hasMoreData) {
1515:                    boolean foundEol = false;
1516:
1517:                    do {
1518:                        if (dataBuffer.Position == dataBuffer.Count) {
1519:                            checkDataLength();
1520:                        } else {
1521:                            skippedLine = true;
1522:
1523:                            // grab the current letter as a char
1524:
1525:                            char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
1526:
1527:                            if (currentLetter == Letters.CR
1528:                                    || currentLetter == Letters.LF) {
1529:                                foundEol = true;
1530:                            }
1531:
1532:                            // keep track of the last letter because we need
1533:                            // it for several key decisions
1534:
1535:                            lastLetter = currentLetter;
1536:
1537:                            if (!foundEol) {
1538:                                dataBuffer.Position++;
1539:                            }
1540:
1541:                        } // end else
1542:                    } while (hasMoreData && !foundEol);
1543:
1544:                    columnBuffer.Position = 0;
1545:
1546:                    dataBuffer.LineStart = dataBuffer.Position + 1;
1547:                }
1548:
1549:                rawBuffer.Position = 0;
1550:                rawRecord = "";
1551:
1552:                return skippedLine;
1553:            }
1554:
1555:            /**
1556:             * Closes and releases all related resources.
1557:             */
1558:            public void close() {
1559:                if (!closed) {
1560:                    close(true);
1561:
1562:                    closed = true;
1563:                }
1564:            }
1565:
1566:            /**
1567:             * 
1568:             */
1569:            private void close(boolean closing) {
1570:                if (!closed) {
1571:                    if (closing) {
1572:                        charset = null;
1573:                        headersHolder.Headers = null;
1574:                        headersHolder.IndexByName = null;
1575:                        dataBuffer.Buffer = null;
1576:                        columnBuffer.Buffer = null;
1577:                        rawBuffer.Buffer = null;
1578:                    }
1579:
1580:                    try {
1581:                        if (initialized) {
1582:                            inputStream.close();
1583:                        }
1584:                    } catch (Exception e) {
1585:                        // just eat the exception
1586:                    }
1587:
1588:                    inputStream = null;
1589:
1590:                    closed = true;
1591:                }
1592:            }
1593:
1594:            /**
1595:             * @exception IOException
1596:             *                Thrown if this object has already been closed.
1597:             */
1598:            private void checkClosed() throws IOException {
1599:                if (closed) {
1600:                    throw new IOException(
1601:                            "This instance of the CsvReader class has already been closed.");
1602:                }
1603:            }
1604:
1605:            /**
1606:             * 
1607:             */
1608:            protected void finalize() {
1609:                close(false);
1610:            }
1611:
1612:            private class ComplexEscape {
1613:                private static final int UNICODE = 1;
1614:
1615:                private static final int OCTAL = 2;
1616:
1617:                private static final int DECIMAL = 3;
1618:
1619:                private static final int HEX = 4;
1620:            }
1621:
1622:            private static char hexToDec(char hex) {
1623:                char result;
1624:
1625:                if (hex >= 'a') {
1626:                    result = (char) (hex - 'a' + 10);
1627:                } else if (hex >= 'A') {
1628:                    result = (char) (hex - 'A' + 10);
1629:                } else {
1630:                    result = (char) (hex - '0');
1631:                }
1632:
1633:                return result;
1634:            }
1635:
1636:            private class DataBuffer {
1637:                public char[] Buffer;
1638:
1639:                public int Position;
1640:
1641:                // / <summary>
1642:                // / How much usable data has been read into the stream,
1643:                // / which will not always be as long as Buffer.Length.
1644:                // / </summary>
1645:                public int Count;
1646:
1647:                // / <summary>
1648:                // / The position of the cursor in the buffer when the
1649:                // / current column was started or the last time data
1650:                // / was moved out to the column buffer.
1651:                // / </summary>
1652:                public int ColumnStart;
1653:
1654:                public int LineStart;
1655:
1656:                public DataBuffer() {
1657:                    Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
1658:                    Position = 0;
1659:                    Count = 0;
1660:                    ColumnStart = 0;
1661:                    LineStart = 0;
1662:                }
1663:            }
1664:
1665:            private class ColumnBuffer {
1666:                public char[] Buffer;
1667:
1668:                public int Position;
1669:
1670:                public ColumnBuffer() {
1671:                    Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
1672:                    Position = 0;
1673:                }
1674:            }
1675:
1676:            private class RawRecordBuffer {
1677:                public char[] Buffer;
1678:
1679:                public int Position;
1680:
1681:                public RawRecordBuffer() {
1682:                    Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
1683:                            * StaticSettings.INITIAL_COLUMN_COUNT];
1684:                    Position = 0;
1685:                }
1686:            }
1687:
1688:            private class Letters {
1689:                public static final char LF = '\n';
1690:
1691:                public static final char CR = '\r';
1692:
1693:                public static final char QUOTE = '"';
1694:
1695:                public static final char COMMA = ',';
1696:
1697:                public static final char SPACE = ' ';
1698:
1699:                public static final char TAB = '\t';
1700:
1701:                public static final char POUND = '#';
1702:
1703:                public static final char BACKSLASH = '\\';
1704:
1705:                public static final char NULL = '\0';
1706:
1707:                public static final char BACKSPACE = '\b';
1708:
1709:                public static final char FORM_FEED = '\f';
1710:
1711:                public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
1712:
1713:                public static final char VERTICAL_TAB = '\u000B';
1714:
1715:                public static final char ALERT = '\u0007';
1716:            }
1717:
1718:            private class UserSettings {
1719:                // having these as publicly accessible members will prevent
1720:                // the overhead of the method call that exists on properties
1721:                public boolean CaseSensitive;
1722:
1723:                public char TextQualifier;
1724:
1725:                public boolean TrimWhitespace;
1726:
1727:                public boolean UseTextQualifier;
1728:
1729:                public char Delimiter;
1730:
1731:                public char RecordDelimiter;
1732:
1733:                public char Comment;
1734:
1735:                public boolean UseComments;
1736:
1737:                public int EscapeMode;
1738:
1739:                public boolean SafetySwitch;
1740:
1741:                public boolean SkipEmptyRecords;
1742:
1743:                public boolean CaptureRawRecord;
1744:
1745:                public UserSettings() {
1746:                    CaseSensitive = true;
1747:                    TextQualifier = Letters.QUOTE;
1748:                    TrimWhitespace = true;
1749:                    UseTextQualifier = true;
1750:                    Delimiter = Letters.COMMA;
1751:                    RecordDelimiter = Letters.NULL;
1752:                    Comment = Letters.POUND;
1753:                    UseComments = false;
1754:                    EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
1755:                    SafetySwitch = true;
1756:                    SkipEmptyRecords = true;
1757:                    CaptureRawRecord = true;
1758:                }
1759:            }
1760:
1761:            private class HeadersHolder {
1762:                public String[] Headers;
1763:
1764:                public int Length;
1765:
1766:                public HashMap<String, Integer> IndexByName;
1767:
1768:                public HeadersHolder() {
1769:                    Headers = null;
1770:                    Length = 0;
1771:                    IndexByName = new HashMap<String, Integer>();
1772:                }
1773:            }
1774:
1775:            private class StaticSettings {
1776:                // these are static instead of final so they can be changed in unit test
1777:                // isn't visible outside this class and is only accessed once during
1778:                // CsvReader construction
1779:                public static final int MAX_BUFFER_SIZE = 1024;
1780:
1781:                public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
1782:
1783:                public static final int INITIAL_COLUMN_COUNT = 10;
1784:
1785:                public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
1786:            }
1787:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.