Source Code Cross Referenced for ImportReadData.java in  » Database-DBMS » db-derby-10.2 » org » apache » derby » impl » load » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Database DBMS » db derby 10.2 » org.apache.derby.impl.load 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:
003:           Derby - Class org.apache.derby.impl.load.ImportReadData
004:
005:           Licensed to the Apache Software Foundation (ASF) under one or more
006:           contributor license agreements.  See the NOTICE file distributed with
007:           this work for additional information regarding copyright ownership.
008:           The ASF licenses this file to You under the Apache License, Version 2.0
009:           (the "License"); you may not use this file except in compliance with
010:           the License.  You may obtain a copy of the License at
011:
012:              http://www.apache.org/licenses/LICENSE-2.0
013:
014:           Unless required by applicable law or agreed to in writing, software
015:           distributed under the License is distributed on an "AS IS" BASIS,
016:           WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017:           See the License for the specific language governing permissions and
018:           limitations under the License.
019:
020:         */
021:
022:        package org.apache.derby.impl.load;
023:
024:        import java.io.BufferedReader;
025:        import java.io.FileNotFoundException;
026:        import java.io.InputStream;
027:        import java.io.InputStreamReader;
028:        import java.io.FileInputStream;
029:        import java.io.IOException;
030:        import java.net.MalformedURLException;
031:        import java.net.URL;
032:        import org.apache.derby.iapi.services.sanity.SanityManager;
033:
034:        final class ImportReadData implements 
035:                java.security.PrivilegedExceptionAction {
036:            //Name of the data file; realOpenFile() tries it as a URL first and
                //falls back to treating it as a local file name
037:            private String inputFileName;
038:
                //NOTE(review): the next three fields are not referenced in the visible
                //part of this class - presumably they serve the fixed-format reader;
                //confirm before changing them.
039:            private int[] columnWidths;
040:            private int rowWidth;
041:            private char[] tempString;
                //characters consumed by the current readTokensUntilEndOfRecord() call
                //(reset to 0 at the start of each call)
042:            private int numberOfCharsReadSoFar;
043:
044:            //reader over the data file; created in realOpenFile() and closed in
                //closeStream()
045:            private BufferedReader bufferedReader;
046:
047:            //Buffer which holds each token as we are building it. It starts at
                //START_SIZE chars and is doubled whenever it fills up.
048:            private static final int START_SIZE = 10240;
049:            private char[] currentToken = new char[START_SIZE];
050:            private int currentTokenMaxSize = START_SIZE;
051:
052:            //True while a field start delimiter has been seen and its matching stop
                //delimiter has not; tells the scanner to look for the stop pattern
053:            boolean foundStartDelimiter;
                //number of characters of the current token held in currentToken
054:            int totalCharsSoFar;
055:            //count of white space characters at the front of the token, to be ignored
056:            int positionOfNonWhiteSpaceCharInFront;
057:            //count of white space characters at the back of the token, to be ignored
058:            int positionOfNonWhiteSpaceCharInBack;
059:            int lineNumber;
                //how many characters of the start / stop delimiter have matched so far
060:            int fieldStartDelimiterIndex;
061:            int fieldStopDelimiterIndex;
                //value of totalCharsSoFar right after a stop delimiter was stripped
062:            int stopDelimiterPosition;
                //set once a complete start/stop delimiter pair has been seen for the token
063:            boolean foundStartAndStopDelimiters;
064:
065:            //In the constructor we open the stream only to find out the number of
066:            //columns of a delimited file. In case of fixed format, we know that already
067:            //from the control file. Then we close the stream. The stream is reopened
068:            //when the first record is read from the file (ie the first time next is
069:            //issued). This was done for the bug 1032 filed by Dan.
070:            boolean streamOpenForReading;
071:
072:            static final int DEFAULT_FORMAT_CODE = 0;
073:            static final int ASCII_FIXED_FORMAT_CODE = 1;
                //only DEFAULT_FORMAT_CODE triggers the column count scan in loadMetaData()
074:            private int formatCode = DEFAULT_FORMAT_CODE;
                //true when the control file says the first row holds column definitions
075:            private boolean hasColumnDefinition;
                //first character of the record / field separator, and whether that
                //character is NOT white space (see loadPropertiesInfo())
076:            private char recordSeparatorChar0;
077:            private char fieldSeparatorChar0;
078:            private boolean recordSepStartNotWhite = true;
079:            private boolean fieldSepStartNotWhite = true;
080:
081:            //source of the import properties copied into the fields below
082:            protected ControlInfo controlFileReader;
083:
084:            //The first row is read to find out how many columns make up a row; the
085:            //result is kept in the following variable
086:            protected int numberOfColumns;
087:
088:            // the types of the columns that we are about to read
089:            protected String[] columnTypes;
090:
091:            //Local copies of the control file properties, filled in by
                //loadPropertiesInfo()
092:            protected char[] fieldSeparator;
093:            protected int fieldSeparatorLength;
094:            protected char[] recordSeparator;
095:            protected int recordSeparatorLength;
096:            protected String nullString;
097:            protected String columnDefinition;
098:            protected String format;
099:            protected String dataCodeset;
100:            protected char[] fieldStartDelimiter;
101:            protected int fieldStartDelimiterLength;
102:            protected char[] fieldStopDelimiter;
103:            protected int fieldStopDelimiterLength;
104:            protected boolean hasDelimiterAtEnd;
105:
106:            //Copy the control file properties into local fields, since we need to refer
107:            //to them all the time while looking for tokens.
108:            private void loadPropertiesInfo() throws Exception {
                    //separators and delimiters are kept as char arrays together with a
                    //cached length so the matching code can index them directly
109:                fieldSeparator = controlFileReader.getFieldSeparator()
110:                        .toCharArray();
111:                fieldSeparatorLength = fieldSeparator.length;
112:                recordSeparator = controlFileReader.getRecordSeparator()
113:                        .toCharArray();
114:                recordSeparatorLength = recordSeparator.length;
115:                nullString = controlFileReader.getNullString();
116:                columnDefinition = controlFileReader.getColumnDefinition();
117:                format = controlFileReader.getFormat();
118:                dataCodeset = controlFileReader.getDataCodeset();
119:                fieldStartDelimiter = controlFileReader
120:                        .getFieldStartDelimiter().toCharArray();
121:                fieldStartDelimiterLength = fieldStartDelimiter.length;
122:                fieldStopDelimiter = controlFileReader.getFieldEndDelimiter()
123:                        .toCharArray();
124:                fieldStopDelimiterLength = fieldStopDelimiter.length;
125:                hasDelimiterAtEnd = controlFileReader.getHasDelimiterAtEnd();
126:
127:                // When record or field separators start with typical white space,
128:                // we can't ignore that character around values in the import file. So
129:                // compute the answer once here instead of re-testing for every character.
                    // An empty separator leaves the corresponding flag at its default (true).
130:                if (recordSeparatorLength > 0) {
131:                    recordSeparatorChar0 = recordSeparator[0];
132:                    recordSepStartNotWhite = (Character
133:                            .isWhitespace(recordSeparatorChar0) == false);
134:                }
135:                if (fieldSeparatorLength > 0) {
136:                    fieldSeparatorChar0 = fieldSeparator[0];
137:                    fieldSepStartNotWhite = (Character
138:                            .isWhitespace(fieldSeparatorChar0) == false);
139:                }
140:            }
141:
142:            //inputFileName: file (or URL) to read data from
143:            //controlFileReader: settings used to interpret the data in inputFileName
                //The constructor copies the settings and then opens the data file once to
                //work out the column count, so it throws if the file cannot be opened.
144:            ImportReadData(String inputFileName, ControlInfo controlFileReader)
145:                    throws Exception {
146:                this .inputFileName = inputFileName;
147:                this .controlFileReader = controlFileReader;
148:
149:                //load the control file properties info locally, since we need to refer to
150:                //them all the time while looking for tokens
151:                loadPropertiesInfo();
152:                //read the first row to find how many columns make a row and then save that
153:                //column information for further use
154:                loadMetaData();
155:            }
156:
157:            //Getter returning the number of columns of a row in the data file, as
                //counted by findNumberOfColumnsInARow(). Note that loadColumnTypes()
                //resets the value to 0 when it finishes.
158:            int getNumberOfColumns() {
159:                return numberOfColumns;
160:            }
161:
162:            /**If columndefinition is true, ignore the first row. The way to do that is
163:             * to just read up to the record separator.
                 * NOTE(review): readNextToken is defined outside the visible part of this
                 * file; the meaning of its last argument (true) should be confirmed there.
164:             * @exception Exception if there is an error
165:             */
166:            protected void ignoreFirstRow() throws Exception {
167:                readNextToken(recordSeparator, 0, recordSeparatorLength, true);
168:            }
169:
170:            /** Load the column types from the meta data line, to be analyzed
171:             * later in the constructor of the ImportResultSetMetaData.
                 * The meta data line is expected to hold name/type pairs; the type of
                 * each pair is stored in columnTypes. On return the file has been
                 * reopened at its start and numberOfColumns is reset to 0.
                 * @exception Exception if the file cannot be reopened or read
172:             */
173:            protected void loadColumnTypes() throws Exception {
174:                int idx;
175:                String[] metaDataArray;
176:
177:                // start by counting the number of tokens that we have on the
178:                // meta data line
179:                findNumberOfColumnsInARow();
180:
181:                // reopen the file to the start of the file to read the actual column types data
182:                closeStream();
183:                openFile();
184:
185:                // make room for the meta data
186:                metaDataArray = new String[numberOfColumns];
187:
188:                // read the meta data line - meta data is always in a delimited format
189:                readNextDelimitedRow(metaDataArray);
190:
191:                // allocate space for the columnTypes meta data
192:                // since the meta data line contains a combination of column name and
193:                // column type for every column we actually have only half the number of
194:                // columns that was counted.
195:                columnTypes = new String[numberOfColumns / 2];
196:
                    // every second token (odd index) is a type, the one before it a name
                    // NOTE(review): if the counted number of tokens is odd, the last
                    // iteration indexes past the end of both arrays - confirm that the
                    // meta data line is guaranteed to hold complete name/type pairs.
197:                for (idx = 0; idx < numberOfColumns; idx = idx + 2) {
198:                    columnTypes[idx / 2] = metaDataArray[idx + 1];
199:                }
200:
201:                // reopen to the start of the file so the rest of the program will
202:                // work as expected
203:                closeStream();
204:                openFile();
205:
206:                // reset the numberOfColumns variable since it is
207:                // recomputed by the findNumberOfColumnsInARow method
208:                numberOfColumns = 0;
209:            }
210:
211:            private void openFile() throws Exception {
                    //Open the data file inside a privileged block: this object is the
                    //PrivilegedExceptionAction, and its run() method does the actual open.
212:                try {
213:                    java.security.AccessController.doPrivileged(this );
214:                } catch (java.security.PrivilegedActionException pae) {
                        //unwrap, so callers see the exception that run() threw
215:                    throw pae.getException();
216:                }
217:            }
218:
219:            public final Object run() throws Exception {
                    //PrivilegedExceptionAction entry point, invoked through
                    //AccessController.doPrivileged() in openFile(). Opens the data file;
                    //there is no result to hand back, so it always returns null.
220:                realOpenFile();
221:                return null;
222:            }
223:
224:            //open the input data file for reading
225:            private void realOpenFile() throws Exception {
226:                InputStream inputStream;
227:                try {
228:                    try {
229:                        URL url = new URL(inputFileName);
230:                        if (url.getProtocol().equals("file")) { //this means it's a file url
231:                            inputFileName = url.getFile(); //seems like you can't do openstream on file
232:                            throw new MalformedURLException(); //so, get the filename from url and do it ususal way
233:                        }
234:                        inputStream = url.openStream();
235:                    } catch (MalformedURLException ex) {
236:                        inputStream = new FileInputStream(inputFileName);
237:
238:                    }
239:                } catch (FileNotFoundException ex) {
240:                    throw LoadError.dataFileNotFound(inputFileName);
241:                } catch (SecurityException se) {
242:                    java.sql.SQLException sqle = LoadError
243:                            .dataFileNotFound(inputFileName);
244:
245:                    sqle.setNextException(new java.sql.SQLException("XJ001", se
246:                            .getMessage(), 0));
247:
248:                    throw sqle;
249:                }
250:                java.io.Reader rd = dataCodeset == null ? new InputStreamReader(
251:                        inputStream)
252:                        : new InputStreamReader(inputStream, dataCodeset);
253:                bufferedReader = new BufferedReader(rd, 32 * 1024);
254:                streamOpenForReading = true;
255:            }
256:
257:            //Read the first data row to find how many columns make a row and save that
258:            //column information for future use. The file is opened here and is always
                //closed again before returning, including when reading it fails.
259:            private void loadMetaData() throws Exception {
260:                //open the input data file for reading the metadata information
261:                openFile();
                    try {
262:                    // if column definition is true, ignore the first row since that's not
263:                    // really the data. Compare in upper case because the ui shows the
264:                    // values as True and False
265:                    if (columnDefinition.toUpperCase(java.util.Locale.ENGLISH)
266:                            .equals(
267:                                    ControlInfo.INTERNAL_TRUE
268:                                            .toUpperCase(java.util.Locale.ENGLISH))) {
269:                        hasColumnDefinition = true;
270:                        ignoreFirstRow();
271:                    }
272:
                        // only the default (delimited) format needs its columns counted
273:                    if (formatCode == DEFAULT_FORMAT_CODE) {
274:                        findNumberOfColumnsInARow();
275:                    }
                    } finally {
                        // This method runs from the constructor: if skipping or counting
                        // threw, no caller would ever hold this object to close the
                        // reader, so release it here on every path.
276:                    closeStream();
                    }
277:            }
278:
279:            /**Close the input data file if it is currently open; does nothing when
                 * the stream was never opened or has already been closed.
280:             * @exception Exception if there is an error
281:             */
282:            void closeStream() throws Exception {
283:                if (streamOpenForReading) {
284:                    bufferedReader.close();
                        //clear the flag so that a second call is a no-op
285:                    streamOpenForReading = false;
286:                }
287:            }
288:
289:            //Actually looks at the data file to find how many columns make up a row:
                //reads tokens from the current position up to the end of the record,
                //stores the count in numberOfColumns and also returns it.
290:            int findNumberOfColumnsInARow() throws Exception {
291:                // init the number of columns to 1 - no such thing as a table
292:                // without columns
293:                numberOfColumns = 1;
                    // each false return means one field separator was found, i.e. one more
                    // column follows
294:                while (!readTokensUntilEndOfRecord()) {
295:                    numberOfColumns++;
296:                }
297:                //--numberOfColumns;
298:                //what shall we do if there is a delimiter after the last column?
299:                //reducing the number of columns seems to work fine.
300:
301:                //this is necessary to be able to read delimited files that have a delimiter
302:                //at the end of a row.
303:                if (hasDelimiterAtEnd) {
304:                    --numberOfColumns;
305:                }
306:
307:                // a special check - if the imported file is empty then
308:                // set the number of columns to 0
                    // NOTE(review): numberOfCharsReadSoFar is reset by every call to
                    // readTokensUntilEndOfRecord(), so this only reflects the last token
                    // read; a row that ends with a field separator directly followed by
                    // end of file would also be reported as 0 columns - confirm intent.
309:                if (numberOfCharsReadSoFar == 0) {
310:                    numberOfColumns = 0;
311:                }
312:                return numberOfColumns;
313:            }
314:
315:            //Keep track of white spaces in the front. We use positionOfNonWhiteSpaceCharInFront
316:            //for that. It holds the number of white space chars found so far before any
317:            //non-white char in the token.
318:            //Look for whitespace only if field start delimiter is not found yet. Any white spaces
319:            //within the start and stop delimiters are ignored.
320:            //Also if one of the white space chars is same as recordSeparator or fieldSeparator then
321:            //disregard it.
                //Called once for every character appended to currentToken.
322:            private void checkForWhiteSpaceInFront() {
323:                //the following if is true only while every character read so far for
                    //this token (except the one just appended) was leading white space
324:                if ((positionOfNonWhiteSpaceCharInFront + 1) == totalCharsSoFar
325:                        && ((!foundStartDelimiter) && (!foundStartAndStopDelimiters))) {
                        //this is the character that was just appended
326:                    char currentChar = currentToken[positionOfNonWhiteSpaceCharInFront];
327:                    if (//currentChar == '\t' ||
328:                    //currentChar == '\r' || alc: why isn't this included?
329:                    // alc: BTW, \r and \n should be replaced
330:                    // or amended with the first char of line.separator...
331:                    //currentChar == '\n' ||
332:                    //currentChar == ' ') {
333:                    // use String.trim()'s definition of whitespace.
334:                    // i18n - check for whitespace - avoid doing a hard coded character
335:                    // check and use the isWhitespace method to cover all the Unicode
336:                    // options
337:                    Character.isWhitespace(currentChar) == true) {
338:
339:                        if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0))
340:                                && (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0)))
341:                            //count it, unless the whitespace char is the same as a separator's first char
342:                            positionOfNonWhiteSpaceCharInFront++;
343:                    }
344:                }
345:            }
346:
347:            //Look for white spaces from the back towards the stop delimiter position.
348:            //If there was no start delimiter & stop delimiter combination, then we start from
349:            //the back all the way to the beginning and stop when we find a non-white char.
350:            //positionOfNonWhiteSpaceCharInBack keeps the count of whitespaces at the back
351:            private void checkForWhiteSpaceInBack() {
352:                boolean onlyWhiteSpaceSoFar = true;
353:                positionOfNonWhiteSpaceCharInBack = 0;
354:
                    //NOTE(review): the scan starts at index totalCharsSoFar, which is one
                    //past the last character counted in the token, and never looks at
                    //index stopDelimiterPosition itself - confirm against the callers
                    //(outside the visible part of this file) that this is intended.
355:                for (int i = totalCharsSoFar; (i > stopDelimiterPosition)
356:                        && onlyWhiteSpaceSoFar; i--) {
357:                    char currentChar = currentToken[i];
358:                    // replace test on \t,\n,' ' with String.trim's definition of white space
359:                    // i18n - check for whitespace - avoid doing a hard coded character
360:                    // check and use the isWhitespace method to cover all the Unicode
361:                    // options
362:                    if (Character.isWhitespace(currentChar) == true) {
363:
364:                        if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0))
365:                                && (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0)))
366:                            //count it, unless the whitespace char is the same as a separator's first char
367:                            positionOfNonWhiteSpaceCharInBack++;
368:                    } else
                            //first non-white character from the back ends the scan
369:                        onlyWhiteSpaceSoFar = false;
370:                }
371:            }
372:
373:            //Keep looking for field and record separators simultaneously because we don't yet
374:            //know how many columns make up a row in this data file. Reads one token:
375:            //returns false when the token ended with a field separator, and true when it
                //ended with the record separator or the end of the file was reached.
376:            boolean readTokensUntilEndOfRecord() throws Exception {
377:                int nextChar;
378:                int fieldSeparatorIndex = 0;
379:                int recordSeparatorIndex = 0;
380:
                    //reset all per-token matching state
381:                fieldStopDelimiterIndex = 0;
382:                fieldStartDelimiterIndex = 0;
383:                totalCharsSoFar = 0;
384:                //at the start of every new token, make white space in front count 0
385:                positionOfNonWhiteSpaceCharInFront = 0;
386:                foundStartDelimiter = false;
387:                foundStartAndStopDelimiters = false;
388:                numberOfCharsReadSoFar = 0;
389:
390:                while (true) {
391:                    nextChar = bufferedReader.read();
                        //end of file is treated like the end of a record
392:                    if (nextChar == -1)
393:                        return true;
394:                    numberOfCharsReadSoFar++;
395:                    //read the character into the token holder. If the token holder reaches its
396:                    //capacity, double its capacity (done below, before the next character is stored)
397:                    currentToken[totalCharsSoFar++] = (char) nextChar;
398:                    //check if character read is white space char in front
399:                    checkForWhiteSpaceInFront();
400:                    if (totalCharsSoFar == currentTokenMaxSize) {
401:                        currentTokenMaxSize = currentTokenMaxSize * 2;
402:                        char[] tempArray = new char[currentTokenMaxSize];
403:                        System.arraycopy(currentToken, 0, tempArray, 0,
404:                                totalCharsSoFar);
405:                        currentToken = tempArray;
406:                    }
407:
408:                    //see if we can find fieldSeparator
409:                    fieldSeparatorIndex = lookForPassedSeparator(
410:                            fieldSeparator, fieldSeparatorIndex,
411:                            fieldSeparatorLength, nextChar, false);
412:                    //every time we find a column separator, the return false will indicate that count
413:                    //this token as column data value and keep looking for more tokens or record
414:                    //separator
415:                    if (fieldSeparatorIndex == -1)
416:                        return false;
417:
418:                    //if found start delimiter, then don't look for record separator, just look for
419:                    //end delimiter
420:                    if (!foundStartDelimiter) {
421:                        //see if we can find recordSeparator
422:                        recordSeparatorIndex = lookForPassedSeparator(
423:                                recordSeparator, recordSeparatorIndex,
424:                                recordSeparatorLength, nextChar, true);
425:                        if (recordSeparatorIndex == -1)
426:                            return true;
427:                    }
428:                }
429:            }
430:
431:            //Feed one character (nextChar, already appended to currentToken) to the
432:            //separator matching logic. If not inside a start delimiter, look for the
433:            //start delimiter and for the separator passed, else look for the stop delimiter.
                //Returns -1 when the field or record separator has just been completed;
                //otherwise the updated match index for the passed separator, which is 0
                //when nothing has matched or while inside a delimited field.
434:            private int lookForPassedSeparator(char[] delimiter,
435:                    int delimiterIndex, int delimiterLength, int nextChar,
436:                    boolean lookForRecordSeperator) throws IOException {
437:
438:                //foundStartDelimiter will be false if we haven't found a start delimiter yet
439:                //if we haven't found startdelimiter, then we look for both start delimiter
440:                //and passed delimiter(which can be field or record delimiter). If we do find
441:                //start delimiter, then we only look for stop delimiter and not the passed delimiter.
442:                if (!foundStartDelimiter) {
443:                    //look for start delimiter only if its length is non-zero and only if we haven't
444:                    //already found a complete start/stop pair for this token.
445:                    if (fieldStartDelimiterLength != 0
446:                            && (!foundStartAndStopDelimiters)) {
447:                        //the code inside following if will be executed only if we have gone past all the
448:                        //white characters in the front, and are still within the first
                            //fieldStartDelimiterLength characters after them.
449:                        if (totalCharsSoFar != positionOfNonWhiteSpaceCharInFront
450:                                && (totalCharsSoFar - positionOfNonWhiteSpaceCharInFront) <= fieldStartDelimiterLength) {
451:                            //After getting rid of white spaces in front, look for the start delimiter. If
452:                            //found, set foundStartDelimiter flag.
453:                            if (nextChar == fieldStartDelimiter[fieldStartDelimiterIndex]) {
454:                                fieldStartDelimiterIndex++;
455:                                if (fieldStartDelimiterIndex == fieldStartDelimiterLength) {
456:                                    foundStartDelimiter = true;
457:                                    //since characters read so far are same as start delimiters, discard those chars
458:                                    totalCharsSoFar = 0;
459:                                    positionOfNonWhiteSpaceCharInFront = 0;
460:                                    return 0;
461:                                }
462:                            } else {
463:                                //found a mismatch for the start delimiter
464:                                //see if found match for more than one char of this start delimiter before the
465:                                //current mismatch, if so check the remaining chars again
466:                                //eg if start delimiter is xa and data is xxa
                                    //NOTE(review): the value returned by
                                    //reCheckRestOfTheCharacters is discarded here, so
                                    //fieldStartDelimiterIndex keeps its old value - confirm
                                    //whether it should be assigned.
467:                                if (fieldStartDelimiterIndex > 0) {
468:                                    reCheckRestOfTheCharacters(totalCharsSoFar
469:                                            - fieldStartDelimiterIndex,
470:                                            fieldStartDelimiter,
471:                                            fieldStartDelimiterLength);
472:                                }
473:                            }
474:                        }
475:                    }
476:
477:                    /*look for typical record separators: line feed (\n), a carriage return
478:                     * (\r) or a carriage return followed by line feed (\r\n).
                         * In this mode the delimiter array passed in is not consulted at all;
                         * only these characters end a record.
479:                     */
480:                    if (lookForRecordSeperator) {
481:                        if (nextChar == '\r' || nextChar == '\n') {
                                //remember which character actually ended the record
482:                            recordSeparatorChar0 = (char) nextChar;
483:                            if (nextChar == '\r') {
484:                                //omit the line feed character if it exists in the stream
485:                                omitLineFeed();
486:                            }
487:
                                //drop the separator character from the token
488:                            totalCharsSoFar = totalCharsSoFar - 1;
489:                            return -1;
490:                        }
491:
492:                        return delimiterIndex;
493:                    }
494:
495:                    //look for passed delimiter
496:                    if (nextChar == delimiter[delimiterIndex]) {
497:                        delimiterIndex++;
498:                        if (delimiterIndex == delimiterLength) { //found passed delimiter
                                //drop the delimiter characters from the token
499:                            totalCharsSoFar = totalCharsSoFar - delimiterLength;
500:                            return -1;
501:                        }
502:                        return delimiterIndex; //this number of chars of delimiter have exact match so far
503:                    } else {
504:                        //found a mismatch for the delimiter
505:                        //see if found match for more than one char of this delimiter before the
506:                        //current mismatch, if so check the remaining chars again
507:                        //eg if delimiter is xa and data is xxa
508:                        if (delimiterIndex > 0)
509:                            return (reCheckRestOfTheCharacters(totalCharsSoFar
510:                                    - delimiterIndex, delimiter,
511:                                    delimiterLength));
512:                    }
513:                } else {
514:                    //see if we can find fieldStopDelimiter
515:                    if (nextChar == fieldStopDelimiter[fieldStopDelimiterIndex]) {
516:                        fieldStopDelimiterIndex++;
517:                        if (fieldStopDelimiterIndex == fieldStopDelimiterLength) {
                                //skipDoubleDelimiters is defined outside the visible part of
                                //this file; when it reports true the delimiter is not
                                //treated as the end of the field and matching starts over
518:                            boolean skipped = skipDoubleDelimiters(fieldStopDelimiter);
519:                            if (!skipped) {
520:                                foundStartDelimiter = false;
521:                                //found stop delimiter, discard the chars corresponding to stop delimiter
522:                                totalCharsSoFar = totalCharsSoFar
523:                                        - fieldStopDelimiterLength;
524:                                //following is to take care of a case like "aa"aa This will result in an
525:                                //error. Also a case like "aa"   will truncate it to just aa
526:                                stopDelimiterPosition = totalCharsSoFar;
527:                                //following is used to distinguish between empty string ,"", and null string ,,
528:                                foundStartAndStopDelimiters = true;
529:                            } else {
530:                                fieldStopDelimiterIndex = 0;
531:                            }
532:                            return 0;
533:                        }
534:                        return 0;
535:                    } else {
536:                        //found a mismatch for the stop delimiter
537:                        //see if found match for more than one char of this stop delimiter before the
538:                        //current mismatch, if so check the remaining chars again
539:                        //eg if stop delimiter is xa and data is xxa
                            //NOTE(review): the value returned by reCheckRestOfTheCharacters
                            //is discarded here, so fieldStopDelimiterIndex keeps its old
                            //value after a mismatch - confirm whether it should be assigned.
540:                        if (fieldStopDelimiterIndex > 0) {
541:                            reCheckRestOfTheCharacters(totalCharsSoFar
542:                                    - fieldStopDelimiterIndex,
543:                                    fieldStopDelimiter,
544:                                    fieldStopDelimiterLength);
545:                            return 0;
546:                        }
547:                    }
548:                }
549:                return 0;
550:            }
551:
552:            //If after finding a few matching characters for a delimiter, find a mismatch,
553:            //restart the matching process from character next to the one from which you
554:            //were in the process of finding the matching pattern
555:            private int reCheckRestOfTheCharacters(int startFrom,
556:                    char[] delimiter, int delimiterLength) {
557:                int delimiterIndex = 0;
558:                // alc: need to test delim of abab with abaabab
559:                // if delimIndex resets to 0, i probably needs to reset to
560:                // (an ever increasing) startFrom=startFrom+1, not stay where it is
561:                for (int i = startFrom; i < totalCharsSoFar; i++) {
562:                    if (currentToken[i] == delimiter[delimiterIndex])
563:                        delimiterIndex++;
564:                    else
565:                        delimiterIndex = 0;
566:                }
567:                return delimiterIndex;
568:            }
569:
570:            /*
571:             * skips the duplicate delimeter characters inserd character stringd ata 
572:             * to get the original string. In Double Delimter recognigation Delimiter 
573:             * Format strings are written with a duplicate delimeter if a delimiter is
574:             * found inside the data while exporting.
575:             * For example with double quote(") as character delimiter
576:             *
577:             *	 "What a ""nice""day!"
578:             *
579:             *   will be imported as:
580:             *
581:             *	 What a "nice"day!
582:             *
583:             *	 In the case of export, the rule applies in reverse. For example,
584:             *
585:             *	 I am 6"tall.
586:             *
587:             *	 will be exported to a file as:
588:             *
589:             *	 "I am 6""tall."
590:             */
591:            private boolean skipDoubleDelimiters(char[] characterDelimiter)
592:                    throws IOException {
593:                boolean skipped = true;
594:                int cDelLength = characterDelimiter.length;
595:                bufferedReader.mark(cDelLength);
596:                for (int i = 0; i < cDelLength; i++) {
597:                    int nextChar = bufferedReader.read();
598:                    if (nextChar != characterDelimiter[i]) {
599:                        //not a double delimter case
600:                        bufferedReader.reset();
601:                        skipped = false;
602:                        break;
603:                    }
604:                }
605:                return skipped;
606:            }
607:
608:            //omit the line feed character(\n) 
609:            private void omitLineFeed() throws IOException {
610:                bufferedReader.mark(1);
611:                int nextChar = bufferedReader.read();
612:                if (nextChar != '\n') {
613:                    //not a Line Feed
614:                    bufferedReader.reset();
615:                }
616:            }
617:
618:            /**returns the number of the current row
619:             */
620:            int getCurrentRowNumber() {
621:                return lineNumber;
622:            }
623:
624:            /**the way we read the next row from input file depends on it's format
625:             * @exception	Exception if there is an error
626:             */
627:            boolean readNextRow(String[] returnStringArray) throws Exception {
628:                boolean readVal;
629:                int idx;
630:
631:                if (!streamOpenForReading) {
632:                    openFile();
633:                    //as earlier, ignore the first row if it's colum definition
634:                    //do uppercase because the ui shows the values as True and False
635:                    if (hasColumnDefinition) {
636:                        ignoreFirstRow();
637:                    }
638:                }
639:                if (formatCode == DEFAULT_FORMAT_CODE)
640:                    readVal = readNextDelimitedRow(returnStringArray);
641:                else
642:                    readVal = readNextFixedRow(returnStringArray);
643:
644:                return readVal;
645:            }
646:
647:            // made this a field so it isn't inited for each row, just
648:            // set and cleared on the rows that need it (the last row
649:            // in a file, typically, so it isn't used much)
650:
651:            private boolean haveSep = true;
652:
653:            //read the specified column width for each column
654:            private boolean readNextFixedRow(String[] returnStringArray)
655:                    throws Exception {
656:                // readLength is how many bytes it has read so far
657:                int readLength = 0;
658:                int totalLength = 0;
659:
660:                // keep reading until rolWidth bytes have been read
661:                while ((readLength += bufferedReader.read(tempString,
662:                        readLength, rowWidth - readLength)) < rowWidth) {
663:
664:                    if (readLength == totalLength - 1) {// EOF
665:                        if (readLength == -1) { // no row, EOF
666:                            return false;
667:                        } else {
668:                            // it's only a bad read if insufficient data was
669:                            // returned; missing the last record separator is ok
670:                            if (totalLength != rowWidth
671:                                    - recordSeparator.length) {
672:                                throw LoadError
673:                                        .unexpectedEndOfFile(lineNumber + 1);
674:                            } else {
675:                                haveSep = false;
676:                                break;
677:                            }
678:                        }
679:                    }
680:                    // else, some thing is read, continue until the whole column is
681:                    // read
682:                    totalLength = readLength;
683:                }
684:
685:                int colStart = 0;
686:                for (int i = 0; i < numberOfColumns; i++) {
687:                    int colWidth = columnWidths[i];
688:
689:                    if (colWidth == 0) //if column width is 0, return null
690:                        returnStringArray[i] = null;
691:                    else {
692:                        // if found nullstring, return it as null value
693:                        String checkAgainstNullString = new String(tempString,
694:                                colStart, colWidth);
695:                        if (checkAgainstNullString.trim().equals(nullString))
696:                            returnStringArray[i] = null;
697:                        else
698:                            returnStringArray[i] = checkAgainstNullString;
699:                        colStart += colWidth;
700:                    }
701:                }
702:
703:                //if what we read is not recordSeparator, throw an exception
704:                if (haveSep) {
705:                    for (int i = (recordSeparatorLength - 1); i >= 0; i--) {
706:                        if (tempString[colStart + i] != recordSeparator[i])
707:                            throw LoadError
708:                                    .recordSeparatorMissing(lineNumber + 1);
709:                    }
710:                } else
711:                    haveSep = true; // reset for the next time, if any.
712:
713:                lineNumber++;
714:                return true;
715:            }
716:
717:            //by this time, we know number of columns that make up a row in this data file
718:            //so first look for number of columns-1 field delimites and then look for record
719:            //delimiter
720:            private boolean readNextDelimitedRow(String[] returnStringArray)
721:                    throws Exception {
722:
723:                int upperLimit = numberOfColumns - 1; //reduce # field accesses
724:
725:                //no data in the input file for some reason
726:                if (upperLimit < 0)
727:                    return false;
728:
729:                //look for number of columns - 1 field separators
730:                for (int i = 0; i < upperLimit; i++) {
731:                    if (!readNextToken(fieldSeparator, 0, fieldSeparatorLength,
732:                            false)) {
733:                        if (i == 0) // still on the first check
734:                            return false;
735:                        else
736:                            throw LoadError.unexpectedEndOfFile(lineNumber + 1);
737:                    }
738:                    //following is to take care of a case like "aa"aa This will result in an
739:                    //error. Also a case like "aa"   will truncate it to just aa. valid blank
740:                    //chars are  ' ' '\r' '\t'
741:                    if (stopDelimiterPosition != 0
742:                            && ((stopDelimiterPosition) != totalCharsSoFar)) {
743:                        for (int k = stopDelimiterPosition + 1; k < totalCharsSoFar; k++) {
744:                            // alc: should change || to && since || case is never true --
745:                            // currentChar can't be three different things at once.
746:                            // alc: why no \n? BTW, \r and \n should be replaced
747:                            // or amended with the first char of line.separator...
748:                            //char currentChar = currentToken[k];
749:                            //if (currentChar != ' ' && currentChar != '\r' && currentChar != '\t')
750:                            // use String.trim()'s definition of whitespace.
751:                            // i18n - check for whitespace - avoid doing a hard coded
752:                            // character check and use the isWhitespace method to cover all
753:                            // the Unicode options
754:                            if (Character.isWhitespace(currentToken[k]) == false) {
755:                                throw LoadError.dataAfterStopDelimiter(
756:                                        lineNumber + 1, i + 1);
757:                            }
758:                        }
759:                        totalCharsSoFar = stopDelimiterPosition;
760:                    }
761:                    //totalCharsSoFar can become -1 in readNextToken
762:                    if (totalCharsSoFar != -1) {
763:                        returnStringArray[i] = new String(currentToken,
764:                                positionOfNonWhiteSpaceCharInFront,
765:                                totalCharsSoFar);
766:                    } else
767:                        returnStringArray[i] = null;
768:                }
769:
770:                //look for record separator for the last column's value
771:                //if I find endoffile and the it's only one column table, then it's a valid endoffile
772:                //case. Otherwise, it's an error case. Without the following check for the return value
773:                //of readNextToken, import was going into infinite loop for a table with single column
774:                //import. end-of-file was getting ignored without the following if.
775:                if (!readNextToken(recordSeparator, 0, recordSeparatorLength,
776:                        true)) {
777:                    if (upperLimit == 0)
778:                        return false;
779:                    else
780:                        throw LoadError.unexpectedEndOfFile(lineNumber + 1);
781:                }
782:                //following is to take care of a case like "aa"aa This will result in an
783:                //error. Also a case like "aa"   will truncate it to just aa. valid blank
784:                //chars are  ' ' '\r' '\t'
785:                if (stopDelimiterPosition != 0
786:                        && (stopDelimiterPosition != totalCharsSoFar)) {
787:                    for (int i = stopDelimiterPosition + 1; i < totalCharsSoFar; i++) {
788:                        // alc: should change || to && since || case is never true --
789:                        // currentChar can't be three different things at once.
790:                        // alc: why no \n? BTW, \r and \n should be replaced
791:                        // or amended with the first char of line.separator...
792:                        //char currentChar = currentToken[i];
793:                        //if (currentChar != ' ' && currentChar != '\r' && currentChar != '\t')
794:                        // use String.trim()'s definition of whitespace.
795:                        // i18n - check for whitespace - avoid doing a hard coded character
796:                        // check and use the isWhitespace method to cover all the Unicode
797:                        // options
798:                        if (Character.isWhitespace(currentToken[i]) == false) {
799:                            throw LoadError.dataAfterStopDelimiter(
800:                                    lineNumber + 1, numberOfColumns);
801:                        }
802:                    }
803:                    totalCharsSoFar = stopDelimiterPosition;
804:                }
805:
806:                //to be able to read delimited files that have a delimeter at the end,
807:                //we have to reduce totalCharsSoFar by one when it is last column.
808:                //Otherwise last delimeter becomes part of the data.
809:                if (hasDelimiterAtEnd) {
810:                    if (!(fieldStopDelimiterLength > 0)) { //if there is no field stop delimeter specified,
811:                        //hopefully fieldStopDelimiterLength will not be >0
812:
813:                        //there is weird behavior in the code that makes it read the last
814:                        //delimeter as part of the last column data, so this forces us to
815:                        //reduce number of read chars only if there is data stop delimeter
816:
817:                        //Only if it is the last column:
818:                        //if (fieldStopDelimiter==null){
819:                        --totalCharsSoFar;
820:                        //}
821:                    }
822:                }
823:
824:                if (totalCharsSoFar != -1) {
825:
826:                    /* This is a hack to fix a problem: When there is missing data in columns
827:                    and hasDelimiterAtEnd==true, then the last delimiter was read as the last column data.
828:                    Hopefully this will tackle that issue by skipping the last column which is in this case
829:                    just the delimiter.
830:                    We need to be careful about the case when the last column data itself is
831:                    actually same as the delimiter.
832:                     */
833:                    if (!hasDelimiterAtEnd) {//normal path:
834:                        returnStringArray[upperLimit] = new String(
835:                                currentToken,
836:                                positionOfNonWhiteSpaceCharInFront,
837:                                totalCharsSoFar);
838:                    } else if (totalCharsSoFar == fieldSeparatorLength
839:                            && isFieldSep(currentToken)) {
840:                        //means hasDelimiterAtEnd==true and all of the above are true
841:
842:                        String currentStr = new String(currentToken,
843:                                positionOfNonWhiteSpaceCharInFront,
844:                                totalCharsSoFar);
845:
846:                        if (currentToken[totalCharsSoFar + 1] == fieldStopDelimiter[0]) {
847:                            returnStringArray[upperLimit] = currentStr;
848:                        } else {
849:                            returnStringArray[upperLimit] = null;
850:                        }
851:                    } else {
852:                        //means hasDelimiterAtEnd==true and previous case is wrong.
853:                        if (totalCharsSoFar > 0) {
854:                            returnStringArray[upperLimit] = new String(
855:                                    currentToken,
856:                                    positionOfNonWhiteSpaceCharInFront,
857:                                    totalCharsSoFar);
858:                        } else {
859:                            returnStringArray[upperLimit] = null;
860:                        }
861:                    }
862:                } else
863:                    returnStringArray[upperLimit] = null;
864:
865:                lineNumber++;
866:                return true;
867:            }
868:
869:            //tells if a char array is field separator:
870:            private boolean isFieldSep(char[] chrArray) {
871:                for (int i = 0; i < chrArray.length && i < fieldSeparatorLength; i++) {
872:                    if (chrArray[i] != fieldSeparator[i])
873:                        return false;
874:                }
875:                return true;
876:            }
877:
878:            //read one column's value at a time
879:            boolean readNextToken(char[] delimiter, int delimiterIndex,
880:                    int delimiterLength, boolean isRecordSeperator)
881:                    throws Exception {
882:                int nextChar;
883:
884:                fieldStopDelimiterIndex = 0;
885:                fieldStartDelimiterIndex = 0;
886:                totalCharsSoFar = 0;
887:                //at the start of every new token, make white space in front count 0
888:                positionOfNonWhiteSpaceCharInFront = 0;
889:                stopDelimiterPosition = 0;
890:                foundStartAndStopDelimiters = false;
891:                foundStartDelimiter = false;
892:                int returnValue;
893:
894:                while (true) {
895:                    nextChar = bufferedReader.read();
896:                    if (nextChar == -1) //end of file
897:                        return false;
898:
899:                    //read the character into the token holder. If token holder reaches it's capacity,
900:                    //double it's capacity
901:                    currentToken[totalCharsSoFar++] = (char) nextChar;
902:                    //check if character read is white space char in front
903:                    checkForWhiteSpaceInFront();
904:                    if (totalCharsSoFar == currentTokenMaxSize) {
905:                        currentTokenMaxSize = currentTokenMaxSize * 2;
906:                        char[] tempArray = new char[currentTokenMaxSize];
907:                        System.arraycopy(currentToken, 0, tempArray, 0,
908:                                totalCharsSoFar);
909:                        currentToken = tempArray;
910:                    }
911:
912:                    returnValue = lookForPassedSeparator(delimiter,
913:                            delimiterIndex, delimiterLength, nextChar,
914:                            isRecordSeperator);
915:                    if (returnValue == -1) {
916:                        //if no stop delimiter found that "" this means null
917:                        //also if no stop delimiter found then get rid of spaces around the token
918:                        if (!foundStartAndStopDelimiters) {
919:                            if (totalCharsSoFar == 0)
920:                                totalCharsSoFar = -1;
921:                            else {
922:                                //get the count of white spaces from back and subtract that and white spaces in
923:                                //the front from the characters read so far so that we ignore spaces around the
924:                                //token.
925:                                checkForWhiteSpaceInBack();
926:                                totalCharsSoFar = totalCharsSoFar
927:                                        - positionOfNonWhiteSpaceCharInFront
928:                                        - positionOfNonWhiteSpaceCharInBack;
929:                            }
930:                        }
931:                        return true;
932:                    }
933:                    delimiterIndex = returnValue;
934:                }
935:            }
936:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.