001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041: package org.netbeans.modules.mashup.db.bootstrap;
042:
043: import java.io.File;
044: import java.io.IOException;
045: import java.sql.Types;
046: import java.util.ArrayList;
047: import java.util.Collections;
048: import java.util.List;
049: import java.util.StringTokenizer;
050: import java.util.regex.Pattern;
051:
052: import org.axiondb.io.AxionFileSystem;
053: import org.axiondb.io.BufferedDataInputStream;
054: import org.netbeans.modules.mashup.db.common.FlatfileDBException;
055: import org.netbeans.modules.mashup.db.common.PropertyKeys;
056: import org.netbeans.modules.mashup.db.common.SQLUtils;
057: import org.netbeans.modules.mashup.db.model.FlatfileDBColumn;
058: import org.netbeans.modules.mashup.db.model.FlatfileDBTable;
059: import org.netbeans.modules.mashup.db.model.impl.FlatfileDBColumnImpl;
060: import net.java.hulp.i18n.Logger;
061:
062: import com.sun.sql.framework.utils.StringUtil;
063: import org.netbeans.modules.etl.logger.Localizer;
064: import org.netbeans.modules.etl.logger.LogUtil;
065:
066: /**
067: * Extends base class to provide delimited-file implementation of FlatfileBootstrapParser.
068: * <br>
 * TODO: Scan up to 10 lines to determine file type, record delimiter, field length (in
 * case of delimiter table)
071: *
072: * @author Ahimanikya Satapathy
073: * @version $Revision$
074: */
075: public class DelimitedBootstrapParser implements
076: FlatfileBootstrapParser {
077:
078: private static transient final Logger mLogger = LogUtil
079: .getLogger(DelimitedBootstrapParser.class.getName());
080: private static transient final Localizer mLoc = Localizer.get();
081:
082: class CharTokenizer {
083:
084: char[] _charArray;
085: private int _currentPosition;
086: private String _delimiters;
087: private int _maxPosition;
088: private Pattern _qqPattern = null;
089: private String _qualifier;
090:
091: public CharTokenizer(char[] thecharArray, String theDelim,
092: String qualifier) {
093: _delimiters = theDelim;
094: _charArray = thecharArray;
095: _maxPosition = _charArray.length;
096: _currentPosition = 0;
097: _qualifier = qualifier;
098: }
099:
100: public String getQualifier() {
101: return _qualifier;
102: }
103:
104: public int getQualifierLength() {
105: return _qualifier.length();
106: }
107:
108: public boolean hasMoreTokens() {
109: return (_currentPosition < _maxPosition - 1);
110: }
111:
112: public boolean isQuoted() {
113: return !isNullString(_qualifier);
114: }
115:
116: public String nextToken() throws FlatfileDBException {
117: int start = _currentPosition;
118: int end = start;
119: int pos = _currentPosition;
120: boolean inQuotedString = false;
121: boolean isDelimiter = false;
122: boolean endQuotedString = false;
123: boolean treatAsUnquoted = false;
124: boolean wasEscaped = false;
125: boolean wasNewline = false;
126:
127: while (pos < _maxPosition) {
128: // if new line
129: if (isNewLine(_charArray[pos])) {
130: if (isQuoted() && !endQuotedString) {
131: _maxPosition = pos;
132: _currentPosition = pos;
133: end = pos;
134: break;
135: }
136: _currentPosition = _maxPosition;
137: }
138:
139: // if quoted and found qualifier
140: if (isQuoted() && isQualifier(pos)) {
141: if (!inQuotedString) { // not inside the quoted string
142: pos += getQualifierLength();
143: start = pos;
144: inQuotedString = true;
145: continue;
146: } else if (isQualifier(pos + getQualifierLength())) {
147: pos += (getQualifierLength() * 2);
148: wasEscaped = true;
149: continue;
150: }
151: // inside the quoted string
152: end = pos;
153: pos += getQualifierLength();
154: inQuotedString = false;
155: endQuotedString = true;
156: continue;
157: }
158:
159: // if quoted, close quote found, but have not found a delimiter yet
160: if (isQuoted() && endQuotedString
161: && _delimiters.charAt(0) != _charArray[pos]
162: && !isNewLine(_charArray[pos])) {
163: pos++;
164: continue;
165: }
166:
167: // if quoted, close quote found and found a delimiter
168: if (isQuoted() && endQuotedString) {
169: if (isDelimiter(pos)) {
170: isDelimiter = true;
171: pos += _delimiters.length();
172: break;
173: } else if (isNewLine(_charArray[pos])) {
174: wasNewline = true;
175: break;
176: }
177: }
178:
179: // if quoted but did not find start qualifer, treat this token as
180: // unquoted
181: if (isQuoted() && !inQuotedString) {
182: treatAsUnquoted = true;
183: }
184:
185: // if non-quoted
186: if ((!isQuoted() || treatAsUnquoted)
187: && pos < _maxPosition) {
188: if (isDelimiter(pos)) {
189: end = pos;
190: isDelimiter = true;
191: pos += _delimiters.length();
192: break;
193: } else if (isNewLine(_charArray[pos])) {
194: end = pos;
195: break;
196: }
197: }
198:
199: pos++;
200: }
201:
202: if (wasNewline) {
203: _currentPosition = _maxPosition;
204: } else {
205: _currentPosition = pos;
206: }
207:
208: if (pos == _maxPosition) {
209: end = _maxPosition;
210: }
211:
212: if (start != end) {
213: String token = new String(_charArray, start, end
214: - start);
215: if (wasEscaped) {
216: _qqPattern = Pattern.compile(_qualifier
217: + _qualifier);
218: return _qqPattern.matcher(token).replaceAll(
219: _qualifier);
220: }
221: return token;
222: } else if (endQuotedString || isDelimiter) {
223: return EMPTY_STRING;
224: } else {
225: throw new FlatfileDBException(
226: "\n\nBad file format for the given parameters, please fix the file and try again.");
227: }
228:
229: }
230:
231: // if delimiter more than 1 char long, make sure all chars match
232: private boolean isDelimiter(int position) {
233: boolean delimiterFound = true;
234: for (int j = 0; j < _delimiters.length(); j++) {
235: if ((position < _maxPosition)
236: && (_delimiters.charAt(j) != _charArray[position++])) {
237: delimiterFound = false;
238: break;
239: }
240: }
241: return delimiterFound;
242: }
243:
244: // if qualifier more than 1 char long, make sure all chars match
245: private boolean isQualifier(int position) {
246: boolean qualifierFound = true;
247: for (int j = 0; j < getQualifierLength(); j++) {
248: if (getQualifier().charAt(j) != _charArray[position++]) {
249: qualifierFound = false;
250: break;
251: }
252: }
253: return qualifierFound;
254: }
255: }
256:
257: private static final String EMPTY_STRING = "";
258: private static final int EOF = -1;
259:
260: //private static final char FILLER = ' ';
261: private static AxionFileSystem FS = new AxionFileSystem();
262: private static final String LOG_CATEGORY = DelimitedBootstrapParser.class
263: .getName();
264: private static final char NL = Character.MAX_VALUE;
265:
266: /** Creates a new default instance of DelimitedBootstrapParser. */
267: public DelimitedBootstrapParser() {
268: }
269:
270: /**
271: * @see org.netbeans.modules.mashup.db.bootstrap.FlatfileBootstrapParser#getFlatfileFields
272: */
273: public List buildFlatfileDBColumns(FlatfileDBTable table)
274: throws FlatfileDBException {
275: if (table == null || table.getProperties() == null
276: || table.getProperties().size() == 0) {
277: return Collections.EMPTY_LIST;
278: }
279:
280: String fieldSep = table
281: .getProperty(PropertyKeys.FIELDDELIMITER);
282: if (fieldSep.equalsIgnoreCase("UserDefined")) {
283: fieldSep = table
284: .getProperty(PropertyKeys.WIZARDCUSTOMFIELDDELIMITER);
285: table.setProperty(PropertyKeys.FIELDDELIMITER, fieldSep);
286: if (StringUtil.isNullString(fieldSep)) {
287: throw new FlatfileDBException(
288: "Please supply valid custom delimiter.");
289: }
290: }
291:
292: final String recordSep = table
293: .getProperty(PropertyKeys.RECORDDELIMITER);
294: final String qualifier = table
295: .getProperty(PropertyKeys.QUALIFIER);
296: boolean isFirstLineHeader = Boolean.valueOf(
297: table.getProperty(PropertyKeys.ISFIRSTLINEHEADER))
298: .booleanValue();
299: int defaultPrecision = 60;
300:
301: if (fieldSep == null || recordSep == null) {
302: return Collections.EMPTY_LIST;
303: }
304:
305: // Support multiple record delimiter for delimited
306: StringTokenizer tokenizer = new StringTokenizer(recordSep, " ");
307: ArrayList tmpList = new ArrayList();
308: while (tokenizer.hasMoreTokens()) {
309: tmpList.add(tokenizer.nextToken());
310: }
311: String[] recordSeps = (String[]) tmpList.toArray(new String[0]);
312:
313: int jdbcType = SQLUtils.getStdJdbcType(table
314: .getProperty(PropertyKeys.WIZARDDEFAULTSQLTYPE));
315: if (jdbcType == SQLUtils.JDBCSQL_TYPE_UNDEFINED) {
316: jdbcType = Types.VARCHAR;
317: }
318:
319: try {
320: defaultPrecision = Integer
321: .valueOf(
322: table
323: .getProperty(PropertyKeys.WIZARDDEFAULTPRECISION))
324: .intValue();
325: } catch (Exception e) {
326: defaultPrecision = 20;
327: }
328:
329: File dataFile = new File(table.getLocalFilePath(), table
330: .getFileName());
331: BufferedDataInputStream data = null;
332:
333: FlatfileDBColumn[] columns = getColumns(table);
334: List<FlatfileDBColumn> colList = new ArrayList<FlatfileDBColumn>(
335: columns.length);
336:
337: try {
338: data = new BufferedDataInputStream(FS.open(dataFile));
339: char[] charArray = readLine(data, 0, recordSeps);
340: if (charArray[0] == NL && isFirstLineHeader) {
341: throw new FlatfileDBException(
342: "Empty line detected - invalid header.");
343: }
344:
345: CharTokenizer charTokenizer = new CharTokenizer(charArray,
346: fieldSep, qualifier);
347: for (int i = 1; charTokenizer.hasMoreTokens(); i++) {
348:
349: String columnName = charTokenizer.nextToken();
350: if (!isFirstLineHeader || charArray[0] == NL) {
351: columnName = "FIELD_" + i;
352: } else {
353: // WT #63275: Trim leading/trailing whitespace and ensure internal
354: // spaces in a header name get substituted with underscores.
355: columnName = StringUtil
356: .createColumnNameFromFieldName(columnName
357: .trim());
358: }
359:
360: FlatfileDBColumn column = null;
361: if (columns != null && i <= columns.length) {
362: column = columns[i - 1];
363: }
364:
365: if (column == null) {
366: column = new FlatfileDBColumnImpl(columnName,
367: jdbcType, defaultPrecision, 0, true);
368: } else if (isFirstLineHeader) {
369: column.setName(columnName);
370: }
371:
372: column.setCardinalPosition(i);
373: colList.add(column);
374: }
375:
376: return colList;
377: } catch (Exception e) {
378: mLogger.errorNoloc(mLoc.t(
379: "PRSR050: Failed to read and parse the file {0}",
380: LOG_CATEGORY), e);
381: throw new FlatfileDBException(
382: "Failed to read and parse the sample file."
383: + e.getMessage());
384: } finally {
385: FS.closeInputStream(data);
386: }
387: }
388:
389: private FlatfileDBColumn[] getColumns(FlatfileDBTable table) {
390: FlatfileDBColumn[] columns = new FlatfileDBColumn[0];
391: if (table.getColumnList().size() > 0) {
392: columns = (FlatfileDBColumn[]) table.getColumnList()
393: .toArray(columns);
394: }
395: return columns;
396: }
397:
398: private boolean isEndOfRecord(int recLength, int nextChar,
399: BufferedDataInputStream data, String[] lineSeps)
400: throws IOException {
401: if (isEOF(nextChar)) {
402: return true;
403: }
404:
405: boolean foundEOL = false;
406: for (int k = 0; (k < lineSeps.length && !foundEOL); k++) {
407: String lineSep = lineSeps[k];
408: if (!("".equals(lineSep)) && lineSep.charAt(0) == nextChar) {
409: foundEOL = true;
410: char[] charBuf = lineSep.toCharArray();
411: // Look ahead to see whether the following chars match EOL.
412: long lastDataFileOffset = data.getPos();
413: for (int i = 1; i < lineSep.length(); i++) {
414: if (charBuf[i] != (char) data.read()) {
415: data.seek(lastDataFileOffset);
416: foundEOL = false;
417: }
418: }
419: }
420: }
421: return foundEOL;
422: }
423:
424: private boolean isEOF(int nextChar) {
425: return nextChar == EOF;
426: }
427:
428: private boolean isNewLine(int nextChar) {
429: return nextChar == NL;
430: }
431:
432: private boolean isNullString(String str) {
433: return (str == null || str.trim().length() == 0);
434: }
435:
436: private char[] readLine(BufferedDataInputStream data,
437: long fileOffset, String[] lineSeps) throws IOException {
438: char[] _lineCharArray = new char[80];
439: //Arrays.fill(_lineCharArray, FILLER);
440: int recLength = 0;
441: try {
442: int nextChar;
443: data.seek(fileOffset);
444:
445: while (true) {
446: nextChar = data.read();
447: if (isEndOfRecord(recLength, nextChar, data, lineSeps)) {
448: char[] newlineCharArray = new char[recLength + 1];
449: System.arraycopy(_lineCharArray, 0,
450: newlineCharArray, 0, recLength);
451: _lineCharArray = newlineCharArray;
452: _lineCharArray[recLength] = NL;
453: break;
454: }
455:
456: // ensure capacity
457: if ((recLength + 2) > _lineCharArray.length) {
458: char[] newlineCharArray = new char[recLength + 80];
459: System.arraycopy(_lineCharArray, 0,
460: newlineCharArray, 0, _lineCharArray.length);
461: _lineCharArray = newlineCharArray;
462: }
463:
464: _lineCharArray[recLength++] = ((char) nextChar);
465: }
466: return _lineCharArray;
467:
468: } catch (IOException e) {
469: throw new IOException("Unable to parse data file...");
470: }
471: }
472:
473: public void makeGuess(FlatfileDBTable table) {
474: }
475:
476: public boolean acceptable(FlatfileDBTable table) {
477: return true; // since Delimited is default guess
478: }
479: }
|