0001: /*
0002: * TextFileParser.java
0003: *
0004: * This file is part of SQL Workbench/J, http://www.sql-workbench.net
0005: *
0006: * Copyright 2002-2008, Thomas Kellerer
0007: * No part of this code maybe reused without the permission of the author
0008: *
0009: * To contact the author please send an email to: support@sql-workbench.net
0010: *
0011: */
0012: package workbench.db.importer;
0013:
0014: import java.io.BufferedReader;
0015: import java.io.EOFException;
0016: import java.io.File;
0017: import java.io.IOException;
0018: import java.sql.SQLException;
0019: import java.util.ArrayList;
0020: import java.util.Iterator;
0021: import java.util.List;
0022: import java.util.Map;
0023:
0024: import workbench.db.ColumnIdentifier;
0025: import workbench.db.DbMetadata;
0026: import workbench.db.TableIdentifier;
0027: import workbench.db.WbConnection;
0028: import workbench.interfaces.JobErrorHandler;
0029: import workbench.resource.Settings;
0030: import workbench.util.ExceptionUtil;
0031: import workbench.interfaces.ImportFileParser;
0032: import workbench.log.LogMgr;
0033: import workbench.resource.ResourceMgr;
0034: import workbench.util.CsvLineParser;
0035: import workbench.util.FileUtil;
0036: import workbench.util.MessageBuffer;
0037: import workbench.util.SqlUtil;
0038: import workbench.util.StringUtil;
0039: import workbench.util.ValueConverter;
0040: import workbench.util.WbStringTokenizer;
0041: import java.util.regex.Pattern;
0042: import java.util.regex.Matcher;
0043: import workbench.db.importer.modifier.ImportValueModifier;
0044: import workbench.util.FixedLengthLineParser;
0045: import workbench.util.LineParser;
0046: import workbench.util.QuoteEscapeType;
0047: import workbench.util.WbFile;
0048:
0049: /**
0050: *
0051: * @author support@sql-workbench.net
0052: */
0053: public class TextFileParser implements RowDataProducer,
0054: ImportFileParser {
0055: private File inputFile;
0056: private File baseDir;
0057: private String tableName;
0058: private String encoding = null;
0059: private String delimiter = "\t";
0060: private String quoteChar = null;
0061: private boolean decodeUnicode = false;
0062: private boolean enableMultiLineMode = true;
0063: private boolean checkDependencies = false;
0064:
0065: private int colCount = -1;
0066: private int importColCount = -1;
0067:
0068: private ColumnIdentifier[] columns;
0069:
0070: // When importing a file with fixed column widths
0071: // each entry in this array defines the width of the corresponding
0072: // column in the columns array
0073: private int[] columnWidthMap;
0074:
0075: // for each column from columns
0076: // the value for the respective index
0077: // defines its real index (in rowData)
0078: // if the value is -1 then the column
0079: // will not be imported
0080: private int[] columnMap;
0081:
0082: private Object[] rowData;
0083:
0084: private boolean withHeader = true;
0085: private boolean cancelImport = false;
0086: private boolean regularStop = false;
0087: private boolean emptyStringIsNull = false;
0088: private boolean trimValues = false;
0089:
0090: private RowDataReceiver receiver;
0091: private boolean abortOnError = false;
0092: private WbConnection connection;
0093: private String sourceDir;
0094: private String extensionToUse;
0095: private JobErrorHandler errorHandler;
0096: private List<ColumnIdentifier> pendingImportColumns;
0097: private ValueConverter converter = new ValueConverter();
0098: private MessageBuffer messages = new MessageBuffer();
0099: private boolean hasErrors = false;
0100: private boolean hasWarnings = false;
0101:
0102: // If a filter for the input file is defined
0103: // this will hold the regular expressions per column
0104: private Pattern[] columnFilter;
0105: private Pattern lineFilter;
0106: private String targetSchema;
0107: private boolean blobsAreFilenames = true;
0108: private boolean clobsAreFilenames = false;
0109:
0110: private ImportFileHandler fileHandler = new ImportFileHandler();
0111: private String currentLine;
0112: private QuoteEscapeType quoteEscape;
0113: private ImportValueModifier valueModifier;
0114:
/**
 * Create a parser without an input source.
 * The source has to be defined through setInputFile() or setSourceDirectory().
 */
public TextFileParser() {
    super();
}
0117:
/**
 * Create a parser for a single input file.
 *
 * @param aFile the file to be imported
 */
public TextFileParser(File aFile) {
    sourceDir = null;
    inputFile = aFile;
}
0122:
0123: public ImportFileHandler getFileHandler() {
0124: return this .fileHandler;
0125: }
0126:
0127: public void setValueModifier(ImportValueModifier mod) {
0128: this .valueModifier = mod;
0129: }
0130:
0131: public void setEnableMultilineRecords(boolean flag) {
0132: this .enableMultiLineMode = flag;
0133: }
0134:
0135: public void setTargetSchema(String schema) {
0136: this .targetSchema = schema;
0137: }
0138:
0139: public void setReceiver(RowDataReceiver rec) {
0140: this .receiver = rec;
0141: }
0142:
0143: public void setInputFile(File file) {
0144: this .sourceDir = null;
0145: this .inputFile = file;
0146: }
0147:
0148: public void setSourceExtension(String ext) {
0149: if (ext != null && ext.trim().length() == 0)
0150: return;
0151: this .extensionToUse = ext;
0152: }
0153:
0154: public void setSourceDirectory(String dir) {
0155: this .sourceDir = dir;
0156: this .inputFile = null;
0157: }
0158:
0159: public void setTableName(String aName) {
0160: this .tableName = aName;
0161: }
0162:
0163: public void setQuoteEscaping(QuoteEscapeType type) {
0164: this .quoteEscape = type;
0165: }
0166:
0167: public void setCheckDependencies(boolean flag) {
0168: this .checkDependencies = flag;
0169: }
0170:
0171: public QuoteEscapeType getQuoteEscaping() {
0172: return this .quoteEscape;
0173: }
0174:
0175: public boolean hasErrors() {
0176: return this .hasErrors;
0177: }
0178:
0179: public boolean hasWarnings() {
0180: return this .hasWarnings;
0181: }
0182:
0183: public void importAllColumns() {
0184: this .columnMap = new int[this .colCount];
0185: for (int i = 0; i < this .colCount; i++)
0186: this .columnMap[i] = i;
0187: this .importColCount = this .colCount;
0188: }
0189:
0190: public String getSourceFilename() {
0191: if (this .inputFile == null)
0192: return null;
0193: return this .inputFile.getAbsolutePath();
0194: }
0195:
0196: public void setLineFilter(String regex) {
0197: try {
0198: this .lineFilter = Pattern.compile(regex);
0199: } catch (Exception e) {
0200: this .lineFilter = null;
0201: String msg = ResourceMgr.getString("ErrImportBadRegex");
0202: msg = StringUtil.replace(msg, "%regex%", regex);
0203: this .messages.append(msg);
0204: this .messages.appendNewLine();
0205: this .hasWarnings = true;
0206: LogMgr.logError("TextFileParser.addColumnFilter()",
0207: "Error compiling regular expression " + regex, e);
0208: }
0209: }
0210:
0211: public String getLastRecord() {
0212: return this .currentLine;
0213: }
0214:
0215: protected boolean hasColumnFilter() {
0216: if (this .columnFilter == null)
0217: return false;
0218: for (int i = 0; i < this .columnFilter.length; i++) {
0219: if (this .columnFilter[i] != null)
0220: return true;
0221: }
0222: return false;
0223: }
0224:
0225: public void addColumnFilter(String colname, String regex) {
0226: int index = this .getColumnIndex(colname);
0227: if (index == -1)
0228: return;
0229: if (this .columnFilter == null)
0230: this .columnFilter = new Pattern[this .colCount];
0231: try {
0232: Pattern p = Pattern.compile(regex);
0233: this .columnFilter[index] = p;
0234: } catch (Exception e) {
0235: LogMgr.logError("TextFileParser.addColumnFilter()",
0236: "Error compiling regular expression " + regex
0237: + " for column " + colname, e);
0238: String msg = ResourceMgr.getString("ErrImportBadRegex");
0239: msg = StringUtil.replace(msg, "%regex%", regex);
0240: this .messages.append(msg);
0241: this .messages.appendNewLine();
0242: this .hasWarnings = true;
0243: this .columnFilter[index] = null;
0244: }
0245: }
0246:
0247: public void setTreatClobAsFilenames(boolean flag) {
0248: this .clobsAreFilenames = flag;
0249: }
0250:
0251: public void setTreatBlobsAsFilenames(boolean flag) {
0252: this .blobsAreFilenames = flag;
0253: }
0254:
0255: public void setValueConverter(ValueConverter convert) {
0256: this .converter = convert;
0257: }
0258:
0259: public void setImportColumnNames(List<String> columnList)
0260: throws IllegalArgumentException {
0261: List<ColumnIdentifier> cols = new ArrayList<ColumnIdentifier>(
0262: columnList.size());
0263: for (String colname : columnList) {
0264: ColumnIdentifier col = new ColumnIdentifier(colname);
0265: if (!colname.equals(RowDataProducer.SKIP_INDICATOR)
0266: && cols.contains(col)) {
0267: String msg = ResourceMgr.getFormattedString(
0268: "ErrImpDupColumn", colname);
0269: this .messages.append(msg);
0270: throw new IllegalArgumentException("Duplicate column "
0271: + colname);
0272: }
0273: cols.add(col);
0274: }
0275: setImportColumns(cols);
0276: }
0277:
0278: /**
0279: * Define the columns that should be imported.
0280: * If the list is empty or null, then all columns will be imported
0281: * @param columnList the columns to be imported
0282: */
0283: public void setImportColumns(List<ColumnIdentifier> columnList) {
0284: if (columnList == null) {
0285: this .importAllColumns();
0286: return;
0287: }
0288:
0289: int count = columnList.size();
0290: if (count == 0) {
0291: this .importAllColumns();
0292: return;
0293: }
0294:
0295: if (this .columns == null) {
0296: // store the list so that when the columns
0297: // are retrieved or defined later, the real columns to be imported
0298: // can be defined
0299: this .pendingImportColumns = columnList;
0300: } else {
0301: this .pendingImportColumns = null;
0302: checkPendingImportColumns(columnList);
0303: }
0304: }
0305:
0306: private void removeInvalidColumns(List cols)
0307: throws IllegalArgumentException {
0308: Iterator itr = cols.iterator();
0309: while (itr.hasNext()) {
0310: Object o = itr.next();
0311: String columnName = o.toString();
0312: int index = this .getColumnIndex(columnName);
0313: if (index == -1) {
0314: itr.remove();
0315: String msg = ResourceMgr.getString("ErrImpColNotFound");
0316: this .messages.append(StringUtil.replace(msg,
0317: "%colname%", columnName)
0318: + "\n");
0319: throw new IllegalArgumentException("Column ["
0320: + columnName + "] not found");
0321: }
0322: }
0323: }
0324:
0325: /**
0326: * Retain only those columns in the defined source file columns
0327: * that are in the passed list
0328: */
0329: private void checkPendingImportColumns(List<ColumnIdentifier> colIds)
0330: throws IllegalArgumentException {
0331: if (colIds == null || colIds.size() == 0)
0332: return;
0333:
0334: removeInvalidColumns(colIds);
0335:
0336: int count = colIds.size();
0337: if (count == 0) {
0338: this .messages.append(ResourceMgr
0339: .getString("ErrImpInvalidColDef")
0340: + "\n");
0341: this .hasErrors = true;
0342: throw new IllegalArgumentException(
0343: "At least one import column must be defined");
0344: }
0345:
0346: this .columnMap = new int[this .colCount];
0347: for (int i = 0; i < this .colCount; i++)
0348: this .columnMap[i] = -1;
0349: this .importColCount = 0;
0350:
0351: for (int i = 0; i < count; i++) {
0352: // We use toString() so that either ColumnIds or Strings
0353: // can be put into the passed list
0354: ColumnIdentifier col = colIds.get(i);
0355: int index = this .getColumnIndex(col.getColumnName());
0356: if (index > -1) {
0357: this .columnMap[index] = i;
0358: this .importColCount++;
0359: } else {
0360: String msg = ResourceMgr.getString("ErrImpColNotFound");
0361: this .messages.append(StringUtil.replace(msg,
0362: "%colname%", col.getColumnName())
0363: + "\n");
0364: this .hasErrors = true;
0365: throw new IllegalArgumentException("Column ["
0366: + col.getColumnName() + "] not found!");
0367: }
0368: }
0369: }
0370:
0371: private ColumnIdentifier[] getColumnsToImport() {
0372: if (this .columnMap == null)
0373: return this .columns;
0374: if (this .importColCount == this .colCount)
0375: return this .columns;
0376: ColumnIdentifier[] result = new ColumnIdentifier[this .importColCount];
0377: int col = 0;
0378: for (int i = 0; i < this .colCount; i++) {
0379: if (this .columnMap[i] != -1) {
0380: result[col] = this .columns[i];
0381: col++;
0382: }
0383: }
0384: return result;
0385: }
0386:
0387: /**
0388: * Return the index of the specified column
0389: * in the import file.
0390: *
0391: * @param colName the column to search for
0392: * @return the index of the named column or -1 if the column was not found
0393: */
0394: private int getColumnIndex(String colName) {
0395: if (colName == null)
0396: return -1;
0397: if (this .colCount < 1)
0398: return -1;
0399: if (this .columns == null)
0400: return -1;
0401: for (int i = 0; i < this .colCount; i++) {
0402: if (this .columns[i] != null
0403: && colName.equalsIgnoreCase(this .columns[i]
0404: .getColumnName()))
0405: return i;
0406: }
0407: return -1;
0408: }
0409:
0410: /**
0411: * Define the columns in the input file.
0412: * @param columnList the list of columns present in the input file
0413: * @throws SQLException if the columns could not be verified
0414: * in the DB or the target table does not exist
0415: */
0416: public void setColumns(List<ColumnIdentifier> columnList)
0417: throws SQLException {
0418: setColumns(columnList, false);
0419: }
0420:
0421: public void setColumns(List<ColumnIdentifier> columnList,
0422: boolean checkTargetTable) throws SQLException {
0423: if (columnList == null || columnList.size() == 0)
0424: return;
0425:
0426: if (checkTargetTable)
0427: checkTargetTable();
0428:
0429: if (this .connection != null && this .tableName != null) {
0430: this .readColumnDefinition(columnList);
0431: checkPendingImportColumns(this .pendingImportColumns);
0432: } else {
0433: this .colCount = columnList.size();
0434: this .columns = new ColumnIdentifier[colCount];
0435: for (int i = 0; i < columns.length; i++) {
0436: this .columns[i] = columnList.get(i);
0437: }
0438: this .importAllColumns();
0439: }
0440: }
0441:
0442: /**
0443: * Define the width for each column.
0444: * This will reset a delimiter defined using setDelimiter()
0445: */
0446: public void setColumnWidths(
0447: Map<ColumnIdentifier, Integer> widthMapping) {
0448: if (widthMapping == null) {
0449: return;
0450: }
0451: if (this .columns == null) {
0452: throw new IllegalArgumentException("No columns defined!");
0453: }
0454:
0455: this .delimiter = null;
0456: this .columnWidthMap = new int[this .columns.length];
0457: for (int i = 0; i < columns.length; i++) {
0458: Integer width = widthMapping.get(columns[i]);
0459: if (width != null) {
0460: this .columnWidthMap[i] = width.intValue();
0461: }
0462: }
0463: }
0464:
0465: public void setConnection(WbConnection aConn) {
0466: this .connection = aConn;
0467: }
0468:
0469: public String getEncoding() {
0470: return (this .encoding == null ? Settings.getInstance()
0471: .getDefaultDataEncoding() : this .encoding);
0472: }
0473:
0474: public void setEncoding(String enc) {
0475: if (enc == null)
0476: return;
0477: this .encoding = enc;
0478: }
0479:
0480: public MessageBuffer getMessages() {
0481: return this .messages;
0482: }
0483:
0484: public void setAbortOnError(boolean flag) {
0485: this .abortOnError = flag;
0486: }
0487:
0488: public void setDelimiter(String delimit) {
0489: if (delimit == null)
0490: return;
0491: this .delimiter = delimit;
0492: if ("\\t".equals(this .delimiter)) {
0493: this .delimiter = "\t";
0494: }
0495: }
0496:
0497: public void stop() {
0498: LogMgr.logDebug("TextFileParser.stop()", "Stopping import");
0499: this .cancelImport = true;
0500: this .regularStop = true;
0501: }
0502:
0503: public boolean isCancelled() {
0504: return this .cancelImport;
0505: }
0506:
0507: public void cancel() {
0508: LogMgr.logDebug("TextFileParser.cancel()", "Cancelling import");
0509: this .cancelImport = true;
0510: this .regularStop = false;
0511: }
0512:
0513: public void setContainsHeader(boolean aFlag) {
0514: this .withHeader = aFlag;
0515: }
0516:
0517: public void setQuoteChar(String aChar) {
0518: if (aChar != null && aChar.trim().length() > 0) {
0519: this .quoteChar = aChar;
0520: } else {
0521: this .quoteChar = null;
0522: }
0523: }
0524:
0525: public void start() throws Exception {
0526: this .receiver.setTableCount(-1); // clear multi-table flag in receiver
0527: this .receiver.setCurrentTable(-1);
0528:
0529: try {
0530: if (this .sourceDir != null)
0531: processDirectory();
0532: else
0533: processOneFile();
0534: } finally {
0535: if (this .cancelImport && !regularStop) {
0536: this .receiver.importCancelled();
0537: } else {
0538: this .receiver.importFinished();
0539: }
0540: try {
0541: this .fileHandler.done();
0542: } catch (Throwable th) {
0543: }
0544: }
0545: }
0546:
0547: private void processDirectory() throws Exception {
0548: File dir = new File(this .sourceDir);
0549: if (this .extensionToUse == null)
0550: this .extensionToUse = ".txt";
0551:
0552: FileNameSorter sorter = new FileNameSorter(this .connection,
0553: dir, extensionToUse, new DefaultTablenameResolver());
0554: List<WbFile> toProcess = null;
0555: if (this .checkDependencies) {
0556: try {
0557: toProcess = sorter.getSortedList();
0558: } catch (CycleErrorException e) {
0559: cancelImport = true;
0560: LogMgr.logError("TextFileParser.processDirectory()",
0561: "Error when checking dependencies", e);
0562: throw e;
0563: }
0564: } else {
0565: toProcess = sorter.getFiles();
0566: }
0567:
0568: int count = toProcess.size();
0569: this .receiver.setTableCount(count);
0570: int currentFile = 0;
0571:
0572: for (WbFile f : toProcess) {
0573: if (this .cancelImport) {
0574: break;
0575: }
0576:
0577: try {
0578: currentFile++;
0579: this .receiver.setCurrentTable(currentFile);
0580: this .inputFile = f;
0581: this .tableName = f.getFileName();
0582: this .columns = null;
0583: this .colCount = 0;
0584: this .columnMap = null;
0585: this .processOneFile();
0586: } catch (Exception e) {
0587: this .hasErrors = true;
0588: this .receiver.tableImportError();
0589: if (this .abortOnError)
0590: throw e;
0591: }
0592: }
0593: }
0594:
0595: private void setupFileHandler() throws IOException {
0596: this .fileHandler
0597: .setMainFile(this .inputFile, this .getEncoding());
0598: }
0599:
0600: private void processOneFile() throws Exception {
0601: this .cancelImport = false;
0602: this .regularStop = false;
0603:
0604: if (this .inputFile.isAbsolute()) {
0605: this .baseDir = this .inputFile.getParentFile();
0606: }
0607: if (baseDir == null)
0608: this .baseDir = new File(".");
0609:
0610: setupFileHandler();
0611:
0612: if (!this .withHeader && this .sourceDir != null) {
0613: this .setColumns(this .getColumnsFromTargetTable(), true);
0614: }
0615:
0616: BufferedReader in = this .fileHandler.getMainFileReader();
0617:
0618: String lineEnding = StringUtil.LINE_TERMINATOR;
0619: if (enableMultiLineMode) {
0620: try {
0621: lineEnding = FileUtil.getLineEnding(in);
0622: } catch (IOException io) {
0623: LogMgr
0624: .logError(
0625: "TextFileParser.processOneFile()",
0626: "Could not read line ending from file. Multi-line mode disabled!",
0627: io);
0628: this .messages.append(ResourceMgr
0629: .getString("ErrNoMultiLine")
0630: + "\n");
0631: enableMultiLineMode = false;
0632: }
0633: LogMgr.logInfo("TextFileParser.processOneFile()",
0634: "Using line ending: "
0635: + lineEnding.replaceAll("\\r", "\\\\r")
0636: .replaceAll("\\n", "\\\\n"));
0637: // now that we have already used the Reader supplied by the fileHandler,
0638: // we have to close and re-open the ZIP archive in order to make sure we start at the beginning
0639: // as we cannot rely on mark() and reset() to be available for the ZIP archives.
0640: in.close();
0641: setupFileHandler();
0642: in = this .fileHandler.getMainFileReader();
0643: }
0644:
0645: currentLine = null;
0646:
0647: try {
0648: currentLine = in.readLine();
0649: if (this .withHeader) {
0650: if (currentLine == null)
0651: throw new IOException("Could not read header line!");
0652: if (this .columns == null)
0653: this .readColumns(currentLine);
0654: currentLine = in.readLine();
0655: }
0656: } catch (EOFException eof) {
0657: currentLine = null;
0658: } catch (IOException e) {
0659: LogMgr.logWarning("TextFileParser.processOneFile()",
0660: "Error reading input file "
0661: + inputFile.getAbsolutePath(), e);
0662: FileUtil.closeQuitely(in);
0663: throw e;
0664: } catch (SQLException e) {
0665: LogMgr.logError("TextFileParser.processOneFile()",
0666: "Column definition could not be read.", e);
0667: FileUtil.closeQuitely(in);
0668: throw e;
0669: }
0670:
0671: if (this .colCount <= 0) {
0672: throw new Exception(
0673: "Cannot import file without a column definition");
0674: }
0675:
0676: ColumnIdentifier[] cols = this .getColumnsToImport();
0677: try {
0678: this .receiver.setTargetTable(getTargetTable(), cols);
0679: } catch (Exception e) {
0680: LogMgr.logError("TextFileParser.processOneFile()",
0681: "Error setting target table", e);
0682: throw e;
0683: }
0684:
0685: this .rowData = new Object[this .importColCount];
0686: int importRow = 0;
0687:
0688: char quoteCharToUse = (quoteChar == null ? 0 : quoteChar
0689: .charAt(0));
0690: LineParser tok = null;
0691:
0692: if (this .columnWidthMap != null) {
0693: tok = new FixedLengthLineParser(this .columnWidthMap);
0694: } else {
0695: CsvLineParser csv = new CsvLineParser(delimiter.charAt(0),
0696: quoteCharToUse);
0697: csv.setReturnEmptyStrings(true);
0698: csv.setQuoteEscaping(this .quoteEscape);
0699: tok = csv;
0700: }
0701:
0702: tok.setTrimValues(this .trimValues);
0703:
0704: try {
0705: boolean includeLine = true;
0706: boolean hasColumnFilter = this .hasColumnFilter();
0707: boolean hasLineFilter = this .lineFilter != null;
0708:
0709: while (currentLine != null) {
0710: if (this .cancelImport)
0711: break;
0712:
0713: // silently ignore empty lines...
0714: if (StringUtil.isEmptyString(currentLine)) {
0715: try {
0716: currentLine = in.readLine();
0717: } catch (IOException e) {
0718: LogMgr.logError(
0719: "TextFileParser.processOneFile()",
0720: "Error reading source file", e);
0721: currentLine = null;
0722: }
0723: continue;
0724: }
0725:
0726: if (enableMultiLineMode
0727: && StringUtil.hasOpenQuotes(currentLine,
0728: quoteCharToUse)) {
0729: try {
0730: StringBuilder b = new StringBuilder(currentLine
0731: .length() * 2);
0732: b.append(currentLine);
0733: b.append(lineEnding);
0734: String nextLine = in.readLine();
0735:
0736: // if the next line is null, the file is finished
0737: // in that case we must not "continue" in order to
0738: // catch the EOF situation correctly!
0739: if (nextLine != null) {
0740: b.append(nextLine);
0741: currentLine = b.toString();
0742: continue;
0743: }
0744: } catch (IOException e) {
0745: LogMgr
0746: .logError(
0747: "TextFileParser.processOneFile()",
0748: "Could not read next line for multi-line record",
0749: e);
0750: }
0751: }
0752:
0753: this .clearRowData();
0754:
0755: boolean processRow = receiver.shouldProcessNextRow();
0756: if (!processRow)
0757: receiver.nextRowSkipped();
0758:
0759: if (hasLineFilter && processRow) {
0760: Matcher m = this .lineFilter.matcher(currentLine);
0761: processRow = m.matches();
0762: }
0763:
0764: importRow++;
0765:
0766: if (!processRow) {
0767: {
0768: try {
0769: currentLine = in.readLine();
0770: } catch (IOException e) {
0771: LogMgr.logError(
0772: "TextFileParser.processOneFile()",
0773: "Error reading source file", e);
0774: currentLine = null;
0775: }
0776: continue;
0777: }
0778: }
0779:
0780: tok.setLine(currentLine);
0781: includeLine = true;
0782: int targetIndex = -1;
0783:
0784: for (int i = 0; i < this .colCount; i++) {
0785: String value = null;
0786: try {
0787: if (tok.hasNext()) {
0788: value = tok.getNext();
0789: if (this .columns[i] == null)
0790: continue;
0791: targetIndex = this .columnMap[i];
0792: if (targetIndex == -1)
0793: continue;
0794:
0795: int colType = this .columns[i].getDataType();
0796:
0797: if (hasColumnFilter
0798: && this .columnFilter[i] != null) {
0799: if (value == null) {
0800: includeLine = false;
0801: break;
0802: }
0803: Matcher m = this .columnFilter[i]
0804: .matcher(value);
0805: if (!m.matches()) {
0806: includeLine = false;
0807: break;
0808: }
0809: }
0810:
0811: if (this .valueModifier != null) {
0812: value = valueModifier.modifyValue(
0813: columns[i], value);
0814: }
0815:
0816: if (SqlUtil.isCharacterType(colType)) {
0817: if (clobsAreFilenames && value != null
0818: && SqlUtil.isClobType(colType)) {
0819: File cfile = new File(value);
0820: if (!cfile.isAbsolute()) {
0821: cfile = new File(this .baseDir,
0822: value);
0823: }
0824: rowData[targetIndex] = cfile;
0825: } else {
0826: if (this .decodeUnicode) {
0827: value = StringUtil
0828: .decodeUnicode(value);
0829: }
0830: if (this .emptyStringIsNull
0831: && StringUtil
0832: .isEmptyString(value)) {
0833: value = null;
0834: }
0835: rowData[targetIndex] = value;
0836: }
0837: } else if (blobsAreFilenames
0838: && value != null
0839: && SqlUtil.isBlobType(colType)) {
0840: File bfile = new File(value.trim());
0841: if (!bfile.isAbsolute()) {
0842: bfile = new File(this .baseDir,
0843: value.trim());
0844: }
0845: rowData[targetIndex] = bfile;
0846: } else {
0847: rowData[targetIndex] = converter
0848: .convertValue(value, colType);
0849: }
0850: }
0851:
0852: } catch (Exception e) {
0853: if (targetIndex != -1)
0854: rowData[targetIndex] = null;
0855: String msg = ResourceMgr
0856: .getString("ErrTextfileImport");
0857: msg = msg.replaceAll("%row%", Integer
0858: .toString(importRow));
0859: msg = StringUtil.replace(msg, "%col%",
0860: (this .columns[i] == null ? "n/a"
0861: : this .columns[i]
0862: .getColumnName()));
0863: msg = msg.replaceAll("%value%",
0864: (value == null ? "(NULL)" : value));
0865: msg = StringUtil.replace(msg, "%msg%", e
0866: .getClass().getName()
0867: + ": "
0868: + ExceptionUtil.getDisplay(e, false));
0869: this .messages.append(msg);
0870: this .messages.appendNewLine();
0871: if (this .abortOnError) {
0872: this .hasErrors = true;
0873: this .cancelImport = true;
0874: throw e;
0875: }
0876: this .hasWarnings = true;
0877: LogMgr.logWarning(
0878: "TextFileParser.processOneFile()", msg,
0879: e);
0880: if (this .errorHandler != null) {
0881: int choice = errorHandler.getActionOnError(
0882: importRow, this .columns[i]
0883: .getColumnName(),
0884: (value == null ? "(NULL)" : value),
0885: ExceptionUtil.getDisplay(e, false));
0886: if (choice == JobErrorHandler.JOB_ABORT)
0887: throw e;
0888: if (choice == JobErrorHandler.JOB_IGNORE_ALL) {
0889: this .abortOnError = false;
0890: }
0891: }
0892: this .receiver.recordRejected(currentLine);
0893: includeLine = false;
0894: }
0895: }
0896:
0897: if (this .cancelImport)
0898: break;
0899:
0900: try {
0901: if (includeLine)
0902: this .receiver.processRow(rowData);
0903: } catch (Exception e) {
0904: this .hasErrors = true;
0905: this .cancelImport = true;
0906: // processRow() will only throw an exception if abortOnError is true
0907: // so we can always re-throw the exception here.
0908: LogMgr.logError("TextFileParser.processOneFile()",
0909: "Error sending line " + importRow, e);
0910: throw e;
0911: }
0912:
0913: try {
0914: currentLine = in.readLine();
0915: } catch (IOException e) {
0916: LogMgr.logError("TextFileParser.processOneFile()",
0917: "Error reading source file", e);
0918: currentLine = null;
0919: }
0920: }
0921: } finally {
0922: FileUtil.closeQuitely(in);
0923: // do not close the ImportFileHandler here, because the DataImporter
0924: // might still need the references to the ZIP archives if running
0925: // in batch mode. So the fileHandler is closed after sending the finishImport()
0926: // to the DataImporter
0927: }
0928:
0929: }
0930:
0931: private void clearRowData() {
0932: for (int i = 0; i < this .importColCount; i++) {
0933: this .rowData[i] = null;
0934: }
0935: }
0936:
0937: /**
0938: * Retrieve the column definitions from the header line
0939: */
0940: private void readColumns(String headerLine) throws Exception {
0941: List<ColumnIdentifier> cols = new ArrayList<ColumnIdentifier>();
0942: WbStringTokenizer tok = new WbStringTokenizer(delimiter
0943: .charAt(0), this .quoteChar, false);
0944: tok.setDelimiterNeedsWhitspace(false);
0945: tok.setSourceString(headerLine);
0946: while (tok.hasMoreTokens()) {
0947: String column = tok.nextToken();
0948: cols.add(new ColumnIdentifier(column));
0949: }
0950: this .readColumnDefinition(cols);
0951: if (this .pendingImportColumns != null) {
0952: checkPendingImportColumns(this .pendingImportColumns);
0953: this .pendingImportColumns = null;
0954: }
0955: }
0956:
0957: /**
0958: * Return the column names found in the input file.
0959: * The identifiers will only have a name but
0960: * no data type assigned as this information is not available in a text file.
0961: * If the input file does not contain a header row, the columns
0962: * will be named Column1, Column2, ...
0963: *
0964: * @return the columns defined in the input file
0965: */
0966: public List<ColumnIdentifier> getColumnsFromFile() {
0967: BufferedReader in = null;
0968: List<ColumnIdentifier> cols = new ArrayList<ColumnIdentifier>();
0969: try {
0970: // Make sure the file handler is initialized as this can be called from
0971: // the outside as well.
0972: setupFileHandler();
0973: in = this .fileHandler.getMainFileReader();
0974: String firstLine = in.readLine();
0975: WbStringTokenizer tok = new WbStringTokenizer(delimiter
0976: .charAt(0), this .quoteChar, false);
0977: tok.setSourceString(firstLine);
0978: int i = 1;
0979: while (tok.hasMoreTokens()) {
0980: String column = tok.nextToken();
0981: if (column == null)
0982: continue;
0983: String name = null;
0984: if (this .withHeader) {
0985: name = column.toUpperCase();
0986: } else {
0987: name = "Column" + i;
0988: }
0989: ColumnIdentifier c = new ColumnIdentifier(name);
0990: cols.add(c);
0991: i++;
0992: }
0993: } catch (Exception e) {
0994: this .hasErrors = true;
0995: LogMgr.logError("TextFileParser.getColumnsFromFile()",
0996: "Error when reading columns", e);
0997: } finally {
0998: this .fileHandler.done();
0999: }
1000: return cols;
1001: }
1002:
1003: protected void checkTargetTable() throws SQLException {
1004: TableIdentifier tbl = getTargetTable();
1005:
1006: if (!this .connection.getMetadata().tableExists(tbl)) {
1007: String msg = ResourceMgr.getFormattedString(
1008: "ErrImportTableNotFound", tbl.getTableExpression());
1009: this .messages.append(msg);
1010: this .messages.appendNewLine();
1011: this .columns = null;
1012: this .hasErrors = true;
1013: throw new SQLException("Table " + tbl.getTableExpression()
1014: + " not found!");
1015: }
1016: }
1017:
1018: public void setupFileColumns() throws SQLException, IOException {
1019: List<ColumnIdentifier> cols = null;
1020:
1021: checkTargetTable();
1022:
1023: if (this .withHeader) {
1024: cols = this .getColumnsFromFile();
1025: } else {
1026: cols = this .getColumnsFromTargetTable();
1027: }
1028: this .setColumns(cols, false);
1029: }
1030:
1031: private List<ColumnIdentifier> getColumnsFromTargetTable()
1032: throws SQLException {
1033: return this .connection.getMetadata().getTableColumns(
1034: getTargetTable());
1035: }
1036:
1037: private TableIdentifier getTargetTable() {
1038: if (this .tableName == null)
1039: return null;
1040: TableIdentifier targetTable = new TableIdentifier(
1041: this .tableName);
1042: targetTable.setPreserveQuotes(true);
1043: if (this .targetSchema != null) {
1044: targetTable.setSchema(this .targetSchema);
1045: }
1046: if (this .connection != null) {
1047: targetTable.adjustCase(this .connection);
1048: if (targetTable.getSchema() == null) {
1049: targetTable.setSchema(this .connection
1050: .getCurrentSchema());
1051: }
1052: }
1053: return targetTable;
1054: }
1055:
1056: /**
1057: * Read the column definitions from the database.
1058: * @param cols a List of column names (String)
1059: */
1060: private void readColumnDefinition(List<ColumnIdentifier> cols)
1061: throws SQLException {
1062: try {
1063: this .colCount = cols.size();
1064: this .columns = new ColumnIdentifier[colCount];
1065: ArrayList<ColumnIdentifier> realCols = new ArrayList<ColumnIdentifier>();
1066: boolean partialImport = false;
1067: DbMetadata meta = this .connection.getMetadata();
1068: TableIdentifier targetTable = getTargetTable();
1069: List tableCols = meta.getTableColumns(targetTable);
1070: int numTableCols = tableCols.size();
1071:
1072: // Should not happen, but just to make sure ;)
1073: if (numTableCols == 0) {
1074: throw new SQLException("Table "
1075: + targetTable.getTableExpression()
1076: + " not found!");
1077: }
1078:
1079: for (int i = 0; i < cols.size(); i++) {
1080: ColumnIdentifier col = cols.get(i);
1081: String colname = col.getColumnName();
1082: if (colname.toLowerCase().startsWith(
1083: RowDataProducer.SKIP_INDICATOR)) {
1084: partialImport = true;
1085: this .columns[i] = null;
1086: } else {
1087: int index = tableCols.indexOf(col);
1088: if (index > -1) {
1089: this .columns[i] = (ColumnIdentifier) tableCols
1090: .get(index);
1091: realCols.add(this .columns[i]);
1092: } else {
1093: if (this .pendingImportColumns == null
1094: || this .pendingImportColumns
1095: .contains(colname)) {
1096: if (this .abortOnError) {
1097: String msg = ResourceMgr
1098: .getString("ErrImportColumnNotFound");
1099: msg = StringUtil.replace(msg,
1100: "%column%", colname);
1101: msg = StringUtil.replace(msg,
1102: "%table%", this .tableName);
1103: this .messages.append(msg);
1104: this .messages.appendNewLine();
1105: this .hasErrors = true;
1106: throw new SQLException(msg);
1107: } else {
1108: String msg = ResourceMgr
1109: .getString("ErrImportColumnIgnored");
1110: msg = StringUtil.replace(msg,
1111: "%column%", colname);
1112: msg = StringUtil.replace(msg,
1113: "%table%", this .tableName);
1114: LogMgr.logWarning(
1115: "TextFileParser.readColumns()",
1116: msg);
1117: this .hasWarnings = true;
1118: this .messages.append(msg);
1119: this .messages.appendNewLine();
1120: }
1121: }
1122: partialImport = true;
1123: }
1124: }
1125: }
1126:
1127: if (realCols.size() == 0) {
1128: String msg = ResourceMgr
1129: .getString("ErrImportNoColumns");
1130: msg = StringUtil
1131: .replace(msg, "%table%", this .tableName);
1132: this .hasErrors = true;
1133: this .messages.append(msg);
1134: this .messages.appendNewLine();
1135: throw new SQLException(
1136: "No column matched in import file");
1137: }
1138:
1139: // reset mapping
1140: this .importAllColumns();
1141:
1142: if (partialImport) {
1143: // only if we found at least one column to ignore, we
1144: // need to set the real column list
1145: this .setImportColumns(realCols);
1146: }
1147:
1148: } catch (SQLException e) {
1149: this .hasErrors = true;
1150: throw e;
1151: } catch (Exception e) {
1152: LogMgr.logError("TextFileParser.readColumnDefinition()",
1153: "Error when reading column definition", e);
1154: this .colCount = -1;
1155: this .columns = null;
1156: }
1157: }
1158:
1159: public int getColumnCount() {
1160: return this .colCount;
1161: }
1162:
1163: /**
1164: * Returns the column list as a comma separated string
1165: * that can be used for the WbImport command
1166: */
1167: public String getColumns() {
1168: StringBuilder result = new StringBuilder(this .colCount * 10);
1169:
1170: if (this .columnMap == null
1171: || this .importColCount == this .colCount) {
1172: for (int i = 0; i < this .colCount; i++) {
1173: if (i > 0)
1174: result.append(',');
1175: result.append(this .columns[i].getColumnName());
1176: }
1177: } else {
1178: for (int i = 0; i < this .colCount; i++) {
1179: if (i > 0)
1180: result.append(',');
1181: if (this .columnMap[i] != -1) {
1182: result.append(this .columns[i].getColumnName());
1183: } else {
1184: result.append(RowDataProducer.SKIP_INDICATOR);
1185: }
1186: }
1187: }
1188: return result.toString();
1189: }
1190:
1191: /**
1192: * Getter for property emptyStringIsNull.
1193: * @return Value of property emptyStringIsNull.
1194: */
1195: public boolean isEmptyStringIsNull() {
1196: return emptyStringIsNull;
1197: }
1198:
1199: /**
1200: * Setter for property emptyStringIsNull.
1201: * @param flag New value of property emptyStringIsNull.
1202: */
1203: public void setEmptyStringIsNull(boolean flag) {
1204: this .emptyStringIsNull = flag;
1205: }
1206:
1207: public void setDecodeUnicode(boolean flag) {
1208: this .decodeUnicode = flag;
1209: }
1210:
1211: public boolean getDecodeUnicode() {
1212: return this .decodeUnicode;
1213: }
1214:
1215: public boolean isTrimValues() {
1216: return trimValues;
1217: }
1218:
1219: public void setTrimValues(boolean trimValues) {
1220: this .trimValues = trimValues;
1221: }
1222:
1223: public void setErrorHandler(JobErrorHandler handler) {
1224: this.errorHandler = handler;
1225: }
1226:
1227: }
|