001: /*
002: * $Id: TaggedEBCDICTable.java,v 1.14 2005/12/20 18:32:27 ahimanikya Exp $
003: * =======================================================================
004: * Copyright (c) 2002-2004 Axion Development Team. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above
011: * copyright notice, this list of conditions and the following
012: * disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * 3. The names "Tigris", "Axion", nor the names of its contributors may
020: * not be used to endorse or promote products derived from this
021: * software without specific prior written permission.
022: *
023: * 4. Products derived from this software may not be called "Axion", nor
024: * may "Tigris" or "Axion" appear in their names without specific prior
025: * written permission.
026:
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
030: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
032: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
033: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
034: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
035: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
036: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
037: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
038: * =======================================================================
039: */
040: package org.axiondb.engine.tables;
041:
042: import java.io.IOException;
043: import java.io.ObjectInputStream;
044: import java.io.ObjectOutputStream;
045: import java.util.Arrays;
046: import java.util.HashSet;
047: import java.util.Properties;
048: import java.util.Set;
049:
050: import org.axiondb.AxionException;
051: import org.axiondb.DataType;
052: import org.axiondb.Database;
053: import org.axiondb.ExternalTable;
054: import org.axiondb.Row;
055: import org.axiondb.engine.rows.SimpleRow;
056: import org.axiondb.io.BufferedDataInputStream;
057: import org.axiondb.io.BufferedDataOutputStream;
058: import org.axiondb.util.AsciiEbcdicEncoder;
059:
060: /**
061: * A disk-resident Fixed Width Flatfile {@link org.axiondb.Table}.
062: * <p>
063: * Example: create external table test1( col1 datatype, col2 datatype, ...)
064: * organization(loadtype='taggedebcdic' RecordLength='213', HeaderBytesOffset='24',
065: * tagLength='4', minTagCount='1', maxTagCount='48', tagByteCount='0',
066: * recordTrailerByteCount='54' FileName='C:/hawaii/test/input_data.txt',
067: * TagByteCount='2', en='cp037')
068: *
069: * @version $Revision: 1.14 $ $Date: 2005/12/20 18:32:27 $
070: * @author Sudhi Seshachala
071: * @author Jonathan Giron
072: * @author Ahimanikya Satapathy
073: */
074:
075: public class TaggedEBCDICTable extends BaseFlatfileTable {
076:
077: public static final String PROP_HEADERBYTESOFFSET = "HEADERBYTESOFFSET";
078: public static final String PROP_RECORDLENGTH = "RECORDLENGTH";
079: public static final String PROP_TAGLENGTH = "TAGLENGTH";
080: public static final String PROP_MINTAGCOUNT = "MINTAGCOUNT";
081: public static final String PROP_MAXTAGCOUNT = "MAXTAGCOUNT";
082: public static final String PROP_RECORDTRAILERBYTECOUNT = "RECORDTRAILERBYTECOUNT";
083: public static final String PROP_TAGBYTECOUNT = "TAGBYTECOUNT";
084: public static final String PROP_ENCODING = "EN";
085: private static final char EBCDIC_FILLER = '@';
086: private static final char CR = '\r';
087: private static final char NL = '\n';
088:
089: private static final Set PROPERTY_KEYS = new HashSet(8);
090: private static final Set REQUIRED_KEYS = new HashSet(8);
091:
092: static {
093: PROPERTY_KEYS.add(PROP_HEADERBYTESOFFSET);
094: PROPERTY_KEYS.add(PROP_RECORDLENGTH);
095: PROPERTY_KEYS.add(PROP_TAGLENGTH);
096: PROPERTY_KEYS.add(PROP_MINTAGCOUNT);
097: PROPERTY_KEYS.add(PROP_MAXTAGCOUNT);
098: PROPERTY_KEYS.add(PROP_RECORDTRAILERBYTECOUNT);
099: PROPERTY_KEYS.add(PROP_TAGBYTECOUNT);
100: PROPERTY_KEYS.add(PROP_ENCODING);
101: }
102:
103: public TaggedEBCDICTable(String name, Database db)
104: throws AxionException {
105: super (name, db, new TaggedEBCDICTableLoader());
106: setType(ExternalTable.TAGGED_EBCDIC_TABLE_TYPE);
107: }
108:
109: protected Row getRowByOffset(int idToAssign, long ptr)
110: throws AxionException {
111: try {
112: BufferedDataInputStream data = getInputStream();
113: int colCount = getColumnCount();
114: Row row = new SimpleRow(idToAssign, colCount);
115: _tagID = _minTagCount;
116:
117: synchronized (data) {
118: data.seek(ptr);
119: while (_tagID < _maxTagCount) {
120:
121: byte byteData[] = null;
122: byteData = readTaggedRowColumn(data);
123: if (byteData == null) {
124: break;
125: }
126: if (byteData != null) {
127: String columnValue = new String(byteData);
128: if (columnValue.length() == 0) {
129: columnValue = null;
130: }
131:
132: trySettingColumn(idToAssign, row, _tagID - 1,
133: columnValue);
134:
135: // found invalid data while trying to set column value
136: // so ignore this line
137: if (row == null) {
138: break;
139: }
140: }
141: }
142: _headerBytesOffset += _recordTrailerByteCount;
143: }
144: return row;
145: } catch (IOException e) {
146: e.printStackTrace();
147: throw new AxionException(e);
148: }
149: }
150:
151: private byte[] readTaggedRowColumn(BufferedDataInputStream rFile)
152: throws IOException {
153: // decode tagID and column length
154: // The tagid and column length are Binary encoded
155: // use left shift 8 and use the next byte to mask
156: // FIXME: Read Left/Right shift from metadata
157: _tagBuf = new byte[_tagLength];
158:
159: int bytesRead = rFile.read(_tagBuf);
160: byte[] byteData = null;
161: if (bytesRead == -1) {
162: return null; // EOF
163: }
164: _tagID = (_tagBuf[0] << 8) | _tagBuf[1];
165: int colLength = (_tagBuf[_tagByteCount] << 8)
166: | _tagBuf[_tagByteCount + 1];
167: _headerBytesOffset += _tagLength;
168: if ((_tagID <= _maxTagCount) && (_tagID >= _minTagCount)
169: && colLength >= 0) {
170: byteData = new byte[colLength];
171: bytesRead = rFile.read(byteData);
172: if (bytesRead == -1) {
173: return null;
174: }
175:
176: //Just for testing, convert to ascii
177: if (_encoding != null && byteData != null) {
178: AsciiEbcdicEncoder.convertEbcdicToAscii(byteData);
179: }
180:
181: _headerBytesOffset += colLength;
182:
183: }
184: return byteData;
185: }
186:
187: protected long ignoreRowsToSkip() throws AxionException {
188: long offset = super .ignoreRowsToSkip();
189: if (offset > 0) {
190: return offset;
191: } else if (_headerBytesOffset > 0) {
192: return _headerBytesOffset;
193: }
194: return 0;
195: }
196:
197: protected void initializeTable() throws AxionException {
198: super .initializeTable();
199: _tagBuf = new byte[_tagLength];
200:
201: }
202:
203: public long getCurrentParsePosition() {
204: return _headerBytesOffset;
205: }
206:
207: private int getColumnSize(int index) {
208: return getColumn(index).getDataType().getColumnDisplaySize();
209: }
210:
211: public boolean loadExternalTable(Properties props)
212: throws AxionException {
213: context = new TaggedEBCDICTableContext();
214: return super .loadExternalTable(props);
215: }
216:
217: public Properties getTableProperties() {
218: return context.getTableProperties();
219: }
220:
221: protected void parseTableProperties(ObjectInputStream in)
222: throws AxionException {
223: try {
224: _tagLength = in.readInt();
225: _minTagCount = in.readInt();
226: _maxTagCount = in.readInt();
227: _recordTrailerByteCount = in.readInt();
228: _recordLength = in.readInt();
229: _tagByteCount = in.readInt();
230: _encoding = in.readUTF();
231: _headerBytesOffset = in.readInt();
232: _fileName = in.readUTF();
233:
234: context = new TaggedEBCDICTableContext();
235: context.updateProperties();
236: context.readOrSetDefaultProperties(context
237: .getTableProperties());
238: createOrLoadDataFile();
239: } catch (IOException e) {
240: throw new AxionException(
241: "Unable to parse meta file for table " + getName(),
242: e);
243: }
244: }
245:
246: protected synchronized void renameTableFiles(String oldName,
247: String name) {
248: super .renameTableFiles(oldName, name);
249: _recordLength = 0;
250: for (int i = 0, I = getColumnCount(); i < I; i++) {
251: _recordLength += this .getColumnSize(i);
252: }
253: context.setProperty(PROP_RECORDLENGTH, Integer
254: .toString(_recordLength));
255: }
256:
257: protected void writeTableProperties(ObjectOutputStream out)
258: throws AxionException {
259: try {
260: out.writeInt(_tagLength);
261: out.writeInt(_minTagCount);
262: out.writeInt(_maxTagCount);
263: out.writeInt(_recordTrailerByteCount);
264: out.writeInt(_recordLength);
265: out.writeInt(_tagByteCount);
266: if (_fileName != null && _encoding != null) {
267: out.writeUTF(_encoding);
268: out.writeInt(_headerBytesOffset);
269: out.writeUTF(_fileName);
270: }
271: } catch (IOException e) {
272: throw new AxionException(
273: "Unable to write meta file for table " + getName(),
274: e);
275: }
276: }
277:
278: protected void writeHeader(BufferedDataOutputStream dataFile)
279: throws AxionException {
280: }
281:
282: protected void writeRow(BufferedDataOutputStream out, Row row)
283: throws AxionException {
284: byte trailerBytes[] = new byte[_recordTrailerByteCount];
285: java.util.Arrays.fill(trailerBytes, (byte) EBCDIC_FILLER);
286: try {
287:
288: for (int i = 0, I = getColumnCount(); i < I; i++) {
289: Object data = row.get(i);
290: DataType dataType = getColumn(i).getDataType();
291: byte dataBytes[] = null;
292: if (data instanceof byte[]) {
293: if (_encoding != null && data != null) {
294: AsciiEbcdicEncoder
295: .convertEbcdicToAscii((byte[]) data);
296: }
297: } else {
298: if (_encoding != null && data != null) {
299: dataBytes = dataType.toString(data).getBytes();
300: AsciiEbcdicEncoder
301: .convertEbcdicToAscii(dataBytes);
302: } else {
303: dataBytes = dataType.toString(data).getBytes();
304: }
305: }
306: row.set(i, data);
307: out.write(writeColumn(i, new String(dataBytes)));
308: }
309: out.write(trailerBytes);
310: } catch (Exception e) {
311: throw new AxionException(e.getMessage());
312: }
313: }
314:
315: private byte[] writeColumn(int colIndex, String value) {
316: if (value == null) {
317: value = " ";
318: }
319:
320: byte byteData[] = new byte[getColumnSize(colIndex)];
321: Arrays.fill(byteData, (byte) EBCDIC_FILLER);
322: byte colValue[] = value.getBytes();
323:
324: // truncate if required
325: int len = colValue.length <= byteData.length ? colValue.length
326: : byteData.length;
327: System.arraycopy(colValue, 0, byteData, 0, len);
328: return byteData;
329: }
330:
331: protected boolean isEndOfRecord(int recLength, int nextChar,
332: BufferedDataInputStream data) {
333: return recLength >= _recordLength || isNewLine(nextChar)
334: || isCarriageReturn(nextChar) || isEOF(nextChar);
335: }
336:
337: protected boolean isCarriageReturn(int nextChar) {
338: return nextChar == CR;
339: }
340:
341: protected boolean isNewLine(int nextChar) {
342: return nextChar == NL;
343: }
344:
345: private class TaggedEBCDICTableContext extends
346: BaseFlatfileTableOrganizationContext {
347:
348: public Set getPropertyKeys() {
349: Set baseKeys = super .getPropertyKeys();
350: Set keys = new HashSet(baseKeys.size()
351: + PROPERTY_KEYS.size());
352: keys.addAll(baseKeys);
353: keys.addAll(PROPERTY_KEYS);
354:
355: return keys;
356: }
357:
358: public Set getRequiredPropertyKeys() {
359: Set baseRequiredKeys = super .getRequiredPropertyKeys();
360: Set keys = new HashSet(baseRequiredKeys.size()
361: + REQUIRED_KEYS.size());
362: keys.addAll(baseRequiredKeys);
363: keys.addAll(REQUIRED_KEYS);
364:
365: return keys;
366: }
367:
368: public void readOrSetDefaultProperties(Properties props)
369: throws AxionException {
370: super .readOrSetDefaultProperties(props);
371:
372: // Set record length, if not supplied by user then compute it
373: try {
374: String recLen = props.getProperty(PROP_RECORDLENGTH);
375: if (isNullString(recLen)) {
376: _recordLength = 0;
377: } else {
378: _recordLength = Integer.parseInt(recLen);
379: }
380: } catch (NumberFormatException e) {
381: _recordLength = 0;
382: }
383:
384: // compute record length; if not set yet
385: if (_recordLength == 0) {
386: for (int i = 0, I = getColumnCount(); i < I; i++) {
387: _recordLength += getColumnSize(i);
388: }
389: }
390:
391: String headerBytesOffset = props
392: .getProperty(PROP_HEADERBYTESOFFSET);
393: if (isNullString(headerBytesOffset)) {
394: _headerBytesOffset = 0;
395: } else {
396: _headerBytesOffset = Integer
397: .parseInt(headerBytesOffset);
398: }
399:
400: String tagLen = props.getProperty(PROP_TAGLENGTH);
401: if (isNullString(tagLen)) {
402: _tagLength = 0;
403: } else {
404: _tagLength = Integer.parseInt(tagLen);
405: }
406:
407: String recordTrailerByteCount = props
408: .getProperty(PROP_RECORDTRAILERBYTECOUNT);
409: if (isNullString(recordTrailerByteCount)) {
410: _recordTrailerByteCount = 0;
411: } else {
412: _recordTrailerByteCount = Integer
413: .parseInt(recordTrailerByteCount);
414: }
415:
416: String minTagCount = props.getProperty(PROP_MINTAGCOUNT);
417: if (isNullString(minTagCount)) {
418: _minTagCount = 0;
419: } else {
420: _minTagCount = Integer.parseInt(minTagCount);
421: }
422:
423: String maxTagCount = props.getProperty(PROP_MAXTAGCOUNT);
424: if (isNullString(minTagCount)) {
425: _maxTagCount = 0;
426: } else {
427: _maxTagCount = Integer.parseInt(maxTagCount);
428: }
429: String tagByteCount = props.getProperty(PROP_TAGBYTECOUNT);
430: if (isNullString(tagByteCount)) {
431: _tagByteCount = 0;
432: } else {
433: _tagByteCount = Integer.parseInt(tagByteCount);
434: }
435: _encoding = props.getProperty(PROP_ENCODING);
436:
437: }
438:
439: public void updateProperties() {
440: super .updateProperties();
441:
442: _props.setProperty(PROP_LOADTYPE,
443: ExternalTableFactory.TYPE_TAGGEDEBCDIC);
444: _props.setProperty(PROP_HEADERBYTESOFFSET, Integer
445: .toString(_headerBytesOffset));
446: _props.setProperty(PROP_RECORDLENGTH, Integer
447: .toString(_recordLength));
448: _props.setProperty(PROP_TAGLENGTH, Integer
449: .toString(_tagLength));
450: _props.setProperty(PROP_MINTAGCOUNT, Integer
451: .toString(_minTagCount));
452: _props.setProperty(PROP_MAXTAGCOUNT, Integer
453: .toString(_maxTagCount));
454: _props.setProperty(PROP_RECORDTRAILERBYTECOUNT, Integer
455: .toString(_recordTrailerByteCount));
456: _props.setProperty(PROP_TAGBYTECOUNT, Integer
457: .toString(_tagByteCount));
458: _props.setProperty(PROP_ENCODING, _encoding);
459: }
460: }
461:
462: private int _tagID;
463: private int _tagLength;
464: private int _maxTagCount;
465: private int _minTagCount;
466: private int _tagByteCount;
467: private int _recordTrailerByteCount;
468: private int _recordLength;
469: private String _encoding;
470:
471: protected long _pos;
472: protected int _index;
473: protected int _count;
474: private byte[] _tagBuf = null;
475:
476: private int _headerBytesOffset;
477:
478: }
|