001: /*
002: * GeoTools - OpenSource mapping toolkit
003: * http://geotools.org
004: * (C) 2002-2006, Geotools Project Managment Committee (PMC)
005: * (C) 2002, Centre for Computational Geography
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2.1 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * This file is based on an original contained in the GISToolkit project:
018: * http://gistoolkit.sourceforge.net/
019: */
020: package org.geotools.data.shapefile.dbf;
021:
022: import java.io.FileInputStream;
023: import java.io.IOException;
024: import java.nio.ByteBuffer;
025: import java.nio.ByteOrder;
026: import java.nio.CharBuffer;
027: import java.nio.MappedByteBuffer;
028: import java.nio.channels.FileChannel;
029: import java.nio.channels.ReadableByteChannel;
030: import java.nio.charset.Charset;
031: import java.nio.charset.CharsetDecoder;
032: import java.util.Calendar;
033:
034: import org.geotools.data.shapefile.StreamLogging;
035: import org.geotools.resources.NIOUtilities;
036:
/**
 * A DbaseFileReader is used to read a dbase III format file. <br>
 * The general use of this class is: <CODE><PRE>
 *
 * FileChannel in = new FileInputStream("thefile.dbf").getChannel();
 * DbaseFileReader r = new DbaseFileReader(in, false);
 * Object[] fields = new Object[r.getHeader().getNumFields()];
 * while (r.hasNext()) {
 *     r.readEntry(fields);
 *     // do stuff
 * }
 * r.close();
 *
 * </PRE></CODE> For consumers who wish to be a bit more selective with their reading
 * of rows, the Row object has been added. The semantics are the same as using
 * the readEntry method, but remember that the Row object is always the same.
 * The values are parsed as they are read, so it pays to copy them out (as each
 * call to Row.read() will result in an expensive String parse). <br>
 * <b>EACH CALL TO readEntry OR readRow ADVANCES THE FILE!</b><br>
 * An example of using the Row method of reading: <CODE><PRE>
 *
 * FileChannel in = new FileInputStream("thefile.dbf").getChannel();
 * DbaseFileReader r = new DbaseFileReader(in, false);
 * int fields = r.getHeader().getNumFields();
 * while (r.hasNext()) {
 *     DbaseFileReader.Row row = r.readRow();
 *     for (int i = 0; i &lt; fields; i++) {
 *         // do stuff
 *         Foo.bar(row.read(i));
 *     }
 * }
 * r.close();
 *
 * </PRE></CODE>
 *
 * @author Ian Schneider
 * @source $URL: http://svn.geotools.org/geotools/tags/2.4.1/modules/plugin/shapefile/src/main/java/org/geotools/data/shapefile/dbf/DbaseFileReader.java $
 */
065: public class DbaseFileReader {
066:
067: public final class Row {
068: public Object read(int column) throws IOException {
069: int offset = getOffset(column);
070: return readObject(offset, column);
071: }
072:
073: public String toString() {
074: StringBuffer ret = new StringBuffer("DBF Row - ");
075: for (int i = 0; i < header.getNumFields(); i++) {
076: ret.append(header.getFieldName(i)).append(": \"");
077: try {
078: ret.append(this .read(i));
079: } catch (IOException ioe) {
080: ret.append(ioe.getMessage());
081: }
082: ret.append("\" ");
083: }
084: return ret.toString();
085: }
086: }
087:
088: DbaseFileHeader header;
089:
090: ByteBuffer buffer;
091:
092: ReadableByteChannel channel;
093:
094: CharBuffer charBuffer;
095:
096: Charset charset;
097:
098: CharsetDecoder decoder;
099:
100: char[] fieldTypes;
101:
102: int[] fieldLengths;
103:
104: int cnt = 1;
105:
106: Row row;
107:
108: protected boolean useMemoryMappedBuffer;
109:
110: protected final boolean randomAccessEnabled;
111:
112: protected int currentOffset = 0;
113: private StreamLogging streamLogger = new StreamLogging(
114: "Dbase File Reader");
115:
116: private Charset stringCharset;
117:
118: /**
119: * Creates a new instance of DBaseFileReader
120: *
121: * @param channel
122: * The readable channel to use.
123: * @throws IOException
124: * If an error occurs while initializing.
125: */
126: public DbaseFileReader(ReadableByteChannel channel,
127: boolean useMemoryMappedBuffer) throws IOException {
128: this (channel, useMemoryMappedBuffer, Charset
129: .forName("ISO-8859-1"));
130: }
131:
132: /**
133: * Creates a new instance of DBaseFileReader
134: *
135: * @param channel
136: * The readable channel to use.
137: * @throws IOException
138: * If an error occurs while initializing.
139: */
140: public DbaseFileReader(ReadableByteChannel channel,
141: boolean useMemoryMappedBuffer, Charset charset)
142: throws IOException {
143: this .channel = channel;
144: this .stringCharset = charset;
145: this .charset = Charset.forName("ISO-8859-1"); // charset;
146:
147: this .useMemoryMappedBuffer = useMemoryMappedBuffer;
148: this .randomAccessEnabled = (channel instanceof FileChannel);
149: streamLogger.open();
150: header = new DbaseFileHeader();
151: header.readHeader(channel);
152:
153: init();
154: }
155:
156: protected int fill(ByteBuffer buffer, ReadableByteChannel channel)
157: throws IOException {
158: int r = buffer.remaining();
159: // channel reads return -1 when EOF or other error
160: // because they a non-blocking reads, 0 is a valid return value!!
161: while (buffer.remaining() > 0 && r != -1) {
162: r = channel.read(buffer);
163: }
164: if (r == -1) {
165: buffer.limit(buffer.position());
166: }
167: return r;
168: }
169:
170: private void bufferCheck() throws IOException {
171: // remaining is less than record length
172: // compact the remaining data and read again
173: if (!buffer.isReadOnly()
174: && buffer.remaining() < header.getRecordLength()) {
175: // if (!this.useMemoryMappedBuffer) {
176: this .currentOffset += buffer.position();
177: // }
178: buffer.compact();
179: fill(buffer, channel);
180: buffer.position(0);
181: }
182: }
183:
184: private int getOffset(int column) {
185: int offset = 0;
186: for (int i = 0, ii = column; i < ii; i++) {
187: offset += fieldLengths[i];
188: }
189: return offset;
190: }
191:
192: private void init() throws IOException {
193: // create the ByteBuffer
194: // if we have a FileChannel, lets map it
195: if (channel instanceof FileChannel
196: && this .useMemoryMappedBuffer) {
197: FileChannel fc = (FileChannel) channel;
198: buffer = fc
199: .map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
200: buffer.position((int) fc.position());
201: this .currentOffset = 0;
202: } else {
203: // Force useMemoryMappedBuffer to false
204: this .useMemoryMappedBuffer = false;
205: // Some other type of channel
206: // start with a 8K buffer, should be more than adequate
207: int size = 8 * 1024;
208: // if for some reason its not, resize it
209: size = header.getRecordLength() > size ? header
210: .getRecordLength() : size;
211: buffer = ByteBuffer.allocateDirect(size);
212: // fill it and reset
213: fill(buffer, channel);
214: buffer.flip();
215: this .currentOffset = header.getHeaderLength();
216: }
217:
218: // The entire file is in little endian
219: buffer.order(ByteOrder.LITTLE_ENDIAN);
220:
221: // Set up some buffers and lookups for efficiency
222: fieldTypes = new char[header.getNumFields()];
223: fieldLengths = new int[header.getNumFields()];
224: for (int i = 0, ii = header.getNumFields(); i < ii; i++) {
225: fieldTypes[i] = header.getFieldType(i);
226: fieldLengths[i] = header.getFieldLength(i);
227: }
228:
229: charBuffer = CharBuffer.allocate(header.getRecordLength() - 1);
230: decoder = charset.newDecoder();
231:
232: row = new Row();
233: }
234:
235: /**
236: * Get the header from this file. The header is read upon instantiation.
237: *
238: * @return The header associated with this file or null if an error
239: * occurred.
240: */
241: public DbaseFileHeader getHeader() {
242: return header;
243: }
244:
245: /**
246: * Clean up all resources associated with this reader.<B>Highly recomended.</B>
247: *
248: * @throws IOException
249: * If an error occurs.
250: */
251: public void close() throws IOException {
252: if (channel.isOpen()) {
253: channel.close();
254: streamLogger.close();
255: }
256: if (buffer instanceof MappedByteBuffer) {
257: NIOUtilities.clean(buffer);
258: }
259:
260: buffer = null;
261: channel = null;
262: charBuffer = null;
263: decoder = null;
264: header = null;
265: row = null;
266: }
267:
268: /**
269: * Query the reader as to whether there is another record.
270: *
271: * @return True if more records exist, false otherwise.
272: */
273: public boolean hasNext() {
274: return cnt < header.getNumRecords() + 1;
275: }
276:
277: /**
278: * Get the next record (entry). Will return a new array of values.
279: *
280: * @throws IOException
281: * If an error occurs.
282: * @return A new array of values.
283: */
284: public Object[] readEntry() throws IOException {
285: return readEntry(new Object[header.getNumFields()]);
286: }
287:
288: public Row readRow() throws IOException {
289: read();
290: return row;
291: }
292:
293: /**
294: * Skip the next record.
295: *
296: * @throws IOException
297: * If an error occurs.
298: */
299: public void skip() throws IOException {
300: boolean foundRecord = false;
301: while (!foundRecord) {
302:
303: bufferCheck();
304:
305: // read the deleted flag
306: char tempDeleted = (char) buffer.get();
307:
308: // skip the next bytes
309: buffer.position(buffer.position()
310: + header.getRecordLength() - 1); // the
311: // 1 is
312: // for
313: // the
314: // deleted
315: // flag
316: // just
317: // read.
318:
319: // add the row if it is not deleted.
320: if (tempDeleted != '*') {
321: foundRecord = true;
322: }
323: }
324: }
325:
326: /**
327: * Copy the next record into the array starting at offset.
328: *
329: * @param entry
330: * Th array to copy into.
331: * @param offset
332: * The offset to start at
333: * @throws IOException
334: * If an error occurs.
335: * @return The same array passed in.
336: */
337: public Object[] readEntry(Object[] entry, final int offset)
338: throws IOException {
339: if (entry.length - offset < header.getNumFields()) {
340: throw new ArrayIndexOutOfBoundsException();
341: }
342:
343: read();
344:
345: // retrieve the record length
346: final int numFields = header.getNumFields();
347:
348: int fieldOffset = 0;
349: for (int j = 0; j < numFields; j++) {
350: entry[j + offset] = readObject(fieldOffset, j);
351: fieldOffset += fieldLengths[j];
352: }
353:
354: return entry;
355: }
356:
357: /**
358: * Transfer, by bytes, the next record to the writer.
359: */
360: public void transferTo(DbaseFileWriter writer) throws IOException {
361: bufferCheck();
362: buffer.limit(buffer.position() + header.getRecordLength());
363: writer.channel.write(buffer);
364: buffer.limit(buffer.capacity());
365:
366: cnt++;
367: }
368:
369: private void read() throws IOException {
370: boolean foundRecord = false;
371: while (!foundRecord) {
372:
373: bufferCheck();
374:
375: // read the deleted flag
376: char deleted = (char) buffer.get();
377: if (deleted == '*') {
378: continue;
379: }
380:
381: charBuffer.position(0);
382: buffer.limit(buffer.position() + header.getRecordLength()
383: - 1);
384: decoder.decode(buffer, charBuffer, true);
385: buffer.limit(buffer.capacity());
386: charBuffer.flip();
387:
388: foundRecord = true;
389: }
390:
391: cnt++;
392: }
393:
394: /**
395: * Copy the next entry into the array.
396: *
397: * @param entry
398: * The array to copy into.
399: * @throws IOException
400: * If an error occurs.
401: * @return The same array passed in.
402: */
403: public Object[] readEntry(Object[] entry) throws IOException {
404: return readEntry(entry, 0);
405: }
406:
407: private Object readObject(final int fieldOffset, final int fieldNum)
408: throws IOException {
409: final char type = fieldTypes[fieldNum];
410: final int fieldLen = fieldLengths[fieldNum];
411: Object object = null;
412:
413: // System.out.println( charBuffer.subSequence(fieldOffset,fieldOffset +
414: // fieldLen));
415:
416: if (fieldLen > 0) {
417:
418: switch (type) {
419: // (L)logical (T,t,F,f,Y,y,N,n)
420: case 'l':
421: case 'L':
422: switch (charBuffer.charAt(fieldOffset)) {
423:
424: case 't':
425: case 'T':
426: case 'Y':
427: case 'y':
428: object = Boolean.TRUE;
429: break;
430: case 'f':
431: case 'F':
432: case 'N':
433: case 'n':
434: object = Boolean.FALSE;
435: break;
436: default:
437:
438: throw new IOException("Unknown logical value : '"
439: + charBuffer.charAt(fieldOffset) + "'");
440: }
441: break;
442: // (C)character (String)
443: case 'c':
444: case 'C':
445: // oh, this seems like a lot of work to parse strings...but,
446: // For some reason if zero characters ( (int) char == 0 ) are
447: // allowed
448: // in these strings, they do not compare correctly later on down
449: // the
450: // line....
451: int start = fieldOffset;
452: int end = fieldOffset + fieldLen - 1;
453: // trim off whitespace and 'zero' chars
454: while (start < end) {
455: char c = charBuffer.get(start);
456: if (c == 0 || Character.isWhitespace(c)) {
457: start++;
458: } else
459: break;
460: }
461: while (end > start) {
462: char c = charBuffer.get(end);
463: if (c == 0 || Character.isWhitespace(c)) {
464: end--;
465: } else
466: break;
467: }
468: // set up the new indexes for start and end
469: charBuffer.position(start).limit(end + 1);
470: String s = charBuffer.toString();
471: // to support some foreign languages, such as Chinese, we have to convert
472: // from ISO-8859-1 to a user provided charset
473: if (!stringCharset.displayName().equals("ISO-8859-1"))
474: s = new String(s.getBytes("ISO-8859-1"),
475: stringCharset.displayName());
476: // this resets the limit...
477: charBuffer.clear();
478: object = s;
479: break;
480: // (D)date (Date)
481: case 'd':
482: case 'D':
483: try {
484: String tempString = charBuffer.subSequence(
485: fieldOffset, fieldOffset + 4).toString();
486: int tempYear = Integer.parseInt(tempString);
487: tempString = charBuffer.subSequence(
488: fieldOffset + 4, fieldOffset + 6)
489: .toString();
490: int tempMonth = Integer.parseInt(tempString) - 1;
491: tempString = charBuffer.subSequence(
492: fieldOffset + 6, fieldOffset + 8)
493: .toString();
494: int tempDay = Integer.parseInt(tempString);
495: Calendar cal = Calendar.getInstance();
496: cal.clear();
497: cal.set(Calendar.YEAR, tempYear);
498: cal.set(Calendar.MONTH, tempMonth);
499: cal.set(Calendar.DAY_OF_MONTH, tempDay);
500: object = cal.getTime();
501: } catch (NumberFormatException nfe) {
502: // todo: use progresslistener, this isn't a grave error.
503: }
504: break;
505:
506: // (F)floating (Double)
507: case 'n':
508: case 'N':
509: try {
510: final Class clazz = header.getFieldClass(fieldNum);
511: final String number = extractNumberString(
512: charBuffer, fieldOffset, fieldLen);
513: if (clazz == Integer.class) {
514: object = new Integer(number);
515: break;
516: } else if (clazz == Long.class) {
517: object = new Long(number);
518: break;
519: }
520: // else will fall through to the floating point number
521: } catch (NumberFormatException e) {
522:
523: // todo: use progresslistener, this isn't a grave error.
524:
525: // don't do this!!! the Double parse will be attemted as we
526: // fall
527: // through, so no need to create a new Object. -IanS
528: // object = new Integer(0);
529:
530: // Lets try parsing a long instead...
531: try {
532: object = new Long(extractNumberString(
533: charBuffer, fieldOffset, fieldLen));
534: break;
535: } catch (NumberFormatException e2) {
536:
537: }
538: }
539:
540: case 'f':
541: case 'F': // floating point number
542: try {
543:
544: object = new Double(extractNumberString(charBuffer,
545: fieldOffset, fieldLen));
546: } catch (NumberFormatException e) {
547: // todo: use progresslistener, this isn't a grave error,
548: // though it
549: // does indicate something is wrong
550:
551: // okay, now whatever we got was truly undigestable. Lets go
552: // with
553: // a zero Double.
554: object = new Double(0.0);
555: }
556: break;
557: default:
558: throw new IOException("Invalid field type : " + type);
559: }
560:
561: }
562: return object;
563: }
564:
565: /**
566: * @param charBuffer2 TODO
567: * @param fieldOffset
568: * @param fieldLen
569: */
570: private final String extractNumberString(
571: final CharBuffer charBuffer2, final int fieldOffset,
572: final int fieldLen) {
573: String thing = charBuffer2.subSequence(fieldOffset,
574: fieldOffset + fieldLen).toString().trim();
575: return thing;
576: }
577:
578: public static void main(String[] args) throws Exception {
579: FileChannel channel = new FileInputStream(args[0]).getChannel();
580: DbaseFileReader reader = new DbaseFileReader(channel, false,
581: Charset.forName("ISO-8859-1"));
582: System.out.println(reader.getHeader());
583: int r = 0;
584: while (reader.hasNext()) {
585: System.out.println(++r + ","
586: + java.util.Arrays.asList(reader.readEntry()));
587: }
588: reader.close();
589: }
590:
591: }
|