package org.geotools.dbffile;

import com.vividsolutions.jump.io.EndianDataInputStream;

import java.io.*;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.Vector;
/**
 *
 * This class represents a DBF (or DBase) file.<p>
 * Construct it with a filename (including the .dbf);
 * this causes the header and field definitions to be read.<p>
 * Later queries return rows or columns of the database.
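 * <p>
 * A minimal usage sketch (the file name <code>example.dbf</code> and the
 * column index are hypothetical):
 * <pre>
 * DbfFile dbf = new DbfFile("example.dbf");
 * System.out.println(dbf.getNumFields() + " fields, " + dbf.getLastRec() + " records");
 * StringBuffer row = dbf.GetDbfRec(0);          // fetch the first record
 * Object value = dbf.ParseRecordColumn(row, 0); // String, Double, Integer or Date
 * dbf.close();
 * </pre>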
 *<hr>
 * @author <a href="mailto:ian@geog.leeds.ac.uk">Ian Turton</a> Centre for
 * Computational Geography, University of Leeds, LS2 9JT, 1998.
 *
 */
public class DbfFile implements DbfConsts {
    static final boolean DEBUG = false;
    int dbf_id;
    int last_update_d;
    int last_update_m;
    int last_update_y;
    int last_rec;
    int data_offset;
    int rec_size;
    boolean hasmemo;
    public EndianDataInputStream dFile;
    RandomAccessFile rFile;
    int filesize;
    int numfields;
    public DbfFieldDef[] fielddef;
    public static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat(
            "yyyyMMdd") {
        {
            // DZ
            setLenient(true);
        }
    };

    protected DbfFile() {
        //for testing.
    }

    /**
     * Constructor, opens the file and reads the header information.
     * @param file The file to be opened, includes path and .dbf
     * @exception java.io.IOException If the file can't be opened.
     * @exception DbfFileException If there is an error reading header.
     */
    public DbfFile(String file) throws java.io.IOException,
            DbfFileException {
        if (DEBUG) {
            System.out
                    .println("---->uk.ac.leeds.ccg.dbffile.DbfFile constructed. Will identify itself as DbFi>");
        }

        InputStream in = new FileInputStream(file);
        EndianDataInputStream sfile = new EndianDataInputStream(in);
        rFile = new RandomAccessFile(new File(file), "r");

        if (DEBUG) {
            System.out.println("Dbf file has been initialized");
        }

        init(sfile);
    }

    /**
     * Returns the date of the last update of the file as a string.
     */
    public String getLastUpdate() {
        String date = last_update_d + "/" + last_update_m + "/" + last_update_y;

        return date;
    }

    /**
     * Returns the number of records in the database file.
     */
    public int getLastRec() {
        return last_rec;
    }

    /**
     * Returns the size of the records in the database file.
     */
    public int getRecSize() {
        return rec_size;
    }

    /**
     * Returns the number of fields in the records in the database file.
     */
    public int getNumFields() {
        return numfields;
    }

    public String getFieldName(int row) {
        return (fielddef[row].fieldname).toString();
    }

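    /**
     * Returns the type of the given field as a string: "STRING" for
     * character fields, "INTEGER" or "DOUBLE" for numeric fields (depending
     * on the number of decimals), "DOUBLE" for float fields and "DATE" for
     * date fields. Any other field type is reported as "STRING".
     * @param row the index of the field
     */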
    public String getFieldType(int row) {
        char type = fielddef[row].fieldtype;
        String realtype = "";

        switch (type) {
        case 'C':
            realtype = "STRING";

            break;

        case 'N':

            if (fielddef[row].fieldnumdec == 0) {
                realtype = "INTEGER";
            } else {
                realtype = "DOUBLE";
            }

            break;

        case 'F':
            realtype = "DOUBLE";

            break;

        case 'D': //Added by [Jon Aquino]
            realtype = "DATE";

            break;

        default:
            realtype = "STRING";

            break;
        }

        return realtype;
    }

    /**
     * Returns the size of the database file.
     */
    public int getFileSize() {
        return filesize;
    }

    /**
     * Initializer; allows the use of multiple constructors in later
     * versions.
     */
    private void init(EndianDataInputStream sfile) throws IOException,
            DbfFileException {
        DbfFileHeader head = new DbfFileHeader(sfile);
        int widthsofar;

        if (DEBUG) {
            System.out.println("Dbf file has been initialized");
        }

        dFile = sfile;

        fielddef = new DbfFieldDef[numfields];
        widthsofar = 1;

        for (int index = 0; index < numfields; index++) {
            fielddef[index] = new DbfFieldDef();
            fielddef[index].setup(widthsofar, dFile);
            widthsofar += fielddef[index].fieldlen;
        }

        sfile.skipBytes(1); // end of field defs marker
    }

    /**
     * Gets the next record and returns it as a string. This method works on
     * a sequential stream and cannot go backwards. Only useful if you want
     * to read the whole file in one pass.
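     * <p>
     * A minimal sketch of a sequential scan, assuming <code>dbf</code> is an
     * open DbfFile positioned at the start of the data section:
     * <pre>
     * for (int i = 0; i &lt; dbf.getLastRec(); i++) {
     *     StringBuffer rec = dbf.GetNextDbfRec();
     *     // ... process rec ...
     * }
     * </pre>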
     * @exception java.io.IOException on read error.
     */
    public StringBuffer GetNextDbfRec() throws java.io.IOException {
        StringBuffer record = new StringBuffer(rec_size + numfields);

        for (int i = 0; i < rec_size; i++) {
            // we could do some checking here.
            record.append((char) rFile.readUnsignedByte());
        }

        return record;
    }

    /**
     * Fetches the <i>row</i>th row of the file.
     * @param row - the row to fetch
     * @exception java.io.IOException on read error.
     */
    public StringBuffer GetDbfRec(int row) throws java.io.IOException {
        StringBuffer record = new StringBuffer(rec_size + numfields);

        rFile.seek(data_offset + (rec_size * row));

        //Multi byte character modification thanks to Hisaji ONO
        byte[] strbuf = new byte[rec_size]; // <---- byte array buffer for storing string's byte data

        dFile.readByteLEnum(strbuf);
        record.append(new String(strbuf)); // <- append byte array to String Buffer

        //record.append(strbuf);
        return record;
    }

    /**
     * Fetches the <i>row</i>th row of the file and parses it into a vector
     * of objects.
     * @param row - the row to fetch
     * @exception java.io.IOException on read error.
     */
    public Vector ParseDbfRecord(int row) throws java.io.IOException {
        return ParseRecord(GetDbfRec(row));
    }

    // Like public Vector ParseRecord(StringBuffer rec), but this
    // will try to minimize the number of objects created to keep
    // memory usage down.
    //
    // Will return a String, Double, Integer or Date.
    // Logical fields are not currently supported since we don't have any test datasets.
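    //
    // A minimal usage sketch (the row and column indices are hypothetical):
    //
    //   StringBuffer rec = dbf.GetDbfRec(0);
    //   Object val = dbf.ParseRecordColumn(rec, 2); // typed per the column's field definition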
    public Object ParseRecordColumn(StringBuffer rec, int wantedCol)
            throws Exception {
        int start;
        int end;
        start = fielddef[wantedCol].fieldstart;
        end = start + fielddef[wantedCol].fieldlen;

        switch (fielddef[wantedCol].fieldtype) {
        case 'C': //character
            return rec.substring(start, end).intern();

        case 'F': //same as numeric, more or less
        case 'N': //numeric

            // fields of type 'F' are always represented as Doubles
            boolean isInteger = fielddef[wantedCol].fieldnumdec == 0
                    && fielddef[wantedCol].fieldtype == 'N';

            // The number field should be trimmed from the start AND the end.
            // Added .trim() to 'String numb = rec.substring(start, end)' instead. [Kevin Neufeld]
            // while ((start < end) && (rec.charAt(start) == ' '))
            //     start++;

            String numb = rec.substring(start, end).trim();

            if (isInteger) { //it's an int

                try {
                    return new Integer(numb);
                } catch (java.lang.NumberFormatException e) {
                    return new Integer(0);
                }
            } else { //it's a float

                try {
                    return new Double(numb);
                } catch (java.lang.NumberFormatException e) {
                    // dBase can have numbers that look like '********' !! This isn't ideal but at least reads them
                    return new Double(Double.NaN);
                }
            }

        case 'D': //date. Added by [Jon Aquino]
            return parseDate(rec.substring(start, end));

        default:
            return rec.substring(start, end).intern();
        }
    }

    /**
     * Parses the record stored in the StringBuffer rec into a vector of
     * objects.
     * @param rec the record to be parsed.
     */
    public Vector ParseRecord(StringBuffer rec) {
        Vector record = new Vector(numfields);
        String t;
        t = rec.toString();

        for (int i = 0; i < numfields; i++) {
            if (DEBUG) {
                System.out.println("DbFi>type " + fielddef[i].fieldtype);
            }

            if (DEBUG) {
                System.out.println("DbFi>start " + fielddef[i].fieldstart);
            }

            if (DEBUG) {
                System.out.println("DbFi>len " + fielddef[i].fieldlen);
            }

            if (DEBUG) {
                System.out.println(t.substring(fielddef[i].fieldstart,
                        fielddef[i].fieldstart + fielddef[i].fieldlen));
            }

            switch (fielddef[i].fieldtype) {
            case 'C':
                record.addElement(t.substring(fielddef[i].fieldstart,
                        fielddef[i].fieldstart + fielddef[i].fieldlen));

                break;

            case 'N':

                if (fielddef[i].fieldnumdec == 0) { //it's an int

                    try {
                        String tt = t.substring(fielddef[i].fieldstart,
                                fielddef[i].fieldstart + fielddef[i].fieldlen);
                        record.addElement(Integer.valueOf(tt.trim()));
                    } catch (java.lang.NumberFormatException e) {
                        record.addElement(new Integer(0));
                    }
                } else { //it's a float

                    try {
                        record.addElement(Double.valueOf(t.substring(
                                fielddef[i].fieldstart,
                                fielddef[i].fieldstart + fielddef[i].fieldlen)
                                .trim()));
                    } catch (java.lang.NumberFormatException e) {
                        record.addElement(new Double(0.0));
                    }
                }

                break;

            case 'F':

                try {
                    record.addElement(Double.valueOf(t.substring(
                            fielddef[i].fieldstart,
                            fielddef[i].fieldstart + fielddef[i].fieldlen)
                            .trim()));
                } catch (java.lang.NumberFormatException e) {
                    record.addElement(new Double(0.0));
                }

                break;

            case 'D':

                //Date formats. This method doesn't seem to be called anywhere in JUMP,
                //so I'm not going to spend time understanding this method. [Jon Aquino]
                throw new UnsupportedOperationException();

            default:
                record.addElement(t.substring(fielddef[i].fieldstart,
                        fielddef[i].fieldstart + fielddef[i].fieldlen));
            }
        }

        return record;
    }

    /**
     * Fetches a column of Integers from the database file.
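     * <p>
     * A minimal sketch, assuming <code>dbf</code> is an open DbfFile and
     * column 3 is a numeric field with no decimals:
     * <pre>
     * Integer[] ids = dbf.getIntegerCol(3);
     * </pre>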
     * @param col - the column to fetch
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not an Integer.
     */
    public Integer[] getIntegerCol(int col) throws java.io.IOException,
            DbfFileException {
        return getIntegerCol(col, 0, last_rec);
    }

    /**
     * Fetches a part column of Integers from the database file.
     * @param col - the column to fetch
     * @param start - the row to start fetching from
     * @param end - the row to stop fetching at.
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not an Integer.
     */
    public Integer[] getIntegerCol(int col, int start, int end)
            throws java.io.IOException, DbfFileException {
        Integer[] column = new Integer[end - start];
        String record = new String();
        StringBuffer sb = new StringBuffer(numfields);
        int k = 0;
        int i = 0;

        if (col >= numfields) {
            throw new DbfFileException("DbFi>No Such Column in file: "
                    + col);
        }

        if (fielddef[col].fieldtype != 'N') {
            throw new DbfFileException("DbFi>Column " + col
                    + " is not Integer");
        }

        // move to start of data
        try {
            rFile.seek(data_offset + (rec_size * start));

            for (i = start; i < end; i++) {
                sb.setLength(0);

                for (k = 0; k < rec_size; k++)
                    sb.append((char) rFile.readUnsignedByte());

                record = sb.toString();

                try {
                    // numeric fields are space padded, so trim before parsing
                    column[i - start] = new Integer(record.substring(
                            fielddef[col].fieldstart,
                            fielddef[col].fieldstart
                                    + fielddef[col].fieldlen).trim());
                } catch (java.lang.NumberFormatException e) {
                    column[i - start] = new Integer(0);
                }
            }
        } catch (java.io.EOFException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        } catch (java.io.IOException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        }

        return column;
    }

    /**
     * Fetches a column of Doubles from the database file.
     * @param col - the column to fetch
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not a Double.
     */
    public Double[] getFloatCol(int col) throws DbfFileException,
            java.io.IOException {
        return getFloatCol(col, 0, last_rec);
    }

    /**
     * Fetches a part column of Doubles from the database file.
     * @param col - the column to fetch
     * @param start - the row to start fetching from
     * @param end - the row to stop fetching at.
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not a Double.
     */
    public Double[] getFloatCol(int col, int start, int end)
            throws DbfFileException, java.io.IOException {
        Double[] column = new Double[end - start];
        String record;
        String st;
        StringBuffer sb = new StringBuffer(rec_size);
        int k = 0;
        int i = 0;

        if (col >= numfields) {
            throw new DbfFileException("DbFi>No Such Column in file: "
                    + col);
        }

        if (fielddef[col].fieldtype != 'F') {
            throw new DbfFileException("DbFi>Column " + col
                    + " is not Double " + fielddef[col].fieldtype);
        }

        // move to start of data
        try {
            rFile.seek(data_offset + (rec_size * start));

            for (i = start; i < end; i++) {
                sb.setLength(0);

                // we should be able to skip to the start here.
                for (k = 0; k < rec_size; k++)
                    sb.append((char) rFile.readUnsignedByte());

                record = sb.toString();
                st = new String(record.substring(
                        fielddef[col].fieldstart,
                        fielddef[col].fieldstart
                                + fielddef[col].fieldlen));

                if (st.indexOf('.') == -1) {
                    st = st + ".0";
                }

                try {
                    column[i - start] = new Double(st);
                } catch (java.lang.NumberFormatException e) {
                    column[i - start] = new Double(0.0);
                }
            }
        } catch (java.io.EOFException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        } catch (java.io.IOException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        }

        return column;
    }

    /**
     * Fetches a column of Strings from the database file.
     * @param col - the column to fetch
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not a String.
     */
    public String[] getStringCol(int col) throws DbfFileException,
            java.io.IOException {
        return getStringCol(col, 0, last_rec);
    }

    /**
     * Fetches a part column of Strings from the database file.
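     * <p>
     * A minimal sketch, assuming <code>dbf</code> is an open DbfFile and
     * column 0 is a character field; rows 10 (inclusive) to 20 (exclusive)
     * are fetched:
     * <pre>
     * String[] names = dbf.getStringCol(0, 10, 20);
     * </pre>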
     * @param col - the column to fetch
     * @param start - the row to start fetching from
     * @param end - the row to stop fetching at.
     * @exception java.io.IOException - on read error
     * @exception DbfFileException - column is not a String.
     */
    public String[] getStringCol(int col, int start, int end)
            throws DbfFileException, java.io.IOException {
        String[] column = new String[end - start];
        String record = new String();

        //StringBuffer sb = new StringBuffer(numfields);
        int k = 0;
        int i = 0;

        if (col >= numfields) {
            throw new DbfFileException("DbFi>No Such Column in file: "
                    + col);
        }

        if (fielddef[col].fieldtype != 'C') {
            throw new DbfFileException("DbFi>Column " + col
                    + " is not a String");
        }

        // move to start of data
        try {
            rFile.seek(data_offset + (start * rec_size));

            for (i = start; i < end; i++) {
                //sb.setLength(0);
                //*** initialize buffer for record ***
                byte[] strbuf = new byte[rec_size];

                for (k = 0; k < rec_size; k++) {
                    strbuf[k] = rFile.readByte(); //*** get byte data
                }

                //sb.append((char)rFile.readUnsignedByte());
                //record=sb.toString();
                //*** convert buffer data to String ***
                record = new String(strbuf);

                //column[i-start]=new String(record.substring(fielddef[col].fieldstart,fielddef[col].fieldstart+fielddef[col].fieldlen));
                //*** Extract string data from record
                column[i - start] = new String(strbuf,
                        fielddef[col].fieldstart,
                        fielddef[col].fieldlen);
            }
        } catch (java.io.EOFException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        } catch (java.io.IOException e) {
            System.err.println("DbFi>" + e);
            System.err.println("DbFi>record " + i + " byte " + k
                    + " file pos " + rFile.getFilePointer());
        }

        return column;
    }

    public void close() throws IOException {
        dFile.close();
        rFile.close();
    }

    /**
     * Internal class to hold information from the header of the file.
     */
    class DbfFileHeader {
        /**
         * Reads the header of a dbf file.
         * @param file Stream attached to the input file
         * @exception IOException read error.
         */
        public DbfFileHeader(EndianDataInputStream file)
                throws IOException {
            getDbfFileHeader(file);
        }

        private void getDbfFileHeader(EndianDataInputStream file)
                throws IOException {
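            // Header layout as read below (all multi-byte values little endian):
            //   byte 0      id byte (3 means no memo file)
            //   bytes 1-3   date of last update (year, month, day)
            //   bytes 4-7   number of records (int)
            //   bytes 8-9   offset to the start of the data (short)
            //   bytes 10-11 size of a single record (short)
            // The remaining 20 bytes of the fixed header are skipped at the end.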
            int len;
            dbf_id = (int) file.readUnsignedByteLE();

            if (DEBUG) {
                System.out.print("DbFi>Header id ");
            }

            if (DEBUG) {
                System.out.println(dbf_id);
            }

            if (dbf_id == 3) {
                hasmemo = false;
            } else {
                hasmemo = true;
            }

            last_update_y = (int) file.readUnsignedByteLE() + DBF_CENTURY;
            last_update_m = (int) file.readUnsignedByteLE();
            last_update_d = (int) file.readUnsignedByteLE();

            if (DEBUG) {
                System.out.print("DbFi>last update ");
            }

            if (DEBUG) {
                System.out.print(last_update_d);
            }

            if (DEBUG) {
                System.out.print("/");
            }

            if (DEBUG) {
                System.out.print(last_update_m);
            }

            if (DEBUG) {
                System.out.print("/");
            }

            if (DEBUG) {
                System.out.println(last_update_y);
            }

            last_rec = file.readIntLE();

            if (DEBUG) {
                System.out.print("DbFi>last rec ");
            }

            if (DEBUG) {
                System.out.println(last_rec);
            }

            data_offset = file.readShortLE();

            //data_offset=0;
            //System.out.println("x = "+file.readUnsignedByte()+" " +
            //file.readUnsignedByte());
            if (DEBUG) {
                System.out.print("DbFi>data offset ");
            }

            if (DEBUG) {
                System.out.println(data_offset);
            }

            rec_size = file.readShortLE();

            if (DEBUG) {
                System.out.print("DbFi>rec_size ");
            }

            if (DEBUG) {
                System.out.println(rec_size);
            }

            filesize = (rec_size * last_rec) + data_offset + 1;
            numfields = (data_offset - DBF_BUFFSIZE - 1) / DBF_BUFFSIZE;

            if (DEBUG) {
                System.out.print("DbFi>num fields ");
            }

            if (DEBUG) {
                System.out.println(numfields);
            }

            if (DEBUG) {
                System.out.print("DbFi>file size ");
            }

            if (DEBUG) {
                System.out.println(filesize);
            }

            file.skipBytes(20);
        }
    }

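    /**
     * Parses a DBF date field value into a java.util.Date. Blank and
     * unparseable values yield null; the all-zero value "00000000" is mapped
     * to Jan 1, 0001. The last successful date format is cached and tried
     * first on the next call.
     * @param s the raw field contents
     */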
    protected Date parseDate(String s) throws ParseException {
        if (s.trim().length() == 0) {
            return null;
        }

        if (s.equals("00000000")) {
            //Not sure if Jan 1, 0001 is the most appropriate value.
            //Year 0000 gives me a ParseException. [Jon Aquino]
            return DATE_PARSER.parse("00010101");
        }

        try {
            return lastFormat.parse(s);
        } catch (ParseException pe) {
            // ignore
        }

        // 'MM' is month; lowercase 'mm' would be minutes
        String[] patterns = new String[] { "yyyyMMdd", "yy/MM/dd" };

        for (int i = 0; i < patterns.length; i++) {
            DateFormat df = new SimpleDateFormat(patterns[i]);
            df.setLenient(true);

            try {
                Date d = df.parse(s);
                lastFormat = df;

                return d;
            } catch (ParseException pe) {
                // ignore
            }
        }

        return null;
    }

    private DateFormat lastFormat = DATE_PARSER;

    public static void main(String[] args) throws Exception {
        System.out.println(new SimpleDateFormat("yyyyMMdd") {
            {
                setLenient(false);
            }
        }.parse("00010101"));
    }
}
|