001: /*
002: * This is free software, licensed under the Gnu Public License (GPL)
003: * get a copy from <http://www.gnu.org/licenses/gpl.html>
004: *
005: * author: Henner Zeller <H.Zeller@acm.org>
006: */
007: package henplus.importparser;
008:
009: import java.io.Reader;
010: import java.util.Calendar;
011:
012: /**
013: * A Parser for a
014: */
015: public class ImportParser {
016: private final static int INIT_SIZE = 8192;
017:
018: private final TypeParser[] _parsers;
019: private final char[] _colDelim;
020: private final char[] _rowDelim;
021:
022: public ImportParser(TypeParser[] parsers, String colDelim,
023: String rowDelim) {
024: _parsers = parsers;
025: _colDelim = new char[colDelim.length()];
026: colDelim.getChars(0, colDelim.length(), _colDelim, 0);
027: _rowDelim = new char[rowDelim.length()];
028: rowDelim.getChars(0, rowDelim.length(), _rowDelim, 0);
029: }
030:
031: // fixme: build in read-ahead in case colDelim and rowDelim have the
032: // same prefix..build fast state machine
033: // allows for multiple ways to delimit rows and columns
034: public void parse(Reader input, ValueRecipient recipient)
035: throws Exception {
036: // local variable access is faster
037: final char[] colPattern = _colDelim;
038: int colPatternPos = 0;
039:
040: final char[] rowPattern = _rowDelim;
041: int rowPatternPos = 0;
042:
043: char[] buffer = new char[INIT_SIZE];
044: int fieldStart = 0;
045: int pos = 0;
046: int currentColumn = 0;
047: int currentRow = 1;
048:
049: for (;;) {
050: if (buffer.length - pos == 0) { // need to adjust buffer
051: if (fieldStart > 0) { // remove unneded stuff in front
052: System.arraycopy(buffer, fieldStart, buffer, 0,
053: buffer.length - fieldStart);
054: //System.out.println("**shift buffer from " + fieldStart);
055: pos -= fieldStart;
056: fieldStart = 0;
057: } else { // fieldStart is already at 0, so increase size
058: char[] newBuffer = new char[buffer.length * 2];
059: System.arraycopy(buffer, fieldStart, newBuffer, 0,
060: buffer.length - fieldStart);
061: buffer = newBuffer;
062: //System.out.println("**larger buffer..");
063: }
064: }
065: int bytesRead = input
066: .read(buffer, pos, buffer.length - pos);
067: if (bytesRead < 0) {
068: break; // EOF
069: }
070:
071: int bufferEnd = pos + bytesRead;
072: while (pos < bufferEnd) {
073: final char c = buffer[pos++];
074:
075: // column pattern matches ?
076: if (colPattern[colPatternPos] == c) {
077: colPatternPos++;
078: if (colPatternPos >= colPattern.length) { // match!
079: if (currentColumn < _parsers.length) {
080: TypeParser colParser = _parsers[currentColumn];
081: if (colParser != null) {
082: colParser.parse(buffer, fieldStart, pos
083: - fieldStart
084: - colPattern.length, recipient);
085: }
086: }
087: colPatternPos = 0;
088: fieldStart = pos;
089: currentColumn++;
090: }
091: } else {
092: colPatternPos = 0; // no match. restart pattern..
093: }
094:
095: // row pattern matches ?
096: if (rowPattern[rowPatternPos] == c) {
097: rowPatternPos++;
098: if (rowPatternPos >= rowPattern.length) { // match!
099: if (currentColumn < _parsers.length - 1) {
100: System.err
101: .println("less columns than expected in row "
102: + currentRow
103: + ": expected "
104: + _parsers.length
105: + " but got "
106: + (currentColumn + 1));
107: }
108: if (currentColumn < _parsers.length) {
109: TypeParser colParser = _parsers[currentColumn];
110: if (colParser != null) {
111: colParser.parse(buffer, fieldStart, pos
112: - fieldStart
113: - rowPattern.length, recipient);
114: }
115: }
116: if (recipient.finishRow()) {
117: return;
118: }
119: fieldStart = pos;
120: rowPatternPos = 0;
121: currentColumn = 0;
122: currentRow++;
123: }
124: } else {
125: rowPatternPos = 0; // no match. restart pattern..
126: }
127: }
128: }
129: }
130:
131: static int count = 0;
132:
133: public static void main(String argv[]) throws Exception {
134: Reader r = new java.io.FileReader(argv[0]);
135: int cols = Integer.parseInt(argv[1]);
136: TypeParser[] parsers = new TypeParser[cols];
137: for (int i = 0; i < cols; ++i) {
138: parsers[i] = new StringParser(i + 1);
139: }
140: ValueRecipient v = new ValueRecipient() {
141: public void setLong(int fieldNumber, long value) {
142: }
143:
144: public void setString(int fieldNumber, String value) {
145: System.out.println("'" + value + "'");
146: }
147:
148: public void setDate(int fieldNumber, Calendar cal) {
149: }
150:
151: public boolean finishRow() {
152: System.out.println(">>row done..<<");
153: count++;
154: return false;
155: }
156: };
157: ImportParser parser = new ImportParser(parsers, "\",\"", "\n\"");
158: parser.parse(r, v);
159: System.err.println("COUNT: " + count);
160: }
161: }
|