001: package org.drools.decisiontable.parser.csv;
002:
003: /*
004: * Copyright 2005 JBoss Inc
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.util.ArrayList;
020: import java.util.List;
021:
/**
 * Breaks up a single CSV line into its cells, with all the normal CSV
 * features: quoted cells, separators embedded in quoted cells, and doubled
 * quotes (<code>""</code>) as an escaped quote character.
 *
 * @author <a href="mailto:michael.neale@gmail.com"> Michael Neale</a>
 */
public class CsvLineParser {

    /** The strategy that does the actual splitting of a line. */
    private final ICsvParser lineParser;

    /** Creates a line parser backed by the default comma-separated implementation. */
    public CsvLineParser() {
        this.lineParser = new CsvParserImpl();
    }

    /**
     * Use the current lineParser implementation to return a CSV line as a List
     * of cells (Strings).
     *
     * @param line
     *            the raw CSV line; must not be null
     * @return the cells of the line, in order. Each call returns its own
     *         list, so results of earlier calls are not affected by later
     *         ones.
     */
    public List parse(final CharSequence line) {
        return this.lineParser.parse(line.toString());
    }

    /**
     * This is insurance in case I need to replace it with more complex CSV
     * handlers in the future.
     */
    static interface ICsvParser {
        /**
         * Splits one line into its fields.
         *
         * @param line
         *            the line to split
         * @return the fields of the line as Strings, in order
         */
        public List parse(String line);
    }

    /**
     * Parse comma-separated values (CSV), a common Windows file format. Sample
     * input: "LU",86.25,"11/4/1998","2:19PM",+4.0625
     * <p>
     * Inner logic adapted from a C++ original that was Copyright (C) 1999
     * Lucent Technologies Excerpted from 'The Practice of Programming' by Brian
     * W. Kernighan and Rob Pike.
     * <p>
     * Included by permission of the http://tpop.awl.com/ web site, which says:
     * "You may use this code for any purpose, as long as you leave the
     * copyright notice and book citation attached." I have done so.
     *
     * @author Brian W. Kernighan and Rob Pike (C++ original)
     * @author Ian F. Darwin (translation into Java and removal of I/O)
     * @author Ben Ballard (rewrote advQuoted to handle '""' and for
     *         readability)
     */
    static class CsvParserImpl implements ICsvParser {

        public static final char DEFAULT_SEP = ',';

        /** Construct a CSV parser, with the default separator (','). */
        public CsvParserImpl() {
            this(CsvParserImpl.DEFAULT_SEP);
        }

        /**
         * Construct a CSV parser with a given separator.
         *
         * @param sep
         *            The single char for the separator (not a list of separator
         *            characters)
         */
        public CsvParserImpl(final char sep) {
            this.fieldSep = sep;
        }

        /**
         * The fields produced by the most recent call to
         * {@link #parse(String)}. A new list is installed on every call, so
         * lists handed out earlier are never modified by this parser again.
         */
        protected List list = new ArrayList();

        /** the separator char for this parser */
        protected char fieldSep;

        /**
         * parse: break the input String into fields.
         * <p>
         * An empty line yields a single empty field. Note that a separator
         * that is the very last character of the line does not produce a
         * trailing empty field: <code>a,b,</code> yields two fields.
         *
         * @param line
         *            the line to split; must not be null
         * @return a newly allocated java.util.List containing each field from
         *         the original as a String, in order.
         */
        public List parse(final String line) {
            // Allocate a fresh list for every call instead of recycling one
            // shared instance: the caller keeps a reference to what we return,
            // and clearing a shared list would silently wipe earlier results.
            final List fields = new ArrayList();
            this.list = fields;

            if (line.length() == 0) {
                fields.add(line);
                return fields;
            }

            final StringBuffer sb = new StringBuffer();
            int i = 0;
            do {
                sb.setLength(0);
                if (i < line.length() && line.charAt(i) == '"') {
                    i = advQuoted(line, sb, ++i); // skip the opening quote
                } else {
                    i = advPlain(line, sb, i);
                }
                fields.add(sb.toString());
                i++; // step over the separator the helper stopped on
            } while (i < line.length());

            return fields;
        }

        /**
         * advQuoted: quoted field; return index of next separator.
         * <p>
         * A doubled quote is appended as a single quote character. A quote
         * directly followed by the separator, or a quote that is the last
         * character of the line, closes the field. Any other quote is kept
         * as a literal character.
         *
         * @param s
         *            the whole line
         * @param sb
         *            receives the unescaped content of the field
         * @param i
         *            index of the first character after the opening quote
         * @return the index of the separator after the closing quote; the
         *         index of the closing quote itself when it is the last
         *         character of the line; or the line length when no closing
         *         quote was found
         */
        protected int advQuoted(final String s, final StringBuffer sb,
                final int i) {
            int j;
            final int len = s.length();
            for (j = i; j < len; j++) {
                if (s.charAt(j) == '"' && j + 1 < len) {
                    if (s.charAt(j + 1) == '"') {
                        j++; // skip escape char
                    } else if (s.charAt(j + 1) == this.fieldSep) { // next delimiter
                        j++; // skip end quotes
                        break;
                    }
                } else if (s.charAt(j) == '"' && j + 1 == len) { // end quotes at end of line
                    break; // done
                }
                sb.append(s.charAt(j)); // regular character.
            }
            return j;
        }

        /**
         * advPlain: unquoted field; return index of next separator.
         *
         * @param s
         *            the whole line
         * @param sb
         *            receives the text of the field
         * @param i
         *            index of the first character of the field
         * @return the index of the separator that ends the field, or the
         *         line length when the field runs to the end of the line
         */
        protected int advPlain(final String s, final StringBuffer sb,
                final int i) {
            final int j = s.indexOf(this.fieldSep, i); // look for separator
            if (j == -1) { // none found: the field is the rest of the line
                sb.append(s.substring(i));
                return s.length();
            }
            sb.append(s.substring(i, j));
            return j;
        }

    }
}
|