001: package prefuse.data.io;
002:
003: import java.io.IOException;
004: import java.io.InputStream;
005: import java.util.ArrayList;
006:
007: import prefuse.data.Table;
008: import prefuse.data.parser.DataParseException;
009: import prefuse.data.parser.DataParser;
010: import prefuse.data.parser.ParserFactory;
011: import prefuse.data.parser.TypeInferencer;
012: import prefuse.util.collections.ByteArrayList;
013: import prefuse.util.io.IOLib;
014:
015: /**
016: * Abstract base class for TableReader instances that read in a table
017: * from a textual data file.
018: *
019: * @author <a href="http://jheer.org">jeffrey heer</a>
020: */
021: public abstract class AbstractTextTableReader extends
022: AbstractTableReader {
023:
024: private ParserFactory m_pfactory;
025: private boolean m_hasHeader;
026:
027: /**
028: * Create a new AbstractTextTableReader using a default ParserFactory.
029: */
030: public AbstractTextTableReader() {
031: this (ParserFactory.getDefaultFactory());
032: }
033:
034: /**
035: * Create a new AbstractTextTableReader.
036: * @param parserFactory the ParserFactory to use for parsing text strings
037: * into table values.
038: */
039: public AbstractTextTableReader(ParserFactory parserFactory) {
040: m_pfactory = parserFactory;
041: m_hasHeader = true;
042: }
043:
044: /**
045: * Set whether or not the table data file includes a header row.
046: * @param hasHeaderRow true if the the data file includes a header row,
047: * false otherwise.
048: */
049: public void setHasHeader(boolean hasHeaderRow) {
050: m_hasHeader = hasHeaderRow;
051: }
052:
053: /**
054: * @see prefuse.data.io.AbstractTableReader#readTable(java.io.InputStream)
055: */
056: public Table readTable(InputStream is) throws DataIOException {
057:
058: // determine input stream capabilities
059: // if we can't reset the stream, we read in all the bytes
060: // and make our own local stream
061: ByteArrayList buf = null;
062: if (is.markSupported()) {
063: // mark the stream to our reset point
064: is.mark(Integer.MAX_VALUE);
065: } else {
066: // load in the entirety of the input stream
067: try {
068: buf = IOLib.readAsBytes(is);
069: } catch (IOException ioe) {
070: throw new DataIOException(ioe);
071: }
072: // create our own input stream
073: is = buf.getAsInputStream();
074: }
075:
076: final TypeInferencer di = new TypeInferencer(m_pfactory);
077: final ArrayList headers = getColumnNames();
078: final int[] dim = new int[] { 0, 0 };
079:
080: TableReadListener scanner = new TableReadListener() {
081: int prevLine = -1;
082:
083: public void readValue(int line, int col, String value)
084: throws DataParseException {
085: // sample value to determine data type
086: if (line > 1 || !m_hasHeader) {
087: di.sample(col - 1, value);
088:
089: // update num rows
090: if (line != prevLine) {
091: prevLine = line;
092: dim[0]++;
093: }
094: } else if (line == 1 && m_hasHeader) {
095: headers.add(value);
096: }
097:
098: // update num cols
099: if (col > dim[1])
100: dim[1] = col;
101: }
102: };
103:
104: // do a scan of the stream, collecting length and type data
105: try {
106: read(is, scanner);
107: } catch (IOException ioe) {
108: throw new DataIOException(ioe);
109: } catch (DataParseException de) {
110: // can't happen
111: }
112:
113: // create the table
114: int nrows = dim[0];
115: int ncols = dim[1];
116: final Table table = new Table(nrows, ncols);
117:
118: // create the table columns
119: for (int i = 0; i < ncols; ++i) {
120: String header;
121: if (m_hasHeader || i < headers.size()) {
122: header = (String) headers.get(i);
123: } else {
124: header = getDefaultHeader(i);
125: }
126: table.addColumn(header, di.getType(i));
127: table.getColumn(i).setParser(di.getParser(i));
128: }
129:
130: // reset dim array, will hold row/col indices
131: dim[0] = dim[1] = -1;
132:
133: TableReadListener parser = new TableReadListener() {
134: int prevLine = -1;
135:
136: public void readValue(int line, int col, String value)
137: throws DataParseException {
138: // early exit on header value
139: if (line == 1 && m_hasHeader)
140: return;
141: if (line != prevLine) {
142: prevLine = line;
143: ++dim[0];
144: }
145: dim[1] = col - 1;
146:
147: // XXX NOTE-2005.08.29-jheer
148: // For now we use generic routines for filling column values.
149: // This results in the autoboxing of primitive types, slowing
150: // performance a bit and possibly triggering avoidable garbage
151: // collections. If this proves to be a problem down the road,
152: // we can add more nuance later.
153: DataParser dp = di.getParser(dim[1]);
154: table.set(dim[0], dim[1], dp.parse(value));
155: }
156: };
157:
158: // read the data into the table
159: try {
160: // prepare the input stream
161: if (is.markSupported()) {
162: is.reset();
163: } else {
164: is = buf.getAsInputStream();
165: }
166: // read the data
167: read(is, parser);
168: } catch (IOException ioe) {
169: throw new DataIOException(ioe);
170: } catch (DataParseException de) {
171: throw new DataIOException("Parse exception for column "
172: + '\"' + dim[1] + '\"' + " at row: " + dim[0], de);
173: }
174:
175: return table;
176: }
177:
178: /**
179: * Subclasses can override this to provide column names through
180: * a custom mechanism.
181: * @return an ArrayList of String instances indicating the column names
182: */
183: protected ArrayList getColumnNames() {
184: return new ArrayList();
185: }
186:
187: /**
188: * Returns default column header names of the type "A", "B", ...,
189: * "Z", "AA", "AB", etc.
190: * @param idx the index of the column header
191: * @return a default column header name for the given index.
192: */
193: public static String getDefaultHeader(int idx) {
194: if (idx == 0)
195: return "A";
196: int len = ((int) (Math.log(idx) / Math.log(26))) + 1;
197: char[] h = new char[len];
198: int p = len;
199:
200: h[--p] = (char) ('A' + (idx % 26));
201: idx = idx / 26;
202:
203: while (idx > 26) {
204: h[--p] = (char) ('A' + (idx % 26));
205: idx = idx / 26;
206: }
207: if (idx > 0) {
208: h[--p] = (char) ('A' + ((idx - 1) % 26));
209: }
210:
211: return new String(h, p, len);
212: }
213:
214: /**
215: * Scans the input stream, making call backs for each encountered entry
216: * on the provided TextReadListener.
217: * @param is the InputStream to read
218: * @param trl the TextReadListener that will receive callbacks
219: * @throws IOException
220: * @throws DataParseException
221: */
222: protected abstract void read(InputStream is, TableReadListener trl)
223: throws IOException, DataParseException;
224:
225: } // end of abstract class AbstractTextTableReader
|