001: package prefuse.data.parser;
002:
003: import java.util.Arrays;
004:
005: /**
006: * Factory class that maintains a collection of parser instances and returns
007: * the appropriate parser based on a history of samples presented to the
008: * factory. The {@link #sample(String)} method takes a text string and tests
009: * it against all available parsers, updating whether or not the parsers can
010: * successfully parse the value. This method is used in a more automated
011: * fashion by the {@link TypeInferencer} class.
012: *
013: * @author <a href="http://jheer.org">jeffrey heer</a>
014: * @see TypeInferencer
015: */
016: public class ParserFactory implements Cloneable {
017:
018: private static final DataParser[] DEFAULT_PARSERS = new DataParser[] {
019: new IntParser(), new LongParser(), new DoubleParser(),
020: new FloatParser(), new BooleanParser(),
021: new ColorIntParser(), new DateParser(), new TimeParser(),
022: new DateTimeParser(), new IntArrayParser(),
023: new LongArrayParser(), new FloatArrayParser(),
024: new DoubleArrayParser(), new StringParser() };
025:
026: private static ParserFactory DEFAULT_FACTORY = new ParserFactory(
027: DEFAULT_PARSERS);
028:
029: private DataParser[] m_parsers;
030: private boolean[] m_isCandidate;
031:
032: /**
033: * Returns the default parser factory. The default factory tests for the
034: * following data types (in the provided order of precedence):
035: * int, long, double, float, boolean, Date, Time, DateTime, String.
036: * @return the default parser factory.
037: */
038: public static ParserFactory getDefaultFactory() {
039: return DEFAULT_FACTORY;
040: }
041:
042: /**
043: * Sets the default parser factory. This factory will be used by default
044: * by all readers to parse data values.
045: * @param factory the new default parser factory.
046: */
047: public static void setDefaultFactory(ParserFactory factory) {
048: DEFAULT_FACTORY = factory;
049: }
050:
051: /**
052: * Constructor. Uses a default collection of parsers, testing for the
053: * following data type in the followinf order of precedence:
054: * int, long, double, float, boolean, Date, Time, DateTime, String.
055: */
056: public ParserFactory() {
057: this (DEFAULT_PARSERS);
058: }
059:
060: /**
061: * @see java.lang.Object#clone()
062: */
063: public Object clone() {
064: return new ParserFactory(m_parsers);
065: }
066:
067: /**
068: * <p>Constructor. Takes an array of parsers to test. After creating this
069: * instance, sample data values can be passed in using the
070: * <code>sample()</code> method, and this class will check the sample
071: * against the parsers, computing which parsers can successfully parse the
072: * sample. This process of elimination disregards inappropriate parsers.
073: * After a series of samples, the <code>getParser()</code>
074: * method can be used to retrieve the highest ranking candidate parser.
075: * </p>
076: *
077: * <p>
078: * If no parser can parse all samples, a null value will be returned by
079: * getParser(). For this reason, it is recommended to always use a
080: * StringParser as the last element of the input array, as it is guaranteed
081: * to always parse successfully (by simply returning its input String).
082: * </p>
083: *
084: * <p>
085: * The ordering of parsers in the array is taken to be the desired order
086: * of precendence of the parsers. For example, if both parser[0] and
087: * parser[2] can parse all the available samples, parser[0] will be
088: * returned.
089: * </p>
090: * @param parsers the input DataParsers to use.
091: */
092: public ParserFactory(DataParser[] parsers) {
093: // check integrity of input
094: for (int i = 0; i < parsers.length; ++i) {
095: if (parsers[i] == null) {
096: throw new IllegalArgumentException(
097: "Input parsers must be non-null");
098: }
099: }
100: // initialize member variables
101: m_parsers = parsers;
102: m_isCandidate = new boolean[m_parsers.length];
103: reset();
104: }
105:
106: /**
107: * Reset the candidate parser settings, making each parser
108: * equally likely.
109: */
110: protected void reset() {
111: Arrays.fill(m_isCandidate, true);
112: }
113:
114: /**
115: * Sample a data value against the parsers, updating the
116: * parser candidates.
117: * @param val the String value to sample
118: */
119: protected void sample(String val) {
120: for (int i = 0; i < m_parsers.length; ++i) {
121: if (m_isCandidate[i]) {
122: m_isCandidate[i] = m_parsers[i].canParse(val);
123: }
124: }
125: }
126:
127: /**
128: * Returns the highest ranking parser that successfully can
129: * parse all the input samples viewed by this instance. If
130: * no such parser exists, a null value is returned.
131: * @return the highest-ranking data parser, or null if none
132: */
133: protected DataParser getParser() {
134: for (int i = 0; i < m_parsers.length; ++i) {
135: if (m_isCandidate[i]) {
136: return m_parsers[i];
137: }
138: }
139: return null;
140: }
141:
142: /**
143: * Returns a parser for the specified data type.
144: * @param type the Class for the data type to parse
145: * @return a parser for the given data type, or null
146: * if no such parser can be found.
147: */
148: public DataParser getParser(Class type) {
149: for (int i = 0; i < m_parsers.length; ++i) {
150: if (m_parsers[i].getType().equals(type)) {
151: return m_parsers[i];
152: }
153: }
154: return null;
155: }
156:
157: /**
158: * Analyzes the given array of String values to determine an
159: * acceptable parser data type.
160: * @param data an array of String values to parse
161: * @param startRow the row from which to begin analyzing the
162: * data array, allowing header rows to be excluded.
163: * @return the appropriate parser for the inferred data type,
164: * of null if none.
165: */
166: public DataParser getParser(String[] data, int startRow) {
167: return getParser(new String[][] { data }, 0, startRow);
168: }
169:
170: /**
171: * Analyzes a column of the given array of String values to
172: * determine an acceptable parser data type.
173: * @param data an 2D array of String values to parse
174: * @param col an index for the column to process
175: * @param startRow the row from which to begin analyzing the
176: * data array, allowing header rows to be excluded.
177: * @return the appropriate parser for the inferred data type,
178: * of null if none.
179: */
180: public DataParser getParser(String[][] data, int col, int startRow) {
181: // sanity check input
182: if (data == null || data.length == 0)
183: return null;
184:
185: int nrows = data.length;
186:
187: // analyze each column in turn
188: this .reset();
189: for (int row = startRow; row < nrows; ++row) {
190: this .sample(data[row][col]);
191: }
192:
193: DataParser parser = getParser();
194: return parser;
195: }
196:
197: } // end of class ParserFactory
|