001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * AbstractTimeSeries.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.filters.unsupervised.attribute;
024:
025: import java.util.Enumeration;
026: import java.util.Vector;
027: import weka.core.Instance;
028: import weka.core.Instances;
029: import weka.core.Option;
030: import weka.core.OptionHandler;
031: import weka.core.Queue;
032: import weka.core.Range;
033: import weka.core.Utils;
034: import weka.filters.Filter;
035: import weka.filters.UnsupervisedFilter;
036:
037: /**
038: * An abstract instance filter that assumes instances form time-series data and
039: * performs some merging of attribute values in the current instance with
040: * attribute attribute values of some previous (or future) instance. For
041: * instances where the desired value is unknown either the instance may
042: * be dropped, or missing values used.<p>
043: *
044: * Valid filter-specific options are:<p>
045: *
046: * -R index1,index2-index4,...<br>
047: * Specify list of columns to calculate new values for.
048: * First and last are valid indexes.
049: * (default none)<p>
050: *
051: * -V <br>
052: * Invert matching sense (i.e. calculate for all non-specified columns)<p>
053: *
054: * -I num <br>
055: * The number of instances forward to merge values between.
056: * A negative number indicates taking values from a past instance.
057: * (default -1) <p>
058: *
059: * -M <br>
060: * For instances at the beginning or end of the dataset where the translated
061: * values are not known, remove those instances (default is to use missing
062: * values). <p>
063: *
064: * @author Len Trigg (trigg@cs.waikato.ac.nz)
065: * @version $Revision: 1.8 $
066: */
067: public abstract class AbstractTimeSeries extends Filter implements
068: UnsupervisedFilter, OptionHandler {
069:
070: /** for serialization */
071: private static final long serialVersionUID = -3795656792078022357L;
072:
073: /** Stores which columns to copy */
074: protected Range m_SelectedCols = new Range();
075:
076: /**
077: * True if missing values should be used rather than removing instances
078: * where the translated value is not known (due to border effects).
079: */
080: protected boolean m_FillWithMissing = true;
081:
082: /**
083: * The number of instances forward to translate values between.
084: * A negative number indicates taking values from a past instance.
085: */
086: protected int m_InstanceRange = -1;
087:
088: /** Stores the historical instances to copy values between */
089: protected Queue m_History;
090:
091: /**
092: * Returns an enumeration describing the available options.
093: *
094: * @return an enumeration of all the available options.
095: */
096: public Enumeration listOptions() {
097:
098: Vector newVector = new Vector(4);
099:
100: newVector.addElement(new Option(
101: "\tSpecify list of columns to translate in time. First and\n"
102: + "\tlast are valid indexes. (default none)",
103: "R", 1, "-R <index1,index2-index4,...>"));
104: newVector
105: .addElement(new Option(
106: "\tInvert matching sense (i.e. calculate for all non-specified columns)",
107: "V", 0, "-V"));
108: newVector
109: .addElement(new Option(
110: "\tThe number of instances forward to translate values\n"
111: + "\tbetween. A negative number indicates taking values from\n"
112: + "\ta past instance. (default -1)",
113: "I", 1, "-I <num>"));
114: newVector
115: .addElement(new Option(
116: "\tFor instances at the beginning or end of the dataset where\n"
117: + "\tthe translated values are not known, remove those instances\n"
118: + "\t(default is to use missing values).",
119: "M", 0, "-M"));
120:
121: return newVector.elements();
122: }
123:
124: /**
125: * Parses a given list of options controlling the behaviour of this object.
126: * Valid options are:<p>
127: *
128: * -R index1,index2-index4,...<br>
129: * Specify list of columns to copy. First and last are valid indexes.
130: * (default none)<p>
131: *
132: * -V<br>
133: * Invert matching sense (i.e. calculate for all non-specified columns)<p>
134: *
135: * -I num <br>
136: * The number of instances forward to translate values between.
137: * A negative number indicates taking values from a past instance.
138: * (default -1) <p>
139: *
140: * -M <br>
141: * For instances at the beginning or end of the dataset where the translated
142: * values are not known, remove those instances (default is to use missing
143: * values). <p>
144: *
145: * @param options the list of options as an array of strings
146: * @throws Exception if an option is not supported
147: */
148: public void setOptions(String[] options) throws Exception {
149:
150: String copyList = Utils.getOption('R', options);
151: if (copyList.length() != 0) {
152: setAttributeIndices(copyList);
153: } else {
154: setAttributeIndices("");
155: }
156:
157: setInvertSelection(Utils.getFlag('V', options));
158:
159: setFillWithMissing(!Utils.getFlag('M', options));
160:
161: String instanceRange = Utils.getOption('I', options);
162: if (instanceRange.length() != 0) {
163: setInstanceRange(Integer.parseInt(instanceRange));
164: } else {
165: setInstanceRange(-1);
166: }
167:
168: if (getInputFormat() != null) {
169: setInputFormat(getInputFormat());
170: }
171: }
172:
173: /**
174: * Gets the current settings of the filter.
175: *
176: * @return an array of strings suitable for passing to setOptions
177: */
178: public String[] getOptions() {
179:
180: String[] options = new String[6];
181: int current = 0;
182:
183: if (!getAttributeIndices().equals("")) {
184: options[current++] = "-R";
185: options[current++] = getAttributeIndices();
186: }
187: if (getInvertSelection()) {
188: options[current++] = "-V";
189: }
190: options[current++] = "-I";
191: options[current++] = "" + getInstanceRange();
192: if (!getFillWithMissing()) {
193: options[current++] = "-M";
194: }
195:
196: while (current < options.length) {
197: options[current++] = "";
198: }
199: return options;
200: }
201:
202: /**
203: * Sets the format of the input instances.
204: *
205: * @param instanceInfo an Instances object containing the input instance
206: * structure (any instances contained in the object are ignored - only the
207: * structure is required).
208: * @return true if the outputFormat may be collected immediately
209: * @throws Exception if the format couldn't be set successfully
210: */
211: public boolean setInputFormat(Instances instanceInfo)
212: throws Exception {
213:
214: super .setInputFormat(instanceInfo);
215: resetHistory();
216: m_SelectedCols.setUpper(instanceInfo.numAttributes() - 1);
217: return false;
218: }
219:
220: /**
221: * Input an instance for filtering. Ordinarily the instance is processed
222: * and made available for output immediately. Some filters require all
223: * instances be read before producing output.
224: *
225: * @param instance the input instance
226: * @return true if the filtered instance may now be
227: * collected with output().
228: * @throws Exception if the input instance was not of the correct
229: * format or if there was a problem with the filtering.
230: */
231: public boolean input(Instance instance) throws Exception {
232:
233: if (getInputFormat() == null) {
234: throw new NullPointerException(
235: "No input instance format defined");
236: }
237: if (m_NewBatch) {
238: resetQueue();
239: m_NewBatch = false;
240: resetHistory();
241: }
242:
243: Instance newInstance = historyInput(instance);
244: if (newInstance != null) {
245: push(newInstance);
246: return true;
247: } else {
248: return false;
249: }
250: }
251:
252: /**
253: * Signifies that this batch of input to the filter is finished. If the
254: * filter requires all instances prior to filtering, output() may now
255: * be called to retrieve the filtered instances.
256: *
257: * @return true if there are instances pending output
258: * @throws IllegalStateException if no input structure has been defined
259: */
260: public boolean batchFinished() {
261:
262: if (getInputFormat() == null) {
263: throw new IllegalStateException(
264: "No input instance format defined");
265: }
266: if (getFillWithMissing() && (m_InstanceRange > 0)) {
267: while (!m_History.empty()) {
268: push(mergeInstances(null, (Instance) m_History.pop()));
269: }
270: }
271: flushInput();
272: m_NewBatch = true;
273: m_FirstBatchDone = true;
274: return (numPendingOutput() != 0);
275: }
276:
277: /**
278: * Returns the tip text for this property
279: * @return tip text for this property suitable for
280: * displaying in the explorer/experimenter gui
281: */
282: public String fillWithMissingTipText() {
283: return "For instances at the beginning or end of the dataset where the translated "
284: + "values are not known, use missing values (default is to remove those "
285: + "instances)";
286: }
287:
288: /**
289: * Gets whether missing values should be used rather than removing instances
290: * where the translated value is not known (due to border effects).
291: *
292: * @return true if so
293: */
294: public boolean getFillWithMissing() {
295:
296: return m_FillWithMissing;
297: }
298:
299: /**
300: * Sets whether missing values should be used rather than removing instances
301: * where the translated value is not known (due to border effects).
302: *
303: * @param newFillWithMissing true if so
304: */
305: public void setFillWithMissing(boolean newFillWithMissing) {
306:
307: m_FillWithMissing = newFillWithMissing;
308: }
309:
310: /**
311: * Returns the tip text for this property
312: * @return tip text for this property suitable for
313: * displaying in the explorer/experimenter gui
314: */
315: public String instanceRangeTipText() {
316: return "The number of instances forward/backward to merge values between. "
317: + "A negative number indicates taking values from a past instance.";
318: }
319:
320: /**
321: * Gets the number of instances forward to translate values between.
322: * A negative number indicates taking values from a past instance.
323: *
324: * @return Value of InstanceRange.
325: */
326: public int getInstanceRange() {
327:
328: return m_InstanceRange;
329: }
330:
331: /**
332: * Sets the number of instances forward to translate values between.
333: * A negative number indicates taking values from a past instance.
334: *
335: * @param newInstanceRange Value to assign to InstanceRange.
336: */
337: public void setInstanceRange(int newInstanceRange) {
338:
339: m_InstanceRange = newInstanceRange;
340: }
341:
342: /**
343: * Returns the tip text for this property
344: * @return tip text for this property suitable for
345: * displaying in the explorer/experimenter gui
346: */
347: public String invertSelectionTipText() {
348: return "Invert matching sense. ie calculate for all non-specified columns.";
349: }
350:
351: /**
352: * Get whether the supplied columns are to be removed or kept
353: *
354: * @return true if the supplied columns will be kept
355: */
356: public boolean getInvertSelection() {
357:
358: return m_SelectedCols.getInvert();
359: }
360:
361: /**
362: * Set whether selected columns should be removed or kept. If true the
363: * selected columns are kept and unselected columns are copied. If false
364: * selected columns are copied and unselected columns are kept.
365: *
366: * @param invert the new invert setting
367: */
368: public void setInvertSelection(boolean invert) {
369:
370: m_SelectedCols.setInvert(invert);
371: }
372:
373: /**
374: * Returns the tip text for this property
375: *
376: * @return tip text for this property suitable for
377: * displaying in the explorer/experimenter gui
378: */
379: public String attributeIndicesTipText() {
380: return "Specify range of attributes to act on."
381: + " This is a comma separated list of attribute indices, with"
382: + " \"first\" and \"last\" valid values. Specify an inclusive"
383: + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
384: }
385:
386: /**
387: * Get the current range selection
388: *
389: * @return a string containing a comma separated list of ranges
390: */
391: public String getAttributeIndices() {
392:
393: return m_SelectedCols.getRanges();
394: }
395:
396: /**
397: * Set which attributes are to be copied (or kept if invert is true)
398: *
399: * @param rangeList a string representing the list of attributes. Since
400: * the string will typically come from a user, attributes are indexed from
401: * 1. <br>
402: * eg: first-3,5,6-last
403: */
404: public void setAttributeIndices(String rangeList) {
405:
406: m_SelectedCols.setRanges(rangeList);
407: }
408:
409: /**
410: * Set which attributes are to be copied (or kept if invert is true)
411: *
412: * @param attributes an array containing indexes of attributes to select.
413: * Since the array will typically come from a program, attributes are indexed
414: * from 0.
415: */
416: public void setAttributeIndicesArray(int[] attributes) {
417:
418: setAttributeIndices(Range.indicesToRangeList(attributes));
419: }
420:
421: /** Clears any instances from the history queue. */
422: protected void resetHistory() {
423:
424: if (m_History == null) {
425: m_History = new Queue();
426: } else {
427: m_History.removeAllElements();
428: }
429: }
430:
431: /**
432: * Adds an instance to the history buffer. If enough instances are in
433: * the buffer, a new instance may be output, with selected attribute
434: * values copied from one to another.
435: *
436: * @param instance the input instance
437: * @return a new instance with translated values, or null if no
438: * output instance is produced
439: */
440: protected Instance historyInput(Instance instance) {
441:
442: m_History.push(instance);
443: if (m_History.size() <= Math.abs(m_InstanceRange)) {
444: if (getFillWithMissing() && (m_InstanceRange < 0)) {
445: return mergeInstances(null, instance);
446: } else {
447: return null;
448: }
449: }
450: if (m_InstanceRange < 0) {
451: return mergeInstances((Instance) m_History.pop(), instance);
452: } else {
453: return mergeInstances(instance, (Instance) m_History.pop());
454: }
455: }
456:
457: /**
458: * Creates a new instance the same as one instance (the "destination")
459: * but with some attribute values copied from another instance
460: * (the "source")
461: *
462: * @param source the source instance
463: * @param dest the destination instance
464: * @return the new merged instance
465: */
466: protected abstract Instance mergeInstances(Instance source,
467: Instance dest);
468:
469: }
|