001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * AttributeSelection.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.filters.supervised.attribute;
024:
025: import weka.attributeSelection.ASEvaluation;
026: import weka.attributeSelection.ASSearch;
027: import weka.attributeSelection.AttributeEvaluator;
028: import weka.attributeSelection.AttributeTransformer;
029: import weka.attributeSelection.BestFirst;
030: import weka.attributeSelection.CfsSubsetEval;
031: import weka.attributeSelection.Ranker;
032: import weka.attributeSelection.UnsupervisedAttributeEvaluator;
033: import weka.attributeSelection.UnsupervisedSubsetEvaluator;
034: import weka.core.Capabilities;
035: import weka.core.FastVector;
036: import weka.core.Instance;
037: import weka.core.Instances;
038: import weka.core.Option;
039: import weka.core.OptionHandler;
040: import weka.core.SparseInstance;
041: import weka.core.Utils;
042: import weka.core.Capabilities.Capability;
043: import weka.filters.Filter;
044: import weka.filters.SupervisedFilter;
045:
046: import java.util.Enumeration;
047: import java.util.Vector;
048:
049: /**
050: <!-- globalinfo-start -->
051: * A supervised attribute filter that can be used to select attributes. It is very flexible and allows various search and evaluation methods to be combined.
052: * <p/>
053: <!-- globalinfo-end -->
054: *
055: <!-- options-start -->
056: * Valid options are: <p/>
057: *
058: * <pre> -S <"Name of search class [search options]">
059: * Sets search method for subset evaluators.
060: * eg. -S "weka.attributeSelection.BestFirst -S 8"</pre>
061: *
062: * <pre> -E <"Name of attribute/subset evaluation class [evaluator options]">
063: * Sets attribute/subset evaluator.
064: * eg. -E "weka.attributeSelection.CfsSubsetEval -L"</pre>
065: *
066: * <pre>
067: * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
068: * </pre>
069: *
070: * <pre> -M
071: * Treat missing values as a separate
072: * value.</pre>
073: *
074: * <pre> -L
075: * Don't include locally predictive attributes.</pre>
076: *
077: * <pre>
078: * Options specific to search weka.attributeSelection.BestFirst:
079: * </pre>
080: *
081: * <pre> -P <start set>
082: * Specify a starting set of attributes.
083: * Eg. 1,3,5-7.</pre>
084: *
085: * <pre> -D <0 = backward | 1 = forward | 2 = bi-directional>
086: * Direction of search. (default = 1).</pre>
087: *
088: * <pre> -N <num>
089: * Number of non-improving nodes to
090: * consider before terminating search.</pre>
091: *
092: * <pre> -S <num>
093: * Size of lookup cache for evaluated subsets.
094: * Expressed as a multiple of the number of
095: * attributes in the data set. (default = 1)</pre>
096: *
097: <!-- options-end -->
098: *
099: * @author Mark Hall (mhall@cs.waikato.ac.nz)
100: * @version $Revision: 1.7 $
101: */
102: public class AttributeSelection extends Filter implements
103: SupervisedFilter, OptionHandler {
104:
105: /** for serialization */
106: static final long serialVersionUID = -296211247688169716L;
107:
108: /** the attribute selection evaluation object */
109: private weka.attributeSelection.AttributeSelection m_trainSelector;
110:
111: /** the attribute evaluator to use */
112: private ASEvaluation m_ASEvaluator;
113:
114: /** the search method if any */
115: private ASSearch m_ASSearch;
116:
117: /** holds a copy of the full set of valid options passed to the filter */
118: private String[] m_FilterOptions;
119:
120: /** holds the selected attributes */
121: private int[] m_SelectedAttributes;
122:
123: /**
124: * Returns a string describing this filter
125: *
126: * @return a description of the filter suitable for
127: * displaying in the explorer/experimenter gui
128: */
129: public String globalInfo() {
130:
131: return "A supervised attribute filter that can be used to select "
132: + "attributes. It is very flexible and allows various search "
133: + "and evaluation methods to be combined.";
134: }
135:
136: /**
137: * Constructor
138: */
139: public AttributeSelection() {
140:
141: resetOptions();
142: }
143:
144: /**
145: * Returns an enumeration describing the available options.
146: * @return an enumeration of all the available options.
147: */
148: public Enumeration listOptions() {
149:
150: Vector newVector = new Vector(6);
151:
152: newVector
153: .addElement(new Option(
154: "\tSets search method for subset evaluators.\n"
155: + "\teg. -S \"weka.attributeSelection.BestFirst -S 8\"",
156: "S", 1,
157: "-S <\"Name of search class [search options]\">"));
158:
159: newVector
160: .addElement(new Option(
161: "\tSets attribute/subset evaluator.\n"
162: + "\teg. -E \"weka.attributeSelection.CfsSubsetEval -L\"",
163: "E", 1,
164: "-E <\"Name of attribute/subset evaluation class [evaluator options]\">"));
165:
166: if ((m_ASEvaluator != null)
167: && (m_ASEvaluator instanceof OptionHandler)) {
168: Enumeration enu = ((OptionHandler) m_ASEvaluator)
169: .listOptions();
170:
171: newVector
172: .addElement(new Option("", "", 0,
173: "\nOptions specific to "
174: + "evaluator "
175: + m_ASEvaluator.getClass()
176: .getName() + ":"));
177: while (enu.hasMoreElements()) {
178: newVector.addElement((Option) enu.nextElement());
179: }
180: }
181:
182: if ((m_ASSearch != null)
183: && (m_ASSearch instanceof OptionHandler)) {
184: Enumeration enu = ((OptionHandler) m_ASSearch)
185: .listOptions();
186:
187: newVector.addElement(new Option("", "", 0,
188: "\nOptions specific to " + "search "
189: + m_ASSearch.getClass().getName() + ":"));
190: while (enu.hasMoreElements()) {
191: newVector.addElement((Option) enu.nextElement());
192: }
193: }
194: return newVector.elements();
195: }
196:
197: /**
198: * Parses a given list of options. <p/>
199: *
200: <!-- options-start -->
201: * Valid options are: <p/>
202: *
203: * <pre> -S <"Name of search class [search options]">
204: * Sets search method for subset evaluators.
205: * eg. -S "weka.attributeSelection.BestFirst -S 8"</pre>
206: *
207: * <pre> -E <"Name of attribute/subset evaluation class [evaluator options]">
208: * Sets attribute/subset evaluator.
209: * eg. -E "weka.attributeSelection.CfsSubsetEval -L"</pre>
210: *
211: * <pre>
212: * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
213: * </pre>
214: *
215: * <pre> -M
216: * Treat missing values as a seperate
217: * value.</pre>
218: *
219: * <pre> -L
220: * Don't include locally predictive attributes.</pre>
221: *
222: * <pre>
223: * Options specific to search weka.attributeSelection.BestFirst:
224: * </pre>
225: *
226: * <pre> -P <start set>
227: * Specify a starting set of attributes.
228: * Eg. 1,3,5-7.</pre>
229: *
230: * <pre> -D <0 = backward | 1 = forward | 2 = bi-directional>
231: * Direction of search. (default = 1).</pre>
232: *
233: * <pre> -N <num>
234: * Number of non-improving nodes to
235: * consider before terminating search.</pre>
236: *
237: * <pre> -S <num>
238: * Size of lookup cache for evaluated subsets.
239: * Expressed as a multiple of the number of
240: * attributes in the data set. (default = 1)</pre>
241: *
242: <!-- options-end -->
243: *
244: * @param options the list of options as an array of strings
245: * @throws Exception if an option is not supported
246: */
247: public void setOptions(String[] options) throws Exception {
248:
249: String optionString;
250: resetOptions();
251:
252: if (Utils.getFlag('X', options)) {
253: throw new Exception(
254: "Cross validation is not a valid option"
255: + " when using attribute selection as a Filter.");
256: }
257:
258: optionString = Utils.getOption('E', options);
259: if (optionString.length() != 0) {
260: optionString = optionString.trim();
261: // split a quoted evaluator name from its options (if any)
262: int breakLoc = optionString.indexOf(' ');
263: String evalClassName = optionString;
264: String evalOptionsString = "";
265: String[] evalOptions = null;
266: if (breakLoc != -1) {
267: evalClassName = optionString.substring(0, breakLoc);
268: evalOptionsString = optionString.substring(breakLoc)
269: .trim();
270: evalOptions = Utils.splitOptions(evalOptionsString);
271: }
272: setEvaluator(ASEvaluation.forName(evalClassName,
273: evalOptions));
274: }
275:
276: if (m_ASEvaluator instanceof AttributeEvaluator) {
277: setSearch(new Ranker());
278: }
279:
280: optionString = Utils.getOption('S', options);
281: if (optionString.length() != 0) {
282: optionString = optionString.trim();
283: int breakLoc = optionString.indexOf(' ');
284: String SearchClassName = optionString;
285: String SearchOptionsString = "";
286: String[] SearchOptions = null;
287: if (breakLoc != -1) {
288: SearchClassName = optionString.substring(0, breakLoc);
289: SearchOptionsString = optionString.substring(breakLoc)
290: .trim();
291: SearchOptions = Utils.splitOptions(SearchOptionsString);
292: }
293: setSearch(ASSearch.forName(SearchClassName, SearchOptions));
294: }
295:
296: Utils.checkForRemainingOptions(options);
297: }
298:
299: /**
300: * Gets the current settings for the attribute selection (search, evaluator)
301: * etc.
302: *
303: * @return an array of strings suitable for passing to setOptions()
304: */
305: public String[] getOptions() {
306: String[] EvaluatorOptions = new String[0];
307: String[] SearchOptions = new String[0];
308: int current = 0;
309:
310: if (m_ASEvaluator instanceof OptionHandler) {
311: EvaluatorOptions = ((OptionHandler) m_ASEvaluator)
312: .getOptions();
313: }
314:
315: if (m_ASSearch instanceof OptionHandler) {
316: SearchOptions = ((OptionHandler) m_ASSearch).getOptions();
317: }
318:
319: String[] setOptions = new String[10];
320: setOptions[current++] = "-E";
321: setOptions[current++] = getEvaluator().getClass().getName()
322: + " " + Utils.joinOptions(EvaluatorOptions);
323:
324: setOptions[current++] = "-S";
325: setOptions[current++] = getSearch().getClass().getName() + " "
326: + Utils.joinOptions(SearchOptions);
327:
328: while (current < setOptions.length) {
329: setOptions[current++] = "";
330: }
331:
332: return setOptions;
333: }
334:
335: /**
336: * Returns the tip text for this property
337: *
338: * @return tip text for this property suitable for
339: * displaying in the explorer/experimenter gui
340: */
341: public String evaluatorTipText() {
342:
343: return "Determines how attributes/attribute subsets are evaluated.";
344: }
345:
346: /**
347: * set attribute/subset evaluator
348: *
349: * @param evaluator the evaluator to use
350: */
351: public void setEvaluator(ASEvaluation evaluator) {
352: m_ASEvaluator = evaluator;
353: }
354:
355: /**
356: * Returns the tip text for this property
357: *
358: * @return tip text for this property suitable for
359: * displaying in the explorer/experimenter gui
360: */
361: public String searchTipText() {
362:
363: return "Determines the search method.";
364: }
365:
366: /**
367: * Set search class
368: *
369: * @param search the search class to use
370: */
371: public void setSearch(ASSearch search) {
372: m_ASSearch = search;
373: }
374:
375: /**
376: * Get the name of the attribute/subset evaluator
377: *
378: * @return the name of the attribute/subset evaluator as a string
379: */
380: public ASEvaluation getEvaluator() {
381:
382: return m_ASEvaluator;
383: }
384:
385: /**
386: * Get the name of the search method
387: *
388: * @return the name of the search method as a string
389: */
390: public ASSearch getSearch() {
391:
392: return m_ASSearch;
393: }
394:
395: /**
396: * Returns the Capabilities of this filter.
397: *
398: * @return the capabilities of this object
399: * @see Capabilities
400: */
401: public Capabilities getCapabilities() {
402: Capabilities result;
403:
404: if (m_ASEvaluator == null) {
405: result = super .getCapabilities();
406: } else {
407: result = m_ASEvaluator.getCapabilities();
408: // class index will be set if necessary, so we always allow the dataset
409: // to have no class attribute set. see the following method:
410: // weka.attributeSelection.AttributeSelection.SelectAttributes(Instances)
411: result.enable(Capability.NO_CLASS);
412: }
413:
414: result.setMinimumNumberInstances(0);
415:
416: return result;
417: }
418:
419: /**
420: * Input an instance for filtering. Ordinarily the instance is processed
421: * and made available for output immediately. Some filters require all
422: * instances be read before producing output.
423: *
424: * @param instance the input instance
425: * @return true if the filtered instance may now be
426: * collected with output().
427: * @throws IllegalStateException if no input format has been defined.
428: * @throws Exception if the input instance was not of the correct format
429: * or if there was a problem with the filtering.
430: */
431: public boolean input(Instance instance) throws Exception {
432:
433: if (getInputFormat() == null) {
434: throw new IllegalStateException(
435: "No input instance format defined");
436: }
437:
438: if (m_NewBatch) {
439: resetQueue();
440: m_NewBatch = false;
441: }
442:
443: if (isOutputFormatDefined()) {
444: convertInstance(instance);
445: return true;
446: }
447:
448: bufferInput(instance);
449: return false;
450: }
451:
452: /**
453: * Signify that this batch of input to the filter is finished. If the filter
454: * requires all instances prior to filtering, output() may now be called
455: * to retrieve the filtered instances.
456: *
457: * @return true if there are instances pending output.
458: * @throws IllegalStateException if no input structure has been defined.
459: * @throws Exception if there is a problem during the attribute selection.
460: */
461: public boolean batchFinished() throws Exception {
462:
463: if (getInputFormat() == null) {
464: throw new IllegalStateException(
465: "No input instance format defined");
466: }
467:
468: if (!isOutputFormatDefined()) {
469: m_trainSelector.setEvaluator(m_ASEvaluator);
470: m_trainSelector.setSearch(m_ASSearch);
471: m_trainSelector.SelectAttributes(getInputFormat());
472: // System.out.println(m_trainSelector.toResultsString());
473:
474: m_SelectedAttributes = m_trainSelector.selectedAttributes();
475: if (m_SelectedAttributes == null) {
476: throw new Exception("No selected attributes\n");
477: }
478:
479: setOutputFormat();
480:
481: // Convert pending input instances
482: for (int i = 0; i < getInputFormat().numInstances(); i++) {
483: convertInstance(getInputFormat().instance(i));
484: }
485: flushInput();
486: }
487:
488: m_NewBatch = true;
489: return (numPendingOutput() != 0);
490: }
491:
492: /**
493: * Set the output format. Takes the currently defined attribute set
494: * m_InputFormat and calls setOutputFormat(Instances) appropriately.
495: *
496: * @throws Exception if something goes wrong
497: */
498: protected void setOutputFormat() throws Exception {
499: Instances informat;
500:
501: if (m_SelectedAttributes == null) {
502: setOutputFormat(null);
503: return;
504: }
505:
506: FastVector attributes = new FastVector(
507: m_SelectedAttributes.length);
508:
509: int i;
510: if (m_ASEvaluator instanceof AttributeTransformer) {
511: informat = ((AttributeTransformer) m_ASEvaluator)
512: .transformedData();
513: } else {
514: informat = getInputFormat();
515: }
516:
517: for (i = 0; i < m_SelectedAttributes.length; i++) {
518: attributes.addElement(informat.attribute(
519: m_SelectedAttributes[i]).copy());
520: }
521:
522: Instances outputFormat = new Instances(getInputFormat()
523: .relationName(), attributes, 0);
524:
525: if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
526: && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
527: outputFormat.setClassIndex(m_SelectedAttributes.length - 1);
528: }
529:
530: setOutputFormat(outputFormat);
531: }
532:
533: /**
534: * Convert a single instance over. Selected attributes only are transfered.
535: * The converted instance is added to the end of
536: * the output queue.
537: *
538: * @param instance the instance to convert
539: * @throws Exception if something goes wrong
540: */
541: protected void convertInstance(Instance instance) throws Exception {
542: double[] newVals = new double[getOutputFormat().numAttributes()];
543:
544: if (m_ASEvaluator instanceof AttributeTransformer) {
545: Instance tempInstance = ((AttributeTransformer) m_ASEvaluator)
546: .convertInstance(instance);
547: for (int i = 0; i < m_SelectedAttributes.length; i++) {
548: int current = m_SelectedAttributes[i];
549: newVals[i] = tempInstance.value(current);
550: }
551: } else {
552: for (int i = 0; i < m_SelectedAttributes.length; i++) {
553: int current = m_SelectedAttributes[i];
554: newVals[i] = instance.value(current);
555: }
556: }
557: if (instance instanceof SparseInstance) {
558: push(new SparseInstance(instance.weight(), newVals));
559: } else {
560: push(new Instance(instance.weight(), newVals));
561: }
562: }
563:
564: /**
565: * set options to their default values
566: */
567: protected void resetOptions() {
568:
569: m_trainSelector = new weka.attributeSelection.AttributeSelection();
570: setEvaluator(new CfsSubsetEval());
571: setSearch(new BestFirst());
572: m_SelectedAttributes = null;
573: m_FilterOptions = null;
574: }
575:
576: /**
577: * Main method for testing this class.
578: *
579: * @param argv should contain arguments to the filter: use -h for help
580: */
581: public static void main(String[] argv) {
582: runFilter(new AttributeSelection(), argv);
583: }
584: }
|