001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * RemovePercentage.java
019: * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.filters.unsupervised.instance;
024:
025: import weka.core.Capabilities;
026: import weka.core.Instance;
027: import weka.core.Instances;
028: import weka.core.Option;
029: import weka.core.OptionHandler;
030: import weka.core.Utils;
031: import weka.core.Capabilities.Capability;
032: import weka.filters.Filter;
033: import weka.filters.UnsupervisedFilter;
034:
035: import java.util.Enumeration;
036: import java.util.Vector;
037:
038: /**
039: <!-- globalinfo-start -->
040: * A filter that removes a given percentage of a dataset.
041: * <p/>
042: <!-- globalinfo-end -->
043: *
044: <!-- options-start -->
045: * Valid options are: <p/>
046: *
047: * <pre> -P <percentage>
048: * Specifies percentage of instances to select. (default 50)
049: * </pre>
050: *
051: * <pre> -V
052: * Specifies if inverse of selection is to be output.
053: * </pre>
054: *
055: <!-- options-end -->
056: *
057: * @author Richard Kirkby (eibe@cs.waikato.ac.nz)
058: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
059: * @version $Revision: 1.7 $
060: */
061: public class RemovePercentage extends Filter implements
062: UnsupervisedFilter, OptionHandler {
063:
064: /** for serialization */
065: static final long serialVersionUID = 2150341191158533133L;
066:
067: /** Percentage of instances to select. */
068: private int m_Percentage = 50;
069:
070: /** Indicates if inverse of selection is to be output. */
071: private boolean m_Inverse = false;
072:
073: /**
074: * Gets an enumeration describing the available options..
075: *
076: * @return an enumeration of all the available options.
077: */
078: public Enumeration listOptions() {
079:
080: Vector newVector = new Vector(2);
081:
082: newVector
083: .addElement(new Option(
084: "\tSpecifies percentage of instances to select. (default 50)\n",
085: "P", 1, "-P <percentage>"));
086:
087: newVector
088: .addElement(new Option(
089: "\tSpecifies if inverse of selection is to be output.\n",
090: "V", 0, "-V"));
091:
092: return newVector.elements();
093: }
094:
095: /**
096: * Parses a given list of options. <p/>
097: *
098: <!-- options-start -->
099: * Valid options are: <p/>
100: *
101: * <pre> -P <percentage>
102: * Specifies percentage of instances to select. (default 50)
103: * </pre>
104: *
105: * <pre> -V
106: * Specifies if inverse of selection is to be output.
107: * </pre>
108: *
109: <!-- options-end -->
110: *
111: * @param options the list of options as an array of strings
112: * @throws Exception if an option is not supported
113: */
114: public void setOptions(String[] options) throws Exception {
115:
116: String percent = Utils.getOption('P', options);
117: if (percent.length() != 0) {
118: setPercentage(Integer.parseInt(percent));
119: } else {
120: setPercentage(50);
121: }
122: setInvertSelection(Utils.getFlag('V', options));
123:
124: if (getInputFormat() != null) {
125: setInputFormat(getInputFormat());
126: }
127: }
128:
129: /**
130: * Gets the current settings of the filter.
131: *
132: * @return an array of strings suitable for passing to setOptions
133: */
134: public String[] getOptions() {
135:
136: String[] options = new String[5];
137: int current = 0;
138:
139: options[current++] = "-P";
140: options[current++] = "" + getPercentage();
141: if (getInvertSelection()) {
142: options[current++] = "-V";
143: }
144:
145: while (current < options.length) {
146: options[current++] = "";
147: }
148: return options;
149: }
150:
151: /**
152: * Returns a string describing this filter
153: *
154: * @return a description of the filter suitable for
155: * displaying in the explorer/experimenter gui
156: */
157: public String globalInfo() {
158:
159: return "A filter that removes a given percentage of a dataset.";
160: }
161:
162: /**
163: * Returns the tip text for this property
164: *
165: * @return tip text for this property suitable for
166: * displaying in the explorer/experimenter gui
167: */
168: public String percentageTipText() {
169:
170: return "The percentage of the data to select.";
171: }
172:
173: /**
174: * Gets the percentage of instances to select.
175: *
176: * @return the percentage.
177: */
178: public int getPercentage() {
179:
180: return m_Percentage;
181: }
182:
183: /**
184: * Sets the percentage of intances to select.
185: *
186: * @param percent the percentage
187: * @throws IllegalArgumentException if percenatge out of range
188: */
189: public void setPercentage(int percent) {
190:
191: if (percent < 0 || percent > 100) {
192: throw new IllegalArgumentException(
193: "Percentage must be between 0 and 100.");
194: }
195: m_Percentage = percent;
196: }
197:
198: /**
199: * Returns the tip text for this property
200: *
201: * @return tip text for this property suitable for
202: * displaying in the explorer/experimenter gui
203: */
204: public String invertSelectionTipText() {
205:
206: return "Whether to invert the selection.";
207: }
208:
209: /**
210: * Gets if selection is to be inverted.
211: *
212: * @return true if the selection is to be inverted
213: */
214: public boolean getInvertSelection() {
215:
216: return m_Inverse;
217: }
218:
219: /**
220: * Sets if selection is to be inverted.
221: *
222: * @param inverse true if inversion is to be performed
223: */
224: public void setInvertSelection(boolean inverse) {
225:
226: m_Inverse = inverse;
227: }
228:
229: /**
230: * Returns the Capabilities of this filter.
231: *
232: * @return the capabilities of this object
233: * @see Capabilities
234: */
235: public Capabilities getCapabilities() {
236: Capabilities result = super .getCapabilities();
237:
238: // attributes
239: result.enableAllAttributes();
240: result.enable(Capability.MISSING_VALUES);
241:
242: // class
243: result.enableAllClasses();
244: result.enable(Capability.MISSING_CLASS_VALUES);
245: result.enable(Capability.NO_CLASS);
246:
247: return result;
248: }
249:
250: /**
251: * Sets the format of the input instances.
252: *
253: * @param instanceInfo an Instances object containing the input instance
254: * structure (any instances contained in the object are ignored - only the
255: * structure is required).
256: * @return true because outputFormat can be collected immediately
257: * @throws Exception if the input format can't be set successfully
258: */
259: public boolean setInputFormat(Instances instanceInfo)
260: throws Exception {
261:
262: super .setInputFormat(instanceInfo);
263: setOutputFormat(instanceInfo);
264: return true;
265: }
266:
267: /**
268: * Input an instance for filtering. Ordinarily the instance is processed
269: * and made available for output immediately. Some filters require all
270: * instances be read before producing output.
271: *
272: * @param instance the input instance
273: * @return true if the filtered instance may now be
274: * collected with output().
275: * @throws IllegalStateException if no input format has been set.
276: */
277: public boolean input(Instance instance) {
278: if (getInputFormat() == null) {
279: throw new IllegalStateException(
280: "No input instance format defined");
281: }
282:
283: if (m_NewBatch) {
284: resetQueue();
285: m_NewBatch = false;
286: }
287:
288: if (isFirstBatchDone()) {
289: push(instance);
290: return true;
291: } else {
292: bufferInput(instance);
293: return false;
294: }
295: }
296:
297: /**
298: * Signify that this batch of input to the filter is
299: * finished. Output() may now be called to retrieve the filtered
300: * instances.
301: *
302: * @return true if there are instances pending output
303: * @throws IllegalStateException if no input structure has been defined
304: */
305: public boolean batchFinished() {
306:
307: if (getInputFormat() == null) {
308: throw new IllegalStateException(
309: "No input instance format defined");
310: }
311:
312: // Push instances for output into output queue
313: Instances toFilter = getInputFormat();
314: int cutOff = toFilter.numInstances() * m_Percentage / 100;
315:
316: if (m_Inverse) {
317: for (int i = 0; i < cutOff; i++) {
318: push(toFilter.instance(i));
319: }
320: } else {
321: for (int i = cutOff; i < toFilter.numInstances(); i++) {
322: push(toFilter.instance(i));
323: }
324: }
325: flushInput();
326:
327: m_NewBatch = true;
328: m_FirstBatchDone = true;
329:
330: return (numPendingOutput() != 0);
331: }
332:
333: /**
334: * Main method for testing this class.
335: *
336: * @param argv should contain arguments to the filter: use -h for help
337: */
338: public static void main(String[] argv) {
339: runFilter(new RemovePercentage(), argv);
340: }
341: }
|