001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * Obfuscate.java
019: * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.filters.unsupervised.attribute;
024:
025: import weka.core.Attribute;
026: import weka.core.Capabilities;
027: import weka.core.FastVector;
028: import weka.core.Instance;
029: import weka.core.Instances;
030: import weka.core.Capabilities.Capability;
031: import weka.filters.Filter;
032: import weka.filters.StreamableFilter;
033: import weka.filters.UnsupervisedFilter;
034:
035: /**
036: <!-- globalinfo-start -->
037: * A simple instance filter that renames the relation, all attribute names and all nominal (and string) attribute values. For exchanging sensitive datasets. Currently doesn't like string or relational attributes.
038: * <p/>
039: <!-- globalinfo-end -->
040: *
041: * @author Len Trigg (len@reeltwo.com)
042: * @version $Revision: 1.6 $
043: */
044: public class Obfuscate extends Filter implements UnsupervisedFilter,
045: StreamableFilter {
046:
047: /** for serialization */
048: static final long serialVersionUID = -343922772462971561L;
049:
050: /**
051: * Returns a string describing this filter
052: *
053: * @return a description of the filter suitable for
054: * displaying in the explorer/experimenter gui
055: */
056: public String globalInfo() {
057: return "A simple instance filter that renames the relation, all attribute names "
058: + "and all nominal (and string) attribute values. For exchanging sensitive "
059: + "datasets. Currently doesn't like string or relational attributes.";
060: }
061:
062: /**
063: * Returns the Capabilities of this filter.
064: *
065: * @return the capabilities of this object
066: * @see Capabilities
067: */
068: public Capabilities getCapabilities() {
069: Capabilities result = super .getCapabilities();
070:
071: // attributes
072: result.enableAllAttributes();
073: result.enable(Capability.MISSING_VALUES);
074:
075: // class
076: result.enableAllClasses();
077: result.enable(Capability.MISSING_CLASS_VALUES);
078: result.enable(Capability.NO_CLASS);
079:
080: return result;
081: }
082:
083: /**
084: * Sets the format of the input instances.
085: *
086: * @param instanceInfo an Instances object containing the input instance
087: * structure (any instances contained in the object are ignored - only the
088: * structure is required).
089: * @return true if the outputFormat may be collected immediately
090: * @throws Exception if
091: */
092: public boolean setInputFormat(Instances instanceInfo)
093: throws Exception {
094:
095: super .setInputFormat(instanceInfo);
096:
097: // Make the obfuscated header
098: FastVector v = new FastVector();
099: for (int i = 0; i < instanceInfo.numAttributes(); i++) {
100: Attribute oldAtt = instanceInfo.attribute(i);
101: Attribute newAtt = null;
102: switch (oldAtt.type()) {
103: case Attribute.NUMERIC:
104: newAtt = new Attribute("A" + (i + 1));
105: break;
106: case Attribute.NOMINAL:
107: FastVector vals = new FastVector();
108: for (int j = 0; j < oldAtt.numValues(); j++) {
109: vals.addElement("V" + (j + 1));
110: }
111: newAtt = new Attribute("A" + (i + 1), vals);
112: break;
113: case Attribute.STRING:
114: case Attribute.RELATIONAL:
115: default:
116: newAtt = (Attribute) oldAtt.copy();
117: System.err.println("Not converting attribute: "
118: + oldAtt.name());
119: break;
120: }
121: v.addElement(newAtt);
122: }
123: Instances newHeader = new Instances("R", v, 10);
124: newHeader.setClassIndex(instanceInfo.classIndex());
125: setOutputFormat(newHeader);
126: return true;
127: }
128:
129: /**
130: * Input an instance for filtering. Ordinarily the instance is processed
131: * and made available for output immediately. Some filters require all
132: * instances be read before producing output.
133: *
134: * @param instance the input instance
135: * @return true if the filtered instance may now be
136: * collected with output().
137: * @throws IllegalStateException if no input format has been set.
138: */
139: public boolean input(Instance instance) {
140:
141: if (getInputFormat() == null) {
142: throw new IllegalStateException(
143: "No input instance format defined");
144: }
145: if (m_NewBatch) {
146: resetQueue();
147: m_NewBatch = false;
148: }
149: push((Instance) instance.copy());
150: return true;
151: }
152:
153: /**
154: * Main method for testing this class.
155: *
156: * @param argv should contain arguments to the filter: use -h for help
157: */
158: public static void main(String[] argv) {
159: runFilter(new Obfuscate(), argv);
160: }
161: }
|