001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * ClassificationViaRegression.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.classifiers.meta;
024:
025: import weka.classifiers.Classifier;
026: import weka.classifiers.SingleClassifierEnhancer;
027: import weka.core.Capabilities;
028: import weka.core.Instance;
029: import weka.core.Instances;
030: import weka.core.TechnicalInformation;
031: import weka.core.TechnicalInformationHandler;
032: import weka.core.Utils;
033: import weka.core.Capabilities.Capability;
034: import weka.core.TechnicalInformation.Field;
035: import weka.core.TechnicalInformation.Type;
036: import weka.filters.Filter;
037: import weka.filters.unsupervised.attribute.MakeIndicator;
038:
039: /**
040: <!-- globalinfo-start -->
041: * Class for doing classification using regression methods. Class is binarized and one regression model is built for each class value. For more information, see, for example<br/>
042: * <br/>
043: * E. Frank, Y. Wang, S. Inglis, G. Holmes, I.H. Witten (1998). Using model trees for classification. Machine Learning. 32(1):63-76.
044: * <p/>
045: <!-- globalinfo-end -->
046: *
047: <!-- technical-bibtex-start -->
048: * BibTeX:
049: * <pre>
050: * @article{Frank1998,
051: * author = {E. Frank and Y. Wang and S. Inglis and G. Holmes and I.H. Witten},
052: * journal = {Machine Learning},
053: * number = {1},
054: * pages = {63-76},
055: * title = {Using model trees for classification},
056: * volume = {32},
057: * year = {1998}
058: * }
059: * </pre>
060: * <p/>
061: <!-- technical-bibtex-end -->
062: *
063: <!-- options-start -->
064: * Valid options are: <p/>
065: *
066: * <pre> -D
067: * If set, classifier is run in debug mode and
068: * may output additional info to the console</pre>
069: *
070: * <pre> -W
071: * Full name of base classifier.
072: * (default: weka.classifiers.trees.M5P)</pre>
073: *
074: * <pre>
075: * Options specific to classifier weka.classifiers.trees.M5P:
076: * </pre>
077: *
078: * <pre> -N
079: * Use unpruned tree/rules</pre>
080: *
081: * <pre> -U
082: * Use unsmoothed predictions</pre>
083: *
084: * <pre> -R
085: * Build regression tree/rule rather than a model tree/rule</pre>
086: *
* <pre> -M &lt;minimum number of instances&gt;
088: * Set minimum number of instances per leaf
089: * (default 4)</pre>
090: *
091: * <pre> -L
092: * Save instances at the nodes in
093: * the tree (for visualization purposes)</pre>
094: *
095: <!-- options-end -->
096: *
097: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
098: * @author Len Trigg (trigg@cs.waikato.ac.nz)
099: * @version $Revision: 1.26 $
100: */
101: public class ClassificationViaRegression extends
102: SingleClassifierEnhancer implements TechnicalInformationHandler {
103:
104: /** for serialization */
105: static final long serialVersionUID = 4500023123618669859L;
106:
107: /** The classifiers. (One for each class.) */
108: private Classifier[] m_Classifiers;
109:
110: /** The filters used to transform the class. */
111: private MakeIndicator[] m_ClassFilters;
112:
113: /**
114: * Default constructor.
115: */
116: public ClassificationViaRegression() {
117:
118: m_Classifier = new weka.classifiers.trees.M5P();
119: }
120:
121: /**
122: * Returns a string describing classifier
123: * @return a description suitable for
124: * displaying in the explorer/experimenter gui
125: */
126: public String globalInfo() {
127:
128: return "Class for doing classification using regression methods. Class is "
129: + "binarized and one regression model is built for each class value. For more "
130: + "information, see, for example\n\n"
131: + getTechnicalInformation().toString();
132: }
133:
134: /**
135: * Returns an instance of a TechnicalInformation object, containing
136: * detailed information about the technical background of this class,
137: * e.g., paper reference or book this class is based on.
138: *
139: * @return the technical information about this class
140: */
141: public TechnicalInformation getTechnicalInformation() {
142: TechnicalInformation result;
143:
144: result = new TechnicalInformation(Type.ARTICLE);
145: result
146: .setValue(Field.AUTHOR,
147: "E. Frank and Y. Wang and S. Inglis and G. Holmes and I.H. Witten");
148: result.setValue(Field.YEAR, "1998");
149: result.setValue(Field.TITLE,
150: "Using model trees for classification");
151: result.setValue(Field.JOURNAL, "Machine Learning");
152: result.setValue(Field.VOLUME, "32");
153: result.setValue(Field.NUMBER, "1");
154: result.setValue(Field.PAGES, "63-76");
155:
156: return result;
157: }
158:
159: /**
160: * String describing default classifier.
161: *
162: * @return the default classifier classname
163: */
164: protected String defaultClassifierString() {
165:
166: return "weka.classifiers.trees.M5P";
167: }
168:
169: /**
170: * Returns default capabilities of the classifier.
171: *
172: * @return the capabilities of this classifier
173: */
174: public Capabilities getCapabilities() {
175: Capabilities result = super .getCapabilities();
176:
177: // class
178: result.disableAllClasses();
179: result.disableAllClassDependencies();
180: result.enable(Capability.NOMINAL_CLASS);
181:
182: return result;
183: }
184:
185: /**
186: * Builds the classifiers.
187: *
188: * @param insts the training data.
189: * @throws Exception if a classifier can't be built
190: */
191: public void buildClassifier(Instances insts) throws Exception {
192:
193: Instances newInsts;
194:
195: // can classifier handle the data?
196: getCapabilities().testWithFail(insts);
197:
198: // remove instances with missing class
199: insts = new Instances(insts);
200: insts.deleteWithMissingClass();
201:
202: m_Classifiers = Classifier.makeCopies(m_Classifier, insts
203: .numClasses());
204: m_ClassFilters = new MakeIndicator[insts.numClasses()];
205: for (int i = 0; i < insts.numClasses(); i++) {
206: m_ClassFilters[i] = new MakeIndicator();
207: m_ClassFilters[i].setAttributeIndex(""
208: + (insts.classIndex() + 1));
209: m_ClassFilters[i].setValueIndex(i);
210: m_ClassFilters[i].setNumeric(true);
211: m_ClassFilters[i].setInputFormat(insts);
212: newInsts = Filter.useFilter(insts, m_ClassFilters[i]);
213: m_Classifiers[i].buildClassifier(newInsts);
214: }
215: }
216:
217: /**
218: * Returns the distribution for an instance.
219: *
220: * @param inst the instance to get the distribution for
221: * @return the computed distribution
222: * @throws Exception if the distribution can't be computed successfully
223: */
224: public double[] distributionForInstance(Instance inst)
225: throws Exception {
226:
227: double[] probs = new double[inst.numClasses()];
228: Instance newInst;
229: double sum = 0;
230:
231: for (int i = 0; i < inst.numClasses(); i++) {
232: m_ClassFilters[i].input(inst);
233: m_ClassFilters[i].batchFinished();
234: newInst = m_ClassFilters[i].output();
235: probs[i] = m_Classifiers[i].classifyInstance(newInst);
236: if (probs[i] > 1) {
237: probs[i] = 1;
238: }
239: if (probs[i] < 0) {
240: probs[i] = 0;
241: }
242: sum += probs[i];
243: }
244: if (sum != 0) {
245: Utils.normalize(probs, sum);
246: }
247: return probs;
248: }
249:
250: /**
251: * Prints the classifiers.
252: *
253: * @return a string representation of the classifier
254: */
255: public String toString() {
256:
257: if (m_Classifiers == null) {
258: return "Classification via Regression: No model built yet.";
259: }
260: StringBuffer text = new StringBuffer();
261: text.append("Classification via Regression\n\n");
262: for (int i = 0; i < m_Classifiers.length; i++) {
263: text.append("Classifier for class with index " + i
264: + ":\n\n");
265: text.append(m_Classifiers[i].toString() + "\n\n");
266: }
267: return text.toString();
268: }
269:
270: /**
271: * Main method for testing this class.
272: *
273: * @param argv the options for the learner
274: */
275: public static void main(String[] argv) {
276: runClassifier(new ClassificationViaRegression(), argv);
277: }
278: }
|