001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * ClassifierSplitModel.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.classifiers.trees.j48;
024:
025: import weka.core.Instance;
026: import weka.core.Instances;
027: import weka.core.Utils;
028:
029: import java.io.Serializable;
030:
031: /**
032: * Abstract class for classification models that can be used
033: * recursively to split the data.
034: *
035: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
036: * @version $Revision: 1.10 $
037: */
038: public abstract class ClassifierSplitModel implements Cloneable,
039: Serializable {
040:
041: /** for serialization */
042: private static final long serialVersionUID = 4280730118393457457L;
043:
044: /** Distribution of class values. */
045: protected Distribution m_distribution;
046:
047: /** Number of created subsets. */
048: protected int m_numSubsets;
049:
050: /**
051: * Allows to clone a model (shallow copy).
052: */
053: public Object clone() {
054:
055: Object clone = null;
056:
057: try {
058: clone = super .clone();
059: } catch (CloneNotSupportedException e) {
060: }
061: return clone;
062: }
063:
064: /**
065: * Builds the classifier split model for the given set of instances.
066: *
067: * @exception Exception if something goes wrong
068: */
069: public abstract void buildClassifier(Instances instances)
070: throws Exception;
071:
072: /**
073: * Checks if generated model is valid.
074: */
075: public final boolean checkModel() {
076:
077: if (m_numSubsets > 0)
078: return true;
079: else
080: return false;
081: }
082:
083: /**
084: * Classifies a given instance.
085: *
086: * @exception Exception if something goes wrong
087: */
088: public final double classifyInstance(Instance instance)
089: throws Exception {
090:
091: int theSubset;
092:
093: theSubset = whichSubset(instance);
094: if (theSubset > -1)
095: return (double) m_distribution.maxClass(theSubset);
096: else
097: return (double) m_distribution.maxClass();
098: }
099:
100: /**
101: * Gets class probability for instance.
102: *
103: * @exception Exception if something goes wrong
104: */
105: public double classProb(int classIndex, Instance instance,
106: int theSubset) throws Exception {
107:
108: if (theSubset > -1) {
109: return m_distribution.prob(classIndex, theSubset);
110: } else {
111: double[] weights = weights(instance);
112: if (weights == null) {
113: return m_distribution.prob(classIndex);
114: } else {
115: double prob = 0;
116: for (int i = 0; i < weights.length; i++) {
117: prob += weights[i]
118: * m_distribution.prob(classIndex, i);
119: }
120: return prob;
121: }
122: }
123: }
124:
125: /**
126: * Gets class probability for instance.
127: *
128: * @exception Exception if something goes wrong
129: */
130: public double classProbLaplace(int classIndex, Instance instance,
131: int theSubset) throws Exception {
132:
133: if (theSubset > -1) {
134: return m_distribution.laplaceProb(classIndex, theSubset);
135: } else {
136: double[] weights = weights(instance);
137: if (weights == null) {
138: return m_distribution.laplaceProb(classIndex);
139: } else {
140: double prob = 0;
141: for (int i = 0; i < weights.length; i++) {
142: prob += weights[i]
143: * m_distribution.laplaceProb(classIndex, i);
144: }
145: return prob;
146: }
147: }
148: }
149:
150: /**
151: * Returns coding costs of model. Returns 0 if not overwritten.
152: */
153: public double codingCost() {
154:
155: return 0;
156: }
157:
158: /**
159: * Returns the distribution of class values induced by the model.
160: */
161: public final Distribution distribution() {
162:
163: return m_distribution;
164: }
165:
166: /**
167: * Prints left side of condition satisfied by instances.
168: *
169: * @param data the data.
170: */
171: public abstract String leftSide(Instances data);
172:
173: /**
174: * Prints left side of condition satisfied by instances in subset index.
175: */
176: public abstract String rightSide(int index, Instances data);
177:
178: /**
179: * Prints label for subset index of instances (eg class).
180: *
181: * @exception Exception if something goes wrong
182: */
183: public final String dumpLabel(int index, Instances data)
184: throws Exception {
185:
186: StringBuffer text;
187:
188: text = new StringBuffer();
189: text.append(((Instances) data).classAttribute().value(
190: m_distribution.maxClass(index)));
191: text.append(" ("
192: + Utils.roundDouble(m_distribution.perBag(index), 2));
193: if (Utils.gr(m_distribution.numIncorrect(index), 0))
194: text.append("/"
195: + Utils.roundDouble(m_distribution
196: .numIncorrect(index), 2));
197: text.append(")");
198:
199: return text.toString();
200: }
201:
202: public final String sourceClass(int index, Instances data)
203: throws Exception {
204:
205: System.err.println("sourceClass");
206: return (new StringBuffer(m_distribution.maxClass(index)))
207: .toString();
208: }
209:
210: public abstract String sourceExpression(int index, Instances data);
211:
212: /**
213: * Prints the split model.
214: *
215: * @exception Exception if something goes wrong
216: */
217: public final String dumpModel(Instances data) throws Exception {
218:
219: StringBuffer text;
220: int i;
221:
222: text = new StringBuffer();
223: for (i = 0; i < m_numSubsets; i++) {
224: text.append(leftSide(data) + rightSide(i, data) + ": ");
225: text.append(dumpLabel(i, data) + "\n");
226: }
227: return text.toString();
228: }
229:
230: /**
231: * Returns the number of created subsets for the split.
232: */
233: public final int numSubsets() {
234:
235: return m_numSubsets;
236: }
237:
238: /**
239: * Sets distribution associated with model.
240: */
241: public void resetDistribution(Instances data) throws Exception {
242:
243: m_distribution = new Distribution(data, this );
244: }
245:
246: /**
247: * Splits the given set of instances into subsets.
248: *
249: * @exception Exception if something goes wrong
250: */
251: public final Instances[] split(Instances data) throws Exception {
252:
253: Instances[] instances = new Instances[m_numSubsets];
254: double[] weights;
255: double newWeight;
256: Instance instance;
257: int subset, i, j;
258:
259: for (j = 0; j < m_numSubsets; j++)
260: instances[j] = new Instances((Instances) data, data
261: .numInstances());
262: for (i = 0; i < data.numInstances(); i++) {
263: instance = ((Instances) data).instance(i);
264: weights = weights(instance);
265: subset = whichSubset(instance);
266: if (subset > -1)
267: instances[subset].add(instance);
268: else
269: for (j = 0; j < m_numSubsets; j++)
270: if (Utils.gr(weights[j], 0)) {
271: newWeight = weights[j] * instance.weight();
272: instances[j].add(instance);
273: instances[j].lastInstance()
274: .setWeight(newWeight);
275: }
276: }
277: for (j = 0; j < m_numSubsets; j++)
278: instances[j].compactify();
279:
280: return instances;
281: }
282:
283: /**
284: * Returns weights if instance is assigned to more than one subset.
285: * Returns null if instance is only assigned to one subset.
286: */
287: public abstract double[] weights(Instance instance);
288:
289: /**
290: * Returns index of subset instance is assigned to.
291: * Returns -1 if instance is assigned to more than one subset.
292: *
293: * @exception Exception if something goes wrong
294: */
295: public abstract int whichSubset(Instance instance) throws Exception;
296: }
|