001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or (at
005: * your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful, but
008: * WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010: * General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
015:
016: /*
017: * MixtureDistribution.java
018: * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
019: *
020: */
021:
022: package weka.classifiers.functions.pace;
023:
024: import weka.core.TechnicalInformation;
025: import weka.core.TechnicalInformation.Type;
026: import weka.core.TechnicalInformation.Field;
027: import weka.core.TechnicalInformationHandler;
028: import weka.core.matrix.DoubleVector;
029: import weka.core.matrix.IntVector;
030:
031: /**
032: * Abstract class for manipulating mixture distributions. <p>
033: *
034: * REFERENCES <p>
035: *
036: * Wang, Y. (2000). "A new approach to fitting linear models in high
037: * dimensional spaces." PhD Thesis. Department of Computer Science,
038: * University of Waikato, New Zealand. <p>
039: *
040: * Wang, Y. and Witten, I. H. (2002). "Modeling for optimal probability
041: * prediction." Proceedings of ICML'2002. Sydney. <p>
042: *
043: * @author Yong Wang (yongwang@cs.waikato.ac.nz)
044: * @version $Revision: 1.4 $ */
045:
046: public abstract class MixtureDistribution implements
047: TechnicalInformationHandler {
048:
049: protected DiscreteFunction mixingDistribution;
050:
051: /** The nonnegative-measure-based method */
052: public static final int NNMMethod = 1;
053:
054: /** The probability-measure-based method */
055: public static final int PMMethod = 2;
056:
057: // The CDF-based method
058: // public static final int CDFMethod = 3;
059:
060: // The method based on the Kolmogrov and von Mises measure
061: // public static final int ModifiedCDFMethod = 4;
062:
063: /**
064: * Returns an instance of a TechnicalInformation object, containing
065: * detailed information about the technical background of this class,
066: * e.g., paper reference or book this class is based on.
067: *
068: * @return the technical information about this class
069: */
070: public TechnicalInformation getTechnicalInformation() {
071: TechnicalInformation result;
072: TechnicalInformation additional;
073:
074: result = new TechnicalInformation(Type.PHDTHESIS);
075: result.setValue(Field.AUTHOR, "Wang, Y");
076: result.setValue(Field.YEAR, "2000");
077: result
078: .setValue(Field.TITLE,
079: "A new approach to fitting linear models in high dimensional spaces");
080: result
081: .setValue(Field.SCHOOL,
082: "Department of Computer Science, University of Waikato");
083: result.setValue(Field.ADDRESS, "Hamilton, New Zealand");
084:
085: additional = result.add(Type.INPROCEEDINGS);
086: additional.setValue(Field.AUTHOR, "Wang, Y. and Witten, I. H.");
087: additional.setValue(Field.YEAR, "2002");
088: additional.setValue(Field.TITLE,
089: "Modeling for optimal probability prediction");
090: additional
091: .setValue(Field.BOOKTITLE,
092: "Proceedings of the Nineteenth International Conference in Machine Learning");
093: additional.setValue(Field.YEAR, "2002");
094: additional.setValue(Field.PAGES, "650-657");
095: additional.setValue(Field.ADDRESS, "Sydney, Australia");
096:
097: return result;
098: }
099:
100: /**
101: * Gets the mixing distribution
102: *
103: * @return the mixing distribution
104: */
105: public DiscreteFunction getMixingDistribution() {
106: return mixingDistribution;
107: }
108:
109: /** Sets the mixing distribution
110: * @param d the mixing distribution
111: */
112: public void setMixingDistribution(DiscreteFunction d) {
113: mixingDistribution = d;
114: }
115:
116: /** Fits the mixture (or mixing) distribution to the data. The default
117: * method is the nonnegative-measure-based method.
118: * @param data the data, supposedly generated from the mixture model */
119: public void fit(DoubleVector data) {
120: fit(data, NNMMethod);
121: }
122:
123: /** Fits the mixture (or mixing) distribution to the data.
124: * @param data the data supposedly generated from the mixture
125: * @param method the method to be used. Refer to the static final
126: * variables of this class. */
127: public void fit(DoubleVector data, int method) {
128: DoubleVector data2 = (DoubleVector) data.clone();
129: if (data2.unsorted())
130: data2.sort();
131:
132: int n = data2.size();
133: int start = 0;
134: DoubleVector subset;
135: DiscreteFunction d = new DiscreteFunction();
136: for (int i = 0; i < n - 1; i++) {
137: if (separable(data2, start, i, data2.get(i + 1))
138: && separable(data2, i + 1, n - 1, data2.get(i))) {
139: subset = (DoubleVector) data2.subvector(start, i);
140: d.plusEquals(fitForSingleCluster(subset, method)
141: .timesEquals(i - start + 1));
142: start = i + 1;
143: }
144: }
145: subset = (DoubleVector) data2.subvector(start, n - 1);
146: d.plusEquals(fitForSingleCluster(subset, method).timesEquals(
147: n - start));
148: d.sort();
149: d.normalize();
150: mixingDistribution = d;
151: }
152:
153: /**
154: * Fits the mixture (or mixing) distribution to the data. The data is
155: * not pre-clustered for computational efficiency.
156: *
157: * @param data the data supposedly generated from the mixture
158: * @param method the method to be used. Refer to the static final
159: * variables of this class.
160: * @return the generated distribution
161: */
162: public DiscreteFunction fitForSingleCluster(DoubleVector data,
163: int method) {
164:
165: if (data.size() < 2)
166: return new DiscreteFunction(data);
167: DoubleVector sp = supportPoints(data, 0);
168: PaceMatrix fi = fittingIntervals(data);
169: PaceMatrix pm = probabilityMatrix(sp, fi);
170: PaceMatrix epm = new PaceMatrix(empiricalProbability(data, fi)
171: .timesEquals(1. / data.size()));
172:
173: IntVector pvt = (IntVector) IntVector.seq(0, sp.size() - 1);
174: DoubleVector weights;
175:
176: switch (method) {
177: case NNMMethod:
178: weights = pm.nnls(epm, pvt);
179: break;
180: case PMMethod:
181: weights = pm.nnlse1(epm, pvt);
182: break;
183: default:
184: throw new IllegalArgumentException("unknown method");
185: }
186:
187: DoubleVector sp2 = new DoubleVector(pvt.size());
188: for (int i = 0; i < sp2.size(); i++) {
189: sp2.set(i, sp.get(pvt.get(i)));
190: }
191:
192: DiscreteFunction d = new DiscreteFunction(sp2, weights);
193: d.sort();
194: d.normalize();
195: return d;
196: }
197:
198: /**
199: * Return true if a value can be considered for mixture estimatino
200: * separately from the data indexed between i0 and i1
201: *
202: * @param data the data supposedly generated from the mixture
203: * @param i0 the index of the first element in the group
204: * @param i1 the index of the last element in the group
205: * @param x the value
206: * @return true if a value can be considered
207: */
208: public abstract boolean separable(DoubleVector data, int i0,
209: int i1, double x);
210:
211: /**
212: * Contructs the set of support points for mixture estimation.
213: *
214: * @param data the data supposedly generated from the mixture
215: * @param ne the number of extra data that are suppposedly discarded
216: * earlier and not passed into here
217: * @return the set of support points
218: */
219: public abstract DoubleVector supportPoints(DoubleVector data, int ne);
220:
221: /**
222: * Contructs the set of fitting intervals for mixture estimation.
223: *
224: * @param data the data supposedly generated from the mixture
225: * @return the set of fitting intervals
226: */
227: public abstract PaceMatrix fittingIntervals(DoubleVector data);
228:
229: /**
230: * Contructs the probability matrix for mixture estimation, given a set
231: * of support points and a set of intervals.
232: *
233: * @param s the set of support points
234: * @param intervals the intervals
235: * @return the probability matrix
236: */
237: public abstract PaceMatrix probabilityMatrix(DoubleVector s,
238: PaceMatrix intervals);
239:
240: /**
241: * Computes the empirical probabilities of the data over a set of
242: * intervals.
243: *
244: * @param data the data
245: * @param intervals the intervals
246: * @return the empirical probabilities
247: */
248: public PaceMatrix empiricalProbability(DoubleVector data,
249: PaceMatrix intervals) {
250: int n = data.size();
251: int k = intervals.getRowDimension();
252: PaceMatrix epm = new PaceMatrix(k, 1, 0);
253:
254: double point;
255: for (int j = 0; j < n; j++) {
256: for (int i = 0; i < k; i++) {
257: point = 0.0;
258: if (intervals.get(i, 0) == data.get(j)
259: || intervals.get(i, 1) == data.get(j))
260: point = 0.5;
261: else if (intervals.get(i, 0) < data.get(j)
262: && intervals.get(i, 1) > data.get(j))
263: point = 1.0;
264: epm.setPlus(i, 0, point);
265: }
266: }
267: return epm;
268: }
269:
270: /**
271: * Converts to a string
272: *
273: * @return a string representation
274: */
275: public String toString() {
276: return "The mixing distribution:\n"
277: + mixingDistribution.toString();
278: }
279:
280: }
|