001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * DensityBasedClusterer.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.clusterers;
024:
025: import weka.core.Instance;
026: import weka.core.SerializedObject;
027: import weka.core.Utils;
028:
029: /**
030: * Abstract clustering model that produces (for each test instance)
031: * an estimate of the membership in each cluster
032: * (ie. a probability distribution).
033: *
034: * @author Mark Hall (mhall@cs.waikato.ac.nz)
035: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
036: * @version $Revision: 1.7 $
037: */
038: public abstract class DensityBasedClusterer extends Clusterer {
039:
040: /** for serialization */
041: private static final long serialVersionUID = -5950728041704213845L;
042:
043: // ===============
044: // Public methods.
045: // ===============
046:
047: /**
048: * Returns the prior probability of each cluster.
049: *
050: * @return the prior probability for each cluster
051: * @exception Exception if priors could not be
052: * returned successfully
053: */
054: public abstract double[] clusterPriors() throws Exception;
055:
056: /**
057: * Computes the log of the conditional density (per cluster) for a given instance.
058: *
059: * @param instance the instance to compute the density for
060: * @return an array containing the estimated densities
061: * @exception Exception if the density could not be computed
062: * successfully
063: */
064: public abstract double[] logDensityPerClusterForInstance(
065: Instance instance) throws Exception;
066:
067: /**
068: * Computes the density for a given instance.
069: *
070: * @param instance the instance to compute the density for
071: * @return the density.
072: * @exception Exception if the density could not be computed successfully
073: */
074: public double logDensityForInstance(Instance instance)
075: throws Exception {
076:
077: double[] a = logJointDensitiesForInstance(instance);
078: double max = a[Utils.maxIndex(a)];
079: double sum = 0.0;
080:
081: for (int i = 0; i < a.length; i++) {
082: sum += Math.exp(a[i] - max);
083: }
084:
085: return max + Math.log(sum);
086: }
087:
088: /**
089: * Returns the cluster probability distribution for an instance.
090: *
091: * @param instance the instance to be clustered
092: * @return the probability distribution
093: * @throws Exception if computation fails
094: */
095: public double[] distributionForInstance(Instance instance)
096: throws Exception {
097:
098: return Utils.logs2probs(logJointDensitiesForInstance(instance));
099: }
100:
101: /**
102: * Returns the logs of the joint densities for a given instance.
103: *
104: * @param inst the instance
105: * @return the array of values
106: * @exception Exception if values could not be computed
107: */
108: public double[] logJointDensitiesForInstance(Instance inst)
109: throws Exception {
110:
111: double[] weights = logDensityPerClusterForInstance(inst);
112: double[] priors = clusterPriors();
113:
114: for (int i = 0; i < weights.length; i++) {
115: if (priors[i] > 0) {
116: weights[i] += Math.log(priors[i]);
117: } else {
118: throw new IllegalArgumentException("Cluster empty!");
119: }
120: }
121: return weights;
122: }
123:
124: /**
125: * Creates copies of the current clusterer. Note that this method
126: * now uses Serialization to perform a deep copy, so the Clusterer
127: * object must be fully Serializable. Any currently built model will
128: * now be copied as well.
129: *
130: * @param model an example clusterer to copy
131: * @param num the number of clusterer copies to create.
132: * @return an array of clusterers.
133: * @exception Exception if an error occurs
134: */
135: public static DensityBasedClusterer[] makeCopies(
136: DensityBasedClusterer model, int num) throws Exception {
137: if (model == null) {
138: throw new Exception("No model clusterer set");
139: }
140: DensityBasedClusterer[] clusterers = new DensityBasedClusterer[num];
141: SerializedObject so = new SerializedObject(model);
142: for (int i = 0; i < clusterers.length; i++) {
143: clusterers[i] = (DensityBasedClusterer) so.getObject();
144: }
145: return clusterers;
146: }
147: }
|