001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * GainRatioSplitCrit.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.classifiers.trees.j48;
024:
025: import weka.core.Utils;
026:
027: /**
028: * Class for computing the gain ratio for a given distribution.
029: *
030: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
031: * @version $Revision: 1.7 $
032: */
033: public final class GainRatioSplitCrit extends EntropyBasedSplitCrit {
034:
035: /** for serialization */
036: private static final long serialVersionUID = -433336694718670930L;
037:
038: /**
039: * This method is a straightforward implementation of the gain
040: * ratio criterion for the given distribution.
041: */
042: public final double splitCritValue(Distribution bags) {
043:
044: double numerator;
045: double denumerator;
046:
047: numerator = oldEnt(bags) - newEnt(bags);
048:
049: // Splits with no gain are useless.
050: if (Utils.eq(numerator, 0))
051: return Double.MAX_VALUE;
052: denumerator = splitEnt(bags);
053:
054: // Test if split is trivial.
055: if (Utils.eq(denumerator, 0))
056: return Double.MAX_VALUE;
057:
058: // We take the reciprocal value because we want to minimize the
059: // splitting criterion's value.
060: return denumerator / numerator;
061: }
062:
063: /**
064: * This method computes the gain ratio in the same way C4.5 does.
065: *
066: * @param bags the distribution
067: * @param totalnoInst the weight of ALL instances
068: * @param numerator the info gain
069: */
070: public final double splitCritValue(Distribution bags,
071: double totalnoInst, double numerator) {
072:
073: double denumerator;
074: double noUnknown;
075: double unknownRate;
076: int i;
077:
078: // Compute split info.
079: denumerator = splitEnt(bags, totalnoInst);
080:
081: // Test if split is trivial.
082: if (Utils.eq(denumerator, 0))
083: return 0;
084: denumerator = denumerator / totalnoInst;
085:
086: return numerator / denumerator;
087: }
088:
089: /**
090: * Help method for computing the split entropy.
091: */
092: private final double splitEnt(Distribution bags, double totalnoInst) {
093:
094: double returnValue = 0;
095: double noUnknown;
096: int i;
097:
098: noUnknown = totalnoInst - bags.total();
099: if (Utils.gr(bags.total(), 0)) {
100: for (i = 0; i < bags.numBags(); i++)
101: returnValue = returnValue - logFunc(bags.perBag(i));
102: returnValue = returnValue - logFunc(noUnknown);
103: returnValue = returnValue + logFunc(totalnoInst);
104: }
105: return returnValue;
106: }
107: }
|