001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * AttributeStats.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.core;
024:
025: import java.io.Serializable;
026:
027: /**
* A utility class that contains summary information on
* the values that appear in a dataset for a particular attribute.
030: *
031: * @author <a href="mailto:len@reeltwo.com">Len Trigg</a>
032: * @version $Revision: 1.9 $
033: */
034: public class AttributeStats implements Serializable {
035:
036: /** for serialization */
037: private static final long serialVersionUID = 4434688832743939380L;
038:
039: /** The number of int-like values */
040: public int intCount = 0;
041:
042: /** The number of real-like values (i.e. have a fractional part) */
043: public int realCount = 0;
044:
045: /** The number of missing values */
046: public int missingCount = 0;
047:
048: /** The number of distinct values */
049: public int distinctCount = 0;
050:
051: /** The number of values that only appear once */
052: public int uniqueCount = 0;
053:
054: /** The total number of values (i.e. number of instances) */
055: public int totalCount = 0;
056:
057: /** Stats on numeric value distributions */
058: // perhaps Stats should be moved from weka.experiment to weka.core
059: public weka.experiment.Stats numericStats;
060:
061: /** Counts of each nominal value */
062: public int[] nominalCounts;
063:
064: /**
065: * Updates the counters for one more observed distinct value.
066: *
067: * @param value the value that has just been seen
068: * @param count the number of times the value appeared
069: */
070: protected void addDistinct(double value, int count) {
071:
072: if (count > 0) {
073: if (count == 1) {
074: uniqueCount++;
075: }
076: if (Utils.eq(value, (double) ((int) value))) {
077: intCount += count;
078: } else {
079: realCount += count;
080: }
081: if (nominalCounts != null) {
082: nominalCounts[(int) value] = count;
083: }
084: if (numericStats != null) {
085: numericStats.add(value, count);
086: numericStats.calculateDerived();
087: }
088: }
089: distinctCount++;
090: }
091:
092: /**
093: * Returns a human readable representation of this AttributeStats instance.
094: *
095: * @return a String represtinging these AttributeStats.
096: */
097: public String toString() {
098:
099: StringBuffer sb = new StringBuffer();
100: sb.append(Utils.padLeft("Type", 4)).append(
101: Utils.padLeft("Nom", 5));
102: sb.append(Utils.padLeft("Int", 5)).append(
103: Utils.padLeft("Real", 5));
104: sb.append(Utils.padLeft("Missing", 12));
105: sb.append(Utils.padLeft("Unique", 12));
106: sb.append(Utils.padLeft("Dist", 6));
107: if (nominalCounts != null) {
108: sb.append(' ');
109: for (int i = 0; i < nominalCounts.length; i++) {
110: sb.append(Utils.padLeft("C[" + i + "]", 5));
111: }
112: }
113: sb.append('\n');
114:
115: long percent;
116: percent = Math.round(100.0 * intCount / totalCount);
117: if (nominalCounts != null) {
118: sb.append(Utils.padLeft("Nom", 4)).append(' ');
119: sb.append(Utils.padLeft("" + percent, 3)).append("% ");
120: sb.append(Utils.padLeft("" + 0, 3)).append("% ");
121: } else {
122: sb.append(Utils.padLeft("Num", 4)).append(' ');
123: sb.append(Utils.padLeft("" + 0, 3)).append("% ");
124: sb.append(Utils.padLeft("" + percent, 3)).append("% ");
125: }
126: percent = Math.round(100.0 * realCount / totalCount);
127: sb.append(Utils.padLeft("" + percent, 3)).append("% ");
128: sb.append(Utils.padLeft("" + missingCount, 5)).append(" /");
129: percent = Math.round(100.0 * missingCount / totalCount);
130: sb.append(Utils.padLeft("" + percent, 3)).append("% ");
131: sb.append(Utils.padLeft("" + uniqueCount, 5)).append(" /");
132: percent = Math.round(100.0 * uniqueCount / totalCount);
133: sb.append(Utils.padLeft("" + percent, 3)).append("% ");
134: sb.append(Utils.padLeft("" + distinctCount, 5)).append(' ');
135: if (nominalCounts != null) {
136: for (int i = 0; i < nominalCounts.length; i++) {
137: sb.append(Utils.padLeft("" + nominalCounts[i], 5));
138: }
139: }
140: sb.append('\n');
141: return sb.toString();
142: }
143: }
|