Source Code Cross Referenced for ChiSquaredAttributeEval.java in  » Science » weka » weka » attributeSelection » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.attributeSelection 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *    This program is free software; you can redistribute it and/or modify
003:         *    it under the terms of the GNU General Public License as published by
004:         *    the Free Software Foundation; either version 2 of the License, or
005:         *    (at your option) any later version.
006:         *
007:         *    This program is distributed in the hope that it will be useful,
008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
010:         *    GNU General Public License for more details.
011:         *
012:         *    You should have received a copy of the GNU General Public License
013:         *    along with this program; if not, write to the Free Software
014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015:         */
016:
017:        /*
018:         *    ChiSquaredAttributeEval.java
019:         *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020:         *
021:         */
022:
023:        package weka.attributeSelection;
024:
025:        import weka.core.Capabilities;
026:        import weka.core.ContingencyTables;
027:        import weka.core.Instance;
028:        import weka.core.Instances;
029:        import weka.core.Option;
030:        import weka.core.OptionHandler;
031:        import weka.core.Utils;
032:        import weka.core.Capabilities.Capability;
033:        import weka.filters.Filter;
034:        import weka.filters.supervised.attribute.Discretize;
035:        import weka.filters.unsupervised.attribute.NumericToBinary;
036:
037:        import java.util.Enumeration;
038:        import java.util.Vector;
039:
040:        /** 
041:         <!-- globalinfo-start -->
042:         * ChiSquaredAttributeEval :<br/>
043:         * <br/>
044:         * Evaluates the worth of an attribute by computing the value of the chi-squared statistic with respect to the class.<br/>
045:         * <p/>
046:         <!-- globalinfo-end -->
047:         *
048:         <!-- options-start -->
049:         * Valid options are: <p/>
050:         * 
051:         * <pre> -M
052:         *  treat missing values as a seperate value.</pre>
053:         * 
054:         * <pre> -B
055:         *  just binarize numeric attributes instead 
056:         *  of properly discretizing them.</pre>
057:         * 
058:         <!-- options-end -->
059:         *
060:         * @author Eibe Frank (eibe@cs.waikato.ac.nz)
061:         * @version $Revision: 1.14 $ 
062:         * @see Discretize
063:         * @see NumericToBinary
064:         */
065:        public class ChiSquaredAttributeEval extends AttributeEvaluator
066:                implements  OptionHandler {
067:
068:            /** for serialization */
069:            static final long serialVersionUID = -8316857822521717692L;
070:
071:            /** Treat missing values as a seperate value */
072:            private boolean m_missing_merge;
073:
074:            /** Just binarize numeric attributes */
075:            private boolean m_Binarize;
076:
077:            /** The chi-squared value for each attribute */
078:            private double[] m_ChiSquareds;
079:
080:            /**
081:             * Returns a string describing this attribute evaluator
082:             * @return a description of the evaluator suitable for
083:             * displaying in the explorer/experimenter gui
084:             */
085:            public String globalInfo() {
086:                return "ChiSquaredAttributeEval :\n\nEvaluates the worth of an attribute "
087:                        + "by computing the value of the chi-squared statistic with respect to the class.\n";
088:            }
089:
090:            /**
091:             * Constructor
092:             */
093:            public ChiSquaredAttributeEval() {
094:                resetOptions();
095:            }
096:
097:            /**
098:             * Returns an enumeration describing the available options
099:             * @return an enumeration of all the available options
100:             **/
101:            public Enumeration listOptions() {
102:                Vector newVector = new Vector(2);
103:                newVector.addElement(new Option(
104:                        "\ttreat missing values as a seperate " + "value.",
105:                        "M", 0, "-M"));
106:                newVector.addElement(new Option(
107:                        "\tjust binarize numeric attributes instead \n"
108:                                + "\tof properly discretizing them.", "B", 0,
109:                        "-B"));
110:                return newVector.elements();
111:            }
112:
113:            /**
114:             * Parses a given list of options. <p/>
115:             *
116:             <!-- options-start -->
117:             * Valid options are: <p/>
118:             * 
119:             * <pre> -M
120:             *  treat missing values as a seperate value.</pre>
121:             * 
122:             * <pre> -B
123:             *  just binarize numeric attributes instead 
124:             *  of properly discretizing them.</pre>
125:             * 
126:             <!-- options-end -->
127:             *
128:             * @param options the list of options as an array of strings
129:             * @throws Exception if an option is not supported
130:             */
131:            public void setOptions(String[] options) throws Exception {
132:
133:                resetOptions();
134:                setMissingMerge(!(Utils.getFlag('M', options)));
135:                setBinarizeNumericAttributes(Utils.getFlag('B', options));
136:            }
137:
138:            /**
139:             * Gets the current settings.
140:             *
141:             * @return an array of strings suitable for passing to setOptions()
142:             */
143:            public String[] getOptions() {
144:                String[] options = new String[2];
145:                int current = 0;
146:
147:                if (!getMissingMerge()) {
148:                    options[current++] = "-M";
149:                }
150:                if (getBinarizeNumericAttributes()) {
151:                    options[current++] = "-B";
152:                }
153:
154:                while (current < options.length) {
155:                    options[current++] = "";
156:                }
157:
158:                return options;
159:            }
160:
161:            /**
162:             * Returns the tip text for this property
163:             * @return tip text for this property suitable for
164:             * displaying in the explorer/experimenter gui
165:             */
166:            public String binarizeNumericAttributesTipText() {
167:                return "Just binarize numeric attributes instead of properly discretizing them.";
168:            }
169:
170:            /**
171:             * Binarize numeric attributes.
172:             *
173:             * @param b true=binarize numeric attributes
174:             */
175:            public void setBinarizeNumericAttributes(boolean b) {
176:                m_Binarize = b;
177:            }
178:
179:            /**
180:             * get whether numeric attributes are just being binarized.
181:             *
182:             * @return true if missing values are being distributed.
183:             */
184:            public boolean getBinarizeNumericAttributes() {
185:                return m_Binarize;
186:            }
187:
188:            /**
189:             * Returns the tip text for this property
190:             * @return tip text for this property suitable for
191:             * displaying in the explorer/experimenter gui
192:             */
193:            public String missingMergeTipText() {
194:                return "Distribute counts for missing values. Counts are distributed "
195:                        + "across other values in proportion to their frequency. Otherwise, "
196:                        + "missing is treated as a separate value.";
197:            }
198:
199:            /**
200:             * distribute the counts for missing values across observed values
201:             *
202:             * @param b true=distribute missing values.
203:             */
204:            public void setMissingMerge(boolean b) {
205:                m_missing_merge = b;
206:            }
207:
208:            /**
209:             * get whether missing values are being distributed or not
210:             *
211:             * @return true if missing values are being distributed.
212:             */
213:            public boolean getMissingMerge() {
214:                return m_missing_merge;
215:            }
216:
217:            /**
218:             * Returns the capabilities of this evaluator.
219:             *
220:             * @return            the capabilities of this evaluator
221:             * @see               Capabilities
222:             */
223:            public Capabilities getCapabilities() {
224:                Capabilities result = super .getCapabilities();
225:
226:                // attributes
227:                result.enable(Capability.NOMINAL_ATTRIBUTES);
228:                result.enable(Capability.NUMERIC_ATTRIBUTES);
229:                result.enable(Capability.DATE_ATTRIBUTES);
230:                result.enable(Capability.MISSING_VALUES);
231:
232:                // class
233:                result.enable(Capability.NOMINAL_CLASS);
234:                result.enable(Capability.MISSING_CLASS_VALUES);
235:
236:                return result;
237:            }
238:
239:            /**
240:             * Initializes a chi-squared attribute evaluator.
241:             * Discretizes all attributes that are numeric.
242:             *
243:             * @param data set of instances serving as training data 
244:             * @throws Exception if the evaluator has not been 
245:             * generated successfully
246:             */
247:            public void buildEvaluator(Instances data) throws Exception {
248:
249:                // can evaluator handle data?
250:                getCapabilities().testWithFail(data);
251:
252:                int classIndex = data.classIndex();
253:                int numInstances = data.numInstances();
254:
255:                if (!m_Binarize) {
256:                    Discretize disTransform = new Discretize();
257:                    disTransform.setUseBetterEncoding(true);
258:                    disTransform.setInputFormat(data);
259:                    data = Filter.useFilter(data, disTransform);
260:                } else {
261:                    NumericToBinary binTransform = new NumericToBinary();
262:                    binTransform.setInputFormat(data);
263:                    data = Filter.useFilter(data, binTransform);
264:                }
265:                int numClasses = data.attribute(classIndex).numValues();
266:
267:                // Reserve space and initialize counters
268:                double[][][] counts = new double[data.numAttributes()][][];
269:                for (int k = 0; k < data.numAttributes(); k++) {
270:                    if (k != classIndex) {
271:                        int numValues = data.attribute(k).numValues();
272:                        counts[k] = new double[numValues + 1][numClasses + 1];
273:                    }
274:                }
275:
276:                // Initialize counters
277:                double[] temp = new double[numClasses + 1];
278:                for (int k = 0; k < numInstances; k++) {
279:                    Instance inst = data.instance(k);
280:                    if (inst.classIsMissing()) {
281:                        temp[numClasses] += inst.weight();
282:                    } else {
283:                        temp[(int) inst.classValue()] += inst.weight();
284:                    }
285:                }
286:                for (int k = 0; k < counts.length; k++) {
287:                    if (k != classIndex) {
288:                        for (int i = 0; i < temp.length; i++) {
289:                            counts[k][0][i] = temp[i];
290:                        }
291:                    }
292:                }
293:
294:                // Get counts
295:                for (int k = 0; k < numInstances; k++) {
296:                    Instance inst = data.instance(k);
297:                    for (int i = 0; i < inst.numValues(); i++) {
298:                        if (inst.index(i) != classIndex) {
299:                            if (inst.isMissingSparse(i)
300:                                    || inst.classIsMissing()) {
301:                                if (!inst.isMissingSparse(i)) {
302:                                    counts[inst.index(i)][(int) inst
303:                                            .valueSparse(i)][numClasses] += inst
304:                                            .weight();
305:                                    counts[inst.index(i)][0][numClasses] -= inst
306:                                            .weight();
307:                                } else if (!inst.classIsMissing()) {
308:                                    counts[inst.index(i)][data.attribute(
309:                                            inst.index(i)).numValues()][(int) inst
310:                                            .classValue()] += inst.weight();
311:                                    counts[inst.index(i)][0][(int) inst
312:                                            .classValue()] -= inst.weight();
313:                                } else {
314:                                    counts[inst.index(i)][data.attribute(
315:                                            inst.index(i)).numValues()][numClasses] += inst
316:                                            .weight();
317:                                    counts[inst.index(i)][0][numClasses] -= inst
318:                                            .weight();
319:                                }
320:                            } else {
321:                                counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst
322:                                        .classValue()] += inst.weight();
323:                                counts[inst.index(i)][0][(int) inst
324:                                        .classValue()] -= inst.weight();
325:                            }
326:                        }
327:                    }
328:                }
329:
330:                // distribute missing counts if required
331:                if (m_missing_merge) {
332:
333:                    for (int k = 0; k < data.numAttributes(); k++) {
334:                        if (k != classIndex) {
335:                            int numValues = data.attribute(k).numValues();
336:
337:                            // Compute marginals
338:                            double[] rowSums = new double[numValues];
339:                            double[] columnSums = new double[numClasses];
340:                            double sum = 0;
341:                            for (int i = 0; i < numValues; i++) {
342:                                for (int j = 0; j < numClasses; j++) {
343:                                    rowSums[i] += counts[k][i][j];
344:                                    columnSums[j] += counts[k][i][j];
345:                                }
346:                                sum += rowSums[i];
347:                            }
348:
349:                            if (Utils.gr(sum, 0)) {
350:                                double[][] additions = new double[numValues][numClasses];
351:
352:                                // Compute what needs to be added to each row
353:                                for (int i = 0; i < numValues; i++) {
354:                                    for (int j = 0; j < numClasses; j++) {
355:                                        additions[i][j] = (rowSums[i] / sum)
356:                                                * counts[k][numValues][j];
357:                                    }
358:                                }
359:
360:                                // Compute what needs to be added to each column
361:                                for (int i = 0; i < numClasses; i++) {
362:                                    for (int j = 0; j < numValues; j++) {
363:                                        additions[j][i] += (columnSums[i] / sum)
364:                                                * counts[k][j][numClasses];
365:                                    }
366:                                }
367:
368:                                // Compute what needs to be added to each cell
369:                                for (int i = 0; i < numClasses; i++) {
370:                                    for (int j = 0; j < numValues; j++) {
371:                                        additions[j][i] += (counts[k][j][i] / sum)
372:                                                * counts[k][numValues][numClasses];
373:                                    }
374:                                }
375:
376:                                // Make new contingency table
377:                                double[][] newTable = new double[numValues][numClasses];
378:                                for (int i = 0; i < numValues; i++) {
379:                                    for (int j = 0; j < numClasses; j++) {
380:                                        newTable[i][j] = counts[k][i][j]
381:                                                + additions[i][j];
382:                                    }
383:                                }
384:                                counts[k] = newTable;
385:                            }
386:                        }
387:                    }
388:                }
389:
390:                // Compute chi-squared values
391:                m_ChiSquareds = new double[data.numAttributes()];
392:                for (int i = 0; i < data.numAttributes(); i++) {
393:                    if (i != classIndex) {
394:                        m_ChiSquareds[i] = ContingencyTables.chiVal(
395:                                ContingencyTables.reduceMatrix(counts[i]),
396:                                false);
397:                    }
398:                }
399:            }
400:
401:            /**
402:             * Reset options to their default values
403:             */
404:            protected void resetOptions() {
405:                m_ChiSquareds = null;
406:                m_missing_merge = true;
407:                m_Binarize = false;
408:            }
409:
410:            /**
411:             * evaluates an individual attribute by measuring its
412:             * chi-squared value.
413:             *
414:             * @param attribute the index of the attribute to be evaluated
415:             * @return the chi-squared value
416:             * @throws Exception if the attribute could not be evaluated
417:             */
418:            public double evaluateAttribute(int attribute) throws Exception {
419:
420:                return m_ChiSquareds[attribute];
421:            }
422:
423:            /**
424:             * Describe the attribute evaluator
425:             * @return a description of the attribute evaluator as a string
426:             */
427:            public String toString() {
428:                StringBuffer text = new StringBuffer();
429:
430:                if (m_ChiSquareds == null) {
431:                    text
432:                            .append("Chi-squared attribute evaluator has not been built");
433:                } else {
434:                    text.append("\tChi-squared Ranking Filter");
435:                    if (!m_missing_merge) {
436:                        text.append("\n\tMissing values treated as seperate");
437:                    }
438:                    if (m_Binarize) {
439:                        text
440:                                .append("\n\tNumeric attributes are just binarized");
441:                    }
442:                }
443:
444:                text.append("\n");
445:                return text.toString();
446:            }
447:
448:            /**
449:             * Main method.
450:             *
451:             * @param args the options
452:             */
453:            public static void main(String[] args) {
454:                runEvaluator(new ChiSquaredAttributeEval(), args);
455:            }
456:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.