Source Code Cross Referenced for FCBFSearch.java in  » Science » weka » weka » attributeSelection » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.attributeSelection 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *    This program is free software; you can redistribute it and/or modify
003:         *    it under the terms of the GNU General Public License as published by
004:         *    the Free Software Foundation; either version 2 of the License, or
005:         *    (at your option) any later version.
006:         *
007:         *    This program is distributed in the hope that it will be useful,
008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
010:         *    GNU General Public License for more details.
011:         *
012:         *    You should have received a copy of the GNU General Public License
013:         *    along with this program; if not, write to the Free Software
014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015:         */
016:
017:        /*
018:         *    RELEASE INFORMATION (December 27, 2004)
019:         *    
020:         *    FCBF algorithm:
021:         *      Template obtained from Weka
022:         *      Developed for Weka by Zheng Alan Zhao   
023:         *      December 27, 2004
024:         *
025:         *    FCBF algorithm is a feature selection method based on Symmetrical Uncertainty Measurement for 
026:         *    relevance redundancy analysis. The details of FCBF algorithm are in:
027:         *
028:        <!-- technical-plaintext-start -->
029:         * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
030:        <!-- technical-plaintext-end -->
031:         *    
032:         *    
033:         *    CONTACT INFORMATION
034:         *    
035:         *    For algorithm implementation:
036:         *    Zheng Zhao: zhaozheng at asu.edu
037:         *      
038:         *    For the algorithm:
039:         *    Lei Yu: leiyu at asu.edu
040:         *    Huan Liu: hliu at asu.edu
041:         *     
042:         *    Data Mining and Machine Learning Lab
043:         *    Computer Science and Engineering Department
044:         *    Fulton School of Engineering
045:         *    Arizona State University
046:         *    Tempe, AZ 85287
047:         *
048:         *    FCBFSearch.java
049:         *
050:         *    Copyright (C) 2004 Data Mining and Machine Learning Lab, 
051:         *                       Computer Science and Engineering Department, 
052:         *			 Fulton School of Engineering, 
053:         *                       Arizona State University
054:         *
055:         */
056:
057:        package weka.attributeSelection;
058:
059:        import weka.core.Instances;
060:        import weka.core.Option;
061:        import weka.core.OptionHandler;
062:        import weka.core.Range;
063:        import weka.core.TechnicalInformation;
064:        import weka.core.TechnicalInformation.Type;
065:        import weka.core.TechnicalInformation.Field;
066:        import weka.core.TechnicalInformationHandler;
067:        import weka.core.Utils;
068:
069:        import java.util.Enumeration;
070:        import java.util.Vector;
071:
072:        /**
073:         <!-- globalinfo-start -->
074:         * FCBF : <br/>
075:         * <br/>
076:         * Feature selection method based on correlation measureand relevance&amp;redundancy analysis. Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).<br/>
077:         * <br/>
078:         * For more information see:<br/>
079:         * <br/>
080:         * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
081:         * <p/>
082:         <!-- globalinfo-end -->
083:         *
084:         <!-- technical-bibtex-start -->
085:         * BibTeX:
086:         * <pre>
087:         * &#64;inproceedings{Yu2003,
088:         *    author = {Lei Yu and Huan Liu},
089:         *    booktitle = {Proceedings of the Twentieth International Conference on Machine Learning},
090:         *    pages = {856-863},
091:         *    publisher = {AAAI Press},
092:         *    title = {Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution},
093:         *    year = {2003}
094:         * }
095:         * </pre>
096:         * <p/>
097:         <!-- technical-bibtex-end -->
098:         * 
099:         <!-- options-start -->
100:         * Valid options are: <p/>
101:         * 
102:         * <pre> -D &lt;create dataset&gt;
103:         *  Specify Whether the selector generates a new dataset.</pre>
104:         * 
105:         * <pre> -P &lt;start set&gt;
106:         *  Specify a starting set of attributes.
107:         *   Eg. 1,3,5-7.
108:         *  Any starting attributes specified are
109:         *  ignored during the ranking.</pre>
110:         * 
111:         * <pre> -T &lt;threshold&gt;
112:         *  Specify a theshold by which attributes
113:         *  may be discarded from the ranking.</pre>
114:         * 
115:         * <pre> -N &lt;num to select&gt;
116:         *  Specify number of attributes to select</pre>
117:         * 
118:         <!-- options-end -->
119:         *
120:         * @author Zheng Zhao: zhaozheng at asu.edu
121:         * @version $Revision: 1.6 $
122:         */
123:        public class FCBFSearch extends ASSearch implements  RankedOutputSearch,
124:                StartSetHandler, OptionHandler, TechnicalInformationHandler {
125:
126:            /** for serialization */
127:            static final long serialVersionUID = 8209699587428369942L;
128:
129:            /** Holds the starting set as an array of attributes */
130:            private int[] m_starting;
131:
132:            /** Holds the start set for the search as a range */
133:            private Range m_startRange;
134:
135:            /** Holds the ordered list of attributes */
136:            private int[] m_attributeList;
137:
138:            /** Holds the list of attribute merit scores */
139:            private double[] m_attributeMerit;
140:
141:            /** Data has class attribute---if unsupervised evaluator then no class */
142:            private boolean m_hasClass;
143:
144:            /** Class index of the data if supervised evaluator */
145:            private int m_classIndex;
146:
147:            /** The number of attribtes */
148:            private int m_numAttribs;
149:
150:            /**
151:             * A threshold by which to discard attributes---used by the
152:             * AttributeSelection module
153:             */
154:            private double m_threshold;
155:
156:            /** The number of attributes to select. -1 indicates that all attributes
157:                are to be retained. Has precedence over m_threshold */
158:            private int m_numToSelect = -1;
159:
160:            /** Used to compute the number to select */
161:            private int m_calculatedNumToSelect = -1;
162:
163:            /*-----------------add begin 2004-11-15 by alan-----------------*/
164:            /** Used to determine whether we create a new dataset according to the selected features */
165:            private boolean m_generateOutput = false;
166:
167:            /** Used to store the ref of the Evaluator we use*/
168:            private ASEvaluation m_asEval;
169:
170:            /** Holds the list of attribute merit scores generated by FCBF */
171:            private double[][] m_rankedFCBF;
172:
173:            /** Hold the list of selected features*/
174:            private double[][] m_selectedFeatures;
175:
176:            /*-----------------add end 2004-11-15 by alan-----------------*/
177:
178:            /**
179:             * Returns a string describing this search method
180:             * @return a description of the search suitable for
181:             * displaying in the explorer/experimenter gui
182:             */
183:            public String globalInfo() {
184:                return "FCBF : \n\nFeature selection method based on correlation measure"
185:                        + "and relevance&redundancy analysis. "
186:                        + "Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).\n\n"
187:                        + "For more information see:\n\n"
188:                        + getTechnicalInformation().toString();
189:            }
190:
191:            /**
192:             * Returns an instance of a TechnicalInformation object, containing 
193:             * detailed information about the technical background of this class,
194:             * e.g., paper reference or book this class is based on.
195:             * 
196:             * @return the technical information about this class
197:             */
198:            public TechnicalInformation getTechnicalInformation() {
199:                TechnicalInformation result;
200:
201:                result = new TechnicalInformation(Type.INPROCEEDINGS);
202:                result.setValue(Field.AUTHOR, "Lei Yu and Huan Liu");
203:                result
204:                        .setValue(
205:                                Field.TITLE,
206:                                "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution");
207:                result
208:                        .setValue(Field.BOOKTITLE,
209:                                "Proceedings of the Twentieth International Conference on Machine Learning");
210:                result.setValue(Field.YEAR, "2003");
211:                result.setValue(Field.PAGES, "856-863");
212:                result.setValue(Field.PUBLISHER, "AAAI Press");
213:
214:                return result;
215:            }
216:
217:            /**
218:             * Constructor
219:             */
220:            public FCBFSearch() {
221:                resetOptions();
222:            }
223:
224:            /**
225:             * Returns the tip text for this property
226:             * @return tip text for this property suitable for
227:             * displaying in the explorer/experimenter gui
228:             */
229:            public String numToSelectTipText() {
230:                return "Specify the number of attributes to retain. The default value "
231:                        + "(-1) indicates that all attributes are to be retained. Use either "
232:                        + "this option or a threshold to reduce the attribute set.";
233:            }
234:
235:            /**
236:             * Specify the number of attributes to select from the ranked list. -1
237:             * indicates that all attributes are to be retained.
238:             * @param n the number of attributes to retain
239:             */
240:            public void setNumToSelect(int n) {
241:                m_numToSelect = n;
242:            }
243:
244:            /**
245:             * Gets the number of attributes to be retained.
246:             * @return the number of attributes to retain
247:             */
248:            public int getNumToSelect() {
249:                return m_numToSelect;
250:            }
251:
252:            /**
253:             * Gets the calculated number to select. This might be computed
254:             * from a threshold, or if < 0 is set as the number to select then
255:             * it is set to the number of attributes in the (transformed) data.
256:             * @return the calculated number of attributes to select
257:             */
258:            public int getCalculatedNumToSelect() {
259:                if (m_numToSelect >= 0) {
260:                    m_calculatedNumToSelect = m_numToSelect;
261:                }
262:                if (m_selectedFeatures.length > 0
263:                        && m_selectedFeatures.length < m_calculatedNumToSelect) {
264:                    m_calculatedNumToSelect = m_selectedFeatures.length;
265:                }
266:
267:                return m_calculatedNumToSelect;
268:            }
269:
270:            /**
271:             * Returns the tip text for this property
272:             * @return tip text for this property suitable for
273:             * displaying in the explorer/experimenter gui
274:             */
275:            public String thresholdTipText() {
276:                return "Set threshold by which attributes can be discarded. Default value "
277:                        + "results in no attributes being discarded. Use either this option or "
278:                        + "numToSelect to reduce the attribute set.";
279:            }
280:
281:            /**
282:             * Set the threshold by which the AttributeSelection module can discard
283:             * attributes.
284:             * @param threshold the threshold.
285:             */
286:            public void setThreshold(double threshold) {
287:                m_threshold = threshold;
288:            }
289:
290:            /**
291:             * Returns the threshold so that the AttributeSelection module can
292:             * discard attributes from the ranking.
293:             * @return the threshold
294:             */
295:            public double getThreshold() {
296:                return m_threshold;
297:            }
298:
299:            /**
300:             * Returns the tip text for this property
301:             * @return tip text for this property suitable for
302:             * displaying in the explorer/experimenter gui
303:             */
304:            public String generateRankingTipText() {
305:                return "A constant option. FCBF is capable of generating"
306:                        + " attribute rankings.";
307:            }
308:
309:            /**
310:             * This is a dummy set method---Ranker is ONLY capable of producing
311:             * a ranked list of attributes for attribute evaluators.
312:             * @param doRank this parameter is N/A and is ignored
313:             */
314:            public void setGenerateRanking(boolean doRank) {
315:            }
316:
317:            /**
318:             * This is a dummy method. Ranker can ONLY be used with attribute
319:             * evaluators and as such can only produce a ranked list of attributes
320:             * @return true all the time.
321:             */
322:            public boolean getGenerateRanking() {
323:                return true;
324:            }
325:
326:            /**
327:             * Returns the tip text for this property
328:             * @return tip text for this property suitable for
329:             * displaying in the explorer/experimenter gui
330:             */
331:
332:            public String generateDataOutputTipText() {
333:                return "Generating new dataset according to the selected features."
334:                        + " ";
335:            }
336:
337:            /**
338:             * Sets the flag, by which the AttributeSelection module decide
339:             * whether create a new dataset according to the selected features.
340:             * @param doGenerate the flag, by which the AttributeSelection module
341:             * decide whether create a new dataset according to the selected
342:             * features
343:             */
344:            public void setGenerateDataOutput(boolean doGenerate) {
345:                this .m_generateOutput = doGenerate;
346:
347:            }
348:
349:            /**
350:             * Returns the flag, by which the AttributeSelection module decide
351:             * whether create a new dataset according to the selected features.
352:             * @return the flag, by which the AttributeSelection module decide
353:             * whether create a new dataset according to the selected features.
354:             */
355:            public boolean getGenerateDataOutput() {
356:                return this .m_generateOutput;
357:            }
358:
359:            /**
360:             * Returns the tip text for this property
361:             * @return tip text for this property suitable for
362:             * displaying in the explorer/experimenter gui
363:             */
364:            public String startSetTipText() {
365:                return "Specify a set of attributes to ignore. "
366:                        + " When generating the ranking, FCBF will not evaluate the attributes "
367:                        + " in this list. "
368:                        + "This is specified as a comma "
369:                        + "seperated list off attribute indexes starting at 1. It can include "
370:                        + "ranges. Eg. 1,2,5-9,17.";
371:            }
372:
373:            /**
374:             * Sets a starting set of attributes for the search. It is the
375:             * search method's responsibility to report this start set (if any)
376:             * in its toString() method.
377:             * @param startSet a string containing a list of attributes (and or ranges),
378:             * eg. 1,2,6,10-15.
379:             * @throws Exception if start set can't be set.
380:             */
381:            public void setStartSet(String startSet) throws Exception {
382:                m_startRange.setRanges(startSet);
383:            }
384:
385:            /**
386:             * Returns a list of attributes (and or attribute ranges) as a String
387:             * @return a list of attributes (and or attribute ranges)
388:             */
389:            public String getStartSet() {
390:                return m_startRange.getRanges();
391:            }
392:
393:            /**
394:             * Returns an enumeration describing the available options.
395:             * @return an enumeration of all the available options.
396:             **/
397:            public Enumeration listOptions() {
398:                Vector newVector = new Vector(4);
399:
400:                newVector
401:                        .addElement(new Option(
402:                                "\tSpecify Whether the selector generates a new dataset.",
403:                                "D", 1, "-D <create dataset>"));
404:
405:                newVector.addElement(new Option(
406:                        "\tSpecify a starting set of attributes.\n"
407:                                + "\t\tEg. 1,3,5-7.\n"
408:                                + "\tAny starting attributes specified are\n"
409:                                + "\tignored during the ranking.", "P", 1,
410:                        "-P <start set>"));
411:
412:                newVector.addElement(new Option(
413:                        "\tSpecify a theshold by which attributes\n"
414:                                + "\tmay be discarded from the ranking.", "T",
415:                        1, "-T <threshold>"));
416:
417:                newVector.addElement(new Option(
418:                        "\tSpecify number of attributes to select", "N", 1,
419:                        "-N <num to select>"));
420:
421:                return newVector.elements();
422:
423:            }
424:
425:            /**
426:             * Parses a given list of options. <p/>
427:             *
428:             <!-- options-start -->
429:             * Valid options are: <p/>
430:             * 
431:             * <pre> -D &lt;create dataset&gt;
432:             *  Specify Whether the selector generates a new dataset.</pre>
433:             * 
434:             * <pre> -P &lt;start set&gt;
435:             *  Specify a starting set of attributes.
436:             *   Eg. 1,3,5-7.
437:             *  Any starting attributes specified are
438:             *  ignored during the ranking.</pre>
439:             * 
440:             * <pre> -T &lt;threshold&gt;
441:             *  Specify a theshold by which attributes
442:             *  may be discarded from the ranking.</pre>
443:             * 
444:             * <pre> -N &lt;num to select&gt;
445:             *  Specify number of attributes to select</pre>
446:             * 
447:             <!-- options-end -->
448:             *
449:             * @param options the list of options as an array of strings
450:             * @throws Exception if an option is not supported
451:             *
452:             **/
453:            public void setOptions(String[] options) throws Exception {
454:                String optionString;
455:                resetOptions();
456:
457:                optionString = Utils.getOption('D', options);
458:                if (optionString.length() != 0) {
459:                    setGenerateDataOutput(Boolean.getBoolean(optionString));
460:                }
461:
462:                optionString = Utils.getOption('P', options);
463:                if (optionString.length() != 0) {
464:                    setStartSet(optionString);
465:                }
466:
467:                optionString = Utils.getOption('T', options);
468:                if (optionString.length() != 0) {
469:                    Double temp;
470:                    temp = Double.valueOf(optionString);
471:                    setThreshold(temp.doubleValue());
472:                }
473:
474:                optionString = Utils.getOption('N', options);
475:                if (optionString.length() != 0) {
476:                    setNumToSelect(Integer.parseInt(optionString));
477:                }
478:            }
479:
480:            /**
481:             * Gets the current settings of ReliefFAttributeEval.
482:             *
483:             * @return an array of strings suitable for passing to setOptions()
484:             */
485:            public String[] getOptions() {
486:                String[] options = new String[8];
487:                int current = 0;
488:
489:                options[current++] = "-D";
490:                options[current++] = "" + getGenerateDataOutput();
491:
492:                if (!(getStartSet().equals(""))) {
493:                    options[current++] = "-P";
494:                    options[current++] = "" + startSetToString();
495:                }
496:
497:                options[current++] = "-T";
498:                options[current++] = "" + getThreshold();
499:
500:                options[current++] = "-N";
501:                options[current++] = "" + getNumToSelect();
502:
503:                while (current < options.length) {
504:                    options[current++] = "";
505:                }
506:                return options;
507:            }
508:
509:            /**
510:             * converts the array of starting attributes to a string. This is
511:             * used by getOptions to return the actual attributes specified
512:             * as the starting set. This is better than using m_startRanges.getRanges()
513:             * as the same start set can be specified in different ways from the
514:             * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
515:             * is stored in a database is comparable.
516:             * @return a comma seperated list of individual attribute numbers as a String
517:             */
518:            private String startSetToString() {
519:                StringBuffer FString = new StringBuffer();
520:                boolean didPrint;
521:
522:                if (m_starting == null) {
523:                    return getStartSet();
524:                }
525:
526:                for (int i = 0; i < m_starting.length; i++) {
527:                    didPrint = false;
528:
529:                    if ((m_hasClass == false)
530:                            || (m_hasClass == true && i != m_classIndex)) {
531:                        FString.append((m_starting[i] + 1));
532:                        didPrint = true;
533:                    }
534:
535:                    if (i == (m_starting.length - 1)) {
536:                        FString.append("");
537:                    } else {
538:                        if (didPrint) {
539:                            FString.append(",");
540:                        }
541:                    }
542:                }
543:
544:                return FString.toString();
545:            }
546:
547:            /**
548:             * Kind of a dummy search algorithm. Calls a Attribute evaluator to
549:             * evaluate each attribute not included in the startSet and then sorts
550:             * them to produce a ranked list of attributes.
551:             *
552:             * @param ASEval the attribute evaluator to guide the search
553:             * @param data the training instances.
554:             * @return an array (not necessarily ordered) of selected attribute indexes
555:             * @throws Exception if the search can't be completed
556:             */
557:            public int[] search(ASEvaluation ASEval, Instances data)
558:                    throws Exception {
559:                int i, j;
560:
561:                if (!(ASEval instanceof  AttributeSetEvaluator)) {
562:                    throw new Exception(ASEval.getClass().getName()
563:                            + " is not an " + "Attribute Set evaluator!");
564:                }
565:
566:                m_numAttribs = data.numAttributes();
567:
568:                if (ASEval instanceof  UnsupervisedAttributeEvaluator) {
569:                    m_hasClass = false;
570:                } else {
571:                    m_classIndex = data.classIndex();
572:                    if (m_classIndex >= 0) {
573:                        m_hasClass = true;
574:                    } else {
575:                        m_hasClass = false;
576:                    }
577:                }
578:
579:                // get the transformed data and check to see if the transformer
580:                // preserves a class index
581:                if (ASEval instanceof  AttributeTransformer) {
582:                    data = ((AttributeTransformer) ASEval).transformedHeader();
583:                    if (m_classIndex >= 0 && data.classIndex() >= 0) {
584:                        m_classIndex = data.classIndex();
585:                        m_hasClass = true;
586:                    }
587:                }
588:
589:                m_startRange.setUpper(m_numAttribs - 1);
590:                if (!(getStartSet().equals(""))) {
591:                    m_starting = m_startRange.getSelection();
592:                }
593:
594:                int sl = 0;
595:                if (m_starting != null) {
596:                    sl = m_starting.length;
597:                }
598:                if ((m_starting != null) && (m_hasClass == true)) {
599:                    // see if the supplied list contains the class index
600:                    boolean ok = false;
601:                    for (i = 0; i < sl; i++) {
602:                        if (m_starting[i] == m_classIndex) {
603:                            ok = true;
604:                            break;
605:                        }
606:                    }
607:
608:                    if (ok == false) {
609:                        sl++;
610:                    }
611:                } else {
612:                    if (m_hasClass == true) {
613:                        sl++;
614:                    }
615:                }
616:
617:                m_attributeList = new int[m_numAttribs - sl];
618:                m_attributeMerit = new double[m_numAttribs - sl];
619:
620:                // add in those attributes not in the starting (omit list)
621:                for (i = 0, j = 0; i < m_numAttribs; i++) {
622:                    if (!inStarting(i)) {
623:                        m_attributeList[j++] = i;
624:                    }
625:                }
626:
627:                this .m_asEval = ASEval;
628:                AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator) ASEval;
629:
630:                for (i = 0; i < m_attributeList.length; i++) {
631:                    m_attributeMerit[i] = ASEvaluator
632:                            .evaluateAttribute(m_attributeList[i]);
633:                }
634:
635:                double[][] tempRanked = rankedAttributes();
636:                int[] rankedAttributes = new int[m_selectedFeatures.length];
637:
638:                for (i = 0; i < m_selectedFeatures.length; i++) {
639:                    rankedAttributes[i] = (int) tempRanked[i][0];
640:                }
641:                return rankedAttributes;
642:            }
643:
644:            /**
645:             * Sorts the evaluated attribute list
646:             *
647:             * @return an array of sorted (highest eval to lowest) attribute indexes
648:             * @throws Exception of sorting can't be done.
649:             */
650:            public double[][] rankedAttributes() throws Exception {
651:                int i, j;
652:
653:                if (m_attributeList == null || m_attributeMerit == null) {
654:                    throw new Exception(
655:                            "Search must be performed before a ranked "
656:                                    + "attribute list can be obtained");
657:                }
658:
659:                int[] ranked = Utils.sort(m_attributeMerit);
660:                // reverse the order of the ranked indexes
661:                double[][] bestToWorst = new double[ranked.length][2];
662:
663:                for (i = ranked.length - 1, j = 0; i >= 0; i--) {
664:                    bestToWorst[j++][0] = ranked[i];
665:                    //alan: means in the arrary ranked, varialbe is from ranked as from small to large
666:                }
667:
668:                // convert the indexes to attribute indexes
669:                for (i = 0; i < bestToWorst.length; i++) {
670:                    int temp = ((int) bestToWorst[i][0]);
671:                    bestToWorst[i][0] = m_attributeList[temp]; //for the index
672:                    bestToWorst[i][1] = m_attributeMerit[temp]; //for the value of the index
673:                }
674:
675:                if (m_numToSelect > bestToWorst.length) {
676:                    throw new Exception(
677:                            "More attributes requested than exist in the data");
678:                }
679:
680:                this .FCBFElimination(bestToWorst);
681:
682:                if (m_numToSelect <= 0) {
683:                    if (m_threshold == -Double.MAX_VALUE) {
684:                        m_calculatedNumToSelect = m_selectedFeatures.length;
685:                    } else {
686:                        determineNumToSelectFromThreshold(m_selectedFeatures);
687:                    }
688:                }
689:                /*    if (m_numToSelect > 0) {
690:                  determineThreshFromNumToSelect(bestToWorst);
691:                  } */
692:
693:                return m_selectedFeatures;
694:            }
695:
696:            private void determineNumToSelectFromThreshold(double[][] ranking) {
697:                int count = 0;
698:                for (int i = 0; i < ranking.length; i++) {
699:                    if (ranking[i][1] > m_threshold) {
700:                        count++;
701:                    }
702:                }
703:                m_calculatedNumToSelect = count;
704:            }
705:
706:            private void determineThreshFromNumToSelect(double[][] ranking)
707:                    throws Exception {
708:                if (m_numToSelect > ranking.length) {
709:                    throw new Exception(
710:                            "More attributes requested than exist in the data");
711:                }
712:
713:                if (m_numToSelect == ranking.length) {
714:                    return;
715:                }
716:
717:                m_threshold = (ranking[m_numToSelect - 1][1] + ranking[m_numToSelect][1]) / 2.0;
718:            }
719:
720:            /**
721:             * returns a description of the search as a String
722:             * @return a description of the search
723:             */
724:            public String toString() {
725:                StringBuffer BfString = new StringBuffer();
726:                BfString.append("\tAttribute ranking.\n");
727:
728:                if (m_starting != null) {
729:                    BfString.append("\tIgnored attributes: ");
730:
731:                    BfString.append(startSetToString());
732:                    BfString.append("\n");
733:                }
734:
735:                if (m_threshold != -Double.MAX_VALUE) {
736:                    BfString.append("\tThreshold for discarding attributes: "
737:                            + Utils.doubleToString(m_threshold, 8, 4) + "\n");
738:                }
739:
740:                BfString.append("\n\n");
741:
742:                BfString.append("     J || SU(j,Class) ||    I || SU(i,j). \n");
743:
744:                for (int i = 0; i < m_rankedFCBF.length; i++) {
745:                    BfString.append(Utils.doubleToString(
746:                            m_rankedFCBF[i][0] + 1, 6, 0)
747:                            + " ; "
748:                            + Utils.doubleToString(m_rankedFCBF[i][1], 12, 7)
749:                            + " ; ");
750:                    if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0]) {
751:                        BfString.append("    *\n");
752:                    } else {
753:                        BfString.append(Utils.doubleToString(
754:                                m_rankedFCBF[i][2] + 1, 5, 0)
755:                                + " ; " + m_rankedFCBF[i][3] + "\n");
756:                    }
757:                }
758:
759:                return BfString.toString();
760:            }
761:
762:            /**
763:             * Resets stuff to default values
764:             */
765:            protected void resetOptions() {
766:                m_starting = null;
767:                m_startRange = new Range();
768:                m_attributeList = null;
769:                m_attributeMerit = null;
770:                m_threshold = -Double.MAX_VALUE;
771:            }
772:
773:            private boolean inStarting(int feat) {
774:                // omit the class from the evaluation
775:                if ((m_hasClass == true) && (feat == m_classIndex)) {
776:                    return true;
777:                }
778:
779:                if (m_starting == null) {
780:                    return false;
781:                }
782:
783:                for (int i = 0; i < m_starting.length; i++) {
784:                    if (m_starting[i] == feat) {
785:                        return true;
786:                    }
787:                }
788:
789:                return false;
790:            }
791:
792:            private void FCBFElimination(double[][] rankedFeatures)
793:                    throws Exception {
794:
795:                int i, j;
796:
797:                m_rankedFCBF = new double[m_attributeList.length][4];
798:                int[] attributes = new int[1];
799:                int[] classAtrributes = new int[1];
800:
801:                int numSelectedAttributes = 0;
802:
803:                int startPoint = 0;
804:                double tempSUIJ = 0;
805:
806:                AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator) m_asEval;
807:
808:                for (i = 0; i < rankedFeatures.length; i++) {
809:                    m_rankedFCBF[i][0] = rankedFeatures[i][0];
810:                    m_rankedFCBF[i][1] = rankedFeatures[i][1];
811:                    m_rankedFCBF[i][2] = -1;
812:                }
813:
814:                while (startPoint < rankedFeatures.length) {
815:                    if (m_rankedFCBF[startPoint][2] != -1) {
816:                        startPoint++;
817:                        continue;
818:                    }
819:
820:                    m_rankedFCBF[startPoint][2] = m_rankedFCBF[startPoint][0];
821:                    numSelectedAttributes++;
822:
823:                    for (i = startPoint + 1; i < m_attributeList.length; i++) {
824:                        if (m_rankedFCBF[i][2] != -1) {
825:                            continue;
826:                        }
827:                        attributes[0] = (int) m_rankedFCBF[startPoint][0];
828:                        classAtrributes[0] = (int) m_rankedFCBF[i][0];
829:                        tempSUIJ = ASEvaluator.evaluateAttribute(attributes,
830:                                classAtrributes);
831:                        if (m_rankedFCBF[i][1] < tempSUIJ
832:                                || Math.abs(tempSUIJ - m_rankedFCBF[i][1]) < 1E-8) {
833:                            m_rankedFCBF[i][2] = m_rankedFCBF[startPoint][0];
834:                            m_rankedFCBF[i][3] = tempSUIJ;
835:                        }
836:                    }
837:                    startPoint++;
838:                }
839:
840:                m_selectedFeatures = new double[numSelectedAttributes][2];
841:
842:                for (i = 0, j = 0; i < m_attributeList.length; i++) {
843:                    if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0]) {
844:                        m_selectedFeatures[j][0] = m_rankedFCBF[i][0];
845:                        m_selectedFeatures[j][1] = m_rankedFCBF[i][1];
846:                        j++;
847:                    }
848:                }
849:            }
850:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.