001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * RELEASE INFORMATION (December 27, 2004)
019: *
020: * FCBF algorithm:
021: * Template obtained from Weka
022: * Developed for Weka by Zheng Alan Zhao
023: * December 27, 2004
024: *
025: * FCBF algorithm is a feature selection method based on Symmetrical Uncertainty Measurement for
026: * relevance redundancy analysis. The details of FCBF algorithm are in:
027: *
028: <!-- technical-plaintext-start -->
029: * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
030: <!-- technical-plaintext-end -->
031: *
032: *
033: * CONTACT INFORMATION
034: *
035: * For algorithm implementation:
036: * Zheng Zhao: zhaozheng at asu.edu
037: *
038: * For the algorithm:
039: * Lei Yu: leiyu at asu.edu
040: * Huan Liu: hliu at asu.edu
041: *
042: * Data Mining and Machine Learning Lab
043: * Computer Science and Engineering Department
044: * Fulton School of Engineering
045: * Arizona State University
046: * Tempe, AZ 85287
047: *
048: * FCBFSearch.java
049: *
050: * Copyright (C) 2004 Data Mining and Machine Learning Lab,
051: * Computer Science and Engineering Department,
052: * Fulton School of Engineering,
053: * Arizona State University
054: *
055: */
056:
057: package weka.attributeSelection;
058:
059: import weka.core.Instances;
060: import weka.core.Option;
061: import weka.core.OptionHandler;
062: import weka.core.Range;
063: import weka.core.TechnicalInformation;
064: import weka.core.TechnicalInformation.Type;
065: import weka.core.TechnicalInformation.Field;
066: import weka.core.TechnicalInformationHandler;
067: import weka.core.Utils;
068:
069: import java.util.Enumeration;
070: import java.util.Vector;
071:
072: /**
073: <!-- globalinfo-start -->
074: * FCBF : <br/>
075: * <br/>
* Feature selection method based on correlation measure and relevance &amp; redundancy analysis. Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).<br/>
077: * <br/>
078: * For more information see:<br/>
079: * <br/>
080: * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
081: * <p/>
082: <!-- globalinfo-end -->
083: *
084: <!-- technical-bibtex-start -->
085: * BibTeX:
086: * <pre>
087: * @inproceedings{Yu2003,
088: * author = {Lei Yu and Huan Liu},
089: * booktitle = {Proceedings of the Twentieth International Conference on Machine Learning},
090: * pages = {856-863},
091: * publisher = {AAAI Press},
092: * title = {Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution},
093: * year = {2003}
094: * }
095: * </pre>
096: * <p/>
097: <!-- technical-bibtex-end -->
098: *
099: <!-- options-start -->
100: * Valid options are: <p/>
101: *
102: * <pre> -D <create dataset>
103: * Specify Whether the selector generates a new dataset.</pre>
104: *
105: * <pre> -P <start set>
106: * Specify a starting set of attributes.
107: * Eg. 1,3,5-7.
108: * Any starting attributes specified are
109: * ignored during the ranking.</pre>
110: *
111: * <pre> -T <threshold>
* Specify a threshold by which attributes
113: * may be discarded from the ranking.</pre>
114: *
115: * <pre> -N <num to select>
116: * Specify number of attributes to select</pre>
117: *
118: <!-- options-end -->
119: *
120: * @author Zheng Zhao: zhaozheng at asu.edu
121: * @version $Revision: 1.6 $
122: */
123: public class FCBFSearch extends ASSearch implements RankedOutputSearch,
124: StartSetHandler, OptionHandler, TechnicalInformationHandler {
125:
  /** for serialization */
  static final long serialVersionUID = 8209699587428369942L;

  /** Holds the starting ("ignore") set as an array of 0-based attribute indexes */
  private int[] m_starting;

  /** Holds the start set for the search as a range */
  private Range m_startRange;

  /** Holds the ordered list of attribute indexes that are evaluated */
  private int[] m_attributeList;

  /** Holds the merit score for each attribute in m_attributeList */
  private double[] m_attributeMerit;

  /** Data has class attribute---if unsupervised evaluator then no class */
  private boolean m_hasClass;

  /** Class index of the data if supervised evaluator */
  private int m_classIndex;

  /** The number of attributes in the (possibly transformed) data */
  private int m_numAttribs;

  /**
   * A threshold by which to discard attributes---used by the
   * AttributeSelection module
   */
  private double m_threshold;

  /** The number of attributes to select. -1 indicates that all attributes
   are to be retained. Has precedence over m_threshold */
  private int m_numToSelect = -1;

  /** Used to compute the number to select */
  private int m_calculatedNumToSelect = -1;

  /*-----------------add begin 2004-11-15 by alan-----------------*/
  /** Whether a new dataset restricted to the selected features is generated */
  private boolean m_generateOutput = false;

  /** Reference to the evaluator passed to search(); re-used by FCBFElimination() */
  private ASEvaluation m_asEval;

  /**
   * Per-attribute FCBF bookkeeping, one row per ranked attribute:
   * [0] attribute index, [1] merit w.r.t. the class,
   * [2] index of the predominant attribute that made this one redundant
   * (the attribute's own index if it was selected, -1 while undecided),
   * [3] merit between this attribute and its predominant attribute.
   */
  private double[][] m_rankedFCBF;

  /** Selected features: rows of [attribute index, merit w.r.t. the class] */
  private double[][] m_selectedFeatures;

  /*-----------------add end 2004-11-15 by alan-----------------*/
177:
178: /**
179: * Returns a string describing this search method
180: * @return a description of the search suitable for
181: * displaying in the explorer/experimenter gui
182: */
183: public String globalInfo() {
184: return "FCBF : \n\nFeature selection method based on correlation measure"
185: + "and relevance&redundancy analysis. "
186: + "Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).\n\n"
187: + "For more information see:\n\n"
188: + getTechnicalInformation().toString();
189: }
190:
191: /**
192: * Returns an instance of a TechnicalInformation object, containing
193: * detailed information about the technical background of this class,
194: * e.g., paper reference or book this class is based on.
195: *
196: * @return the technical information about this class
197: */
198: public TechnicalInformation getTechnicalInformation() {
199: TechnicalInformation result;
200:
201: result = new TechnicalInformation(Type.INPROCEEDINGS);
202: result.setValue(Field.AUTHOR, "Lei Yu and Huan Liu");
203: result
204: .setValue(
205: Field.TITLE,
206: "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution");
207: result
208: .setValue(Field.BOOKTITLE,
209: "Proceedings of the Twentieth International Conference on Machine Learning");
210: result.setValue(Field.YEAR, "2003");
211: result.setValue(Field.PAGES, "856-863");
212: result.setValue(Field.PUBLISHER, "AAAI Press");
213:
214: return result;
215: }
216:
217: /**
218: * Constructor
219: */
220: public FCBFSearch() {
221: resetOptions();
222: }
223:
224: /**
225: * Returns the tip text for this property
226: * @return tip text for this property suitable for
227: * displaying in the explorer/experimenter gui
228: */
229: public String numToSelectTipText() {
230: return "Specify the number of attributes to retain. The default value "
231: + "(-1) indicates that all attributes are to be retained. Use either "
232: + "this option or a threshold to reduce the attribute set.";
233: }
234:
235: /**
236: * Specify the number of attributes to select from the ranked list. -1
237: * indicates that all attributes are to be retained.
238: * @param n the number of attributes to retain
239: */
240: public void setNumToSelect(int n) {
241: m_numToSelect = n;
242: }
243:
244: /**
245: * Gets the number of attributes to be retained.
246: * @return the number of attributes to retain
247: */
248: public int getNumToSelect() {
249: return m_numToSelect;
250: }
251:
252: /**
253: * Gets the calculated number to select. This might be computed
254: * from a threshold, or if < 0 is set as the number to select then
255: * it is set to the number of attributes in the (transformed) data.
256: * @return the calculated number of attributes to select
257: */
258: public int getCalculatedNumToSelect() {
259: if (m_numToSelect >= 0) {
260: m_calculatedNumToSelect = m_numToSelect;
261: }
262: if (m_selectedFeatures.length > 0
263: && m_selectedFeatures.length < m_calculatedNumToSelect) {
264: m_calculatedNumToSelect = m_selectedFeatures.length;
265: }
266:
267: return m_calculatedNumToSelect;
268: }
269:
270: /**
271: * Returns the tip text for this property
272: * @return tip text for this property suitable for
273: * displaying in the explorer/experimenter gui
274: */
275: public String thresholdTipText() {
276: return "Set threshold by which attributes can be discarded. Default value "
277: + "results in no attributes being discarded. Use either this option or "
278: + "numToSelect to reduce the attribute set.";
279: }
280:
281: /**
282: * Set the threshold by which the AttributeSelection module can discard
283: * attributes.
284: * @param threshold the threshold.
285: */
286: public void setThreshold(double threshold) {
287: m_threshold = threshold;
288: }
289:
290: /**
291: * Returns the threshold so that the AttributeSelection module can
292: * discard attributes from the ranking.
293: * @return the threshold
294: */
295: public double getThreshold() {
296: return m_threshold;
297: }
298:
299: /**
300: * Returns the tip text for this property
301: * @return tip text for this property suitable for
302: * displaying in the explorer/experimenter gui
303: */
304: public String generateRankingTipText() {
305: return "A constant option. FCBF is capable of generating"
306: + " attribute rankings.";
307: }
308:
309: /**
310: * This is a dummy set method---Ranker is ONLY capable of producing
311: * a ranked list of attributes for attribute evaluators.
312: * @param doRank this parameter is N/A and is ignored
313: */
314: public void setGenerateRanking(boolean doRank) {
315: }
316:
317: /**
318: * This is a dummy method. Ranker can ONLY be used with attribute
319: * evaluators and as such can only produce a ranked list of attributes
320: * @return true all the time.
321: */
322: public boolean getGenerateRanking() {
323: return true;
324: }
325:
326: /**
327: * Returns the tip text for this property
328: * @return tip text for this property suitable for
329: * displaying in the explorer/experimenter gui
330: */
331:
332: public String generateDataOutputTipText() {
333: return "Generating new dataset according to the selected features."
334: + " ";
335: }
336:
337: /**
338: * Sets the flag, by which the AttributeSelection module decide
339: * whether create a new dataset according to the selected features.
340: * @param doGenerate the flag, by which the AttributeSelection module
341: * decide whether create a new dataset according to the selected
342: * features
343: */
344: public void setGenerateDataOutput(boolean doGenerate) {
345: this .m_generateOutput = doGenerate;
346:
347: }
348:
349: /**
350: * Returns the flag, by which the AttributeSelection module decide
351: * whether create a new dataset according to the selected features.
352: * @return the flag, by which the AttributeSelection module decide
353: * whether create a new dataset according to the selected features.
354: */
355: public boolean getGenerateDataOutput() {
356: return this .m_generateOutput;
357: }
358:
359: /**
360: * Returns the tip text for this property
361: * @return tip text for this property suitable for
362: * displaying in the explorer/experimenter gui
363: */
364: public String startSetTipText() {
365: return "Specify a set of attributes to ignore. "
366: + " When generating the ranking, FCBF will not evaluate the attributes "
367: + " in this list. "
368: + "This is specified as a comma "
369: + "seperated list off attribute indexes starting at 1. It can include "
370: + "ranges. Eg. 1,2,5-9,17.";
371: }
372:
373: /**
374: * Sets a starting set of attributes for the search. It is the
375: * search method's responsibility to report this start set (if any)
376: * in its toString() method.
377: * @param startSet a string containing a list of attributes (and or ranges),
378: * eg. 1,2,6,10-15.
379: * @throws Exception if start set can't be set.
380: */
381: public void setStartSet(String startSet) throws Exception {
382: m_startRange.setRanges(startSet);
383: }
384:
385: /**
386: * Returns a list of attributes (and or attribute ranges) as a String
387: * @return a list of attributes (and or attribute ranges)
388: */
389: public String getStartSet() {
390: return m_startRange.getRanges();
391: }
392:
393: /**
394: * Returns an enumeration describing the available options.
395: * @return an enumeration of all the available options.
396: **/
397: public Enumeration listOptions() {
398: Vector newVector = new Vector(4);
399:
400: newVector
401: .addElement(new Option(
402: "\tSpecify Whether the selector generates a new dataset.",
403: "D", 1, "-D <create dataset>"));
404:
405: newVector.addElement(new Option(
406: "\tSpecify a starting set of attributes.\n"
407: + "\t\tEg. 1,3,5-7.\n"
408: + "\tAny starting attributes specified are\n"
409: + "\tignored during the ranking.", "P", 1,
410: "-P <start set>"));
411:
412: newVector.addElement(new Option(
413: "\tSpecify a theshold by which attributes\n"
414: + "\tmay be discarded from the ranking.", "T",
415: 1, "-T <threshold>"));
416:
417: newVector.addElement(new Option(
418: "\tSpecify number of attributes to select", "N", 1,
419: "-N <num to select>"));
420:
421: return newVector.elements();
422:
423: }
424:
425: /**
426: * Parses a given list of options. <p/>
427: *
428: <!-- options-start -->
429: * Valid options are: <p/>
430: *
431: * <pre> -D <create dataset>
432: * Specify Whether the selector generates a new dataset.</pre>
433: *
434: * <pre> -P <start set>
435: * Specify a starting set of attributes.
436: * Eg. 1,3,5-7.
437: * Any starting attributes specified are
438: * ignored during the ranking.</pre>
439: *
440: * <pre> -T <threshold>
441: * Specify a theshold by which attributes
442: * may be discarded from the ranking.</pre>
443: *
444: * <pre> -N <num to select>
445: * Specify number of attributes to select</pre>
446: *
447: <!-- options-end -->
448: *
449: * @param options the list of options as an array of strings
450: * @throws Exception if an option is not supported
451: *
452: **/
453: public void setOptions(String[] options) throws Exception {
454: String optionString;
455: resetOptions();
456:
457: optionString = Utils.getOption('D', options);
458: if (optionString.length() != 0) {
459: setGenerateDataOutput(Boolean.getBoolean(optionString));
460: }
461:
462: optionString = Utils.getOption('P', options);
463: if (optionString.length() != 0) {
464: setStartSet(optionString);
465: }
466:
467: optionString = Utils.getOption('T', options);
468: if (optionString.length() != 0) {
469: Double temp;
470: temp = Double.valueOf(optionString);
471: setThreshold(temp.doubleValue());
472: }
473:
474: optionString = Utils.getOption('N', options);
475: if (optionString.length() != 0) {
476: setNumToSelect(Integer.parseInt(optionString));
477: }
478: }
479:
480: /**
481: * Gets the current settings of ReliefFAttributeEval.
482: *
483: * @return an array of strings suitable for passing to setOptions()
484: */
485: public String[] getOptions() {
486: String[] options = new String[8];
487: int current = 0;
488:
489: options[current++] = "-D";
490: options[current++] = "" + getGenerateDataOutput();
491:
492: if (!(getStartSet().equals(""))) {
493: options[current++] = "-P";
494: options[current++] = "" + startSetToString();
495: }
496:
497: options[current++] = "-T";
498: options[current++] = "" + getThreshold();
499:
500: options[current++] = "-N";
501: options[current++] = "" + getNumToSelect();
502:
503: while (current < options.length) {
504: options[current++] = "";
505: }
506: return options;
507: }
508:
509: /**
510: * converts the array of starting attributes to a string. This is
511: * used by getOptions to return the actual attributes specified
512: * as the starting set. This is better than using m_startRanges.getRanges()
513: * as the same start set can be specified in different ways from the
514: * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
515: * is stored in a database is comparable.
516: * @return a comma seperated list of individual attribute numbers as a String
517: */
518: private String startSetToString() {
519: StringBuffer FString = new StringBuffer();
520: boolean didPrint;
521:
522: if (m_starting == null) {
523: return getStartSet();
524: }
525:
526: for (int i = 0; i < m_starting.length; i++) {
527: didPrint = false;
528:
529: if ((m_hasClass == false)
530: || (m_hasClass == true && i != m_classIndex)) {
531: FString.append((m_starting[i] + 1));
532: didPrint = true;
533: }
534:
535: if (i == (m_starting.length - 1)) {
536: FString.append("");
537: } else {
538: if (didPrint) {
539: FString.append(",");
540: }
541: }
542: }
543:
544: return FString.toString();
545: }
546:
547: /**
548: * Kind of a dummy search algorithm. Calls a Attribute evaluator to
549: * evaluate each attribute not included in the startSet and then sorts
550: * them to produce a ranked list of attributes.
551: *
552: * @param ASEval the attribute evaluator to guide the search
553: * @param data the training instances.
554: * @return an array (not necessarily ordered) of selected attribute indexes
555: * @throws Exception if the search can't be completed
556: */
557: public int[] search(ASEvaluation ASEval, Instances data)
558: throws Exception {
559: int i, j;
560:
561: if (!(ASEval instanceof AttributeSetEvaluator)) {
562: throw new Exception(ASEval.getClass().getName()
563: + " is not an " + "Attribute Set evaluator!");
564: }
565:
566: m_numAttribs = data.numAttributes();
567:
568: if (ASEval instanceof UnsupervisedAttributeEvaluator) {
569: m_hasClass = false;
570: } else {
571: m_classIndex = data.classIndex();
572: if (m_classIndex >= 0) {
573: m_hasClass = true;
574: } else {
575: m_hasClass = false;
576: }
577: }
578:
579: // get the transformed data and check to see if the transformer
580: // preserves a class index
581: if (ASEval instanceof AttributeTransformer) {
582: data = ((AttributeTransformer) ASEval).transformedHeader();
583: if (m_classIndex >= 0 && data.classIndex() >= 0) {
584: m_classIndex = data.classIndex();
585: m_hasClass = true;
586: }
587: }
588:
589: m_startRange.setUpper(m_numAttribs - 1);
590: if (!(getStartSet().equals(""))) {
591: m_starting = m_startRange.getSelection();
592: }
593:
594: int sl = 0;
595: if (m_starting != null) {
596: sl = m_starting.length;
597: }
598: if ((m_starting != null) && (m_hasClass == true)) {
599: // see if the supplied list contains the class index
600: boolean ok = false;
601: for (i = 0; i < sl; i++) {
602: if (m_starting[i] == m_classIndex) {
603: ok = true;
604: break;
605: }
606: }
607:
608: if (ok == false) {
609: sl++;
610: }
611: } else {
612: if (m_hasClass == true) {
613: sl++;
614: }
615: }
616:
617: m_attributeList = new int[m_numAttribs - sl];
618: m_attributeMerit = new double[m_numAttribs - sl];
619:
620: // add in those attributes not in the starting (omit list)
621: for (i = 0, j = 0; i < m_numAttribs; i++) {
622: if (!inStarting(i)) {
623: m_attributeList[j++] = i;
624: }
625: }
626:
627: this .m_asEval = ASEval;
628: AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator) ASEval;
629:
630: for (i = 0; i < m_attributeList.length; i++) {
631: m_attributeMerit[i] = ASEvaluator
632: .evaluateAttribute(m_attributeList[i]);
633: }
634:
635: double[][] tempRanked = rankedAttributes();
636: int[] rankedAttributes = new int[m_selectedFeatures.length];
637:
638: for (i = 0; i < m_selectedFeatures.length; i++) {
639: rankedAttributes[i] = (int) tempRanked[i][0];
640: }
641: return rankedAttributes;
642: }
643:
644: /**
645: * Sorts the evaluated attribute list
646: *
647: * @return an array of sorted (highest eval to lowest) attribute indexes
648: * @throws Exception of sorting can't be done.
649: */
650: public double[][] rankedAttributes() throws Exception {
651: int i, j;
652:
653: if (m_attributeList == null || m_attributeMerit == null) {
654: throw new Exception(
655: "Search must be performed before a ranked "
656: + "attribute list can be obtained");
657: }
658:
659: int[] ranked = Utils.sort(m_attributeMerit);
660: // reverse the order of the ranked indexes
661: double[][] bestToWorst = new double[ranked.length][2];
662:
663: for (i = ranked.length - 1, j = 0; i >= 0; i--) {
664: bestToWorst[j++][0] = ranked[i];
665: //alan: means in the arrary ranked, varialbe is from ranked as from small to large
666: }
667:
668: // convert the indexes to attribute indexes
669: for (i = 0; i < bestToWorst.length; i++) {
670: int temp = ((int) bestToWorst[i][0]);
671: bestToWorst[i][0] = m_attributeList[temp]; //for the index
672: bestToWorst[i][1] = m_attributeMerit[temp]; //for the value of the index
673: }
674:
675: if (m_numToSelect > bestToWorst.length) {
676: throw new Exception(
677: "More attributes requested than exist in the data");
678: }
679:
680: this .FCBFElimination(bestToWorst);
681:
682: if (m_numToSelect <= 0) {
683: if (m_threshold == -Double.MAX_VALUE) {
684: m_calculatedNumToSelect = m_selectedFeatures.length;
685: } else {
686: determineNumToSelectFromThreshold(m_selectedFeatures);
687: }
688: }
689: /* if (m_numToSelect > 0) {
690: determineThreshFromNumToSelect(bestToWorst);
691: } */
692:
693: return m_selectedFeatures;
694: }
695:
696: private void determineNumToSelectFromThreshold(double[][] ranking) {
697: int count = 0;
698: for (int i = 0; i < ranking.length; i++) {
699: if (ranking[i][1] > m_threshold) {
700: count++;
701: }
702: }
703: m_calculatedNumToSelect = count;
704: }
705:
706: private void determineThreshFromNumToSelect(double[][] ranking)
707: throws Exception {
708: if (m_numToSelect > ranking.length) {
709: throw new Exception(
710: "More attributes requested than exist in the data");
711: }
712:
713: if (m_numToSelect == ranking.length) {
714: return;
715: }
716:
717: m_threshold = (ranking[m_numToSelect - 1][1] + ranking[m_numToSelect][1]) / 2.0;
718: }
719:
720: /**
721: * returns a description of the search as a String
722: * @return a description of the search
723: */
724: public String toString() {
725: StringBuffer BfString = new StringBuffer();
726: BfString.append("\tAttribute ranking.\n");
727:
728: if (m_starting != null) {
729: BfString.append("\tIgnored attributes: ");
730:
731: BfString.append(startSetToString());
732: BfString.append("\n");
733: }
734:
735: if (m_threshold != -Double.MAX_VALUE) {
736: BfString.append("\tThreshold for discarding attributes: "
737: + Utils.doubleToString(m_threshold, 8, 4) + "\n");
738: }
739:
740: BfString.append("\n\n");
741:
742: BfString.append(" J || SU(j,Class) || I || SU(i,j). \n");
743:
744: for (int i = 0; i < m_rankedFCBF.length; i++) {
745: BfString.append(Utils.doubleToString(
746: m_rankedFCBF[i][0] + 1, 6, 0)
747: + " ; "
748: + Utils.doubleToString(m_rankedFCBF[i][1], 12, 7)
749: + " ; ");
750: if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0]) {
751: BfString.append(" *\n");
752: } else {
753: BfString.append(Utils.doubleToString(
754: m_rankedFCBF[i][2] + 1, 5, 0)
755: + " ; " + m_rankedFCBF[i][3] + "\n");
756: }
757: }
758:
759: return BfString.toString();
760: }
761:
762: /**
763: * Resets stuff to default values
764: */
765: protected void resetOptions() {
766: m_starting = null;
767: m_startRange = new Range();
768: m_attributeList = null;
769: m_attributeMerit = null;
770: m_threshold = -Double.MAX_VALUE;
771: }
772:
773: private boolean inStarting(int feat) {
774: // omit the class from the evaluation
775: if ((m_hasClass == true) && (feat == m_classIndex)) {
776: return true;
777: }
778:
779: if (m_starting == null) {
780: return false;
781: }
782:
783: for (int i = 0; i < m_starting.length; i++) {
784: if (m_starting[i] == feat) {
785: return true;
786: }
787: }
788:
789: return false;
790: }
791:
  /**
   * Fast Correlation-Based Filter elimination: walks the merit-ranked
   * feature list and, for each feature kept (a "predominant" feature),
   * marks every later, still-undecided feature as redundant when its
   * correlation with that predominant feature is at least as large as
   * its correlation with the class. Fills m_rankedFCBF and
   * m_selectedFeatures.
   *
   * @param rankedFeatures rows of [attribute index, merit], best first
   * @throws Exception if the evaluator fails on an attribute pair
   */
  private void FCBFElimination(double[][] rankedFeatures)
      throws Exception {

    int i, j;

    m_rankedFCBF = new double[m_attributeList.length][4];
    // one-element scratch arrays reused for each pairwise evaluation
    int[] attributes = new int[1];
    int[] classAtrributes = new int[1];

    int numSelectedAttributes = 0;

    int startPoint = 0;
    double tempSUIJ = 0;

    AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator) m_asEval;

    // column 2 == -1 marks a feature as still undecided
    for (i = 0; i < rankedFeatures.length; i++) {
      m_rankedFCBF[i][0] = rankedFeatures[i][0];
      m_rankedFCBF[i][1] = rankedFeatures[i][1];
      m_rankedFCBF[i][2] = -1;
    }

    while (startPoint < rankedFeatures.length) {
      // skip features already marked redundant
      if (m_rankedFCBF[startPoint][2] != -1) {
        startPoint++;
        continue;
      }

      // select this feature: it records its own index as predominant
      m_rankedFCBF[startPoint][2] = m_rankedFCBF[startPoint][0];
      numSelectedAttributes++;

      for (i = startPoint + 1; i < m_attributeList.length; i++) {
        if (m_rankedFCBF[i][2] != -1) {
          continue;
        }
        attributes[0] = (int) m_rankedFCBF[startPoint][0];
        classAtrributes[0] = (int) m_rankedFCBF[i][0];
        // pairwise merit between the selected feature and candidate i
        tempSUIJ = ASEvaluator.evaluateAttribute(attributes,
            classAtrributes);
        // redundant when merit(i, selected) >= merit(i, class);
        // the 1E-8 tolerance absorbs floating-point noise
        if (m_rankedFCBF[i][1] < tempSUIJ
            || Math.abs(tempSUIJ - m_rankedFCBF[i][1]) < 1E-8) {
          m_rankedFCBF[i][2] = m_rankedFCBF[startPoint][0];
          m_rankedFCBF[i][3] = tempSUIJ;
        }
      }
      startPoint++;
    }

    m_selectedFeatures = new double[numSelectedAttributes][2];

    // collect the rows that selected themselves (column 2 == own index)
    for (i = 0, j = 0; i < m_attributeList.length; i++) {
      if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0]) {
        m_selectedFeatures[j][0] = m_rankedFCBF[i][0];
        m_selectedFeatures[j][1] = m_rankedFCBF[i][1];
        j++;
      }
    }
  }
850: }
|