Source Code Cross Referenced for OLM.java in  » Science » weka » weka » classifiers » misc » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.classifiers.misc 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         *    This program is free software; you can redistribute it and/or modify
0003:         *    it under the terms of the GNU General Public License as published by
0004:         *    the Free Software Foundation; either version 2 of the License, or
0005:         *    (at your option) any later version.
0006:         *
0007:         *    This program is distributed in the hope that it will be useful,
0008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
0009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0010:         *    GNU General Public License for more details.
0011:         *
0012:         *    You should have received a copy of the GNU General Public License
0013:         *    along with this program; if not, write to the Free Software
0014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015:         */
0016:
0017:        /*
0018:         *    OLM.java
0019:         *    Copyright (C) 2004 Stijn Lievens
0020:         *
0021:         */
0022:
0023:        package weka.classifiers.misc;
0024:
0025:        import weka.classifiers.RandomizableClassifier;
0026:        import weka.classifiers.misc.monotone.Coordinates;
0027:        import weka.classifiers.misc.monotone.DiscreteDistribution;
0028:        import weka.classifiers.misc.monotone.EnumerationIterator;
0029:        import weka.classifiers.misc.monotone.InstancesComparator;
0030:        import weka.classifiers.misc.monotone.InstancesUtil;
0031:        import weka.classifiers.misc.monotone.MultiDimensionalSort;
0032:        import weka.core.Attribute;
0033:        import weka.core.Capabilities;
0034:        import weka.core.FastVector;
0035:        import weka.core.Instance;
0036:        import weka.core.Instances;
0037:        import weka.core.Option;
0038:        import weka.core.SelectedTag;
0039:        import weka.core.Tag;
0040:        import weka.core.TechnicalInformation;
0041:        import weka.core.TechnicalInformationHandler;
0042:        import weka.core.Utils;
0043:        import weka.core.Capabilities.Capability;
0044:        import weka.core.TechnicalInformation.Field;
0045:        import weka.core.TechnicalInformation.Type;
0046:        import weka.estimators.DiscreteEstimator;
0047:
0048:        import java.util.ArrayList;
0049:        import java.util.Comparator;
0050:        import java.util.Enumeration;
0051:        import java.util.HashMap;
0052:        import java.util.Iterator;
0053:        import java.util.Map;
0054:        import java.util.Random;
0055:        import java.util.Vector;
0056:
0057:        /**
0058:         <!-- globalinfo-start -->
0059:         * This class is an implementation of the Ordinal Learning Method<br/>
0060:         * Further information regarding the algorithm and variants can be found in:<br/>
0061:         * <br/>
0062:         * Arie Ben-David (1992). Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications. Decision Sciences. 23:1357-1372.<br/>
0063:         * <br/>
0064:         * Lievens, Stijn (2003-2004). Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken..
0065:         * <p/>
0066:         <!-- globalinfo-end -->
0067:         *
0068:         <!-- technical-bibtex-start -->
0069:         * BibTeX:
0070:         * <pre>
0071:         * &#64;article{Ben-David1992,
0072:         *    author = {Arie Ben-David},
0073:         *    journal = {Decision Sciences},
0074:         *    pages = {1357-1372},
0075:         *    title = {Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications},
0076:         *    volume = {23},
0077:         *    year = {1992}
0078:         * }
0079:         * 
0080:         * &#64;mastersthesis{Lievens2003-2004,
0081:         *    author = {Lievens, Stijn},
0082:         *    school = {Ghent University},
0083:         *    title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.},
0084:         *    year = {2003-2004}
0085:         * }
0086:         * </pre>
0087:         * <p/>
0088:         <!-- technical-bibtex-end -->
0089:         * 
0090:         <!-- options-start -->
0091:         * Valid options are: <p/>
0092:         * 
0093:         * <pre> -S &lt;num&gt;
0094:         *  Random number seed.
0095:         *  (default 1)</pre>
0096:         * 
0097:         * <pre> -D
0098:         *  If set, classifier is run in debug mode and
0099:         *  may output additional info to the console</pre>
0100:         * 
0101:         * <pre> -C &lt;CL|REG&gt;
0102:         *  Sets the classification type to be used.
0103:         *  (Default: REG)</pre>
0104:         * 
0105:         * <pre> -A &lt;MEAN|MED|MAX&gt;
0106:         *  Sets the averaging type used in phase 1 of the classifier.
0107:         *  (Default: MEAN)</pre>
0108:         * 
0109:         * <pre> -N &lt;NONE|EUCL|HAM&gt;
0110:         *  If different from NONE, a nearest neighbour rule is fired when the
0111:         *  rule base doesn't contain an example smaller than the instance
0112:         *  to be classified
0113:         *  (Default: NONE).</pre>
0114:         * 
0115:         * <pre> -E &lt;MIN|MAX|BOTH&gt;
0116:         *  Sets the extension type, i.e. the rule base to use.
0117:         *  (Default: MIN)</pre>
0118:         * 
0119:         * <pre> -sort
0120:         *  If set, the instances are also sorted within the same class
0121:         *  before building the rule bases</pre>
0122:         * 
0123:         <!-- options-end -->
0124:         *
0125:         * @author Stijn Lievens (stijn.lievens@ugent.be)
0126:         * @version $Revision: 1.1 $
0127:         */
0128:        public class OLM extends RandomizableClassifier implements 
0129:                TechnicalInformationHandler {
0130:
0131:            /** for serialization */
0132:            private static final long serialVersionUID = 3722951802290935192L;
0133:
0134:            /**
0135:             * Round the real value that is returned by the original algorithm 
0136:             * to the nearest label.
0137:             */
0138:            public static final int CT_ROUNDED = 0;
0139:
0140:            /**
0141:             * No rounding is performed during classification; that is, the
0142:             * classification is done in a regression-like way.
0143:             */
0144:            public static final int CT_REAL = 1;
0145:
0146:            /** the classification types */
0147:            public static final Tag[] TAGS_CLASSIFICATIONTYPES = {
0148:                    new Tag(CT_ROUNDED, "CL", "Round to nearest label"),
0149:                    new Tag(CT_REAL, "REG", "Regression-like classification") };
0150:
0151:            /**
0152:             * Use the mean for averaging in phase 1.  This is in fact a 
0153:             * non ordinal procedure.  The scores used for averaging are the internal
0154:             * values of WEKA.
0155:             */
0156:            public static final int AT_MEAN = 0;
0157:
0158:            /**
0159:             * Use the median for averaging in phase 1.  The possible values
0160:             * are in the extended set of labels, this is labels in between the
0161:             * original labels are possible.
0162:             */
0163:            public static final int AT_MEDIAN = 1;
0164:
0165:            /**
0166:             * Use the mode for averaging in phase 1.  The label
0167:             * that has maximum frequency is used.  If there is more 
0168:             * than one label that has maximum frequency, the lowest 
0169:             * one is prefered.
0170:             */
0171:            public static final int AT_MAXPROB = 2;
0172:
0173:            /** the averaging types */
0174:            public static final Tag[] TAGS_AVERAGINGTYPES = {
0175:                    new Tag(AT_MEAN, "MEAN", "Mean"),
0176:                    new Tag(AT_MEDIAN, "MED", "Median"),
0177:                    new Tag(AT_MAXPROB, "MAX", "Max probability") };
0178:
0179:            /** 
0180:             * No nearest neighbour rule will be fired when 
0181:             * classifying an instance for which there is no smaller rule 
0182:             * in the rule base.
0183:             */
0184:            public static final int DT_NONE = -1;
0185:
0186:            /**
0187:             * Use the Euclidian distance whenever a nearest neighbour 
0188:             * rule is fired.
0189:             */
0190:            public static final int DT_EUCLID = 0;
0191:
0192:            /** 
0193:             * Use the Hamming distance, this is the  number of 
0194:             * positions in which the instances differ, whenever a 
0195:             * nearest neighbour rule is fired
0196:             */
0197:            public static final int DT_HAMMING = 1;
0198:
0199:            /** the distance types */
0200:            public static final Tag[] TAGS_DISTANCETYPES = {
0201:                    new Tag(DT_NONE, "NONE", "No nearest neighbor"),
0202:                    new Tag(DT_EUCLID, "EUCL", "Euclidean"),
0203:                    new Tag(DT_HAMMING, "HAM", "Hamming") };
0204:
0205:            /**
0206:             * Use only the minimal extension, as in the original algorithm 
0207:             * of Ben-David.
0208:             */
0209:            public static final int ET_MIN = 0;
0210:
0211:            /**
0212:             * Use only the maximal extension.  In this case an algorithm
0213:             * dual to the original one is performed.
0214:             */
0215:            public static final int ET_MAX = 1;
0216:
0217:            /**
0218:             * Combine both the minimal and maximal extension, and use the 
0219:             * midpoint of the resulting interval as prediction.
0220:             */
0221:            public static final int ET_BOTH = 2;
0222:
0223:            /** the mode types */
0224:            public static final Tag[] TAGS_EXTENSIONTYPES = {
0225:                    new Tag(ET_MIN, "MIN", "Minimal extension"),
0226:                    new Tag(ET_MAX, "MAX", "Maximal extension"),
0227:                    new Tag(ET_BOTH, "BOTH", "Minimal and maximal extension") };
0228:
0229:            /** 
0230:             * The training examples, used temporarily.
0231:             * m_train is cleared after the rule base is built.
0232:             */
0233:            private Instances m_train;
0234:
0235:            /** Number of classes in the original m_train */
0236:            private int m_numClasses;
0237:
0238:            /** 
0239:             * The rule base, should be consistent and contain no 
0240:             * redundant rules.  This is the rule base as in the original
0241:             * algorithm of Ben-David.
0242:             */
0243:            private Instances m_baseMin;
0244:
0245:            /** 
0246:             * This is a complementary rule base, using the maximal rather
0247:             * than the minimal extension.
0248:             */
0249:            private Instances m_baseMax;
0250:
0251:            /** 
0252:             * Map used in the method buildClassifier in order to quickly
0253:             * gather all info needed for phase 1.  This is a map containing
0254:             * (Coordinates, DiscreteEstimator)-pairs.
0255:             */
0256:            private Map m_estimatedDistributions;
0257:
0258:            /** classification type */
0259:            private int m_ctype = CT_REAL;
0260:
0261:            /** averaging type */
0262:            private int m_atype = AT_MEAN;
0263:
0264:            /** distance type */
0265:            private int m_dtype = DT_EUCLID;
0266:
0267:            /** mode type */
0268:            private int m_etype = ET_MIN;
0269:
0270:            /** 
0271:             * Should the instances be sorted such that minimal (resp. maximal)
0272:             * elements (per class) are treated first when building m_baseMin 
0273:             * (resp. m_baseMax).
0274:             */
0275:            private boolean m_sort = false;
0276:
0277:            /**
0278:             * Returns a string describing the classifier.
0279:             * @return a description suitable for displaying in the 
0280:             * explorer/experimenter gui
0281:             */
0282:            public String globalInfo() {
0283:                return "This class is an implementation of the Ordinal Learning "
0284:                        + "Method\n"
0285:                        + "Further information regarding the algorithm and variants "
0286:                        + "can be found in:\n\n"
0287:                        + getTechnicalInformation().toString();
0288:            }
0289:
0290:            /**
0291:             * Returns default capabilities of the classifier.
0292:             *
0293:             * @return      the capabilities of this classifier
0294:             */
0295:            public Capabilities getCapabilities() {
0296:                Capabilities result = super .getCapabilities();
0297:
0298:                // attributes
0299:                result.enable(Capability.NOMINAL_ATTRIBUTES);
0300:
0301:                // class
0302:                result.enable(Capability.NOMINAL_CLASS);
0303:                result.enable(Capability.MISSING_CLASS_VALUES);
0304:
0305:                // instances
0306:                result.setMinimumNumberInstances(0);
0307:
0308:                return result;
0309:            }
0310:
0311:            /**
0312:             * Returns an instance of a TechnicalInformation object, containing 
0313:             * detailed information about the technical background of this class,
0314:             * e.g., paper reference or book this class is based on.
0315:             * 
0316:             * @return the technical information about this class
0317:             */
0318:            public TechnicalInformation getTechnicalInformation() {
0319:                TechnicalInformation result;
0320:                TechnicalInformation additional;
0321:
0322:                result = new TechnicalInformation(Type.ARTICLE);
0323:                result.setValue(Field.AUTHOR, "Arie Ben-David");
0324:                result.setValue(Field.YEAR, "1992");
0325:                result
0326:                        .setValue(
0327:                                Field.TITLE,
0328:                                "Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications");
0329:                result.setValue(Field.JOURNAL, "Decision Sciences");
0330:                result.setValue(Field.PAGES, "1357-1372");
0331:                result.setValue(Field.VOLUME, "23");
0332:
0333:                additional = result.add(Type.MASTERSTHESIS);
0334:                additional.setValue(Field.AUTHOR, "Lievens, Stijn");
0335:                additional.setValue(Field.YEAR, "2003-2004");
0336:                additional
0337:                        .setValue(
0338:                                Field.TITLE,
0339:                                "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.");
0340:                additional.setValue(Field.SCHOOL, "Ghent University");
0341:
0342:                return result;
0343:            }
0344:
0345:            /**
0346:             * Returns the tip text for this property.
0347:             *
0348:             * @return tip text for this property suitable for 
0349:             * displaying in the explorer/experimenter gui
0350:             */
0351:            public String classificationTypeTipText() {
0352:                return "Sets the classification type.";
0353:            }
0354:
0355:            /**
0356:             * Sets the classification type.
0357:             *
0358:             * @param value the classification type to be set.
0359:             */
0360:            public void setClassificationType(SelectedTag value) {
0361:                if (value.getTags() == TAGS_CLASSIFICATIONTYPES)
0362:                    m_ctype = value.getSelectedTag().getID();
0363:            }
0364:
0365:            /**
0366:             * Gets the classification type.
0367:             *
0368:             * @return the classification type
0369:             */
0370:            public SelectedTag getClassificationType() {
0371:                return new SelectedTag(m_ctype, TAGS_CLASSIFICATIONTYPES);
0372:            }
0373:
0374:            /**
0375:             * Returns the tip text for this property.
0376:             *
0377:             * @return tip text for this property suitable for 
0378:             * displaying in the explorer/experimenter gui
0379:             */
0380:            public String averagingTypeTipText() {
0381:                return "Choses the way in which the distributions are averaged in "
0382:                        + "the first phase of the algorithm.";
0383:            }
0384:
0385:            /**
0386:             * Sets the averaging type to use in phase 1 of the algorithm.  
0387:             *
0388:             * @param value the averaging type to use
0389:             */
0390:            public void setAveragingType(SelectedTag value) {
0391:                if (value.getTags() == TAGS_AVERAGINGTYPES)
0392:                    m_atype = value.getSelectedTag().getID();
0393:            }
0394:
0395:            /**
0396:             * Gets the averaging type.
0397:             *
0398:             * @return the averaging type
0399:             */
0400:            public SelectedTag getAveragingType() {
0401:                return new SelectedTag(m_atype, TAGS_AVERAGINGTYPES);
0402:            }
0403:
0404:            /**
0405:             * Returns the tip text for this property.
0406:             *
0407:             * @return tip text for this property suitable for 
0408:             * displaying in the explorer/experimenter gui
0409:             */
0410:            public String distanceTypeTipText() {
0411:                return "Sets the distance that is to be used by the nearest neighbour "
0412:                        + "rule";
0413:            }
0414:
0415:            /**
0416:             * Sets the distance type to be used by a nearest neighbour rule (if any).
0417:             *
0418:             * @param value the distance type to use
0419:             */
0420:            public void setDistanceType(SelectedTag value) {
0421:                if (value.getTags() == TAGS_DISTANCETYPES)
0422:                    m_dtype = value.getSelectedTag().getID();
0423:            }
0424:
0425:            /** 
0426:             * Gets the distance type used by a nearest neighbour rule (if any).
0427:             *
0428:             * @return the distance type
0429:             */
0430:            public SelectedTag getDistanceType() {
0431:                return new SelectedTag(m_dtype, TAGS_DISTANCETYPES);
0432:            }
0433:
0434:            /**
0435:             * Returns the tip text for this property.
0436:             *
0437:             * @return tip text for this property suitable for 
0438:             * displaying in the explorer/experimenter gui
0439:             */
0440:            public String extensionTypeTipText() {
0441:                return "Sets the extension type to use.";
0442:            }
0443:
0444:            /**
0445:             * Sets the extension type to use.
0446:             * The minimal extension is the one used by 
0447:             * Ben-David in the original algorithm.  The maximal extension is
0448:             * a completely dual variant of the minimal extension.  When using
0449:             * both, then the midpoint of the interval determined by both
0450:             * extensions is returned.
0451:             *
0452:             * @param value the extension type to use
0453:             */
0454:            public void setExtensionType(SelectedTag value) {
0455:                if (value.getTags() == TAGS_EXTENSIONTYPES)
0456:                    m_etype = value.getSelectedTag().getID();
0457:            }
0458:
0459:            /**
0460:             * Gets the extension type.
0461:             *
0462:             * @return the extension type
0463:             */
0464:            public SelectedTag getExtensionType() {
0465:                return new SelectedTag(m_etype, TAGS_EXTENSIONTYPES);
0466:            }
0467:
0468:            /**
0469:             * Returns the tip text for this property.
0470:             *
0471:             * @return tip text for this property suitable for 
0472:             * displaying in the explorer/experimenter gui
0473:             */
0474:            public String sortTipText() {
0475:                return "If true, the instances are also sorted within the classes "
0476:                        + "prior to building the rule bases.";
0477:            }
0478:
0479:            /**
0480:             * Sets if the instances are to be sorted prior to building the rule bases.
0481:             *
0482:             * @param sort if <code> true </code> the instances will be sorted
0483:             */
0484:            public void setSort(boolean sort) {
0485:                m_sort = sort;
0486:            }
0487:
0488:            /**
0489:             * Returns if the instances are sorted prior to building the rule bases.
0490:             * 
0491:             * @return <code> true </code> if instances are sorted prior to building
0492:             * the rule bases, <code> false </code> otherwise.
0493:             */
0494:            public boolean getSort() {
0495:                return m_sort;
0496:            }
0497:
0498:            /**
0499:             * Returns the tip text for this property.
0500:             *
0501:             * @return tip text for this property suitable for 
0502:             * displaying in the explorer/experimenter gui
0503:             */
0504:            public String seedTipText() {
0505:                return "Sets the seed that is used to randomize the instances prior "
0506:                        + "to building the rule bases";
0507:            }
0508:
0509:            /**
0510:             * Return the number of examples in the minimal rule base.
0511:             * The minimal rule base is the one that corresponds to the 
0512:             * rule base of Ben-David.
0513:             *
0514:             * @return the number of examples in the minimal rule base
0515:             */
0516:            public int getSizeRuleBaseMin() {
0517:                return m_baseMin.numInstances();
0518:            }
0519:
0520:            /**
0521:             * Return the number of examples in the maximal rule base.
0522:             * The maximal rule base is built using an algorithm
0523:             * dual to that for building the minimal rule base.
0524:             *
0525:             * @return the number of examples in the maximal rule base
0526:             */
0527:            public int getSizeRuleBaseMax() {
0528:                return m_baseMax.numInstances();
0529:            }
0530:
0531:            /** 
0532:             * Classifies a given instance according to the current settings
0533:             * of the classifier.
0534:             * 
0535:             * @param instance the instance to be classified
0536:             * @return a <code> double </code> that represents the classification,
0537:             * this could either be the internal value of a label, when rounding is 
0538:             * on, or a real number.
0539:             */
0540:            public double classifyInstance(Instance instance) {
0541:                double classValueMin = -1;
0542:                double classValueMax = -1;
0543:                double classValue;
0544:
0545:                if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0546:                    classValueMin = classifyInstanceMin(instance);
0547:                }
0548:
0549:                if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0550:                    classValueMax = classifyInstanceMax(instance);
0551:                }
0552:
0553:                switch (m_etype) {
0554:                case ET_MIN:
0555:                    classValue = classValueMin;
0556:                    break;
0557:                case ET_MAX:
0558:                    classValue = classValueMax;
0559:                    break;
0560:                case ET_BOTH:
0561:                    classValue = (classValueMin + classValueMax) / 2;
0562:                    break;
0563:                default:
0564:                    throw new IllegalStateException("Illegal mode type!");
0565:                }
0566:
0567:                // round if necessary and return 
0568:                return (m_ctype == CT_ROUNDED ? Utils.round(classValue)
0569:                        : classValue);
0570:            }
0571:
0572:            /**
0573:             * Classify <code> instance </code> using the minimal rule base.
0574:             * Rounding is never performed, this is the responsability
0575:             * of <code> classifyInstance </code>.
0576:             *
0577:             * @param instance the instance to be classified
0578:             * @return the classification according to the minimal rule base
0579:             */
0580:            private double classifyInstanceMin(Instance instance) {
0581:                double classValue = -1;
0582:                if (m_baseMin == null) {
0583:                    throw new IllegalStateException(
0584:                            "Classifier has not yet been built");
0585:                }
0586:
0587:                Iterator it = new EnumerationIterator(m_baseMin
0588:                        .enumerateInstances());
0589:                while (it.hasNext()) {
0590:                    Instance r = (Instance) it.next();
0591:
0592:                    // we assume that rules are ordered in decreasing class value order
0593:                    // so that the first one that we encounter is immediately the
0594:                    // one with the biggest class value
0595:                    if (InstancesUtil.smallerOrEqual(r, instance)) {
0596:                        classValue = r.classValue();
0597:                        break;
0598:                    }
0599:                }
0600:
0601:                // there is no smaller rule in the database
0602:                if (classValue == -1) {
0603:                    if (m_dtype != DT_NONE) {
0604:                        Instance[] nn = nearestRules(instance, m_baseMin);
0605:                        classValue = 0;
0606:                        // XXX for the moment we only use the mean to extract a 
0607:                        // classValue; other possibilities might be included later
0608:                        for (int i = 0; i < nn.length; i++) {
0609:                            classValue += nn[i].classValue();
0610:                        }
0611:                        classValue /= nn.length;
0612:                    } else {
0613:                        classValue = 0; // minimal class value
0614:                    }
0615:                }
0616:
0617:                return classValue; // no rounding!
0618:            }
0619:
0620:            /**
0621:             * Classify <code> instance </code> using the maximal rule base.
0622:             * Rounding is never performed, this is the responsability
0623:             * of <code> classifyInstance </code>.
0624:             * 
0625:             * @param instance the instance to be classified
0626:             * @return the classification according to the maximal rule base
0627:             */
0628:            private double classifyInstanceMax(Instance instance) {
0629:                double classValue = -1;
0630:                if (m_baseMax == null) {
0631:                    throw new IllegalStateException(
0632:                            "Classifier has not yet been built");
0633:                }
0634:
0635:                Iterator it = new EnumerationIterator(m_baseMax
0636:                        .enumerateInstances());
0637:                while (it.hasNext()) {
0638:                    Instance r = (Instance) it.next();
0639:
0640:                    // we assume that rules are ordered in increasing class value order
0641:                    // so that the first bigger one we encounter will be the one 
0642:                    // with the smallest label
0643:                    if (InstancesUtil.smallerOrEqual(instance, r)) {
0644:                        classValue = r.classValue();
0645:                        break;
0646:                    }
0647:                }
0648:
0649:                // there is no bigger rule in the database
0650:                if (classValue == -1) {
0651:                    if (m_dtype != DT_NONE) {
0652:                        // XXX see remark in classifyInstanceMin 
0653:                        Instance[] nn = nearestRules(instance, m_baseMax);
0654:                        classValue = 0;
0655:                        for (int i = 0; i < nn.length; i++) {
0656:                            classValue += nn[i].classValue();
0657:                        }
0658:                        classValue /= nn.length;
0659:                    } else {
0660:                        classValue = m_numClasses - 1; // maximal label 
0661:                    }
0662:                }
0663:
0664:                return classValue;
0665:            }
0666:
0667:            /**
0668:             * Find the instances in <code> base </code> that are, 
0669:             * according to the current distance type, closest 
0670:             * to <code> instance </code>.
0671:             *
0672:             * @param instance the instance around which one looks
0673:             * @param base the instances to choose from
0674:             * @return an array of <code> Instance </code> which contains
0675:             * the instances closest to <code> instance </code>
0676:             */
0677:            private Instance[] nearestRules(Instance instance, Instances base) {
0678:                double min = Double.POSITIVE_INFINITY;
0679:                double dist = 0;
0680:                double[] instanceDouble = InstancesUtil.toDataDouble(instance);
0681:                ArrayList nn = new ArrayList();
0682:                Iterator it = new EnumerationIterator(base.enumerateInstances());
0683:                while (it.hasNext()) {
0684:                    Instance r = (Instance) it.next();
0685:                    double[] rDouble = InstancesUtil.toDataDouble(r);
0686:                    switch (m_dtype) {
0687:                    case DT_EUCLID:
0688:                        dist = euclidDistance(instanceDouble, rDouble);
0689:                        break;
0690:                    case DT_HAMMING:
0691:                        dist = hammingDistance(instanceDouble, rDouble);
0692:                        break;
0693:                    default:
0694:                        throw new IllegalArgumentException(
0695:                                "distance type is not valid");
0696:                    }
0697:                    if (dist < min) {
0698:                        min = dist;
0699:                        nn.clear();
0700:                        nn.add(r);
0701:                    } else if (dist == min) {
0702:                        nn.add(r);
0703:                    }
0704:                }
0705:
0706:                nn.trimToSize();
0707:                return (Instance[]) nn.toArray(new Instance[0]);
0708:            }
0709:
            /**
             * Build the OLM classifier, meaning that the rule bases 
             * are built.  Phase 1 collapses all training examples that
             * share the same attribute values into a single example whose
             * class value is derived from the observed class distribution
             * (mean, median or mode, depending on the averaging type).
             * Phase 2 then builds the minimal and maximal rule bases from
             * these collapsed examples.
             *
             * @param instances the instances to use for building the rule base
             * @throws Exception if <code> instances </code> cannot be handled by
             * the classifier.
             */
            public void buildClassifier(Instances instances) throws Exception {
                getCapabilities().testWithFail(instances);

                // copy the dataset 
                m_train = new Instances(instances);
                m_numClasses = m_train.numClasses();

                // new dataset in which examples with missing class value are removed
                m_train.deleteWithMissingClass();

                // build the Map for the estimatedDistributions:
                // one DiscreteEstimator (class distribution) per distinct
                // attribute vector (Coordinates)
                m_estimatedDistributions = new HashMap(
                        m_train.numInstances() / 2);

                // cycle through all instances 
                Iterator it = new EnumerationIterator(m_train
                        .enumerateInstances());
                while (it.hasNext() == true) {
                    Instance instance = (Instance) it.next();
                    Coordinates c = new Coordinates(instance);

                    // get DiscreteEstimator from the map
                    DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions
                            .get(c);

                    // if no DiscreteEstimator is present in the map, create one 
                    if (df == null) {
                        df = new DiscreteEstimator(instances.numClasses(), 0);
                    }
                    df.addValue(instance.classValue(), instance.weight()); // update
                    m_estimatedDistributions.put(c, df); // put back in map
                }

                // Create the attributes for m_baseMin and m_baseMax. 
                // These are identical to those of m_train, except that the 
                // class is set to 'numeric'
                // The class attribute is moved to the back 
                FastVector newAtts = new FastVector(m_train.numAttributes());
                Attribute classAttribute = null;
                for (int i = 0; i < m_train.numAttributes(); i++) {
                    Attribute att = m_train.attribute(i);
                    if (i != m_train.classIndex()) {
                        newAtts.addElement(att.copy());
                    } else {
                        classAttribute = new Attribute(att.name()); //numeric attribute 
                    }
                }
                newAtts.addElement(classAttribute);

                // original training instances are replaced by an empty set
                // of instances
                m_train = new Instances(m_train.relationName(), newAtts,
                        m_estimatedDistributions.size());
                m_train.setClassIndex(m_train.numAttributes() - 1);

                // We cycle through the map of estimatedDistributions and
                // create one Instance for each entry in the map, with 
                // a class value that is calculated from the distribution of
                // the class values
                it = m_estimatedDistributions.keySet().iterator();
                while (it.hasNext()) {
                    // XXX attValues must be here, otherwise things go wrong
                    // (each Instance needs its own backing array; Coordinates
                    // and the class value are written into it below)
                    double[] attValues = new double[m_train.numAttributes()];
                    Coordinates cc = (Coordinates) it.next();
                    DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions
                            .get(cc);
                    cc.getValues(attValues);
                    // collapse the class distribution into a single value,
                    // stored in the (last) class slot of attValues
                    switch (m_atype) {
                    case AT_MEAN:
                        attValues[attValues.length - 1] = (new DiscreteDistribution(
                                df)).mean();
                        break;
                    case AT_MEDIAN:
                        attValues[attValues.length - 1] = (new DiscreteDistribution(
                                df)).median();
                        break;
                    case AT_MAXPROB:
                        attValues[attValues.length - 1] = (new DiscreteDistribution(
                                df)).modes()[0];
                        break;
                    default:
                        throw new IllegalStateException(
                                "Not a valid averaging type");
                    }

                    // add the instance, we give it weight one 
                    m_train.add(new Instance(1, attValues));
                }

                if (m_Debug == true) {
                    System.out.println("The dataset after phase 1 :");
                    System.out.println(m_train.toString());
                }

                /* Shuffle to training instances, to prevent the order dictated
                 * by the map to play an important role in how the rule bases
                 * are built. 
                 */
                m_train.randomize(new Random(getSeed()));

                if (m_sort == false) {
                    // sort the instances only in increasing class order
                    m_train.sort(m_train.classIndex());
                } else {
                    // sort instances completely
                    Comparator[] cc = new Comparator[m_train.numAttributes()];
                    // sort the class, increasing 
                    cc[0] = new InstancesComparator(m_train.classIndex());
                    // sort the attributes, decreasing
                    for (int i = 1; i < cc.length; i++) {
                        cc[i] = new InstancesComparator(i - 1, true);
                    }

                    // copy instances into an array
                    Instance[] tmp = new Instance[m_train.numInstances()];
                    for (int i = 0; i < tmp.length; i++) {
                        tmp[i] = m_train.instance(i);
                    }
                    MultiDimensionalSort.multiDimensionalSort(tmp, cc);

                    // copy sorted array back into Instances
                    m_train.delete();
                    for (int i = 0; i < tmp.length; i++) {
                        m_train.add(tmp[i]);
                    }
                }

                // phase 2: building the rule bases themselves
                // (phaseTwoMin/phaseTwoMax rely on the class ordering
                // established above)
                m_baseMin = new Instances(m_train, m_estimatedDistributions
                        .size() / 4);
                phaseTwoMin();
                m_baseMax = new Instances(m_train, m_estimatedDistributions
                        .size() / 4);
                phaseTwoMax();
            }
0853:
0854:            /**
0855:             * This implements the second phase of the OLM algorithm.
0856:             * We build the rule base  m_baseMin, according to the conflict
0857:             * resolution mechanism described in the thesis. 
0858:             */
0859:            private void phaseTwoMin() {
0860:
0861:                // loop through instances backwards, this is biggest class labels first
0862:                for (int i = m_train.numInstances() - 1; i >= 0; i--) {
0863:                    Instance e = m_train.instance(i);
0864:
0865:                    // if the example is redundant with m_base, we discard it
0866:                    if (isRedundant(e) == false) {
0867:                        // how many examples are redundant if we would add e
0868:                        int[] redundancies = makesRedundant(e);
0869:                        if (redundancies[0] == 1
0870:                                && causesReversedPreference(e) == false) {
0871:                            // there is one example made redundant be e, and 
0872:                            // adding e doesn't cause reversed preferences  
0873:                            // so we replace the indicated rule by e
0874:                            m_baseMin.delete(redundancies[1]);
0875:                            m_baseMin.add(e);
0876:                            continue;
0877:                        }
0878:
0879:                        if (redundancies[0] == 0) {
0880:                            // adding e causes no redundancies, what about 
0881:                            // reversed preferences ?
0882:                            int[] revPref = reversedPreferences(e);
0883:                            if (revPref[0] == 1) {
0884:                                // there would be one reversed preference, we 
0885:                                // prefer the example e because it has a lower label
0886:                                m_baseMin.delete(revPref[1]);
0887:                                m_baseMin.add(e);
0888:                                continue;
0889:                            }
0890:
0891:                            if (revPref[0] == 0) {
0892:                                // this means: e causes no redundancies and no 
0893:                                // reversed preferences.  We can simply add it.
0894:                                m_baseMin.add(e);
0895:                            }
0896:                        }
0897:                    }
0898:                }
0899:            }
0900:
0901:            /** 
0902:             * This implements the second phase of the OLM algorithm.
0903:             * We build the rule base  m_baseMax .
0904:             */
0905:            private void phaseTwoMax() {
0906:
0907:                // loop through instances, smallest class labels first
0908:                for (int i = 0; i < m_train.numInstances(); i++) {
0909:                    Instance e = m_train.instance(i);
0910:
0911:                    // if the example is redundant with m_base, we discard it
0912:                    if (isRedundantMax(e) == false) {
0913:                        // how many examples are redundant if we would add e
0914:                        int[] redundancies = makesRedundantMax(e);
0915:                        if (redundancies[0] == 1
0916:                                && causesReversedPreferenceMax(e) == false) {
0917:                            // there is one example made redundant be e, and 
0918:                            // adding e doesn't cause reversed preferences  
0919:                            // so we replace the indicated rule by e
0920:                            m_baseMax.delete(redundancies[1]);
0921:                            m_baseMax.add(e);
0922:                            continue;
0923:                        }
0924:
0925:                        if (redundancies[0] == 0) {
0926:                            // adding e causes no redundancies, what about 
0927:                            // reversed preferences ?
0928:                            int[] revPref = reversedPreferencesMax(e);
0929:                            if (revPref[0] == 1) {
0930:                                // there would be one reversed preference, we 
0931:                                // prefer the example e because it has a lower label
0932:                                m_baseMax.delete(revPref[1]);
0933:                                m_baseMax.add(e);
0934:                                continue;
0935:                            }
0936:
0937:                            if (revPref[0] == 0) {
0938:                                // this means: e causes no redundancies and no 
0939:                                // reversed preferences.  We can simply add it.
0940:                                m_baseMax.add(e);
0941:                            }
0942:                        }
0943:                    }
0944:                }
0945:            }
0946:
0947:            /**
0948:             * Returns a string description of the classifier.  In debug
0949:             * mode, the rule bases are added to the string representation
0950:             * as well.  This means that the description can become rather
0951:             * lengthy.
0952:             *
0953:             * @return a <code> String </code> describing the classifier.
0954:             */
0955:            public String toString() {
0956:                StringBuffer sb = new StringBuffer();
0957:                sb.append("OLM\n===\n\n");
0958:                if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0959:                    if (m_baseMin != null) {
0960:                        sb
0961:                                .append("Number of examples in the minimal rule base = "
0962:                                        + m_baseMin.numInstances() + "\n");
0963:                    } else {
0964:                        sb.append("minimal rule base not yet created");
0965:                    }
0966:                }
0967:
0968:                if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0969:                    if (m_baseMax != null) {
0970:                        sb
0971:                                .append("Number of examples in the maximal rule base = "
0972:                                        + m_baseMax.numInstances() + "\n");
0973:                    } else {
0974:                        sb.append("maximal rule base not yet created");
0975:                    }
0976:                }
0977:
0978:                if (m_Debug == true) {
0979:
0980:                    if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0981:                        sb.append("The minimal rule base is \n");
0982:                        if (m_baseMin != null) {
0983:                            sb.append(m_baseMin.toString());
0984:                        } else {
0985:                            sb.append(".... not yet created");
0986:                        }
0987:                    }
0988:
0989:                    if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0990:                        sb.append("The second rule base is \n");
0991:                        if (m_baseMax != null) {
0992:                            sb.append(m_baseMax.toString());
0993:                        } else {
0994:                            sb.append(".... not yet created");
0995:                        }
0996:                    }
0997:                }
0998:
0999:                sb.append("\n");
1000:                return sb.toString();
1001:            }
1002:
1003:            /**
1004:             * Is <code> instance </code> redundant wrt the 
1005:             * current <code> m_baseMin </code>.
1006:             * This mean we are looking if there is an element in 
1007:             * <code> m_baseMin </code> smaller than <code> instance </code>
1008:             * and with the same class.
1009:             *
1010:             * @param instance the instance of which the redundancy needs to be 
1011:             * checked
1012:             * @return <code> true </code> if <code> instance </code>
1013:             * is redundant, <code> false </code> otherwise
1014:             */
1015:            private boolean isRedundant(Instance instance) {
1016:                Iterator it = new EnumerationIterator(m_baseMin
1017:                        .enumerateInstances());
1018:                while (it.hasNext()) {
1019:                    Instance r = (Instance) it.next();
1020:                    if (instance.classValue() == r.classValue()
1021:                            && InstancesUtil.smallerOrEqual(r, instance)) {
1022:                        return true;
1023:                    }
1024:                }
1025:                return false;
1026:            }
1027:
1028:            /**
1029:             * Is <code> instance </code> redundant wrt the 
1030:             * current <code> m_baseMax </code>.
1031:             * This is dual to the previous method, this means that 
1032:             * <code> instance </code> is redundant if there exists
1033:             * and example greater of equal than <code> instance </code>
1034:             * with the same class value.
1035:             * 
1036:             * @param instance the instance of which the redundancy needs to be 
1037:             * checked
1038:             * @return <code> true </code> if <code> instance </code>
1039:             * is redundant, <code> false </code> otherwise
1040:             */
1041:            private boolean isRedundantMax(Instance instance) {
1042:                Iterator it = new EnumerationIterator(m_baseMax
1043:                        .enumerateInstances());
1044:                while (it.hasNext()) {
1045:                    Instance r = (Instance) it.next();
1046:                    if (instance.classValue() == r.classValue()
1047:                            && InstancesUtil.smallerOrEqual(instance, r)) {
1048:                        return true;
1049:                    }
1050:                }
1051:                return false;
1052:            }
1053:
1054:            /**
1055:             * If we were to add <code> instance </code>, how many
1056:             * and which elements from <code> m_baseMin </code> would 
1057:             * be made redundant by <code> instance </code>.
1058:             * 
1059:             * @param instance the instance of which the influence is to be determined
1060:             * @return an array containing two components 
1061:             * [0] is 0 if instance makes no elements redundant;
1062:             *     is 1 if there is one rule that is made redundant by instance;
1063:             *     is 2 if there is more than one rule that is made redundant; 
1064:             * [1] if [0] == 1, this entry gives the element that is made redundant
1065:             */
1066:            private int[] makesRedundant(Instance instance) {
1067:                int[] ret = new int[2];
1068:                for (int i = 0; i < m_baseMin.numInstances(); i++) {
1069:                    Instance r = m_baseMin.instance(i);
1070:                    if (r.classValue() == instance.classValue()
1071:                            && InstancesUtil.smallerOrEqual(instance, r)) {
1072:                        if (ret[0] == 0) {
1073:                            ret[0] = 1;
1074:                            ret[1] = i;
1075:                        } else { // this means ret[0] == 1
1076:                            ret[0] = 2;
1077:                            return ret;
1078:                        }
1079:                    }
1080:                }
1081:                return ret;
1082:            }
1083:
1084:            /**
1085:             * If we were to add <code> instance </code>, how many
1086:             * and which elements from <code> m_baseMax </code> would 
1087:             * be made redundant by <code> instance </code>.
1088:             * This method is simply dual to <code> makesRedundant </code>.
1089:             * 
1090:             * @param instance the instance to add
1091:             * @return an array containing two components 
1092:             * [0] is 0 if instance makes no elements redundant;
1093:             *     is 1 if there is one rule that is made redundant by instance;
1094:             *     is 2 if there is more than one rule that is made redundant; 
1095:             * [1] if [0] == 1, this entry gives the element that is made redundant
1096:             */
1097:            private int[] makesRedundantMax(Instance instance) {
1098:                int[] ret = new int[2];
1099:                for (int i = 0; i < m_baseMax.numInstances(); i++) {
1100:                    Instance r = m_baseMax.instance(i);
1101:                    if (r.classValue() == instance.classValue()
1102:                            && InstancesUtil.smallerOrEqual(r, instance)) {
1103:                        if (ret[0] == 0) {
1104:                            ret[0] = 1;
1105:                            ret[1] = i;
1106:                        } else { // this means ret[0] == 1
1107:                            ret[0] = 2;
1108:                            return ret;
1109:                        }
1110:                    }
1111:                }
1112:                return ret;
1113:            }
1114:
1115:            /**
1116:             * Checks if adding <code> instance </code> to <code> m_baseMin </code>
1117:             * causes reversed preference amongst the elements of <code>
1118:             * m_baseMin </code>.
1119:             *
1120:             * @param instance the instance of which the influence needs to be 
1121:             * determined
1122:             * @return <code> true </code> if adding <code> instance </code> 
1123:             * to the rule base would cause reversed preference among the rules, 
1124:             * <code> false </code> otherwise.
1125:             */
1126:            private boolean causesReversedPreference(Instance instance) {
1127:                Iterator it = new EnumerationIterator(m_baseMin
1128:                        .enumerateInstances());
1129:                while (it.hasNext()) {
1130:                    Instance r = (Instance) it.next();
1131:                    if (instance.classValue() > r.classValue()
1132:                            && InstancesUtil.smallerOrEqual(instance, r)) {
1133:                        // in the original version of OLM this should not happen
1134:                        System.err
1135:                                .println("Should not happen in the original OLM algorithm");
1136:                        return true;
1137:                    } else if (r.classValue() > instance.classValue()
1138:                            && InstancesUtil.smallerOrEqual(r, instance)) {
1139:                        return true;
1140:                    }
1141:                }
1142:                return false;
1143:            }
1144:
1145:            /**
1146:             * Checks if adding <code> instance </code> to <code> m_baseMax </code>
1147:             * causes reversed preference amongst the elements of 
1148:             * <code> m_baseMax </code>.
1149:             * 
1150:             * @param instance the instance to add
1151:             * @return true if adding <code> instance </code> to the rule 
1152:             * base would cause reversed preference among the rules, 
1153:             * false otherwise.
1154:             */
1155:            private boolean causesReversedPreferenceMax(Instance instance) {
1156:                Iterator it = new EnumerationIterator(m_baseMax
1157:                        .enumerateInstances());
1158:                while (it.hasNext()) {
1159:                    Instance r = (Instance) it.next();
1160:                    if (instance.classValue() > r.classValue()
1161:                            && InstancesUtil.smallerOrEqual(instance, r)) {
1162:                        return true;
1163:                    } else if (r.classValue() > instance.classValue()
1164:                            && InstancesUtil.smallerOrEqual(r, instance)) {
1165:                        return true;
1166:                    }
1167:                }
1168:                return false;
1169:            }
1170:
1171:            /**
1172:             * Find indices of the elements from <code> m_baseMin </code>
1173:             * that are inconsistent with instance.
1174:             * 
1175:             * @param instance the instance of which the influence needs to be
1176:             * determined
1177:             * @return an array containing two components 
1178:             * [0] is 0 if instance is consistent with all present elements 
1179:             *     is 1 if instance is inconsistent (reversed preference) with
1180:             *           exactly one element
1181:             *     is 2 if instance is inconsistent with more than one element
1182:             * [1] if [0] == 1, this entry gives the index of the 
1183:             *                  element that has reversed preference wrt to 
1184:             *                  <code> instance </code>
1185:             */
1186:            private int[] reversedPreferences(Instance instance) {
1187:                int[] revPreferences = new int[2];
1188:                for (int i = 0; i < m_baseMin.numInstances(); i++) {
1189:                    Instance r = m_baseMin.instance(i);
1190:                    if (instance.classValue() < r.classValue()
1191:                            && InstancesUtil.smallerOrEqual(r, instance)) {
1192:                        if (revPreferences[0] == 0) {
1193:                            revPreferences[0] = 1;
1194:                            revPreferences[1] = i;
1195:                        } else {
1196:                            revPreferences[0] = 2;
1197:                            return revPreferences;
1198:                        }
1199:                    }
1200:                }
1201:                return revPreferences;
1202:            }
1203:
1204:            /**
1205:             * Find indices of the elements from <code> m_baseMin </code>
1206:             * that are inconsistent with instance.
1207:             * 
1208:             * @param instance the instance of which the influence needs to be
1209:             * determined
1210:             * @return an array containing two components 
1211:             * [0] is 0 if instance is consistent with all present elements 
1212:             *     is 1 if instance is inconsistent (reversed preference) with
1213:             *           exactly one element
1214:             *     is 2 if instance is inconsistent with more than one element
1215:             * [1] if [0] == 1, this entry gives the index of the 
1216:             *                  element that has reversed preference wrt to 
1217:             *                  <code> instance </code>
1218:             */
1219:            private int[] reversedPreferencesMax(Instance instance) {
1220:                int[] revPreferences = new int[2];
1221:                for (int i = 0; i < m_baseMax.numInstances(); i++) {
1222:                    Instance r = m_baseMax.instance(i);
1223:                    if (instance.classValue() > r.classValue()
1224:                            && InstancesUtil.smallerOrEqual(instance, r)) {
1225:                        if (revPreferences[0] == 0) {
1226:                            revPreferences[0] = 1;
1227:                            revPreferences[1] = i;
1228:                        } else {
1229:                            revPreferences[0] = 2;
1230:                            return revPreferences;
1231:                        }
1232:                    }
1233:                }
1234:                return revPreferences;
1235:            }
1236:
1237:            /**
1238:             * Calculates the square of the Euclidian distance between
1239:             * two arrays of doubles.  The arrays should have the same length.
1240:             *
1241:             * @param a1 the first array
1242:             * @param a2 the second array
1243:             * @return the square of the Euclidian distance between the two
1244:             * arrays <code> a1 </code> and <code> a2 </code> 
1245:             */
1246:            private double euclidDistance(double[] a1, double[] a2) {
1247:                double dist = 0;
1248:                for (int i = 0; i < a1.length; i++) {
1249:                    dist += (a1[i] - a2[i]) * (a1[i] - a2[i]);
1250:                }
1251:                return dist;
1252:            }
1253:
1254:            /** 
1255:             * Calculates the Hamming distances between two arrays, this
1256:             * means one counts the number of positions in which these
1257:             * two array differ.  The arrays should be of the same length.
1258:             * 
1259:             * @param a1 the first array
1260:             * @param a2 the second array
1261:             * @return the requested Hamming distance
1262:             * The Hamming distance between a1 and a2, this is 
1263:             * the number of positions in which a1 and a2 differ
1264:             */
1265:            private int hammingDistance(double[] a1, double[] a2) {
1266:                int dist = 0;
1267:                for (int i = 0; i < a1.length; i++) {
1268:                    dist += (a1[i] == a2[i]) ? 0 : 1;
1269:                }
1270:                return dist;
1271:            }
1272:
1273:            /**
1274:             * Get an enumeration of all available options for this classifier.
1275:             * 
1276:             * @return an enumeration of available options
1277:             */
1278:            public Enumeration listOptions() {
1279:                Vector options = new Vector();
1280:
1281:                Enumeration enm = super .listOptions();
1282:                while (enm.hasMoreElements())
1283:                    options.addElement(enm.nextElement());
1284:
1285:                String description = "\tSets the classification type to be used.\n"
1286:                        + "\t(Default: "
1287:                        + new SelectedTag(CT_REAL, TAGS_CLASSIFICATIONTYPES)
1288:                        + ")";
1289:                String synopsis = "-C "
1290:                        + Tag.toOptionList(TAGS_CLASSIFICATIONTYPES);
1291:                String name = "C";
1292:                options.addElement(new Option(description, name, 1, synopsis));
1293:
1294:                description = "\tSets the averaging type used in phase 1 of the classifier.\n"
1295:                        + "\t(Default: "
1296:                        + new SelectedTag(AT_MEAN, TAGS_AVERAGINGTYPES) + ")";
1297:                synopsis = "-A " + Tag.toOptionList(TAGS_AVERAGINGTYPES);
1298:                name = "A";
1299:                options.addElement(new Option(description, name, 1, synopsis));
1300:
1301:                description = "\tIf different from "
1302:                        + new SelectedTag(DT_NONE, TAGS_DISTANCETYPES)
1303:                        + ", a nearest neighbour rule is fired when the\n"
1304:                        + "\trule base doesn't contain an example smaller than the instance\n"
1305:                        + "\tto be classified\n" + "\t(Default: "
1306:                        + new SelectedTag(DT_NONE, TAGS_DISTANCETYPES) + ").";
1307:                synopsis = "-N " + Tag.toOptionList(TAGS_DISTANCETYPES);
1308:                name = "N";
1309:                options.addElement(new Option(description, name, 1, synopsis));
1310:
1311:                description = "\tSets the extension type, i.e. the rule base to use."
1312:                        + "\n\t(Default: "
1313:                        + new SelectedTag(ET_MIN, TAGS_EXTENSIONTYPES) + ")";
1314:                synopsis = "-E " + Tag.toOptionList(TAGS_EXTENSIONTYPES);
1315:                name = "E";
1316:                options.addElement(new Option(description, name, 1, synopsis));
1317:
1318:                description = "\tIf set, the instances are also sorted within the same class\n"
1319:                        + "\tbefore building the rule bases";
1320:                synopsis = "-sort";
1321:                name = "sort";
1322:                options.addElement(new Option(description, name, 0, synopsis));
1323:
1324:                return options.elements();
1325:            }
1326:
1327:            /** 
1328:             * Parses the options for this object. <p/>
1329:             *
1330:             <!-- options-start -->
1331:             * Valid options are: <p/>
1332:             * 
1333:             * <pre> -S &lt;num&gt;
1334:             *  Random number seed.
1335:             *  (default 1)</pre>
1336:             * 
1337:             * <pre> -D
1338:             *  If set, classifier is run in debug mode and
1339:             *  may output additional info to the console</pre>
1340:             * 
1341:             * <pre> -C &lt;CL|REG&gt;
1342:             *  Sets the classification type to be used.
1343:             *  (Default: REG)</pre>
1344:             * 
1345:             * <pre> -A &lt;MEAN|MED|MAX&gt;
1346:             *  Sets the averaging type used in phase 1 of the classifier.
1347:             *  (Default: MEAN)</pre>
1348:             * 
1349:             * <pre> -N &lt;NONE|EUCL|HAM&gt;
1350:             *  If different from NONE, a nearest neighbour rule is fired when the
1351:             *  rule base doesn't contain an example smaller than the instance
1352:             *  to be classified
1353:             *  (Default: NONE).</pre>
1354:             * 
1355:             * <pre> -E &lt;MIN|MAX|BOTH&gt;
1356:             *  Sets the extension type, i.e. the rule base to use.
1357:             *  (Default: MIN)</pre>
1358:             * 
1359:             * <pre> -sort
1360:             *  If set, the instances are also sorted within the same class
1361:             *  before building the rule bases</pre>
1362:             * 
1363:             <!-- options-end -->
1364:             *
1365:             * @param options an array of strings containing the options 
1366:             * @throws Exception if there are options that have invalid arguments.
1367:             */
1368:            public void setOptions(String[] options) throws Exception {
1369:                String args;
1370:
1371:                // classification type
1372:                args = Utils.getOption('C', options);
1373:                if (args.length() != 0)
1374:                    setClassificationType(new SelectedTag(args,
1375:                            TAGS_CLASSIFICATIONTYPES));
1376:                else
1377:                    setClassificationType(new SelectedTag(CT_REAL,
1378:                            TAGS_CLASSIFICATIONTYPES));
1379:
1380:                // averaging type
1381:                args = Utils.getOption('A', options);
1382:                if (args.length() != 0)
1383:                    setAveragingType(new SelectedTag(args, TAGS_AVERAGINGTYPES));
1384:                else
1385:                    setAveragingType(new SelectedTag(AT_MEAN,
1386:                            TAGS_AVERAGINGTYPES));
1387:
1388:                // distance type
1389:                args = Utils.getOption('N', options);
1390:                if (args.length() != 0)
1391:                    setDistanceType(new SelectedTag(args, TAGS_DISTANCETYPES));
1392:                else
1393:                    setDistanceType(new SelectedTag(DT_NONE, TAGS_DISTANCETYPES));
1394:
1395:                // extension type
1396:                args = Utils.getOption('E', options);
1397:                if (args.length() != 0)
1398:                    setExtensionType(new SelectedTag(args, TAGS_EXTENSIONTYPES));
1399:                else
1400:                    setExtensionType(new SelectedTag(ET_MIN,
1401:                            TAGS_EXTENSIONTYPES));
1402:
1403:                // sort ? 
1404:                setSort(Utils.getFlag("sort", options));
1405:
1406:                super .setOptions(options);
1407:            }
1408:
1409:            /** 
1410:             * Gets an array of string with the current options of the classifier.
1411:             *
1412:             * @return an array suitable as argument for <code> setOptions </code>
1413:             */
1414:            public String[] getOptions() {
1415:                int i;
1416:                Vector result;
1417:                String[] options;
1418:
1419:                result = new Vector();
1420:
1421:                options = super .getOptions();
1422:                for (i = 0; i < options.length; i++)
1423:                    result.add(options[i]);
1424:
1425:                // classification type
1426:                result.add("-C");
1427:                result.add("" + getClassificationType());
1428:
1429:                result.add("-A");
1430:                result.add("" + getAveragingType());
1431:
1432:                result.add("-N");
1433:                result.add("" + getDistanceType());
1434:
1435:                result.add("-E");
1436:                result.add("" + getExtensionType());
1437:
1438:                if (getSort())
1439:                    result.add("-sort");
1440:
1441:                return (String[]) result.toArray(new String[result.size()]);
1442:            }
1443:
1444:            /**
1445:             * Main method for testing this class.
1446:             * 
1447:             * @param args the command line arguments
1448:             */
1449:            public static void main(String[] args) {
1450:                runClassifier(new OLM(), args);
1451:            }
1452:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.