Source Code Cross Referenced for CheckAssociator.java in » Science » weka » weka » associations » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.associations
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         *    This program is free software; you can redistribute it and/or modify
0003:         *    it under the terms of the GNU General Public License as published by
0004:         *    the Free Software Foundation; either version 2 of the License, or
0005:         *    (at your option) any later version.
0006:         *
0007:         *    This program is distributed in the hope that it will be useful,
0008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
0009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0010:         *    GNU General Public License for more details.
0011:         *
0012:         *    You should have received a copy of the GNU General Public License
0013:         *    along with this program; if not, write to the Free Software
0014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015:         */
0016:
0017:        /*
0018:         * CheckAssociator.java
0019:         * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
0020:         *
0021:         */
0022:
0023:        package weka.associations;
0024:
0025:        import weka.core.Attribute;
0026:        import weka.core.CheckScheme;
0027:        import weka.core.FastVector;
0028:        import weka.core.Instances;
0029:        import weka.core.MultiInstanceCapabilitiesHandler;
0030:        import weka.core.Option;
0031:        import weka.core.OptionHandler;
0032:        import weka.core.SerializationHelper;
0033:        import weka.core.TestInstances;
0034:        import weka.core.Utils;
0035:        import weka.core.WeightedInstancesHandler;
0036:
0037:        import java.util.Enumeration;
0038:        import java.util.Random;
0039:        import java.util.Vector;
0040:
0041:        /**
0042:         * Class for examining the capabilities and finding problems with 
0043:         * associators. If you implement an associator using the WEKA libraries,
0044:         * you should run the checks on it to ensure robustness and correct
0045:         * operation. Passing all the tests of this object does not mean
0046:         * bugs in the associators don't exist, but this will help find some
0047:         * common ones. <p/>
0048:         * 
0049:         * Typical usage: <p/>
0050:         * <code>java weka.associations.CheckAssociator -W associator_name 
0051:         * -- associator_options </code><p/>
0052:         * 
0053:         * CheckAssociator reports on the following:
0054:         * <ul>
0055:         *    <li> Associator abilities 
0056:         *      <ul>
0057:         *         <li> Possible command line options to the associators </li>
0058:         *         <li> Whether the associators can predict nominal, numeric, string, 
0059:         *              date or relational class attributes. </li>
0060:         *         <li> Whether the associators can handle numeric predictor attributes </li>
0061:         *         <li> Whether the associators can handle nominal predictor attributes </li>
0062:         *         <li> Whether the associators can handle string predictor attributes </li>
0063:         *         <li> Whether the associators can handle date predictor attributes </li>
0064:         *         <li> Whether the associators can handle relational predictor attributes </li>
0065:         *         <li> Whether the associators can handle multi-instance data </li>
0066:         *         <li> Whether the associators can handle missing predictor values </li>
0067:         *         <li> Whether the associators can handle missing class values </li>
0068:         *         <li> Whether a nominal associator only handles 2-class problems </li>
0069:         *         <li> Whether the associators can handle instance weights </li>
0070:         *      </ul>
0071:         *    </li>
0072:         *    <li> Correct functioning 
0073:         *      <ul>
0074:         *         <li> Correct initialisation during buildAssociations (i.e. no result
0075:         *              changes when buildAssociations called repeatedly) </li>
0076:         *         <li> Whether the associator alters the data passed to it 
0077:         *              (number of instances, instance order, instance weights, etc) </li>
0078:         *      </ul>
0079:         *    </li>
0080:         *    <li> Degenerate cases 
0081:         *      <ul>
0082:         *         <li> building associators with zero training instances </li>
0083:         *         <li> all but one predictor attribute values missing </li>
0084:         *         <li> all predictor attribute values missing </li>
0085:         *         <li> all but one class values missing </li>
0086:         *         <li> all class values missing </li>
0087:         *      </ul>
0088:         *    </li>
0089:         * </ul>
0090:         * Running CheckAssociator with the debug option set will output the 
0091:         * training dataset for any failed tests.<p/>
0092:         *
0093:         * The <code>weka.associations.AbstractAssociatorTest</code> uses this
0094:         * class to test all the associators. Any changes here have to be 
0095:         * checked in that abstract test class, too. <p/>
0096:         *
0097:         <!-- options-start -->
0098:         * Valid options are: <p/>
0099:         * 
0100:         * <pre> -D
0101:         *  Turn on debugging output.</pre>
0102:         * 
0103:         * <pre> -S
0104:         *  Silent mode - prints nothing to stdout.</pre>
0105:         * 
0106:         * <pre> -N &lt;num&gt;
0107:         *  The number of instances in the datasets (default 20).</pre>
0108:         * 
0109:         * <pre> -nominal &lt;num&gt;
0110:         *  The number of nominal attributes (default 2).</pre>
0111:         * 
0112:         * <pre> -nominal-values &lt;num&gt;
0113:         *  The number of values for nominal attributes (default 1).</pre>
0114:         * 
0115:         * <pre> -numeric &lt;num&gt;
0116:         *  The number of numeric attributes (default 1).</pre>
0117:         * 
0118:         * <pre> -string &lt;num&gt;
0119:         *  The number of string attributes (default 1).</pre>
0120:         * 
0121:         * <pre> -date &lt;num&gt;
0122:         *  The number of date attributes (default 1).</pre>
0123:         * 
0124:         * <pre> -relational &lt;num&gt;
0125:         *  The number of relational attributes (default 1).</pre>
0126:         * 
0127:         * <pre> -num-instances-relational &lt;num&gt;
0128:         *  The number of instances in relational/bag attributes (default 10).</pre>
0129:         * 
0130:         * <pre> -words &lt;comma-separated-list&gt;
0131:         *  The words to use in string attributes.</pre>
0132:         * 
0133:         * <pre> -word-separators &lt;chars&gt;
0134:         *  The word separators to use in string attributes.</pre>
0135:         * 
0136:         * <pre> -W
0137:         *  Full name of the associator analysed.
0138:         *  eg: weka.associations.Apriori
0139:         *  (default weka.associations.Apriori)</pre>
0140:         * 
0141:         * <pre> 
0142:         * Options specific to associator weka.associations.Apriori:
0143:         * </pre>
0144:         * 
0145:         * <pre> -N &lt;required number of rules output&gt;
0146:         *  The required number of rules. (default = 10)</pre>
0147:         * 
0148:         * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
0149:         *  The metric type by which to rank rules. (default = confidence)</pre>
0150:         * 
0151:         * <pre> -C &lt;minimum metric score of a rule&gt;
0152:         *  The minimum confidence of a rule. (default = 0.9)</pre>
0153:         * 
0154:         * <pre> -D &lt;delta for minimum support&gt;
0155:         *  The delta by which the minimum support is decreased in
0156:         *  each iteration. (default = 0.05)</pre>
0157:         * 
0158:         * <pre> -U &lt;upper bound for minimum support&gt;
0159:         *  Upper bound for minimum support. (default = 1.0)</pre>
0160:         * 
0161:         * <pre> -M &lt;lower bound for minimum support&gt;
0162:         *  The lower bound for the minimum support. (default = 0.1)</pre>
0163:         * 
0164:         * <pre> -S &lt;significance level&gt;
0165:         *  If used, rules are tested for significance at
0166:         *  the given level. Slower. (default = no significance testing)</pre>
0167:         * 
0168:         * <pre> -I
0169:         *  If set the itemsets found are also output. (default = no)</pre>
0170:         * 
0171:         * <pre> -R
0172:         *  Remove columns that contain all missing values (default = no)</pre>
0173:         * 
0174:         * <pre> -V
0175:         *  Report progress iteratively. (default = no)</pre>
0176:         * 
0177:         * <pre> -A
0178:         *  If set class association rules are mined. (default = no)</pre>
0179:         * 
0180:         * <pre> -c &lt;the class index&gt;
0181:         *  The class index. (default = last)</pre>
0182:         * 
0183:         <!-- options-end -->
0184:         *
0185:         * Options after -- are passed to the designated associator.<p/>
0186:         *
0187:         * @author Len Trigg (trigg@cs.waikato.ac.nz)
0188:         * @author FracPete (fracpete at waikato dot ac dot nz)
0189:         * @version $Revision: 1.5 $
0190:         * @see TestInstances
0191:         */
0192:        public class CheckAssociator extends CheckScheme {
0193:
0194:            /*
0195:             * Note about test methods:
0196:             * - methods return array of booleans
0197:             * - first index: success or not
0198:             * - second index: acceptable or not (e.g., Exception is OK)
0199:             *
0200:             * FracPete (fracpete at waikato dot ac dot nz)
0201:             */
0202:
0203:            /** a "dummy" class type */
0204:            public final static int NO_CLASS = -1;
0205:
0206:            /*** The associator to be examined */
0207:            protected Associator m_Associator = new weka.associations.Apriori();
0208:
0209:            /**
0210:             * Returns an enumeration describing the available options.
0211:             *
0212:             * @return an enumeration of all the available options.
0213:             */
0214:            public Enumeration listOptions() {
0215:                Vector result = new Vector();
0216:
0217:                Enumeration en = super .listOptions();
0218:                while (en.hasMoreElements())
0219:                    result.addElement(en.nextElement());
0220:
0221:                result.addElement(new Option(
0222:                        "\tFull name of the associator analysed.\n"
0223:                                + "\teg: weka.associations.Apriori\n"
0224:                                + "\t(default weka.associations.Apriori)", "W",
0225:                        1, "-W"));
0226:
0227:                if ((m_Associator != null)
0228:                        && (m_Associator instanceof  OptionHandler)) {
0229:                    result.addElement(new Option("", "", 0,
0230:                            "\nOptions specific to associator "
0231:                                    + m_Associator.getClass().getName() + ":"));
0232:                    Enumeration enu = ((OptionHandler) m_Associator)
0233:                            .listOptions();
0234:                    while (enu.hasMoreElements())
0235:                        result.addElement(enu.nextElement());
0236:                }
0237:
0238:                return result.elements();
0239:            }
0240:
0241:            /**
0242:             * Parses a given list of options. 
0243:             *
0244:             <!-- options-start -->
0245:             * Valid options are: <p/>
0246:             * 
0247:             * <pre> -D
0248:             *  Turn on debugging output.</pre>
0249:             * 
0250:             * <pre> -S
0251:             *  Silent mode - prints nothing to stdout.</pre>
0252:             * 
0253:             * <pre> -N &lt;num&gt;
0254:             *  The number of instances in the datasets (default 20).</pre>
0255:             * 
0256:             * <pre> -nominal &lt;num&gt;
0257:             *  The number of nominal attributes (default 2).</pre>
0258:             * 
0259:             * <pre> -nominal-values &lt;num&gt;
0260:             *  The number of values for nominal attributes (default 1).</pre>
0261:             * 
0262:             * <pre> -numeric &lt;num&gt;
0263:             *  The number of numeric attributes (default 1).</pre>
0264:             * 
0265:             * <pre> -string &lt;num&gt;
0266:             *  The number of string attributes (default 1).</pre>
0267:             * 
0268:             * <pre> -date &lt;num&gt;
0269:             *  The number of date attributes (default 1).</pre>
0270:             * 
0271:             * <pre> -relational &lt;num&gt;
0272:             *  The number of relational attributes (default 1).</pre>
0273:             * 
0274:             * <pre> -num-instances-relational &lt;num&gt;
0275:             *  The number of instances in relational/bag attributes (default 10).</pre>
0276:             * 
0277:             * <pre> -words &lt;comma-separated-list&gt;
0278:             *  The words to use in string attributes.</pre>
0279:             * 
0280:             * <pre> -word-separators &lt;chars&gt;
0281:             *  The word separators to use in string attributes.</pre>
0282:             * 
0283:             * <pre> -W
0284:             *  Full name of the associator analysed.
0285:             *  eg: weka.associations.Apriori
0286:             *  (default weka.associations.Apriori)</pre>
0287:             * 
0288:             * <pre> 
0289:             * Options specific to associator weka.associations.Apriori:
0290:             * </pre>
0291:             * 
0292:             * <pre> -N &lt;required number of rules output&gt;
0293:             *  The required number of rules. (default = 10)</pre>
0294:             * 
0295:             * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
0296:             *  The metric type by which to rank rules. (default = confidence)</pre>
0297:             * 
0298:             * <pre> -C &lt;minimum metric score of a rule&gt;
0299:             *  The minimum confidence of a rule. (default = 0.9)</pre>
0300:             * 
0301:             * <pre> -D &lt;delta for minimum support&gt;
0302:             *  The delta by which the minimum support is decreased in
0303:             *  each iteration. (default = 0.05)</pre>
0304:             * 
0305:             * <pre> -U &lt;upper bound for minimum support&gt;
0306:             *  Upper bound for minimum support. (default = 1.0)</pre>
0307:             * 
0308:             * <pre> -M &lt;lower bound for minimum support&gt;
0309:             *  The lower bound for the minimum support. (default = 0.1)</pre>
0310:             * 
0311:             * <pre> -S &lt;significance level&gt;
0312:             *  If used, rules are tested for significance at
0313:             *  the given level. Slower. (default = no significance testing)</pre>
0314:             * 
0315:             * <pre> -I
0316:             *  If set the itemsets found are also output. (default = no)</pre>
0317:             * 
0318:             * <pre> -R
0319:             *  Remove columns that contain all missing values (default = no)</pre>
0320:             * 
0321:             * <pre> -V
0322:             *  Report progress iteratively. (default = no)</pre>
0323:             * 
0324:             * <pre> -A
0325:             *  If set class association rules are mined. (default = no)</pre>
0326:             * 
0327:             * <pre> -c &lt;the class index&gt;
0328:             *  The class index. (default = last)</pre>
0329:             * 
0330:             <!-- options-end -->
0331:             *
0332:             * @param options the list of options as an array of strings
0333:             * @throws Exception if an option is not supported
0334:             */
0335:            public void setOptions(String[] options) throws Exception {
0336:                String tmpStr;
0337:
0338:                super .setOptions(options);
0339:
0340:                tmpStr = Utils.getOption('W', options);
0341:                if (tmpStr.length() == 0)
0342:                    tmpStr = weka.associations.Apriori.class.getName();
0343:                setAssociator((Associator) forName("weka.associations",
0344:                        Associator.class, tmpStr, Utils
0345:                                .partitionOptions(options)));
0346:            }
0347:
0348:            /**
0349:             * Gets the current settings of the CheckAssociator.
0350:             *
0351:             * @return an array of strings suitable for passing to setOptions
0352:             */
0353:            public String[] getOptions() {
0354:                Vector result;
0355:                String[] options;
0356:                int i;
0357:
0358:                result = new Vector();
0359:
0360:                options = super .getOptions();
0361:                for (i = 0; i < options.length; i++)
0362:                    result.add(options[i]);
0363:
0364:                if (getAssociator() != null) {
0365:                    result.add("-W");
0366:                    result.add(getAssociator().getClass().getName());
0367:                }
0368:
0369:                if ((m_Associator != null)
0370:                        && (m_Associator instanceof  OptionHandler))
0371:                    options = ((OptionHandler) m_Associator).getOptions();
0372:                else
0373:                    options = new String[0];
0374:
0375:                if (options.length > 0) {
0376:                    result.add("--");
0377:                    for (i = 0; i < options.length; i++)
0378:                        result.add(options[i]);
0379:                }
0380:
0381:                return (String[]) result.toArray(new String[result.size()]);
0382:            }
0383:
0384:            /**
0385:             * Begin the tests, reporting results to System.out
0386:             */
0387:            public void doTests() {
0388:
0389:                if (getAssociator() == null) {
0390:                    println("\n=== No associator set ===");
0391:                    return;
0392:                }
0393:                println("\n=== Check on Associator: "
0394:                        + getAssociator().getClass().getName() + " ===\n");
0395:
0396:                // Start tests
0397:                m_ClasspathProblems = false;
0398:                println("--> Checking for interfaces");
0399:                canTakeOptions();
0400:                boolean weightedInstancesHandler = weightedInstancesHandler()[0];
0401:                boolean multiInstanceHandler = multiInstanceHandler()[0];
0402:                println("--> Associator tests");
0403:                declaresSerialVersionUID();
0404:                println("--> no class attribute");
0405:                testsWithoutClass(weightedInstancesHandler,
0406:                        multiInstanceHandler);
0407:                println("--> with class attribute");
0408:                testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler,
0409:                        multiInstanceHandler);
0410:                testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler,
0411:                        multiInstanceHandler);
0412:                testsPerClassType(Attribute.DATE, weightedInstancesHandler,
0413:                        multiInstanceHandler);
0414:                testsPerClassType(Attribute.STRING, weightedInstancesHandler,
0415:                        multiInstanceHandler);
0416:                testsPerClassType(Attribute.RELATIONAL,
0417:                        weightedInstancesHandler, multiInstanceHandler);
0418:            }
0419:
0420:            /**
0421:             * Set the associator to test. 
0422:             *
0423:             * @param newAssociator the Associator to use.
0424:             */
0425:            public void setAssociator(Associator newAssociator) {
0426:                m_Associator = newAssociator;
0427:            }
0428:
0429:            /**
0430:             * Get the associator being tested
0431:             *
0432:             * @return the associator being tested
0433:             */
0434:            public Associator getAssociator() {
0435:                return m_Associator;
0436:            }
0437:
0438:            /**
0439:             * Run a battery of tests for a given class attribute type
0440:             *
0441:             * @param classType the class type (NOMINAL, NUMERIC, etc.)
0442:             * @param weighted true if the associator says it handles weights
0443:             * @param multiInstance true if the associator is a multi-instance associator
0444:             */
0445:            protected void testsPerClassType(int classType, boolean weighted,
0446:                    boolean multiInstance) {
0447:
0448:                boolean PNom = canPredict(true, false, false, false, false,
0449:                        multiInstance, classType)[0];
0450:                boolean PNum = canPredict(false, true, false, false, false,
0451:                        multiInstance, classType)[0];
0452:                boolean PStr = canPredict(false, false, true, false, false,
0453:                        multiInstance, classType)[0];
0454:                boolean PDat = canPredict(false, false, false, true, false,
0455:                        multiInstance, classType)[0];
0456:                boolean PRel;
0457:                if (!multiInstance)
0458:                    PRel = canPredict(false, false, false, false, true,
0459:                            multiInstance, classType)[0];
0460:                else
0461:                    PRel = false;
0462:
0463:                if (PNom || PNum || PStr || PDat || PRel) {
0464:                    if (weighted)
0465:                        instanceWeights(PNom, PNum, PStr, PDat, PRel,
0466:                                multiInstance, classType);
0467:
0468:                    if (classType == Attribute.NOMINAL)
0469:                        canHandleNClasses(PNom, PNum, PStr, PDat, PRel,
0470:                                multiInstance, 4);
0471:
0472:                    if (!multiInstance) {
0473:                        canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat,
0474:                                PRel, multiInstance, classType, 0);
0475:                        canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat,
0476:                                PRel, multiInstance, classType, 1);
0477:                    }
0478:
0479:                    canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel,
0480:                            multiInstance, classType);
0481:                    boolean handleMissingPredictors = canHandleMissing(PNom,
0482:                            PNum, PStr, PDat, PRel, multiInstance, classType,
0483:                            true, false, 20)[0];
0484:                    if (handleMissingPredictors)
0485:                        canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0486:                                multiInstance, classType, true, false, 100);
0487:
0488:                    boolean handleMissingClass = canHandleMissing(PNom, PNum,
0489:                            PStr, PDat, PRel, multiInstance, classType, false,
0490:                            true, 20)[0];
0491:                    if (handleMissingClass)
0492:                        canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0493:                                multiInstance, classType, false, true, 100);
0494:
0495:                    correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel,
0496:                            multiInstance, classType);
0497:                    datasetIntegrity(PNom, PNum, PStr, PDat, PRel,
0498:                            multiInstance, classType, handleMissingPredictors,
0499:                            handleMissingClass);
0500:                }
0501:            }
0502:
0503:            /**
0504:             * Run a battery of tests without a class
0505:             *
0506:             * @param weighted true if the associator says it handles weights
0507:             * @param multiInstance true if the associator is a multi-instance associator
0508:             */
0509:            protected void testsWithoutClass(boolean weighted,
0510:                    boolean multiInstance) {
0511:
0512:                boolean PNom = canPredict(true, false, false, false, false,
0513:                        multiInstance, NO_CLASS)[0];
0514:                boolean PNum = canPredict(false, true, false, false, false,
0515:                        multiInstance, NO_CLASS)[0];
0516:                boolean PStr = canPredict(false, false, true, false, false,
0517:                        multiInstance, NO_CLASS)[0];
0518:                boolean PDat = canPredict(false, false, false, true, false,
0519:                        multiInstance, NO_CLASS)[0];
0520:                boolean PRel;
0521:                if (!multiInstance)
0522:                    PRel = canPredict(false, false, false, false, true,
0523:                            multiInstance, NO_CLASS)[0];
0524:                else
0525:                    PRel = false;
0526:
0527:                if (PNom || PNum || PStr || PDat || PRel) {
0528:                    if (weighted)
0529:                        instanceWeights(PNom, PNum, PStr, PDat, PRel,
0530:                                multiInstance, NO_CLASS);
0531:
0532:                    canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel,
0533:                            multiInstance, NO_CLASS);
0534:                    boolean handleMissingPredictors = canHandleMissing(PNom,
0535:                            PNum, PStr, PDat, PRel, multiInstance, NO_CLASS,
0536:                            true, false, 20)[0];
0537:                    if (handleMissingPredictors)
0538:                        canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0539:                                multiInstance, NO_CLASS, true, false, 100);
0540:
0541:                    correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel,
0542:                            multiInstance, NO_CLASS);
0543:                    datasetIntegrity(PNom, PNum, PStr, PDat, PRel,
0544:                            multiInstance, NO_CLASS, handleMissingPredictors,
0545:                            false);
0546:                }
0547:            }
0548:
0549:            /**
0550:             * Checks whether the scheme can take command line options.
0551:             *
0552:             * @return index 0 is true if the associator can take options
0553:             */
0554:            protected boolean[] canTakeOptions() {
0555:
0556:                boolean[] result = new boolean[2];
0557:
0558:                print("options...");
0559:                if (m_Associator instanceof  OptionHandler) {
0560:                    println("yes");
0561:                    if (m_Debug) {
0562:                        println("\n=== Full report ===");
0563:                        Enumeration enu = ((OptionHandler) m_Associator)
0564:                                .listOptions();
0565:                        while (enu.hasMoreElements()) {
0566:                            Option option = (Option) enu.nextElement();
0567:                            print(option.synopsis() + "\n"
0568:                                    + option.description() + "\n");
0569:                        }
0570:                        println("\n");
0571:                    }
0572:                    result[0] = true;
0573:                } else {
0574:                    println("no");
0575:                    result[0] = false;
0576:                }
0577:
0578:                return result;
0579:            }
0580:
0581:            /**
0582:             * Checks whether the scheme says it can handle instance weights.
0583:             *
0584:             * @return index 0 is true if the associator handles instance weights
0585:             */
0586:            protected boolean[] weightedInstancesHandler() {
0587:
0588:                boolean[] result = new boolean[2];
0589:
0590:                print("weighted instances associator...");
0591:                if (m_Associator instanceof  WeightedInstancesHandler) {
0592:                    println("yes");
0593:                    result[0] = true;
0594:                } else {
0595:                    println("no");
0596:                    result[0] = false;
0597:                }
0598:
0599:                return result;
0600:            }
0601:
0602:            /**
0603:             * Checks whether the scheme handles multi-instance data.
0604:             * 
0605:             * @return index 0 is true if the associator handles multi-instance data
0606:             */
0607:            protected boolean[] multiInstanceHandler() {
0608:                boolean[] result = new boolean[2];
0609:
0610:                print("multi-instance associator...");
0611:                if (m_Associator instanceof  MultiInstanceCapabilitiesHandler) {
0612:                    println("yes");
0613:                    result[0] = true;
0614:                } else {
0615:                    println("no");
0616:                    result[0] = false;
0617:                }
0618:
0619:                return result;
0620:            }
0621:
0622:            /**
0623:             * tests for a serialVersionUID. Fails in case the scheme doesn't declare
0624:             * a UID.
0625:             *
0626:             * @return index 0 is true if the scheme declares a UID
0627:             */
0628:            protected boolean[] declaresSerialVersionUID() {
0629:                boolean[] result = new boolean[2];
0630:
0631:                print("serialVersionUID...");
0632:
0633:                result[0] = !SerializationHelper.needsUID(m_Associator
0634:                        .getClass());
0635:
0636:                if (result[0])
0637:                    println("yes");
0638:                else
0639:                    println("no");
0640:
0641:                return result;
0642:            }
0643:
0644:            /**
0645:             * Checks basic prediction of the scheme, for simple non-troublesome
0646:             * datasets.
0647:             *
0648:             * @param nominalPredictor if true use nominal predictor attributes
0649:             * @param numericPredictor if true use numeric predictor attributes
0650:             * @param stringPredictor if true use string predictor attributes
0651:             * @param datePredictor if true use date predictor attributes
0652:             * @param relationalPredictor if true use relational predictor attributes
0653:             * @param multiInstance whether multi-instance is needed
0654:             * @param classType the class type (NOMINAL, NUMERIC, etc.)
0655:             * @return index 0 is true if the test was passed, index 1 is true if test 
0656:             *         was acceptable
0657:             */
0658:            protected boolean[] canPredict(boolean nominalPredictor,
0659:                    boolean numericPredictor, boolean stringPredictor,
0660:                    boolean datePredictor, boolean relationalPredictor,
0661:                    boolean multiInstance, int classType) {
0662:
0663:                print("basic predict");
0664:                printAttributeSummary(nominalPredictor, numericPredictor,
0665:                        stringPredictor, datePredictor, relationalPredictor,
0666:                        multiInstance, classType);
0667:                print("...");
0668:                FastVector accepts = new FastVector();
0669:                accepts.addElement("any");
0670:                accepts.addElement("unary");
0671:                accepts.addElement("binary");
0672:                accepts.addElement("nominal");
0673:                accepts.addElement("numeric");
0674:                accepts.addElement("string");
0675:                accepts.addElement("date");
0676:                accepts.addElement("relational");
0677:                accepts.addElement("multi-instance");
0678:                accepts.addElement("not in classpath");
0679:                int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0680:                boolean predictorMissing = false, classMissing = false;
0681:
0682:                return runBasicTest(nominalPredictor, numericPredictor,
0683:                        stringPredictor, datePredictor, relationalPredictor,
0684:                        multiInstance, classType, missingLevel,
0685:                        predictorMissing, classMissing, numTrain, numClasses,
0686:                        accepts);
0687:            }
0688:
0689:            /**
0690:             * Checks whether nominal schemes can handle more than two classes.
0691:             * If a scheme is only designed for two-class problems it should
0692:             * throw an appropriate exception for multi-class problems.
0693:             *
0694:             * @param nominalPredictor if true use nominal predictor attributes
0695:             * @param numericPredictor if true use numeric predictor attributes
0696:             * @param stringPredictor if true use string predictor attributes
0697:             * @param datePredictor if true use date predictor attributes
0698:             * @param relationalPredictor if true use relational predictor attributes
0699:             * @param multiInstance whether multi-instance is needed
0700:             * @param numClasses the number of classes to test
0701:             * @return index 0 is true if the test was passed, index 1 is true if test 
0702:             *         was acceptable
0703:             */
0704:            protected boolean[] canHandleNClasses(boolean nominalPredictor,
0705:                    boolean numericPredictor, boolean stringPredictor,
0706:                    boolean datePredictor, boolean relationalPredictor,
0707:                    boolean multiInstance, int numClasses) {
0708:
0709:                print("more than two class problems");
0710:                printAttributeSummary(nominalPredictor, numericPredictor,
0711:                        stringPredictor, datePredictor, relationalPredictor,
0712:                        multiInstance, Attribute.NOMINAL);
0713:                print("...");
0714:                FastVector accepts = new FastVector();
0715:                accepts.addElement("number");
0716:                accepts.addElement("class");
0717:                int numTrain = getNumInstances(), missingLevel = 0;
0718:                boolean predictorMissing = false, classMissing = false;
0719:
0720:                return runBasicTest(nominalPredictor, numericPredictor,
0721:                        stringPredictor, datePredictor, relationalPredictor,
0722:                        multiInstance, Attribute.NOMINAL, missingLevel,
0723:                        predictorMissing, classMissing, numTrain, numClasses,
0724:                        accepts);
0725:            }
0726:
0727:            /**
0728:             * Checks whether the scheme can handle class attributes as Nth attribute.
0729:             *
0730:             * @param nominalPredictor if true use nominal predictor attributes
0731:             * @param numericPredictor if true use numeric predictor attributes
0732:             * @param stringPredictor if true use string predictor attributes
0733:             * @param datePredictor if true use date predictor attributes
0734:             * @param relationalPredictor if true use relational predictor attributes
0735:             * @param multiInstance whether multi-instance is needed
0736:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
0737:             * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
0738:             * @return index 0 is true if the test was passed, index 1 is true if test 
0739:             *         was acceptable
0740:             * @see TestInstances#CLASS_IS_LAST
0741:             */
0742:            protected boolean[] canHandleClassAsNthAttribute(
0743:                    boolean nominalPredictor, boolean numericPredictor,
0744:                    boolean stringPredictor, boolean datePredictor,
0745:                    boolean relationalPredictor, boolean multiInstance,
0746:                    int classType, int classIndex) {
0747:
0748:                if (classIndex == TestInstances.CLASS_IS_LAST)
0749:                    print("class attribute as last attribute");
0750:                else
0751:                    print("class attribute as " + (classIndex + 1)
0752:                            + ". attribute");
0753:                printAttributeSummary(nominalPredictor, numericPredictor,
0754:                        stringPredictor, datePredictor, relationalPredictor,
0755:                        multiInstance, classType);
0756:                print("...");
0757:                FastVector accepts = new FastVector();
0758:                int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0759:                boolean predictorMissing = false, classMissing = false;
0760:
0761:                return runBasicTest(nominalPredictor, numericPredictor,
0762:                        stringPredictor, datePredictor, relationalPredictor,
0763:                        multiInstance, classType, classIndex, missingLevel,
0764:                        predictorMissing, classMissing, numTrain, numClasses,
0765:                        accepts);
0766:            }
0767:
0768:            /**
0769:             * Checks whether the scheme can handle zero training instances.
0770:             *
0771:             * @param nominalPredictor if true use nominal predictor attributes
0772:             * @param numericPredictor if true use numeric predictor attributes
0773:             * @param stringPredictor if true use string predictor attributes
0774:             * @param datePredictor if true use date predictor attributes
0775:             * @param relationalPredictor if true use relational predictor attributes
0776:             * @param multiInstance whether multi-instance is needed
0777:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
0778:             * @return index 0 is true if the test was passed, index 1 is true if test 
0779:             *         was acceptable
0780:             */
0781:            protected boolean[] canHandleZeroTraining(boolean nominalPredictor,
0782:                    boolean numericPredictor, boolean stringPredictor,
0783:                    boolean datePredictor, boolean relationalPredictor,
0784:                    boolean multiInstance, int classType) {
0785:
0786:                print("handle zero training instances");
0787:                printAttributeSummary(nominalPredictor, numericPredictor,
0788:                        stringPredictor, datePredictor, relationalPredictor,
0789:                        multiInstance, classType);
0790:                print("...");
0791:                FastVector accepts = new FastVector();
0792:                accepts.addElement("train");
0793:                accepts.addElement("value");
0794:                int numTrain = 0, numClasses = 2, missingLevel = 0;
0795:                boolean predictorMissing = false, classMissing = false;
0796:
0797:                return runBasicTest(nominalPredictor, numericPredictor,
0798:                        stringPredictor, datePredictor, relationalPredictor,
0799:                        multiInstance, classType, missingLevel,
0800:                        predictorMissing, classMissing, numTrain, numClasses,
0801:                        accepts);
0802:            }
0803:
0804:            /**
0805:             * Checks whether the scheme correctly initialises models when 
0806:             * buildAssociations is called. This test calls buildAssociations with
0807:             * one training dataset. buildAssociations is then called on a training 
0808:             * set with different structure, and then again with the original training 
0809:             * set. If the equals method of the AssociatorEvaluation class returns 
0810:             * false, this is noted as incorrect build initialisation.
0811:             *
0812:             * @param nominalPredictor if true use nominal predictor attributes
0813:             * @param numericPredictor if true use numeric predictor attributes
0814:             * @param stringPredictor if true use string predictor attributes
0815:             * @param datePredictor if true use date predictor attributes
0816:             * @param relationalPredictor if true use relational predictor attributes
0817:             * @param multiInstance whether multi-instance is needed
0818:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
0819:             * @return index 0 is true if the test was passed
0820:             */
0821:            protected boolean[] correctBuildInitialisation(
0822:                    boolean nominalPredictor, boolean numericPredictor,
0823:                    boolean stringPredictor, boolean datePredictor,
0824:                    boolean relationalPredictor, boolean multiInstance,
0825:                    int classType) {
0826:
0827:                boolean[] result = new boolean[2];
0828:
0829:                print("correct initialisation during buildAssociations");
0830:                printAttributeSummary(nominalPredictor, numericPredictor,
0831:                        stringPredictor, datePredictor, relationalPredictor,
0832:                        multiInstance, classType);
0833:                print("...");
0834:                int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0835:                boolean predictorMissing = false, classMissing = false;
0836:
0837:                Instances train1 = null;
0838:                Instances train2 = null;
0839:                Associator associator = null;
0840:                AssociatorEvaluation evaluation1A = null;
0841:                AssociatorEvaluation evaluation1B = null;
0842:                AssociatorEvaluation evaluation2 = null;
0843:                int stage = 0;
0844:                try {
0845:
0846:                    // Make two train sets with different numbers of attributes
0847:                    train1 = makeTestDataset(42, numTrain,
0848:                            nominalPredictor ? getNumNominal() : 0,
0849:                            numericPredictor ? getNumNumeric() : 0,
0850:                            stringPredictor ? getNumString() : 0,
0851:                            datePredictor ? getNumDate() : 0,
0852:                            relationalPredictor ? getNumRelational() : 0,
0853:                            numClasses, classType, multiInstance);
0854:                    train2 = makeTestDataset(84, numTrain,
0855:                            nominalPredictor ? getNumNominal() + 1 : 0,
0856:                            numericPredictor ? getNumNumeric() + 1 : 0,
0857:                            stringPredictor ? getNumString() + 1 : 0,
0858:                            datePredictor ? getNumDate() + 1 : 0,
0859:                            relationalPredictor ? getNumRelational() + 1 : 0,
0860:                            numClasses, classType, multiInstance);
0861:                    if (missingLevel > 0) {
0862:                        addMissing(train1, missingLevel, predictorMissing,
0863:                                classMissing);
0864:                        addMissing(train2, missingLevel, predictorMissing,
0865:                                classMissing);
0866:                    }
0867:
0868:                    associator = Associator.makeCopies(getAssociator(), 1)[0];
0869:                    evaluation1A = new AssociatorEvaluation();
0870:                    evaluation1B = new AssociatorEvaluation();
0871:                    evaluation2 = new AssociatorEvaluation();
0872:                } catch (Exception ex) {
0873:                    throw new Error("Error setting up for tests: "
0874:                            + ex.getMessage());
0875:                }
0876:                try {
0877:                    stage = 0;
0878:                    evaluation1A.evaluate(associator, train1);
0879:
0880:                    stage = 1;
0881:                    evaluation2.evaluate(associator, train2);
0882:
0883:                    stage = 2;
0884:                    evaluation1B.evaluate(associator, train1);
0885:
0886:                    stage = 3;
0887:                    if (!evaluation1A.equals(evaluation1B)) {
0888:                        if (m_Debug) {
0889:                            println("\n=== Full report ===\n"
0890:                                    + evaluation1A
0891:                                            .toSummaryString("\nFirst buildAssociations()")
0892:                                    + "\n\n");
0893:                            println(evaluation1B
0894:                                    .toSummaryString("\nSecond buildAssociations()")
0895:                                    + "\n\n");
0896:                        }
0897:                        throw new Exception(
0898:                                "Results differ between buildAssociations calls");
0899:                    }
0900:                    println("yes");
0901:                    result[0] = true;
0902:
0903:                    if (false && m_Debug) {
0904:                        println("\n=== Full report ===\n"
0905:                                + evaluation1A
0906:                                        .toSummaryString("\nFirst buildAssociations()")
0907:                                + "\n\n");
0908:                        println(evaluation1B
0909:                                .toSummaryString("\nSecond buildAssociations()")
0910:                                + "\n\n");
0911:                    }
0912:                } catch (Exception ex) {
0913:                    println("no");
0914:                    result[0] = false;
0915:
0916:                    if (m_Debug) {
0917:                        println("\n=== Full Report ===");
0918:                        print("Problem during building");
0919:                        switch (stage) {
0920:                        case 0:
0921:                            print(" of dataset 1");
0922:                            break;
0923:                        case 1:
0924:                            print(" of dataset 2");
0925:                            break;
0926:                        case 2:
0927:                            print(" of dataset 1 (2nd build)");
0928:                            break;
0929:                        case 3:
0930:                            print(", comparing results from builds of dataset 1");
0931:                            break;
0932:                        }
0933:                        println(": " + ex.getMessage() + "\n");
0934:                        println("here are the datasets:\n");
0935:                        println("=== Train1 Dataset ===\n" + train1.toString()
0936:                                + "\n");
0937:                        println("=== Train2 Dataset ===\n" + train2.toString()
0938:                                + "\n");
0939:                    }
0940:                }
0941:
0942:                return result;
0943:            }
0944:
0945:            /**
0946:             * Checks basic missing value handling of the scheme. If the missing
0947:             * values cause an exception to be thrown by the scheme, this will be
0948:             * recorded.
0949:             *
0950:             * @param nominalPredictor if true use nominal predictor attributes
0951:             * @param numericPredictor if true use numeric predictor attributes
0952:             * @param stringPredictor if true use string predictor attributes
0953:             * @param datePredictor if true use date predictor attributes
0954:             * @param relationalPredictor if true use relational predictor attributes
0955:             * @param multiInstance whether multi-instance is needed
0956:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
0957:             * @param predictorMissing true if the missing values may be in 
0958:             * the predictors
0959:             * @param classMissing true if the missing values may be in the class
0960:             * @param missingLevel the percentage of missing values
0961:             * @return index 0 is true if the test was passed, index 1 is true if test 
0962:             *         was acceptable
0963:             */
0964:            protected boolean[] canHandleMissing(boolean nominalPredictor,
0965:                    boolean numericPredictor, boolean stringPredictor,
0966:                    boolean datePredictor, boolean relationalPredictor,
0967:                    boolean multiInstance, int classType,
0968:                    boolean predictorMissing, boolean classMissing,
0969:                    int missingLevel) {
0970:
0971:                if (missingLevel == 100)
0972:                    print("100% ");
0973:                print("missing");
0974:                if (predictorMissing) {
0975:                    print(" predictor");
0976:                    if (classMissing)
0977:                        print(" and");
0978:                }
0979:                if (classMissing)
0980:                    print(" class");
0981:                print(" values");
0982:                printAttributeSummary(nominalPredictor, numericPredictor,
0983:                        stringPredictor, datePredictor, relationalPredictor,
0984:                        multiInstance, classType);
0985:                print("...");
0986:                FastVector accepts = new FastVector();
0987:                accepts.addElement("missing");
0988:                accepts.addElement("value");
0989:                accepts.addElement("train");
0990:                int numTrain = getNumInstances(), numClasses = 2;
0991:
0992:                return runBasicTest(nominalPredictor, numericPredictor,
0993:                        stringPredictor, datePredictor, relationalPredictor,
0994:                        multiInstance, classType, missingLevel,
0995:                        predictorMissing, classMissing, numTrain, numClasses,
0996:                        accepts);
0997:            }
0998:
0999:            /**
1000:             * Checks whether the associator can handle instance weights.
1001:             * This test compares the associator performance on two datasets
1002:             * that are identical except for the training weights. If the 
1003:             * results change, then the associator must be using the weights. It
1004:             * may be possible to get a false positive from this test if the 
1005:             * weight changes aren't significant enough to induce a change
1006:             * in associator performance (but the weights are chosen to minimize
1007:             * the likelihood of this).
1008:             *
1009:             * @param nominalPredictor if true use nominal predictor attributes
1010:             * @param numericPredictor if true use numeric predictor attributes
1011:             * @param stringPredictor if true use string predictor attributes
1012:             * @param datePredictor if true use date predictor attributes
1013:             * @param relationalPredictor if true use relational predictor attributes
1014:             * @param multiInstance whether multi-instance is needed
1015:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1016:             * @return index 0 true if the test was passed
1017:             */
1018:            protected boolean[] instanceWeights(boolean nominalPredictor,
1019:                    boolean numericPredictor, boolean stringPredictor,
1020:                    boolean datePredictor, boolean relationalPredictor,
1021:                    boolean multiInstance, int classType) {
1022:
1023:                print("associator uses instance weights");
1024:                printAttributeSummary(nominalPredictor, numericPredictor,
1025:                        stringPredictor, datePredictor, relationalPredictor,
1026:                        multiInstance, classType);
1027:                print("...");
1028:                int numTrain = 2 * getNumInstances(), numClasses = 2, missingLevel = 0;
1029:                boolean predictorMissing = false, classMissing = false;
1030:
1031:                boolean[] result = new boolean[2];
1032:                Instances train = null;
1033:                Associator[] associators = null;
1034:                AssociatorEvaluation evaluationB = null;
1035:                AssociatorEvaluation evaluationI = null;
1036:                boolean evalFail = false;
1037:                try {
1038:                    train = makeTestDataset(42, numTrain,
1039:                            nominalPredictor ? getNumNominal() + 1 : 0,
1040:                            numericPredictor ? getNumNumeric() + 1 : 0,
1041:                            stringPredictor ? getNumString() : 0,
1042:                            datePredictor ? getNumDate() : 0,
1043:                            relationalPredictor ? getNumRelational() : 0,
1044:                            numClasses, classType, multiInstance);
1045:                    if (missingLevel > 0)
1046:                        addMissing(train, missingLevel, predictorMissing,
1047:                                classMissing);
1048:                    associators = Associator.makeCopies(getAssociator(), 2);
1049:                    evaluationB = new AssociatorEvaluation();
1050:                    evaluationI = new AssociatorEvaluation();
1051:                    evaluationB.evaluate(associators[0], train);
1052:                } catch (Exception ex) {
1053:                    throw new Error("Error setting up for tests: "
1054:                            + ex.getMessage());
1055:                }
1056:                try {
1057:
1058:                    // Now modify instance weights and re-built/test
1059:                    for (int i = 0; i < train.numInstances(); i++) {
1060:                        train.instance(i).setWeight(0);
1061:                    }
1062:                    Random random = new Random(1);
1063:                    for (int i = 0; i < train.numInstances() / 2; i++) {
1064:                        int inst = Math.abs(random.nextInt())
1065:                                % train.numInstances();
1066:                        int weight = Math.abs(random.nextInt()) % 10 + 1;
1067:                        train.instance(inst).setWeight(weight);
1068:                    }
1069:                    evaluationI.evaluate(associators[1], train);
1070:                    if (evaluationB.equals(evaluationI)) {
1071:                        //	println("no");
1072:                        evalFail = true;
1073:                        throw new Exception("evalFail");
1074:                    }
1075:
1076:                    println("yes");
1077:                    result[0] = true;
1078:                } catch (Exception ex) {
1079:                    println("no");
1080:                    result[0] = false;
1081:
1082:                    if (m_Debug) {
1083:                        println("\n=== Full Report ===");
1084:
1085:                        if (evalFail) {
1086:                            println("Results don't differ between non-weighted and "
1087:                                    + "weighted instance models.");
1088:                            println("Here are the results:\n");
1089:                            println(evaluationB
1090:                                    .toSummaryString("\nboth methods\n"));
1091:                        } else {
1092:                            print("Problem during building");
1093:                            println(": " + ex.getMessage() + "\n");
1094:                        }
1095:                        println("Here is the dataset:\n");
1096:                        println("=== Train Dataset ===\n" + train.toString()
1097:                                + "\n");
1098:                        println("=== Train Weights ===\n");
1099:                        for (int i = 0; i < train.numInstances(); i++) {
1100:                            println(" " + (i + 1) + "    "
1101:                                    + train.instance(i).weight());
1102:                        }
1103:                    }
1104:                }
1105:
1106:                return result;
1107:            }
1108:
1109:            /**
1110:             * Checks whether the scheme alters the training dataset during
1111:             * building. If the scheme needs to modify the data it should take 
1112:             * a copy of the training data. Currently checks for changes to header 
1113:             * structure, number of instances, order of instances, instance weights.
1114:             *
1115:             * @param nominalPredictor if true use nominal predictor attributes
1116:             * @param numericPredictor if true use numeric predictor attributes
1117:             * @param stringPredictor if true use string predictor attributes
1118:             * @param datePredictor if true use date predictor attributes
1119:             * @param relationalPredictor if true use relational predictor attributes
1120:             * @param multiInstance whether multi-instance is needed
1121:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1122:             * @param predictorMissing true if we know the associator can handle
1123:             * (at least) moderate missing predictor values
1124:             * @param classMissing true if we know the associator can handle
1125:             * (at least) moderate missing class values
1126:             * @return index 0 is true if the test was passed
1127:             */
1128:            protected boolean[] datasetIntegrity(boolean nominalPredictor,
1129:                    boolean numericPredictor, boolean stringPredictor,
1130:                    boolean datePredictor, boolean relationalPredictor,
1131:                    boolean multiInstance, int classType,
1132:                    boolean predictorMissing, boolean classMissing) {
1133:
1134:                print("associator doesn't alter original datasets");
1135:                printAttributeSummary(nominalPredictor, numericPredictor,
1136:                        stringPredictor, datePredictor, relationalPredictor,
1137:                        multiInstance, classType);
1138:                print("...");
1139:                int numTrain = getNumInstances(), numClasses = 2, missingLevel = 20;
1140:
1141:                boolean[] result = new boolean[2];
1142:                Instances train = null;
1143:                Associator associator = null;
1144:                try {
1145:                    train = makeTestDataset(42, numTrain,
1146:                            nominalPredictor ? getNumNominal() : 0,
1147:                            numericPredictor ? getNumNumeric() : 0,
1148:                            stringPredictor ? getNumString() : 0,
1149:                            datePredictor ? getNumDate() : 0,
1150:                            relationalPredictor ? getNumRelational() : 0,
1151:                            numClasses, classType, multiInstance);
1152:                    if (missingLevel > 0)
1153:                        addMissing(train, missingLevel, predictorMissing,
1154:                                classMissing);
1155:                    associator = Associator.makeCopies(getAssociator(), 1)[0];
1156:                } catch (Exception ex) {
1157:                    throw new Error("Error setting up for tests: "
1158:                            + ex.getMessage());
1159:                }
1160:                try {
1161:                    Instances trainCopy = new Instances(train);
1162:                    associator.buildAssociations(trainCopy);
1163:                    compareDatasets(train, trainCopy);
1164:
1165:                    println("yes");
1166:                    result[0] = true;
1167:                } catch (Exception ex) {
1168:                    println("no");
1169:                    result[0] = false;
1170:
1171:                    if (m_Debug) {
1172:                        println("\n=== Full Report ===");
1173:                        print("Problem during building");
1174:                        println(": " + ex.getMessage() + "\n");
1175:                        println("Here is the dataset:\n");
1176:                        println("=== Train Dataset ===\n" + train.toString()
1177:                                + "\n");
1178:                    }
1179:                }
1180:
1181:                return result;
1182:            }
1183:
1184:            /**
1185:             * Runs a text on the datasets with the given characteristics.
1186:             * 
1187:             * @param nominalPredictor if true use nominal predictor attributes
1188:             * @param numericPredictor if true use numeric predictor attributes
1189:             * @param stringPredictor if true use string predictor attributes
1190:             * @param datePredictor if true use date predictor attributes
1191:             * @param relationalPredictor if true use relational predictor attributes
1192:             * @param multiInstance whether multi-instance is needed
1193:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1194:             * @param missingLevel the percentage of missing values
1195:             * @param predictorMissing true if the missing values may be in 
1196:             * the predictors
1197:             * @param classMissing true if the missing values may be in the class
1198:             * @param numTrain the number of instances in the training set
1199:             * @param numClasses the number of classes
1200:             * @param accepts the acceptable string in an exception
1201:             * @return index 0 is true if the test was passed, index 1 is true if test 
1202:             *         was acceptable
1203:             */
1204:            protected boolean[] runBasicTest(boolean nominalPredictor,
1205:                    boolean numericPredictor, boolean stringPredictor,
1206:                    boolean datePredictor, boolean relationalPredictor,
1207:                    boolean multiInstance, int classType, int missingLevel,
1208:                    boolean predictorMissing, boolean classMissing,
1209:                    int numTrain, int numClasses, FastVector accepts) {
1210:
1211:                return runBasicTest(nominalPredictor, numericPredictor,
1212:                        stringPredictor, datePredictor, relationalPredictor,
1213:                        multiInstance, classType, TestInstances.CLASS_IS_LAST,
1214:                        missingLevel, predictorMissing, classMissing, numTrain,
1215:                        numClasses, accepts);
1216:            }
1217:
1218:            /**
1219:             * Runs a text on the datasets with the given characteristics.
1220:             * 
1221:             * @param nominalPredictor if true use nominal predictor attributes
1222:             * @param numericPredictor if true use numeric predictor attributes
1223:             * @param stringPredictor if true use string predictor attributes
1224:             * @param datePredictor if true use date predictor attributes
1225:             * @param relationalPredictor if true use relational predictor attributes
1226:             * @param multiInstance whether multi-instance is needed
1227:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1228:             * @param classIndex the attribute index of the class
1229:             * @param missingLevel the percentage of missing values
1230:             * @param predictorMissing true if the missing values may be in 
1231:             * the predictors
1232:             * @param classMissing true if the missing values may be in the class
1233:             * @param numTrain the number of instances in the training set
1234:             * @param numClasses the number of classes
1235:             * @param accepts the acceptable string in an exception
1236:             * @return index 0 is true if the test was passed, index 1 is true if test 
1237:             *         was acceptable
1238:             */
1239:            protected boolean[] runBasicTest(boolean nominalPredictor,
1240:                    boolean numericPredictor, boolean stringPredictor,
1241:                    boolean datePredictor, boolean relationalPredictor,
1242:                    boolean multiInstance, int classType, int classIndex,
1243:                    int missingLevel, boolean predictorMissing,
1244:                    boolean classMissing, int numTrain, int numClasses,
1245:                    FastVector accepts) {
1246:
1247:                boolean[] result = new boolean[2];
1248:                Instances train = null;
1249:                Associator associator = null;
1250:                try {
1251:                    train = makeTestDataset(42, numTrain,
1252:                            nominalPredictor ? getNumNominal() : 0,
1253:                            numericPredictor ? getNumNumeric() : 0,
1254:                            stringPredictor ? getNumString() : 0,
1255:                            datePredictor ? getNumDate() : 0,
1256:                            relationalPredictor ? getNumRelational() : 0,
1257:                            numClasses, classType, classIndex, multiInstance);
1258:                    if (missingLevel > 0)
1259:                        addMissing(train, missingLevel, predictorMissing,
1260:                                classMissing);
1261:                    associator = Associator.makeCopies(getAssociator(), 1)[0];
1262:                } catch (Exception ex) {
1263:                    ex.printStackTrace();
1264:                    throw new Error("Error setting up for tests: "
1265:                            + ex.getMessage());
1266:                }
1267:                try {
1268:                    associator.buildAssociations(train);
1269:                    println("yes");
1270:                    result[0] = true;
1271:                } catch (Exception ex) {
1272:                    boolean acceptable = false;
1273:                    String msg;
1274:                    if (ex.getMessage() == null)
1275:                        msg = "";
1276:                    else
1277:                        msg = ex.getMessage().toLowerCase();
1278:                    if (msg.indexOf("not in classpath") > -1)
1279:                        m_ClasspathProblems = true;
1280:
1281:                    for (int i = 0; i < accepts.size(); i++) {
1282:                        if (msg.indexOf((String) accepts.elementAt(i)) >= 0) {
1283:                            acceptable = true;
1284:                        }
1285:                    }
1286:
1287:                    println("no" + (acceptable ? " (OK error message)" : ""));
1288:                    result[1] = acceptable;
1289:
1290:                    if (m_Debug) {
1291:                        println("\n=== Full Report ===");
1292:                        print("Problem during building");
1293:                        println(": " + ex.getMessage() + "\n");
1294:                        if (!acceptable) {
1295:                            if (accepts.size() > 0) {
1296:                                print("Error message doesn't mention ");
1297:                                for (int i = 0; i < accepts.size(); i++) {
1298:                                    if (i != 0) {
1299:                                        print(" or ");
1300:                                    }
1301:                                    print('"' + (String) accepts.elementAt(i) + '"');
1302:                                }
1303:                            }
1304:                            println("here is the dataset:\n");
1305:                            println("=== Train Dataset ===\n"
1306:                                    + train.toString() + "\n");
1307:                        }
1308:                    }
1309:                }
1310:
1311:                return result;
1312:            }
1313:
1314:            /**
1315:             * Make a simple set of instances, which can later be modified
1316:             * for use in specific tests.
1317:             *
1318:             * @param seed the random number seed
1319:             * @param numInstances the number of instances to generate
1320:             * @param numNominal the number of nominal attributes
1321:             * @param numNumeric the number of numeric attributes
1322:             * @param numString the number of string attributes
1323:             * @param numDate the number of date attributes
1324:             * @param numRelational the number of relational attributes
1325:             * @param numClasses the number of classes (if nominal class)
1326:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1327:             * @param multiInstance whether the dataset should a multi-instance dataset
1328:             * @return the test dataset
1329:             * @throws Exception if the dataset couldn't be generated
1330:             * @see #process(Instances)
1331:             */
1332:            protected Instances makeTestDataset(int seed, int numInstances,
1333:                    int numNominal, int numNumeric, int numString, int numDate,
1334:                    int numRelational, int numClasses, int classType,
1335:                    boolean multiInstance) throws Exception {
1336:
1337:                return makeTestDataset(seed, numInstances, numNominal,
1338:                        numNumeric, numString, numDate, numRelational,
1339:                        numClasses, classType, TestInstances.CLASS_IS_LAST,
1340:                        multiInstance);
1341:            }
1342:
1343:            /**
1344:             * Make a simple set of instances with variable position of the class 
1345:             * attribute, which can later be modified for use in specific tests.
1346:             *
1347:             * @param seed the random number seed
1348:             * @param numInstances the number of instances to generate
1349:             * @param numNominal the number of nominal attributes
1350:             * @param numNumeric the number of numeric attributes
1351:             * @param numString the number of string attributes
1352:             * @param numDate the number of date attributes
1353:             * @param numRelational the number of relational attributes
1354:             * @param numClasses the number of classes (if nominal class)
1355:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1356:             * @param classIndex the index of the class (0-based, -1 as last)
1357:             * @param multiInstance whether the dataset should a multi-instance dataset
1358:             * @return the test dataset
1359:             * @throws Exception if the dataset couldn't be generated
1360:             * @see TestInstances#CLASS_IS_LAST
1361:             * @see #process(Instances)
1362:             */
1363:            protected Instances makeTestDataset(int seed, int numInstances,
1364:                    int numNominal, int numNumeric, int numString, int numDate,
1365:                    int numRelational, int numClasses, int classType,
1366:                    int classIndex, boolean multiInstance) throws Exception {
1367:
1368:                TestInstances dataset = new TestInstances();
1369:
1370:                dataset.setSeed(seed);
1371:                dataset.setNumInstances(numInstances);
1372:                dataset.setNumNominal(numNominal);
1373:                dataset.setNumNumeric(numNumeric);
1374:                dataset.setNumString(numString);
1375:                dataset.setNumDate(numDate);
1376:                dataset.setNumRelational(numRelational);
1377:                dataset.setNumClasses(numClasses);
1378:                if (classType == NO_CLASS) {
1379:                    dataset.setClassType(Attribute.NOMINAL); // ignored
1380:                    dataset.setClassIndex(TestInstances.NO_CLASS);
1381:                } else {
1382:                    dataset.setClassType(classType);
1383:                    dataset.setClassIndex(classIndex);
1384:                }
1385:                dataset.setNumClasses(numClasses);
1386:                dataset.setMultiInstance(multiInstance);
1387:                dataset.setWords(getWords());
1388:                dataset.setWordSeparators(getWordSeparators());
1389:
1390:                return process(dataset.generate());
1391:            }
1392:
1393:            /**
1394:             * Print out a short summary string for the dataset characteristics
1395:             *
1396:             * @param nominalPredictor true if nominal predictor attributes are present
1397:             * @param numericPredictor true if numeric predictor attributes are present
1398:             * @param stringPredictor true if string predictor attributes are present
1399:             * @param datePredictor true if date predictor attributes are present
1400:             * @param relationalPredictor true if relational predictor attributes are present
1401:             * @param multiInstance whether multi-instance is needed
1402:             * @param classType the class type (NUMERIC, NOMINAL, etc.)
1403:             */
1404:            protected void printAttributeSummary(boolean nominalPredictor,
1405:                    boolean numericPredictor, boolean stringPredictor,
1406:                    boolean datePredictor, boolean relationalPredictor,
1407:                    boolean multiInstance, int classType) {
1408:
1409:                String str = "";
1410:
1411:                if (numericPredictor)
1412:                    str += " numeric";
1413:
1414:                if (nominalPredictor) {
1415:                    if (str.length() > 0)
1416:                        str += " &";
1417:                    str += " nominal";
1418:                }
1419:
1420:                if (stringPredictor) {
1421:                    if (str.length() > 0)
1422:                        str += " &";
1423:                    str += " string";
1424:                }
1425:
1426:                if (datePredictor) {
1427:                    if (str.length() > 0)
1428:                        str += " &";
1429:                    str += " date";
1430:                }
1431:
1432:                if (relationalPredictor) {
1433:                    if (str.length() > 0)
1434:                        str += " &";
1435:                    str += " relational";
1436:                }
1437:
1438:                str += " predictors)";
1439:
1440:                switch (classType) {
1441:                case Attribute.NUMERIC:
1442:                    str = " (numeric class," + str;
1443:                    break;
1444:                case Attribute.NOMINAL:
1445:                    str = " (nominal class," + str;
1446:                    break;
1447:                case Attribute.STRING:
1448:                    str = " (string class," + str;
1449:                    break;
1450:                case Attribute.DATE:
1451:                    str = " (date class," + str;
1452:                    break;
1453:                case Attribute.RELATIONAL:
1454:                    str = " (relational class," + str;
1455:                    break;
1456:                case NO_CLASS:
1457:                    str = " (no class," + str;
1458:                    break;
1459:                }
1460:
1461:                print(str);
1462:            }
1463:
1464:            /**
1465:             * Test method for this class
1466:             * 
1467:             * @param args the commandline parameters
1468:             */
1469:            public static void main(String[] args) {
1470:                runCheck(new CheckAssociator(), args);
1471:            }
1472:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.