0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * CheckAssociator.java
0019: * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
0020: *
0021: */
0022:
0023: package weka.associations;
0024:
0025: import weka.core.Attribute;
0026: import weka.core.CheckScheme;
0027: import weka.core.FastVector;
0028: import weka.core.Instances;
0029: import weka.core.MultiInstanceCapabilitiesHandler;
0030: import weka.core.Option;
0031: import weka.core.OptionHandler;
0032: import weka.core.SerializationHelper;
0033: import weka.core.TestInstances;
0034: import weka.core.Utils;
0035: import weka.core.WeightedInstancesHandler;
0036:
0037: import java.util.Enumeration;
0038: import java.util.Random;
0039: import java.util.Vector;
0040:
0041: /**
0042: * Class for examining the capabilities and finding problems with
0043: * associators. If you implement an associator using the WEKA libraries,
0044: * you should run the checks on it to ensure robustness and correct
0045: * operation. Passing all the tests of this object does not mean
0046: * bugs in the associators don't exist, but this will help find some
0047: * common ones. <p/>
0048: *
0049: * Typical usage: <p/>
0050: * <code>java weka.associations.CheckAssociator -W associator_name
0051: * -- associator_options </code><p/>
0052: *
0053: * CheckAssociator reports on the following:
0054: * <ul>
0055: * <li> Associator abilities
0056: * <ul>
0057: * <li> Possible command line options to the associators </li>
0058: * <li> Whether the associators can predict nominal, numeric, string,
0059: * date or relational class attributes. </li>
0060: * <li> Whether the associators can handle numeric predictor attributes </li>
0061: * <li> Whether the associators can handle nominal predictor attributes </li>
0062: * <li> Whether the associators can handle string predictor attributes </li>
0063: * <li> Whether the associators can handle date predictor attributes </li>
0064: * <li> Whether the associators can handle relational predictor attributes </li>
0065: * <li> Whether the associators can handle multi-instance data </li>
0066: * <li> Whether the associators can handle missing predictor values </li>
0067: * <li> Whether the associators can handle missing class values </li>
0068: * <li> Whether a nominal associators only handles 2 class problems </li>
0069: * <li> Whether the associators can handle instance weights </li>
0070: * </ul>
0071: * </li>
0072: * <li> Correct functioning
0073: * <ul>
0074: * <li> Correct initialisation during buildAssociations (i.e. no result
0075: * changes when buildAssociations called repeatedly) </li>
0076: * <li> Whether the associator alters the data passed to it
0077: * (number of instances, instance order, instance weights, etc) </li>
0078: * </ul>
0079: * </li>
0080: * <li> Degenerate cases
0081: * <ul>
0082: * <li> building associators with zero training instances </li>
0083: * <li> all but one predictor attribute values missing </li>
0084: * <li> all predictor attribute values missing </li>
0085: * <li> all but one class values missing </li>
0086: * <li> all class values missing </li>
0087: * </ul>
0088: * </li>
0089: * </ul>
0090: * Running CheckAssociator with the debug option set will output the
0091: * training dataset for any failed tests.<p/>
0092: *
0093: * The <code>weka.associations.AbstractAssociatorTest</code> uses this
0094: * class to test all the associators. Any changes here, have to be
0095: * checked in that abstract test class, too. <p/>
0096: *
0097: <!-- options-start -->
0098: * Valid options are: <p/>
0099: *
0100: * <pre> -D
0101: * Turn on debugging output.</pre>
0102: *
0103: * <pre> -S
0104: * Silent mode - prints nothing to stdout.</pre>
0105: *
0106: * <pre> -N <num>
0107: * The number of instances in the datasets (default 20).</pre>
0108: *
0109: * <pre> -nominal <num>
0110: * The number of nominal attributes (default 2).</pre>
0111: *
0112: * <pre> -nominal-values <num>
0113: * The number of values for nominal attributes (default 1).</pre>
0114: *
0115: * <pre> -numeric <num>
0116: * The number of numeric attributes (default 1).</pre>
0117: *
0118: * <pre> -string <num>
0119: * The number of string attributes (default 1).</pre>
0120: *
0121: * <pre> -date <num>
0122: * The number of date attributes (default 1).</pre>
0123: *
0124: * <pre> -relational <num>
0125: * The number of relational attributes (default 1).</pre>
0126: *
0127: * <pre> -num-instances-relational <num>
0128: * The number of instances in relational/bag attributes (default 10).</pre>
0129: *
0130: * <pre> -words <comma-separated-list>
0131: * The words to use in string attributes.</pre>
0132: *
0133: * <pre> -word-separators <chars>
0134: * The word separators to use in string attributes.</pre>
0135: *
0136: * <pre> -W
0137: * Full name of the associator analysed.
0138: * eg: weka.associations.Apriori
0139: * (default weka.associations.Apriori)</pre>
0140: *
0141: * <pre>
0142: * Options specific to associator weka.associations.Apriori:
0143: * </pre>
0144: *
0145: * <pre> -N <required number of rules output>
0146: * The required number of rules. (default = 10)</pre>
0147: *
0148: * <pre> -T <0=confidence | 1=lift | 2=leverage | 3=Conviction>
0149: * The metric type by which to rank rules. (default = confidence)</pre>
0150: *
0151: * <pre> -C <minimum metric score of a rule>
0152: * The minimum confidence of a rule. (default = 0.9)</pre>
0153: *
0154: * <pre> -D <delta for minimum support>
0155: * The delta by which the minimum support is decreased in
0156: * each iteration. (default = 0.05)</pre>
0157: *
0158: * <pre> -U <upper bound for minimum support>
0159: * Upper bound for minimum support. (default = 1.0)</pre>
0160: *
0161: * <pre> -M <lower bound for minimum support>
0162: * The lower bound for the minimum support. (default = 0.1)</pre>
0163: *
0164: * <pre> -S <significance level>
0165: * If used, rules are tested for significance at
0166: * the given level. Slower. (default = no significance testing)</pre>
0167: *
0168: * <pre> -I
0169: * If set the itemsets found are also output. (default = no)</pre>
0170: *
0171: * <pre> -R
0172: * Remove columns that contain all missing values (default = no)</pre>
0173: *
0174: * <pre> -V
0175: * Report progress iteratively. (default = no)</pre>
0176: *
0177: * <pre> -A
0178: * If set class association rules are mined. (default = no)</pre>
0179: *
0180: * <pre> -c <the class index>
0181: * The class index. (default = last)</pre>
0182: *
0183: <!-- options-end -->
0184: *
0185: * Options after -- are passed to the designated associator.<p/>
0186: *
0187: * @author Len Trigg (trigg@cs.waikato.ac.nz)
0188: * @author FracPete (fracpete at waikato dot ac dot nz)
0189: * @version $Revision: 1.5 $
0190: * @see TestInstances
0191: */
0192: public class CheckAssociator extends CheckScheme {
0193:
0194: /*
0195: * Note about test methods:
0196: * - methods return array of booleans
0197: * - first index: success or not
0198: * - second index: acceptable or not (e.g., Exception is OK)
0199: *
0200: * FracPete (fracpete at waikato dot ac dot nz)
0201: */
0202:
0203: /** a "dummy" class type */
0204: public final static int NO_CLASS = -1;
0205:
0206: /** The associator to be examined */
0207: protected Associator m_Associator = new weka.associations.Apriori();
0208:
0209: /**
0210: * Returns an enumeration describing the available options.
0211: *
0212: * @return an enumeration of all the available options.
0213: */
0214: public Enumeration listOptions() {
0215: Vector result = new Vector();
0216:
0217: Enumeration en = super .listOptions();
0218: while (en.hasMoreElements())
0219: result.addElement(en.nextElement());
0220:
0221: result.addElement(new Option(
0222: "\tFull name of the associator analysed.\n"
0223: + "\teg: weka.associations.Apriori\n"
0224: + "\t(default weka.associations.Apriori)", "W",
0225: 1, "-W"));
0226:
0227: if ((m_Associator != null)
0228: && (m_Associator instanceof OptionHandler)) {
0229: result.addElement(new Option("", "", 0,
0230: "\nOptions specific to associator "
0231: + m_Associator.getClass().getName() + ":"));
0232: Enumeration enu = ((OptionHandler) m_Associator)
0233: .listOptions();
0234: while (enu.hasMoreElements())
0235: result.addElement(enu.nextElement());
0236: }
0237:
0238: return result.elements();
0239: }
0240:
0241: /**
0242: * Parses a given list of options.
0243: *
0244: <!-- options-start -->
0245: * Valid options are: <p/>
0246: *
0247: * <pre> -D
0248: * Turn on debugging output.</pre>
0249: *
0250: * <pre> -S
0251: * Silent mode - prints nothing to stdout.</pre>
0252: *
0253: * <pre> -N <num>
0254: * The number of instances in the datasets (default 20).</pre>
0255: *
0256: * <pre> -nominal <num>
0257: * The number of nominal attributes (default 2).</pre>
0258: *
0259: * <pre> -nominal-values <num>
0260: * The number of values for nominal attributes (default 1).</pre>
0261: *
0262: * <pre> -numeric <num>
0263: * The number of numeric attributes (default 1).</pre>
0264: *
0265: * <pre> -string <num>
0266: * The number of string attributes (default 1).</pre>
0267: *
0268: * <pre> -date <num>
0269: * The number of date attributes (default 1).</pre>
0270: *
0271: * <pre> -relational <num>
0272: * The number of relational attributes (default 1).</pre>
0273: *
0274: * <pre> -num-instances-relational <num>
0275: * The number of instances in relational/bag attributes (default 10).</pre>
0276: *
0277: * <pre> -words <comma-separated-list>
0278: * The words to use in string attributes.</pre>
0279: *
0280: * <pre> -word-separators <chars>
0281: * The word separators to use in string attributes.</pre>
0282: *
0283: * <pre> -W
0284: * Full name of the associator analysed.
0285: * eg: weka.associations.Apriori
0286: * (default weka.associations.Apriori)</pre>
0287: *
0288: * <pre>
0289: * Options specific to associator weka.associations.Apriori:
0290: * </pre>
0291: *
0292: * <pre> -N <required number of rules output>
0293: * The required number of rules. (default = 10)</pre>
0294: *
0295: * <pre> -T <0=confidence | 1=lift | 2=leverage | 3=Conviction>
0296: * The metric type by which to rank rules. (default = confidence)</pre>
0297: *
0298: * <pre> -C <minimum metric score of a rule>
0299: * The minimum confidence of a rule. (default = 0.9)</pre>
0300: *
0301: * <pre> -D <delta for minimum support>
0302: * The delta by which the minimum support is decreased in
0303: * each iteration. (default = 0.05)</pre>
0304: *
0305: * <pre> -U <upper bound for minimum support>
0306: * Upper bound for minimum support. (default = 1.0)</pre>
0307: *
0308: * <pre> -M <lower bound for minimum support>
0309: * The lower bound for the minimum support. (default = 0.1)</pre>
0310: *
0311: * <pre> -S <significance level>
0312: * If used, rules are tested for significance at
0313: * the given level. Slower. (default = no significance testing)</pre>
0314: *
0315: * <pre> -I
0316: * If set the itemsets found are also output. (default = no)</pre>
0317: *
0318: * <pre> -R
0319: * Remove columns that contain all missing values (default = no)</pre>
0320: *
0321: * <pre> -V
0322: * Report progress iteratively. (default = no)</pre>
0323: *
0324: * <pre> -A
0325: * If set class association rules are mined. (default = no)</pre>
0326: *
0327: * <pre> -c <the class index>
0328: * The class index. (default = last)</pre>
0329: *
0330: <!-- options-end -->
0331: *
0332: * @param options the list of options as an array of strings
0333: * @throws Exception if an option is not supported
0334: */
0335: public void setOptions(String[] options) throws Exception {
0336: String tmpStr;
0337:
0338: super .setOptions(options);
0339:
0340: tmpStr = Utils.getOption('W', options);
0341: if (tmpStr.length() == 0)
0342: tmpStr = weka.associations.Apriori.class.getName();
0343: setAssociator((Associator) forName("weka.associations",
0344: Associator.class, tmpStr, Utils
0345: .partitionOptions(options)));
0346: }
0347:
0348: /**
0349: * Gets the current settings of the CheckAssociator.
0350: *
0351: * @return an array of strings suitable for passing to setOptions
0352: */
0353: public String[] getOptions() {
0354: Vector result;
0355: String[] options;
0356: int i;
0357:
0358: result = new Vector();
0359:
0360: options = super .getOptions();
0361: for (i = 0; i < options.length; i++)
0362: result.add(options[i]);
0363:
0364: if (getAssociator() != null) {
0365: result.add("-W");
0366: result.add(getAssociator().getClass().getName());
0367: }
0368:
0369: if ((m_Associator != null)
0370: && (m_Associator instanceof OptionHandler))
0371: options = ((OptionHandler) m_Associator).getOptions();
0372: else
0373: options = new String[0];
0374:
0375: if (options.length > 0) {
0376: result.add("--");
0377: for (i = 0; i < options.length; i++)
0378: result.add(options[i]);
0379: }
0380:
0381: return (String[]) result.toArray(new String[result.size()]);
0382: }
0383:
0384: /**
0385: * Begin the tests, reporting results to System.out
0386: */
0387: public void doTests() {
0388:
0389: if (getAssociator() == null) {
0390: println("\n=== No associator set ===");
0391: return;
0392: }
0393: println("\n=== Check on Associator: "
0394: + getAssociator().getClass().getName() + " ===\n");
0395:
0396: // Start tests
0397: m_ClasspathProblems = false;
0398: println("--> Checking for interfaces");
0399: canTakeOptions();
0400: boolean weightedInstancesHandler = weightedInstancesHandler()[0];
0401: boolean multiInstanceHandler = multiInstanceHandler()[0];
0402: println("--> Associator tests");
0403: declaresSerialVersionUID();
0404: println("--> no class attribute");
0405: testsWithoutClass(weightedInstancesHandler,
0406: multiInstanceHandler);
0407: println("--> with class attribute");
0408: testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler,
0409: multiInstanceHandler);
0410: testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler,
0411: multiInstanceHandler);
0412: testsPerClassType(Attribute.DATE, weightedInstancesHandler,
0413: multiInstanceHandler);
0414: testsPerClassType(Attribute.STRING, weightedInstancesHandler,
0415: multiInstanceHandler);
0416: testsPerClassType(Attribute.RELATIONAL,
0417: weightedInstancesHandler, multiInstanceHandler);
0418: }
0419:
0420: /**
0421: * Set the associator to test.
0422: *
0423: * @param newAssociator the Associator to use.
0424: */
0425: public void setAssociator(Associator newAssociator) {
0426: m_Associator = newAssociator;
0427: }
0428:
0429: /**
0430: * Get the associator being tested
0431: *
0432: * @return the associator being tested
0433: */
0434: public Associator getAssociator() {
0435: return m_Associator;
0436: }
0437:
0438: /**
0439: * Run a battery of tests for a given class attribute type
0440: *
0441: * @param classType true if the class attribute should be numeric
0442: * @param weighted true if the associator says it handles weights
0443: * @param multiInstance true if the associator is a multi-instance associator
0444: */
0445: protected void testsPerClassType(int classType, boolean weighted,
0446: boolean multiInstance) {
0447:
0448: boolean PNom = canPredict(true, false, false, false, false,
0449: multiInstance, classType)[0];
0450: boolean PNum = canPredict(false, true, false, false, false,
0451: multiInstance, classType)[0];
0452: boolean PStr = canPredict(false, false, true, false, false,
0453: multiInstance, classType)[0];
0454: boolean PDat = canPredict(false, false, false, true, false,
0455: multiInstance, classType)[0];
0456: boolean PRel;
0457: if (!multiInstance)
0458: PRel = canPredict(false, false, false, false, true,
0459: multiInstance, classType)[0];
0460: else
0461: PRel = false;
0462:
0463: if (PNom || PNum || PStr || PDat || PRel) {
0464: if (weighted)
0465: instanceWeights(PNom, PNum, PStr, PDat, PRel,
0466: multiInstance, classType);
0467:
0468: if (classType == Attribute.NOMINAL)
0469: canHandleNClasses(PNom, PNum, PStr, PDat, PRel,
0470: multiInstance, 4);
0471:
0472: if (!multiInstance) {
0473: canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat,
0474: PRel, multiInstance, classType, 0);
0475: canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat,
0476: PRel, multiInstance, classType, 1);
0477: }
0478:
0479: canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel,
0480: multiInstance, classType);
0481: boolean handleMissingPredictors = canHandleMissing(PNom,
0482: PNum, PStr, PDat, PRel, multiInstance, classType,
0483: true, false, 20)[0];
0484: if (handleMissingPredictors)
0485: canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0486: multiInstance, classType, true, false, 100);
0487:
0488: boolean handleMissingClass = canHandleMissing(PNom, PNum,
0489: PStr, PDat, PRel, multiInstance, classType, false,
0490: true, 20)[0];
0491: if (handleMissingClass)
0492: canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0493: multiInstance, classType, false, true, 100);
0494:
0495: correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel,
0496: multiInstance, classType);
0497: datasetIntegrity(PNom, PNum, PStr, PDat, PRel,
0498: multiInstance, classType, handleMissingPredictors,
0499: handleMissingClass);
0500: }
0501: }
0502:
0503: /**
0504: * Run a battery of tests without a class
0505: *
0506: * @param weighted true if the associator says it handles weights
0507: * @param multiInstance true if the associator is a multi-instance associator
0508: */
0509: protected void testsWithoutClass(boolean weighted,
0510: boolean multiInstance) {
0511:
0512: boolean PNom = canPredict(true, false, false, false, false,
0513: multiInstance, NO_CLASS)[0];
0514: boolean PNum = canPredict(false, true, false, false, false,
0515: multiInstance, NO_CLASS)[0];
0516: boolean PStr = canPredict(false, false, true, false, false,
0517: multiInstance, NO_CLASS)[0];
0518: boolean PDat = canPredict(false, false, false, true, false,
0519: multiInstance, NO_CLASS)[0];
0520: boolean PRel;
0521: if (!multiInstance)
0522: PRel = canPredict(false, false, false, false, true,
0523: multiInstance, NO_CLASS)[0];
0524: else
0525: PRel = false;
0526:
0527: if (PNom || PNum || PStr || PDat || PRel) {
0528: if (weighted)
0529: instanceWeights(PNom, PNum, PStr, PDat, PRel,
0530: multiInstance, NO_CLASS);
0531:
0532: canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel,
0533: multiInstance, NO_CLASS);
0534: boolean handleMissingPredictors = canHandleMissing(PNom,
0535: PNum, PStr, PDat, PRel, multiInstance, NO_CLASS,
0536: true, false, 20)[0];
0537: if (handleMissingPredictors)
0538: canHandleMissing(PNom, PNum, PStr, PDat, PRel,
0539: multiInstance, NO_CLASS, true, false, 100);
0540:
0541: correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel,
0542: multiInstance, NO_CLASS);
0543: datasetIntegrity(PNom, PNum, PStr, PDat, PRel,
0544: multiInstance, NO_CLASS, handleMissingPredictors,
0545: false);
0546: }
0547: }
0548:
0549: /**
0550: * Checks whether the scheme can take command line options.
0551: *
0552: * @return index 0 is true if the associator can take options
0553: */
0554: protected boolean[] canTakeOptions() {
0555:
0556: boolean[] result = new boolean[2];
0557:
0558: print("options...");
0559: if (m_Associator instanceof OptionHandler) {
0560: println("yes");
0561: if (m_Debug) {
0562: println("\n=== Full report ===");
0563: Enumeration enu = ((OptionHandler) m_Associator)
0564: .listOptions();
0565: while (enu.hasMoreElements()) {
0566: Option option = (Option) enu.nextElement();
0567: print(option.synopsis() + "\n"
0568: + option.description() + "\n");
0569: }
0570: println("\n");
0571: }
0572: result[0] = true;
0573: } else {
0574: println("no");
0575: result[0] = false;
0576: }
0577:
0578: return result;
0579: }
0580:
0581: /**
0582: * Checks whether the scheme says it can handle instance weights.
0583: *
0584: * @return true if the associator handles instance weights
0585: */
0586: protected boolean[] weightedInstancesHandler() {
0587:
0588: boolean[] result = new boolean[2];
0589:
0590: print("weighted instances associator...");
0591: if (m_Associator instanceof WeightedInstancesHandler) {
0592: println("yes");
0593: result[0] = true;
0594: } else {
0595: println("no");
0596: result[0] = false;
0597: }
0598:
0599: return result;
0600: }
0601:
0602: /**
0603: * Checks whether the scheme handles multi-instance data.
0604: *
0605: * @return true if the associator handles multi-instance data
0606: */
0607: protected boolean[] multiInstanceHandler() {
0608: boolean[] result = new boolean[2];
0609:
0610: print("multi-instance associator...");
0611: if (m_Associator instanceof MultiInstanceCapabilitiesHandler) {
0612: println("yes");
0613: result[0] = true;
0614: } else {
0615: println("no");
0616: result[0] = false;
0617: }
0618:
0619: return result;
0620: }
0621:
0622: /**
0623: * tests for a serialVersionUID. Fails in case the scheme doesn't declare
0624: * a UID.
0625: *
0626: * @return index 0 is true if the scheme declares a UID
0627: */
0628: protected boolean[] declaresSerialVersionUID() {
0629: boolean[] result = new boolean[2];
0630:
0631: print("serialVersionUID...");
0632:
0633: result[0] = !SerializationHelper.needsUID(m_Associator
0634: .getClass());
0635:
0636: if (result[0])
0637: println("yes");
0638: else
0639: println("no");
0640:
0641: return result;
0642: }
0643:
0644: /**
0645: * Checks basic prediction of the scheme, for simple non-troublesome
0646: * datasets.
0647: *
0648: * @param nominalPredictor if true use nominal predictor attributes
0649: * @param numericPredictor if true use numeric predictor attributes
0650: * @param stringPredictor if true use string predictor attributes
0651: * @param datePredictor if true use date predictor attributes
0652: * @param relationalPredictor if true use relational predictor attributes
0653: * @param multiInstance whether multi-instance is needed
0654: * @param classType the class type (NOMINAL, NUMERIC, etc.)
0655: * @return index 0 is true if the test was passed, index 1 is true if test
0656: * was acceptable
0657: */
0658: protected boolean[] canPredict(boolean nominalPredictor,
0659: boolean numericPredictor, boolean stringPredictor,
0660: boolean datePredictor, boolean relationalPredictor,
0661: boolean multiInstance, int classType) {
0662:
0663: print("basic predict");
0664: printAttributeSummary(nominalPredictor, numericPredictor,
0665: stringPredictor, datePredictor, relationalPredictor,
0666: multiInstance, classType);
0667: print("...");
0668: FastVector accepts = new FastVector();
0669: accepts.addElement("any");
0670: accepts.addElement("unary");
0671: accepts.addElement("binary");
0672: accepts.addElement("nominal");
0673: accepts.addElement("numeric");
0674: accepts.addElement("string");
0675: accepts.addElement("date");
0676: accepts.addElement("relational");
0677: accepts.addElement("multi-instance");
0678: accepts.addElement("not in classpath");
0679: int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0680: boolean predictorMissing = false, classMissing = false;
0681:
0682: return runBasicTest(nominalPredictor, numericPredictor,
0683: stringPredictor, datePredictor, relationalPredictor,
0684: multiInstance, classType, missingLevel,
0685: predictorMissing, classMissing, numTrain, numClasses,
0686: accepts);
0687: }
0688:
0689: /**
0690: * Checks whether nominal schemes can handle more than two classes.
0691: * If a scheme is only designed for two-class problems it should
0692: * throw an appropriate exception for multi-class problems.
0693: *
0694: * @param nominalPredictor if true use nominal predictor attributes
0695: * @param numericPredictor if true use numeric predictor attributes
0696: * @param stringPredictor if true use string predictor attributes
0697: * @param datePredictor if true use date predictor attributes
0698: * @param relationalPredictor if true use relational predictor attributes
0699: * @param multiInstance whether multi-instance is needed
0700: * @param numClasses the number of classes to test
0701: * @return index 0 is true if the test was passed, index 1 is true if test
0702: * was acceptable
0703: */
0704: protected boolean[] canHandleNClasses(boolean nominalPredictor,
0705: boolean numericPredictor, boolean stringPredictor,
0706: boolean datePredictor, boolean relationalPredictor,
0707: boolean multiInstance, int numClasses) {
0708:
0709: print("more than two class problems");
0710: printAttributeSummary(nominalPredictor, numericPredictor,
0711: stringPredictor, datePredictor, relationalPredictor,
0712: multiInstance, Attribute.NOMINAL);
0713: print("...");
0714: FastVector accepts = new FastVector();
0715: accepts.addElement("number");
0716: accepts.addElement("class");
0717: int numTrain = getNumInstances(), missingLevel = 0;
0718: boolean predictorMissing = false, classMissing = false;
0719:
0720: return runBasicTest(nominalPredictor, numericPredictor,
0721: stringPredictor, datePredictor, relationalPredictor,
0722: multiInstance, Attribute.NOMINAL, missingLevel,
0723: predictorMissing, classMissing, numTrain, numClasses,
0724: accepts);
0725: }
0726:
0727: /**
0728: * Checks whether the scheme can handle class attributes as Nth attribute.
0729: *
0730: * @param nominalPredictor if true use nominal predictor attributes
0731: * @param numericPredictor if true use numeric predictor attributes
0732: * @param stringPredictor if true use string predictor attributes
0733: * @param datePredictor if true use date predictor attributes
0734: * @param relationalPredictor if true use relational predictor attributes
0735: * @param multiInstance whether multi-instance is needed
0736: * @param classType the class type (NUMERIC, NOMINAL, etc.)
0737: * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
0738: * @return index 0 is true if the test was passed, index 1 is true if test
0739: * was acceptable
0740: * @see TestInstances#CLASS_IS_LAST
0741: */
0742: protected boolean[] canHandleClassAsNthAttribute(
0743: boolean nominalPredictor, boolean numericPredictor,
0744: boolean stringPredictor, boolean datePredictor,
0745: boolean relationalPredictor, boolean multiInstance,
0746: int classType, int classIndex) {
0747:
0748: if (classIndex == TestInstances.CLASS_IS_LAST)
0749: print("class attribute as last attribute");
0750: else
0751: print("class attribute as " + (classIndex + 1)
0752: + ". attribute");
0753: printAttributeSummary(nominalPredictor, numericPredictor,
0754: stringPredictor, datePredictor, relationalPredictor,
0755: multiInstance, classType);
0756: print("...");
0757: FastVector accepts = new FastVector();
0758: int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0759: boolean predictorMissing = false, classMissing = false;
0760:
0761: return runBasicTest(nominalPredictor, numericPredictor,
0762: stringPredictor, datePredictor, relationalPredictor,
0763: multiInstance, classType, classIndex, missingLevel,
0764: predictorMissing, classMissing, numTrain, numClasses,
0765: accepts);
0766: }
0767:
0768: /**
0769: * Checks whether the scheme can handle zero training instances.
0770: *
0771: * @param nominalPredictor if true use nominal predictor attributes
0772: * @param numericPredictor if true use numeric predictor attributes
0773: * @param stringPredictor if true use string predictor attributes
0774: * @param datePredictor if true use date predictor attributes
0775: * @param relationalPredictor if true use relational predictor attributes
0776: * @param multiInstance whether multi-instance is needed
0777: * @param classType the class type (NUMERIC, NOMINAL, etc.)
0778: * @return index 0 is true if the test was passed, index 1 is true if test
0779: * was acceptable
0780: */
0781: protected boolean[] canHandleZeroTraining(boolean nominalPredictor,
0782: boolean numericPredictor, boolean stringPredictor,
0783: boolean datePredictor, boolean relationalPredictor,
0784: boolean multiInstance, int classType) {
0785:
0786: print("handle zero training instances");
0787: printAttributeSummary(nominalPredictor, numericPredictor,
0788: stringPredictor, datePredictor, relationalPredictor,
0789: multiInstance, classType);
0790: print("...");
0791: FastVector accepts = new FastVector();
0792: accepts.addElement("train");
0793: accepts.addElement("value");
0794: int numTrain = 0, numClasses = 2, missingLevel = 0;
0795: boolean predictorMissing = false, classMissing = false;
0796:
0797: return runBasicTest(nominalPredictor, numericPredictor,
0798: stringPredictor, datePredictor, relationalPredictor,
0799: multiInstance, classType, missingLevel,
0800: predictorMissing, classMissing, numTrain, numClasses,
0801: accepts);
0802: }
0803:
0804: /**
0805: * Checks whether the scheme correctly initialises models when
0806: * buildAssociations is called. This test calls buildAssociations with
0807: * one training dataset. buildAssociations is then called on a training
0808: * set with different structure, and then again with the original training
0809: * set. If the equals method of the AssociatorEvaluation class returns
0810: * false, this is noted as incorrect build initialisation.
0811: *
0812: * @param nominalPredictor if true use nominal predictor attributes
0813: * @param numericPredictor if true use numeric predictor attributes
0814: * @param stringPredictor if true use string predictor attributes
0815: * @param datePredictor if true use date predictor attributes
0816: * @param relationalPredictor if true use relational predictor attributes
0817: * @param multiInstance whether multi-instance is needed
0818: * @param classType the class type (NUMERIC, NOMINAL, etc.)
0819: * @return index 0 is true if the test was passed
0820: */
0821: protected boolean[] correctBuildInitialisation(
0822: boolean nominalPredictor, boolean numericPredictor,
0823: boolean stringPredictor, boolean datePredictor,
0824: boolean relationalPredictor, boolean multiInstance,
0825: int classType) {
0826:
0827: boolean[] result = new boolean[2];
0828:
0829: print("correct initialisation during buildAssociations");
0830: printAttributeSummary(nominalPredictor, numericPredictor,
0831: stringPredictor, datePredictor, relationalPredictor,
0832: multiInstance, classType);
0833: print("...");
0834: int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
0835: boolean predictorMissing = false, classMissing = false;
0836:
0837: Instances train1 = null;
0838: Instances train2 = null;
0839: Associator associator = null;
0840: AssociatorEvaluation evaluation1A = null;
0841: AssociatorEvaluation evaluation1B = null;
0842: AssociatorEvaluation evaluation2 = null;
0843: int stage = 0;
0844: try {
0845:
0846: // Make two train sets with different numbers of attributes
0847: train1 = makeTestDataset(42, numTrain,
0848: nominalPredictor ? getNumNominal() : 0,
0849: numericPredictor ? getNumNumeric() : 0,
0850: stringPredictor ? getNumString() : 0,
0851: datePredictor ? getNumDate() : 0,
0852: relationalPredictor ? getNumRelational() : 0,
0853: numClasses, classType, multiInstance);
0854: train2 = makeTestDataset(84, numTrain,
0855: nominalPredictor ? getNumNominal() + 1 : 0,
0856: numericPredictor ? getNumNumeric() + 1 : 0,
0857: stringPredictor ? getNumString() + 1 : 0,
0858: datePredictor ? getNumDate() + 1 : 0,
0859: relationalPredictor ? getNumRelational() + 1 : 0,
0860: numClasses, classType, multiInstance);
0861: if (missingLevel > 0) {
0862: addMissing(train1, missingLevel, predictorMissing,
0863: classMissing);
0864: addMissing(train2, missingLevel, predictorMissing,
0865: classMissing);
0866: }
0867:
0868: associator = Associator.makeCopies(getAssociator(), 1)[0];
0869: evaluation1A = new AssociatorEvaluation();
0870: evaluation1B = new AssociatorEvaluation();
0871: evaluation2 = new AssociatorEvaluation();
0872: } catch (Exception ex) {
0873: throw new Error("Error setting up for tests: "
0874: + ex.getMessage());
0875: }
0876: try {
0877: stage = 0;
0878: evaluation1A.evaluate(associator, train1);
0879:
0880: stage = 1;
0881: evaluation2.evaluate(associator, train2);
0882:
0883: stage = 2;
0884: evaluation1B.evaluate(associator, train1);
0885:
0886: stage = 3;
0887: if (!evaluation1A.equals(evaluation1B)) {
0888: if (m_Debug) {
0889: println("\n=== Full report ===\n"
0890: + evaluation1A
0891: .toSummaryString("\nFirst buildAssociations()")
0892: + "\n\n");
0893: println(evaluation1B
0894: .toSummaryString("\nSecond buildAssociations()")
0895: + "\n\n");
0896: }
0897: throw new Exception(
0898: "Results differ between buildAssociations calls");
0899: }
0900: println("yes");
0901: result[0] = true;
0902:
0903: if (false && m_Debug) {
0904: println("\n=== Full report ===\n"
0905: + evaluation1A
0906: .toSummaryString("\nFirst buildAssociations()")
0907: + "\n\n");
0908: println(evaluation1B
0909: .toSummaryString("\nSecond buildAssociations()")
0910: + "\n\n");
0911: }
0912: } catch (Exception ex) {
0913: println("no");
0914: result[0] = false;
0915:
0916: if (m_Debug) {
0917: println("\n=== Full Report ===");
0918: print("Problem during building");
0919: switch (stage) {
0920: case 0:
0921: print(" of dataset 1");
0922: break;
0923: case 1:
0924: print(" of dataset 2");
0925: break;
0926: case 2:
0927: print(" of dataset 1 (2nd build)");
0928: break;
0929: case 3:
0930: print(", comparing results from builds of dataset 1");
0931: break;
0932: }
0933: println(": " + ex.getMessage() + "\n");
0934: println("here are the datasets:\n");
0935: println("=== Train1 Dataset ===\n" + train1.toString()
0936: + "\n");
0937: println("=== Train2 Dataset ===\n" + train2.toString()
0938: + "\n");
0939: }
0940: }
0941:
0942: return result;
0943: }
0944:
0945: /**
0946: * Checks basic missing value handling of the scheme. If the missing
0947: * values cause an exception to be thrown by the scheme, this will be
0948: * recorded.
0949: *
0950: * @param nominalPredictor if true use nominal predictor attributes
0951: * @param numericPredictor if true use numeric predictor attributes
0952: * @param stringPredictor if true use string predictor attributes
0953: * @param datePredictor if true use date predictor attributes
0954: * @param relationalPredictor if true use relational predictor attributes
0955: * @param multiInstance whether multi-instance is needed
0956: * @param classType the class type (NUMERIC, NOMINAL, etc.)
0957: * @param predictorMissing true if the missing values may be in
0958: * the predictors
0959: * @param classMissing true if the missing values may be in the class
0960: * @param missingLevel the percentage of missing values
0961: * @return index 0 is true if the test was passed, index 1 is true if test
0962: * was acceptable
0963: */
0964: protected boolean[] canHandleMissing(boolean nominalPredictor,
0965: boolean numericPredictor, boolean stringPredictor,
0966: boolean datePredictor, boolean relationalPredictor,
0967: boolean multiInstance, int classType,
0968: boolean predictorMissing, boolean classMissing,
0969: int missingLevel) {
0970:
0971: if (missingLevel == 100)
0972: print("100% ");
0973: print("missing");
0974: if (predictorMissing) {
0975: print(" predictor");
0976: if (classMissing)
0977: print(" and");
0978: }
0979: if (classMissing)
0980: print(" class");
0981: print(" values");
0982: printAttributeSummary(nominalPredictor, numericPredictor,
0983: stringPredictor, datePredictor, relationalPredictor,
0984: multiInstance, classType);
0985: print("...");
0986: FastVector accepts = new FastVector();
0987: accepts.addElement("missing");
0988: accepts.addElement("value");
0989: accepts.addElement("train");
0990: int numTrain = getNumInstances(), numClasses = 2;
0991:
0992: return runBasicTest(nominalPredictor, numericPredictor,
0993: stringPredictor, datePredictor, relationalPredictor,
0994: multiInstance, classType, missingLevel,
0995: predictorMissing, classMissing, numTrain, numClasses,
0996: accepts);
0997: }
0998:
0999: /**
1000: * Checks whether the associator can handle instance weights.
1001: * This test compares the associator performance on two datasets
1002: * that are identical except for the training weights. If the
1003: * results change, then the associator must be using the weights. It
1004: * may be possible to get a false positive from this test if the
1005: * weight changes aren't significant enough to induce a change
1006: * in associator performance (but the weights are chosen to minimize
1007: * the likelihood of this).
1008: *
1009: * @param nominalPredictor if true use nominal predictor attributes
1010: * @param numericPredictor if true use numeric predictor attributes
1011: * @param stringPredictor if true use string predictor attributes
1012: * @param datePredictor if true use date predictor attributes
1013: * @param relationalPredictor if true use relational predictor attributes
1014: * @param multiInstance whether multi-instance is needed
1015: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1016: * @return index 0 true if the test was passed
1017: */
1018: protected boolean[] instanceWeights(boolean nominalPredictor,
1019: boolean numericPredictor, boolean stringPredictor,
1020: boolean datePredictor, boolean relationalPredictor,
1021: boolean multiInstance, int classType) {
1022:
1023: print("associator uses instance weights");
1024: printAttributeSummary(nominalPredictor, numericPredictor,
1025: stringPredictor, datePredictor, relationalPredictor,
1026: multiInstance, classType);
1027: print("...");
1028: int numTrain = 2 * getNumInstances(), numClasses = 2, missingLevel = 0;
1029: boolean predictorMissing = false, classMissing = false;
1030:
1031: boolean[] result = new boolean[2];
1032: Instances train = null;
1033: Associator[] associators = null;
1034: AssociatorEvaluation evaluationB = null;
1035: AssociatorEvaluation evaluationI = null;
1036: boolean evalFail = false;
1037: try {
1038: train = makeTestDataset(42, numTrain,
1039: nominalPredictor ? getNumNominal() + 1 : 0,
1040: numericPredictor ? getNumNumeric() + 1 : 0,
1041: stringPredictor ? getNumString() : 0,
1042: datePredictor ? getNumDate() : 0,
1043: relationalPredictor ? getNumRelational() : 0,
1044: numClasses, classType, multiInstance);
1045: if (missingLevel > 0)
1046: addMissing(train, missingLevel, predictorMissing,
1047: classMissing);
1048: associators = Associator.makeCopies(getAssociator(), 2);
1049: evaluationB = new AssociatorEvaluation();
1050: evaluationI = new AssociatorEvaluation();
1051: evaluationB.evaluate(associators[0], train);
1052: } catch (Exception ex) {
1053: throw new Error("Error setting up for tests: "
1054: + ex.getMessage());
1055: }
1056: try {
1057:
1058: // Now modify instance weights and re-built/test
1059: for (int i = 0; i < train.numInstances(); i++) {
1060: train.instance(i).setWeight(0);
1061: }
1062: Random random = new Random(1);
1063: for (int i = 0; i < train.numInstances() / 2; i++) {
1064: int inst = Math.abs(random.nextInt())
1065: % train.numInstances();
1066: int weight = Math.abs(random.nextInt()) % 10 + 1;
1067: train.instance(inst).setWeight(weight);
1068: }
1069: evaluationI.evaluate(associators[1], train);
1070: if (evaluationB.equals(evaluationI)) {
1071: // println("no");
1072: evalFail = true;
1073: throw new Exception("evalFail");
1074: }
1075:
1076: println("yes");
1077: result[0] = true;
1078: } catch (Exception ex) {
1079: println("no");
1080: result[0] = false;
1081:
1082: if (m_Debug) {
1083: println("\n=== Full Report ===");
1084:
1085: if (evalFail) {
1086: println("Results don't differ between non-weighted and "
1087: + "weighted instance models.");
1088: println("Here are the results:\n");
1089: println(evaluationB
1090: .toSummaryString("\nboth methods\n"));
1091: } else {
1092: print("Problem during building");
1093: println(": " + ex.getMessage() + "\n");
1094: }
1095: println("Here is the dataset:\n");
1096: println("=== Train Dataset ===\n" + train.toString()
1097: + "\n");
1098: println("=== Train Weights ===\n");
1099: for (int i = 0; i < train.numInstances(); i++) {
1100: println(" " + (i + 1) + " "
1101: + train.instance(i).weight());
1102: }
1103: }
1104: }
1105:
1106: return result;
1107: }
1108:
1109: /**
1110: * Checks whether the scheme alters the training dataset during
1111: * building. If the scheme needs to modify the data it should take
1112: * a copy of the training data. Currently checks for changes to header
1113: * structure, number of instances, order of instances, instance weights.
1114: *
1115: * @param nominalPredictor if true use nominal predictor attributes
1116: * @param numericPredictor if true use numeric predictor attributes
1117: * @param stringPredictor if true use string predictor attributes
1118: * @param datePredictor if true use date predictor attributes
1119: * @param relationalPredictor if true use relational predictor attributes
1120: * @param multiInstance whether multi-instance is needed
1121: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1122: * @param predictorMissing true if we know the associator can handle
1123: * (at least) moderate missing predictor values
1124: * @param classMissing true if we know the associator can handle
1125: * (at least) moderate missing class values
1126: * @return index 0 is true if the test was passed
1127: */
1128: protected boolean[] datasetIntegrity(boolean nominalPredictor,
1129: boolean numericPredictor, boolean stringPredictor,
1130: boolean datePredictor, boolean relationalPredictor,
1131: boolean multiInstance, int classType,
1132: boolean predictorMissing, boolean classMissing) {
1133:
1134: print("associator doesn't alter original datasets");
1135: printAttributeSummary(nominalPredictor, numericPredictor,
1136: stringPredictor, datePredictor, relationalPredictor,
1137: multiInstance, classType);
1138: print("...");
1139: int numTrain = getNumInstances(), numClasses = 2, missingLevel = 20;
1140:
1141: boolean[] result = new boolean[2];
1142: Instances train = null;
1143: Associator associator = null;
1144: try {
1145: train = makeTestDataset(42, numTrain,
1146: nominalPredictor ? getNumNominal() : 0,
1147: numericPredictor ? getNumNumeric() : 0,
1148: stringPredictor ? getNumString() : 0,
1149: datePredictor ? getNumDate() : 0,
1150: relationalPredictor ? getNumRelational() : 0,
1151: numClasses, classType, multiInstance);
1152: if (missingLevel > 0)
1153: addMissing(train, missingLevel, predictorMissing,
1154: classMissing);
1155: associator = Associator.makeCopies(getAssociator(), 1)[0];
1156: } catch (Exception ex) {
1157: throw new Error("Error setting up for tests: "
1158: + ex.getMessage());
1159: }
1160: try {
1161: Instances trainCopy = new Instances(train);
1162: associator.buildAssociations(trainCopy);
1163: compareDatasets(train, trainCopy);
1164:
1165: println("yes");
1166: result[0] = true;
1167: } catch (Exception ex) {
1168: println("no");
1169: result[0] = false;
1170:
1171: if (m_Debug) {
1172: println("\n=== Full Report ===");
1173: print("Problem during building");
1174: println(": " + ex.getMessage() + "\n");
1175: println("Here is the dataset:\n");
1176: println("=== Train Dataset ===\n" + train.toString()
1177: + "\n");
1178: }
1179: }
1180:
1181: return result;
1182: }
1183:
1184: /**
1185: * Runs a text on the datasets with the given characteristics.
1186: *
1187: * @param nominalPredictor if true use nominal predictor attributes
1188: * @param numericPredictor if true use numeric predictor attributes
1189: * @param stringPredictor if true use string predictor attributes
1190: * @param datePredictor if true use date predictor attributes
1191: * @param relationalPredictor if true use relational predictor attributes
1192: * @param multiInstance whether multi-instance is needed
1193: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1194: * @param missingLevel the percentage of missing values
1195: * @param predictorMissing true if the missing values may be in
1196: * the predictors
1197: * @param classMissing true if the missing values may be in the class
1198: * @param numTrain the number of instances in the training set
1199: * @param numClasses the number of classes
1200: * @param accepts the acceptable string in an exception
1201: * @return index 0 is true if the test was passed, index 1 is true if test
1202: * was acceptable
1203: */
1204: protected boolean[] runBasicTest(boolean nominalPredictor,
1205: boolean numericPredictor, boolean stringPredictor,
1206: boolean datePredictor, boolean relationalPredictor,
1207: boolean multiInstance, int classType, int missingLevel,
1208: boolean predictorMissing, boolean classMissing,
1209: int numTrain, int numClasses, FastVector accepts) {
1210:
1211: return runBasicTest(nominalPredictor, numericPredictor,
1212: stringPredictor, datePredictor, relationalPredictor,
1213: multiInstance, classType, TestInstances.CLASS_IS_LAST,
1214: missingLevel, predictorMissing, classMissing, numTrain,
1215: numClasses, accepts);
1216: }
1217:
1218: /**
1219: * Runs a text on the datasets with the given characteristics.
1220: *
1221: * @param nominalPredictor if true use nominal predictor attributes
1222: * @param numericPredictor if true use numeric predictor attributes
1223: * @param stringPredictor if true use string predictor attributes
1224: * @param datePredictor if true use date predictor attributes
1225: * @param relationalPredictor if true use relational predictor attributes
1226: * @param multiInstance whether multi-instance is needed
1227: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1228: * @param classIndex the attribute index of the class
1229: * @param missingLevel the percentage of missing values
1230: * @param predictorMissing true if the missing values may be in
1231: * the predictors
1232: * @param classMissing true if the missing values may be in the class
1233: * @param numTrain the number of instances in the training set
1234: * @param numClasses the number of classes
1235: * @param accepts the acceptable string in an exception
1236: * @return index 0 is true if the test was passed, index 1 is true if test
1237: * was acceptable
1238: */
1239: protected boolean[] runBasicTest(boolean nominalPredictor,
1240: boolean numericPredictor, boolean stringPredictor,
1241: boolean datePredictor, boolean relationalPredictor,
1242: boolean multiInstance, int classType, int classIndex,
1243: int missingLevel, boolean predictorMissing,
1244: boolean classMissing, int numTrain, int numClasses,
1245: FastVector accepts) {
1246:
1247: boolean[] result = new boolean[2];
1248: Instances train = null;
1249: Associator associator = null;
1250: try {
1251: train = makeTestDataset(42, numTrain,
1252: nominalPredictor ? getNumNominal() : 0,
1253: numericPredictor ? getNumNumeric() : 0,
1254: stringPredictor ? getNumString() : 0,
1255: datePredictor ? getNumDate() : 0,
1256: relationalPredictor ? getNumRelational() : 0,
1257: numClasses, classType, classIndex, multiInstance);
1258: if (missingLevel > 0)
1259: addMissing(train, missingLevel, predictorMissing,
1260: classMissing);
1261: associator = Associator.makeCopies(getAssociator(), 1)[0];
1262: } catch (Exception ex) {
1263: ex.printStackTrace();
1264: throw new Error("Error setting up for tests: "
1265: + ex.getMessage());
1266: }
1267: try {
1268: associator.buildAssociations(train);
1269: println("yes");
1270: result[0] = true;
1271: } catch (Exception ex) {
1272: boolean acceptable = false;
1273: String msg;
1274: if (ex.getMessage() == null)
1275: msg = "";
1276: else
1277: msg = ex.getMessage().toLowerCase();
1278: if (msg.indexOf("not in classpath") > -1)
1279: m_ClasspathProblems = true;
1280:
1281: for (int i = 0; i < accepts.size(); i++) {
1282: if (msg.indexOf((String) accepts.elementAt(i)) >= 0) {
1283: acceptable = true;
1284: }
1285: }
1286:
1287: println("no" + (acceptable ? " (OK error message)" : ""));
1288: result[1] = acceptable;
1289:
1290: if (m_Debug) {
1291: println("\n=== Full Report ===");
1292: print("Problem during building");
1293: println(": " + ex.getMessage() + "\n");
1294: if (!acceptable) {
1295: if (accepts.size() > 0) {
1296: print("Error message doesn't mention ");
1297: for (int i = 0; i < accepts.size(); i++) {
1298: if (i != 0) {
1299: print(" or ");
1300: }
1301: print('"' + (String) accepts.elementAt(i) + '"');
1302: }
1303: }
1304: println("here is the dataset:\n");
1305: println("=== Train Dataset ===\n"
1306: + train.toString() + "\n");
1307: }
1308: }
1309: }
1310:
1311: return result;
1312: }
1313:
1314: /**
1315: * Make a simple set of instances, which can later be modified
1316: * for use in specific tests.
1317: *
1318: * @param seed the random number seed
1319: * @param numInstances the number of instances to generate
1320: * @param numNominal the number of nominal attributes
1321: * @param numNumeric the number of numeric attributes
1322: * @param numString the number of string attributes
1323: * @param numDate the number of date attributes
1324: * @param numRelational the number of relational attributes
1325: * @param numClasses the number of classes (if nominal class)
1326: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1327: * @param multiInstance whether the dataset should a multi-instance dataset
1328: * @return the test dataset
1329: * @throws Exception if the dataset couldn't be generated
1330: * @see #process(Instances)
1331: */
1332: protected Instances makeTestDataset(int seed, int numInstances,
1333: int numNominal, int numNumeric, int numString, int numDate,
1334: int numRelational, int numClasses, int classType,
1335: boolean multiInstance) throws Exception {
1336:
1337: return makeTestDataset(seed, numInstances, numNominal,
1338: numNumeric, numString, numDate, numRelational,
1339: numClasses, classType, TestInstances.CLASS_IS_LAST,
1340: multiInstance);
1341: }
1342:
1343: /**
1344: * Make a simple set of instances with variable position of the class
1345: * attribute, which can later be modified for use in specific tests.
1346: *
1347: * @param seed the random number seed
1348: * @param numInstances the number of instances to generate
1349: * @param numNominal the number of nominal attributes
1350: * @param numNumeric the number of numeric attributes
1351: * @param numString the number of string attributes
1352: * @param numDate the number of date attributes
1353: * @param numRelational the number of relational attributes
1354: * @param numClasses the number of classes (if nominal class)
1355: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1356: * @param classIndex the index of the class (0-based, -1 as last)
1357: * @param multiInstance whether the dataset should a multi-instance dataset
1358: * @return the test dataset
1359: * @throws Exception if the dataset couldn't be generated
1360: * @see TestInstances#CLASS_IS_LAST
1361: * @see #process(Instances)
1362: */
1363: protected Instances makeTestDataset(int seed, int numInstances,
1364: int numNominal, int numNumeric, int numString, int numDate,
1365: int numRelational, int numClasses, int classType,
1366: int classIndex, boolean multiInstance) throws Exception {
1367:
1368: TestInstances dataset = new TestInstances();
1369:
1370: dataset.setSeed(seed);
1371: dataset.setNumInstances(numInstances);
1372: dataset.setNumNominal(numNominal);
1373: dataset.setNumNumeric(numNumeric);
1374: dataset.setNumString(numString);
1375: dataset.setNumDate(numDate);
1376: dataset.setNumRelational(numRelational);
1377: dataset.setNumClasses(numClasses);
1378: if (classType == NO_CLASS) {
1379: dataset.setClassType(Attribute.NOMINAL); // ignored
1380: dataset.setClassIndex(TestInstances.NO_CLASS);
1381: } else {
1382: dataset.setClassType(classType);
1383: dataset.setClassIndex(classIndex);
1384: }
1385: dataset.setNumClasses(numClasses);
1386: dataset.setMultiInstance(multiInstance);
1387: dataset.setWords(getWords());
1388: dataset.setWordSeparators(getWordSeparators());
1389:
1390: return process(dataset.generate());
1391: }
1392:
1393: /**
1394: * Print out a short summary string for the dataset characteristics
1395: *
1396: * @param nominalPredictor true if nominal predictor attributes are present
1397: * @param numericPredictor true if numeric predictor attributes are present
1398: * @param stringPredictor true if string predictor attributes are present
1399: * @param datePredictor true if date predictor attributes are present
1400: * @param relationalPredictor true if relational predictor attributes are present
1401: * @param multiInstance whether multi-instance is needed
1402: * @param classType the class type (NUMERIC, NOMINAL, etc.)
1403: */
1404: protected void printAttributeSummary(boolean nominalPredictor,
1405: boolean numericPredictor, boolean stringPredictor,
1406: boolean datePredictor, boolean relationalPredictor,
1407: boolean multiInstance, int classType) {
1408:
1409: String str = "";
1410:
1411: if (numericPredictor)
1412: str += " numeric";
1413:
1414: if (nominalPredictor) {
1415: if (str.length() > 0)
1416: str += " &";
1417: str += " nominal";
1418: }
1419:
1420: if (stringPredictor) {
1421: if (str.length() > 0)
1422: str += " &";
1423: str += " string";
1424: }
1425:
1426: if (datePredictor) {
1427: if (str.length() > 0)
1428: str += " &";
1429: str += " date";
1430: }
1431:
1432: if (relationalPredictor) {
1433: if (str.length() > 0)
1434: str += " &";
1435: str += " relational";
1436: }
1437:
1438: str += " predictors)";
1439:
1440: switch (classType) {
1441: case Attribute.NUMERIC:
1442: str = " (numeric class," + str;
1443: break;
1444: case Attribute.NOMINAL:
1445: str = " (nominal class," + str;
1446: break;
1447: case Attribute.STRING:
1448: str = " (string class," + str;
1449: break;
1450: case Attribute.DATE:
1451: str = " (date class," + str;
1452: break;
1453: case Attribute.RELATIONAL:
1454: str = " (relational class," + str;
1455: break;
1456: case NO_CLASS:
1457: str = " (no class," + str;
1458: break;
1459: }
1460:
1461: print(str);
1462: }
1463:
1464: /**
1465: * Test method for this class
1466: *
1467: * @param args the commandline parameters
1468: */
1469: public static void main(String[] args) {
1470: runCheck(new CheckAssociator(), args);
1471: }
1472: }
|