0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * TestInstances.java
0019: * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
0020: */
0021:
0022: package weka.core;
0023:
0024: import weka.core.Capabilities.Capability;
0025:
0026: import java.io.Serializable;
0027: import java.util.Enumeration;
0028: import java.util.Random;
0029: import java.util.StringTokenizer;
0030: import java.util.Vector;
0031:
0032: /**
0033: * Generates artificial datasets for testing. In case of multi-instance data,
0034: * the settings for the number of attributes apply to the data inside
0035: * the bag. Originally based on code from the CheckClassifier.<p/>
0036: *
0037: <!-- options-start -->
0038: * Valid options are: <p/>
0039: *
0040: * <pre> -relation <name>
0041: * The name of the data set.</pre>
0042: *
0043: * <pre> -seed <num>
0044: * The seed value.</pre>
0045: *
0046: * <pre> -num-instances <num>
0047: * The number of instances in the datasets (default 20).</pre>
0048: *
0049: * <pre> -class-type <num>
0050: * The class type, see constants in weka.core.Attribute
0051: * (default 1=nominal).</pre>
0052: *
0053: * <pre> -class-values <num>
0054: * The number of classes to generate (for nominal classes only)
0055: * (default 2).</pre>
0056: *
0057: * <pre> -class-index <num>
0058: * The class index, with -1=last, (default -1).</pre>
0059: *
0060: * <pre> -no-class
0061: * Doesn't include a class attribute in the output.</pre>
0062: *
0063: * <pre> -nominal <num>
0064: * The number of nominal attributes (default 1).</pre>
0065: *
0066: * <pre> -nominal-values <num>
0067: * The number of values for nominal attributes (default 2).</pre>
0068: *
0069: * <pre> -numeric <num>
0070: * The number of numeric attributes (default 0).</pre>
0071: *
0072: * <pre> -string <num>
0073: * The number of string attributes (default 0).</pre>
0074: *
0075: * <pre> -words <comma-separated-list>
0076: * The words to use in string attributes.</pre>
0077: *
0078: * <pre> -word-separators <chars>
0079: * The word separators to use in string attributes.</pre>
0080: *
0081: * <pre> -date <num>
0082: * The number of date attributes (default 0).</pre>
0083: *
0084: * <pre> -relational <num>
0085: * The number of relational attributes (default 0).</pre>
0086: *
0087: * <pre> -relational-nominal <num>
0088: * The number of nominal attributes in a rel. attribute (default 1).</pre>
0089: *
0090: * <pre> -relational-nominal-values <num>
0091: * The number of values for nominal attributes in a rel. attribute (default 2).</pre>
0092: *
0093: * <pre> -relational-numeric <num>
0094: * The number of numeric attributes in a rel. attribute (default 0).</pre>
0095: *
0096: * <pre> -relational-string <num>
0097: * The number of string attributes in a rel. attribute (default 0).</pre>
0098: *
0099: * <pre> -relational-date <num>
0100: * The number of date attributes in a rel. attribute (default 0).</pre>
0101: *
0102: * <pre> -num-instances-relational <num>
0103: * The number of instances in relational/bag attributes (default 10).</pre>
0104: *
0105: * <pre> -multi-instance
0106: * Generates multi-instance data.</pre>
0107: *
0108: * <pre> -W <classname>
0109: * The Capabilities handler to base the dataset on.
0110: * The other parameters can be used to override the ones
0111: * determined from the handler. Additional parameters for
0112: * handler can be passed on after the '--'.</pre>
0113: *
0114: <!-- options-end -->
0115: *
0116: * @author FracPete (fracpete at waikato dot ac dot nz)
0117: * @version $Revision: 1.9 $
0118: * @see weka.classifiers.CheckClassifier
0119: */
0120: public class TestInstances implements Cloneable, Serializable,
0121: OptionHandler {
0122:
0123: /** for serialization */
0124: private static final long serialVersionUID = -6263968936330390469L;
0125:
0126: /** can be used for setting the class attribute index to last
0127: * @see #setClassIndex(int) */
0128: public final static int CLASS_IS_LAST = -1;
0129:
0130: /** can be used to avoid generating a class attribute
0131: * @see #setClassIndex(int) */
0132: public final static int NO_CLASS = -2;
0133:
0134: /** the default list of words used in strings */
0135: public final static String[] DEFAULT_WORDS = { "The", "quick",
0136: "brown", "fox", "jumps", "over", "the", "lazy", "dog" };
0137:
0138: /** the default word separators used in strings */
0139: public final static String DEFAULT_SEPARATORS = " ";
0140:
0141: /** for generating String attributes/classes */
0142: protected String[] m_Words = DEFAULT_WORDS;
0143:
0144: /** for generating String attributes/classes */
0145: protected String m_WordSeparators = DEFAULT_SEPARATORS;
0146:
0147: /** the name of the relation */
0148: protected String m_Relation = "Testdata";
0149:
0150: /** the seed value */
0151: protected int m_Seed = 1;
0152:
0153: /** the random number generator */
0154: protected Random m_Random = new Random(m_Seed);
0155:
0156: /** the number of instances */
0157: protected int m_NumInstances = 20;
0158:
0159: /** the class type */
0160: protected int m_ClassType = Attribute.NOMINAL;
0161:
0162: /** the number of classes (in case of NOMINAL class) */
0163: protected int m_NumClasses = 2;
0164:
0165: /** the class index (-1 is LAST, -2 means no class)
0166: * @see #CLASS_IS_LAST
0167: * @see #NO_CLASS */
0168: protected int m_ClassIndex = CLASS_IS_LAST;
0169:
0170: /** the number of nominal attributes */
0171: protected int m_NumNominal = 1;
0172:
0173: /** the number of values for nominal attributes */
0174: protected int m_NumNominalValues = 2;
0175:
0176: /** the number of numeric attributes */
0177: protected int m_NumNumeric = 0;
0178:
0179: /** the number of string attributes */
0180: protected int m_NumString = 0;
0181:
0182: /** the number of date attributes */
0183: protected int m_NumDate = 0;
0184:
0185: /** the number of relational attributes */
0186: protected int m_NumRelational = 0;
0187:
0188: /** the number of nominal attributes in a relational attribute */
0189: protected int m_NumRelationalNominal = 1;
0190:
0191: /** the number of values for nominal attributes in relational attributes */
0192: protected int m_NumRelationalNominalValues = 2;
0193:
0194: /** the number of numeric attributes in a relational attribute */
0195: protected int m_NumRelationalNumeric = 0;
0196:
0197: /** the number of string attributes in a relational attribute */
0198: protected int m_NumRelationalString = 0;
0199:
0200: /** the number of date attributes in a relational attribute */
0201: protected int m_NumRelationalDate = 0;
0202:
0203: /** whether to generate Multi-Instance data or not */
0204: protected boolean m_MultiInstance = false;
0205:
0206: /** the number of instances in relational attributes (applies also for bags
0207: * in multi-instance) */
0208: protected int m_NumInstancesRelational = 10;
0209:
0210: /** the format of the multi-instance data */
0211: protected Instances[] m_RelationalFormat = null;
0212:
0213: /** the format of the multi-instance data of the class */
0214: protected Instances m_RelationalClassFormat = null;
0215:
0216: /** the generated data */
0217: protected Instances m_Data = null;
0218:
0219: /** the CapabilitiesHandler to get the Capabilities from */
0220: protected CapabilitiesHandler m_Handler = null;
0221:
/**
 * Creates a generator that is explicitly reset to its default
 * configuration by running every default through the corresponding setter.
 */
public TestInstances() {
  super();

  // general dataset properties
  this.setRelation("Testdata");
  this.setSeed(1);
  this.setNumInstances(20);

  // class attribute
  this.setClassType(Attribute.NOMINAL);
  this.setNumClasses(2);
  this.setClassIndex(CLASS_IS_LAST);

  // top-level attributes
  this.setNumNominal(1);
  this.setNumNominalValues(2);
  this.setNumNumeric(0);
  this.setNumString(0);
  this.setNumDate(0);
  this.setNumRelational(0);

  // attributes inside relational attributes/bags
  this.setNumRelationalNominal(1);
  this.setNumRelationalNominalValues(2);
  this.setNumRelationalNumeric(0);
  this.setNumRelationalString(0);
  this.setNumRelationalDate(0);
  this.setNumInstancesRelational(10);
  this.setMultiInstance(false);

  // string generation
  this.setWords(arrayToList(DEFAULT_WORDS));
  this.setWordSeparators(DEFAULT_SEPARATORS);
}
0250:
0251: /**
0252: * creates a clone of the current object
0253: *
0254: * @return a clone of the current object
0255: */
0256: public Object clone() {
0257: TestInstances result;
0258:
0259: result = new TestInstances();
0260: result.assign(this );
0261:
0262: return result;
0263: }
0264:
0265: /**
0266: * updates itself with all the settings from the given TestInstances
0267: * object
0268: *
0269: * @param t the object to get the settings from
0270: */
0271: public void assign(TestInstances t) {
0272: setRelation(t.getRelation());
0273: setSeed(t.getSeed());
0274: setNumInstances(t.getNumInstances());
0275: setClassType(t.getClassType());
0276: setNumClasses(t.getNumClasses());
0277: setClassIndex(t.getClassIndex());
0278: setNumNominal(t.getNumNominal());
0279: setNumNominalValues(t.getNumNominalValues());
0280: setNumNumeric(t.getNumNumeric());
0281: setNumString(t.getNumString());
0282: setNumDate(t.getNumDate());
0283: setNumRelational(t.getNumRelational());
0284: setNumRelationalNominal(t.getNumRelationalNominal());
0285: setNumRelationalNominalValues(t.getNumRelationalNominalValues());
0286: setNumRelationalNumeric(t.getNumRelationalNumeric());
0287: setNumRelationalString(t.getNumRelationalString());
0288: setNumRelationalDate(t.getNumRelationalDate());
0289: setMultiInstance(t.getMultiInstance());
0290: for (int i = 0; i < t.getNumRelational(); i++)
0291: setRelationalFormat(i, t.getRelationalFormat(i));
0292: setRelationalClassFormat(t.getRelationalClassFormat());
0293: setNumInstancesRelational(t.getNumInstancesRelational());
0294: setWords(t.getWords());
0295: setWordSeparators(t.getWordSeparators());
0296: }
0297:
0298: /**
0299: * Returns an enumeration describing the available options.
0300: *
0301: * @return an enumeration of all the available options.
0302: */
0303: public Enumeration listOptions() {
0304: Vector result = new Vector();
0305:
0306: result.addElement(new Option("\tThe name of the data set.",
0307: "relation", 1, "-relation <name>"));
0308:
0309: result.addElement(new Option("\tThe seed value.", "seed", 1,
0310: "-seed <num>"));
0311:
0312: result
0313: .addElement(new Option(
0314: "\tThe number of instances in the datasets (default 20).",
0315: "num-instances", 1, "-num-instances <num>"));
0316:
0317: result.addElement(new Option(
0318: "\tThe class type, see constants in weka.core.Attribute\n"
0319: + "\t(default 1=nominal).", "class-type", 1,
0320: "-class-type <num>"));
0321:
0322: result.addElement(new Option(
0323: "\tThe number of classes to generate (for nominal classes only)\n"
0324: + "\t(default 2).", "class-values", 1,
0325: "-class-values <num>"));
0326:
0327: result.addElement(new Option(
0328: "\tThe class index, with -1=last, (default -1).",
0329: "class-index", 1, "-class-index <num>"));
0330:
0331: result.addElement(new Option(
0332: "\tDoesn't include a class attribute in the output.",
0333: "no-class", 0, "-no-class"));
0334:
0335: result.addElement(new Option(
0336: "\tThe number of nominal attributes (default 1).",
0337: "nominal", 1, "-nominal <num>"));
0338:
0339: result
0340: .addElement(new Option(
0341: "\tThe number of values for nominal attributes (default 2).",
0342: "nominal-values", 1, "-nominal-values <num>"));
0343:
0344: result.addElement(new Option(
0345: "\tThe number of numeric attributes (default 0).",
0346: "numeric", 1, "-numeric <num>"));
0347:
0348: result.addElement(new Option(
0349: "\tThe number of string attributes (default 0).",
0350: "string", 1, "-string <num>"));
0351:
0352: result.addElement(new Option(
0353: "\tThe words to use in string attributes.", "words", 1,
0354: "-words <comma-separated-list>"));
0355:
0356: result.addElement(new Option(
0357: "\tThe word separators to use in string attributes.",
0358: "word-separators", 1, "-word-separators <chars>"));
0359:
0360: result.addElement(new Option(
0361: "\tThe number of date attributes (default 0).", "date",
0362: 1, "-date <num>"));
0363:
0364: result.addElement(new Option(
0365: "\tThe number of relational attributes (default 0).",
0366: "relational", 1, "-relational <num>"));
0367:
0368: result
0369: .addElement(new Option(
0370: "\tThe number of nominal attributes in a rel. attribute (default 1).",
0371: "relational-nominal", 1,
0372: "-relational-nominal <num>"));
0373:
0374: result
0375: .addElement(new Option(
0376: "\tThe number of values for nominal attributes in a rel. attribute (default 2).",
0377: "relational-nominal-values", 1,
0378: "-relational-nominal-values <num>"));
0379:
0380: result
0381: .addElement(new Option(
0382: "\tThe number of numeric attributes in a rel. attribute (default 0).",
0383: "relational-numeric", 1,
0384: "-relational-numeric <num>"));
0385:
0386: result
0387: .addElement(new Option(
0388: "\tThe number of string attributes in a rel. attribute (default 0).",
0389: "relational-string", 1,
0390: "-relational-string <num>"));
0391:
0392: result
0393: .addElement(new Option(
0394: "\tThe number of date attributes in a rel. attribute (default 0).",
0395: "relational-date", 1, "-relational-date <num>"));
0396:
0397: result
0398: .addElement(new Option(
0399: "\tThe number of instances in relational/bag attributes (default 10).",
0400: "num-instances-relational", 1,
0401: "-num-instances-relational <num>"));
0402:
0403: result.addElement(new Option(
0404: "\tGenerates multi-instance data.", "multi-instance",
0405: 0, "-multi-instance"));
0406:
0407: result
0408: .addElement(new Option(
0409: "\tThe Capabilities handler to base the dataset on.\n"
0410: + "\tThe other parameters can be used to override the ones\n"
0411: + "\tdetermined from the handler. Additional parameters for\n"
0412: + "\thandler can be passed on after the '--'.",
0413: "W", 1, "-W <classname>"));
0414:
0415: return result.elements();
0416: }
0417:
0418: /**
0419: * Parses a given list of options. <p/>
0420: *
0421: <!-- options-start -->
0422: * Valid options are: <p/>
0423: *
0424: * <pre> -relation <name>
0425: * The name of the data set.</pre>
0426: *
0427: * <pre> -seed <num>
0428: * The seed value.</pre>
0429: *
0430: * <pre> -num-instances <num>
0431: * The number of instances in the datasets (default 20).</pre>
0432: *
0433: * <pre> -class-type <num>
0434: * The class type, see constants in weka.core.Attribute
0435: * (default 1=nominal).</pre>
0436: *
0437: * <pre> -class-values <num>
0438: * The number of classes to generate (for nominal classes only)
0439: * (default 2).</pre>
0440: *
0441: * <pre> -class-index <num>
0442: * The class index, with -1=last, (default -1).</pre>
0443: *
0444: * <pre> -no-class
0445: * Doesn't include a class attribute in the output.</pre>
0446: *
0447: * <pre> -nominal <num>
0448: * The number of nominal attributes (default 1).</pre>
0449: *
0450: * <pre> -nominal-values <num>
0451: * The number of values for nominal attributes (default 2).</pre>
0452: *
0453: * <pre> -numeric <num>
0454: * The number of numeric attributes (default 0).</pre>
0455: *
0456: * <pre> -string <num>
0457: * The number of string attributes (default 0).</pre>
0458: *
0459: * <pre> -words <comma-separated-list>
0460: * The words to use in string attributes.</pre>
0461: *
0462: * <pre> -word-separators <chars>
0463: * The word separators to use in string attributes.</pre>
0464: *
0465: * <pre> -date <num>
0466: * The number of date attributes (default 0).</pre>
0467: *
0468: * <pre> -relational <num>
0469: * The number of relational attributes (default 0).</pre>
0470: *
0471: * <pre> -relational-nominal <num>
0472: * The number of nominal attributes in a rel. attribute (default 1).</pre>
0473: *
0474: * <pre> -relational-nominal-values <num>
0475: * The number of values for nominal attributes in a rel. attribute (default 2).</pre>
0476: *
0477: * <pre> -relational-numeric <num>
0478: * The number of numeric attributes in a rel. attribute (default 0).</pre>
0479: *
0480: * <pre> -relational-string <num>
0481: * The number of string attributes in a rel. attribute (default 0).</pre>
0482: *
0483: * <pre> -relational-date <num>
0484: * The number of date attributes in a rel. attribute (default 0).</pre>
0485: *
0486: * <pre> -num-instances-relational <num>
0487: * The number of instances in relational/bag attributes (default 10).</pre>
0488: *
0489: * <pre> -multi-instance
0490: * Generates multi-instance data.</pre>
0491: *
0492: * <pre> -W <classname>
0493: * The Capabilities handler to base the dataset on.
0494: * The other parameters can be used to override the ones
0495: * determined from the handler. Additional parameters for
0496: * handler can be passed on after the '--'.</pre>
0497: *
0498: <!-- options-end -->
0499: *
0500: * @param options the list of options as an array of strings
0501: * @throws Exception if an option is not supported
0502: */
0503: public void setOptions(String[] options) throws Exception {
0504: String tmpStr;
0505: Class cls;
0506: CapabilitiesHandler handler;
0507: boolean initialized;
0508:
0509: initialized = false;
0510:
0511: tmpStr = Utils.getOption('W', options);
0512: if (tmpStr.length() > 0) {
0513: cls = Class.forName(tmpStr);
0514: if (ClassDiscovery.hasInterface(CapabilitiesHandler.class,
0515: cls)) {
0516: initialized = true;
0517: handler = (CapabilitiesHandler) cls.newInstance();
0518: if (handler instanceof OptionHandler)
0519: ((OptionHandler) handler).setOptions(Utils
0520: .partitionOptions(options));
0521: setHandler(handler);
0522: // initialize
0523: this .assign(forCapabilities(handler.getCapabilities()));
0524: } else {
0525: throw new IllegalArgumentException("Class '" + tmpStr
0526: + "' is not a CapabilitiesHandler!");
0527: }
0528: }
0529:
0530: tmpStr = Utils.getOption("relation", options);
0531: if (tmpStr.length() != 0)
0532: setRelation(tmpStr);
0533: else if (!initialized)
0534: setRelation("Testdata");
0535:
0536: tmpStr = Utils.getOption("seed", options);
0537: if (tmpStr.length() != 0)
0538: setSeed(Integer.parseInt(tmpStr));
0539: else if (!initialized)
0540: setSeed(1);
0541:
0542: tmpStr = Utils.getOption("num-instances", options);
0543: if (tmpStr.length() != 0)
0544: setNumInstances(Integer.parseInt(tmpStr));
0545: else if (!initialized)
0546: setNumInstances(20);
0547:
0548: setNoClass(Utils.getFlag("no-class", options));
0549:
0550: if (!getNoClass()) {
0551: tmpStr = Utils.getOption("class-type", options);
0552: if (tmpStr.length() != 0)
0553: setClassType(Integer.parseInt(tmpStr));
0554: else if (!initialized)
0555: setClassType(Attribute.NOMINAL);
0556:
0557: tmpStr = Utils.getOption("class-values", options);
0558: if (tmpStr.length() != 0)
0559: setNumClasses(Integer.parseInt(tmpStr));
0560: else if (!initialized)
0561: setNumClasses(2);
0562:
0563: tmpStr = Utils.getOption("class-index", options);
0564: if (tmpStr.length() != 0)
0565: setClassIndex(Integer.parseInt(tmpStr));
0566: else if (!initialized)
0567: setClassIndex(-1);
0568: }
0569:
0570: tmpStr = Utils.getOption("nominal", options);
0571: if (tmpStr.length() != 0)
0572: setNumNominal(Integer.parseInt(tmpStr));
0573: else if (!initialized)
0574: setNumNominal(1);
0575:
0576: tmpStr = Utils.getOption("nominal-values", options);
0577: if (tmpStr.length() != 0)
0578: setNumNominalValues(Integer.parseInt(tmpStr));
0579: else if (!initialized)
0580: setNumNominalValues(2);
0581:
0582: tmpStr = Utils.getOption("numeric", options);
0583: if (tmpStr.length() != 0)
0584: setNumNumeric(Integer.parseInt(tmpStr));
0585: else if (!initialized)
0586: setNumNumeric(0);
0587:
0588: tmpStr = Utils.getOption("string", options);
0589: if (tmpStr.length() != 0)
0590: setNumString(Integer.parseInt(tmpStr));
0591: else if (!initialized)
0592: setNumString(0);
0593:
0594: tmpStr = Utils.getOption("words", options);
0595: if (tmpStr.length() != 0)
0596: setWords(tmpStr);
0597: else if (!initialized)
0598: setWords(arrayToList(DEFAULT_WORDS));
0599:
0600: if (Utils.getOptionPos("word-separators", options) > -1) {
0601: tmpStr = Utils.getOption("word-separators", options);
0602: setWordSeparators(tmpStr);
0603: } else if (!initialized) {
0604: setWordSeparators(DEFAULT_SEPARATORS);
0605: }
0606:
0607: tmpStr = Utils.getOption("date", options);
0608: if (tmpStr.length() != 0)
0609: setNumDate(Integer.parseInt(tmpStr));
0610: else if (!initialized)
0611: setNumDate(0);
0612:
0613: tmpStr = Utils.getOption("relational", options);
0614: if (tmpStr.length() != 0)
0615: setNumRelational(Integer.parseInt(tmpStr));
0616: else if (!initialized)
0617: setNumRelational(0);
0618:
0619: tmpStr = Utils.getOption("relational-nominal", options);
0620: if (tmpStr.length() != 0)
0621: setNumRelationalNominal(Integer.parseInt(tmpStr));
0622: else if (!initialized)
0623: setNumRelationalNominal(1);
0624:
0625: tmpStr = Utils.getOption("relational-nominal-values", options);
0626: if (tmpStr.length() != 0)
0627: setNumRelationalNominalValues(Integer.parseInt(tmpStr));
0628: else if (!initialized)
0629: setNumRelationalNominalValues(2);
0630:
0631: tmpStr = Utils.getOption("relational-numeric", options);
0632: if (tmpStr.length() != 0)
0633: setNumRelationalNumeric(Integer.parseInt(tmpStr));
0634: else if (!initialized)
0635: setNumRelationalNumeric(0);
0636:
0637: tmpStr = Utils.getOption("relational-string", options);
0638: if (tmpStr.length() != 0)
0639: setNumRelationalString(Integer.parseInt(tmpStr));
0640: else if (!initialized)
0641: setNumRelationalString(0);
0642:
0643: tmpStr = Utils.getOption("num-instances-relational", options);
0644: if (tmpStr.length() != 0)
0645: setNumInstancesRelational(Integer.parseInt(tmpStr));
0646: else if (!initialized)
0647: setNumInstancesRelational(10);
0648:
0649: if (!initialized)
0650: setMultiInstance(Utils.getFlag("multi-instance", options));
0651: }
0652:
0653: /**
0654: * Gets the current settings of this object.
0655: *
0656: * @return an array of strings suitable for passing to setOptions
0657: */
0658: public String[] getOptions() {
0659: Vector result;
0660: String[] options;
0661: int i;
0662:
0663: result = new Vector();
0664:
0665: result.add("-relation");
0666: result.add(getRelation());
0667:
0668: result.add("-seed");
0669: result.add("" + getSeed());
0670:
0671: result.add("-num-instances");
0672: result.add("" + getNumInstances());
0673:
0674: if (getNoClass()) {
0675: result.add("-no-class");
0676: } else {
0677: result.add("-class-type");
0678: result.add("" + getClassType());
0679:
0680: result.add("-class-values");
0681: result.add("" + getNumClasses());
0682:
0683: result.add("-class-index");
0684: result.add("" + getClassIndex());
0685: }
0686:
0687: result.add("-nominal");
0688: result.add("" + getNumNominal());
0689:
0690: result.add("-nominal-values");
0691: result.add("" + getNumNominalValues());
0692:
0693: result.add("-numeric");
0694: result.add("" + getNumNumeric());
0695:
0696: result.add("-string");
0697: result.add("" + getNumString());
0698:
0699: result.add("-words");
0700: result.add("" + getWords());
0701:
0702: result.add("-word-separators");
0703: result.add("" + getWordSeparators());
0704:
0705: result.add("-date");
0706: result.add("" + getNumDate());
0707:
0708: result.add("-relational");
0709: result.add("" + getNumRelational());
0710:
0711: result.add("-relational-nominal");
0712: result.add("" + getNumRelationalNominal());
0713:
0714: result.add("-relational-nominal-values");
0715: result.add("" + getNumRelationalNominalValues());
0716:
0717: result.add("-relational-numeric");
0718: result.add("" + getNumRelationalNumeric());
0719:
0720: result.add("-relational-string");
0721: result.add("" + getNumRelationalString());
0722:
0723: result.add("-relational-date");
0724: result.add("" + getNumRelationalDate());
0725:
0726: result.add("-num-instances-relational");
0727: result.add("" + getNumInstancesRelational());
0728:
0729: if (getMultiInstance())
0730: result.add("-multi-instance");
0731:
0732: if (getHandler() != null) {
0733: result.add("-W");
0734: result.add(getHandler().getClass().getName());
0735: if (getHandler() instanceof OptionHandler) {
0736: result.add("--");
0737: options = ((OptionHandler) getHandler()).getOptions();
0738: for (i = 0; i < options.length; i++)
0739: result.add(options[i]);
0740: }
0741: }
0742:
0743: return (String[]) result.toArray(new String[result.size()]);
0744: }
0745:
0746: /**
0747: * sets the name of the relation
0748: *
0749: * @param value the name of the relation
0750: */
0751: public void setRelation(String value) {
0752: m_Relation = value;
0753: }
0754:
0755: /**
0756: * returns the current name of the relation
0757: *
0758: * @return the name of the relation
0759: */
0760: public String getRelation() {
0761: return m_Relation;
0762: }
0763:
0764: /**
0765: * sets the seed value for the random number generator
0766: *
0767: * @param value the seed
0768: */
0769: public void setSeed(int value) {
0770: m_Seed = value;
0771: m_Random = new Random(m_Seed);
0772: }
0773:
0774: /**
0775: * returns the current seed value
0776: *
0777: * @return the seed value
0778: */
0779: public int getSeed() {
0780: return m_Seed;
0781: }
0782:
0783: /**
0784: * sets the number of instances to produce
0785: *
0786: * @param value the number of instances
0787: */
0788: public void setNumInstances(int value) {
0789: m_NumInstances = value;
0790: }
0791:
0792: /**
0793: * returns the current number of instances to produce
0794: *
0795: * @return the number of instances
0796: */
0797: public int getNumInstances() {
0798: return m_NumInstances;
0799: }
0800:
0801: /**
0802: * sets the class attribute type
0803: *
0804: * @param value the class attribute type
0805: */
0806: public void setClassType(int value) {
0807: m_ClassType = value;
0808: m_RelationalClassFormat = null;
0809: }
0810:
0811: /**
0812: * returns the current class type
0813: *
0814: * @return the class attribute type
0815: */
0816: public int getClassType() {
0817: return m_ClassType;
0818: }
0819:
0820: /**
0821: * sets the number of classes
0822: *
0823: * @param value the number of classes
0824: */
0825: public void setNumClasses(int value) {
0826: m_NumClasses = value;
0827: }
0828:
0829: /**
0830: * returns the current number of classes
0831: *
0832: * @return the number of classes
0833: */
0834: public int getNumClasses() {
0835: return m_NumClasses;
0836: }
0837:
0838: /**
0839: * sets the class index (0-based)
0840: *
0841: * @param value the class index
0842: * @see #CLASS_IS_LAST
0843: * @see #NO_CLASS
0844: */
0845: public void setClassIndex(int value) {
0846: m_ClassIndex = value;
0847: }
0848:
0849: /**
0850: * returns the current class index (0-based), -1 is last attribute
0851: *
0852: * @return the class index
0853: * @see #CLASS_IS_LAST
0854: * @see #NO_CLASS
0855: */
0856: public int getClassIndex() {
0857: return m_ClassIndex;
0858: }
0859:
0860: /**
0861: * whether to have no class, e.g., for clusterers; otherwise the class
0862: * attribute index is set to last
0863: *
0864: * @param value whether to have no class
0865: * @see #CLASS_IS_LAST
0866: * @see #NO_CLASS
0867: */
0868: public void setNoClass(boolean value) {
0869: if (value)
0870: setClassIndex(NO_CLASS);
0871: else
0872: setClassIndex(CLASS_IS_LAST);
0873: }
0874:
0875: /**
0876: * whether no class attribute is generated
0877: *
0878: * @return true if no class attribute is generated
0879: */
0880: public boolean getNoClass() {
0881: return (getClassIndex() == NO_CLASS);
0882: }
0883:
0884: /**
0885: * sets the number of nominal attributes
0886: *
0887: * @param value the number of nominal attributes
0888: */
0889: public void setNumNominal(int value) {
0890: m_NumNominal = value;
0891: }
0892:
0893: /**
0894: * returns the current number of nominal attributes
0895: *
0896: * @return the number of nominal attributes
0897: */
0898: public int getNumNominal() {
0899: return m_NumNominal;
0900: }
0901:
0902: /**
0903: * sets the number of values for nominal attributes
0904: *
0905: * @param value the number of values
0906: */
0907: public void setNumNominalValues(int value) {
0908: m_NumNominalValues = value;
0909: }
0910:
0911: /**
0912: * returns the current number of values for nominal attributes
0913: *
0914: * @return the number of values
0915: */
0916: public int getNumNominalValues() {
0917: return m_NumNominalValues;
0918: }
0919:
0920: /**
0921: * sets the number of numeric attributes
0922: *
0923: * @param value the number of numeric attributes
0924: */
0925: public void setNumNumeric(int value) {
0926: m_NumNumeric = value;
0927: }
0928:
0929: /**
0930: * returns the current number of numeric attributes
0931: *
0932: * @return the number of numeric attributes
0933: */
0934: public int getNumNumeric() {
0935: return m_NumNumeric;
0936: }
0937:
0938: /**
0939: * sets the number of string attributes
0940: *
0941: * @param value the number of string attributes
0942: */
0943: public void setNumString(int value) {
0944: m_NumString = value;
0945: }
0946:
0947: /**
0948: * returns the current number of string attributes
0949: *
0950: * @return the number of string attributes
0951: */
0952: public int getNumString() {
0953: return m_NumString;
0954: }
0955:
0956: /**
0957: * turns the comma-separated list into an array
0958: *
0959: * @param value the list to process
0960: * @return the list as array
0961: */
0962: protected static String[] listToArray(String value) {
0963: StringTokenizer tok;
0964: Vector list;
0965:
0966: list = new Vector();
0967: tok = new StringTokenizer(value, ",");
0968: while (tok.hasMoreTokens())
0969: list.add(tok.nextToken());
0970:
0971: return (String[]) list.toArray(new String[list.size()]);
0972: }
0973:
0974: /**
0975: * turns the array into a comma-separated list
0976: *
0977: * @param value the array to process
0978: * @return the array as list
0979: */
0980: protected static String arrayToList(String[] value) {
0981: String result;
0982: int i;
0983:
0984: result = "";
0985:
0986: for (i = 0; i < value.length; i++) {
0987: if (i > 0)
0988: result += ",";
0989: result += value[i];
0990: }
0991:
0992: return result;
0993: }
0994:
0995: /**
0996: * Sets the comma-separated list of words to use for generating strings. The
0997: * list must contain at least 2 words, otherwise an exception will be thrown.
0998: *
0999: * @param value the list of words
1000: * @throws IllegalArgumentException if not at least 2 words are provided
1001: */
1002: public void setWords(String value) {
1003: if (listToArray(value).length < 2)
1004: throw new IllegalArgumentException(
1005: "At least 2 words must be provided!");
1006:
1007: m_Words = listToArray(value);
1008: }
1009:
1010: /**
1011: * returns the words used for assembling strings in a comma-separated list.
1012: *
1013: * @return the words as comma-separated list
1014: */
1015: public String getWords() {
1016: return arrayToList(m_Words);
1017: }
1018:
1019: /**
1020: * sets the word separators (chars) to use for assembling strings.
1021: *
1022: * @param value the characters to use as separators
1023: */
1024: public void setWordSeparators(String value) {
1025: m_WordSeparators = value;
1026: }
1027:
1028: /**
1029: * returns the word separators (chars) to use for assembling strings.
1030: *
1031: * @return the current separators
1032: */
1033: public String getWordSeparators() {
1034: return m_WordSeparators;
1035: }
1036:
1037: /**
1038: * sets the number of date attributes
1039: *
1040: * @param value the number of date attributes
1041: */
1042: public void setNumDate(int value) {
1043: m_NumDate = value;
1044: }
1045:
1046: /**
1047: * returns the current number of date attributes
1048: *
1049: * @return the number of date attributes
1050: */
1051: public int getNumDate() {
1052: return m_NumDate;
1053: }
1054:
1055: /**
1056: * sets the number of relational attributes
1057: *
1058: * @param value the number of relational attributes
1059: */
1060: public void setNumRelational(int value) {
1061: m_NumRelational = value;
1062: m_RelationalFormat = new Instances[value];
1063: }
1064:
1065: /**
1066: * returns the current number of relational attributes
1067: *
1068: * @return the number of relational attributes
1069: */
1070: public int getNumRelational() {
1071: return m_NumRelational;
1072: }
1073:
1074: /**
1075: * sets the number of nominal attributes in a relational attribute
1076: *
1077: * @param value the number of nominal attributes
1078: */
1079: public void setNumRelationalNominal(int value) {
1080: m_NumRelationalNominal = value;
1081: }
1082:
1083: /**
1084: * returns the current number of nominal attributes in a relational attribute
1085: *
1086: * @return the number of nominal attributes
1087: */
1088: public int getNumRelationalNominal() {
1089: return m_NumRelationalNominal;
1090: }
1091:
1092: /**
1093: * sets the number of values for nominal attributes in a relational attribute
1094: *
1095: * @param value the number of values
1096: */
1097: public void setNumRelationalNominalValues(int value) {
1098: m_NumRelationalNominalValues = value;
1099: }
1100:
1101: /**
1102: * returns the current number of values for nominal attributes in a relational attribute
1103: *
1104: * @return the number of values
1105: */
1106: public int getNumRelationalNominalValues() {
1107: return m_NumRelationalNominalValues;
1108: }
1109:
1110: /**
1111: * sets the number of numeric attributes in a relational attribute
1112: *
1113: * @param value the number of numeric attributes
1114: */
1115: public void setNumRelationalNumeric(int value) {
1116: m_NumRelationalNumeric = value;
1117: }
1118:
1119: /**
1120: * returns the current number of numeric attributes in a relational attribute
1121: *
1122: * @return the number of numeric attributes
1123: */
1124: public int getNumRelationalNumeric() {
1125: return m_NumRelationalNumeric;
1126: }
1127:
1128: /**
1129: * sets the number of string attributes in a relational attribute
1130: *
1131: * @param value the number of string attributes
1132: */
1133: public void setNumRelationalString(int value) {
1134: m_NumRelationalString = value;
1135: }
1136:
1137: /**
1138: * returns the current number of string attributes in a relational attribute
1139: *
1140: * @return the number of string attributes
1141: */
1142: public int getNumRelationalString() {
1143: return m_NumRelationalString;
1144: }
1145:
1146: /**
1147: * sets the number of date attributes in a relational attribute
1148: *
1149: * @param value the number of date attributes
1150: */
1151: public void setNumRelationalDate(int value) {
1152: m_NumRelationalDate = value;
1153: }
1154:
1155: /**
1156: * returns the current number of date attributes in a relational attribute
1157: *
1158: * @return the number of date attributes
1159: */
1160: public int getNumRelationalDate() {
1161: return m_NumRelationalDate;
1162: }
1163:
1164: /**
1165: * sets the number of instances in relational/bag attributes to produce
1166: *
1167: * @param value the number of instances
1168: */
1169: public void setNumInstancesRelational(int value) {
1170: m_NumInstancesRelational = value;
1171: }
1172:
1173: /**
1174: * returns the current number of instances in relational/bag attributes to produce
1175: *
1176: * @return the number of instances
1177: */
1178: public int getNumInstancesRelational() {
1179: return m_NumInstancesRelational;
1180: }
1181:
1182: /**
1183: * sets whether multi-instance data should be generated (with a fixed
1184: * data structure)
1185: *
1186: * @param value whether multi-instance data is generated
1187: */
1188: public void setMultiInstance(boolean value) {
1189: m_MultiInstance = value;
1190: }
1191:
1192: /**
1193: * Gets whether multi-instance data (with a fixed structure) is generated
1194: *
1195: * @return true if multi-instance data is generated
1196: */
1197: public boolean getMultiInstance() {
1198: return m_MultiInstance;
1199: }
1200:
1201: /**
1202: * sets the structure for the bags for the relational attribute
1203: *
1204: * @param index the index of the relational attribute
1205: * @param value the new structure
1206: */
1207: public void setRelationalFormat(int index, Instances value) {
1208: if (value != null)
1209: m_RelationalFormat[index] = new Instances(value, 0);
1210: else
1211: m_RelationalFormat[index] = null;
1212: }
1213:
1214: /**
1215: * returns the format for the specified relational attribute, can be null
1216: *
1217: * @param index the index of the relational attribute
1218: * @return the current structure
1219: */
1220: public Instances getRelationalFormat(int index) {
1221: return m_RelationalFormat[index];
1222: }
1223:
1224: /**
1225: * sets the structure for the relational class attribute
1226: *
1227: * @param value the structure for the relational attribute
1228: */
1229: public void setRelationalClassFormat(Instances value) {
1230: if (value != null)
1231: m_RelationalClassFormat = new Instances(value, 0);
1232: else
1233: m_RelationalClassFormat = null;
1234: }
1235:
1236: /**
1237: * returns the current structure of the relational class attribute, can
1238: * be null
1239: *
1240: * @return the relational structure of the class attribute
1241: */
1242: public Instances getRelationalClassFormat() {
1243: return m_RelationalClassFormat;
1244: }
1245:
1246: /**
1247: * returns the overall number of attributes (incl. class, if that is also
1248: * generated)
1249: *
1250: * @return the overall number of attributes
1251: */
1252: public int getNumAttributes() {
1253: int result;
1254:
1255: result = m_NumNominal + m_NumNumeric + m_NumString + m_NumDate
1256: + m_NumRelational;
1257:
1258: if (!getNoClass())
1259: result++;
1260:
1261: return result;
1262: }
1263:
1264: /**
1265: * returns the current dataset, can be null
1266: *
1267: * @return the current dataset
1268: */
1269: public Instances getData() {
1270: return m_Data;
1271: }
1272:
1273: /**
1274: * sets the Capabilities handler to generate the data for
1275: *
1276: * @param value the handler to generate the data for
1277: */
1278: public void setHandler(CapabilitiesHandler value) {
1279: m_Handler = value;
1280: }
1281:
1282: /**
1283: * returns the currently set CapabilitiesHandler to generate the dataset
1284: * for, can be null
1285: *
1286: * @return the handler to generate the data for
1287: */
1288: public CapabilitiesHandler getHandler() {
1289: return m_Handler;
1290: }
1291:
1292: /**
1293: * creates a new Attribute of the given type
1294: *
1295: * @param index the index of the current attribute (0-based)
1296: * @param attType the attribute type (NUMERIC, NOMINAL, etc.)
1297: * @return the configured attribute
1298: * @throws Exception if something goes wrong, e.g., an unknown attribute type
1299: *
1300: * @see Attribute#type()
1301: * @see #CLASS_IS_LAST
1302: * @see #NO_CLASS
1303: */
1304: protected Attribute generateAttribute(int index, int attType)
1305: throws Exception {
1306: Attribute result;
1307: String name;
1308: int valIndex;
1309: int nomCount;
1310: String prefix;
1311:
1312: result = null;
1313:
1314: // determine name and start-index
1315: if (index == CLASS_IS_LAST) {
1316: valIndex = 0;
1317: name = "Class";
1318: prefix = "class";
1319: nomCount = getNumClasses();
1320: } else {
1321: valIndex = index;
1322: nomCount = getNumNominalValues();
1323: prefix = "att" + (valIndex + 1) + "val";
1324:
1325: switch (attType) {
1326: case Attribute.NOMINAL:
1327: name = "Nominal" + (valIndex + 1);
1328: break;
1329:
1330: case Attribute.NUMERIC:
1331: name = "Numeric" + (valIndex + 1);
1332: break;
1333:
1334: case Attribute.STRING:
1335: name = "String" + (valIndex + 1);
1336: break;
1337:
1338: case Attribute.DATE:
1339: name = "Date" + (valIndex + 1);
1340: break;
1341:
1342: case Attribute.RELATIONAL:
1343: name = "Relational" + (valIndex + 1);
1344: break;
1345:
1346: default:
1347: throw new IllegalArgumentException("Attribute type '"
1348: + attType + "' unknown!");
1349: }
1350: }
1351:
1352: switch (attType) {
1353: case Attribute.NOMINAL:
1354: FastVector nomStrings = new FastVector(valIndex + 1);
1355: for (int j = 0; j < nomCount; j++)
1356: nomStrings.addElement(prefix + (j + 1));
1357: result = new Attribute(name, nomStrings);
1358: break;
1359:
1360: case Attribute.NUMERIC:
1361: result = new Attribute(name);
1362: break;
1363:
1364: case Attribute.STRING:
1365: result = new Attribute(name, (FastVector) null);
1366: break;
1367:
1368: case Attribute.DATE:
1369: result = new Attribute(name, "yyyy-mm-dd");
1370: break;
1371:
1372: case Attribute.RELATIONAL:
1373: Instances rel;
1374: if (index == CLASS_IS_LAST)
1375: rel = getRelationalClassFormat();
1376: else
1377: rel = getRelationalFormat(index);
1378:
1379: if (rel == null) {
1380: TestInstances dataset = new TestInstances();
1381: dataset.setNumNominal(getNumRelationalNominal());
1382: dataset
1383: .setNumNominalValues(getNumRelationalNominalValues());
1384: dataset.setNumNumeric(getNumRelationalNumeric());
1385: dataset.setNumString(getNumRelationalString());
1386: dataset.setNumDate(getNumRelationalDate());
1387: dataset.setNumInstances(0);
1388: dataset.setClassType(Attribute.NOMINAL); // dummy to avoid endless recursion, will be deleted anyway
1389: rel = new Instances(dataset.generate());
1390: if (!getNoClass()) {
1391: int clsIndex = rel.classIndex();
1392: rel.setClassIndex(-1);
1393: rel.deleteAttributeAt(clsIndex);
1394: }
1395: }
1396: result = new Attribute(name, rel);
1397: break;
1398:
1399: default:
1400: throw new IllegalArgumentException("Attribute type '"
1401: + attType + "' unknown!");
1402: }
1403:
1404: return result;
1405: }
1406:
1407: /**
1408: * Generates the class value
1409: *
1410: * @param data the dataset to work on
1411: * @return the new class value
1412: * @throws Exception if something goes wrong
1413: */
1414: protected double generateClassValue(Instances data)
1415: throws Exception {
1416: double result = Double.NaN;
1417:
1418: switch (m_ClassType) {
1419: case Attribute.NUMERIC:
1420: result = m_Random.nextFloat() * 0.25
1421: + Math.abs(m_Random.nextInt())
1422: % Math.max(2, m_NumNominal);
1423: break;
1424:
1425: case Attribute.NOMINAL:
1426: result = Math.abs(m_Random.nextInt()) % data.numClasses();
1427: break;
1428:
1429: case Attribute.STRING:
1430: String str = "";
1431: for (int n = 0; n < m_Words.length; n++) {
1432: if ((n > 0) && (m_WordSeparators.length() != 0))
1433: str += m_WordSeparators.charAt(m_Random
1434: .nextInt(m_WordSeparators.length()));
1435: str += m_Words[m_Random.nextInt(m_Words.length)];
1436: }
1437: result = data.classAttribute().addStringValue(str);
1438: break;
1439:
1440: case Attribute.DATE:
1441: result = data.classAttribute().parseDate(
1442: (2000 + m_Random.nextInt(100)) + "-01-01");
1443: break;
1444:
1445: case Attribute.RELATIONAL:
1446: if (getRelationalClassFormat() != null) {
1447: result = data.classAttribute().addRelation(
1448: getRelationalClassFormat());
1449: } else {
1450: TestInstances dataset = new TestInstances();
1451: dataset.setNumNominal(getNumRelationalNominal());
1452: dataset
1453: .setNumNominalValues(getNumRelationalNominalValues());
1454: dataset.setNumNumeric(getNumRelationalNumeric());
1455: dataset.setNumString(getNumRelationalString());
1456: dataset.setNumDate(getNumRelationalDate());
1457: dataset.setNumInstances(getNumInstancesRelational());
1458: dataset.setClassType(Attribute.NOMINAL); // dummy to avoid endless recursion, will be deleted anyway
1459: Instances rel = new Instances(dataset.generate());
1460: int clsIndex = rel.classIndex();
1461: rel.setClassIndex(-1);
1462: rel.deleteAttributeAt(clsIndex);
1463: result = data.classAttribute().addRelation(rel);
1464: }
1465: break;
1466: }
1467:
1468: return result;
1469: }
1470:
1471: /**
1472: * Generates a new value for the specified attribute. The classValue
1473: * might be used in the process.
1474: *
1475: * @param data the dataset to work on
1476: * @param index the index of the attribute
1477: * @param classVal the class value for the current instance, might be
1478: * used in the calculation
1479: * @return the new attribute value
1480: * @throws Exception if something goes wrong
1481: */
1482: protected double generateAttributeValue(Instances data, int index,
1483: double classVal) throws Exception {
1484: double result = Double.NaN;
1485:
1486: switch (data.attribute(index).type()) {
1487: case Attribute.NUMERIC:
1488: result = classVal * 4 + m_Random.nextFloat() * 1 - 0.5;
1489: break;
1490:
1491: case Attribute.NOMINAL:
1492: if (m_Random.nextFloat() < 0.2) {
1493: result = Math.abs(m_Random.nextInt())
1494: % data.attribute(index).numValues();
1495: } else {
1496: result = ((int) classVal)
1497: % data.attribute(index).numValues();
1498: }
1499: //result = m_Random.nextInt(data.attribute(index).numValues());
1500: break;
1501:
1502: case Attribute.STRING:
1503: String str = "";
1504: for (int n = 0; n < m_Words.length; n++) {
1505: if ((n > 0) && (m_WordSeparators.length() != 0))
1506: str += m_WordSeparators.charAt(m_Random
1507: .nextInt(m_WordSeparators.length()));
1508: str += m_Words[m_Random.nextInt(m_Words.length)];
1509: }
1510: result = data.attribute(index).addStringValue(str);
1511: break;
1512:
1513: case Attribute.DATE:
1514: result = data.attribute(index).parseDate(
1515: (2000 + m_Random.nextInt(100)) + "-01-01");
1516: break;
1517:
1518: case Attribute.RELATIONAL:
1519: Instances rel = new Instances(data.attribute(index)
1520: .relation(), 0);
1521: for (int n = 0; n < getNumInstancesRelational(); n++) {
1522: Instance inst = new Instance(rel.numAttributes());
1523: inst.setDataset(data);
1524: for (int i = 0; i < rel.numAttributes(); i++) {
1525: inst.setValue(i, generateAttributeValue(rel, i, 0));
1526: }
1527: rel.add(inst);
1528: }
1529: result = data.attribute(index).addRelation(rel);
1530: break;
1531: }
1532:
1533: return result;
1534: }
1535:
1536: /**
1537: * generates a new dataset.
1538: *
1539: * @return the generated data
1540: * @throws Exception if something goes wrong
1541: */
1542: public Instances generate() throws Exception {
1543: if (getMultiInstance()) {
1544: TestInstances bag = (TestInstances) this .clone();
1545: bag.setMultiInstance(false);
1546: bag.setNumInstances(0);
1547: bag.setSeed(m_Random.nextInt());
1548: Instances bagFormat = bag.generate();
1549: bagFormat.setClassIndex(-1);
1550: bagFormat.deleteAttributeAt(bagFormat.numAttributes() - 1);
1551:
1552: // generate multi-instance structure
1553: TestInstances structure = new TestInstances();
1554: structure.setSeed(m_Random.nextInt());
1555: structure.setNumNominal(1);
1556: structure.setNumRelational(1);
1557: structure.setRelationalFormat(0, bagFormat);
1558: structure.setClassType(getClassType());
1559: structure.setNumClasses(getNumClasses());
1560: structure
1561: .setRelationalClassFormat(getRelationalClassFormat());
1562: structure.setNumInstances(getNumInstances());
1563: m_Data = structure.generate();
1564:
1565: // generate bags
1566: bag.setNumInstances(getNumInstancesRelational());
1567: for (int i = 0; i < getNumInstances(); i++) {
1568: bag.setSeed(m_Random.nextInt());
1569: Instances bagData = new Instances(bag.generate());
1570: bagData.setClassIndex(-1);
1571: bagData.deleteAttributeAt(bagData.numAttributes() - 1);
1572: double val = m_Data.attribute(1).addRelation(bagData);
1573: m_Data.instance(i).setValue(1, val);
1574: }
1575: } else {
1576: // initialize
1577: int clsIndex = m_ClassIndex;
1578: if (clsIndex == CLASS_IS_LAST)
1579: clsIndex = getNumAttributes() - 1;
1580:
1581: // generate attributes
1582: FastVector attributes = new FastVector(getNumAttributes());
1583: // Add Nominal attributes
1584: for (int i = 0; i < getNumNominal(); i++)
1585: attributes.addElement(generateAttribute(i,
1586: Attribute.NOMINAL));
1587:
1588: // Add m_Numeric attributes
1589: for (int i = 0; i < getNumNumeric(); i++)
1590: attributes.addElement(generateAttribute(i,
1591: Attribute.NUMERIC));
1592:
1593: // Add some String attributes...
1594: for (int i = 0; i < getNumString(); i++)
1595: attributes.addElement(generateAttribute(i,
1596: Attribute.STRING));
1597:
1598: // Add some Date attributes...
1599: for (int i = 0; i < getNumDate(); i++)
1600: attributes.addElement(generateAttribute(i,
1601: Attribute.DATE));
1602:
1603: // Add some Relational attributes...
1604: for (int i = 0; i < getNumRelational(); i++)
1605: attributes.addElement(generateAttribute(i,
1606: Attribute.RELATIONAL));
1607:
1608: // Add class attribute
1609: if (clsIndex != NO_CLASS)
1610: attributes.insertElementAt(generateAttribute(
1611: CLASS_IS_LAST, getClassType()), clsIndex);
1612:
1613: m_Data = new Instances(getRelation(), attributes,
1614: getNumInstances());
1615: m_Data.setClassIndex(clsIndex);
1616:
1617: // generate instances
1618: for (int i = 0; i < getNumInstances(); i++) {
1619: Instance current = new Instance(getNumAttributes());
1620: current.setDataset(m_Data);
1621:
1622: // class
1623: double classVal;
1624: if (clsIndex != NO_CLASS) {
1625: classVal = generateClassValue(m_Data);
1626: current.setClassValue(classVal);
1627: } else {
1628: classVal = m_Random.nextFloat();
1629: }
1630:
1631: // other attributes
1632: for (int n = 0; n < getNumAttributes(); n++) {
1633: if (clsIndex == n)
1634: continue;
1635:
1636: current.setValue(n, generateAttributeValue(m_Data,
1637: n, classVal));
1638: }
1639:
1640: m_Data.add(current);
1641: }
1642: }
1643:
1644: if (m_Data.classIndex() == NO_CLASS)
1645: m_Data.setClassIndex(-1);
1646:
1647: return getData();
1648: }
1649:
1650: /**
1651: * returns a TestInstances instance that is already set up for the given
1652: * capabilities.
1653: *
1654: * @param c the capabilities to base the TestInstances on
1655: * @return the configured TestInstances object
1656: */
1657: public static TestInstances forCapabilities(Capabilities c) {
1658: TestInstances result;
1659:
1660: result = new TestInstances();
1661:
1662: // multi-instance?
1663: if (c.getOwner() instanceof MultiInstanceCapabilitiesHandler) {
1664: Capabilities multi = (Capabilities) ((MultiInstanceCapabilitiesHandler) c
1665: .getOwner()).getMultiInstanceCapabilities().clone();
1666: multi.setOwner(null); // otherwise recursive!
1667: result = forCapabilities(multi);
1668: result.setMultiInstance(true);
1669: } else {
1670: // class
1671: if (c.handles(Capability.NO_CLASS))
1672: result.setClassIndex(NO_CLASS);
1673: else if (c.handles(Capability.NOMINAL_CLASS))
1674: result.setClassType(Attribute.NOMINAL);
1675: else if (c.handles(Capability.BINARY_CLASS))
1676: result.setClassType(Attribute.NOMINAL);
1677: else if (c.handles(Capability.NUMERIC_CLASS))
1678: result.setClassType(Attribute.NUMERIC);
1679: else if (c.handles(Capability.DATE_CLASS))
1680: result.setClassType(Attribute.DATE);
1681: else if (c.handles(Capability.STRING_CLASS))
1682: result.setClassType(Attribute.STRING);
1683: else if (c.handles(Capability.RELATIONAL_CLASS))
1684: result.setClassType(Attribute.RELATIONAL);
1685:
1686: // # of classes
1687: if (c.handles(Capability.UNARY_CLASS))
1688: result.setNumClasses(1);
1689: if (c.handles(Capability.BINARY_CLASS))
1690: result.setNumClasses(2);
1691: if (c.handles(Capability.NOMINAL_CLASS))
1692: result.setNumClasses(4);
1693:
1694: // attributes
1695: if (c.handles(Capability.NOMINAL_ATTRIBUTES)) {
1696: result.setNumNominal(1);
1697: result.setNumRelationalNominal(1);
1698: } else {
1699: result.setNumNominal(0);
1700: result.setNumRelationalNominal(0);
1701: }
1702:
1703: if (c.handles(Capability.NUMERIC_ATTRIBUTES)) {
1704: result.setNumNumeric(1);
1705: result.setNumRelationalNumeric(1);
1706: } else {
1707: result.setNumNumeric(0);
1708: result.setNumRelationalNumeric(0);
1709: }
1710:
1711: if (c.handles(Capability.DATE_ATTRIBUTES)) {
1712: result.setNumDate(1);
1713: result.setNumRelationalDate(1);
1714: } else {
1715: result.setNumDate(0);
1716: result.setNumRelationalDate(0);
1717: }
1718:
1719: if (c.handles(Capability.STRING_ATTRIBUTES)) {
1720: result.setNumString(1);
1721: result.setNumRelationalString(1);
1722: } else {
1723: result.setNumString(0);
1724: result.setNumRelationalString(0);
1725: }
1726:
1727: if (c.handles(Capability.RELATIONAL_ATTRIBUTES))
1728: result.setNumRelational(1);
1729: else
1730: result.setNumRelational(0);
1731: }
1732:
1733: return result;
1734: }
1735:
1736: /**
1737: * returns a string representation of the object
1738: *
1739: * @return a string representation of the object
1740: */
1741: public String toString() {
1742: String result;
1743:
1744: result = "";
1745: result += "Relation: " + getRelation() + "\n";
1746: result += "Seed: " + getSeed() + "\n";
1747: result += "# Instances: " + getNumInstances() + "\n";
1748: result += "ClassType: " + getClassType() + "\n";
1749: result += "# Classes: " + getNumClasses() + "\n";
1750: result += "Class index: " + getClassIndex() + "\n";
1751: result += "# Nominal: " + getNumNominal() + "\n";
1752: result += "# Nominal values: " + getNumNominalValues() + "\n";
1753: result += "# Numeric: " + getNumNumeric() + "\n";
1754: result += "# String: " + getNumString() + "\n";
1755: result += "# Date: " + getNumDate() + "\n";
1756: result += "# Relational: " + getNumRelational() + "\n";
1757: result += " - # Nominal: " + getNumRelationalNominal() + "\n";
1758: result += " - # Nominal values: "
1759: + getNumRelationalNominalValues() + "\n";
1760: result += " - # Numeric: " + getNumRelationalNumeric() + "\n";
1761: result += " - # String: " + getNumRelationalString() + "\n";
1762: result += " - # Date: " + getNumRelationalDate() + "\n";
1763: result += " - # Instances: " + getNumInstancesRelational()
1764: + "\n";
1765: result += "Multi-Instance: " + getMultiInstance() + "\n";
1766: result += "Words: " + getWords() + "\n";
1767: result += "Word separators: " + getWordSeparators() + "\n";
1768:
1769: return result;
1770: }
1771:
1772: /**
1773: * for running the class from commandline, prints the generated data
1774: * to stdout
1775: *
1776: * @param args the commandline parameters
1777: * @throws Exception if something goes wrong
1778: */
1779: public static void main(String[] args) throws Exception {
1780: TestInstances inst;
1781:
1782: inst = new TestInstances();
1783:
1784: // help requested?
1785: if (Utils.getFlag("h", args) || Utils.getFlag("help", args)) {
1786: StringBuffer result = new StringBuffer();
1787: result.append("\nTest data generator options:\n\n");
1788:
1789: result.append("-h|-help\n\tprints this help\n");
1790:
1791: Enumeration enm = inst.listOptions();
1792: while (enm.hasMoreElements()) {
1793: Option option = (Option) enm.nextElement();
1794: result.append(option.synopsis() + "\n"
1795: + option.description() + "\n");
1796: }
1797:
1798: System.out.println(result);
1799: System.exit(0);
1800: }
1801:
1802: // generate data
1803: inst.setOptions(args);
1804: System.out.println(inst.generate());
1805: }
1806: }
|