0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * OLM.java
0019: * Copyright (C) 2004 Stijn Lievens
0020: *
0021: */
0022:
0023: package weka.classifiers.misc;
0024:
0025: import weka.classifiers.RandomizableClassifier;
0026: import weka.classifiers.misc.monotone.Coordinates;
0027: import weka.classifiers.misc.monotone.DiscreteDistribution;
0028: import weka.classifiers.misc.monotone.EnumerationIterator;
0029: import weka.classifiers.misc.monotone.InstancesComparator;
0030: import weka.classifiers.misc.monotone.InstancesUtil;
0031: import weka.classifiers.misc.monotone.MultiDimensionalSort;
0032: import weka.core.Attribute;
0033: import weka.core.Capabilities;
0034: import weka.core.FastVector;
0035: import weka.core.Instance;
0036: import weka.core.Instances;
0037: import weka.core.Option;
0038: import weka.core.SelectedTag;
0039: import weka.core.Tag;
0040: import weka.core.TechnicalInformation;
0041: import weka.core.TechnicalInformationHandler;
0042: import weka.core.Utils;
0043: import weka.core.Capabilities.Capability;
0044: import weka.core.TechnicalInformation.Field;
0045: import weka.core.TechnicalInformation.Type;
0046: import weka.estimators.DiscreteEstimator;
0047:
0048: import java.util.ArrayList;
0049: import java.util.Comparator;
0050: import java.util.Enumeration;
0051: import java.util.HashMap;
0052: import java.util.Iterator;
0053: import java.util.Map;
0054: import java.util.Random;
0055: import java.util.Vector;
0056:
0057: /**
0058: <!-- globalinfo-start -->
0059: * This class is an implementation of the Ordinal Learning Method<br/>
0060: * Further information regarding the algorithm and variants can be found in:<br/>
0061: * <br/>
0062: * Arie Ben-David (1992). Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications. Decision Sciences. 23:1357-1372.<br/>
0063: * <br/>
0064: * Lievens, Stijn (2003-2004). Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken..
0065: * <p/>
0066: <!-- globalinfo-end -->
0067: *
0068: <!-- technical-bibtex-start -->
0069: * BibTeX:
0070: * <pre>
0071: * @article{Ben-David1992,
0072: * author = {Arie Ben-David},
0073: * journal = {Decision Sciences},
0074: * pages = {1357-1372},
0075: * title = {Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications},
0076: * volume = {23},
0077: * year = {1992}
0078: * }
0079: *
0080: * @mastersthesis{Lievens2003-2004,
0081: * author = {Lievens, Stijn},
0082: * school = {Ghent University},
0083: * title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.},
0084: * year = {2003-2004}
0085: * }
0086: * </pre>
0087: * <p/>
0088: <!-- technical-bibtex-end -->
0089: *
0090: <!-- options-start -->
0091: * Valid options are: <p/>
0092: *
0093: * <pre> -S <num>
0094: * Random number seed.
0095: * (default 1)</pre>
0096: *
0097: * <pre> -D
0098: * If set, classifier is run in debug mode and
0099: * may output additional info to the console</pre>
0100: *
0101: * <pre> -C <CL|REG>
0102: * Sets the classification type to be used.
0103: * (Default: REG)</pre>
0104: *
0105: * <pre> -A <MEAN|MED|MAX>
0106: * Sets the averaging type used in phase 1 of the classifier.
0107: * (Default: MEAN)</pre>
0108: *
0109: * <pre> -N <NONE|EUCL|HAM>
0110: * If different from NONE, a nearest neighbour rule is fired when the
0111: * rule base doesn't contain an example smaller than the instance
0112: * to be classified
0113: * (Default: NONE).</pre>
0114: *
0115: * <pre> -E <MIN|MAX|BOTH>
0116: * Sets the extension type, i.e. the rule base to use.
0117: * (Default: MIN)</pre>
0118: *
0119: * <pre> -sort
0120: * If set, the instances are also sorted within the same class
0121: * before building the rule bases</pre>
0122: *
0123: <!-- options-end -->
0124: *
0125: * @author Stijn Lievens (stijn.lievens@ugent.be)
0126: * @version $Revision: 1.1 $
0127: */
0128: public class OLM extends RandomizableClassifier implements
0129: TechnicalInformationHandler {
0130:
  /** for serialization */
  private static final long serialVersionUID = 3722951802290935192L;

  /**
   * Classification type: round the real value that is returned by the
   * original algorithm to the nearest label.
   */
  public static final int CT_ROUNDED = 0;

  /**
   * Classification type: no rounding is performed during classification,
   * i.e. the classification is done in a regression-like way and may
   * return values in between labels.
   */
  public static final int CT_REAL = 1;

  /** the classification types */
  public static final Tag[] TAGS_CLASSIFICATIONTYPES = {
      new Tag(CT_ROUNDED, "CL", "Round to nearest label"),
      new Tag(CT_REAL, "REG", "Regression-like classification") };

  /**
   * Averaging type: use the mean for averaging in phase 1. This is in
   * fact a non-ordinal procedure. The scores used for averaging are the
   * internal values of WEKA.
   */
  public static final int AT_MEAN = 0;

  /**
   * Averaging type: use the median for averaging in phase 1. The
   * possible values are in the extended set of labels, i.e. labels in
   * between the original labels are possible.
   */
  public static final int AT_MEDIAN = 1;

  /**
   * Averaging type: use the mode for averaging in phase 1. The label
   * that has maximum frequency is used. If there is more than one label
   * that has maximum frequency, the lowest one is preferred.
   */
  public static final int AT_MAXPROB = 2;

  /** the averaging types */
  public static final Tag[] TAGS_AVERAGINGTYPES = {
      new Tag(AT_MEAN, "MEAN", "Mean"),
      new Tag(AT_MEDIAN, "MED", "Median"),
      new Tag(AT_MAXPROB, "MAX", "Max probability") };

  /**
   * Distance type: no nearest neighbour rule will be fired when
   * classifying an instance for which there is no smaller rule in the
   * rule base.
   */
  public static final int DT_NONE = -1;

  /**
   * Distance type: use the Euclidean distance whenever a nearest
   * neighbour rule is fired.
   */
  public static final int DT_EUCLID = 0;

  /**
   * Distance type: use the Hamming distance, i.e. the number of
   * positions in which the instances differ, whenever a nearest
   * neighbour rule is fired.
   */
  public static final int DT_HAMMING = 1;

  /** the distance types */
  public static final Tag[] TAGS_DISTANCETYPES = {
      new Tag(DT_NONE, "NONE", "No nearest neighbor"),
      new Tag(DT_EUCLID, "EUCL", "Euclidean"),
      new Tag(DT_HAMMING, "HAM", "Hamming") };

  /**
   * Extension type: use only the minimal extension, as in the original
   * algorithm of Ben-David.
   */
  public static final int ET_MIN = 0;

  /**
   * Extension type: use only the maximal extension. In this case an
   * algorithm dual to the original one is performed.
   */
  public static final int ET_MAX = 1;

  /**
   * Extension type: combine both the minimal and maximal extension, and
   * use the midpoint of the resulting interval as prediction.
   */
  public static final int ET_BOTH = 2;

  /** the extension ("mode") types */
  public static final Tag[] TAGS_EXTENSIONTYPES = {
      new Tag(ET_MIN, "MIN", "Minimal extension"),
      new Tag(ET_MAX, "MAX", "Maximal extension"),
      new Tag(ET_BOTH, "BOTH", "Minimal and maximal extension") };

  /**
   * The training examples, used temporarily.
   * m_train is cleared after the rule base is built.
   */
  private Instances m_train;

  /** Number of classes in the original m_train */
  private int m_numClasses;

  /**
   * The rule base, should be consistent and contain no redundant rules.
   * This is the rule base as in the original algorithm of Ben-David.
   */
  private Instances m_baseMin;

  /**
   * This is a complementary rule base, using the maximal rather than
   * the minimal extension.
   */
  private Instances m_baseMax;

  /**
   * Map used in the method buildClassifier in order to quickly gather
   * all info needed for phase 1. This is a map containing
   * (Coordinates, DiscreteEstimator)-pairs.
   */
  private Map m_estimatedDistributions;

  /** classification type (one of the CT_* constants) */
  private int m_ctype = CT_REAL;

  /** averaging type (one of the AT_* constants) */
  private int m_atype = AT_MEAN;

  /** distance type (one of the DT_* constants) */
  private int m_dtype = DT_EUCLID;

  /** extension ("mode") type (one of the ET_* constants) */
  private int m_etype = ET_MIN;

  /**
   * Should the instances be sorted such that minimal (resp. maximal)
   * elements (per class) are treated first when building m_baseMin
   * (resp. m_baseMax)?
   */
  private boolean m_sort = false;
0276:
0277: /**
0278: * Returns a string describing the classifier.
0279: * @return a description suitable for displaying in the
0280: * explorer/experimenter gui
0281: */
0282: public String globalInfo() {
0283: return "This class is an implementation of the Ordinal Learning "
0284: + "Method\n"
0285: + "Further information regarding the algorithm and variants "
0286: + "can be found in:\n\n"
0287: + getTechnicalInformation().toString();
0288: }
0289:
0290: /**
0291: * Returns default capabilities of the classifier.
0292: *
0293: * @return the capabilities of this classifier
0294: */
0295: public Capabilities getCapabilities() {
0296: Capabilities result = super .getCapabilities();
0297:
0298: // attributes
0299: result.enable(Capability.NOMINAL_ATTRIBUTES);
0300:
0301: // class
0302: result.enable(Capability.NOMINAL_CLASS);
0303: result.enable(Capability.MISSING_CLASS_VALUES);
0304:
0305: // instances
0306: result.setMinimumNumberInstances(0);
0307:
0308: return result;
0309: }
0310:
0311: /**
0312: * Returns an instance of a TechnicalInformation object, containing
0313: * detailed information about the technical background of this class,
0314: * e.g., paper reference or book this class is based on.
0315: *
0316: * @return the technical information about this class
0317: */
0318: public TechnicalInformation getTechnicalInformation() {
0319: TechnicalInformation result;
0320: TechnicalInformation additional;
0321:
0322: result = new TechnicalInformation(Type.ARTICLE);
0323: result.setValue(Field.AUTHOR, "Arie Ben-David");
0324: result.setValue(Field.YEAR, "1992");
0325: result
0326: .setValue(
0327: Field.TITLE,
0328: "Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications");
0329: result.setValue(Field.JOURNAL, "Decision Sciences");
0330: result.setValue(Field.PAGES, "1357-1372");
0331: result.setValue(Field.VOLUME, "23");
0332:
0333: additional = result.add(Type.MASTERSTHESIS);
0334: additional.setValue(Field.AUTHOR, "Lievens, Stijn");
0335: additional.setValue(Field.YEAR, "2003-2004");
0336: additional
0337: .setValue(
0338: Field.TITLE,
0339: "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.");
0340: additional.setValue(Field.SCHOOL, "Ghent University");
0341:
0342: return result;
0343: }
0344:
0345: /**
0346: * Returns the tip text for this property.
0347: *
0348: * @return tip text for this property suitable for
0349: * displaying in the explorer/experimenter gui
0350: */
0351: public String classificationTypeTipText() {
0352: return "Sets the classification type.";
0353: }
0354:
0355: /**
0356: * Sets the classification type.
0357: *
0358: * @param value the classification type to be set.
0359: */
0360: public void setClassificationType(SelectedTag value) {
0361: if (value.getTags() == TAGS_CLASSIFICATIONTYPES)
0362: m_ctype = value.getSelectedTag().getID();
0363: }
0364:
0365: /**
0366: * Gets the classification type.
0367: *
0368: * @return the classification type
0369: */
0370: public SelectedTag getClassificationType() {
0371: return new SelectedTag(m_ctype, TAGS_CLASSIFICATIONTYPES);
0372: }
0373:
0374: /**
0375: * Returns the tip text for this property.
0376: *
0377: * @return tip text for this property suitable for
0378: * displaying in the explorer/experimenter gui
0379: */
0380: public String averagingTypeTipText() {
0381: return "Choses the way in which the distributions are averaged in "
0382: + "the first phase of the algorithm.";
0383: }
0384:
0385: /**
0386: * Sets the averaging type to use in phase 1 of the algorithm.
0387: *
0388: * @param value the averaging type to use
0389: */
0390: public void setAveragingType(SelectedTag value) {
0391: if (value.getTags() == TAGS_AVERAGINGTYPES)
0392: m_atype = value.getSelectedTag().getID();
0393: }
0394:
0395: /**
0396: * Gets the averaging type.
0397: *
0398: * @return the averaging type
0399: */
0400: public SelectedTag getAveragingType() {
0401: return new SelectedTag(m_atype, TAGS_AVERAGINGTYPES);
0402: }
0403:
0404: /**
0405: * Returns the tip text for this property.
0406: *
0407: * @return tip text for this property suitable for
0408: * displaying in the explorer/experimenter gui
0409: */
0410: public String distanceTypeTipText() {
0411: return "Sets the distance that is to be used by the nearest neighbour "
0412: + "rule";
0413: }
0414:
0415: /**
0416: * Sets the distance type to be used by a nearest neighbour rule (if any).
0417: *
0418: * @param value the distance type to use
0419: */
0420: public void setDistanceType(SelectedTag value) {
0421: if (value.getTags() == TAGS_DISTANCETYPES)
0422: m_dtype = value.getSelectedTag().getID();
0423: }
0424:
0425: /**
0426: * Gets the distance type used by a nearest neighbour rule (if any).
0427: *
0428: * @return the distance type
0429: */
0430: public SelectedTag getDistanceType() {
0431: return new SelectedTag(m_dtype, TAGS_DISTANCETYPES);
0432: }
0433:
0434: /**
0435: * Returns the tip text for this property.
0436: *
0437: * @return tip text for this property suitable for
0438: * displaying in the explorer/experimenter gui
0439: */
0440: public String extensionTypeTipText() {
0441: return "Sets the extension type to use.";
0442: }
0443:
0444: /**
0445: * Sets the extension type to use.
0446: * The minimal extension is the one used by
0447: * Ben-David in the original algorithm. The maximal extension is
0448: * a completely dual variant of the minimal extension. When using
0449: * both, then the midpoint of the interval determined by both
0450: * extensions is returned.
0451: *
0452: * @param value the extension type to use
0453: */
0454: public void setExtensionType(SelectedTag value) {
0455: if (value.getTags() == TAGS_EXTENSIONTYPES)
0456: m_etype = value.getSelectedTag().getID();
0457: }
0458:
0459: /**
0460: * Gets the extension type.
0461: *
0462: * @return the extension type
0463: */
0464: public SelectedTag getExtensionType() {
0465: return new SelectedTag(m_etype, TAGS_EXTENSIONTYPES);
0466: }
0467:
0468: /**
0469: * Returns the tip text for this property.
0470: *
0471: * @return tip text for this property suitable for
0472: * displaying in the explorer/experimenter gui
0473: */
0474: public String sortTipText() {
0475: return "If true, the instances are also sorted within the classes "
0476: + "prior to building the rule bases.";
0477: }
0478:
0479: /**
0480: * Sets if the instances are to be sorted prior to building the rule bases.
0481: *
0482: * @param sort if <code> true </code> the instances will be sorted
0483: */
0484: public void setSort(boolean sort) {
0485: m_sort = sort;
0486: }
0487:
0488: /**
0489: * Returns if the instances are sorted prior to building the rule bases.
0490: *
0491: * @return <code> true </code> if instances are sorted prior to building
0492: * the rule bases, <code> false </code> otherwise.
0493: */
0494: public boolean getSort() {
0495: return m_sort;
0496: }
0497:
0498: /**
0499: * Returns the tip text for this property.
0500: *
0501: * @return tip text for this property suitable for
0502: * displaying in the explorer/experimenter gui
0503: */
0504: public String seedTipText() {
0505: return "Sets the seed that is used to randomize the instances prior "
0506: + "to building the rule bases";
0507: }
0508:
0509: /**
0510: * Return the number of examples in the minimal rule base.
0511: * The minimal rule base is the one that corresponds to the
0512: * rule base of Ben-David.
0513: *
0514: * @return the number of examples in the minimal rule base
0515: */
0516: public int getSizeRuleBaseMin() {
0517: return m_baseMin.numInstances();
0518: }
0519:
0520: /**
0521: * Return the number of examples in the maximal rule base.
0522: * The maximal rule base is built using an algorithm
0523: * dual to that for building the minimal rule base.
0524: *
0525: * @return the number of examples in the maximal rule base
0526: */
0527: public int getSizeRuleBaseMax() {
0528: return m_baseMax.numInstances();
0529: }
0530:
0531: /**
0532: * Classifies a given instance according to the current settings
0533: * of the classifier.
0534: *
0535: * @param instance the instance to be classified
0536: * @return a <code> double </code> that represents the classification,
0537: * this could either be the internal value of a label, when rounding is
0538: * on, or a real number.
0539: */
0540: public double classifyInstance(Instance instance) {
0541: double classValueMin = -1;
0542: double classValueMax = -1;
0543: double classValue;
0544:
0545: if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0546: classValueMin = classifyInstanceMin(instance);
0547: }
0548:
0549: if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0550: classValueMax = classifyInstanceMax(instance);
0551: }
0552:
0553: switch (m_etype) {
0554: case ET_MIN:
0555: classValue = classValueMin;
0556: break;
0557: case ET_MAX:
0558: classValue = classValueMax;
0559: break;
0560: case ET_BOTH:
0561: classValue = (classValueMin + classValueMax) / 2;
0562: break;
0563: default:
0564: throw new IllegalStateException("Illegal mode type!");
0565: }
0566:
0567: // round if necessary and return
0568: return (m_ctype == CT_ROUNDED ? Utils.round(classValue)
0569: : classValue);
0570: }
0571:
0572: /**
0573: * Classify <code> instance </code> using the minimal rule base.
0574: * Rounding is never performed, this is the responsability
0575: * of <code> classifyInstance </code>.
0576: *
0577: * @param instance the instance to be classified
0578: * @return the classification according to the minimal rule base
0579: */
0580: private double classifyInstanceMin(Instance instance) {
0581: double classValue = -1;
0582: if (m_baseMin == null) {
0583: throw new IllegalStateException(
0584: "Classifier has not yet been built");
0585: }
0586:
0587: Iterator it = new EnumerationIterator(m_baseMin
0588: .enumerateInstances());
0589: while (it.hasNext()) {
0590: Instance r = (Instance) it.next();
0591:
0592: // we assume that rules are ordered in decreasing class value order
0593: // so that the first one that we encounter is immediately the
0594: // one with the biggest class value
0595: if (InstancesUtil.smallerOrEqual(r, instance)) {
0596: classValue = r.classValue();
0597: break;
0598: }
0599: }
0600:
0601: // there is no smaller rule in the database
0602: if (classValue == -1) {
0603: if (m_dtype != DT_NONE) {
0604: Instance[] nn = nearestRules(instance, m_baseMin);
0605: classValue = 0;
0606: // XXX for the moment we only use the mean to extract a
0607: // classValue; other possibilities might be included later
0608: for (int i = 0; i < nn.length; i++) {
0609: classValue += nn[i].classValue();
0610: }
0611: classValue /= nn.length;
0612: } else {
0613: classValue = 0; // minimal class value
0614: }
0615: }
0616:
0617: return classValue; // no rounding!
0618: }
0619:
0620: /**
0621: * Classify <code> instance </code> using the maximal rule base.
0622: * Rounding is never performed, this is the responsability
0623: * of <code> classifyInstance </code>.
0624: *
0625: * @param instance the instance to be classified
0626: * @return the classification according to the maximal rule base
0627: */
0628: private double classifyInstanceMax(Instance instance) {
0629: double classValue = -1;
0630: if (m_baseMax == null) {
0631: throw new IllegalStateException(
0632: "Classifier has not yet been built");
0633: }
0634:
0635: Iterator it = new EnumerationIterator(m_baseMax
0636: .enumerateInstances());
0637: while (it.hasNext()) {
0638: Instance r = (Instance) it.next();
0639:
0640: // we assume that rules are ordered in increasing class value order
0641: // so that the first bigger one we encounter will be the one
0642: // with the smallest label
0643: if (InstancesUtil.smallerOrEqual(instance, r)) {
0644: classValue = r.classValue();
0645: break;
0646: }
0647: }
0648:
0649: // there is no bigger rule in the database
0650: if (classValue == -1) {
0651: if (m_dtype != DT_NONE) {
0652: // XXX see remark in classifyInstanceMin
0653: Instance[] nn = nearestRules(instance, m_baseMax);
0654: classValue = 0;
0655: for (int i = 0; i < nn.length; i++) {
0656: classValue += nn[i].classValue();
0657: }
0658: classValue /= nn.length;
0659: } else {
0660: classValue = m_numClasses - 1; // maximal label
0661: }
0662: }
0663:
0664: return classValue;
0665: }
0666:
0667: /**
0668: * Find the instances in <code> base </code> that are,
0669: * according to the current distance type, closest
0670: * to <code> instance </code>.
0671: *
0672: * @param instance the instance around which one looks
0673: * @param base the instances to choose from
0674: * @return an array of <code> Instance </code> which contains
0675: * the instances closest to <code> instance </code>
0676: */
0677: private Instance[] nearestRules(Instance instance, Instances base) {
0678: double min = Double.POSITIVE_INFINITY;
0679: double dist = 0;
0680: double[] instanceDouble = InstancesUtil.toDataDouble(instance);
0681: ArrayList nn = new ArrayList();
0682: Iterator it = new EnumerationIterator(base.enumerateInstances());
0683: while (it.hasNext()) {
0684: Instance r = (Instance) it.next();
0685: double[] rDouble = InstancesUtil.toDataDouble(r);
0686: switch (m_dtype) {
0687: case DT_EUCLID:
0688: dist = euclidDistance(instanceDouble, rDouble);
0689: break;
0690: case DT_HAMMING:
0691: dist = hammingDistance(instanceDouble, rDouble);
0692: break;
0693: default:
0694: throw new IllegalArgumentException(
0695: "distance type is not valid");
0696: }
0697: if (dist < min) {
0698: min = dist;
0699: nn.clear();
0700: nn.add(r);
0701: } else if (dist == min) {
0702: nn.add(r);
0703: }
0704: }
0705:
0706: nn.trimToSize();
0707: return (Instance[]) nn.toArray(new Instance[0]);
0708: }
0709:
  /**
   * Build the OLM classifier, meaning that the rule bases
   * are built.
   *
   * Phase 1 collapses the training data to one (weighted) example per
   * distinct attribute-value combination, with a class value obtained by
   * averaging the observed class distribution. Phase 2 (delegated to
   * phaseTwoMin/phaseTwoMax) turns these examples into the two rule bases.
   *
   * @param instances the instances to use for building the rule base
   * @throws Exception if <code> instances </code> cannot be handled by
   * the classifier.
   */
  public void buildClassifier(Instances instances) throws Exception {
    getCapabilities().testWithFail(instances);

    // copy the dataset so the caller's data is never modified
    m_train = new Instances(instances);
    m_numClasses = m_train.numClasses();

    // new dataset in which examples with missing class value are removed
    m_train.deleteWithMissingClass();

    // build the Map for the estimatedDistributions
    // (Coordinates -> DiscreteEstimator over the class values)
    m_estimatedDistributions = new HashMap(
        m_train.numInstances() / 2);

    // cycle through all instances, accumulating a weighted class
    // distribution per distinct attribute-value combination
    Iterator it = new EnumerationIterator(m_train
        .enumerateInstances());
    while (it.hasNext() == true) {
      Instance instance = (Instance) it.next();
      Coordinates c = new Coordinates(instance);

      // get DiscreteEstimator from the map
      DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions
          .get(c);

      // if no DiscreteEstimator is present in the map, create one
      // (second argument 0 means: no Laplace correction)
      if (df == null) {
        df = new DiscreteEstimator(instances.numClasses(), 0);
      }
      df.addValue(instance.classValue(), instance.weight()); // update
      m_estimatedDistributions.put(c, df); // put back in map
    }

    // Create the attributes for m_baseMin and m_baseMax.
    // These are identical to those of m_train, except that the
    // class is set to 'numeric' (averaged class values need not be
    // integral).
    // The class attribute is moved to the back
    FastVector newAtts = new FastVector(m_train.numAttributes());
    Attribute classAttribute = null;
    for (int i = 0; i < m_train.numAttributes(); i++) {
      Attribute att = m_train.attribute(i);
      if (i != m_train.classIndex()) {
        newAtts.addElement(att.copy());
      } else {
        classAttribute = new Attribute(att.name()); //numeric attribute
      }
    }
    newAtts.addElement(classAttribute);

    // original training instances are replaced by an empty set
    // of instances (one averaged example per entry of the map will
    // be added below)
    m_train = new Instances(m_train.relationName(), newAtts,
        m_estimatedDistributions.size());
    m_train.setClassIndex(m_train.numAttributes() - 1);

    // We cycle through the map of estimatedDistributions and
    // create one Instance for each entry in the map, with
    // a class value that is calculated from the distribution of
    // the class values
    it = m_estimatedDistributions.keySet().iterator();
    while (it.hasNext()) {
      // XXX attValues must be here, otherwise things go wrong
      // (a fresh array is needed per instance; Instance keeps a
      // reference to it rather than copying)
      double[] attValues = new double[m_train.numAttributes()];
      Coordinates cc = (Coordinates) it.next();
      DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions
          .get(cc);
      cc.getValues(attValues);
      // the averaged class value goes into the last slot (the class
      // attribute was moved to the back above)
      switch (m_atype) {
      case AT_MEAN:
        attValues[attValues.length - 1] = (new DiscreteDistribution(
            df)).mean();
        break;
      case AT_MEDIAN:
        attValues[attValues.length - 1] = (new DiscreteDistribution(
            df)).median();
        break;
      case AT_MAXPROB:
        // modes()[0] is the lowest label of maximal frequency
        attValues[attValues.length - 1] = (new DiscreteDistribution(
            df)).modes()[0];
        break;
      default:
        throw new IllegalStateException(
            "Not a valid averaging type");
      }

      // add the instance, we give it weight one
      m_train.add(new Instance(1, attValues));
    }

    if (m_Debug == true) {
      System.out.println("The dataset after phase 1 :");
      System.out.println(m_train.toString());
    }

    /* Shuffle to training instances, to prevent the order dictated
     * by the map to play an important role in how the rule bases
     * are built.
     */
    m_train.randomize(new Random(getSeed()));

    if (m_sort == false) {
      // sort the instances only in increasing class order
      m_train.sort(m_train.classIndex());
    } else {
      // sort instances completely
      Comparator[] cc = new Comparator[m_train.numAttributes()];
      // sort the class, increasing (primary sort key; the class is
      // the last attribute)
      cc[0] = new InstancesComparator(m_train.classIndex());
      // sort the attributes, decreasing
      for (int i = 1; i < cc.length; i++) {
        cc[i] = new InstancesComparator(i - 1, true);
      }

      // copy instances into an array
      Instance[] tmp = new Instance[m_train.numInstances()];
      for (int i = 0; i < tmp.length; i++) {
        tmp[i] = m_train.instance(i);
      }
      MultiDimensionalSort.multiDimensionalSort(tmp, cc);

      // copy sorted array back into Instances
      m_train.delete();
      for (int i = 0; i < tmp.length; i++) {
        m_train.add(tmp[i]);
      }
    }

    // phase 2: building the rule bases themselves
    m_baseMin = new Instances(m_train, m_estimatedDistributions
        .size() / 4);
    phaseTwoMin();
    m_baseMax = new Instances(m_train, m_estimatedDistributions
        .size() / 4);
    phaseTwoMax();
  }
0853:
0854: /**
0855: * This implements the second phase of the OLM algorithm.
0856: * We build the rule base m_baseMin, according to the conflict
0857: * resolution mechanism described in the thesis.
0858: */
0859: private void phaseTwoMin() {
0860:
0861: // loop through instances backwards, this is biggest class labels first
0862: for (int i = m_train.numInstances() - 1; i >= 0; i--) {
0863: Instance e = m_train.instance(i);
0864:
0865: // if the example is redundant with m_base, we discard it
0866: if (isRedundant(e) == false) {
0867: // how many examples are redundant if we would add e
0868: int[] redundancies = makesRedundant(e);
0869: if (redundancies[0] == 1
0870: && causesReversedPreference(e) == false) {
0871: // there is one example made redundant be e, and
0872: // adding e doesn't cause reversed preferences
0873: // so we replace the indicated rule by e
0874: m_baseMin.delete(redundancies[1]);
0875: m_baseMin.add(e);
0876: continue;
0877: }
0878:
0879: if (redundancies[0] == 0) {
0880: // adding e causes no redundancies, what about
0881: // reversed preferences ?
0882: int[] revPref = reversedPreferences(e);
0883: if (revPref[0] == 1) {
0884: // there would be one reversed preference, we
0885: // prefer the example e because it has a lower label
0886: m_baseMin.delete(revPref[1]);
0887: m_baseMin.add(e);
0888: continue;
0889: }
0890:
0891: if (revPref[0] == 0) {
0892: // this means: e causes no redundancies and no
0893: // reversed preferences. We can simply add it.
0894: m_baseMin.add(e);
0895: }
0896: }
0897: }
0898: }
0899: }
0900:
0901: /**
0902: * This implements the second phase of the OLM algorithm.
0903: * We build the rule base m_baseMax .
0904: */
0905: private void phaseTwoMax() {
0906:
0907: // loop through instances, smallest class labels first
0908: for (int i = 0; i < m_train.numInstances(); i++) {
0909: Instance e = m_train.instance(i);
0910:
0911: // if the example is redundant with m_base, we discard it
0912: if (isRedundantMax(e) == false) {
0913: // how many examples are redundant if we would add e
0914: int[] redundancies = makesRedundantMax(e);
0915: if (redundancies[0] == 1
0916: && causesReversedPreferenceMax(e) == false) {
0917: // there is one example made redundant be e, and
0918: // adding e doesn't cause reversed preferences
0919: // so we replace the indicated rule by e
0920: m_baseMax.delete(redundancies[1]);
0921: m_baseMax.add(e);
0922: continue;
0923: }
0924:
0925: if (redundancies[0] == 0) {
0926: // adding e causes no redundancies, what about
0927: // reversed preferences ?
0928: int[] revPref = reversedPreferencesMax(e);
0929: if (revPref[0] == 1) {
0930: // there would be one reversed preference, we
0931: // prefer the example e because it has a lower label
0932: m_baseMax.delete(revPref[1]);
0933: m_baseMax.add(e);
0934: continue;
0935: }
0936:
0937: if (revPref[0] == 0) {
0938: // this means: e causes no redundancies and no
0939: // reversed preferences. We can simply add it.
0940: m_baseMax.add(e);
0941: }
0942: }
0943: }
0944: }
0945: }
0946:
0947: /**
0948: * Returns a string description of the classifier. In debug
0949: * mode, the rule bases are added to the string representation
0950: * as well. This means that the description can become rather
0951: * lengthy.
0952: *
0953: * @return a <code> String </code> describing the classifier.
0954: */
0955: public String toString() {
0956: StringBuffer sb = new StringBuffer();
0957: sb.append("OLM\n===\n\n");
0958: if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0959: if (m_baseMin != null) {
0960: sb
0961: .append("Number of examples in the minimal rule base = "
0962: + m_baseMin.numInstances() + "\n");
0963: } else {
0964: sb.append("minimal rule base not yet created");
0965: }
0966: }
0967:
0968: if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0969: if (m_baseMax != null) {
0970: sb
0971: .append("Number of examples in the maximal rule base = "
0972: + m_baseMax.numInstances() + "\n");
0973: } else {
0974: sb.append("maximal rule base not yet created");
0975: }
0976: }
0977:
0978: if (m_Debug == true) {
0979:
0980: if (m_etype == ET_MIN || m_etype == ET_BOTH) {
0981: sb.append("The minimal rule base is \n");
0982: if (m_baseMin != null) {
0983: sb.append(m_baseMin.toString());
0984: } else {
0985: sb.append(".... not yet created");
0986: }
0987: }
0988:
0989: if (m_etype == ET_MAX || m_etype == ET_BOTH) {
0990: sb.append("The second rule base is \n");
0991: if (m_baseMax != null) {
0992: sb.append(m_baseMax.toString());
0993: } else {
0994: sb.append(".... not yet created");
0995: }
0996: }
0997: }
0998:
0999: sb.append("\n");
1000: return sb.toString();
1001: }
1002:
1003: /**
1004: * Is <code> instance </code> redundant wrt the
1005: * current <code> m_baseMin </code>.
1006: * This mean we are looking if there is an element in
1007: * <code> m_baseMin </code> smaller than <code> instance </code>
1008: * and with the same class.
1009: *
1010: * @param instance the instance of which the redundancy needs to be
1011: * checked
1012: * @return <code> true </code> if <code> instance </code>
1013: * is redundant, <code> false </code> otherwise
1014: */
1015: private boolean isRedundant(Instance instance) {
1016: Iterator it = new EnumerationIterator(m_baseMin
1017: .enumerateInstances());
1018: while (it.hasNext()) {
1019: Instance r = (Instance) it.next();
1020: if (instance.classValue() == r.classValue()
1021: && InstancesUtil.smallerOrEqual(r, instance)) {
1022: return true;
1023: }
1024: }
1025: return false;
1026: }
1027:
1028: /**
1029: * Is <code> instance </code> redundant wrt the
1030: * current <code> m_baseMax </code>.
1031: * This is dual to the previous method, this means that
1032: * <code> instance </code> is redundant if there exists
1033: * and example greater of equal than <code> instance </code>
1034: * with the same class value.
1035: *
1036: * @param instance the instance of which the redundancy needs to be
1037: * checked
1038: * @return <code> true </code> if <code> instance </code>
1039: * is redundant, <code> false </code> otherwise
1040: */
1041: private boolean isRedundantMax(Instance instance) {
1042: Iterator it = new EnumerationIterator(m_baseMax
1043: .enumerateInstances());
1044: while (it.hasNext()) {
1045: Instance r = (Instance) it.next();
1046: if (instance.classValue() == r.classValue()
1047: && InstancesUtil.smallerOrEqual(instance, r)) {
1048: return true;
1049: }
1050: }
1051: return false;
1052: }
1053:
1054: /**
1055: * If we were to add <code> instance </code>, how many
1056: * and which elements from <code> m_baseMin </code> would
1057: * be made redundant by <code> instance </code>.
1058: *
1059: * @param instance the instance of which the influence is to be determined
1060: * @return an array containing two components
1061: * [0] is 0 if instance makes no elements redundant;
1062: * is 1 if there is one rule that is made redundant by instance;
1063: * is 2 if there is more than one rule that is made redundant;
1064: * [1] if [0] == 1, this entry gives the element that is made redundant
1065: */
1066: private int[] makesRedundant(Instance instance) {
1067: int[] ret = new int[2];
1068: for (int i = 0; i < m_baseMin.numInstances(); i++) {
1069: Instance r = m_baseMin.instance(i);
1070: if (r.classValue() == instance.classValue()
1071: && InstancesUtil.smallerOrEqual(instance, r)) {
1072: if (ret[0] == 0) {
1073: ret[0] = 1;
1074: ret[1] = i;
1075: } else { // this means ret[0] == 1
1076: ret[0] = 2;
1077: return ret;
1078: }
1079: }
1080: }
1081: return ret;
1082: }
1083:
1084: /**
1085: * If we were to add <code> instance </code>, how many
1086: * and which elements from <code> m_baseMax </code> would
1087: * be made redundant by <code> instance </code>.
1088: * This method is simply dual to <code> makesRedundant </code>.
1089: *
1090: * @param instance the instance to add
1091: * @return an array containing two components
1092: * [0] is 0 if instance makes no elements redundant;
1093: * is 1 if there is one rule that is made redundant by instance;
1094: * is 2 if there is more than one rule that is made redundant;
1095: * [1] if [0] == 1, this entry gives the element that is made redundant
1096: */
1097: private int[] makesRedundantMax(Instance instance) {
1098: int[] ret = new int[2];
1099: for (int i = 0; i < m_baseMax.numInstances(); i++) {
1100: Instance r = m_baseMax.instance(i);
1101: if (r.classValue() == instance.classValue()
1102: && InstancesUtil.smallerOrEqual(r, instance)) {
1103: if (ret[0] == 0) {
1104: ret[0] = 1;
1105: ret[1] = i;
1106: } else { // this means ret[0] == 1
1107: ret[0] = 2;
1108: return ret;
1109: }
1110: }
1111: }
1112: return ret;
1113: }
1114:
1115: /**
1116: * Checks if adding <code> instance </code> to <code> m_baseMin </code>
1117: * causes reversed preference amongst the elements of <code>
1118: * m_baseMin </code>.
1119: *
1120: * @param instance the instance of which the influence needs to be
1121: * determined
1122: * @return <code> true </code> if adding <code> instance </code>
1123: * to the rule base would cause reversed preference among the rules,
1124: * <code> false </code> otherwise.
1125: */
1126: private boolean causesReversedPreference(Instance instance) {
1127: Iterator it = new EnumerationIterator(m_baseMin
1128: .enumerateInstances());
1129: while (it.hasNext()) {
1130: Instance r = (Instance) it.next();
1131: if (instance.classValue() > r.classValue()
1132: && InstancesUtil.smallerOrEqual(instance, r)) {
1133: // in the original version of OLM this should not happen
1134: System.err
1135: .println("Should not happen in the original OLM algorithm");
1136: return true;
1137: } else if (r.classValue() > instance.classValue()
1138: && InstancesUtil.smallerOrEqual(r, instance)) {
1139: return true;
1140: }
1141: }
1142: return false;
1143: }
1144:
1145: /**
1146: * Checks if adding <code> instance </code> to <code> m_baseMax </code>
1147: * causes reversed preference amongst the elements of
1148: * <code> m_baseMax </code>.
1149: *
1150: * @param instance the instance to add
1151: * @return true if adding <code> instance </code> to the rule
1152: * base would cause reversed preference among the rules,
1153: * false otherwise.
1154: */
1155: private boolean causesReversedPreferenceMax(Instance instance) {
1156: Iterator it = new EnumerationIterator(m_baseMax
1157: .enumerateInstances());
1158: while (it.hasNext()) {
1159: Instance r = (Instance) it.next();
1160: if (instance.classValue() > r.classValue()
1161: && InstancesUtil.smallerOrEqual(instance, r)) {
1162: return true;
1163: } else if (r.classValue() > instance.classValue()
1164: && InstancesUtil.smallerOrEqual(r, instance)) {
1165: return true;
1166: }
1167: }
1168: return false;
1169: }
1170:
1171: /**
1172: * Find indices of the elements from <code> m_baseMin </code>
1173: * that are inconsistent with instance.
1174: *
1175: * @param instance the instance of which the influence needs to be
1176: * determined
1177: * @return an array containing two components
1178: * [0] is 0 if instance is consistent with all present elements
1179: * is 1 if instance is inconsistent (reversed preference) with
1180: * exactly one element
1181: * is 2 if instance is inconsistent with more than one element
1182: * [1] if [0] == 1, this entry gives the index of the
1183: * element that has reversed preference wrt to
1184: * <code> instance </code>
1185: */
1186: private int[] reversedPreferences(Instance instance) {
1187: int[] revPreferences = new int[2];
1188: for (int i = 0; i < m_baseMin.numInstances(); i++) {
1189: Instance r = m_baseMin.instance(i);
1190: if (instance.classValue() < r.classValue()
1191: && InstancesUtil.smallerOrEqual(r, instance)) {
1192: if (revPreferences[0] == 0) {
1193: revPreferences[0] = 1;
1194: revPreferences[1] = i;
1195: } else {
1196: revPreferences[0] = 2;
1197: return revPreferences;
1198: }
1199: }
1200: }
1201: return revPreferences;
1202: }
1203:
1204: /**
1205: * Find indices of the elements from <code> m_baseMin </code>
1206: * that are inconsistent with instance.
1207: *
1208: * @param instance the instance of which the influence needs to be
1209: * determined
1210: * @return an array containing two components
1211: * [0] is 0 if instance is consistent with all present elements
1212: * is 1 if instance is inconsistent (reversed preference) with
1213: * exactly one element
1214: * is 2 if instance is inconsistent with more than one element
1215: * [1] if [0] == 1, this entry gives the index of the
1216: * element that has reversed preference wrt to
1217: * <code> instance </code>
1218: */
1219: private int[] reversedPreferencesMax(Instance instance) {
1220: int[] revPreferences = new int[2];
1221: for (int i = 0; i < m_baseMax.numInstances(); i++) {
1222: Instance r = m_baseMax.instance(i);
1223: if (instance.classValue() > r.classValue()
1224: && InstancesUtil.smallerOrEqual(instance, r)) {
1225: if (revPreferences[0] == 0) {
1226: revPreferences[0] = 1;
1227: revPreferences[1] = i;
1228: } else {
1229: revPreferences[0] = 2;
1230: return revPreferences;
1231: }
1232: }
1233: }
1234: return revPreferences;
1235: }
1236:
1237: /**
1238: * Calculates the square of the Euclidian distance between
1239: * two arrays of doubles. The arrays should have the same length.
1240: *
1241: * @param a1 the first array
1242: * @param a2 the second array
1243: * @return the square of the Euclidian distance between the two
1244: * arrays <code> a1 </code> and <code> a2 </code>
1245: */
1246: private double euclidDistance(double[] a1, double[] a2) {
1247: double dist = 0;
1248: for (int i = 0; i < a1.length; i++) {
1249: dist += (a1[i] - a2[i]) * (a1[i] - a2[i]);
1250: }
1251: return dist;
1252: }
1253:
1254: /**
1255: * Calculates the Hamming distances between two arrays, this
1256: * means one counts the number of positions in which these
1257: * two array differ. The arrays should be of the same length.
1258: *
1259: * @param a1 the first array
1260: * @param a2 the second array
1261: * @return the requested Hamming distance
1262: * The Hamming distance between a1 and a2, this is
1263: * the number of positions in which a1 and a2 differ
1264: */
1265: private int hammingDistance(double[] a1, double[] a2) {
1266: int dist = 0;
1267: for (int i = 0; i < a1.length; i++) {
1268: dist += (a1[i] == a2[i]) ? 0 : 1;
1269: }
1270: return dist;
1271: }
1272:
1273: /**
1274: * Get an enumeration of all available options for this classifier.
1275: *
1276: * @return an enumeration of available options
1277: */
1278: public Enumeration listOptions() {
1279: Vector options = new Vector();
1280:
1281: Enumeration enm = super .listOptions();
1282: while (enm.hasMoreElements())
1283: options.addElement(enm.nextElement());
1284:
1285: String description = "\tSets the classification type to be used.\n"
1286: + "\t(Default: "
1287: + new SelectedTag(CT_REAL, TAGS_CLASSIFICATIONTYPES)
1288: + ")";
1289: String synopsis = "-C "
1290: + Tag.toOptionList(TAGS_CLASSIFICATIONTYPES);
1291: String name = "C";
1292: options.addElement(new Option(description, name, 1, synopsis));
1293:
1294: description = "\tSets the averaging type used in phase 1 of the classifier.\n"
1295: + "\t(Default: "
1296: + new SelectedTag(AT_MEAN, TAGS_AVERAGINGTYPES) + ")";
1297: synopsis = "-A " + Tag.toOptionList(TAGS_AVERAGINGTYPES);
1298: name = "A";
1299: options.addElement(new Option(description, name, 1, synopsis));
1300:
1301: description = "\tIf different from "
1302: + new SelectedTag(DT_NONE, TAGS_DISTANCETYPES)
1303: + ", a nearest neighbour rule is fired when the\n"
1304: + "\trule base doesn't contain an example smaller than the instance\n"
1305: + "\tto be classified\n" + "\t(Default: "
1306: + new SelectedTag(DT_NONE, TAGS_DISTANCETYPES) + ").";
1307: synopsis = "-N " + Tag.toOptionList(TAGS_DISTANCETYPES);
1308: name = "N";
1309: options.addElement(new Option(description, name, 1, synopsis));
1310:
1311: description = "\tSets the extension type, i.e. the rule base to use."
1312: + "\n\t(Default: "
1313: + new SelectedTag(ET_MIN, TAGS_EXTENSIONTYPES) + ")";
1314: synopsis = "-E " + Tag.toOptionList(TAGS_EXTENSIONTYPES);
1315: name = "E";
1316: options.addElement(new Option(description, name, 1, synopsis));
1317:
1318: description = "\tIf set, the instances are also sorted within the same class\n"
1319: + "\tbefore building the rule bases";
1320: synopsis = "-sort";
1321: name = "sort";
1322: options.addElement(new Option(description, name, 0, synopsis));
1323:
1324: return options.elements();
1325: }
1326:
1327: /**
1328: * Parses the options for this object. <p/>
1329: *
1330: <!-- options-start -->
1331: * Valid options are: <p/>
1332: *
1333: * <pre> -S <num>
1334: * Random number seed.
1335: * (default 1)</pre>
1336: *
1337: * <pre> -D
1338: * If set, classifier is run in debug mode and
1339: * may output additional info to the console</pre>
1340: *
1341: * <pre> -C <CL|REG>
1342: * Sets the classification type to be used.
1343: * (Default: REG)</pre>
1344: *
1345: * <pre> -A <MEAN|MED|MAX>
1346: * Sets the averaging type used in phase 1 of the classifier.
1347: * (Default: MEAN)</pre>
1348: *
1349: * <pre> -N <NONE|EUCL|HAM>
1350: * If different from NONE, a nearest neighbour rule is fired when the
1351: * rule base doesn't contain an example smaller than the instance
1352: * to be classified
1353: * (Default: NONE).</pre>
1354: *
1355: * <pre> -E <MIN|MAX|BOTH>
1356: * Sets the extension type, i.e. the rule base to use.
1357: * (Default: MIN)</pre>
1358: *
1359: * <pre> -sort
1360: * If set, the instances are also sorted within the same class
1361: * before building the rule bases</pre>
1362: *
1363: <!-- options-end -->
1364: *
1365: * @param options an array of strings containing the options
1366: * @throws Exception if there are options that have invalid arguments.
1367: */
1368: public void setOptions(String[] options) throws Exception {
1369: String args;
1370:
1371: // classification type
1372: args = Utils.getOption('C', options);
1373: if (args.length() != 0)
1374: setClassificationType(new SelectedTag(args,
1375: TAGS_CLASSIFICATIONTYPES));
1376: else
1377: setClassificationType(new SelectedTag(CT_REAL,
1378: TAGS_CLASSIFICATIONTYPES));
1379:
1380: // averaging type
1381: args = Utils.getOption('A', options);
1382: if (args.length() != 0)
1383: setAveragingType(new SelectedTag(args, TAGS_AVERAGINGTYPES));
1384: else
1385: setAveragingType(new SelectedTag(AT_MEAN,
1386: TAGS_AVERAGINGTYPES));
1387:
1388: // distance type
1389: args = Utils.getOption('N', options);
1390: if (args.length() != 0)
1391: setDistanceType(new SelectedTag(args, TAGS_DISTANCETYPES));
1392: else
1393: setDistanceType(new SelectedTag(DT_NONE, TAGS_DISTANCETYPES));
1394:
1395: // extension type
1396: args = Utils.getOption('E', options);
1397: if (args.length() != 0)
1398: setExtensionType(new SelectedTag(args, TAGS_EXTENSIONTYPES));
1399: else
1400: setExtensionType(new SelectedTag(ET_MIN,
1401: TAGS_EXTENSIONTYPES));
1402:
1403: // sort ?
1404: setSort(Utils.getFlag("sort", options));
1405:
1406: super .setOptions(options);
1407: }
1408:
1409: /**
1410: * Gets an array of string with the current options of the classifier.
1411: *
1412: * @return an array suitable as argument for <code> setOptions </code>
1413: */
1414: public String[] getOptions() {
1415: int i;
1416: Vector result;
1417: String[] options;
1418:
1419: result = new Vector();
1420:
1421: options = super .getOptions();
1422: for (i = 0; i < options.length; i++)
1423: result.add(options[i]);
1424:
1425: // classification type
1426: result.add("-C");
1427: result.add("" + getClassificationType());
1428:
1429: result.add("-A");
1430: result.add("" + getAveragingType());
1431:
1432: result.add("-N");
1433: result.add("" + getDistanceType());
1434:
1435: result.add("-E");
1436: result.add("" + getExtensionType());
1437:
1438: if (getSort())
1439: result.add("-sort");
1440:
1441: return (String[]) result.toArray(new String[result.size()]);
1442: }
1443:
1444: /**
1445: * Main method for testing this class.
1446: *
1447: * @param args the command line arguments
1448: */
1449: public static void main(String[] args) {
1450: runClassifier(new OLM(), args);
1451: }
1452: }
|