0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * Instance.java
0019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
0020: *
0021: */
0022:
0023: package weka.core;
0024:
0025: import java.io.Serializable;
0026: import java.util.Enumeration;
0027:
0028: /**
0029: * Class for handling an instance. All values (numeric, date, nominal, string
0030: * or relational) are internally stored as floating-point numbers. If an
0031: * attribute is nominal (or a string or relational), the stored value is the
0032: * index of the corresponding nominal (or string or relational) value in the
0033: * attribute's definition. We have chosen this approach in favor of a more
0034: * elegant object-oriented approach because it is much faster. <p>
0035: *
0036: * Typical usage (code from the main() method of this class): <p>
0037: *
0038: * <code>
0039: * ... <br>
0040: *
0041: * // Create empty instance with three attribute values <br>
0042: * Instance inst = new Instance(3); <br><br>
0043: *
0044: * // Set instance's values for the attributes "length", "weight", and "position"<br>
0045: * inst.setValue(length, 5.3); <br>
0046: * inst.setValue(weight, 300); <br>
0047: * inst.setValue(position, "first"); <br><br>
0048: *
0049: * // Set instance's dataset to be the dataset "race" <br>
0050: * inst.setDataset(race); <br><br>
0051: *
0052: * // Print the instance <br>
0053: * System.out.println("The instance: " + inst); <br>
0054: *
0055: * ... <br>
0056: * </code><p>
0057: *
0058: * All methods that change an instance are safe, ie. a change of an
0059: * instance does not affect any other instances. All methods that
0060: * change an instance's attribute values clone the attribute value
0061: * vector before it is changed. If your application heavily modifies
0062: * instance values, it may be faster to create a new instance from scratch.
0063: *
0064: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
0065: * @version $Revision: 1.25 $
0066: */
0067: public class Instance implements Copyable, Serializable {
0068:
/** Serialization version identifier of this class. */
static final long serialVersionUID = 1482635194499365122L;

/**
 * Constant representing a missing value. It is NaN, so it must be
 * detected with Double.isNaN() rather than with ==.
 */
protected static final double MISSING_VALUE = Double.NaN;

/**
 * The dataset the instance has access to. Null if the instance
 * doesn't have access to any dataset. Only if an instance has
 * access to a dataset, it knows about the actual attribute types.
 */
protected/*@spec_public@*/Instances m_Dataset;

/**
 * The instance's attribute values in internal (double) format. The
 * copy constructor takes over only the reference, so this array can be
 * shared between several instances.
 */
protected/*@spec_public non_null@*/double[] m_AttValues;

/** The instance's weight. */
protected double m_Weight;
0087:
/**
 * Copy constructor: takes over the attribute values and the weight of
 * the given instance. The value array itself is not cloned here; the new
 * instance refers to the same array object as the given one. The
 * dataset reference of the new instance is null (ie. the new instance
 * has no access to information about the attribute types).
 *
 * @param instance the instance whose attribute values and weight are
 * taken over
 */
//@ ensures m_Dataset == null;
public Instance(/*@non_null@*/Instance instance) {

  this.m_Dataset = null;
  this.m_Weight = instance.m_Weight;
  this.m_AttValues = instance.m_AttValues;
}
0104:
/**
 * Constructor that initializes the instance with the given weight and
 * attribute values. The given array is stored as is (no copy is made).
 * The dataset reference is set to null (ie. the instance has no access
 * to information about the attribute types).
 *
 * @param weight the instance's weight
 * @param attValues the array of attribute values in internal format
 */
//@ ensures m_Dataset == null;
public Instance(double weight, /*@non_null@*/double[] attValues) {

  this.m_Dataset = null;
  this.m_Weight = weight;
  this.m_AttValues = attValues;
}
0120:
/**
 * Creates an instance of the given size whose values are all missing,
 * whose weight is one and whose dataset reference is null (ie. the
 * instance has no access to information about the attribute types).
 *
 * @param numAttributes the number of attribute values to allocate
 */
//@ requires numAttributes > 0; // Or maybe == 0 is okay too?
//@ ensures m_Dataset == null;
public Instance(int numAttributes) {

  double[] values = new double[numAttributes];
  for (int slot = 0; slot < values.length; slot++) {
    values[slot] = MISSING_VALUE;
  }
  m_AttValues = values;
  m_Weight = 1;
  m_Dataset = null;
}
0139:
0140: /**
0141: * Returns the attribute with the given index.
0142: *
0143: * @param index the attribute's index
0144: * @return the attribute at the given position
0145: * @throws UnassignedDatasetException if instance doesn't have access to a
0146: * dataset
0147: */
0148: //@ requires m_Dataset != null;
0149: public/*@pure@*/Attribute attribute(int index) {
0150:
0151: if (m_Dataset == null) {
0152: throw new UnassignedDatasetException(
0153: "Instance doesn't have access to a dataset!");
0154: }
0155: return m_Dataset.attribute(index);
0156: }
0157:
0158: /**
0159: * Returns the attribute with the given index. Does the same
0160: * thing as attribute().
0161: *
0162: * @param indexOfIndex the index of the attribute's index
0163: * @return the attribute at the given position
0164: * @throws UnassignedDatasetException if instance doesn't have access to a
0165: * dataset
0166: */
0167: //@ requires m_Dataset != null;
0168: public/*@pure@*/Attribute attributeSparse(int indexOfIndex) {
0169:
0170: if (m_Dataset == null) {
0171: throw new UnassignedDatasetException(
0172: "Instance doesn't have access to a dataset!");
0173: }
0174: return m_Dataset.attribute(indexOfIndex);
0175: }
0176:
0177: /**
0178: * Returns class attribute.
0179: *
0180: * @return the class attribute
0181: * @throws UnassignedDatasetException if the class is not set or the
0182: * instance doesn't have access to a dataset
0183: */
0184: //@ requires m_Dataset != null;
0185: public/*@pure@*/Attribute classAttribute() {
0186:
0187: if (m_Dataset == null) {
0188: throw new UnassignedDatasetException(
0189: "Instance doesn't have access to a dataset!");
0190: }
0191: return m_Dataset.classAttribute();
0192: }
0193:
0194: /**
0195: * Returns the class attribute's index.
0196: *
0197: * @return the class index as an integer
0198: * @throws UnassignedDatasetException if instance doesn't have access to a dataset
0199: */
0200: //@ requires m_Dataset != null;
0201: //@ ensures \result == m_Dataset.classIndex();
0202: public/*@pure@*/int classIndex() {
0203:
0204: if (m_Dataset == null) {
0205: throw new UnassignedDatasetException(
0206: "Instance doesn't have access to a dataset!");
0207: }
0208: return m_Dataset.classIndex();
0209: }
0210:
0211: /**
0212: * Tests if an instance's class is missing.
0213: *
0214: * @return true if the instance's class is missing
0215: * @throws UnassignedClassException if the class is not set or the instance doesn't
0216: * have access to a dataset
0217: */
0218: //@ requires classIndex() >= 0;
0219: public/*@pure@*/boolean classIsMissing() {
0220:
0221: if (classIndex() < 0) {
0222: throw new UnassignedClassException("Class is not set!");
0223: }
0224: return isMissing(classIndex());
0225: }
0226:
0227: /**
0228: * Returns an instance's class value in internal format. (ie. as a
0229: * floating-point number)
0230: *
0231: * @return the corresponding value as a double (If the
0232: * corresponding attribute is nominal (or a string) then it returns the
0233: * value's index as a double).
0234: * @throws UnassignedClassException if the class is not set or the instance doesn't
0235: * have access to a dataset
0236: */
0237: //@ requires classIndex() >= 0;
0238: public/*@pure@*/double classValue() {
0239:
0240: if (classIndex() < 0) {
0241: throw new UnassignedClassException("Class is not set!");
0242: }
0243: return value(classIndex());
0244: }
0245:
0246: /**
0247: * Produces a shallow copy of this instance. The copy has
0248: * access to the same dataset. (if you want to make a copy
0249: * that doesn't have access to the dataset, use
0250: * <code>new Instance(instance)</code>
0251: *
0252: * @return the shallow copy
0253: */
0254: //@ also ensures \result != null;
0255: //@ also ensures \result instanceof Instance;
0256: //@ also ensures ((Instance)\result).m_Dataset == m_Dataset;
0257: public/*@pure@*/Object copy() {
0258:
0259: Instance result = new Instance(this );
0260: result.m_Dataset = m_Dataset;
0261: return result;
0262: }
0263:
0264: /**
0265: * Returns the dataset this instance has access to. (ie. obtains
0266: * information about attribute types from) Null if the instance
0267: * doesn't have access to a dataset.
0268: *
0269: * @return the dataset the instance has accesss to
0270: */
0271: //@ ensures \result == m_Dataset;
0272: public/*@pure@*/Instances dataset() {
0273:
0274: return m_Dataset;
0275: }
0276:
0277: /**
0278: * Deletes an attribute at the given position (0 to
0279: * numAttributes() - 1). Only succeeds if the instance does not
0280: * have access to any dataset because otherwise inconsistencies
0281: * could be introduced.
0282: *
0283: * @param position the attribute's position
0284: * @throws RuntimeException if the instance has access to a
0285: * dataset
0286: */
0287: //@ requires m_Dataset != null;
0288: public void deleteAttributeAt(int position) {
0289:
0290: if (m_Dataset != null) {
0291: throw new RuntimeException(
0292: "Instance has access to a dataset!");
0293: }
0294: forceDeleteAttributeAt(position);
0295: }
0296:
0297: /**
0298: * Returns an enumeration of all the attributes.
0299: *
0300: * @return enumeration of all the attributes
0301: * @throws UnassignedDatasetException if the instance doesn't
0302: * have access to a dataset
0303: */
0304: //@ requires m_Dataset != null;
0305: public/*@pure@*/Enumeration enumerateAttributes() {
0306:
0307: if (m_Dataset == null) {
0308: throw new UnassignedDatasetException(
0309: "Instance doesn't have access to a dataset!");
0310: }
0311: return m_Dataset.enumerateAttributes();
0312: }
0313:
0314: /**
0315: * Tests if the headers of two instances are equivalent.
0316: *
0317: * @param inst another instance
0318: * @return true if the header of the given instance is
0319: * equivalent to this instance's header
0320: * @throws UnassignedDatasetException if instance doesn't have access to any
0321: * dataset
0322: */
0323: //@ requires m_Dataset != null;
0324: public/*@pure@*/boolean equalHeaders(Instance inst) {
0325:
0326: if (m_Dataset == null) {
0327: throw new UnassignedDatasetException(
0328: "Instance doesn't have access to a dataset!");
0329: }
0330: return m_Dataset.equalHeaders(inst.m_Dataset);
0331: }
0332:
0333: /**
0334: * Tests whether an instance has a missing value. Skips the class attribute if set.
0335: * @return true if instance has a missing value.
0336: * @throws UnassignedDatasetException if instance doesn't have access to any
0337: * dataset
0338: */
0339: //@ requires m_Dataset != null;
0340: public/*@pure@*/boolean hasMissingValue() {
0341:
0342: if (m_Dataset == null) {
0343: throw new UnassignedDatasetException(
0344: "Instance doesn't have access to a dataset!");
0345: }
0346: for (int i = 0; i < numAttributes(); i++) {
0347: if (i != classIndex()) {
0348: if (isMissing(i)) {
0349: return true;
0350: }
0351: }
0352: }
0353: return false;
0354: }
0355:
0356: /**
0357: * Returns the index of the attribute stored at the given position.
0358: * Just returns the given value.
0359: *
0360: * @param position the position
0361: * @return the index of the attribute stored at the given position
0362: */
0363: public/*@pure@*/int index(int position) {
0364:
0365: return position;
0366: }
0367:
0368: /**
0369: * Inserts an attribute at the given position (0 to
0370: * numAttributes()). Only succeeds if the instance does not
0371: * have access to any dataset because otherwise inconsistencies
0372: * could be introduced.
0373: *
0374: * @param position the attribute's position
0375: * @throws RuntimeException if the instance has accesss to a
0376: * dataset
0377: * @throws IllegalArgumentException if the position is out of range
0378: */
0379: //@ requires m_Dataset == null;
0380: //@ requires 0 <= position && position <= numAttributes();
0381: public void insertAttributeAt(int position) {
0382:
0383: if (m_Dataset != null) {
0384: throw new RuntimeException(
0385: "Instance has accesss to a dataset!");
0386: }
0387: if ((position < 0) || (position > numAttributes())) {
0388: throw new IllegalArgumentException(
0389: "Can't insert attribute: index out " + "of range");
0390: }
0391: forceInsertAttributeAt(position);
0392: }
0393:
0394: /**
0395: * Tests if a specific value is "missing".
0396: *
0397: * @param attIndex the attribute's index
0398: * @return true if the value is "missing"
0399: */
0400: public/*@pure@*/boolean isMissing(int attIndex) {
0401:
0402: if (Double.isNaN(m_AttValues[attIndex])) {
0403: return true;
0404: }
0405: return false;
0406: }
0407:
0408: /**
0409: * Tests if a specific value is "missing". Does
0410: * the same thing as isMissing() if applied to an Instance.
0411: *
0412: * @param indexOfIndex the index of the attribute's index
0413: * @return true if the value is "missing"
0414: */
0415: public/*@pure@*/boolean isMissingSparse(int indexOfIndex) {
0416:
0417: if (Double.isNaN(m_AttValues[indexOfIndex])) {
0418: return true;
0419: }
0420: return false;
0421: }
0422:
0423: /**
0424: * Tests if a specific value is "missing".
0425: * The given attribute has to belong to a dataset.
0426: *
0427: * @param att the attribute
0428: * @return true if the value is "missing"
0429: */
0430: public/*@pure@*/boolean isMissing(Attribute att) {
0431:
0432: return isMissing(att.index());
0433: }
0434:
0435: /**
0436: * Tests if the given value codes "missing".
0437: *
0438: * @param val the value to be tested
0439: * @return true if val codes "missing"
0440: */
0441: public static/*@pure@*/boolean isMissingValue(double val) {
0442:
0443: return Double.isNaN(val);
0444: }
0445:
0446: /**
0447: * Merges this instance with the given instance and returns
0448: * the result. Dataset is set to null.
0449: *
0450: * @param inst the instance to be merged with this one
0451: * @return the merged instances
0452: */
0453: public Instance mergeInstance(Instance inst) {
0454:
0455: int m = 0;
0456: double[] newVals = new double[numAttributes()
0457: + inst.numAttributes()];
0458: for (int j = 0; j < numAttributes(); j++, m++) {
0459: newVals[m] = value(j);
0460: }
0461: for (int j = 0; j < inst.numAttributes(); j++, m++) {
0462: newVals[m] = inst.value(j);
0463: }
0464: return new Instance(1.0, newVals);
0465: }
0466:
0467: /**
0468: * Returns the double that codes "missing".
0469: *
0470: * @return the double that codes "missing"
0471: */
0472: public/*@pure@*/static double missingValue() {
0473:
0474: return MISSING_VALUE;
0475: }
0476:
0477: /**
0478: * Returns the number of attributes.
0479: *
0480: * @return the number of attributes as an integer
0481: */
0482: //@ ensures \result == m_AttValues.length;
0483: public/*@pure@*/int numAttributes() {
0484:
0485: return m_AttValues.length;
0486: }
0487:
0488: /**
0489: * Returns the number of class labels.
0490: *
0491: * @return the number of class labels as an integer if the
0492: * class attribute is nominal, 1 otherwise.
0493: * @throws UnassignedDatasetException if instance doesn't have access to any
0494: * dataset
0495: */
0496: //@ requires m_Dataset != null;
0497: public/*@pure@*/int numClasses() {
0498:
0499: if (m_Dataset == null) {
0500: throw new UnassignedDatasetException(
0501: "Instance doesn't have access to a dataset!");
0502: }
0503: return m_Dataset.numClasses();
0504: }
0505:
0506: /**
0507: * Returns the number of values present. Always the same as numAttributes().
0508: *
0509: * @return the number of values
0510: */
0511: //@ ensures \result == m_AttValues.length;
0512: public/*@pure@*/int numValues() {
0513:
0514: return m_AttValues.length;
0515: }
0516:
0517: /**
0518: * Replaces all missing values in the instance with the
0519: * values contained in the given array. A deep copy of
0520: * the vector of attribute values is performed before the
0521: * values are replaced.
0522: *
0523: * @param array containing the means and modes
0524: * @throws IllegalArgumentException if numbers of attributes are unequal
0525: */
0526: public void replaceMissingValues(double[] array) {
0527:
0528: if ((array == null) || (array.length != m_AttValues.length)) {
0529: throw new IllegalArgumentException(
0530: "Unequal number of attributes!");
0531: }
0532: freshAttributeVector();
0533: for (int i = 0; i < m_AttValues.length; i++) {
0534: if (isMissing(i)) {
0535: m_AttValues[i] = array[i];
0536: }
0537: }
0538: }
0539:
0540: /**
0541: * Sets the class value of an instance to be "missing". A deep copy of
0542: * the vector of attribute values is performed before the
0543: * value is set to be missing.
0544: *
0545: * @throws UnassignedClassException if the class is not set
0546: * @throws UnassignedDatasetException if the instance doesn't
0547: * have access to a dataset
0548: */
0549: //@ requires classIndex() >= 0;
0550: public void setClassMissing() {
0551:
0552: if (classIndex() < 0) {
0553: throw new UnassignedClassException("Class is not set!");
0554: }
0555: setMissing(classIndex());
0556: }
0557:
0558: /**
0559: * Sets the class value of an instance to the given value (internal
0560: * floating-point format). A deep copy of the vector of attribute
0561: * values is performed before the value is set.
0562: *
0563: * @param value the new attribute value (If the corresponding
0564: * attribute is nominal (or a string) then this is the new value's
0565: * index as a double).
0566: * @throws UnassignedClassException if the class is not set
0567: * @throws UnaddignedDatasetException if the instance doesn't
0568: * have access to a dataset
0569: */
0570: //@ requires classIndex() >= 0;
0571: public void setClassValue(double value) {
0572:
0573: if (classIndex() < 0) {
0574: throw new UnassignedClassException("Class is not set!");
0575: }
0576: setValue(classIndex(), value);
0577: }
0578:
0579: /**
0580: * Sets the class value of an instance to the given value. A deep
0581: * copy of the vector of attribute values is performed before the
0582: * value is set.
0583: *
0584: * @param value the new class value (If the class
0585: * is a string attribute and the value can't be found,
0586: * the value is added to the attribute).
0587: * @throws UnassignedClassException if the class is not set
0588: * @throws UnassignedDatasetException if the dataset is not set
0589: * @throws IllegalArgumentException if the attribute is not
0590: * nominal or a string, or the value couldn't be found for a nominal
0591: * attribute
0592: */
0593: //@ requires classIndex() >= 0;
0594: public final void setClassValue(String value) {
0595:
0596: if (classIndex() < 0) {
0597: throw new UnassignedClassException("Class is not set!");
0598: }
0599: setValue(classIndex(), value);
0600: }
0601:
0602: /**
0603: * Sets the reference to the dataset. Does not check if the instance
0604: * is compatible with the dataset. Note: the dataset does not know
0605: * about this instance. If the structure of the dataset's header
0606: * gets changed, this instance will not be adjusted automatically.
0607: *
0608: * @param instances the reference to the dataset
0609: */
0610: public final void setDataset(Instances instances) {
0611:
0612: m_Dataset = instances;
0613: }
0614:
0615: /**
0616: * Sets a specific value to be "missing". Performs a deep copy
0617: * of the vector of attribute values before the value is set to
0618: * be missing.
0619: *
0620: * @param attIndex the attribute's index
0621: */
0622: public final void setMissing(int attIndex) {
0623:
0624: setValue(attIndex, MISSING_VALUE);
0625: }
0626:
0627: /**
0628: * Sets a specific value to be "missing". Performs a deep copy
0629: * of the vector of attribute values before the value is set to
0630: * be missing. The given attribute has to belong to a dataset.
0631: *
0632: * @param att the attribute
0633: */
0634: public final void setMissing(Attribute att) {
0635:
0636: setMissing(att.index());
0637: }
0638:
0639: /**
0640: * Sets a specific value in the instance to the given value
0641: * (internal floating-point format). Performs a deep copy
0642: * of the vector of attribute values before the value is set.
0643: *
0644: * @param attIndex the attribute's index
0645: * @param value the new attribute value (If the corresponding
0646: * attribute is nominal (or a string) then this is the new value's
0647: * index as a double).
0648: */
0649: public void setValue(int attIndex, double value) {
0650:
0651: freshAttributeVector();
0652: m_AttValues[attIndex] = value;
0653: }
0654:
0655: /**
0656: * Sets a specific value in the instance to the given value
0657: * (internal floating-point format). Performs a deep copy
0658: * of the vector of attribute values before the value is set.
0659: * Does exactly the same thing as setValue().
0660: *
0661: * @param indexOfIndex the index of the attribute's index
0662: * @param value the new attribute value (If the corresponding
0663: * attribute is nominal (or a string) then this is the new value's
0664: * index as a double).
0665: */
0666: public void setValueSparse(int indexOfIndex, double value) {
0667:
0668: freshAttributeVector();
0669: m_AttValues[indexOfIndex] = value;
0670: }
0671:
0672: /**
0673: * Sets a value of a nominal or string attribute to the given
0674: * value. Performs a deep copy of the vector of attribute values
0675: * before the value is set.
0676: *
0677: * @param attIndex the attribute's index
0678: * @param value the new attribute value (If the attribute
0679: * is a string attribute and the value can't be found,
0680: * the value is added to the attribute).
0681: * @throws UnassignedDatasetException if the dataset is not set
0682: * @throws IllegalArgumentException if the selected
0683: * attribute is not nominal or a string, or the supplied value couldn't
0684: * be found for a nominal attribute
0685: */
0686: //@ requires m_Dataset != null;
0687: public final void setValue(int attIndex, String value) {
0688:
0689: int valIndex;
0690:
0691: if (m_Dataset == null) {
0692: throw new UnassignedDatasetException(
0693: "Instance doesn't have access to a dataset!");
0694: }
0695: if (!attribute(attIndex).isNominal()
0696: && !attribute(attIndex).isString()) {
0697: throw new IllegalArgumentException(
0698: "Attribute neither nominal nor string!");
0699: }
0700: valIndex = attribute(attIndex).indexOfValue(value);
0701: if (valIndex == -1) {
0702: if (attribute(attIndex).isNominal()) {
0703: throw new IllegalArgumentException(
0704: "Value not defined for given nominal attribute!");
0705: } else {
0706: attribute(attIndex).forceAddValue(value);
0707: valIndex = attribute(attIndex).indexOfValue(value);
0708: }
0709: }
0710: setValue(attIndex, (double) valIndex);
0711: }
0712:
0713: /**
0714: * Sets a specific value in the instance to the given value
0715: * (internal floating-point format). Performs a deep copy of the
0716: * vector of attribute values before the value is set, so if you are
0717: * planning on calling setValue many times it may be faster to
0718: * create a new instance using toDoubleArray. The given attribute
0719: * has to belong to a dataset.
0720: *
0721: * @param att the attribute
0722: * @param value the new attribute value (If the corresponding
0723: * attribute is nominal (or a string) then this is the new value's
0724: * index as a double).
0725: */
0726: public final void setValue(Attribute att, double value) {
0727:
0728: setValue(att.index(), value);
0729: }
0730:
0731: /**
0732: * Sets a value of an nominal or string attribute to the given
0733: * value. Performs a deep copy of the vector of attribute values
0734: * before the value is set, so if you are planning on calling setValue many
0735: * times it may be faster to create a new instance using toDoubleArray.
0736: * The given attribute has to belong to a dataset.
0737: *
0738: * @param att the attribute
0739: * @param value the new attribute value (If the attribute
0740: * is a string attribute and the value can't be found,
0741: * the value is added to the attribute).
0742: * @throws IllegalArgumentException if the the attribute is not
0743: * nominal or a string, or the value couldn't be found for a nominal
0744: * attribute
0745: */
0746: public final void setValue(Attribute att, String value) {
0747:
0748: if (!att.isNominal() && !att.isString()) {
0749: throw new IllegalArgumentException(
0750: "Attribute neither nominal nor string!");
0751: }
0752: int valIndex = att.indexOfValue(value);
0753: if (valIndex == -1) {
0754: if (att.isNominal()) {
0755: throw new IllegalArgumentException(
0756: "Value not defined for given nominal attribute!");
0757: } else {
0758: att.forceAddValue(value);
0759: valIndex = att.indexOfValue(value);
0760: }
0761: }
0762: setValue(att.index(), (double) valIndex);
0763: }
0764:
0765: /**
0766: * Sets the weight of an instance.
0767: *
0768: * @param weight the weight
0769: */
0770: public final void setWeight(double weight) {
0771:
0772: m_Weight = weight;
0773: }
0774:
0775: /**
0776: * Returns the relational value of a relational attribute.
0777: *
0778: * @param attIndex the attribute's index
0779: * @return the corresponding relation as an Instances object
0780: * @throws IllegalArgumentException if the attribute is not a
0781: * relation-valued attribute
0782: * @throws UnassignedDatasetException if the instance doesn't belong
0783: * to a dataset.
0784: */
0785: //@ requires m_Dataset != null;
0786: public final/*@pure@*/Instances relationalValue(int attIndex) {
0787:
0788: if (m_Dataset == null) {
0789: throw new UnassignedDatasetException(
0790: "Instance doesn't have access to a dataset!");
0791: }
0792: return relationalValue(m_Dataset.attribute(attIndex));
0793: }
0794:
0795: /**
0796: * Returns the relational value of a relational attribute.
0797: *
0798: * @param att the attribute
0799: * @return the corresponding relation as an Instances object
0800: * @throws IllegalArgumentException if the attribute is not a
0801: * relation-valued attribute
0802: * @throws UnassignedDatasetException if the instance doesn't belong
0803: * to a dataset.
0804: */
0805: public final/*@pure@*/Instances relationalValue(Attribute att) {
0806:
0807: int attIndex = att.index();
0808: if (att.isRelationValued()) {
0809: return att.relation((int) value(attIndex));
0810: } else {
0811: throw new IllegalArgumentException(
0812: "Attribute isn't relation-valued!");
0813: }
0814: }
0815:
0816: /**
0817: * Returns the value of a nominal, string, date, or relational attribute
0818: * for the instance as a string.
0819: *
0820: * @param attIndex the attribute's index
0821: * @return the value as a string
0822: * @throws IllegalArgumentException if the attribute is not a nominal,
0823: * string, date, or relation-valued attribute.
0824: * @throws UnassignedDatasetException if the instance doesn't belong
0825: * to a dataset.
0826: */
0827: //@ requires m_Dataset != null;
0828: public final/*@pure@*/String stringValue(int attIndex) {
0829:
0830: if (m_Dataset == null) {
0831: throw new UnassignedDatasetException(
0832: "Instance doesn't have access to a dataset!");
0833: }
0834: return stringValue(m_Dataset.attribute(attIndex));
0835: }
0836:
0837: /**
0838: * Returns the value of a nominal, string, date, or relational attribute
0839: * for the instance as a string.
0840: *
0841: * @param att the attribute
0842: * @return the value as a string
0843: * @throws IllegalArgumentException if the attribute is not a nominal,
0844: * string, date, or relation-valued attribute.
0845: * @throws UnassignedDatasetException if the instance doesn't belong
0846: * to a dataset.
0847: */
0848: public final/*@pure@*/String stringValue(Attribute att) {
0849:
0850: int attIndex = att.index();
0851: switch (att.type()) {
0852: case Attribute.NOMINAL:
0853: case Attribute.STRING:
0854: return att.value((int) value(attIndex));
0855: case Attribute.DATE:
0856: return att.formatDate(value(attIndex));
0857: case Attribute.RELATIONAL:
0858: return att.relation((int) value(attIndex))
0859: .stringWithoutHeader();
0860: default:
0861: throw new IllegalArgumentException(
0862: "Attribute isn't nominal, string or date!");
0863: }
0864: }
0865:
0866: /**
0867: * Returns the values of each attribute as an array of doubles.
0868: *
0869: * @return an array containing all the instance attribute values
0870: */
0871: public double[] toDoubleArray() {
0872:
0873: double[] newValues = new double[m_AttValues.length];
0874: System.arraycopy(m_AttValues, 0, newValues, 0,
0875: m_AttValues.length);
0876: return newValues;
0877: }
0878:
0879: /**
0880: * Returns the description of one instance. If the instance
0881: * doesn't have access to a dataset, it returns the internal
0882: * floating-point values. Quotes string
0883: * values that contain whitespace characters.
0884: *
0885: * @return the instance's description as a string
0886: */
0887: public String toString() {
0888:
0889: StringBuffer text = new StringBuffer();
0890:
0891: for (int i = 0; i < m_AttValues.length; i++) {
0892: if (i > 0)
0893: text.append(",");
0894: text.append(toString(i));
0895: }
0896:
0897: return text.toString();
0898: }
0899:
0900: /**
0901: * Returns the description of one value of the instance as a
0902: * string. If the instance doesn't have access to a dataset, it
0903: * returns the internal floating-point value. Quotes string
0904: * values that contain whitespace characters, or if they
0905: * are a question mark.
0906: *
0907: * @param attIndex the attribute's index
0908: * @return the value's description as a string
0909: */
0910: public final/*@pure@*/String toString(int attIndex) {
0911:
0912: StringBuffer text = new StringBuffer();
0913:
0914: if (isMissing(attIndex)) {
0915: text.append("?");
0916: } else {
0917: if (m_Dataset == null) {
0918: text.append(Utils.doubleToString(m_AttValues[attIndex],
0919: 6));
0920: } else {
0921: switch (m_Dataset.attribute(attIndex).type()) {
0922: case Attribute.NOMINAL:
0923: case Attribute.STRING:
0924: case Attribute.DATE:
0925: case Attribute.RELATIONAL:
0926: text.append(Utils.quote(stringValue(attIndex)));
0927: break;
0928: case Attribute.NUMERIC:
0929: text.append(Utils
0930: .doubleToString(value(attIndex), 6));
0931: break;
0932: default:
0933: throw new IllegalStateException(
0934: "Unknown attribute type");
0935: }
0936: }
0937: }
0938: return text.toString();
0939: }
0940:
0941: /**
0942: * Returns the description of one value of the instance as a
0943: * string. If the instance doesn't have access to a dataset it
0944: * returns the internal floating-point value. Quotes string
0945: * values that contain whitespace characters, or if they
0946: * are a question mark.
0947: * The given attribute has to belong to a dataset.
0948: *
0949: * @param att the attribute
0950: * @return the value's description as a string
0951: */
0952: public final String toString(Attribute att) {
0953:
0954: return toString(att.index());
0955: }
0956:
0957: /**
0958: * Returns an instance's attribute value in internal format.
0959: *
0960: * @param attIndex the attribute's index
0961: * @return the specified value as a double (If the corresponding
0962: * attribute is nominal (or a string) then it returns the value's index as a
0963: * double).
0964: */
0965: public/*@pure@*/double value(int attIndex) {
0966:
0967: return m_AttValues[attIndex];
0968: }
0969:
0970: /**
0971: * Returns an instance's attribute value in internal format.
0972: * Does exactly the same thing as value() if applied to an Instance.
0973: *
0974: * @param indexOfIndex the index of the attribute's index
0975: * @return the specified value as a double (If the corresponding
0976: * attribute is nominal (or a string) then it returns the value's index as a
0977: * double).
0978: */
0979: public/*@pure@*/double valueSparse(int indexOfIndex) {
0980:
0981: return m_AttValues[indexOfIndex];
0982: }
0983:
0984: /**
0985: * Returns an instance's attribute value in internal format.
0986: * The given attribute has to belong to a dataset.
0987: *
0988: * @param att the attribute
0989: * @return the specified value as a double (If the corresponding
0990: * attribute is nominal (or a string) then it returns the value's index as a
0991: * double).
0992: */
0993: public/*@pure@*/double value(Attribute att) {
0994:
0995: return value(att.index());
0996: }
0997:
0998: /**
0999: * Returns the instance's weight.
1000: *
1001: * @return the instance's weight as a double
1002: */
1003: public final/*@pure@*/double weight() {
1004:
1005: return m_Weight;
1006: }
1007:
1008: /**
1009: * Deletes an attribute at the given position (0 to
1010: * numAttributes() - 1).
1011: *
1012: * @param position the attribute's position
1013: */
1014: void forceDeleteAttributeAt(int position) {
1015:
1016: double[] newValues = new double[m_AttValues.length - 1];
1017:
1018: System.arraycopy(m_AttValues, 0, newValues, 0, position);
1019: if (position < m_AttValues.length - 1) {
1020: System.arraycopy(m_AttValues, position + 1, newValues,
1021: position, m_AttValues.length - (position + 1));
1022: }
1023: m_AttValues = newValues;
1024: }
1025:
1026: /**
1027: * Inserts an attribute at the given position
1028: * (0 to numAttributes()) and sets its value to be missing.
1029: *
1030: * @param position the attribute's position
1031: */
1032: void forceInsertAttributeAt(int position) {
1033:
1034: double[] newValues = new double[m_AttValues.length + 1];
1035:
1036: System.arraycopy(m_AttValues, 0, newValues, 0, position);
1037: newValues[position] = MISSING_VALUE;
1038: System.arraycopy(m_AttValues, position, newValues,
1039: position + 1, m_AttValues.length - position);
1040: m_AttValues = newValues;
1041: }
1042:
/**
 * Protected no-argument constructor for use by subclasses.
 * Performs no initialization at all.
 */
protected Instance() {
  // Intentionally left empty.
}
1048:
1049: /**
1050: * Clones the attribute vector of the instance and
1051: * overwrites it with the clone.
1052: */
1053: private void freshAttributeVector() {
1054:
1055: m_AttValues = toDoubleArray();
1056: }
1057:
1058: /**
1059: * Main method for testing this class.
1060: *
1061: * @param options the commandline options - ignored
1062: */
1063: //@ requires options != null;
1064: public static void main(String[] options) {
1065:
1066: try {
1067:
1068: // Create numeric attributes "length" and "weight"
1069: Attribute length = new Attribute("length");
1070: Attribute weight = new Attribute("weight");
1071:
1072: // Create vector to hold nominal values "first", "second", "third"
1073: FastVector my_nominal_values = new FastVector(3);
1074: my_nominal_values.addElement("first");
1075: my_nominal_values.addElement("second");
1076: my_nominal_values.addElement("third");
1077:
1078: // Create nominal attribute "position"
1079: Attribute position = new Attribute("position",
1080: my_nominal_values);
1081:
1082: // Create vector of the above attributes
1083: FastVector attributes = new FastVector(3);
1084: attributes.addElement(length);
1085: attributes.addElement(weight);
1086: attributes.addElement(position);
1087:
1088: // Create the empty dataset "race" with above attributes
1089: Instances race = new Instances("race", attributes, 0);
1090:
1091: // Make position the class attribute
1092: race.setClassIndex(position.index());
1093:
1094: // Create empty instance with three attribute values
1095: Instance inst = new Instance(3);
1096:
1097: // Set instance's values for the attributes "length", "weight", and "position"
1098: inst.setValue(length, 5.3);
1099: inst.setValue(weight, 300);
1100: inst.setValue(position, "first");
1101:
1102: // Set instance's dataset to be the dataset "race"
1103: inst.setDataset(race);
1104:
1105: // Print the instance
1106: System.out.println("The instance: " + inst);
1107:
1108: // Print the first attribute
1109: System.out.println("First attribute: " + inst.attribute(0));
1110:
1111: // Print the class attribute
1112: System.out.println("Class attribute: "
1113: + inst.classAttribute());
1114:
1115: // Print the class index
1116: System.out.println("Class index: " + inst.classIndex());
1117:
1118: // Say if class is missing
1119: System.out.println("Class is missing: "
1120: + inst.classIsMissing());
1121:
1122: // Print the instance's class value in internal format
1123: System.out.println("Class value (internal format): "
1124: + inst.classValue());
1125:
1126: // Print a shallow copy of this instance
1127: Instance copy = (Instance) inst.copy();
1128: System.out.println("Shallow copy: " + copy);
1129:
1130: // Set dataset for shallow copy
1131: copy.setDataset(inst.dataset());
1132: System.out
1133: .println("Shallow copy with dataset set: " + copy);
1134:
1135: // Unset dataset for copy, delete first attribute, and insert it again
1136: copy.setDataset(null);
1137: copy.deleteAttributeAt(0);
1138: copy.insertAttributeAt(0);
1139: copy.setDataset(inst.dataset());
1140: System.out
1141: .println("Copy with first attribute deleted and inserted: "
1142: + copy);
1143:
1144: // Enumerate attributes (leaving out the class attribute)
1145: System.out
1146: .println("Enumerating attributes (leaving out class):");
1147: Enumeration enu = inst.enumerateAttributes();
1148: while (enu.hasMoreElements()) {
1149: Attribute att = (Attribute) enu.nextElement();
1150: System.out.println(att);
1151: }
1152:
1153: // Headers are equivalent?
1154: System.out
1155: .println("Header of original and copy equivalent: "
1156: + inst.equalHeaders(copy));
1157:
1158: // Test for missing values
1159: System.out.println("Length of copy missing: "
1160: + copy.isMissing(length));
1161: System.out.println("Weight of copy missing: "
1162: + copy.isMissing(weight.index()));
1163: System.out.println("Length of copy missing: "
1164: + Instance.isMissingValue(copy.value(length)));
1165: System.out.println("Missing value coded as: "
1166: + Instance.missingValue());
1167:
1168: // Prints number of attributes and classes
1169: System.out.println("Number of attributes: "
1170: + copy.numAttributes());
1171: System.out.println("Number of classes: "
1172: + copy.numClasses());
1173:
1174: // Replace missing values
1175: double[] meansAndModes = { 2, 3, 0 };
1176: copy.replaceMissingValues(meansAndModes);
1177: System.out.println("Copy with missing value replaced: "
1178: + copy);
1179:
1180: // Setting and getting values and weights
1181: copy.setClassMissing();
1182: System.out.println("Copy with missing class: " + copy);
1183: copy.setClassValue(0);
1184: System.out
1185: .println("Copy with class value set to first value: "
1186: + copy);
1187: copy.setClassValue("third");
1188: System.out
1189: .println("Copy with class value set to \"third\": "
1190: + copy);
1191: copy.setMissing(1);
1192: System.out
1193: .println("Copy with second attribute set to be missing: "
1194: + copy);
1195: copy.setMissing(length);
1196: System.out.println("Copy with length set to be missing: "
1197: + copy);
1198: copy.setValue(0, 0);
1199: System.out.println("Copy with first attribute set to 0: "
1200: + copy);
1201: copy.setValue(weight, 1);
1202: System.out.println("Copy with weight attribute set to 1: "
1203: + copy);
1204: copy.setValue(position, "second");
1205: System.out.println("Copy with position set to \"second\": "
1206: + copy);
1207: copy.setValue(2, "first");
1208: System.out
1209: .println("Copy with last attribute set to \"first\": "
1210: + copy);
1211: System.out.println("Current weight of instance copy: "
1212: + copy.weight());
1213: copy.setWeight(2);
1214: System.out
1215: .println("Current weight of instance copy (set to 2): "
1216: + copy.weight());
1217: System.out.println("Last value of copy: "
1218: + copy.toString(2));
1219: System.out.println("Value of position for copy: "
1220: + copy.toString(position));
1221: System.out.println("Last value of copy (internal format): "
1222: + copy.value(2));
1223: System.out
1224: .println("Value of position for copy (internal format): "
1225: + copy.value(position));
1226: } catch (Exception e) {
1227: e.printStackTrace();
1228: }
1229: }
1230: }
|