001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * StringLocator.java
019: * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
020: */
021:
022: package weka.core;
023:
024: /**
025: * This class locates and records the indices of String attributes,
026: * recursively in case of Relational attributes. The indices are normally
027: * used for copying the Strings from one Instances object to another.
028: *
029: * @author fracpete (fracpete at waikato dot ac dot nz)
030: * @version $Revision: 1.3 $
031: * @see Attribute#STRING
032: * @see Attribute#RELATIONAL
033: */
034: public class StringLocator extends AttributeLocator {
035:
036: /** for serialization */
037: private static final long serialVersionUID = 7805522230268783972L;
038:
039: /**
040: * initializes the StringLocator with the given data
041: *
042: * @param data the data to work on
043: */
044: public StringLocator(Instances data) {
045: super (data, Attribute.STRING);
046: }
047:
048: /**
049: * Initializes the StringLocator with the given data.
050: * Checks only the given range.
051: *
052: * @param data the data to work on
053: * @param fromIndex the first index to inspect (including)
054: * @param toIndex the last index to check (including)
055: */
056: public StringLocator(Instances data, int fromIndex, int toIndex) {
057: super (data, Attribute.STRING, fromIndex, toIndex);
058: }
059:
060: /**
061: * Initializes the AttributeLocator with the given data.
062: * Checks only the specified attribute indices.
063: *
064: * @param data the data to work on
065: * @param indices the attribute indices to check
066: */
067: public StringLocator(Instances data, int[] indices) {
068: super (data, Attribute.STRING, indices);
069: }
070:
071: /**
072: * Copies string values contained in the instance copied to a new
073: * dataset. The Instance must already be assigned to a dataset. This
074: * dataset and the destination dataset must have the same structure.
075: *
076: * @param inst the Instance containing the string values to copy.
077: * @param destDataset the destination set of Instances
078: * @param strAtts an AttributeLocator containing the indices of
079: * any string attributes in the dataset.
080: */
081: public static void copyStringValues(Instance inst,
082: Instances destDataset, AttributeLocator strAtts) {
083:
084: if (inst.dataset() == null) {
085: throw new IllegalArgumentException(
086: "Instance has no dataset assigned!!");
087: } else if (inst.dataset().numAttributes() != destDataset
088: .numAttributes()) {
089: throw new IllegalArgumentException(
090: "Src and Dest differ in # of attributes!!");
091: }
092: copyStringValues(inst, true, inst.dataset(), strAtts,
093: destDataset, strAtts);
094: }
095:
096: /**
097: * Takes string values referenced by an Instance and copies them from a
098: * source dataset to a destination dataset. The instance references are
099: * updated to be valid for the destination dataset. The instance may have the
100: * structure (i.e. number and attribute position) of either dataset (this
101: * affects where references are obtained from). Only works if the number
102: * of string attributes is the same in both indices (implicitly these string
103: * attributes should be semantically same but just with shifted positions).
104: *
105: * @param instance the instance containing references to strings
106: * in the source dataset that will have references
107: * updated to be valid for the destination dataset.
108: * @param instSrcCompat true if the instance structure is the same as
109: * the source, or false if it is the same as the
110: * destination (i.e. which of the string attribute
111: * indices contains the correct locations for this
112: * instance).
113: * @param srcDataset the dataset for which the current instance
114: * string references are valid (after any position
115: * mapping if needed)
116: * @param srcLoc an AttributeLocator containing the indices of
117: * string attributes in the source datset.
118: * @param destDataset the dataset for which the current instance
119: * string references need to be inserted (after
120: * any position mapping if needed)
121: * @param destLoc an AttributeLocator containing the indices of
122: * string attributes in the destination datset.
123: */
124: public static void copyStringValues(Instance instance,
125: boolean instSrcCompat, Instances srcDataset,
126: AttributeLocator srcLoc, Instances destDataset,
127: AttributeLocator destLoc) {
128: if (srcDataset == destDataset)
129: return;
130:
131: if (srcLoc.getAttributeIndices().length != destLoc
132: .getAttributeIndices().length)
133: throw new IllegalArgumentException(
134: "Src and Dest string indices differ in length!!");
135:
136: if (srcLoc.getLocatorIndices().length != destLoc
137: .getLocatorIndices().length)
138: throw new IllegalArgumentException(
139: "Src and Dest locator indices differ in length!!");
140:
141: for (int i = 0; i < srcLoc.getAttributeIndices().length; i++) {
142: int instIndex = instSrcCompat ? srcLoc
143: .getActualIndex(srcLoc.getAttributeIndices()[i])
144: : destLoc.getActualIndex(destLoc
145: .getAttributeIndices()[i]);
146: Attribute src = srcDataset.attribute(srcLoc
147: .getActualIndex(srcLoc.getAttributeIndices()[i]));
148: Attribute dest = destDataset.attribute(destLoc
149: .getActualIndex(destLoc.getAttributeIndices()[i]));
150: if (!instance.isMissing(instIndex)) {
151: int valIndex = dest.addStringValue(src, (int) instance
152: .value(instIndex));
153: instance.setValue(instIndex, (double) valIndex);
154: }
155: }
156:
157: // recurse if necessary
158: int[] srcIndices = srcLoc.getLocatorIndices();
159: int[] destIndices = destLoc.getLocatorIndices();
160: for (int i = 0; i < srcIndices.length; i++) {
161: int index = instSrcCompat ? srcLoc
162: .getActualIndex(srcIndices[i]) : destLoc
163: .getActualIndex(destIndices[i]);
164: if (instance.isMissing(index))
165: continue;
166: Instances rel = instSrcCompat ? instance
167: .relationalValue(index) : instance
168: .relationalValue(index);
169: AttributeLocator srcStrAttsNew = srcLoc
170: .getLocator(srcIndices[i]);
171: Instances srcDatasetNew = srcStrAttsNew.getData();
172: AttributeLocator destStrAttsNew = destLoc
173: .getLocator(destIndices[i]);
174: Instances destDatasetNew = destStrAttsNew.getData();
175: for (int n = 0; n < rel.numInstances(); n++) {
176: copyStringValues(rel.instance(n), instSrcCompat,
177: srcDatasetNew, srcStrAttsNew, destDatasetNew,
178: destStrAttsNew);
179: }
180: }
181: }
182: }
|