001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * EuclideanDistance.java
019: * Copyright (C) 1999-2007 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.core;
024:
025: import java.io.BufferedReader;
026: import java.io.FileReader;
027: import java.io.InputStreamReader;
028: import java.io.Reader;
029: import java.io.Serializable;
030: import java.util.Enumeration;
031: import java.util.Vector;
032:
033: import weka.core.neighboursearch.PerformanceStats;
034:
035: /**
036: <!-- globalinfo-start -->
037: * Implementing Euclidean distance (or similarity) function.<br/>
038: * <br/>
039: * One object defines not one distance but the data model in which the distances between objects of that data model can be computed.<br/>
040: * <br/>
041: * Attention: For efficiency reasons the use of consistency checks (like are the data models of the two instances exactly the same), is low.
042: * <p/>
043: <!-- globalinfo-end -->
044: *
045: <!-- options-start -->
046: * Valid options are: <p/>
047: *
048: * <pre> -D
049: * Turns off the normalization of attribute
050: * values in distance calculation.</pre>
051: *
052: <!-- options-end -->
053: *
054: * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
055: * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
056: * @version $Revision: 1.11 $
057: */
058: public class EuclideanDistance implements DistanceFunction,
059: OptionHandler, Cloneable, Serializable {
060:
061: /** for serialization. */
062: private static final long serialVersionUID = 1068606253458807903L;
063:
064: /** the data. */
065: protected Instances m_Data;
066:
067: /** True if normalization is turned off (default false).*/
068: protected boolean m_DontNormalize = false;
069:
070: /** The number of attributes the contribute to a prediction. */
071: protected double m_NumAttributesUsed;
072:
073: /**
074: * Constructs an Euclidean Distance object.
075: */
076: public EuclideanDistance() {
077: }
078:
079: /**
080: * Constructs an Euclidean Distance object.
081: *
082: * @param data the instances the distance function should work on
083: */
084: public EuclideanDistance(Instances data) {
085: m_Data = data;
086: initializeRanges();
087: setNumAttributesUsed();
088: }
089:
090: /**
091: * Returns a string describing this object.
092: *
093: * @return a description of the evaluator suitable for
094: * displaying in the explorer/experimenter gui
095: */
096: public String globalInfo() {
097: return "Implementing Euclidean distance (or similarity) function.\n\n"
098: + "One object defines not one distance but the data model in which "
099: + "the distances between objects of that data model can be computed.\n\n"
100: + "Attention: For efficiency reasons the use of consistency checks "
101: + "(like are the data models of the two instances exactly the same), "
102: + "is low.";
103: }
104:
105: /**
106: * Returns an enumeration describing the available options.
107: *
108: * @return an enumeration of all the available options.
109: */
110: public Enumeration listOptions() {
111: Vector newVector = new Vector();
112:
113: newVector.add(new Option(
114: "\tTurns off the normalization of attribute \n"
115: + "\tvalues in distance calculation.", "D", 0,
116: "-D"));
117:
118: return newVector.elements();
119: }
120:
121: /**
122: * Parses a given list of options. Valid options are:<p/>
123: *
124: * @param options the list of options as an array of strings
125: * @throws Exception if an option is not supported
126: */
127: public void setOptions(String[] options) throws Exception {
128: setDontNormalize(Utils.getFlag('D', options));
129: }
130:
131: /**
132: * Gets the current settings of IBk.
133: *
134: * @return an array of strings suitable for passing to setOptions()
135: */
136: public String[] getOptions() {
137: String[] options = new String[1];
138:
139: if (getDontNormalize())
140: options[0] = "-D";
141: else
142: options[0] = "";
143:
144: return options;
145: }
146:
147: /**
148: * Sets the instances.
149: *
150: * @param insts the instances to use
151: */
152: public void setInstances(Instances insts) {
153: m_Data = insts;
154: initializeRanges();
155: setNumAttributesUsed();
156: }
157:
158: /**
159: * returns the instances currently set.
160: *
161: * @return the current instances
162: */
163: public Instances getInstances() {
164: return m_Data;
165: }
166:
167: /**
168: * Returns the tip text for this property.
169: *
170: * @return tip text for this property suitable for
171: * displaying in the explorer/experimenter gui
172: */
173: public String dontNormalizeTipText() {
174: return "Whether if the normalization of attributes should be turned off "
175: + "for distance calculation (Default: false i.e. attribute values "
176: + "are normalized). ";
177: }
178:
179: /**
180: * Sets whether if the attribute values are to be normalized in distance
181: * calculation.
182: *
183: * @param dontNormalize if true the values are not normalized
184: */
185: public void setDontNormalize(boolean dontNormalize) {
186: m_DontNormalize = dontNormalize;
187: }
188:
189: /**
190: * Gets whether if the attribute values are to be normazlied in distance
191: * calculation. (default false i.e. attribute values are normalized.)
192: *
193: * @return false if values get normalized
194: */
195: public boolean getDontNormalize() {
196: return m_DontNormalize;
197: }
198:
199: /**
200: * Update the distance function (if necessary) for the newly added instance.
201: *
202: * @param ins the instance to add
203: */
204: public void update(Instance ins) {
205: updateRanges(ins);
206: }
207:
208: /**
209: * Calculates the distance between two instances.
210: *
211: * @param first the first instance
212: * @param second the second instance
213: * @return the distance between the two given instances
214: */
215: public double distance(Instance first, Instance second) {
216: return Math.sqrt(distance(first, second,
217: Double.POSITIVE_INFINITY));
218: }
219:
220: /**
221: * Calculates the distance (or similarity) between two instances. Need to
222: * pass this returned distance later on to postprocess method to set it on
223: * correct scale. <br/>
224: * P.S.: Please don't mix the use of this function with
225: * distance(Instance first, Instance second), as that already does post
226: * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
227: * this function and then later on do the post processing on all the
228: * distances.
229: *
230: * @param first the first instance
231: * @param second the second instance
232: * @param stats the structure for storing performance statistics.
233: * @return the distance between the two given instances or
234: * Double.POSITIVE_INFINITY.
235: */
236: public double distance(Instance first, Instance second,
237: PerformanceStats stats) { //debug method pls remove after use
238: return Math.sqrt(distance(first, second,
239: Double.POSITIVE_INFINITY, stats, false));
240: }
241:
242: /**
243: * Calculates the distance (or similarity) between two instances. Need to
244: * pass this returned distance later on to postprocess method to set it on
245: * correct scale. <br/>
246: * P.S.: Please don't mix the use of this function with
247: * distance(Instance first, Instance second), as that already does post
248: * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
249: * this function and then later on do the post processing on all the
250: * distances.
251: *
252: * @param first the first instance
253: * @param second the second instance
254: * @param cutOffValue If the distance being calculated becomes larger than
255: * cutOffValue then the rest of the calculation is skipped
256: * and Double.POSITIVE_INFINITY is returned. Otherwise
257: * the correct disntance is returned.
258: * @return the distance between the two given instances or
259: * Double.POSITIVE_INFINITY.
260: */
261: public double distance(Instance first, Instance second,
262: double cutOffValue) { //debug method pls remove after use
263: return distance(first, second, cutOffValue, null, false);
264: }
265:
266: /**
267: * Calculates the distance (or similarity) between two instances. Need to
268: * pass this returned distance later on to postprocess method to set it on
269: * correct scale. <br/>
270: * P.S.: Please don't mix the use of this function with
271: * distance(Instance first, Instance second), as that already does post
272: * processing. Please consider passing Double.POSITIVE_INFINITY as the
273: * cutOffValue to this function and then later on do the post processing on
274: * all the distances.
275: *
276: * @param first the first instance
277: * @param second the second instance
278: * @param cutOffValue If the distance being calculated becomes larger than
279: * cutOffValue then the rest of the calculation is skipped
280: * and Double.POSITIVE_INFINITY is returned. Otherwise
281: * the correct disntance is returned.
282: * @param print whether to print some debugging output
283: * @return the distance between the two given instances or
284: * Double.POSITIVE_INFINITY.
285: */
286: public double distance(Instance first, Instance second,
287: double cutOffValue, boolean print) {
288: return distance(first, second, cutOffValue, null, print);
289: }
290:
291: /**
292: * Calculates the distance (or similarity) between two instances. Need to
293: * pass this returned distance later on to postprocess method to set it on
294: * correct scale. <br/>
295: * P.S.: Please don't mix the use of this function with
296: * distance(Instance first, Instance second), as that already does post
297: * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
298: * this function and then later on do the post processing on all the
299: * distances.
300: *
301: * @param first the first instance
302: * @param second the second instance
303: * @param cutOffValue If the distance being calculated becomes larger than
304: * cutOffValue then the rest of the calculation is skipped
305: * and Double.POSITIVE_INFINITY is returned. Otherwise
306: * the correct disntance is returned.
307: * @param stats the structure for storing performance statistics.
308: * @return the distance between the two given instances or
309: * Double.POSITIVE_INFINITY.
310: */
311: public double distance(Instance first, Instance second,
312: double cutOffValue, PerformanceStats stats) { //debug method pls remove after use
313: return distance(first, second, cutOffValue, stats, false);
314: }
315:
316: /**
317: * Calculates the distance (or similarity) between two instances. Need to
318: * pass this returned distance later on to postprocess method to set it on
319: * correct scale. <br/>
320: * P.S.: Please don't mix the use of this function with
321: * distance(Instance first, Instance second), as that already does post
322: * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
323: * this function and then later on do the post processing on all the
324: * distances.
325: *
326: * @param first the first instance
327: * @param second the second instance
328: * @param cutOffValue If the distance being calculated becomes larger than
329: * cutOffValue then the rest of the calculation is skipped
330: * and Double.POSITIVE_INFINITY is returned. Otherwise
331: * the correct disntance is returned.
332: * @param stats the structure for storing performance statistics.
333: * @param print whether to print some debugging output
334: * @return the distance between the two given instances or
335: * Double.POSITIVE_INFINITY.
336: */
337: public double distance(Instance first, Instance second,
338: double cutOffValue, PerformanceStats stats, boolean print) {
339:
340: double distance = 0;
341: int firstI, secondI;
342:
343: if (print) {
344: OOPS("Instance1: " + first);
345: OOPS("Instance2: " + second);
346: OOPS("cutOffValue: " + cutOffValue);
347: }
348:
349: for (int p1 = 0, p2 = 0; p1 < first.numValues()
350: || p2 < second.numValues();) {
351: if (p1 >= first.numValues()) {
352: firstI = m_Data.numAttributes();
353: } else {
354: firstI = first.index(p1);
355: }
356: if (p2 >= second.numValues()) {
357: secondI = m_Data.numAttributes();
358: } else {
359: secondI = second.index(p2);
360: }
361: if (firstI == m_Data.classIndex()) {
362: p1++;
363: continue;
364: }
365: if (secondI == m_Data.classIndex()) {
366: p2++;
367: continue;
368: }
369: double diff;
370: if (print)
371: System.out.println("valueSparse(p1): "
372: + first.valueSparse(p1) + " valueSparse(p2): "
373: + second.valueSparse(p2));
374:
375: if (firstI == secondI) {
376: diff = difference(firstI, first.valueSparse(p1), second
377: .valueSparse(p2));
378: p1++;
379: p2++;
380: } else if (firstI > secondI) {
381: diff = difference(secondI, 0, second.valueSparse(p2));
382: p2++;
383: } else {
384: diff = difference(firstI, first.valueSparse(p1), 0);
385: p1++;
386: }
387: if (print)
388: System.out.println("diff: " + diff);
389: if (stats != null)
390: stats.incrCoordCount();
391:
392: distance += diff * diff;
393: if (distance > cutOffValue) //Utils.gr(distance, cutOffValue))
394: return Double.POSITIVE_INFINITY;
395: if (print)
396: System.out.println("distance: " + distance);
397: }
398: if (print) {
399: OOPS("Instance 1: " + first);
400: OOPS("Instance 2: " + second);
401: OOPS("distance: " + distance);
402: OOPS("AttribsUsed: " + m_NumAttributesUsed);
403: OOPS("distance/AttribsUsed: "
404: + Math.sqrt(distance / m_NumAttributesUsed));
405: }
406: return distance;
407: }
408:
409: /**
410: * Does post processing of the distances (if necessary) returned by
411: * distance(distance(Instance first, Instance second, double cutOffValue). It
412: * is necessary to do so to get the correct distances if
413: * distance(distance(Instance first, Instance second, double cutOffValue) is
414: * used. This is because that function actually returns the squared distance
415: * to avoid inaccuracies arising from floating point comparison.
416: *
417: * @param distances the distances to post-process
418: */
419: public void postProcessDistances(double distances[]) {
420: for (int i = 0; i < distances.length; i++) {
421: distances[i] = Math.sqrt(distances[i]);
422: }
423: }
424:
425: /**
426: * Computes the difference between two given attribute
427: * values.
428: *
429: * @param index the attribute index
430: * @param val1 the first value
431: * @param val2 the second value
432: * @return the difference
433: */
434: private double difference(int index, double val1, double val2) {
435:
436: switch (m_Data.attribute(index).type()) {
437: case Attribute.NOMINAL:
438:
439: // If attribute is nominal
440: if (Instance.isMissingValue(val1)
441: || Instance.isMissingValue(val2)
442: || ((int) val1 != (int) val2)) {
443: return 1;
444: } else {
445: return 0;
446: }
447: case Attribute.NUMERIC:
448: // If attribute is numeric
449: if (Instance.isMissingValue(val1)
450: || Instance.isMissingValue(val2)) {
451: if (Instance.isMissingValue(val1)
452: && Instance.isMissingValue(val2)) {
453: if (!m_DontNormalize) //We are doing normalization
454: return 1;
455: else
456: return (m_Ranges[index][R_MAX] - m_Ranges[index][R_MIN]);
457: } else {
458: double diff;
459: if (Instance.isMissingValue(val2)) {
460: diff = (!m_DontNormalize) ? norm(val1, index)
461: : val1;
462: } else {
463: diff = (!m_DontNormalize) ? norm(val2, index)
464: : val2;
465: }
466: if (!m_DontNormalize && diff < 0.5) {
467: diff = 1.0 - diff;
468: } else if (m_DontNormalize) {
469: if ((m_Ranges[index][R_MAX] - diff) > (diff - m_Ranges[index][R_MIN]))
470: return m_Ranges[index][R_MAX] - diff;
471: else
472: return diff - m_Ranges[index][R_MIN];
473: }
474: return diff;
475: }
476: } else {
477: return (!m_DontNormalize) ? (norm(val1, index) - norm(
478: val2, index)) : (val1 - val2);
479: }
480: default:
481: return 0;
482: }
483: }
484:
485: /**
486: * Returns the squared difference of two values of an attribute.
487: *
488: * @param index the attribute index
489: * @param val1 the first value
490: * @param val2 the second value
491: * @return the squared difference
492: */
493: public double sqDifference(int index, double val1, double val2) {
494: double val = difference(index, val1, val2);
495: return val * val;
496: }
497:
498: /**
499: * Normalizes a given value of a numeric attribute.
500: *
501: * @param x the value to be normalized
502: * @param i the attribute's index
503: * @return the normalized value
504: */
505: private double norm(double x, int i) {
506:
507: if (Double.isNaN(m_Ranges[i][R_MIN])
508: || m_Ranges[i][R_MAX] == m_Ranges[i][R_MIN]) { //Utils.eq(m_Ranges[i][R_MAX], m_Ranges[i][R_MIN])) {
509: return 0;
510: } else {
511: return (x - m_Ranges[i][R_MIN]) / (m_Ranges[i][R_WIDTH]);
512: }
513: }
514:
515: /**
516: * Returns value in the middle of the two parameter values.
517: *
518: * @param ranges the ranges to this dimension
519: * @return the middle value
520: */
521: public double getMiddle(double[] ranges) {
522:
523: double middle = ranges[R_MIN] + ranges[R_WIDTH] * 0.5;
524: return middle;
525: }
526:
527: /**
528: * Returns the index of the closest point to the current instance.
529: * Index is index in Instances object that is the second parameter.
530: *
531: * @param instance the instance to assign a cluster to
532: * @param allPoints all points
533: * @param pointList the list of points
534: * @return the index of the closest point
535: * @throws Exception if something goes wrong
536: */
537: public int closestPoint(Instance instance, Instances allPoints,
538: int[] pointList) throws Exception {
539: double minDist = Integer.MAX_VALUE;
540: int bestPoint = 0;
541: for (int i = 0; i < pointList.length; i++) {
542: double dist = distance(instance, allPoints
543: .instance(pointList[i]), Double.POSITIVE_INFINITY);
544: if (dist < minDist) {
545: minDist = dist;
546: bestPoint = i;
547: }
548: }
549: return pointList[bestPoint];
550: }
551:
552: /**
553: * Returns true if the value of the given dimension is smaller or equal the
554: * value to be compared with.
555: *
556: * @param instance the instance where the value should be taken of
557: * @param dim the dimension of the value
558: * @param value the value to compare with
559: * @return true if value of instance is smaller or equal value
560: */
561: public boolean valueIsSmallerEqual(Instance instance, int dim,
562: double value) { //This stays
563: return instance.value(dim) <= value;
564: }
565:
566: /**
567: * Documents the content of an EuclideanDistance object in a string.
568: *
569: * @return the converted string
570: */
571: public String toString() {
572:
573: StringBuffer text = new StringBuffer();
574: //todo
575: text.append("\n");
576: return text.toString();
577: }
578:
579: /**
580: * Used for debug println's.
581: *
582: * @param output string that is printed
583: */
584: private void OOPS(String output) {
585: System.out.println(output);
586: }
587:
588: /**
589: * Computes and sets the number of attributes used.
590: */
591: private void setNumAttributesUsed() {
592:
593: m_NumAttributesUsed = 0.0;
594: if (m_Data != null) {
595: for (int i = 0; i < m_Data.numAttributes(); i++) {
596: if ((i != m_Data.classIndex())
597: && (m_Data.attribute(i).isNominal() || m_Data
598: .attribute(i).isNumeric())) {
599: m_NumAttributesUsed += 1.0;
600: }
601: }
602: }
603: }
604:
605: /*============================Ranges related functions=====================*/
606:
607: /** The range of the attributes. */
608: //being used in KDTree and EuclideanDistance
609: protected double[][] m_Ranges;
610:
611: /** Index in ranges for MIN. */
612: public static final int R_MIN = 0;
613: /** Index in ranges for MAX. */
614: public static final int R_MAX = 1;
615: /** Index in ranges for WIDTH. */
616: public static final int R_WIDTH = 2;
617:
618: /**
619: * Initializes the ranges using all instances of the dataset.
620: * Sets m_Ranges.
621: *
622: * @return the ranges
623: */
624: //Being used in other classes (KDTree).
625: public double[][] initializeRanges() {
626:
627: if (m_Data == null) {
628: m_Ranges = null;
629: return null;
630: }
631:
632: int numAtt = m_Data.numAttributes();
633: double[][] ranges = new double[numAtt][3];
634:
635: if (m_Data.numInstances() <= 0) {
636: initializeRangesEmpty(numAtt, ranges);
637: m_Ranges = ranges;
638: return ranges;
639: } else
640: // initialize ranges using the first instance
641: updateRangesFirst(m_Data.instance(0), numAtt, ranges);
642:
643: // update ranges, starting from the second
644: for (int i = 1; i < m_Data.numInstances(); i++) {
645: updateRanges(m_Data.instance(i), numAtt, ranges);
646: }
647: m_Ranges = ranges;
648: return ranges;
649: }
650:
651: /**
652: * Initializes the ranges of a subset of the instances of this dataset.
653: * Therefore m_Ranges is not set.
654: *
655: * @param instList list of indexes of the subset
656: * @return the ranges
657: * @throws Exception if something goes wrong
658: */
659: //being used in other classes (KDTree and XMeans)
660: public double[][] initializeRanges(int[] instList) throws Exception {
661:
662: if (m_Data == null) {
663: throw new Exception("No instances supplied.");
664: }
665:
666: int numAtt = m_Data.numAttributes();
667: double[][] ranges = new double[numAtt][3];
668:
669: if (m_Data.numInstances() <= 0) {
670: initializeRangesEmpty(numAtt, ranges);
671: return ranges;
672: } else {
673: // initialize ranges using the first instance
674: updateRangesFirst(m_Data.instance(instList[0]), numAtt,
675: ranges);
676: // update ranges, starting from the second
677: for (int i = 1; i < instList.length; i++) {
678: updateRanges(m_Data.instance(instList[i]), numAtt,
679: ranges);
680: }
681: }
682: return ranges;
683: }
684:
685: /**
686: * Initializes the ranges of a subset of the instances of this dataset.
687: * Therefore m_Ranges is not set.
688: * The caller of this method should ensure that the supplied start and end
689: * indices are valid (start <= end, end<instList.length etc) and
690: * correct.
691: *
692: * @param instList list of indexes of the instances
693: * @param startIdx start index of the subset of instances in the indices array
694: * @param endIdx end index of the subset of instances in the indices array
695: * @return the ranges
696: * @throws Exception if something goes wrong
697: */
698: //being used in other classes (KDTree and XMeans)
699: public double[][] initializeRanges(int[] instList, int startIdx,
700: int endIdx) throws Exception {
701:
702: if (m_Data == null) {
703: throw new Exception("No instances supplied.");
704: }
705:
706: int numAtt = m_Data.numAttributes();
707: double[][] ranges = new double[numAtt][3];
708:
709: if (m_Data.numInstances() <= 0) {
710: initializeRangesEmpty(numAtt, ranges);
711: return ranges;
712: } else {
713: // initialize ranges using the first instance
714: updateRangesFirst(m_Data.instance(instList[startIdx]),
715: numAtt, ranges);
716: // update ranges, starting from the second
717: for (int i = startIdx + 1; i <= endIdx; i++) {
718: updateRanges(m_Data.instance(instList[i]), numAtt,
719: ranges);
720: }
721: }
722: return ranges;
723: }
724:
725: /**
726: * Used to initialize the ranges.
727: *
728: * @param numAtt number of attributes in the model
729: * @param ranges low, high and width values for all attributes
730: */
731: //being used in the functions above
732: public void initializeRangesEmpty(int numAtt, double[][] ranges) {
733:
734: for (int j = 0; j < numAtt; j++) {
735: ranges[j][R_MIN] = Double.POSITIVE_INFINITY;
736: ranges[j][R_MAX] = -Double.POSITIVE_INFINITY;
737: ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY;
738: }
739: }
740:
741: /**
742: * Used to initialize the ranges. For this the values of the first
743: * instance is used to save time.
744: * Sets low and high to the values of the first instance and
745: * width to zero.
746: *
747: * @param instance the new instance
748: * @param numAtt number of attributes in the model
749: * @param ranges low, high and width values for all attributes
750: */
751: //being used in the functions above
752: public void updateRangesFirst(Instance instance, int numAtt,
753: double[][] ranges) {
754:
755: for (int j = 0; j < numAtt; j++) {
756: if (!instance.isMissing(j)) {
757: ranges[j][R_MIN] = instance.value(j);
758: ranges[j][R_MAX] = instance.value(j);
759: ranges[j][R_WIDTH] = 0.0;
760: } else { // if value was missing
761: ranges[j][R_MIN] = Double.POSITIVE_INFINITY;
762: ranges[j][R_MAX] = -Double.POSITIVE_INFINITY;
763: ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY;
764: }
765: }
766: }
767:
768: /**
769: * Updates the minimum and maximum and width values for all the attributes
770: * based on a new instance.
771: *
772: * @param instance the new instance
773: * @param numAtt number of attributes in the model
774: * @param ranges low, high and width values for all attributes
775: */
776: //Being used in the functions above
777: private void updateRanges(Instance instance, int numAtt,
778: double[][] ranges) {
779:
780: // updateRangesFirst must have been called on ranges
781: for (int j = 0; j < numAtt; j++) {
782: double value = instance.value(j);
783: if (!instance.isMissing(j)) {
784: if (value < ranges[j][R_MIN]) {
785: ranges[j][R_MIN] = value;
786: ranges[j][R_WIDTH] = ranges[j][R_MAX]
787: - ranges[j][R_MIN];
788: if (value > ranges[j][R_MAX]) { //if this is the first value that is
789: ranges[j][R_MAX] = value; //not missing. The,0
790: ranges[j][R_WIDTH] = ranges[j][R_MAX]
791: - ranges[j][R_MIN];
792: }
793: } else {
794: if (value > ranges[j][R_MAX]) {
795: ranges[j][R_MAX] = value;
796: ranges[j][R_WIDTH] = ranges[j][R_MAX]
797: - ranges[j][R_MIN];
798: }
799: }
800: }
801: }
802: }
803:
804: /**
805: * Updates the ranges given a new instance.
806: *
807: * @param instance the new instance
808: * @param ranges low, high and width values for all attributes
809: * @return the updated ranges
810: */
811: //being used in other classes (KDTree)
812: public double[][] updateRanges(Instance instance, double[][] ranges) {
813:
814: // updateRangesFirst must have been called on ranges
815: for (int j = 0; j < ranges.length; j++) {
816: double value = instance.value(j);
817: if (!instance.isMissing(j)) {
818: if (value < ranges[j][R_MIN]) {
819: ranges[j][R_MIN] = value;
820: ranges[j][R_WIDTH] = ranges[j][R_MAX]
821: - ranges[j][R_MIN];
822: } else {
823: if (instance.value(j) > ranges[j][R_MAX]) {
824: ranges[j][R_MAX] = value;
825: ranges[j][R_WIDTH] = ranges[j][R_MAX]
826: - ranges[j][R_MIN];
827: }
828: }
829: }
830: }
831: return ranges;
832: }
833:
834: /**
835: * Update the ranges if a new instance comes.
836: *
837: * @param instance the new instance
838: */
839: //Being used in KDTree
840: public void updateRanges(Instance instance) {
841: m_Ranges = updateRanges(instance, m_Ranges);
842: }
843:
844: /**
845: * prints the ranges.
846: *
847: * @param ranges low, high and width values for all attributes
848: */
849: //Not being used in any other class. Not even being used in this class.
850: public void printRanges(double[][] ranges) {
851:
852: OOPS("printRanges");
853: // updateRangesFirst must have been called on ranges
854: for (int j = 0; j < ranges.length; j++) {
855: OOPS(" " + j + "-MIN " + ranges[j][R_MIN]);
856: OOPS(" " + j + "-MAX " + ranges[j][R_MAX]);
857: OOPS(" " + j + "-WIDTH " + ranges[j][R_WIDTH]);
858: }
859: }
860:
861: /**
862: * Test if an instance is within the given ranges.
863: *
864: * @param instance the instance
865: * @param ranges the ranges the instance is tested to be in
866: * @return true if instance is within the ranges
867: */
868: //being used in IBk but better to remove from there.
869: public boolean inRanges(Instance instance, double[][] ranges) {
870: boolean isIn = true;
871:
872: // updateRangesFirst must have been called on ranges
873: for (int j = 0; isIn && (j < ranges.length); j++) {
874: if (!instance.isMissing(j)) {
875: double value = instance.value(j);
876: isIn = value <= ranges[j][R_MAX];
877: if (isIn)
878: isIn = value >= ranges[j][R_MIN];
879: }
880: }
881: return isIn;
882: }
883:
884: /**
885: * Prints a range to standard output.
886: *
887: * @param model the instances this ranges are for
888: * @param ranges the ranges to print
889: */
890: //Not being used in any other class. Not even being used in this class.
891: public void printRanges(Instances model, double[][] ranges) {
892: System.out.println("printRanges");
893: for (int j = 0; j < model.numAttributes(); j++) {
894: System.out.print("Attribute " + j + " MIN: "
895: + ranges[j][R_MIN]);
896: System.out.print(" MAX: " + ranges[j][R_MAX]);
897: System.out.print(" WIDTH: " + ranges[j][R_WIDTH]);
898: System.out.println(" ");
899: }
900: }
901:
902: /**
903: * Check if ranges are set.
904: *
905: * @return true if ranges are set
906: */
907: //Not being used in any other class
908: public boolean rangesSet() {
909: return (m_Ranges != null);
910: }
911:
912: /**
913: * Method to get the ranges.
914: *
915: * @return the ranges
916: * @throws Exception if no randes are set yet
917: */
918: //Not being used in any other class
919: public double[][] getRanges() throws Exception {
920: if (m_Ranges == null)
921: throw new Exception("Ranges not yet set.");
922: return m_Ranges;
923: }
924:
925: /**
926: * Main method for testing this class.
927: *
928: * @param args the commandline parameters
929: */
930: public static void main(String[] args) {
931: try {
932: Reader r = null;
933: if (args.length > 1) {
934: throw (new Exception(
935: "Usage: EuclideanDistance <filename>"));
936: } else if (args.length == 0) {
937: r = new BufferedReader(new InputStreamReader(System.in));
938: } else {
939: r = new BufferedReader(new FileReader(args[0]));
940: }
941: Instances i = new Instances(r);
942: EuclideanDistance test = new EuclideanDistance(i);
943: System.out.println("test:\n " + test);
944:
945: } catch (Exception e) {
946: e.printStackTrace();
947: }
948: }
949: }
|