Source Code Cross Referenced for EuclideanDistance.java in  » Science » weka » weka » core » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.core 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *    This program is free software; you can redistribute it and/or modify
003:         *    it under the terms of the GNU General Public License as published by
004:         *    the Free Software Foundation; either version 2 of the License, or
005:         *    (at your option) any later version.
006:         *
007:         *    This program is distributed in the hope that it will be useful,
008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
010:         *    GNU General Public License for more details.
011:         *
012:         *    You should have received a copy of the GNU General Public License
013:         *    along with this program; if not, write to the Free Software
014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015:         */
016:
017:        /*
018:         *    EuclideanDistance.java
019:         *    Copyright (C) 1999-2007 University of Waikato, Hamilton, New Zealand
020:         *
021:         */
022:
023:        package weka.core;
024:
025:        import java.io.BufferedReader;
026:        import java.io.FileReader;
027:        import java.io.InputStreamReader;
028:        import java.io.Reader;
029:        import java.io.Serializable;
030:        import java.util.Enumeration;
031:        import java.util.Vector;
032:
033:        import weka.core.neighboursearch.PerformanceStats;
034:
035:        /**
036:         <!-- globalinfo-start -->
037:         * Implementing Euclidean distance (or similarity) function.<br/>
038:         * <br/>
039:         * One object defines not one distance but the data model in which the distances between objects of that data model can be computed.<br/>
040:         * <br/>
041:         * Attention: For efficiency reasons the use of consistency checks (like are the data models of the two instances exactly the same), is low.
042:         * <p/>
043:         <!-- globalinfo-end -->
044:         *
045:         <!-- options-start -->
046:         * Valid options are: <p/>
047:         * 
048:         * <pre> -D
049:         *  Turns off the normalization of attribute 
050:         *  values in distance calculation.</pre>
051:         * 
052:         <!-- options-end --> 
053:         *
054:         * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
055:         * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
056:         * @version $Revision: 1.11 $
057:         */
058:        public class EuclideanDistance implements  DistanceFunction,
059:                OptionHandler, Cloneable, Serializable {
060:
            /** Version identifier used for serialization. */
            private static final long serialVersionUID = 1068606253458807903L;

            /** The dataset whose attributes, class index and ranges drive the distance calculation. */
            protected Instances m_Data;

            /** True if normalization is turned off (default false). */
            protected boolean m_DontNormalize = false;

            /**
             * The number of attributes that contribute to a prediction, i.e. the
             * non-class attributes that are nominal or numeric (computed by
             * setNumAttributesUsed()).
             */
            protected double m_NumAttributesUsed;
072:
073:            /**
074:             * Constructs an Euclidean Distance object.
075:             */
076:            public EuclideanDistance() {
077:            }
078:
079:            /**
080:             * Constructs an Euclidean Distance object.
081:             * 
082:             * @param data 	the instances the distance function should work on
083:             */
084:            public EuclideanDistance(Instances data) {
085:                m_Data = data;
086:                initializeRanges();
087:                setNumAttributesUsed();
088:            }
089:
090:            /**
091:             * Returns a string describing this object.
092:             * 
093:             * @return 		a description of the evaluator suitable for
094:             * 			displaying in the explorer/experimenter gui
095:             */
096:            public String globalInfo() {
097:                return "Implementing Euclidean distance (or similarity) function.\n\n"
098:                        + "One object defines not one distance but the data model in which "
099:                        + "the distances between objects of that data model can be computed.\n\n"
100:                        + "Attention: For efficiency reasons the use of consistency checks "
101:                        + "(like are the data models of the two instances exactly the same), "
102:                        + "is low.";
103:            }
104:
105:            /**
106:             * Returns an enumeration describing the available options.
107:             *
108:             * @return 		an enumeration of all the available options.
109:             */
110:            public Enumeration listOptions() {
111:                Vector newVector = new Vector();
112:
113:                newVector.add(new Option(
114:                        "\tTurns off the normalization of attribute \n"
115:                                + "\tvalues in distance calculation.", "D", 0,
116:                        "-D"));
117:
118:                return newVector.elements();
119:            }
120:
121:            /**
122:             * Parses a given list of options. Valid options are:<p/>
123:             *
124:             * @param options 	the list of options as an array of strings
125:             * @throws Exception 	if an option is not supported
126:             */
127:            public void setOptions(String[] options) throws Exception {
128:                setDontNormalize(Utils.getFlag('D', options));
129:            }
130:
131:            /**
132:             * Gets the current settings of IBk.
133:             *
134:             * @return 		an array of strings suitable for passing to setOptions()
135:             */
136:            public String[] getOptions() {
137:                String[] options = new String[1];
138:
139:                if (getDontNormalize())
140:                    options[0] = "-D";
141:                else
142:                    options[0] = "";
143:
144:                return options;
145:            }
146:
147:            /**
148:             * Sets the instances.
149:             * 
150:             * @param insts	the instances to use
151:             */
152:            public void setInstances(Instances insts) {
153:                m_Data = insts;
154:                initializeRanges();
155:                setNumAttributesUsed();
156:            }
157:
158:            /**
159:             * returns the instances currently set.
160:             * 
161:             * @return		the current instances
162:             */
163:            public Instances getInstances() {
164:                return m_Data;
165:            }
166:
167:            /** 
168:             * Returns the tip text for this property.
169:             * 
170:             * @return 		tip text for this property suitable for
171:             *         		displaying in the explorer/experimenter gui
172:             */
173:            public String dontNormalizeTipText() {
174:                return "Whether if the normalization of attributes should be turned off "
175:                        + "for distance calculation (Default: false i.e. attribute values "
176:                        + "are normalized). ";
177:            }
178:
179:            /** 
180:             * Sets whether if the attribute values are to be normalized in distance
181:             * calculation.
182:             * 
183:             * @param dontNormalize	if true the values are not normalized
184:             */
185:            public void setDontNormalize(boolean dontNormalize) {
186:                m_DontNormalize = dontNormalize;
187:            }
188:
189:            /**
190:             * Gets whether if the attribute values are to be normazlied in distance
191:             * calculation. (default false i.e. attribute values are normalized.)
192:             * 
193:             * @return		false if values get normalized
194:             */
195:            public boolean getDontNormalize() {
196:                return m_DontNormalize;
197:            }
198:
199:            /**
200:             * Update the distance function (if necessary) for the newly added instance.
201:             * 
202:             * @param ins		the instance to add
203:             */
204:            public void update(Instance ins) {
205:                updateRanges(ins);
206:            }
207:
208:            /**
209:             * Calculates the distance between two instances.
210:             * 
211:             * @param first 	the first instance
212:             * @param second 	the second instance
213:             * @return 		the distance between the two given instances
214:             */
215:            public double distance(Instance first, Instance second) {
216:                return Math.sqrt(distance(first, second,
217:                        Double.POSITIVE_INFINITY));
218:            }
219:
220:            /**
221:             * Calculates the distance (or similarity) between two instances. Need to
222:             * pass this returned distance later on to postprocess method to set it on
223:             * correct scale. <br/>
224:             * P.S.: Please don't mix the use of this function with
225:             * distance(Instance first, Instance second), as that already does post
226:             * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
227:             * this function and then later on do the post processing on all the
228:             * distances.
229:             *
230:             * @param first 	the first instance
231:             * @param second 	the second instance
232:             * @param stats 	the structure for storing performance statistics.
233:             * @return 		the distance between the two given instances or 
234:             * 			Double.POSITIVE_INFINITY.
235:             */
236:            public double distance(Instance first, Instance second,
237:                    PerformanceStats stats) { //debug method pls remove after use
238:                return Math.sqrt(distance(first, second,
239:                        Double.POSITIVE_INFINITY, stats, false));
240:            }
241:
242:            /**
243:             * Calculates the distance (or similarity) between two instances. Need to
244:             * pass this returned distance later on to postprocess method to set it on
245:             * correct scale. <br/>
246:             * P.S.: Please don't mix the use of this function with
247:             * distance(Instance first, Instance second), as that already does post
248:             * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
249:             * this function and then later on do the post processing on all the
250:             * distances.
251:             *
252:             * @param first 	the first instance
253:             * @param second 	the second instance
254:             * @param cutOffValue	If the distance being calculated becomes larger than 
255:             * 			cutOffValue then the rest of the calculation is skipped 
256:             * 			and Double.POSITIVE_INFINITY is returned. Otherwise 
257:             * 			the correct disntance is returned.
258:             * @return 		the distance between the two given instances or 
259:             * 			Double.POSITIVE_INFINITY.
260:             */
261:            public double distance(Instance first, Instance second,
262:                    double cutOffValue) { //debug method pls remove after use
263:                return distance(first, second, cutOffValue, null, false);
264:            }
265:
266:            /**
267:             * Calculates the distance (or similarity) between two instances. Need to
268:             * pass this returned distance later on to postprocess method to set it on
269:             * correct scale. <br/>
270:             * P.S.: Please don't mix the use of this function with
271:             * distance(Instance first, Instance second), as that already does post
272:             * processing. Please consider passing Double.POSITIVE_INFINITY as the 
273:             * cutOffValue to this function and then later on do the post processing on 
274:             * all the distances.
275:             *
276:             * @param first 	the first instance
277:             * @param second 	the second instance
278:             * @param cutOffValue	If the distance being calculated becomes larger than 
279:             * 			cutOffValue then the rest of the calculation is skipped 
280:             * 			and Double.POSITIVE_INFINITY is returned. Otherwise 
281:             * 			the correct disntance is returned.
282:             * @param print 	whether to print some debugging output
283:             * @return 		the distance between the two given instances or 
284:             * 			Double.POSITIVE_INFINITY.
285:             */
286:            public double distance(Instance first, Instance second,
287:                    double cutOffValue, boolean print) {
288:                return distance(first, second, cutOffValue, null, print);
289:            }
290:
291:            /**
292:             * Calculates the distance (or similarity) between two instances. Need to
293:             * pass this returned distance later on to postprocess method to set it on
294:             * correct scale. <br/>
295:             * P.S.: Please don't mix the use of this function with
296:             * distance(Instance first, Instance second), as that already does post
297:             * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
298:             * this function and then later on do the post processing on all the
299:             * distances.
300:             *
301:             * @param first 	the first instance
302:             * @param second 	the second instance
303:             * @param cutOffValue	If the distance being calculated becomes larger than 
304:             * 			cutOffValue then the rest of the calculation is skipped 
305:             * 			and Double.POSITIVE_INFINITY is returned. Otherwise 
306:             * 			the correct disntance is returned.
307:             * @param stats 	the structure for storing performance statistics.
308:             * @return 		the distance between the two given instances or 
309:             * 			Double.POSITIVE_INFINITY.
310:             */
311:            public double distance(Instance first, Instance second,
312:                    double cutOffValue, PerformanceStats stats) { //debug method pls remove after use
313:                return distance(first, second, cutOffValue, stats, false);
314:            }
315:
316:            /**
317:             * Calculates the distance (or similarity) between two instances. Need to
318:             * pass this returned distance later on to postprocess method to set it on
319:             * correct scale. <br/>
320:             * P.S.: Please don't mix the use of this function with
321:             * distance(Instance first, Instance second), as that already does post
322:             * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
323:             * this function and then later on do the post processing on all the
324:             * distances.
325:             *
326:             * @param first 	the first instance
327:             * @param second 	the second instance
328:             * @param cutOffValue	If the distance being calculated becomes larger than 
329:             * 			cutOffValue then the rest of the calculation is skipped 
330:             * 			and Double.POSITIVE_INFINITY is returned. Otherwise 
331:             * 			the correct disntance is returned.
332:             * @param stats 	the structure for storing performance statistics.
333:             * @param print 	whether to print some debugging output
334:             * @return 		the distance between the two given instances or 
335:             * 			Double.POSITIVE_INFINITY.
336:             */
337:            public double distance(Instance first, Instance second,
338:                    double cutOffValue, PerformanceStats stats, boolean print) {
339:
340:                double distance = 0;
341:                int firstI, secondI;
342:
343:                if (print) {
344:                    OOPS("Instance1: " + first);
345:                    OOPS("Instance2: " + second);
346:                    OOPS("cutOffValue: " + cutOffValue);
347:                }
348:
349:                for (int p1 = 0, p2 = 0; p1 < first.numValues()
350:                        || p2 < second.numValues();) {
351:                    if (p1 >= first.numValues()) {
352:                        firstI = m_Data.numAttributes();
353:                    } else {
354:                        firstI = first.index(p1);
355:                    }
356:                    if (p2 >= second.numValues()) {
357:                        secondI = m_Data.numAttributes();
358:                    } else {
359:                        secondI = second.index(p2);
360:                    }
361:                    if (firstI == m_Data.classIndex()) {
362:                        p1++;
363:                        continue;
364:                    }
365:                    if (secondI == m_Data.classIndex()) {
366:                        p2++;
367:                        continue;
368:                    }
369:                    double diff;
370:                    if (print)
371:                        System.out.println("valueSparse(p1): "
372:                                + first.valueSparse(p1) + " valueSparse(p2): "
373:                                + second.valueSparse(p2));
374:
375:                    if (firstI == secondI) {
376:                        diff = difference(firstI, first.valueSparse(p1), second
377:                                .valueSparse(p2));
378:                        p1++;
379:                        p2++;
380:                    } else if (firstI > secondI) {
381:                        diff = difference(secondI, 0, second.valueSparse(p2));
382:                        p2++;
383:                    } else {
384:                        diff = difference(firstI, first.valueSparse(p1), 0);
385:                        p1++;
386:                    }
387:                    if (print)
388:                        System.out.println("diff: " + diff);
389:                    if (stats != null)
390:                        stats.incrCoordCount();
391:
392:                    distance += diff * diff;
393:                    if (distance > cutOffValue) //Utils.gr(distance, cutOffValue))
394:                        return Double.POSITIVE_INFINITY;
395:                    if (print)
396:                        System.out.println("distance: " + distance);
397:                }
398:                if (print) {
399:                    OOPS("Instance 1: " + first);
400:                    OOPS("Instance 2: " + second);
401:                    OOPS("distance: " + distance);
402:                    OOPS("AttribsUsed: " + m_NumAttributesUsed);
403:                    OOPS("distance/AttribsUsed: "
404:                            + Math.sqrt(distance / m_NumAttributesUsed));
405:                }
406:                return distance;
407:            }
408:
409:            /**
410:             * Does post processing of the distances (if necessary) returned by
411:             * distance(distance(Instance first, Instance second, double cutOffValue). It
412:             * is necessary to do so to get the correct distances if
413:             * distance(distance(Instance first, Instance second, double cutOffValue) is
414:             * used. This is because that function actually returns the squared distance
415:             * to avoid inaccuracies arising from floating point comparison.
416:             * 
417:             * @param distances	the distances to post-process
418:             */
419:            public void postProcessDistances(double distances[]) {
420:                for (int i = 0; i < distances.length; i++) {
421:                    distances[i] = Math.sqrt(distances[i]);
422:                }
423:            }
424:
425:            /**
426:             * Computes the difference between two given attribute
427:             * values.
428:             * 
429:             * @param index	the attribute index
430:             * @param val1	the first value
431:             * @param val2	the second value
432:             * @return		the difference
433:             */
434:            private double difference(int index, double val1, double val2) {
435:
436:                switch (m_Data.attribute(index).type()) {
437:                case Attribute.NOMINAL:
438:
439:                    // If attribute is nominal
440:                    if (Instance.isMissingValue(val1)
441:                            || Instance.isMissingValue(val2)
442:                            || ((int) val1 != (int) val2)) {
443:                        return 1;
444:                    } else {
445:                        return 0;
446:                    }
447:                case Attribute.NUMERIC:
448:                    // If attribute is numeric
449:                    if (Instance.isMissingValue(val1)
450:                            || Instance.isMissingValue(val2)) {
451:                        if (Instance.isMissingValue(val1)
452:                                && Instance.isMissingValue(val2)) {
453:                            if (!m_DontNormalize) //We are doing normalization
454:                                return 1;
455:                            else
456:                                return (m_Ranges[index][R_MAX] - m_Ranges[index][R_MIN]);
457:                        } else {
458:                            double diff;
459:                            if (Instance.isMissingValue(val2)) {
460:                                diff = (!m_DontNormalize) ? norm(val1, index)
461:                                        : val1;
462:                            } else {
463:                                diff = (!m_DontNormalize) ? norm(val2, index)
464:                                        : val2;
465:                            }
466:                            if (!m_DontNormalize && diff < 0.5) {
467:                                diff = 1.0 - diff;
468:                            } else if (m_DontNormalize) {
469:                                if ((m_Ranges[index][R_MAX] - diff) > (diff - m_Ranges[index][R_MIN]))
470:                                    return m_Ranges[index][R_MAX] - diff;
471:                                else
472:                                    return diff - m_Ranges[index][R_MIN];
473:                            }
474:                            return diff;
475:                        }
476:                    } else {
477:                        return (!m_DontNormalize) ? (norm(val1, index) - norm(
478:                                val2, index)) : (val1 - val2);
479:                    }
480:                default:
481:                    return 0;
482:                }
483:            }
484:
485:            /**
486:             * Returns the squared difference of two values of an attribute.
487:             * 
488:             * @param index	the attribute index
489:             * @param val1	the first value
490:             * @param val2	the second value
491:             * @return		the squared difference
492:             */
493:            public double sqDifference(int index, double val1, double val2) {
494:                double val = difference(index, val1, val2);
495:                return val * val;
496:            }
497:
498:            /**
499:             * Normalizes a given value of a numeric attribute.
500:             *
501:             * @param x 		the value to be normalized
502:             * @param i 		the attribute's index
503:             * @return		the normalized value
504:             */
505:            private double norm(double x, int i) {
506:
507:                if (Double.isNaN(m_Ranges[i][R_MIN])
508:                        || m_Ranges[i][R_MAX] == m_Ranges[i][R_MIN]) { //Utils.eq(m_Ranges[i][R_MAX], m_Ranges[i][R_MIN])) {
509:                    return 0;
510:                } else {
511:                    return (x - m_Ranges[i][R_MIN]) / (m_Ranges[i][R_WIDTH]);
512:                }
513:            }
514:
515:            /**
516:             * Returns value in the middle of the two parameter values.
517:             * 
518:             * @param ranges 	the ranges to this dimension
519:             * @return 		the middle value
520:             */
521:            public double getMiddle(double[] ranges) {
522:
523:                double middle = ranges[R_MIN] + ranges[R_WIDTH] * 0.5;
524:                return middle;
525:            }
526:
527:            /**
528:             * Returns the index of the closest point to the current instance.
529:             * Index is index in Instances object that is the second parameter.
530:             *
531:             * @param instance 	the instance to assign a cluster to
532:             * @param allPoints 	all points
533:             * @param pointList 	the list of points
534:             * @return 		the index of the closest point
535:             * @throws Exception	if something goes wrong
536:             */
537:            public int closestPoint(Instance instance, Instances allPoints,
538:                    int[] pointList) throws Exception {
539:                double minDist = Integer.MAX_VALUE;
540:                int bestPoint = 0;
541:                for (int i = 0; i < pointList.length; i++) {
542:                    double dist = distance(instance, allPoints
543:                            .instance(pointList[i]), Double.POSITIVE_INFINITY);
544:                    if (dist < minDist) {
545:                        minDist = dist;
546:                        bestPoint = i;
547:                    }
548:                }
549:                return pointList[bestPoint];
550:            }
551:
552:            /**
553:             * Returns true if the value of the given dimension is smaller or equal the
554:             * value to be compared with.
555:             * 
556:             * @param instance 	the instance where the value should be taken of
557:             * @param dim 	the dimension of the value
558:             * @param value 	the value to compare with
559:             * @return 		true if value of instance is smaller or equal value
560:             */
561:            public boolean valueIsSmallerEqual(Instance instance, int dim,
562:                    double value) { //This stays
563:                return instance.value(dim) <= value;
564:            }
565:
566:            /**
567:             * Documents the content of an EuclideanDistance object in a string.
568:             * 
569:             * @return 		the converted string
570:             */
571:            public String toString() {
572:
573:                StringBuffer text = new StringBuffer();
574:                //todo
575:                text.append("\n");
576:                return text.toString();
577:            }
578:
579:            /**
580:             * Used for debug println's.
581:             * 
582:             * @param output 	string that is printed
583:             */
584:            private void OOPS(String output) {
585:                System.out.println(output);
586:            }
587:
588:            /**
589:             * Computes and sets the number of attributes used.
590:             */
591:            private void setNumAttributesUsed() {
592:
593:                m_NumAttributesUsed = 0.0;
594:                if (m_Data != null) {
595:                    for (int i = 0; i < m_Data.numAttributes(); i++) {
596:                        if ((i != m_Data.classIndex())
597:                                && (m_Data.attribute(i).isNominal() || m_Data
598:                                        .attribute(i).isNumeric())) {
599:                            m_NumAttributesUsed += 1.0;
600:                        }
601:                    }
602:                }
603:            }
604:
605:            /*============================Ranges related functions=====================*/
606:
            /**
             * The range of the attributes: one row per attribute, each row holding
             * three values addressed by R_MIN, R_MAX and R_WIDTH.
             */
            //being used in KDTree and EuclideanDistance
            protected double[][] m_Ranges;

            /** Index in ranges for MIN. */
            public static final int R_MIN = 0;
            /** Index in ranges for MAX. */
            public static final int R_MAX = 1;
            /** Index in ranges for WIDTH. */
            public static final int R_WIDTH = 2;
617:
618:            /**
619:             * Initializes the ranges using all instances of the dataset.
620:             * Sets m_Ranges.
621:             * 
622:             * @return the ranges
623:             */
624:            //Being used in other classes (KDTree).
625:            public double[][] initializeRanges() {
626:
627:                if (m_Data == null) {
628:                    m_Ranges = null;
629:                    return null;
630:                }
631:
632:                int numAtt = m_Data.numAttributes();
633:                double[][] ranges = new double[numAtt][3];
634:
635:                if (m_Data.numInstances() <= 0) {
636:                    initializeRangesEmpty(numAtt, ranges);
637:                    m_Ranges = ranges;
638:                    return ranges;
639:                } else
640:                    // initialize ranges using the first instance
641:                    updateRangesFirst(m_Data.instance(0), numAtt, ranges);
642:
643:                // update ranges, starting from the second
644:                for (int i = 1; i < m_Data.numInstances(); i++) {
645:                    updateRanges(m_Data.instance(i), numAtt, ranges);
646:                }
647:                m_Ranges = ranges;
648:                return ranges;
649:            }
650:
651:            /**
652:             * Initializes the ranges of a subset of the instances of this dataset.
653:             * Therefore m_Ranges is not set.
654:             * 
655:             * @param instList 	list of indexes of the subset
656:             * @return 		the ranges
657:             * @throws Exception	if something goes wrong
658:             */
659:            //being used in other classes (KDTree and XMeans)
660:            public double[][] initializeRanges(int[] instList) throws Exception {
661:
662:                if (m_Data == null) {
663:                    throw new Exception("No instances supplied.");
664:                }
665:
666:                int numAtt = m_Data.numAttributes();
667:                double[][] ranges = new double[numAtt][3];
668:
669:                if (m_Data.numInstances() <= 0) {
670:                    initializeRangesEmpty(numAtt, ranges);
671:                    return ranges;
672:                } else {
673:                    // initialize ranges using the first instance
674:                    updateRangesFirst(m_Data.instance(instList[0]), numAtt,
675:                            ranges);
676:                    // update ranges, starting from the second
677:                    for (int i = 1; i < instList.length; i++) {
678:                        updateRanges(m_Data.instance(instList[i]), numAtt,
679:                                ranges);
680:                    }
681:                }
682:                return ranges;
683:            }
684:
685:            /**
686:             * Initializes the ranges of a subset of the instances of this dataset.
687:             * Therefore m_Ranges is not set.
688:             * The caller of this method should ensure that the supplied start and end 
689:             * indices are valid (start &lt;= end, end&lt;instList.length etc) and
690:             * correct.
691:             *
692:             * @param instList 	list of indexes of the instances
693:             * @param startIdx 	start index of the subset of instances in the indices array
694:             * @param endIdx 	end index of the subset of instances in the indices array
695:             * @return 		the ranges
696:             * @throws Exception	if something goes wrong
697:             */
698:            //being used in other classes (KDTree and XMeans)
699:            public double[][] initializeRanges(int[] instList, int startIdx,
700:                    int endIdx) throws Exception {
701:
702:                if (m_Data == null) {
703:                    throw new Exception("No instances supplied.");
704:                }
705:
706:                int numAtt = m_Data.numAttributes();
707:                double[][] ranges = new double[numAtt][3];
708:
709:                if (m_Data.numInstances() <= 0) {
710:                    initializeRangesEmpty(numAtt, ranges);
711:                    return ranges;
712:                } else {
713:                    // initialize ranges using the first instance
714:                    updateRangesFirst(m_Data.instance(instList[startIdx]),
715:                            numAtt, ranges);
716:                    // update ranges, starting from the second
717:                    for (int i = startIdx + 1; i <= endIdx; i++) {
718:                        updateRanges(m_Data.instance(instList[i]), numAtt,
719:                                ranges);
720:                    }
721:                }
722:                return ranges;
723:            }
724:
725:            /**
726:             * Used to initialize the ranges.
727:             * 
728:             * @param numAtt 	number of attributes in the model
729:             * @param ranges 	low, high and width values for all attributes
730:             */
731:            //being used in the functions above
732:            public void initializeRangesEmpty(int numAtt, double[][] ranges) {
733:
734:                for (int j = 0; j < numAtt; j++) {
735:                    ranges[j][R_MIN] = Double.POSITIVE_INFINITY;
736:                    ranges[j][R_MAX] = -Double.POSITIVE_INFINITY;
737:                    ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY;
738:                }
739:            }
740:
741:            /**
742:             * Used to initialize the ranges. For this the values of the first
743:             * instance is used to save time.
744:             * Sets low and high to the values of the first instance and
745:             * width to zero.
746:             * 
747:             * @param instance 	the new instance
748:             * @param numAtt 	number of attributes in the model
749:             * @param ranges 	low, high and width values for all attributes
750:             */
751:            //being used in the functions above
752:            public void updateRangesFirst(Instance instance, int numAtt,
753:                    double[][] ranges) {
754:
755:                for (int j = 0; j < numAtt; j++) {
756:                    if (!instance.isMissing(j)) {
757:                        ranges[j][R_MIN] = instance.value(j);
758:                        ranges[j][R_MAX] = instance.value(j);
759:                        ranges[j][R_WIDTH] = 0.0;
760:                    } else { // if value was missing
761:                        ranges[j][R_MIN] = Double.POSITIVE_INFINITY;
762:                        ranges[j][R_MAX] = -Double.POSITIVE_INFINITY;
763:                        ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY;
764:                    }
765:                }
766:            }
767:
768:            /**
769:             * Updates the minimum and maximum and width values for all the attributes
770:             * based on a new instance.
771:             * 
772:             * @param instance 	the new instance
773:             * @param numAtt 	number of attributes in the model
774:             * @param ranges 	low, high and width values for all attributes
775:             */
776:            //Being used in the functions above
777:            private void updateRanges(Instance instance, int numAtt,
778:                    double[][] ranges) {
779:
780:                // updateRangesFirst must have been called on ranges
781:                for (int j = 0; j < numAtt; j++) {
782:                    double value = instance.value(j);
783:                    if (!instance.isMissing(j)) {
784:                        if (value < ranges[j][R_MIN]) {
785:                            ranges[j][R_MIN] = value;
786:                            ranges[j][R_WIDTH] = ranges[j][R_MAX]
787:                                    - ranges[j][R_MIN];
788:                            if (value > ranges[j][R_MAX]) { //if this is the first value that is
789:                                ranges[j][R_MAX] = value; //not missing. The,0
790:                                ranges[j][R_WIDTH] = ranges[j][R_MAX]
791:                                        - ranges[j][R_MIN];
792:                            }
793:                        } else {
794:                            if (value > ranges[j][R_MAX]) {
795:                                ranges[j][R_MAX] = value;
796:                                ranges[j][R_WIDTH] = ranges[j][R_MAX]
797:                                        - ranges[j][R_MIN];
798:                            }
799:                        }
800:                    }
801:                }
802:            }
803:
804:            /**
805:             * Updates the ranges given a new instance.
806:             * 
807:             * @param instance 	the new instance
808:             * @param ranges 	low, high and width values for all attributes
809:             * @return		the updated ranges
810:             */
811:            //being used in other classes (KDTree)
812:            public double[][] updateRanges(Instance instance, double[][] ranges) {
813:
814:                // updateRangesFirst must have been called on ranges
815:                for (int j = 0; j < ranges.length; j++) {
816:                    double value = instance.value(j);
817:                    if (!instance.isMissing(j)) {
818:                        if (value < ranges[j][R_MIN]) {
819:                            ranges[j][R_MIN] = value;
820:                            ranges[j][R_WIDTH] = ranges[j][R_MAX]
821:                                    - ranges[j][R_MIN];
822:                        } else {
823:                            if (instance.value(j) > ranges[j][R_MAX]) {
824:                                ranges[j][R_MAX] = value;
825:                                ranges[j][R_WIDTH] = ranges[j][R_MAX]
826:                                        - ranges[j][R_MIN];
827:                            }
828:                        }
829:                    }
830:                }
831:                return ranges;
832:            }
833:
834:            /**
835:             * Update the ranges if a new instance comes.
836:             * 
837:             * @param instance 	the new instance
838:             */
839:            //Being used in KDTree
840:            public void updateRanges(Instance instance) {
841:                m_Ranges = updateRanges(instance, m_Ranges);
842:            }
843:
844:            /**
845:             * prints the ranges.
846:             * 
847:             * @param ranges 	low, high and width values for all attributes
848:             */
849:            //Not being used in any other class. Not even being used in this class.
850:            public void printRanges(double[][] ranges) {
851:
852:                OOPS("printRanges");
853:                // updateRangesFirst must have been called on ranges
854:                for (int j = 0; j < ranges.length; j++) {
855:                    OOPS(" " + j + "-MIN " + ranges[j][R_MIN]);
856:                    OOPS(" " + j + "-MAX " + ranges[j][R_MAX]);
857:                    OOPS(" " + j + "-WIDTH " + ranges[j][R_WIDTH]);
858:                }
859:            }
860:
861:            /**
862:             * Test if an instance is within the given ranges.
863:             * 
864:             * @param instance 	the instance
865:             * @param ranges 	the ranges the instance is tested to be in
866:             * @return true 	if instance is within the ranges
867:             */
868:            //being used in IBk but better to remove from there.
869:            public boolean inRanges(Instance instance, double[][] ranges) {
870:                boolean isIn = true;
871:
872:                // updateRangesFirst must have been called on ranges
873:                for (int j = 0; isIn && (j < ranges.length); j++) {
874:                    if (!instance.isMissing(j)) {
875:                        double value = instance.value(j);
876:                        isIn = value <= ranges[j][R_MAX];
877:                        if (isIn)
878:                            isIn = value >= ranges[j][R_MIN];
879:                    }
880:                }
881:                return isIn;
882:            }
883:
884:            /**
885:             * Prints a range to standard output.
886:             * 
887:             * @param model	the instances this ranges are for
888:             * @param ranges 	the ranges to print
889:             */
890:            //Not being used in any other class. Not even being used in this class.
891:            public void printRanges(Instances model, double[][] ranges) {
892:                System.out.println("printRanges");
893:                for (int j = 0; j < model.numAttributes(); j++) {
894:                    System.out.print("Attribute " + j + " MIN: "
895:                            + ranges[j][R_MIN]);
896:                    System.out.print(" MAX: " + ranges[j][R_MAX]);
897:                    System.out.print(" WIDTH: " + ranges[j][R_WIDTH]);
898:                    System.out.println(" ");
899:                }
900:            }
901:
902:            /**
903:             * Check if ranges are set.
904:             * 
905:             * @return 		true if ranges are set
906:             */
907:            //Not being used in any other class
908:            public boolean rangesSet() {
909:                return (m_Ranges != null);
910:            }
911:
912:            /**
913:             * Method to get the ranges.
914:             * 
915:             * @return 		the ranges
916:             * @throws Exception	if no randes are set yet
917:             */
918:            //Not being used in any other class
919:            public double[][] getRanges() throws Exception {
920:                if (m_Ranges == null)
921:                    throw new Exception("Ranges not yet set.");
922:                return m_Ranges;
923:            }
924:
925:            /**
926:             * Main method for testing this class.
927:             * 
928:             * @param args	the commandline parameters
929:             */
930:            public static void main(String[] args) {
931:                try {
932:                    Reader r = null;
933:                    if (args.length > 1) {
934:                        throw (new Exception(
935:                                "Usage: EuclideanDistance <filename>"));
936:                    } else if (args.length == 0) {
937:                        r = new BufferedReader(new InputStreamReader(System.in));
938:                    } else {
939:                        r = new BufferedReader(new FileReader(args[0]));
940:                    }
941:                    Instances i = new Instances(r);
942:                    EuclideanDistance test = new EuclideanDistance(i);
943:                    System.out.println("test:\n " + test);
944:
945:                } catch (Exception e) {
946:                    e.printStackTrace();
947:                }
948:            }
949:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.