001: /* ===========================================================
002: * JFreeChart : a free chart library for the Java(tm) platform
003: * ===========================================================
004: *
005: * (C) Copyright 2000-2006, by Object Refinery Limited and Contributors.
006: *
007: * Project Info: http://www.jfree.org/jfreechart/index.html
008: *
009: * This library is free software; you can redistribute it and/or modify it
010: * under the terms of the GNU Lesser General Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * (at your option) any later version.
013: *
014: * This library is distributed in the hope that it will be useful, but
015: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017: * License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public
020: * License along with this library; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
022: * USA.
023: *
024: * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
025: * in the United States and other countries.]
026: *
027: * ---------------
028: * Statistics.java
029: * ---------------
030: * (C) Copyright 2000-2006, by Matthew Wright and Contributors.
031: *
032: * Original Author: Matthew Wright;
033: * Contributor(s): David Gilbert (for Object Refinery Limited);
034: *
035: * $Id: Statistics.java,v 1.5.2.2 2006/11/16 11:19:47 mungady Exp $
036: *
037: * Changes (from 08-Nov-2001)
038: * --------------------------
039: * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
040: * Moved from JFreeChart to package com.jrefinery.data.* in
041: * JCommon class library (DG);
042: * 24-Jun-2002 : Removed unnecessary local variable (DG);
043: * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
044: * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
045: * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
046: * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
047: * release (DG);
048: *
049: */
050:
051: package org.jfree.data.statistics;
052:
053: import java.util.ArrayList;
054: import java.util.Collection;
055: import java.util.Collections;
056: import java.util.Iterator;
057: import java.util.List;
058:
/**
 * A utility class that provides some common statistical functions. The class
 * is declared abstract and every member is static.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers. This is equivalent to calling
     * <code>calculateMean(values, true)</code>.
     *
     * @param values the values (<code>null</code> not permitted).
     *
     * @return The mean (<code>Double.NaN</code> if the array is empty or
     *         contains a <code>null</code> or <code>Double.NaN</code> item).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values the values (<code>null</code> not permitted).
     * @param includeNullAndNaN a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the array, the result is
     *     {@link Double#NaN}). When <code>false</code>, such items are
     *     skipped and do not contribute to the item count.
     *
     * @return The mean (<code>Double.NaN</code> if no items were counted).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException(
                    "Null 'values' argument.");
        }
        double sum = 0.0;
        int counter = 0;
        for (int i = 0; i < values.length; i++) {
            // treat nulls the same as NaNs
            double current = (values[i] != null)
                    ? values[i].doubleValue() : Double.NaN;
            // a NaN that is included poisons the sum, giving a NaN result
            if (includeNullAndNaN || !Double.isNaN(current)) {
                sum = sum + current;
                counter++;
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return sum / counter;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects. This
     * is equivalent to calling <code>calculateMean(values, true)</code>.
     *
     * @param values the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.
     * Items that are not <code>null</code> and not instances of
     * <code>Number</code> are ignored.
     *
     * @param values the values (<code>null</code> not permitted).
     * @param includeNullAndNaN a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}). When <code>false</code>, such items
     *     are skipped.
     *
     * @return The mean (<code>Double.NaN</code> if no items were counted).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException(
                    "Null 'values' argument.");
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            if (object == null) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
            } else if (object instanceof Number) {
                double value = ((Number) object).doubleValue();
                if (Double.isNaN(value)) {
                    if (includeNullAndNaN) {
                        return Double.NaN;
                    }
                } else {
                    total = total + value;
                    count = count + 1;
                }
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * The list of values will be copied, and the copy sorted, before
     * calculating the median. To avoid this step (if your list of values
     * is already sorted), use the {@link #calculateMedian(List, boolean)}
     * method.
     *
     * @param values the values (<code>null</code> permitted).
     *
     * @return The median (<code>Double.NaN</code> if the list is
     *         <code>null</code> or empty).
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
     * to be presorted in ascending order by value. The supplied list is never
     * modified.
     *
     * @param values the values (<code>null</code> permitted).
     * @param copyAndSort a flag that controls whether the list of values is
     *     copied and sorted.
     *
     * @return The median (<code>Double.NaN</code> if the list is
     *         <code>null</code> or empty).
     */
    public static double calculateMedian(List values,
            boolean copyAndSort) {

        if (values == null) {
            return Double.NaN;
        }
        List sorted = values;
        if (copyAndSort) {
            // bulk copy: avoids indexed get(i) calls, which are slow on
            // linked lists
            sorted = new ArrayList(values);
            Collections.sort(sorted);
        }
        return medianOfSortedRange(sorted, 0, sorted.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects). This is equivalent to calling
     * <code>calculateMedian(values, start, end, true)</code>.
     *
     * @param values the values, in any order (<code>null</code> not
     *     permitted).
     * @param start the start index.
     * @param end the end index (inclusive).
     *
     * @return The median (<code>Double.NaN</code> if the range is empty).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects). If <code>copyAndSort</code> is
     * <code>true</code>, the items in the range are copied and the copy is
     * sorted; otherwise the range is assumed to be presorted in ascending
     * order. The supplied list is never modified.
     *
     * @param values the values (<code>null</code> not permitted).
     * @param start the start index.
     * @param end the end index (inclusive).
     * @param copyAndSort a flag that controls whether the list of values
     *     is copied and sorted.
     *
     * @return The median (<code>Double.NaN</code> if the range is empty,
     *         that is, if <code>end</code> is less than <code>start</code>).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     * @throws IndexOutOfBoundsException if a non-empty range reaches outside
     *         the list.
     */
    public static double calculateMedian(List values, int start,
            int end, boolean copyAndSort) {

        if (values == null) {
            throw new IllegalArgumentException(
                    "Null 'values' argument.");
        }
        int count = end - start + 1;
        if (count <= 0) {
            // empty range: same answer whether or not a copy was requested
            return Double.NaN;
        }
        if (copyAndSort) {
            List working = new ArrayList(values.subList(start, end + 1));
            Collections.sort(working);
            return medianOfSortedRange(working, 0, count);
        }
        return medianOfSortedRange(values, start, count);
    }

    /**
     * Picks the median out of <code>count</code> consecutive items of an
     * ascending list, beginning at index <code>start</code>.
     *
     * @param sorted the list of <code>Number</code> objects, already in
     *     ascending order over the range.
     * @param start the index of the first item in the range.
     * @param count the number of items in the range.
     *
     * @return The middle item for an odd count, the average of the two
     *         middle items for an even count, or <code>Double.NaN</code>
     *         when <code>count</code> is not positive.
     */
    private static double medianOfSortedRange(List sorted, int start,
            int count) {

        if (count <= 0) {
            return Double.NaN;
        }
        int middle = start + count / 2;
        if (count % 2 == 1) {
            return ((Number) sorted.get(middle)).doubleValue();
        }
        Number lower = (Number) sorted.get(middle - 1);
        Number upper = (Number) sorted.get(middle);
        return (lower.doubleValue() + upper.doubleValue()) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers. The sum of squared
     * deviations from the mean is divided by <code>n - 1</code>, so a
     * single-item array yields <code>Double.NaN</code>. The array items
     * themselves must not be <code>null</code> (each one is dereferenced).
     *
     * @param data the data (<code>null</code> or zero length array not
     *     permitted).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if <code>data</code> is
     *         <code>null</code> or has zero length.
     */
    public static double getStdDev(Number[] data) {
        if (data == null) {
            throw new IllegalArgumentException("Null 'data' array.");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException(
                    "Zero length 'data' array.");
        }
        double avg = calculateMean(data);
        double sum = 0.0;

        for (int counter = 0; counter < data.length; counter++) {
            double diff = data[counter].doubleValue() - avg;
            sum = sum + diff * diff;
        }
        return Math.sqrt(sum / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData the x-data (<code>null</code> not permitted).
     * @param yData the y-data (<code>null</code> not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double[] result = new double[2];
        // slope
        result[1] = getSlope(xData, yData);
        // intercept: a = ybar - b * xbar
        result[0] = calculateMean(yData) - result[1]
                * calculateMean(xData);

        return result;

    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData the x-values (<code>null</code> not permitted).
     * @param yData the y-values (<code>null</code> not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                    "Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //      sum(x * y) - (sum(x) * sum(y)) / n
        // b = ------------------------------------
        //      sum(x^2) - (sum(x))^2 / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
        int counter;
        for (counter = 0; counter < xData.length; counter++) {
            double x = xData[counter].doubleValue();
            double y = yData[counter].doubleValue();
            sx = sx + x;
            sxx = sxx + Math.pow(x, 2);
            sxy = sxy + y * x;
            sy = sy + y;
        }
        // after the loop, counter equals n
        return (sxy - (sx * sy) / counter)
                / (sxx - (sx * sx) / counter);

    }

    /**
     * Calculates the correlation between two datasets. Both arrays should
     * contain the same number of items. Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1 the first dataset (<code>null</code> not permitted).
     * @param data2 the second dataset (<code>null</code> not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length.");
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // a null item contributes zero to every running sum
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        return (n * sumXY - sumX * sumY)
                / Math.pow((n * sumX2 - sumX * sumX)
                        * (n * sumY2 - sumY * sumY), 0.5);
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * Row <code>i</code> of the result holds the x-value at index
     * <code>i + period</code> and the average of the <code>period</code>
     * y-values starting at index <code>i</code>.
     *
     * @param xData an array of the x data (<code>null</code> not permitted).
     * @param yData an array of the y data (<code>null</code> not permitted).
     * @param period the number of data points to average (must be at least
     *     1 and no greater than the array length).
     *
     * @return A double[][] with <code>xData.length - period</code> rows in
     *         the first dimension, with two doubles for x and y in the
     *         second dimension.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *         the array lengths differ, or <code>period</code> is out of
     *         range.
     */
    public static double[][] getMovingAverage(Number[] xData,
            Number[] yData, int period) {

        // check arguments...
        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                    "Array lengths must be equal.");
        }
        if (period < 1) {
            // zero would divide 0.0 by 0 (NaN averages); a negative period
            // would index before the start of the arrays
            throw new IllegalArgumentException(
                    "Period must be at least 1.");
        }
        if (period > xData.length) {
            throw new IllegalArgumentException(
                    "Period can't be longer than dataset.");
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // holds the moving average sum
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;

    }

}
|