001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat;
017:
018: import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
019: import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
020: import org.apache.commons.math.stat.descriptive.moment.Mean;
021: import org.apache.commons.math.stat.descriptive.moment.Variance;
022: import org.apache.commons.math.stat.descriptive.rank.Max;
023: import org.apache.commons.math.stat.descriptive.rank.Min;
024: import org.apache.commons.math.stat.descriptive.rank.Percentile;
025: import org.apache.commons.math.stat.descriptive.summary.Product;
026: import org.apache.commons.math.stat.descriptive.summary.Sum;
027: import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
028: import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
029:
030: /**
031: * StatUtils provides static methods for computing statistics based on data
032: * stored in double[] arrays.
033: *
034: * @version $Revision: 233996 $ $Date: 2005-08-19 21:26:27 -0700 (Fri, 19 Aug 2005) $
035: */
036: public final class StatUtils {
037:
038: /** sum */
039: private static UnivariateStatistic sum = new Sum();
040:
041: /** sumSq */
042: private static UnivariateStatistic sumSq = new SumOfSquares();
043:
044: /** prod */
045: private static UnivariateStatistic prod = new Product();
046:
047: /** sumLog */
048: private static UnivariateStatistic sumLog = new SumOfLogs();
049:
050: /** min */
051: private static UnivariateStatistic min = new Min();
052:
053: /** max */
054: private static UnivariateStatistic max = new Max();
055:
056: /** mean */
057: private static UnivariateStatistic mean = new Mean();
058:
059: /** variance */
060: private static Variance variance = new Variance();
061:
062: /** percentile */
063: private static Percentile percentile = new Percentile();
064:
065: /** geometric mean */
066: private static GeometricMean geometricMean = new GeometricMean();
067:
068: /**
069: * Private Constructor
070: */
071: private StatUtils() {
072: }
073:
074: /**
075: * Returns the sum of the values in the input array, or
076: * <code>Double.NaN</code> if the array is empty.
077: * <p>
078: * Throws <code>IllegalArgumentException</code> if the input array
079: * is null.
080: *
081: * @param values array of values to sum
082: * @return the sum of the values or <code>Double.NaN</code> if the array
083: * is empty
084: * @throws IllegalArgumentException if the array is null
085: */
086: public static double sum(final double[] values) {
087: return sum.evaluate(values);
088: }
089:
090: /**
091: * Returns the sum of the entries in the specified portion of
092: * the input array, or <code>Double.NaN</code> if the designated subarray
093: * is empty.
094: * <p>
095: * Throws <code>IllegalArgumentException</code> if the array is null.
096: *
097: * @param values the input array
098: * @param begin index of the first array element to include
099: * @param length the number of elements to include
100: * @return the sum of the values or Double.NaN if length = 0
101: * @throws IllegalArgumentException if the array is null or the array index
102: * parameters are not valid
103: */
104: public static double sum(final double[] values, final int begin,
105: final int length) {
106: return sum.evaluate(values, begin, length);
107: }
108:
109: /**
110: * Returns the sum of the squares of the entries in the input array, or
111: * <code>Double.NaN</code> if the array is empty.
112: * <p>
113: * Throws <code>IllegalArgumentException</code> if the array is null.
114: *
115: * @param values input array
116: * @return the sum of the squared values or <code>Double.NaN</code> if the
117: * array is empty
118: * @throws IllegalArgumentException if the array is null
119: */
120: public static double sumSq(final double[] values) {
121: return sumSq.evaluate(values);
122: }
123:
124: /**
125: * Returns the sum of the squares of the entries in the specified portion of
126: * the input array, or <code>Double.NaN</code> if the designated subarray
127: * is empty.
128: * <p>
129: * Throws <code>IllegalArgumentException</code> if the array is null.
130: *
131: * @param values the input array
132: * @param begin index of the first array element to include
133: * @param length the number of elements to include
134: * @return the sum of the squares of the values or Double.NaN if length = 0
135: * @throws IllegalArgumentException if the array is null or the array index
136: * parameters are not valid
137: */
138: public static double sumSq(final double[] values, final int begin,
139: final int length) {
140: return sumSq.evaluate(values, begin, length);
141: }
142:
143: /**
144: * Returns the product of the entries in the input array, or
145: * <code>Double.NaN</code> if the array is empty.
146: * <p>
147: * Throws <code>IllegalArgumentException</code> if the array is null.
148: *
149: * @param values the input array
150: * @return the product of the values or Double.NaN if the array is empty
151: * @throws IllegalArgumentException if the array is null
152: */
153: public static double product(final double[] values) {
154: return prod.evaluate(values);
155: }
156:
157: /**
158: * Returns the product of the entries in the specified portion of
159: * the input array, or <code>Double.NaN</code> if the designated subarray
160: * is empty.
161: * <p>
162: * Throws <code>IllegalArgumentException</code> if the array is null.
163: *
164: * @param values the input array
165: * @param begin index of the first array element to include
166: * @param length the number of elements to include
167: * @return the product of the values or Double.NaN if length = 0
168: * @throws IllegalArgumentException if the array is null or the array index
169: * parameters are not valid
170: */
171: public static double product(final double[] values,
172: final int begin, final int length) {
173: return prod.evaluate(values, begin, length);
174: }
175:
176: /**
177: * Returns the sum of the natural logs of the entries in the input array, or
178: * <code>Double.NaN</code> if the array is empty.
179: * <p>
180: * Throws <code>IllegalArgumentException</code> if the array is null.
181: * <p>
182: * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
183: *
184: * @param values the input array
185: * @return the sum of the natural logs of the values or Double.NaN if
186: * the array is empty
187: * @throws IllegalArgumentException if the array is null
188: */
189: public static double sumLog(final double[] values) {
190: return sumLog.evaluate(values);
191: }
192:
193: /**
194: * Returns the sum of the natural logs of the entries in the specified portion of
195: * the input array, or <code>Double.NaN</code> if the designated subarray
196: * is empty.
197: * <p>
198: * Throws <code>IllegalArgumentException</code> if the array is null.
199: * <p>
200: * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
201: *
202: * @param values the input array
203: * @param begin index of the first array element to include
204: * @param length the number of elements to include
205: * @return the sum of the natural logs of the values or Double.NaN if
206: * length = 0
207: * @throws IllegalArgumentException if the array is null or the array index
208: * parameters are not valid
209: */
210: public static double sumLog(final double[] values, final int begin,
211: final int length) {
212: return sumLog.evaluate(values, begin, length);
213: }
214:
215: /**
216: * Returns the arithmetic mean of the entries in the input array, or
217: * <code>Double.NaN</code> if the array is empty.
218: * <p>
219: * Throws <code>IllegalArgumentException</code> if the array is null.
220: * <p>
221: * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
222: * details on the computing algorithm.
223: *
224: * @param values the input array
225: * @return the mean of the values or Double.NaN if the array is empty
226: * @throws IllegalArgumentException if the array is null
227: */
228: public static double mean(final double[] values) {
229: return mean.evaluate(values);
230: }
231:
232: /**
233: * Returns the arithmetic mean of the entries in the specified portion of
234: * the input array, or <code>Double.NaN</code> if the designated subarray
235: * is empty.
236: * <p>
237: * Throws <code>IllegalArgumentException</code> if the array is null.
238: * <p>
239: * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
240: * details on the computing algorithm.
241: *
242: * @param values the input array
243: * @param begin index of the first array element to include
244: * @param length the number of elements to include
245: * @return the mean of the values or Double.NaN if length = 0
246: * @throws IllegalArgumentException if the array is null or the array index
247: * parameters are not valid
248: */
249: public static double mean(final double[] values, final int begin,
250: final int length) {
251: return mean.evaluate(values, begin, length);
252: }
253:
254: /**
255: * Returns the geometric mean of the entries in the input array, or
256: * <code>Double.NaN</code> if the array is empty.
257: * <p>
258: * Throws <code>IllegalArgumentException</code> if the array is null.
259: * <p>
260: * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
261: * for details on the computing algorithm.
262: *
263: * @param values the input array
264: * @return the geometric mean of the values or Double.NaN if the array is empty
265: * @throws IllegalArgumentException if the array is null
266: */
267: public static double geometricMean(final double[] values) {
268: return geometricMean.evaluate(values);
269: }
270:
271: /**
272: * Returns the geometric mean of the entries in the specified portion of
273: * the input array, or <code>Double.NaN</code> if the designated subarray
274: * is empty.
275: * <p>
276: * Throws <code>IllegalArgumentException</code> if the array is null.
277: * <p>
278: * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
279: * for details on the computing algorithm.
280: *
281: * @param values the input array
282: * @param begin index of the first array element to include
283: * @param length the number of elements to include
284: * @return the geometric mean of the values or Double.NaN if length = 0
285: * @throws IllegalArgumentException if the array is null or the array index
286: * parameters are not valid
287: */
288: public static double geometricMean(final double[] values,
289: final int begin, final int length) {
290: return geometricMean.evaluate(values, begin, length);
291: }
292:
293: /**
294: * Returns the variance of the entries in the input array, or
295: * <code>Double.NaN</code> if the array is empty.
296: * <p>
297: * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
298: * details on the computing algorithm.
299: * <p>
300: * Returns 0 for a single-value (i.e. length = 1) sample.
301: * <p>
302: * Throws <code>IllegalArgumentException</code> if the array is null.
303: *
304: * @param values the input array
305: * @return the variance of the values or Double.NaN if the array is empty
306: * @throws IllegalArgumentException if the array is null
307: */
308: public static double variance(final double[] values) {
309: return variance.evaluate(values);
310: }
311:
312: /**
313: * Returns the variance of the entries in the specified portion of
314: * the input array, or <code>Double.NaN</code> if the designated subarray
315: * is empty.
316: * <p>
317: * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
318: * details on the computing algorithm.
319: * <p>
320: * Returns 0 for a single-value (i.e. length = 1) sample.
321: * <p>
322: * Throws <code>IllegalArgumentException</code> if the array is null or the
323: * array index parameters are not valid.
324: *
325: * @param values the input array
326: * @param begin index of the first array element to include
327: * @param length the number of elements to include
328: * @return the variance of the values or Double.NaN if length = 0
329: * @throws IllegalArgumentException if the array is null or the array index
330: * parameters are not valid
331: */
332: public static double variance(final double[] values,
333: final int begin, final int length) {
334: return variance.evaluate(values, begin, length);
335: }
336:
337: /**
338: * Returns the variance of the entries in the specified portion of
339: * the input array, using the precomputed mean value. Returns
340: * <code>Double.NaN</code> if the designated subarray is empty.
341: * <p>
342: * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
343: * details on the computing algorithm.
344: * <p>
345: * The formula used assumes that the supplied mean value is the arithmetic
346: * mean of the sample data, not a known population parameter. This method
347: * is supplied only to save computation when the mean has already been
348: * computed.
349: * <p>
350: * Returns 0 for a single-value (i.e. length = 1) sample.
351: * <p>
352: * Throws <code>IllegalArgumentException</code> if the array is null or the
353: * array index parameters are not valid.
354: *
355: * @param values the input array
356: * @param mean the precomputed mean value
357: * @param begin index of the first array element to include
358: * @param length the number of elements to include
359: * @return the variance of the values or Double.NaN if length = 0
360: * @throws IllegalArgumentException if the array is null or the array index
361: * parameters are not valid
362: */
363: public static double variance(final double[] values,
364: final double mean, final int begin, final int length) {
365: return variance.evaluate(values, mean, begin, length);
366: }
367:
368: /**
369: * Returns the variance of the entries in the input array, using the
370: * precomputed mean value. Returns <code>Double.NaN</code> if the array
371: * is empty.
372: * <p>
373: * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
374: * details on the computing algorithm.
375: * <p>
376: * The formula used assumes that the supplied mean value is the arithmetic
377: * mean of the sample data, not a known population parameter. This method
378: * is supplied only to save computation when the mean has already been
379: * computed.
380: * <p>
381: * Returns 0 for a single-value (i.e. length = 1) sample.
382: * <p>
383: * Throws <code>IllegalArgumentException</code> if the array is null.
384: *
385: * @param values the input array
386: * @param mean the precomputed mean value
387: * @return the variance of the values or Double.NaN if the array is empty
388: * @throws IllegalArgumentException if the array is null
389: */
390: public static double variance(final double[] values,
391: final double mean) {
392: return variance.evaluate(values, mean);
393: }
394:
395: /**
396: * Returns the maximum of the entries in the input array, or
397: * <code>Double.NaN</code> if the array is empty.
398: * <p>
399: * Throws <code>IllegalArgumentException</code> if the array is null.
400: * <p>
401: * <ul>
402: * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
403: * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
404: * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
405: * the result is <code>Double.POSITIVE_INFINITY.</code></li>
406: * </ul>
407: *
408: * @param values the input array
409: * @return the maximum of the values or Double.NaN if the array is empty
410: * @throws IllegalArgumentException if the array is null
411: */
412: public static double max(final double[] values) {
413: return max.evaluate(values);
414: }
415:
416: /**
417: * Returns the maximum of the entries in the specified portion of
418: * the input array, or <code>Double.NaN</code> if the designated subarray
419: * is empty.
420: * <p>
421: * Throws <code>IllegalArgumentException</code> if the array is null or
422: * the array index parameters are not valid.
423: * <p>
424: * <ul>
425: * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
426: * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
427: * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
428: * the result is <code>Double.POSITIVE_INFINITY.</code></li>
429: * </ul>
430: *
431: * @param values the input array
432: * @param begin index of the first array element to include
433: * @param length the number of elements to include
434: * @return the maximum of the values or Double.NaN if length = 0
435: * @throws IllegalArgumentException if the array is null or the array index
436: * parameters are not valid
437: */
438: public static double max(final double[] values, final int begin,
439: final int length) {
440: return max.evaluate(values, begin, length);
441: }
442:
443: /**
444: * Returns the minimum of the entries in the input array, or
445: * <code>Double.NaN</code> if the array is empty.
446: * <p>
447: * Throws <code>IllegalArgumentException</code> if the array is null.
448: * <p>
449: * <ul>
450: * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
451: * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
452: * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
453: * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
454: * </ul>
455: *
456: * @param values the input array
457: * @return the minimum of the values or Double.NaN if the array is empty
458: * @throws IllegalArgumentException if the array is null
459: */
460: public static double min(final double[] values) {
461: return min.evaluate(values);
462: }
463:
464: /**
465: * Returns the minimum of the entries in the specified portion of
466: * the input array, or <code>Double.NaN</code> if the designated subarray
467: * is empty.
468: * <p>
469: * Throws <code>IllegalArgumentException</code> if the array is null or
470: * the array index parameters are not valid.
471: * <p>
472: * <ul>
473: * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
474: * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
475: * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
476: * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
477: * </ul>
478: *
479: * @param values the input array
480: * @param begin index of the first array element to include
481: * @param length the number of elements to include
482: * @return the minimum of the values or Double.NaN if length = 0
483: * @throws IllegalArgumentException if the array is null or the array index
484: * parameters are not valid
485: */
486: public static double min(final double[] values, final int begin,
487: final int length) {
488: return min.evaluate(values, begin, length);
489: }
490:
491: /**
492: * Returns an estimate of the <code>p</code>th percentile of the values
493: * in the <code>values</code> array.
494: * <p>
495: * <ul>
496: * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
497: * <code>0</code></li>
498: * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
499: * if <code>values</code> has length <code>1</code></li>
500: * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
501: * is null or p is not a valid quantile value (p must be greater than 0
502: * and less than or equal to 100)</li>
503: * </ul>
504: * <p>
505: * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
506: * a description of the percentile estimation algorithm used.
507: *
508: * @param values input array of values
509: * @param p the percentile value to compute
510: * @return the percentile value or Double.NaN if the array is empty
511: * @throws IllegalArgumentException if <code>values</code> is null
512: * or p is invalid
513: */
514: public static double percentile(final double[] values,
515: final double p) {
516: return percentile.evaluate(values, p);
517: }
518:
519: /**
520: * Returns an estimate of the <code>p</code>th percentile of the values
521: * in the <code>values</code> array, starting with the element in (0-based)
522: * position <code>begin</code> in the array and including <code>length</code>
523: * values.
524: * <p>
525: * <ul>
526: * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
527: * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
528: * if <code>length = 1 </code></li>
529: * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
530: * is null , <code>begin</code> or <code>length</code> is invalid, or
531: * <code>p</code> is not a valid quantile value (p must be greater than 0
532: * and less than or equal to 100)</li>
533: * </ul>
534: * <p>
535: * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
536: * a description of the percentile estimation algorithm used.
537: *
538: * @param values array of input values
539: * @param p the percentile to compute
540: * @param begin the first (0-based) element to include in the computation
541: * @param length the number of array elements to include
542: * @return the percentile value
543: * @throws IllegalArgumentException if the parameters are not valid or the
544: * input array is null
545: */
546: public static double percentile(final double[] values,
547: final int begin, final int length, final double p) {
548: return percentile.evaluate(values, begin, length, p);
549: }
550:
551: /**
552: * Returns the sum of the (signed) differences between corresponding elements of the
553: * input arrays -- i.e., sum(sample1[i] - sample2[i]).
554: *
555: * @param sample1 the first array
556: * @param sample2 the second array
557: * @return sum of paired differences
558: * @throws IllegalArgumentException if the arrays do not have the same
559: * (positive) length
560: */
561: public static double sumDifference(final double[] sample1,
562: final double[] sample2) throws IllegalArgumentException {
563: int n = sample1.length;
564: if (n != sample2.length || n < 1) {
565: throw new IllegalArgumentException(
566: "Input arrays must have the same (positive) length.");
567: }
568: double result = 0;
569: for (int i = 0; i < n; i++) {
570: result += sample1[i] - sample2[i];
571: }
572: return result;
573: }
574:
575: /**
576: * Returns the mean of the (signed) differences between corresponding elements of the
577: * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
578: *
579: * @param sample1 the first array
580: * @param sample2 the second array
581: * @return mean of paired differences
582: * @throws IllegalArgumentException if the arrays do not have the same
583: * (positive) length
584: */
585: public static double meanDifference(final double[] sample1,
586: final double[] sample2) throws IllegalArgumentException {
587: return sumDifference(sample1, sample2)
588: / (double) sample1.length;
589: }
590:
591: /**
592: * Returns the variance of the (signed) differences between corresponding elements of the
593: * input arrays -- i.e., var(sample1[i] - sample2[i]).
594: *
595: * @param sample1 the first array
596: * @param sample2 the second array
597: * @param meanDifference the mean difference between corresponding entries
598: * @see #meanDifference(double[],double[])
599: * @return variance of paired differences
600: * @throws IllegalArgumentException if the arrays do not have the same
601: * length or their common length is less than 2.
602: */
603: public static double varianceDifference(final double[] sample1,
604: final double[] sample2, double meanDifference)
605: throws IllegalArgumentException {
606: double sum1 = 0d;
607: double sum2 = 0d;
608: double diff = 0d;
609: int n = sample1.length;
610: if (n < 2 || n != sample2.length) {
611: throw new IllegalArgumentException(
612: "Input array lengths must be equal and at least 2.");
613: }
614: for (int i = 0; i < n; i++) {
615: diff = sample1[i] - sample2[i];
616: sum1 += (diff - meanDifference) * (diff - meanDifference);
617: sum2 += diff - meanDifference;
618: }
619: return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1);
620: }
621:
622: }
|