001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat.descriptive;
017:
018: import java.io.Serializable;
019: import java.util.Arrays;
020:
021: import org.apache.commons.discovery.tools.DiscoverClass;
022: import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
023: import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
024: import org.apache.commons.math.stat.descriptive.moment.Mean;
025: import org.apache.commons.math.stat.descriptive.moment.Skewness;
026: import org.apache.commons.math.stat.descriptive.moment.Variance;
027: import org.apache.commons.math.stat.descriptive.rank.Max;
028: import org.apache.commons.math.stat.descriptive.rank.Min;
029: import org.apache.commons.math.stat.descriptive.rank.Percentile;
030: import org.apache.commons.math.stat.descriptive.summary.Sum;
031: import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
032:
033: /**
034: * Abstract factory class for univariate statistical summaries.
035: *
036: * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
037: */
038: public abstract class DescriptiveStatistics implements
039: StatisticalSummary, Serializable {
040:
041: /** Serialization UID */
042: private static final long serialVersionUID = 5188298269533339922L;
043:
044: /**
045: * Create an instance of a <code>DescriptiveStatistics</code>
046: * @param cls the type of <code>DescriptiveStatistics</code> object to
047: * create.
048: * @return a new factory.
049: * @throws InstantiationException is thrown if the object can not be
050: * created.
051: * @throws IllegalAccessException is thrown if the type's default
052: * constructor is not accessible.
053: */
054: public static DescriptiveStatistics newInstance(Class cls)
055: throws InstantiationException, IllegalAccessException {
056: return (DescriptiveStatistics) cls.newInstance();
057: }
058:
059: /**
060: * Create an instance of a <code>DescriptiveStatistics</code>
061: * @return a new factory.
062: */
063: public static DescriptiveStatistics newInstance() {
064: DescriptiveStatistics factory = null;
065: try {
066: DiscoverClass dc = new DiscoverClass();
067: factory = (DescriptiveStatistics) dc
068: .newInstance(DescriptiveStatistics.class,
069: "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
070: } catch (Throwable t) {
071: return new DescriptiveStatisticsImpl();
072: }
073: return factory;
074: }
075:
076: /**
077: * This constant signals that a Univariate implementation
078: * takes into account the contributions of an infinite number of
079: * elements. In other words, if getWindow returns this
080: * constant, there is, in effect, no "window".
081: */
082: public static final int INFINITE_WINDOW = -1;
083:
084: /**
085: * Adds the value to the set of numbers
086: * @param v the value to be added
087: */
088: public abstract void addValue(double v);
089:
090: /**
091: * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
092: * arithmetic mean </a> of the available values
093: * @return The mean or Double.NaN if no values have been added.
094: */
095: public double getMean() {
096: return apply(new Mean());
097: }
098:
099: /**
100: * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
101: * geometric mean </a> of the available values
102: * @return The geometricMean, Double.NaN if no values have been added,
103: * or if the productof the available values is less than or equal to 0.
104: */
105: public double getGeometricMean() {
106: return apply(new GeometricMean());
107: }
108:
109: /**
110: * Returns the variance of the available values.
111: * @return The variance, Double.NaN if no values have been added
112: * or 0.0 for a single value set.
113: */
114: public double getVariance() {
115: return apply(new Variance());
116: }
117:
118: /**
119: * Returns the standard deviation of the available values.
120: * @return The standard deviation, Double.NaN if no values have been added
121: * or 0.0 for a single value set.
122: */
123: public double getStandardDeviation() {
124: double stdDev = Double.NaN;
125: if (getN() > 0) {
126: if (getN() > 1) {
127: stdDev = Math.sqrt(getVariance());
128: } else {
129: stdDev = 0.0;
130: }
131: }
132: return (stdDev);
133: }
134:
135: /**
136: * Returns the skewness of the available values. Skewness is a
137: * measure of the assymetry of a given distribution.
138: * @return The skewness, Double.NaN if no values have been added
139: * or 0.0 for a value set <=2.
140: */
141: public double getSkewness() {
142: return apply(new Skewness());
143: }
144:
145: /**
146: * Returns the Kurtosis of the available values. Kurtosis is a
147: * measure of the "peakedness" of a distribution
148: * @return The kurtosis, Double.NaN if no values have been added, or 0.0
149: * for a value set <=3.
150: */
151: public double getKurtosis() {
152: return apply(new Kurtosis());
153: }
154:
155: /**
156: * Returns the maximum of the available values
157: * @return The max or Double.NaN if no values have been added.
158: */
159: public double getMax() {
160: return apply(new Max());
161: }
162:
163: /**
164: * Returns the minimum of the available values
165: * @return The min or Double.NaN if no values have been added.
166: */
167: public double getMin() {
168: return apply(new Min());
169: }
170:
171: /**
172: * Returns the number of available values
173: * @return The number of available values
174: */
175: public abstract long getN();
176:
177: /**
178: * Returns the sum of the values that have been added to Univariate.
179: * @return The sum or Double.NaN if no values have been added
180: */
181: public double getSum() {
182: return apply(new Sum());
183: }
184:
185: /**
186: * Returns the sum of the squares of the available values.
187: * @return The sum of the squares or Double.NaN if no
188: * values have been added.
189: */
190: public double getSumsq() {
191: return apply(new SumOfSquares());
192: }
193:
194: /**
195: * Resets all statistics and storage
196: */
197: public abstract void clear();
198:
199: /**
200: * Univariate has the ability to return only measures for the
201: * last N elements added to the set of values.
202: * @return The current window size or -1 if its Infinite.
203: */
204:
205: public abstract int getWindowSize();
206:
207: /**
208: * WindowSize controls the number of values which contribute
209: * to the values returned by Univariate. For example, if
210: * windowSize is set to 3 and the values {1,2,3,4,5}
211: * have been added <strong> in that order</strong>
212: * then the <i>available values</i> are {3,4,5} and all
213: * reported statistics will be based on these values
214: * @param windowSize sets the size of the window.
215: */
216: public abstract void setWindowSize(int windowSize);
217:
218: /**
219: * Returns the current set of values in an array of double primitives.
220: * The order of addition is preserved. The returned array is a fresh
221: * copy of the underlying data -- i.e., it is not a reference to the
222: * stored data.
223: *
224: * @return returns the current set of numbers in the order in which they
225: * were added to this set
226: */
227: public abstract double[] getValues();
228:
229: /**
230: * Returns the current set of values in an array of double primitives,
231: * sorted in ascending order. The returned array is a fresh
232: * copy of the underlying data -- i.e., it is not a reference to the
233: * stored data.
234: * @return returns the current set of
235: * numbers sorted in ascending order
236: */
237: public double[] getSortedValues() {
238: double[] sort = getValues();
239: Arrays.sort(sort);
240: return sort;
241: }
242:
243: /**
244: * Returns the element at the specified index
245: * @param index The Index of the element
246: * @return return the element at the specified index
247: */
248: public abstract double getElement(int index);
249:
250: /**
251: * Returns an estimate for the pth percentile of the stored values.
252: * <p>
253: * The implementation provided here follows the first estimation procedure presented
254: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
255: * <p>
256: * <strong>Preconditions</strong>:<ul>
257: * <li><code>0 < p < 100</code> (otherwise an
258: * <code>IllegalArgumentException</code> is thrown)</li>
259: * <li>at least one value must be stored (returns <code>Double.NaN
260: * </code> otherwise)</li>
261: * </ul>
262: *
263: * @param p the requested percentile (scaled from 0 - 100)
264: * @return An estimate for the pth percentile of the stored data
265: * values
266: */
267: public double getPercentile(double p) {
268: return apply(new Percentile(p));
269: }
270:
271: /**
272: * Generates a text report displaying univariate statistics from values
273: * that have been added. Each statistic is displayed on a separate
274: * line.
275: *
276: * @return String with line feeds displaying statistics
277: */
278: public String toString() {
279: StringBuffer outBuffer = new StringBuffer();
280: outBuffer.append("DescriptiveStatistics:\n");
281: outBuffer.append("n: " + getN() + "\n");
282: outBuffer.append("min: " + getMin() + "\n");
283: outBuffer.append("max: " + getMax() + "\n");
284: outBuffer.append("mean: " + getMean() + "\n");
285: outBuffer.append("std dev: " + getStandardDeviation() + "\n");
286: outBuffer.append("median: " + getPercentile(50) + "\n");
287: outBuffer.append("skewness: " + getSkewness() + "\n");
288: outBuffer.append("kurtosis: " + getKurtosis() + "\n");
289: return outBuffer.toString();
290: }
291:
292: /**
293: * Apply the given statistic to the data associated with this set of statistics.
294: * @param stat the statistic to apply
295: * @return the computed value of the statistic.
296: */
297: public abstract double apply(UnivariateStatistic stat);
298:
299: }
|