001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat.descriptive.rank;
017:
018: import java.io.Serializable;
019: import java.util.Arrays;
020: import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
021:
022: /**
023: * Provides percentile computation.
024: * <p>
025: * There are several commonly used methods for estimating percentiles (a.k.a.
026: * quantiles) based on sample data. For large samples, the different methods
027: * agree closely, but when sample sizes are small, different methods will give
028: * significantly different results. The algorithm implemented here works as follows:
029: * <ol>
030: * <li>Let <code>n</code> be the length of the (sorted) array and
031: * <code>0 < p <= 100</code> be the desired percentile.</li>
032: * <li>If <code> n = 1 </code> return the unique array element (regardless of
033: * the value of <code>p</code>); otherwise </li>
034: * <li>Compute the estimated percentile position
035: * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
036: * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
037: * part of <code>pos</code>). If <code>pos >= n</code> return the largest
038: * element in the array; otherwise</li>
039: * <li>Let <code>lower</code> be the element in position
040: * <code>floor(pos)</code> in the array and let <code>upper</code> be the
041: * next element in the array. Return <code>lower + d * (upper - lower)</code>
042: * </li>
043: * </ol>
044: * <p>
045: * To compute percentiles, the data must be (totally) ordered. Input arrays
046: * are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
047: * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
048: * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
049: * <code>Double.NaN</code> larger than any other value (including
050: * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
051: * (50th percentile) of
052: * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>
053: * <p>
054: * Since percentile estimation usually involves interpolation between array
055: * elements, arrays containing <code>NaN</code> or infinite values will often
056: * result in <code>NaN<code> or infinite values returned.
057: * <p>
058: * <strong>Note that this implementation is not synchronized.</strong> If
059: * multiple threads access an instance of this class concurrently, and at least
060: * one of the threads invokes the <code>increment()</code> or
061: * <code>clear()</code> method, it must be synchronized externally.
062: *
063: * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
064: */
065: public class Percentile extends AbstractUnivariateStatistic implements
066: Serializable {
067:
068: /** Serializable version identifier */
069: private static final long serialVersionUID = -8091216485095130416L;
070:
071: /** Determines what percentile is computed when evaluate() is activated
072: * with no quantile argument */
073: private double quantile = 0.0;
074:
075: /**
076: * Constructs a Percentile with a default quantile
077: * value of 50.0.
078: */
079: public Percentile() {
080: this (50.0);
081: }
082:
083: /**
084: * Constructs a Percentile with the specific quantile value.
085: * @param p the quantile
086: * @throws IllegalArgumentException if p is not greater than 0 and less
087: * than or equal to 100
088: */
089: public Percentile(final double p) {
090: setQuantile(p);
091: }
092:
093: /**
094: * Returns an estimate of the <code>p</code>th percentile of the values
095: * in the <code>values</code> array.
096: * <p>
097: * Calls to this method do not modify the internal <code>quantile</code>
098: * state of this statistic.
099: * <p>
100: * <ul>
101: * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
102: * <code>0</code></li>
103: * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
104: * if <code>values</code> has length <code>1</code></li>
105: * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
106: * is null or p is not a valid quantile value (p must be greater than 0
107: * and less than or equal to 100) </li>
108: * </ul>
109: * <p>
110: * See {@link Percentile} for a description of the percentile estimation
111: * algorithm used.
112: *
113: * @param values input array of values
114: * @param p the percentile value to compute
115: * @return the percentile value or Double.NaN if the array is empty
116: * @throws IllegalArgumentException if <code>values</code> is null
117: * or p is invalid
118: */
119: public double evaluate(final double[] values, final double p) {
120: test(values, 0, 0);
121: return evaluate(values, 0, values.length, p);
122: }
123:
124: /**
125: * Returns an estimate of the <code>quantile</code>th percentile of the
126: * designated values in the <code>values</code> array. The quantile
127: * estimated is determined by the <code>quantile</code> property.
128: * <p>
129: * <ul>
130: * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
131: * <li>Returns (for any value of <code>quantile</code>)
132: * <code>values[begin]</code> if <code>length = 1 </code></li>
133: * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
134: * is null, or <code>start</code> or <code>length</code>
135: * is invalid</li>
136: * </ul>
137: * <p>
138: * See {@link Percentile} for a description of the percentile estimation
139: * algorithm used.
140: *
141: * @param values the input array
142: * @param start index of the first array element to include
143: * @param length the number of elements to include
144: * @return the percentile value
145: * @throws IllegalArgumentException if the parameters are not valid
146: *
147: */
148: public double evaluate(final double[] values, final int start,
149: final int length) {
150: return evaluate(values, start, length, quantile);
151: }
152:
153: /**
154: * Returns an estimate of the <code>p</code>th percentile of the values
155: * in the <code>values</code> array, starting with the element in (0-based)
156: * position <code>begin</code> in the array and including <code>length</code>
157: * values.
158: * <p>
159: * Calls to this method do not modify the internal <code>quantile</code>
160: * state of this statistic.
161: * <p>
162: * <ul>
163: * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
164: * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
165: * if <code>length = 1 </code></li>
166: * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
167: * is null , <code>begin</code> or <code>length</code> is invalid, or
168: * <code>p</code> is not a valid quantile value (p must be greater than 0
169: * and less than or equal to 100)</li>
170: * </ul>
171: * <p>
172: * See {@link Percentile} for a description of the percentile estimation
173: * algorithm used.
174: *
175: * @param values array of input values
176: * @param p the percentile to compute
177: * @param begin the first (0-based) element to include in the computation
178: * @param length the number of array elements to include
179: * @return the percentile value
180: * @throws IllegalArgumentException if the parameters are not valid or the
181: * input array is null
182: */
183: public double evaluate(final double[] values, final int begin,
184: final int length, final double p) {
185:
186: test(values, begin, length);
187:
188: if ((p > 100) || (p <= 0)) {
189: throw new IllegalArgumentException(
190: "invalid quantile value: " + p);
191: }
192: if (length == 0) {
193: return Double.NaN;
194: }
195: if (length == 1) {
196: return values[begin]; // always return single value for n = 1
197: }
198: double n = (double) length;
199: double pos = p * (n + 1) / 100;
200: double fpos = Math.floor(pos);
201: int intPos = (int) fpos;
202: double dif = pos - fpos;
203: double[] sorted = new double[length];
204: System.arraycopy(values, begin, sorted, 0, length);
205: Arrays.sort(sorted);
206:
207: if (pos < 1) {
208: return sorted[0];
209: }
210: if (pos >= n) {
211: return sorted[length - 1];
212: }
213: double lower = sorted[intPos - 1];
214: double upper = sorted[intPos];
215: return lower + dif * (upper - lower);
216: }
217:
218: /**
219: * Returns the value of the quantile field (determines what percentile is
220: * computed when evaluate() is called with no quantile argument).
221: *
222: * @return quantile
223: */
224: public double getQuantile() {
225: return quantile;
226: }
227:
228: /**
229: * Sets the value of the quantile field (determines what percentile is
230: * computed when evaluate() is called with no quantile argument).
231: *
232: * @param p a value between 0 < p <= 100
233: * @throws IllegalArgumentException if p is not greater than 0 and less
234: * than or equal to 100
235: */
236: public void setQuantile(final double p) {
237: if (p <= 0 || p > 100) {
238: throw new IllegalArgumentException(
239: "Illegal quantile value: " + p);
240: }
241: quantile = p;
242: }
243:
244: }
|