001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.commons.math.distribution;
018:
019: import java.io.Serializable;
020:
021: import org.apache.commons.math.util.MathUtils;
022:
023: /**
024: * The default implementation of {@link HypergeometricDistribution}.
025: *
026: * @version $Revision: 348888 $ $Date: 2005-11-24 23:21:25 -0700 (Thu, 24 Nov 2005) $
027: */
028: public class HypergeometricDistributionImpl extends
029: AbstractIntegerDistribution implements
030: HypergeometricDistribution, Serializable {
031:
032: /** Serializable version identifier */
033: private static final long serialVersionUID = -436928820673516179L;
034:
035: /** The number of successes in the population. */
036: private int numberOfSuccesses;
037:
038: /** The population size. */
039: private int populationSize;
040:
041: /** The sample size. */
042: private int sampleSize;
043:
044: /**
045: * Construct a new hypergeometric distribution with the given the population
046: * size, the number of successes in the population, and the sample size.
047: * @param populationSize the population size.
048: * @param numberOfSuccesses number of successes in the population.
049: * @param sampleSize the sample size.
050: */
051: public HypergeometricDistributionImpl(int populationSize,
052: int numberOfSuccesses, int sampleSize) {
053: super ();
054: if (numberOfSuccesses > populationSize) {
055: throw new IllegalArgumentException(
056: "number of successes must be less than or equal to "
057: + "population size");
058: }
059: if (sampleSize > populationSize) {
060: throw new IllegalArgumentException(
061: "sample size must be less than or equal to population size");
062: }
063: setPopulationSize(populationSize);
064: setSampleSize(sampleSize);
065: setNumberOfSuccesses(numberOfSuccesses);
066: }
067:
068: /**
069: * For this disbution, X, this method returns P(X ≤ x).
070: * @param x the value at which the PDF is evaluated.
071: * @return PDF for this distribution.
072: */
073: public double cumulativeProbability(int x) {
074: double ret;
075:
076: int n = getPopulationSize();
077: int m = getNumberOfSuccesses();
078: int k = getSampleSize();
079:
080: int[] domain = getDomain(n, m, k);
081: if (x < domain[0]) {
082: ret = 0.0;
083: } else if (x >= domain[1]) {
084: ret = 1.0;
085: } else {
086: ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
087: }
088:
089: return ret;
090: }
091:
092: /**
093: * Return the domain for the given hypergeometric distribution parameters.
094: * @param n the population size.
095: * @param m number of successes in the population.
096: * @param k the sample size.
097: * @return a two element array containing the lower and upper bounds of the
098: * hypergeometric distribution.
099: */
100: private int[] getDomain(int n, int m, int k) {
101: return new int[] { getLowerDomain(n, m, k),
102: getUpperDomain(m, k) };
103: }
104:
105: /**
106: * Access the domain value lower bound, based on <code>p</code>, used to
107: * bracket a PDF root.
108: *
109: * @param p the desired probability for the critical value
110: * @return domain value lower bound, i.e.
111: * P(X < <i>lower bound</i>) < <code>p</code>
112: */
113: protected int getDomainLowerBound(double p) {
114: return getLowerDomain(getPopulationSize(),
115: getNumberOfSuccesses(), getSampleSize());
116: }
117:
118: /**
119: * Access the domain value upper bound, based on <code>p</code>, used to
120: * bracket a PDF root.
121: *
122: * @param p the desired probability for the critical value
123: * @return domain value upper bound, i.e.
124: * P(X < <i>upper bound</i>) > <code>p</code>
125: */
126: protected int getDomainUpperBound(double p) {
127: return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
128: }
129:
130: /**
131: * Return the lowest domain value for the given hypergeometric distribution
132: * parameters.
133: * @param n the population size.
134: * @param m number of successes in the population.
135: * @param k the sample size.
136: * @return the lowest domain value of the hypergeometric distribution.
137: */
138: private int getLowerDomain(int n, int m, int k) {
139: return Math.max(0, m - (n - k));
140: }
141:
142: /**
143: * Access the number of successes.
144: * @return the number of successes.
145: */
146: public int getNumberOfSuccesses() {
147: return numberOfSuccesses;
148: }
149:
150: /**
151: * Access the population size.
152: * @return the population size.
153: */
154: public int getPopulationSize() {
155: return populationSize;
156: }
157:
158: /**
159: * Access the sample size.
160: * @return the sample size.
161: */
162: public int getSampleSize() {
163: return sampleSize;
164: }
165:
166: /**
167: * Return the highest domain value for the given hypergeometric distribution
168: * parameters.
169: * @param m number of successes in the population.
170: * @param k the sample size.
171: * @return the highest domain value of the hypergeometric distribution.
172: */
173: private int getUpperDomain(int m, int k) {
174: return Math.min(k, m);
175: }
176:
177: /**
178: * For this disbution, X, this method returns P(X = x).
179: *
180: * @param x the value at which the PMF is evaluated.
181: * @return PMF for this distribution.
182: */
183: public double probability(int x) {
184: double ret;
185:
186: int n = getPopulationSize();
187: int m = getNumberOfSuccesses();
188: int k = getSampleSize();
189:
190: int[] domain = getDomain(n, m, k);
191: if (x < domain[0] || x > domain[1]) {
192: ret = 0.0;
193: } else {
194: ret = probability(n, m, k, x);
195: }
196:
197: return ret;
198: }
199:
200: /**
201: * For the disbution, X, defined by the given hypergeometric distribution
202: * parameters, this method returns P(X = x).
203: *
204: * @param n the population size.
205: * @param m number of successes in the population.
206: * @param k the sample size.
207: * @param x the value at which the PMF is evaluated.
208: * @return PMF for the distribution.
209: */
210: private double probability(int n, int m, int k, int x) {
211: return Math.exp(MathUtils.binomialCoefficientLog(m, x)
212: + MathUtils.binomialCoefficientLog(n - m, k - x)
213: - MathUtils.binomialCoefficientLog(n, k));
214: }
215:
216: /**
217: * Modify the number of successes.
218: * @param num the new number of successes.
219: * @throws IllegalArgumentException if <code>num</code> is negative.
220: */
221: public void setNumberOfSuccesses(int num) {
222: if (num < 0) {
223: throw new IllegalArgumentException(
224: "number of successes must be non-negative.");
225: }
226: numberOfSuccesses = num;
227: }
228:
229: /**
230: * Modify the population size.
231: * @param size the new population size.
232: * @throws IllegalArgumentException if <code>size</code> is not positive.
233: */
234: public void setPopulationSize(int size) {
235: if (size <= 0) {
236: throw new IllegalArgumentException(
237: "population size must be positive.");
238: }
239: populationSize = size;
240: }
241:
242: /**
243: * Modify the sample size.
244: * @param size the new sample size.
245: * @throws IllegalArgumentException if <code>size</code> is negative.
246: */
247: public void setSampleSize(int size) {
248: if (size < 0) {
249: throw new IllegalArgumentException(
250: "sample size must be non-negative.");
251: }
252: sampleSize = size;
253: }
254:
255: /**
256: * For this disbution, X, this method returns P(X ≥ x).
257: * @param x the value at which the CDF is evaluated.
258: * @return upper tail CDF for this distribution.
259: * @since 1.1
260: */
261: public double upperCumulativeProbability(int x) {
262: double ret;
263:
264: int n = getPopulationSize();
265: int m = getNumberOfSuccesses();
266: int k = getSampleSize();
267:
268: int[] domain = getDomain(n, m, k);
269: if (x < domain[0]) {
270: ret = 1.0;
271: } else if (x > domain[1]) {
272: ret = 0.0;
273: } else {
274: ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
275: }
276:
277: return ret;
278: }
279:
280: /**
281: * For this disbution, X, this method returns P(x0 ≤ X ≤ x1). This
282: * probability is computed by summing the point probabilities for the values
283: * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
284: * @param x0 the inclusive, lower bound
285: * @param x1 the inclusive, upper bound
286: * @param dx the direction of summation. 1 indicates summing from x0 to x1.
287: * 0 indicates summing from x1 to x0.
288: * @param n the population size.
289: * @param m number of successes in the population.
290: * @param k the sample size.
291: * @return P(x0 ≤ X ≤ x1).
292: */
293: private double innerCumulativeProbability(int x0, int x1, int dx,
294: int n, int m, int k) {
295: double ret = probability(n, m, k, x0);
296: while (x0 != x1) {
297: x0 += dx;
298: ret += probability(n, m, k, x0);
299: }
300: return ret;
301: }
302: }
|