001: /*
002: * Copyright 2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat.inference;
017:
018: import org.apache.commons.math.MathException;
019: import org.apache.commons.math.distribution.DistributionFactory;
020: import org.apache.commons.math.distribution.ChiSquaredDistribution;
021:
022: /**
023: * Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface.
024: *
025: * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
026: */
027: public class ChiSquareTestImpl implements ChiSquareTest {
028:
029: /** Cached DistributionFactory used to create ChiSquaredDistribution instances */
030: private DistributionFactory distributionFactory = null;
031:
032: /**
033: * Construct a ChiSquareTestImpl
034: */
035: public ChiSquareTestImpl() {
036: super ();
037: }
038:
039: /**
040: * @param observed array of observed frequency counts
041: * @param expected array of expected frequency counts
042: * @return chi-square test statistic
043: * @throws IllegalArgumentException if preconditions are not met
044: * or length is less than 2
045: */
046: public double chiSquare(double[] expected, long[] observed)
047: throws IllegalArgumentException {
048: double sumSq = 0.0d;
049: double dev = 0.0d;
050: if ((expected.length < 2)
051: || (expected.length != observed.length)) {
052: throw new IllegalArgumentException(
053: "observed, expected array lengths incorrect");
054: }
055: if (!isPositive(expected) || !isNonNegative(observed)) {
056: throw new IllegalArgumentException(
057: "observed counts must be non-negative and expected counts must be postive");
058: }
059: for (int i = 0; i < observed.length; i++) {
060: dev = ((double) observed[i] - expected[i]);
061: sumSq += dev * dev / expected[i];
062: }
063: return sumSq;
064: }
065:
066: /**
067: * @param observed array of observed frequency counts
068: * @param expected array of exptected frequency counts
069: * @return p-value
070: * @throws IllegalArgumentException if preconditions are not met
071: * @throws MathException if an error occurs computing the p-value
072: */
073: public double chiSquareTest(double[] expected, long[] observed)
074: throws IllegalArgumentException, MathException {
075: ChiSquaredDistribution chiSquaredDistribution = getDistributionFactory()
076: .createChiSquareDistribution(
077: (double) expected.length - 1);
078: return 1 - chiSquaredDistribution
079: .cumulativeProbability(chiSquare(expected, observed));
080: }
081:
082: /**
083: * @param observed array of observed frequency counts
084: * @param expected array of exptected frequency counts
085: * @param alpha significance level of the test
086: * @return true iff null hypothesis can be rejected with confidence
087: * 1 - alpha
088: * @throws IllegalArgumentException if preconditions are not met
089: * @throws MathException if an error occurs performing the test
090: */
091: public boolean chiSquareTest(double[] expected, long[] observed,
092: double alpha) throws IllegalArgumentException,
093: MathException {
094: if ((alpha <= 0) || (alpha > 0.5)) {
095: throw new IllegalArgumentException(
096: "bad significance level: " + alpha);
097: }
098: return (chiSquareTest(expected, observed) < alpha);
099: }
100:
101: /**
102: * @param counts array representation of 2-way table
103: * @return chi-square test statistic
104: * @throws IllegalArgumentException if preconditions are not met
105: */
106: public double chiSquare(long[][] counts)
107: throws IllegalArgumentException {
108:
109: checkArray(counts);
110: int nRows = counts.length;
111: int nCols = counts[0].length;
112:
113: // compute row, column and total sums
114: double[] rowSum = new double[nRows];
115: double[] colSum = new double[nCols];
116: double total = 0.0d;
117: for (int row = 0; row < nRows; row++) {
118: for (int col = 0; col < nCols; col++) {
119: rowSum[row] += (double) counts[row][col];
120: colSum[col] += (double) counts[row][col];
121: total += (double) counts[row][col];
122: }
123: }
124:
125: // compute expected counts and chi-square
126: double sumSq = 0.0d;
127: double expected = 0.0d;
128: for (int row = 0; row < nRows; row++) {
129: for (int col = 0; col < nCols; col++) {
130: expected = (rowSum[row] * colSum[col]) / total;
131: sumSq += (((double) counts[row][col] - expected) * ((double) counts[row][col] - expected))
132: / expected;
133: }
134: }
135: return sumSq;
136: }
137:
138: /**
139: * @param counts array representation of 2-way table
140: * @return p-value
141: * @throws IllegalArgumentException if preconditions are not met
142: * @throws MathException if an error occurs computing the p-value
143: */
144: public double chiSquareTest(long[][] counts)
145: throws IllegalArgumentException, MathException {
146: checkArray(counts);
147: double df = ((double) counts.length - 1)
148: * ((double) counts[0].length - 1);
149: ChiSquaredDistribution chiSquaredDistribution = getDistributionFactory()
150: .createChiSquareDistribution(df);
151: return 1 - chiSquaredDistribution
152: .cumulativeProbability(chiSquare(counts));
153: }
154:
155: /**
156: * @param counts array representation of 2-way table
157: * @param alpha significance level of the test
158: * @return true iff null hypothesis can be rejected with confidence
159: * 1 - alpha
160: * @throws IllegalArgumentException if preconditions are not met
161: * @throws MathException if an error occurs performing the test
162: */
163: public boolean chiSquareTest(long[][] counts, double alpha)
164: throws IllegalArgumentException, MathException {
165: if ((alpha <= 0) || (alpha > 0.5)) {
166: throw new IllegalArgumentException(
167: "bad significance level: " + alpha);
168: }
169: return (chiSquareTest(counts) < alpha);
170: }
171:
172: /**
173: * Checks to make sure that the input long[][] array is rectangular,
174: * has at least 2 rows and 2 columns, and has all non-negative entries,
175: * throwing IllegalArgumentException if any of these checks fail.
176: *
177: * @param in input 2-way table to check
178: * @throws IllegalArgumentException if the array is not valid
179: */
180: private void checkArray(long[][] in)
181: throws IllegalArgumentException {
182:
183: if (in.length < 2) {
184: throw new IllegalArgumentException(
185: "Input table must have at least two rows");
186: }
187:
188: if (in[0].length < 2) {
189: throw new IllegalArgumentException(
190: "Input table must have at least two columns");
191: }
192:
193: if (!isRectangular(in)) {
194: throw new IllegalArgumentException(
195: "Input table must be rectangular");
196: }
197:
198: if (!isNonNegative(in)) {
199: throw new IllegalArgumentException(
200: "All entries in input 2-way table must be non-negative");
201: }
202:
203: }
204:
205: //--------------------- Protected methods ---------------------------------
206: /**
207: * Gets a DistributionFactory to use in creating ChiSquaredDistribution instances.
208: *
209: * @return a DistributionFactory
210: */
211: protected DistributionFactory getDistributionFactory() {
212: if (distributionFactory == null) {
213: distributionFactory = DistributionFactory.newInstance();
214: }
215: return distributionFactory;
216: }
217:
218: //--------------------- Private array methods -- should find a utility home for these
219:
220: /**
221: * Returns true iff input array is rectangular.
222: *
223: * @param in array to be tested
224: * @return true if the array is rectangular
225: * @throws NullPointerException if input array is null
226: * @throws ArrayIndexOutOfBoundsException if input array is empty
227: */
228: private boolean isRectangular(long[][] in) {
229: for (int i = 1; i < in.length; i++) {
230: if (in[i].length != in[0].length) {
231: return false;
232: }
233: }
234: return true;
235: }
236:
237: /**
238: * Returns true iff all entries of the input array are > 0.
239: * Returns true if the array is non-null, but empty
240: *
241: * @param in array to be tested
242: * @return true if all entries of the array are positive
243: * @throws NullPointerException if input array is null
244: */
245: private boolean isPositive(double[] in) {
246: for (int i = 0; i < in.length; i++) {
247: if (in[i] <= 0) {
248: return false;
249: }
250: }
251: return true;
252: }
253:
254: /**
255: * Returns true iff all entries of the input array are >= 0.
256: * Returns true if the array is non-null, but empty
257: *
258: * @param in array to be tested
259: * @return true if all entries of the array are non-negative
260: * @throws NullPointerException if input array is null
261: */
262: private boolean isNonNegative(long[] in) {
263: for (int i = 0; i < in.length; i++) {
264: if (in[i] < 0) {
265: return false;
266: }
267: }
268: return true;
269: }
270:
271: /**
272: * Returns true iff all entries of (all subarrays of) the input array are >= 0.
273: * Returns true if the array is non-null, but empty
274: *
275: * @param in array to be tested
276: * @return true if all entries of the array are non-negative
277: * @throws NullPointerException if input array is null
278: */
279: private boolean isNonNegative(long[][] in) {
280: for (int i = 0; i < in.length; i++) {
281: for (int j = 0; j < in[i].length; j++) {
282: if (in[i][j] < 0) {
283: return false;
284: }
285: }
286: }
287: return true;
288: }
289:
290: }
|