/*
 * Copyright 2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
016: package org.apache.commons.math.stat.inference;
017:
018: import org.apache.commons.math.MathException;
019:
020: /**
021: * An interface for Chi-Square tests.
022: *
023: * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
024: */
025: public interface ChiSquareTest {
026:
027: /**
028: * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
029: * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
030: * freqeuncy counts.
031: * <p>
032: * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
033: * the observed counts follow the expected distribution.
034: * <p>
035: * <strong>Preconditions</strong>: <ul>
036: * <li>Expected counts must all be positive.
037: * </li>
038: * <li>Observed counts must all be >= 0.
039: * </li>
040: * <li>The observed and expected arrays must have the same length and
041: * their common length must be at least 2.
042: * </li></ul><p>
043: * If any of the preconditions are not met, an
044: * <code>IllegalArgumentException</code> is thrown.
045: *
046: * @param observed array of observed frequency counts
047: * @param expected array of expected frequency counts
048: * @return chiSquare statistic
049: * @throws IllegalArgumentException if preconditions are not met
050: */
051: double chiSquare(double[] expected, long[] observed)
052: throws IllegalArgumentException;
053:
054: /**
055: * Returns the <i>observed significance level</i>, or <a href=
056: * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
057: * p-value</a>, associated with a
058: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
059: * Chi-square goodness of fit test</a> comparing the <code>observed</code>
060: * frequency counts to those in the <code>expected</code> array.
061: * <p>
062: * The number returned is the smallest significance level at which one can reject
063: * the null hypothesis that the observed counts conform to the frequency distribution
064: * described by the expected counts.
065: * <p>
066: * <strong>Preconditions</strong>: <ul>
067: * <li>Expected counts must all be positive.
068: * </li>
069: * <li>Observed counts must all be >= 0.
070: * </li>
071: * <li>The observed and expected arrays must have the same length and
072: * their common length must be at least 2.
073: * </li></ul><p>
074: * If any of the preconditions are not met, an
075: * <code>IllegalArgumentException</code> is thrown.
076: *
077: * @param observed array of observed frequency counts
078: * @param expected array of expected frequency counts
079: * @return p-value
080: * @throws IllegalArgumentException if preconditions are not met
081: * @throws MathException if an error occurs computing the p-value
082: */
083: double chiSquareTest(double[] expected, long[] observed)
084: throws IllegalArgumentException, MathException;
085:
086: /**
087: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
088: * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
089: * conform to the frequency distribution described by the expected counts, with
090: * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
091: * with 100 * (1 - alpha) percent confidence.
092: * <p>
093: * <strong>Example:</strong><br>
094: * To test the hypothesis that <code>observed</code> follows
095: * <code>expected</code> at the 99% level, use <p>
096: * <code>chiSquareTest(expected, observed, 0.01) </code>
097: * <p>
098: * <strong>Preconditions</strong>: <ul>
099: * <li>Expected counts must all be positive.
100: * </li>
101: * <li>Observed counts must all be >= 0.
102: * </li>
103: * <li>The observed and expected arrays must have the same length and
104: * their common length must be at least 2.
105: * <li> <code> 0 < alpha < 0.5 </code>
106: * </li></ul><p>
107: * If any of the preconditions are not met, an
108: * <code>IllegalArgumentException</code> is thrown.
109: *
110: * @param observed array of observed frequency counts
111: * @param expected array of expected frequency counts
112: * @param alpha significance level of the test
113: * @return true iff null hypothesis can be rejected with confidence
114: * 1 - alpha
115: * @throws IllegalArgumentException if preconditions are not met
116: * @throws MathException if an error occurs performing the test
117: */
118: boolean chiSquareTest(double[] expected, long[] observed,
119: double alpha) throws IllegalArgumentException,
120: MathException;
121:
122: /**
123: * Computes the Chi-Square statistic associated with a
124: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
125: * chi-square test of independence</a> based on the input <code>counts</code>
126: * array, viewed as a two-way table.
127: * <p>
128: * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code>
129: * <p>
130: * <strong>Preconditions</strong>: <ul>
131: * <li>All counts must be >= 0.
132: * </li>
133: * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
134: * </li>
135: * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
136: * at least 2 rows.
137: * </li>
138: * </li></ul><p>
139: * If any of the preconditions are not met, an
140: * <code>IllegalArgumentException</code> is thrown.
141: *
142: * @param counts array representation of 2-way table
143: * @return chiSquare statistic
144: * @throws IllegalArgumentException if preconditions are not met
145: */
146: double chiSquare(long[][] counts) throws IllegalArgumentException;
147:
148: /**
149: * Returns the <i>observed significance level</i>, or <a href=
150: * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
151: * p-value</a>, associated with a
152: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
153: * chi-square test of independence</a> based on the input <code>counts</code>
154: * array, viewed as a two-way table.
155: * <p>
156: * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code>
157: * <p>
158: * <strong>Preconditions</strong>: <ul>
159: * <li>All counts must be >= 0.
160: * </li>
161: * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
162: * </li>
163: * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
164: * at least 2 rows.
165: * </li>
166: * </li></ul><p>
167: * If any of the preconditions are not met, an
168: * <code>IllegalArgumentException</code> is thrown.
169: *
170: * @param counts array representation of 2-way table
171: * @return p-value
172: * @throws IllegalArgumentException if preconditions are not met
173: * @throws MathException if an error occurs computing the p-value
174: */
175: double chiSquareTest(long[][] counts)
176: throws IllegalArgumentException, MathException;
177:
178: /**
179: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
180: * chi-square test of independence</a> evaluating the null hypothesis that the classifications
181: * represented by the counts in the columns of the input 2-way table are independent of the rows,
182: * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
183: * with 100 * (1 - alpha) percent confidence.
184: * <p>
185: * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code>
186: * <p>
187: * <strong>Example:</strong><br>
188: * To test the null hypothesis that the counts in <code>count[0], ... , count[count.length - 1] </code>
189: * all correspond to the same underlying probability distribution at the 99% level, use <p>
190: * <code>chiSquareTest(counts, 0.01) </code>
191: * <p>
192: * <strong>Preconditions</strong>: <ul>
193: * <li>All counts must be >= 0.
194: * </li>
195: * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
196: * </li>
197: * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
198: * at least 2 rows.
199: * </li>
200: * </li></ul><p>
201: * If any of the preconditions are not met, an
202: * <code>IllegalArgumentException</code> is thrown.
203: *
204: * @param counts array representation of 2-way table
205: * @param alpha significance level of the test
206: * @return true iff null hypothesis can be rejected with confidence
207: * 1 - alpha
208: * @throws IllegalArgumentException if preconditions are not met
209: * @throws MathException if an error occurs performing the test
210: */
211: boolean chiSquareTest(long[][] counts, double alpha)
212: throws IllegalArgumentException, MathException;
213: }
|