001: /*
002: * Copyright 2004-2005 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat.inference;
017:
018: import org.apache.commons.math.MathException;
019: import org.apache.commons.math.stat.descriptive.StatisticalSummary;
020:
021: /**
022: * An interface for Student's t-tests.
023: * <p>
024: * Tests can be:<ul>
025: * <li>One-sample or two-sample</li>
026: * <li>One-sided or two-sided</li>
027: * <li>Paired or unpaired (for two-sample tests)</li>
028: * <li>Homoscedastic (equal variance assumption) or heteroscedastic
029: * (for two sample tests)</li>
030: * <li>Fixed significance level (boolean-valued) or returning p-values.
031: * </li></ul>
032: * <p>
033: * Test statistics are available for all tests. Methods including "Test" in
034: * their names perform tests; all other methods return t-statistics. Among
035: * the "Test" methods, <code>double-</code>valued methods return p-values;
036: * <code>boolean-</code>valued methods perform fixed significance level tests.
037: * Significance levels are always specified as numbers between 0 and 0.5
038: * (e.g. tests at the 95% level use <code>alpha=0.05</code>).
039: * <p>
040: * Input to tests can be either <code>double[]</code> arrays or
041: * {@link StatisticalSummary} instances.
042: *
043: *
044: * @version $Revision: 161625 $ $Date: 2005-04-16 22:12:15 -0700 (Sat, 16 Apr 2005) $
045: */
046: public interface TTest {
047: /**
048: * Computes a paired, 2-sample t-statistic based on the data in the input
049: * arrays. The t-statistic returned is equivalent to what would be returned by
050: * computing the one-sample t-statistic {@link #t(double, double[])}, with
051: * <code>mu = 0</code> and the sample array consisting of the (signed)
052: * differences between corresponding entries in <code>sample1</code> and
053: * <code>sample2</code>.
054: * <p>
055: * <strong>Preconditions</strong>: <ul>
056: * <li>The input arrays must have the same length and their common length
057: * must be at least 2.
058: * </li></ul>
059: *
060: * @param sample1 array of sample data values
061: * @param sample2 array of sample data values
062: * @return t statistic
063: * @throws IllegalArgumentException if the precondition is not met
064: * @throws MathException if the statistic cannot be computed due to a
065: * convergence or other numerical error.
066: */
067: public abstract double pairedT(double[] sample1, double[] sample2)
068: throws IllegalArgumentException, MathException;
069:
070: /**
071: * Returns the <i>observed significance level</i>, or
072: * <i>p-value</i>, associated with a paired, two-sample, two-tailed t-test
073: * based on the data in the input arrays.
074: * <p>
075: * The number returned is the smallest significance level
076: * at which one can reject the null hypothesis that the mean of the paired
077: * differences is 0 in favor of the two-sided alternative that the mean paired
078: * difference is not equal to 0. For a one-sided test, divide the returned
079: * value by 2.
080: * <p>
081: * This test is equivalent to a one-sample t-test computed using
082: * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
083: * array consisting of the signed differences between corresponding elements of
084: * <code>sample1</code> and <code>sample2</code>.
085: * <p>
086: * <strong>Usage Note:</strong><br>
087: * The validity of the p-value depends on the assumptions of the parametric
088: * t-test procedure, as discussed
089: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
090: * here</a>.
091: * <p>
092: * <strong>Preconditions</strong>: <ul>
093: * <li>The input array lengths must be the same and their common length must
094: * be at least 2.
095: * </li></ul>
096: *
097: * @param sample1 array of sample data values
098: * @param sample2 array of sample data values
099: * @return p-value for t-test
100: * @throws IllegalArgumentException if the precondition is not met
101: * @throws MathException if an error occurs computing the p-value
102: */
103: public abstract double pairedTTest(double[] sample1,
104: double[] sample2) throws IllegalArgumentException,
105: MathException;
106:
107: /**
108: * Performs a paired t-test evaluating the null hypothesis that the
109: * mean of the paired differences between <code>sample1</code> and
110: * <code>sample2</code> is 0 in favor of the two-sided alternative that the
111: * mean paired difference is not equal to 0, with significance level
112: * <code>alpha</code>.
113: * <p>
114: * Returns <code>true</code> iff the null hypothesis can be rejected with
115: * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
116: * <code>alpha * 2</code>.
117: * <p>
118: * <strong>Usage Note:</strong><br>
119: * The validity of the test depends on the assumptions of the parametric
120: * t-test procedure, as discussed
121: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
122: * here</a>.
123: * <p>
124: * <strong>Preconditions</strong>: <ul>
125: * <li>The input array lengths must be the same and their common length
126: * must be at least 2.
127: * </li>
128: * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
129: * </li></ul>
130: *
131: * @param sample1 array of sample data values
132: * @param sample2 array of sample data values
133: * @param alpha significance level of the test
134: * @return true if the null hypothesis can be rejected with
135: * confidence 1 - alpha
136: * @throws IllegalArgumentException if the preconditions are not met
137: * @throws MathException if an error occurs performing the test
138: */
139: public abstract boolean pairedTTest(double[] sample1,
140: double[] sample2, double alpha)
141: throws IllegalArgumentException, MathException;
142:
143: /**
144: * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
145: * t statistic</a> given observed values and a comparison constant.
146: * <p>
147: * This statistic can be used to perform a one-sample t-test for the mean.
148: * <p>
149: * <strong>Preconditions</strong>: <ul>
150: * <li>The observed array length must be at least 2.
151: * </li></ul>
152: *
153: * @param mu comparison constant
154: * @param observed array of values
155: * @return t statistic
156: * @throws IllegalArgumentException if input array length is less than 2
157: */
158: public abstract double t(double mu, double[] observed)
159: throws IllegalArgumentException;
160:
161: /**
162: * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
163: * t statistic</a> to use in comparing the mean of the dataset described by
164: * <code>sampleStats</code> to <code>mu</code>.
165: * <p>
166: * This statistic can be used to perform a one-sample t-test for the mean.
167: * <p>
168: * <strong>Preconditions</strong>: <ul>
169: * <li><code>sampleStats.getN() &gt;= 2</code>.
170: * </li></ul>
171: *
172: * @param mu comparison constant
173: * @param sampleStats StatisticalSummary holding sample summary statistics
174: * @return t statistic
175: * @throws IllegalArgumentException if the precondition is not met
176: */
177: public abstract double t(double mu, StatisticalSummary sampleStats)
178: throws IllegalArgumentException;
179:
180: /**
181: * Computes a 2-sample t statistic, under the hypothesis of equal
182: * subpopulation variances. To compute a t-statistic without the
183: * equal variances hypothesis, use {@link #t(double[], double[])}.
184: * <p>
185: * This statistic can be used to perform a (homoscedastic) two-sample
186: * t-test to compare sample means.
187: * <p>
188: * The t-statistic is
189: * <p>
190: * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
191: * <p>
192: * where <strong><code>n1</code></strong> is the size of first sample;
193: * <strong><code> n2</code></strong> is the size of second sample;
194: * <strong><code> m1</code></strong> is the mean of first sample;
195: * <strong><code> m2</code></strong> is the mean of second sample;
196: * and <strong><code>var</code></strong> is the pooled variance
197: * estimate:
198: * <p>
199: * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
200: * <p>
201: * with <strong><code>var1</code></strong> the variance of the first sample and
202: * <strong><code>var2</code></strong> the variance of the second sample.
203: * <p>
204: * <strong>Preconditions</strong>: <ul>
205: * <li>The observed array lengths must both be at least 2.
206: * </li></ul>
207: *
208: * @param sample1 array of sample data values
209: * @param sample2 array of sample data values
210: * @return t statistic
211: * @throws IllegalArgumentException if the precondition is not met
212: */
213: public abstract double homoscedasticT(double[] sample1,
214: double[] sample2) throws IllegalArgumentException;
215:
216: /**
217: * Computes a 2-sample t statistic, without the hypothesis of equal
218: * subpopulation variances. To compute a t-statistic assuming equal
219: * variances, use {@link #homoscedasticT(double[], double[])}.
220: * <p>
221: * This statistic can be used to perform a two-sample t-test to compare
222: * sample means.
223: * <p>
224: * The t-statistic is
225: * <p>
226: * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
227: * <p>
228: * where <strong><code>n1</code></strong> is the size of the first sample;
229: * <strong><code> n2</code></strong> is the size of the second sample;
230: * <strong><code> m1</code></strong> is the mean of the first sample;
231: * <strong><code> m2</code></strong> is the mean of the second sample;
232: * <strong><code> var1</code></strong> is the variance of the first sample;
233: * <strong><code> var2</code></strong> is the variance of the second sample.
234: * <p>
235: * <strong>Preconditions</strong>: <ul>
236: * <li>The observed array lengths must both be at least 2.
237: * </li></ul>
238: *
239: * @param sample1 array of sample data values
240: * @param sample2 array of sample data values
241: * @return t statistic
242: * @throws IllegalArgumentException if the precondition is not met
243: */
244: public abstract double t(double[] sample1, double[] sample2)
245: throws IllegalArgumentException;
246:
247: /**
248: * Computes a 2-sample t statistic, comparing the means of the datasets
249: * described by two {@link StatisticalSummary} instances, without the
250: * assumption of equal subpopulation variances. Use
251: * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
252: * compute a t-statistic under the equal variances assumption.
253: * <p>
254: * This statistic can be used to perform a two-sample t-test to compare
255: * sample means.
256: * <p>
257: * The returned t-statistic is
258: * <p>
259: * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
260: * <p>
261: * where <strong><code>n1</code></strong> is the size of the first sample;
262: * <strong><code> n2</code></strong> is the size of the second sample;
263: * <strong><code> m1</code></strong> is the mean of the first sample;
264: * <strong><code> m2</code></strong> is the mean of the second sample;
265: * <strong><code> var1</code></strong> is the variance of the first sample;
266: * <strong><code> var2</code></strong> is the variance of the second sample.
267: * <p>
268: * <strong>Preconditions</strong>: <ul>
269: * <li>The datasets described by the two StatisticalSummary instances must
270: * each contain at least 2 observations.
271: * </li></ul>
272: *
273: * @param sampleStats1 StatisticalSummary describing data from the first sample
274: * @param sampleStats2 StatisticalSummary describing data from the second sample
275: * @return t statistic
276: * @throws IllegalArgumentException if the precondition is not met
277: */
278: public abstract double t(StatisticalSummary sampleStats1,
279: StatisticalSummary sampleStats2)
280: throws IllegalArgumentException;
281:
282: /**
283: * Computes a 2-sample t statistic, comparing the means of the datasets
284: * described by two {@link StatisticalSummary} instances, under the
285: * assumption of equal subpopulation variances. To compute a t-statistic
286: * without the equal variances assumption, use
287: * {@link #t(StatisticalSummary, StatisticalSummary)}.
288: * <p>
289: * This statistic can be used to perform a (homoscedastic) two-sample
290: * t-test to compare sample means.
291: * <p>
292: * The t-statistic returned is
293: * <p>
294: * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
295: * <p>
296: * where <strong><code>n1</code></strong> is the size of first sample;
297: * <strong><code> n2</code></strong> is the size of second sample;
298: * <strong><code> m1</code></strong> is the mean of first sample;
299: * <strong><code> m2</code></strong> is the mean of second sample;
300: * and <strong><code>var</code></strong> is the pooled variance estimate:
301: * <p>
302: * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
303: * <p>
304: * with <strong><code>var1</code></strong> the variance of the first sample and
305: * <strong><code>var2</code></strong> the variance of the second sample.
306: * <p>
307: * <strong>Preconditions</strong>: <ul>
308: * <li>The datasets described by the two StatisticalSummary instances must
309: * each contain at least 2 observations.
310: * </li></ul>
311: *
312: * @param sampleStats1 StatisticalSummary describing data from the first sample
313: * @param sampleStats2 StatisticalSummary describing data from the second sample
314: * @return t statistic
315: * @throws IllegalArgumentException if the precondition is not met
316: */
317: public abstract double homoscedasticT(
318: StatisticalSummary sampleStats1,
319: StatisticalSummary sampleStats2)
320: throws IllegalArgumentException;
321:
322: /**
323: * Returns the <i>observed significance level</i>, or
324: * <i>p-value</i>, associated with a one-sample, two-tailed t-test
325: * comparing the mean of the input array with the constant <code>mu</code>.
326: * <p>
327: * The number returned is the smallest significance level
328: * at which one can reject the null hypothesis that the mean equals
329: * <code>mu</code> in favor of the two-sided alternative that the mean
330: * is different from <code>mu</code>. For a one-sided test, divide the
331: * returned value by 2.
332: * <p>
333: * <strong>Usage Note:</strong><br>
334: * The validity of the test depends on the assumptions of the parametric
335: * t-test procedure, as discussed
336: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>.
337: * <p>
338: * <strong>Preconditions</strong>: <ul>
339: * <li>The sample array length must be at least 2.
340: * </li></ul>
341: *
342: * @param mu constant value to compare sample mean against
343: * @param sample array of sample data values
344: * @return p-value
345: * @throws IllegalArgumentException if the precondition is not met
346: * @throws MathException if an error occurs computing the p-value
347: */
348: public abstract double tTest(double mu, double[] sample)
349: throws IllegalArgumentException, MathException;
350:
351: /**
352: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
353: * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
354: * which <code>sample</code> is drawn equals <code>mu</code>.
355: * <p>
356: * Returns <code>true</code> iff the null hypothesis can be
357: * rejected with confidence <code>1 - alpha</code>. To
358: * perform a 1-sided test, use <code>alpha * 2</code>.
359: * <p>
360: * <strong>Examples:</strong><br><ol>
361: * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
362: * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
363: * </li>
364: * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
365: * at the 99% level, first verify that the measured sample mean is less
366: * than <code>mu</code> and then use
367: * <br><code>tTest(mu, sample, 0.02) </code>
368: * </li></ol>
369: * <p>
370: * <strong>Usage Note:</strong><br>
371: * The validity of the test depends on the assumptions of the one-sample
372: * parametric t-test procedure, as discussed
373: * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>.
374: * <p>
375: * <strong>Preconditions</strong>: <ul>
376: * <li>The sample array length must be at least 2.</li>
377: * <li> <code> 0 &lt; alpha &lt; 0.5 </code></li></ul>
378: *
379: * @param mu constant value to compare sample mean against
380: * @param sample array of sample data values
381: * @param alpha significance level of the test
382: * @return true if the null hypothesis can be rejected with confidence 1 - alpha
383: * @throws IllegalArgumentException if the preconditions are not met
384: * @throws MathException if an error occurs performing the test
385: */
386: public abstract boolean tTest(double mu, double[] sample,
387: double alpha) throws IllegalArgumentException,
388: MathException;
389:
390: /**
391: * Returns the <i>observed significance level</i>, or
392: * <i>p-value</i>, associated with a one-sample, two-tailed t-test
393: * comparing the mean of the dataset described by <code>sampleStats</code>
394: * with the constant <code>mu</code>.
395: * <p>
396: * The number returned is the smallest significance level
397: * at which one can reject the null hypothesis that the mean equals
398: * <code>mu</code> in favor of the two-sided alternative that the mean
399: * is different from <code>mu</code>. For a one-sided test, divide the
400: * returned value by 2.
401: * <p>
402: * <strong>Usage Note:</strong><br>
403: * The validity of the test depends on the assumptions of the parametric
404: * t-test procedure, as discussed
405: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
406: * here</a>.
407: * <p>
408: * <strong>Preconditions</strong>: <ul>
409: * <li>The sample must contain at least 2 observations.
410: * </li></ul>
411: *
412: * @param mu constant value to compare sample mean against
413: * @param sampleStats StatisticalSummary describing sample data
414: * @return p-value
415: * @throws IllegalArgumentException if the precondition is not met
416: * @throws MathException if an error occurs computing the p-value
417: */
418: public abstract double tTest(double mu,
419: StatisticalSummary sampleStats)
420: throws IllegalArgumentException, MathException;
421:
422: /**
423: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
424: * two-sided t-test</a> evaluating the null hypothesis that the mean of the
425: * population from which the dataset described by <code>sampleStats</code> is
426: * drawn equals <code>mu</code>.
427: * <p>
428: * Returns <code>true</code> iff the null hypothesis can be rejected with
429: * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
430: * <code>alpha * 2</code>.
431: * <p>
432: * <strong>Examples:</strong><br><ol>
433: * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
434: * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
435: * </li>
436: * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
437: * at the 99% level, first verify that the measured sample mean is less
438: * than <code>mu</code> and then use
439: * <br><code>tTest(mu, sampleStats, 0.02) </code>
440: * </li></ol>
441: * <p>
442: * <strong>Usage Note:</strong><br>
443: * The validity of the test depends on the assumptions of the one-sample
444: * parametric t-test procedure, as discussed
445: * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>.
446: * <p>
447: * <strong>Preconditions</strong>: <ul>
448: * <li>The sample must include at least 2 observations.</li>
449: * <li> <code> 0 &lt; alpha &lt; 0.5 </code></li></ul>
450: *
451: * @param mu constant value to compare sample mean against
452: * @param sampleStats StatisticalSummary describing sample data values
453: * @param alpha significance level of the test
454: * @return true if the null hypothesis can be rejected with confidence 1 - alpha
455: * @throws IllegalArgumentException if the preconditions are not met
456: * @throws MathException if an error occurs performing the test
457: */
458: public abstract boolean tTest(double mu,
459: StatisticalSummary sampleStats, double alpha)
460: throws IllegalArgumentException, MathException;
461:
462: /**
463: * Returns the <i>observed significance level</i>, or
464: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
465: * comparing the means of the input arrays.
466: * <p>
467: * The number returned is the smallest significance level
468: * at which one can reject the null hypothesis that the two means are
469: * equal in favor of the two-sided alternative that they are different.
470: * For a one-sided test, divide the returned value by 2.
471: * <p>
472: * The test does not assume that the underlying population variances are
473: * equal and it uses approximated degrees of freedom computed from the
474: * sample data to compute the p-value. The t-statistic used is as defined in
475: * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
476: * to the degrees of freedom is used,
477: * as described
478: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
479: * here</a>. To perform the test under the assumption of equal subpopulation
480: * variances, use {@link #homoscedasticTTest(double[], double[])}.
481: * <p>
482: * <strong>Usage Note:</strong><br>
483: * The validity of the p-value depends on the assumptions of the parametric
484: * t-test procedure, as discussed
485: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
486: * here</a>.
487: * <p>
488: * <strong>Preconditions</strong>: <ul>
489: * <li>The observed array lengths must both be at least 2.
490: * </li></ul>
491: *
492: * @param sample1 array of sample data values
493: * @param sample2 array of sample data values
494: * @return p-value for t-test
495: * @throws IllegalArgumentException if the precondition is not met
496: * @throws MathException if an error occurs computing the p-value
497: */
498: public abstract double tTest(double[] sample1, double[] sample2)
499: throws IllegalArgumentException, MathException;
500:
501: /**
502: * Returns the <i>observed significance level</i>, or
503: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
504: * comparing the means of the input arrays, under the assumption that
505: * the two samples are drawn from subpopulations with equal variances.
506: * To perform the test without the equal variances assumption, use
507: * {@link #tTest(double[], double[])}.
508: * <p>
509: * The number returned is the smallest significance level
510: * at which one can reject the null hypothesis that the two means are
511: * equal in favor of the two-sided alternative that they are different.
512: * For a one-sided test, divide the returned value by 2.
513: * <p>
514: * A pooled variance estimate is used to compute the t-statistic. See
515: * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
516: * minus 2 is used as the degrees of freedom.
517: * <p>
518: * <strong>Usage Note:</strong><br>
519: * The validity of the p-value depends on the assumptions of the parametric
520: * t-test procedure, as discussed
521: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
522: * here</a>.
523: * <p>
524: * <strong>Preconditions</strong>: <ul>
525: * <li>The observed array lengths must both be at least 2.
526: * </li></ul>
527: *
528: * @param sample1 array of sample data values
529: * @param sample2 array of sample data values
530: * @return p-value for t-test
531: * @throws IllegalArgumentException if the precondition is not met
532: * @throws MathException if an error occurs computing the p-value
533: */
534: public abstract double homoscedasticTTest(double[] sample1,
535: double[] sample2) throws IllegalArgumentException,
536: MathException;
537:
538: /**
539: * Performs a
540: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
541: * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
542: * and <code>sample2</code> are drawn from populations with the same mean,
543: * with significance level <code>alpha</code>. This test does not assume
544: * that the subpopulation variances are equal. To perform the test assuming
545: * equal variances, use
546: * {@link #homoscedasticTTest(double[], double[], double)}.
547: * <p>
548: * Returns <code>true</code> iff the null hypothesis that the means are
549: * equal can be rejected with confidence <code>1 - alpha</code>. To
550: * perform a 1-sided test, use <code>alpha * 2</code>.
551: * <p>
552: * See {@link #t(double[], double[])} for the formula used to compute the
553: * t-statistic. Degrees of freedom are approximated using the
554: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
555: * Welch-Satterthwaite approximation</a>.
556: *
557: * <p>
558: * <strong>Examples:</strong><br><ol>
559: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
560: * the 95% level, use
561: * <br><code>tTest(sample1, sample2, 0.05)</code>.
562: * </li>
563: * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
564: * at the 99% level, first verify that the measured mean of <code>sample 1</code>
565: * is less than the mean of <code>sample 2</code> and then use
566: * <br><code>tTest(sample1, sample2, 0.02) </code>
567: * </li></ol>
568: * <p>
569: * <strong>Usage Note:</strong><br>
570: * The validity of the test depends on the assumptions of the parametric
571: * t-test procedure, as discussed
572: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
573: * here</a>.
574: * <p>
575: * <strong>Preconditions</strong>: <ul>
576: * <li>The observed array lengths must both be at least 2.
577: * </li>
578: * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
579: * </li></ul>
580: *
581: * @param sample1 array of sample data values
582: * @param sample2 array of sample data values
583: * @param alpha significance level of the test
584: * @return true if the null hypothesis can be rejected with
585: * confidence 1 - alpha
586: * @throws IllegalArgumentException if the preconditions are not met
587: * @throws MathException if an error occurs performing the test
588: */
589: public abstract boolean tTest(double[] sample1, double[] sample2,
590: double alpha) throws IllegalArgumentException,
591: MathException;
592:
593: /**
594: * Performs a
595: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
596: * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
597: * and <code>sample2</code> are drawn from populations with the same mean,
598: * with significance level <code>alpha</code>, assuming that the
599: * subpopulation variances are equal. Use
600: * {@link #tTest(double[], double[], double)} to perform the test without
601: * the assumption of equal variances.
602: * <p>
603: * Returns <code>true</code> iff the null hypothesis that the means are
604: * equal can be rejected with confidence <code>1 - alpha</code>. To
605: * perform a 1-sided test, use <code>alpha * 2</code>. To perform the test
606: * without the assumption of equal subpopulation variances, use
607: * {@link #tTest(double[], double[], double)}.
608: * <p>
609: * A pooled variance estimate is used to compute the t-statistic. See
610: * {@link #homoscedasticT(double[], double[])} for the formula. The sum of
611: * the sample sizes minus 2 is used as the degrees of freedom.
612: * <p>
613: * <strong>Examples:</strong><br><ol>
614: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
615: * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05)</code>.
616: * </li>
617: * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2</code>,
618: * at the 99% level, first verify that the measured mean of
619: * <code>sample 1</code> is less than the mean of <code>sample 2</code>
620: * and then use
621: * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
622: * </li></ol>
623: * <p>
624: * <strong>Usage Note:</strong><br>
625: * The validity of the test depends on the assumptions of the parametric
626: * t-test procedure, as discussed
627: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
628: * here</a>.
629: * <p>
630: * <strong>Preconditions</strong>: <ul>
631: * <li>The observed array lengths must both be at least 2.
632: * </li>
633: * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
634: * </li></ul>
635: *
636: * @param sample1 array of sample data values
637: * @param sample2 array of sample data values
638: * @param alpha significance level of the test
639: * @return true if the null hypothesis can be rejected with
640: * confidence 1 - alpha
641: * @throws IllegalArgumentException if the preconditions are not met
642: * @throws MathException if an error occurs performing the test
643: */
644: public abstract boolean homoscedasticTTest(double[] sample1,
645: double[] sample2, double alpha)
646: throws IllegalArgumentException, MathException;
647:
648: /**
649: * Returns the <i>observed significance level</i>, or
650: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
651: * comparing the means of the datasets described by two StatisticalSummary
652: * instances.
653: * <p>
654: * The number returned is the smallest significance level
655: * at which one can reject the null hypothesis that the two means are
656: * equal in favor of the two-sided alternative that they are different.
657: * For a one-sided test, divide the returned value by 2.
658: * <p>
659: * The test does not assume that the underlying population variances are
660: * equal and it uses approximated degrees of freedom computed from the
661: * sample data to compute the p-value. To perform the test assuming
662: * equal variances, use
663: * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
664: * <p>
665: * <strong>Usage Note:</strong><br>
666: * The validity of the p-value depends on the assumptions of the parametric
667: * t-test procedure, as discussed
668: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
669: * here</a>.
670: * <p>
671: * <strong>Preconditions</strong>: <ul>
672: * <li>The datasets described by the two StatisticalSummary instances must
673: * each contain at least 2 observations.
674: * </li></ul>
675: *
676: * @param sampleStats1 StatisticalSummary describing data from the first sample
677: * @param sampleStats2 StatisticalSummary describing data from the second sample
678: * @return p-value for t-test
679: * @throws IllegalArgumentException if the precondition is not met
680: * @throws MathException if an error occurs computing the p-value
681: */
682: public abstract double tTest(StatisticalSummary sampleStats1,
683: StatisticalSummary sampleStats2)
684: throws IllegalArgumentException, MathException;
685:
686: /**
687: * Returns the <i>observed significance level</i>, or
688: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
689: * comparing the means of the datasets described by two StatisticalSummary
690: * instances, under the hypothesis of equal subpopulation variances. To
691: * perform a test without the equal variances assumption, use
692: * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
693: * <p>
694: * The number returned is the smallest significance level
695: * at which one can reject the null hypothesis that the two means are
696: * equal in favor of the two-sided alternative that they are different.
697: * For a one-sided test, divide the returned value by 2.
698: * <p>
699: * See {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} for the
700: * formula used to compute the t-statistic. The sum of the sample sizes minus
701: * 2 is used as the degrees of freedom.
702: * <p>
703: * <strong>Usage Note:</strong><br>
704: * The validity of the p-value depends on the assumptions of the parametric
705: * t-test procedure, as discussed
706: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>.
707: * <p>
708: * <strong>Preconditions</strong>: <ul>
709: * <li>The datasets described by the two StatisticalSummary instances must
710: * each contain at least 2 observations.
711: * </li></ul>
712: *
713: * @param sampleStats1 StatisticalSummary describing data from the first sample
714: * @param sampleStats2 StatisticalSummary describing data from the second sample
715: * @return p-value for t-test
716: * @throws IllegalArgumentException if the precondition is not met
717: * @throws MathException if an error occurs computing the p-value
718: */
719: public abstract double homoscedasticTTest(
720: StatisticalSummary sampleStats1,
721: StatisticalSummary sampleStats2)
722: throws IllegalArgumentException, MathException;
723:
724: /**
725: * Performs a
726: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
727: * two-sided t-test</a> evaluating the null hypothesis that
728: * <code>sampleStats1</code> and <code>sampleStats2</code> describe
729: * datasets drawn from populations with the same mean, with significance
730: * level <code>alpha</code>. This test does not assume that the
731: * subpopulation variances are equal. To perform the test under the equal
732: * variances assumption, compare <code>alpha</code> to the p-value returned by
733: * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
734: * <p>
735: * Returns <code>true</code> iff the null hypothesis that the means are
736: * equal can be rejected with confidence <code>1 - alpha</code>. To
737: * perform a 1-sided test, use <code>alpha * 2</code>.
738: * <p>
739: * See {@link #t(double[], double[])} for the formula used to compute the
740: * t-statistic. Degrees of freedom are approximated using the
741: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
742: * Welch-Satterthwaite approximation.</a>
743: * <p>
744: * <strong>Examples:</strong><br><ol>
745: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
746: * the 95% level, use
747: * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
748: * </li>
749: * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
750: * at the 99% level, first verify that the measured mean of
751: * <code>sample 1</code> is less than the mean of <code>sample 2</code>
752: * and then use
753: * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
754: * </li></ol>
755: * <p>
756: * <strong>Usage Note:</strong><br>
757: * The validity of the test depends on the assumptions of the parametric
758: * t-test procedure, as discussed
759: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
760: * here</a>.
761: * <p>
762: * <strong>Preconditions</strong>: <ul>
763: * <li>The datasets described by the two StatisticalSummary instances must
764: * each contain at least 2 observations.
765: * </li>
766: * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
767: * </li></ul>
768: *
769: * @param sampleStats1 StatisticalSummary describing data from the first sample
770: * @param sampleStats2 StatisticalSummary describing data from the second sample
771: * @param alpha significance level of the test
772: * @return true if the null hypothesis can be rejected with
773: * confidence 1 - alpha
774: * @throws IllegalArgumentException if the preconditions are not met
775: * @throws MathException if an error occurs performing the test
776: */
777: public abstract boolean tTest(StatisticalSummary sampleStats1,
778: StatisticalSummary sampleStats2, double alpha)
779: throws IllegalArgumentException, MathException;
780: }
|