0001: /*
0002: * Copyright 2004-2005 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016: package org.apache.commons.math.stat.inference;
0017:
0018: import org.apache.commons.math.MathException;
0019: import org.apache.commons.math.distribution.DistributionFactory;
0020: import org.apache.commons.math.distribution.TDistribution;
0021: import org.apache.commons.math.stat.StatUtils;
0022: import org.apache.commons.math.stat.descriptive.StatisticalSummary;
0023:
0024: /**
0025: * Implements t-test statistics defined in the {@link TTest} interface.
0026: * <p>
0027: * Uses commons-math {@link org.apache.commons.math.distribution.TDistribution}
0028: * implementation to estimate exact p-values.
0029: *
0030: * @version $Revision: 165583 $ $Date: 2005-05-01 22:14:49 -0700 (Sun, 01 May 2005) $
0031: */
0032: public class TTestImpl implements TTest {
0033:
0034: /** Cached DistributionFactory used to create TDistribution instances */
0035: private DistributionFactory distributionFactory = null;
0036:
0037: /**
0038: * Default constructor.
0039: */
0040: public TTestImpl() {
0041: super ();
0042: }
0043:
0044: /**
0045: * Computes a paired, 2-sample t-statistic based on the data in the input
0046: * arrays. The t-statistic returned is equivalent to what would be returned by
0047: * computing the one-sample t-statistic {@link #t(double, double[])}, with
0048: * <code>mu = 0</code> and the sample array consisting of the (signed)
0049: * differences between corresponding entries in <code>sample1</code> and
0050: * <code>sample2.</code>
0051: * <p>
0052: * <strong>Preconditions</strong>: <ul>
0053: * <li>The input arrays must have the same length and their common length
0054: * must be at least 2.
0055: * </li></ul>
0056: *
0057: * @param sample1 array of sample data values
0058: * @param sample2 array of sample data values
0059: * @return t statistic
0060: * @throws IllegalArgumentException if the precondition is not met
0061: * @throws MathException if the statistic can not be computed due to a
0062: * convergence or other numerical error.
0063: */
0064: public double pairedT(double[] sample1, double[] sample2)
0065: throws IllegalArgumentException, MathException {
0066: if ((sample1 == null)
0067: || (sample2 == null || Math.min(sample1.length,
0068: sample2.length) < 2)) {
0069: throw new IllegalArgumentException(
0070: "insufficient data for t statistic");
0071: }
0072: double meanDifference = StatUtils.meanDifference(sample1,
0073: sample2);
0074: return t(meanDifference, 0, StatUtils.varianceDifference(
0075: sample1, sample2, meanDifference),
0076: (double) sample1.length);
0077: }
0078:
0079: /**
0080: * Returns the <i>observed significance level</i>, or
0081: * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
0082: * based on the data in the input arrays.
0083: * <p>
0084: * The number returned is the smallest significance level
0085: * at which one can reject the null hypothesis that the mean of the paired
0086: * differences is 0 in favor of the two-sided alternative that the mean paired
0087: * difference is not equal to 0. For a one-sided test, divide the returned
0088: * value by 2.
0089: * <p>
0090: * This test is equivalent to a one-sample t-test computed using
0091: * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
0092: * array consisting of the signed differences between corresponding elements of
0093: * <code>sample1</code> and <code>sample2.</code>
0094: * <p>
0095: * <strong>Usage Note:</strong><br>
0096: * The validity of the p-value depends on the assumptions of the parametric
0097: * t-test procedure, as discussed
0098: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0099: * here</a>
0100: * <p>
0101: * <strong>Preconditions</strong>: <ul>
0102: * <li>The input array lengths must be the same and their common length must
0103: * be at least 2.
0104: * </li></ul>
0105: *
0106: * @param sample1 array of sample data values
0107: * @param sample2 array of sample data values
0108: * @return p-value for t-test
0109: * @throws IllegalArgumentException if the precondition is not met
0110: * @throws MathException if an error occurs computing the p-value
0111: */
0112: public double pairedTTest(double[] sample1, double[] sample2)
0113: throws IllegalArgumentException, MathException {
0114: double meanDifference = StatUtils.meanDifference(sample1,
0115: sample2);
0116: return tTest(meanDifference, 0, StatUtils.varianceDifference(
0117: sample1, sample2, meanDifference),
0118: (double) sample1.length);
0119: }
0120:
0121: /**
0122: * Performs a paired t-test evaluating the null hypothesis that the
0123: * mean of the paired differences between <code>sample1</code> and
0124: * <code>sample2</code> is 0 in favor of the two-sided alternative that the
0125: * mean paired difference is not equal to 0, with significance level
0126: * <code>alpha</code>.
0127: * <p>
0128: * Returns <code>true</code> iff the null hypothesis can be rejected with
0129: * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
0130: * <code>alpha * 2</code>
0131: * <p>
0132: * <strong>Usage Note:</strong><br>
0133: * The validity of the test depends on the assumptions of the parametric
0134: * t-test procedure, as discussed
0135: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0136: * here</a>
0137: * <p>
0138: * <strong>Preconditions</strong>: <ul>
0139: * <li>The input array lengths must be the same and their common length
0140: * must be at least 2.
0141: * </li>
0142: * <li> <code> 0 < alpha < 0.5 </code>
0143: * </li></ul>
0144: *
0145: * @param sample1 array of sample data values
0146: * @param sample2 array of sample data values
0147: * @param alpha significance level of the test
0148: * @return true if the null hypothesis can be rejected with
0149: * confidence 1 - alpha
0150: * @throws IllegalArgumentException if the preconditions are not met
0151: * @throws MathException if an error occurs performing the test
0152: */
0153: public boolean pairedTTest(double[] sample1, double[] sample2,
0154: double alpha) throws IllegalArgumentException,
0155: MathException {
0156: if ((alpha <= 0) || (alpha > 0.5)) {
0157: throw new IllegalArgumentException(
0158: "bad significance level: " + alpha);
0159: }
0160: return (pairedTTest(sample1, sample2) < alpha);
0161: }
0162:
0163: /**
0164: * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
0165: * t statistic </a> given observed values and a comparison constant.
0166: * <p>
0167: * This statistic can be used to perform a one sample t-test for the mean.
0168: * <p>
0169: * <strong>Preconditions</strong>: <ul>
0170: * <li>The observed array length must be at least 2.
0171: * </li></ul>
0172: *
0173: * @param mu comparison constant
0174: * @param observed array of values
0175: * @return t statistic
0176: * @throws IllegalArgumentException if input array length is less than 2
0177: */
0178: public double t(double mu, double[] observed)
0179: throws IllegalArgumentException {
0180: if ((observed == null) || (observed.length < 2)) {
0181: throw new IllegalArgumentException(
0182: "insufficient data for t statistic");
0183: }
0184: return t(StatUtils.mean(observed), mu, StatUtils
0185: .variance(observed), observed.length);
0186: }
0187:
0188: /**
0189: * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
0190: * t statistic </a> to use in comparing the mean of the dataset described by
0191: * <code>sampleStats</code> to <code>mu</code>.
0192: * <p>
0193: * This statistic can be used to perform a one sample t-test for the mean.
0194: * <p>
0195: * <strong>Preconditions</strong>: <ul>
0196: * <li><code>sampleStats.getN() &gt;= 2</code>.
0197: * </li></ul>
0198: *
0199: * @param mu comparison constant
0200: * @param sampleStats StatisticalSummary holding sample summary statistics
0201: * @return t statistic
0202: * @throws IllegalArgumentException if the precondition is not met
0203: */
0204: public double t(double mu, StatisticalSummary sampleStats)
0205: throws IllegalArgumentException {
0206: if ((sampleStats == null) || (sampleStats.getN() < 2)) {
0207: throw new IllegalArgumentException(
0208: "insufficient data for t statistic");
0209: }
0210: return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
0211: sampleStats.getN());
0212: }
0213:
0214: /**
0215: * Computes a 2-sample t statistic, under the hypothesis of equal
0216: * subpopulation variances. To compute a t-statistic without the
0217: * equal variances hypothesis, use {@link #t(double[], double[])}.
0218: * <p>
0219: * This statistic can be used to perform a (homoscedastic) two-sample
0220: * t-test to compare sample means.
0221: * <p>
0222: * The t-statistic is
0223: * <p>
0224: * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
0225: * <p>
0226: * where <strong><code>n1</code></strong> is the size of first sample;
0227: * <strong><code> n2</code></strong> is the size of second sample;
0228: * <strong><code> m1</code></strong> is the mean of first sample;
0229: * <strong><code> m2</code></strong> is the mean of second sample
0230: *
0231: * and <strong><code>var</code></strong> is the pooled variance estimate:
0232: * <p>
0233: * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
0234: * <p>
0235: * with <strong><code>var1</code></strong> the variance of the first sample and
0236: * <strong><code>var2</code></strong> the variance of the second sample.
0237: * <p>
0238: * <strong>Preconditions</strong>: <ul>
0239: * <li>The observed array lengths must both be at least 2.
0240: * </li></ul>
0241: *
0242: * @param sample1 array of sample data values
0243: * @param sample2 array of sample data values
0244: * @return t statistic
0245: * @throws IllegalArgumentException if the precondition is not met
0246: */
0247: public double homoscedasticT(double[] sample1, double[] sample2)
0248: throws IllegalArgumentException {
0249: if ((sample1 == null)
0250: || (sample2 == null || Math.min(sample1.length,
0251: sample2.length) < 2)) {
0252: throw new IllegalArgumentException(
0253: "insufficient data for t statistic");
0254: }
0255: return homoscedasticT(StatUtils.mean(sample1), StatUtils
0256: .mean(sample2), StatUtils.variance(sample1), StatUtils
0257: .variance(sample2), (double) sample1.length,
0258: (double) sample2.length);
0259: }
0260:
0261: /**
0262: * Computes a 2-sample t statistic, without the hypothesis of equal
0263: * subpopulation variances. To compute a t-statistic assuming equal
0264: * variances, use {@link #homoscedasticT(double[], double[])}.
0265: * <p>
0266: * This statistic can be used to perform a two-sample t-test to compare
0267: * sample means.
0268: * <p>
0269: * The t-statistic is
0270: * <p>
0271: * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
0272: * <p>
0273: * where <strong><code>n1</code></strong> is the size of the first sample;
0274: * <strong><code> n2</code></strong> is the size of the second sample;
0275: * <strong><code> m1</code></strong> is the mean of the first sample;
0276: * <strong><code> m2</code></strong> is the mean of the second sample;
0277: * <strong><code> var1</code></strong> is the variance of the first sample;
0278: * <strong><code> var2</code></strong> is the variance of the second sample;
0279: * <p>
0280: * <strong>Preconditions</strong>: <ul>
0281: * <li>The observed array lengths must both be at least 2.
0282: * </li></ul>
0283: *
0284: * @param sample1 array of sample data values
0285: * @param sample2 array of sample data values
0286: * @return t statistic
0287: * @throws IllegalArgumentException if the precondition is not met
0288: */
0289: public double t(double[] sample1, double[] sample2)
0290: throws IllegalArgumentException {
0291: if ((sample1 == null)
0292: || (sample2 == null || Math.min(sample1.length,
0293: sample2.length) < 2)) {
0294: throw new IllegalArgumentException(
0295: "insufficient data for t statistic");
0296: }
0297: return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
0298: StatUtils.variance(sample1), StatUtils
0299: .variance(sample2), (double) sample1.length,
0300: (double) sample2.length);
0301: }
0302:
0303: /**
0304: * Computes a 2-sample t statistic, comparing the means of the datasets
0305: * described by two {@link StatisticalSummary} instances, without the
0306: * assumption of equal subpopulation variances. Use
0307: * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
0308: * compute a t-statistic under the equal variances assumption.
0309: * <p>
0310: * This statistic can be used to perform a two-sample t-test to compare
0311: * sample means.
0312: * <p>
0313: * The returned t-statistic is
0314: * <p>
0315: * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
0316: * <p>
0317: * where <strong><code>n1</code></strong> is the size of the first sample;
0318: * <strong><code> n2</code></strong> is the size of the second sample;
0319: * <strong><code> m1</code></strong> is the mean of the first sample;
0320: * <strong><code> m2</code></strong> is the mean of the second sample;
0321: * <strong><code> var1</code></strong> is the variance of the first sample;
0322: * <strong><code> var2</code></strong> is the variance of the second sample.
0323: * <p>
0324: * <strong>Preconditions</strong>: <ul>
0325: * <li>The datasets described by the two StatisticalSummary instances must
0326: * each contain at least 2 observations.
0327: * </li></ul>
0328: *
0329: * @param sampleStats1 StatisticalSummary describing data from the first sample
0330: * @param sampleStats2 StatisticalSummary describing data from the second sample
0331: * @return t statistic
0332: * @throws IllegalArgumentException if the precondition is not met
0333: */
0334: public double t(StatisticalSummary sampleStats1,
0335: StatisticalSummary sampleStats2)
0336: throws IllegalArgumentException {
0337: if ((sampleStats1 == null)
0338: || (sampleStats2 == null || Math.min(sampleStats1
0339: .getN(), sampleStats2.getN()) < 2)) {
0340: throw new IllegalArgumentException(
0341: "insufficient data for t statistic");
0342: }
0343: return t(sampleStats1.getMean(), sampleStats2.getMean(),
0344: sampleStats1.getVariance(), sampleStats2.getVariance(),
0345: (double) sampleStats1.getN(), (double) sampleStats2
0346: .getN());
0347: }
0348:
0349: /**
0350: * Computes a 2-sample t statistic, comparing the means of the datasets
0351: * described by two {@link StatisticalSummary} instances, under the
0352: * assumption of equal subpopulation variances. To compute a t-statistic
0353: * without the equal variances assumption, use
0354: * {@link #t(StatisticalSummary, StatisticalSummary)}.
0355: * <p>
0356: * This statistic can be used to perform a (homoscedastic) two-sample
0357: * t-test to compare sample means.
0358: * <p>
0359: * The t-statistic returned is
0360: * <p>
0361: * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
0362: * <p>
0363: * where <strong><code>n1</code></strong> is the size of first sample;
0364: * <strong><code> n2</code></strong> is the size of second sample;
0365: * <strong><code> m1</code></strong> is the mean of first sample;
0366: * <strong><code> m2</code></strong> is the mean of second sample
0367: * and <strong><code>var</code></strong> is the pooled variance estimate:
0368: * <p>
0369: * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
0370: * <p>
0371: * with <strong><code>var1</code></strong> the variance of the first sample and
0372: * <strong><code>var2</code></strong> the variance of the second sample.
0373: * <p>
0374: * <strong>Preconditions</strong>: <ul>
0375: * <li>The datasets described by the two StatisticalSummary instances must
0376: * each contain at least 2 observations.
0377: * </li></ul>
0378: *
0379: * @param sampleStats1 StatisticalSummary describing data from the first sample
0380: * @param sampleStats2 StatisticalSummary describing data from the second sample
0381: * @return t statistic
0382: * @throws IllegalArgumentException if the precondition is not met
0383: */
0384: public double homoscedasticT(StatisticalSummary sampleStats1,
0385: StatisticalSummary sampleStats2)
0386: throws IllegalArgumentException {
0387: if ((sampleStats1 == null)
0388: || (sampleStats2 == null || Math.min(sampleStats1
0389: .getN(), sampleStats2.getN()) < 2)) {
0390: throw new IllegalArgumentException(
0391: "insufficient data for t statistic");
0392: }
0393: return homoscedasticT(sampleStats1.getMean(), sampleStats2
0394: .getMean(), sampleStats1.getVariance(), sampleStats2
0395: .getVariance(), (double) sampleStats1.getN(),
0396: (double) sampleStats2.getN());
0397: }
0398:
0399: /**
0400: * Returns the <i>observed significance level</i>, or
0401: * <i>p-value</i>, associated with a one-sample, two-tailed t-test
0402: * comparing the mean of the input array with the constant <code>mu</code>.
0403: * <p>
0404: * The number returned is the smallest significance level
0405: * at which one can reject the null hypothesis that the mean equals
0406: * <code>mu</code> in favor of the two-sided alternative that the mean
0407: * is different from <code>mu</code>. For a one-sided test, divide the
0408: * returned value by 2.
0409: * <p>
0410: * <strong>Usage Note:</strong><br>
0411: * The validity of the test depends on the assumptions of the parametric
0412: * t-test procedure, as discussed
0413: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
0414: * <p>
0415: * <strong>Preconditions</strong>: <ul>
0416: * <li>The observed array length must be at least 2.
0417: * </li></ul>
0418: *
0419: * @param mu constant value to compare sample mean against
0420: * @param sample array of sample data values
0421: * @return p-value
0422: * @throws IllegalArgumentException if the precondition is not met
0423: * @throws MathException if an error occurs computing the p-value
0424: */
0425: public double tTest(double mu, double[] sample)
0426: throws IllegalArgumentException, MathException {
0427: if ((sample == null) || (sample.length < 2)) {
0428: throw new IllegalArgumentException(
0429: "insufficient data for t statistic");
0430: }
0431: return tTest(StatUtils.mean(sample), mu, StatUtils
0432: .variance(sample), sample.length);
0433: }
0434:
0435: /**
0436: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
0437: * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
0438: * which <code>sample</code> is drawn equals <code>mu</code>.
0439: * <p>
0440: * Returns <code>true</code> iff the null hypothesis can be
0441: * rejected with confidence <code>1 - alpha</code>. To
0442: * perform a 1-sided test, use <code>alpha * 2</code>
0443: * <p>
0444: * <strong>Examples:</strong><br><ol>
0445: * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
0446: * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
0447: * </li>
0448: * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
0449: * at the 99% level, first verify that the measured sample mean is less
0450: * than <code>mu</code> and then use
0451: * <br><code>tTest(mu, sample, 0.02) </code>
0452: * </li></ol>
0453: * <p>
0454: * <strong>Usage Note:</strong><br>
0455: * The validity of the test depends on the assumptions of the one-sample
0456: * parametric t-test procedure, as discussed
0457: * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
0458: * <p>
0459: * <strong>Preconditions</strong>: <ul>
0460: * <li>The observed array length must be at least 2.
0461: * </li></ul>
0462: *
0463: * @param mu constant value to compare sample mean against
0464: * @param sample array of sample data values
0465: * @param alpha significance level of the test
0466: * @return true if the null hypothesis can be rejected with confidence 1 - alpha
0467: * @throws IllegalArgumentException if the precondition is not met
0468: * @throws MathException if an error occurs computing the p-value
0469: */
0470: public boolean tTest(double mu, double[] sample, double alpha)
0471: throws IllegalArgumentException, MathException {
0472: if ((alpha <= 0) || (alpha > 0.5)) {
0473: throw new IllegalArgumentException(
0474: "bad significance level: " + alpha);
0475: }
0476: return (tTest(mu, sample) < alpha);
0477: }
0478:
0479: /**
0480: * Returns the <i>observed significance level</i>, or
0481: * <i>p-value</i>, associated with a one-sample, two-tailed t-test
0482: * comparing the mean of the dataset described by <code>sampleStats</code>
0483: * with the constant <code>mu</code>.
0484: * <p>
0485: * The number returned is the smallest significance level
0486: * at which one can reject the null hypothesis that the mean equals
0487: * <code>mu</code> in favor of the two-sided alternative that the mean
0488: * is different from <code>mu</code>. For a one-sided test, divide the
0489: * returned value by 2.
0490: * <p>
0491: * <strong>Usage Note:</strong><br>
0492: * The validity of the test depends on the assumptions of the parametric
0493: * t-test procedure, as discussed
0494: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0495: * here</a>
0496: * <p>
0497: * <strong>Preconditions</strong>: <ul>
0498: * <li>The sample must contain at least 2 observations.
0499: * </li></ul>
0500: *
0501: * @param mu constant value to compare sample mean against
0502: * @param sampleStats StatisticalSummary describing sample data
0503: * @return p-value
0504: * @throws IllegalArgumentException if the precondition is not met
0505: * @throws MathException if an error occurs computing the p-value
0506: */
0507: public double tTest(double mu, StatisticalSummary sampleStats)
0508: throws IllegalArgumentException, MathException {
0509: if ((sampleStats == null) || (sampleStats.getN() < 2)) {
0510: throw new IllegalArgumentException(
0511: "insufficient data for t statistic");
0512: }
0513: return tTest(sampleStats.getMean(), mu, sampleStats
0514: .getVariance(), sampleStats.getN());
0515: }
0516:
0517: /**
0518: * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
0519: * two-sided t-test</a> evaluating the null hypothesis that the mean of the
0520: * population from which the dataset described by <code>stats</code> is
0521: * drawn equals <code>mu</code>.
0522: * <p>
0523: * Returns <code>true</code> iff the null hypothesis can be rejected with
0524: * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
0525: * <code>alpha * 2.</code>
0526: * <p>
0527: * <strong>Examples:</strong><br><ol>
0528: * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
0529: * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
0530: * </li>
0531: * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
0532: * at the 99% level, first verify that the measured sample mean is less
0533: * than <code>mu</code> and then use
0534: * <br><code>tTest(mu, sampleStats, 0.02) </code>
0535: * </li></ol>
0536: * <p>
0537: * <strong>Usage Note:</strong><br>
0538: * The validity of the test depends on the assumptions of the one-sample
0539: * parametric t-test procedure, as discussed
0540: * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
0541: * <p>
0542: * <strong>Preconditions</strong>: <ul>
0543: * <li>The sample must include at least 2 observations.
0544: * </li></ul>
0545: *
0546: * @param mu constant value to compare sample mean against
0547: * @param sampleStats StatisticalSummary describing sample data values
0548: * @param alpha significance level of the test
0549: * @return true if the null hypothesis can be rejected with confidence 1 - alpha
0550: * @throws IllegalArgumentException if the precondition is not met
0551: * @throws MathException if an error occurs computing the p-value
0552: */
0553: public boolean tTest(double mu, StatisticalSummary sampleStats,
0554: double alpha) throws IllegalArgumentException,
0555: MathException {
0556: if ((alpha <= 0) || (alpha > 0.5)) {
0557: throw new IllegalArgumentException(
0558: "bad significance level: " + alpha);
0559: }
0560: return (tTest(mu, sampleStats) < alpha);
0561: }
0562:
0563: /**
0564: * Returns the <i>observed significance level</i>, or
0565: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
0566: * comparing the means of the input arrays.
0567: * <p>
0568: * The number returned is the smallest significance level
0569: * at which one can reject the null hypothesis that the two means are
0570: * equal in favor of the two-sided alternative that they are different.
0571: * For a one-sided test, divide the returned value by 2.
0572: * <p>
0573: * The test does not assume that the underlying population variances are
0574: * equal and it uses approximated degrees of freedom computed from the
0575: * sample data to compute the p-value. The t-statistic used is as defined in
0576: * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
0577: * to the degrees of freedom is used,
0578: * as described
0579: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
0580: * here.</a> To perform the test under the assumption of equal subpopulation
0581: * variances, use {@link #homoscedasticTTest(double[], double[])}.
0582: * <p>
0583: * <strong>Usage Note:</strong><br>
0584: * The validity of the p-value depends on the assumptions of the parametric
0585: * t-test procedure, as discussed
0586: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0587: * here</a>
0588: * <p>
0589: * <strong>Preconditions</strong>: <ul>
0590: * <li>The observed array lengths must both be at least 2.
0591: * </li></ul>
0592: *
0593: * @param sample1 array of sample data values
0594: * @param sample2 array of sample data values
0595: * @return p-value for t-test
0596: * @throws IllegalArgumentException if the precondition is not met
0597: * @throws MathException if an error occurs computing the p-value
0598: */
0599: public double tTest(double[] sample1, double[] sample2)
0600: throws IllegalArgumentException, MathException {
0601: if ((sample1 == null)
0602: || (sample2 == null || Math.min(sample1.length,
0603: sample2.length) < 2)) {
0604: throw new IllegalArgumentException("insufficient data");
0605: }
0606: return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
0607: StatUtils.variance(sample1), StatUtils
0608: .variance(sample2), (double) sample1.length,
0609: (double) sample2.length);
0610: }
0611:
0612: /**
0613: * Returns the <i>observed significance level</i>, or
0614: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
0615: * comparing the means of the input arrays, under the assumption that
0616: * the two samples are drawn from subpopulations with equal variances.
0617: * To perform the test without the equal variances assumption, use
0618: * {@link #tTest(double[], double[])}.
0619: * <p>
0620: * The number returned is the smallest significance level
0621: * at which one can reject the null hypothesis that the two means are
0622: * equal in favor of the two-sided alternative that they are different.
0623: * For a one-sided test, divide the returned value by 2.
0624: * <p>
0625: * A pooled variance estimate is used to compute the t-statistic. See
0626: * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
0627: * minus 2 is used as the degrees of freedom.
0628: * <p>
0629: * <strong>Usage Note:</strong><br>
0630: * The validity of the p-value depends on the assumptions of the parametric
0631: * t-test procedure, as discussed
0632: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0633: * here</a>
0634: * <p>
0635: * <strong>Preconditions</strong>: <ul>
0636: * <li>The observed array lengths must both be at least 2.
0637: * </li></ul>
0638: *
0639: * @param sample1 array of sample data values
0640: * @param sample2 array of sample data values
0641: * @return p-value for t-test
0642: * @throws IllegalArgumentException if the precondition is not met
0643: * @throws MathException if an error occurs computing the p-value
0644: */
0645: public double homoscedasticTTest(double[] sample1, double[] sample2)
0646: throws IllegalArgumentException, MathException {
0647: if ((sample1 == null)
0648: || (sample2 == null || Math.min(sample1.length,
0649: sample2.length) < 2)) {
0650: throw new IllegalArgumentException("insufficient data");
0651: }
0652: return homoscedasticTTest(StatUtils.mean(sample1), StatUtils
0653: .mean(sample2), StatUtils.variance(sample1), StatUtils
0654: .variance(sample2), (double) sample1.length,
0655: (double) sample2.length);
0656: }
0657:
0658: /**
0659: * Performs a
0660: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
0661: * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
0662: * and <code>sample2</code> are drawn from populations with the same mean,
0663: * with significance level <code>alpha</code>. This test does not assume
0664: * that the subpopulation variances are equal. To perform the test assuming
0665: * equal variances, use
0666: * {@link #homoscedasticTTest(double[], double[], double)}.
0667: * <p>
0668: * Returns <code>true</code> iff the null hypothesis that the means are
0669: * equal can be rejected with confidence <code>1 - alpha</code>. To
0670: * perform a 1-sided test, use <code>alpha * 2</code>
0671: * <p>
0672: * See {@link #t(double[], double[])} for the formula used to compute the
0673: * t-statistic. Degrees of freedom are approximated using the
0674: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
0675: * Welch-Satterthwaite approximation.</a>
0676:
0677: * <p>
0678: * <strong>Examples:</strong><br><ol>
0679: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
0680: * the 95% level, use
0681: * <br><code>tTest(sample1, sample2, 0.05). </code>
0682: * </li>
0683: * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
0684: * the 99% level, first verify that the measured mean of <code>sample 1</code>
0685: * is less than the mean of <code>sample 2</code> and then use
0686: * <br><code>tTest(sample1, sample2, 0.02) </code>
0687: * </li></ol>
0688: * <p>
0689: * <strong>Usage Note:</strong><br>
0690: * The validity of the test depends on the assumptions of the parametric
0691: * t-test procedure, as discussed
0692: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0693: * here</a>
0694: * <p>
0695: * <strong>Preconditions</strong>: <ul>
0696: * <li>The observed array lengths must both be at least 2.
0697: * </li>
0698: * <li> <code> 0 < alpha < 0.5 </code>
0699: * </li></ul>
0700: *
0701: * @param sample1 array of sample data values
0702: * @param sample2 array of sample data values
0703: * @param alpha significance level of the test
0704: * @return true if the null hypothesis can be rejected with
0705: * confidence 1 - alpha
0706: * @throws IllegalArgumentException if the preconditions are not met
0707: * @throws MathException if an error occurs performing the test
0708: */
0709: public boolean tTest(double[] sample1, double[] sample2,
0710: double alpha) throws IllegalArgumentException,
0711: MathException {
0712: if ((alpha <= 0) || (alpha > 0.5)) {
0713: throw new IllegalArgumentException(
0714: "bad significance level: " + alpha);
0715: }
0716: return (tTest(sample1, sample2) < alpha);
0717: }
0718:
0719: /**
0720: * Performs a
0721: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
0722: * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
0723: * and <code>sample2</code> are drawn from populations with the same mean,
0724: * with significance level <code>alpha</code>, assuming that the
0725: * subpopulation variances are equal. Use
0726: * {@link #tTest(double[], double[], double)} to perform the test without
0727: * the assumption of equal variances.
0728: * <p>
0729: * Returns <code>true</code> iff the null hypothesis that the means are
0730: * equal can be rejected with confidence <code>1 - alpha</code>. To
0731: * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
0732: * without the assumption of equal subpopulation variances, use
0733: * {@link #tTest(double[], double[], double)}.
0734: * <p>
0735: * A pooled variance estimate is used to compute the t-statistic. See
0736: * {@link #t(double[], double[])} for the formula. The sum of the sample
0737: * sizes minus 2 is used as the degrees of freedom.
0738: * <p>
0739: * <strong>Examples:</strong><br><ol>
0740: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
0741: * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
0742: * </li>
0743: * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
0744: * at the 99% level, first verify that the measured mean of
0745: * <code>sample 1</code> is less than the mean of <code>sample 2</code>
0746: * and then use
0747: * <br><code>tTest(sample1, sample2, 0.02) </code>
0748: * </li></ol>
0749: * <p>
0750: * <strong>Usage Note:</strong><br>
0751: * The validity of the test depends on the assumptions of the parametric
0752: * t-test procedure, as discussed
0753: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0754: * here</a>
0755: * <p>
0756: * <strong>Preconditions</strong>: <ul>
0757: * <li>The observed array lengths must both be at least 2.
0758: * </li>
0759: * <li> <code> 0 < alpha < 0.5 </code>
0760: * </li></ul>
0761: *
0762: * @param sample1 array of sample data values
0763: * @param sample2 array of sample data values
0764: * @param alpha significance level of the test
0765: * @return true if the null hypothesis can be rejected with
0766: * confidence 1 - alpha
0767: * @throws IllegalArgumentException if the preconditions are not met
0768: * @throws MathException if an error occurs performing the test
0769: */
0770: public boolean homoscedasticTTest(double[] sample1,
0771: double[] sample2, double alpha)
0772: throws IllegalArgumentException, MathException {
0773: if ((alpha <= 0) || (alpha > 0.5)) {
0774: throw new IllegalArgumentException(
0775: "bad significance level: " + alpha);
0776: }
0777: return (homoscedasticTTest(sample1, sample2) < alpha);
0778: }
0779:
0780: /**
0781: * Returns the <i>observed significance level</i>, or
0782: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
0783: * comparing the means of the datasets described by two StatisticalSummary
0784: * instances.
0785: * <p>
0786: * The number returned is the smallest significance level
0787: * at which one can reject the null hypothesis that the two means are
0788: * equal in favor of the two-sided alternative that they are different.
0789: * For a one-sided test, divide the returned value by 2.
0790: * <p>
0791: * The test does not assume that the underlying popuation variances are
0792: * equal and it uses approximated degrees of freedom computed from the
0793: * sample data to compute the p-value. To perform the test assuming
0794: * equal variances, use
0795: * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
0796: * <p>
0797: * <strong>Usage Note:</strong><br>
0798: * The validity of the p-value depends on the assumptions of the parametric
0799: * t-test procedure, as discussed
0800: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0801: * here</a>
0802: * <p>
0803: * <strong>Preconditions</strong>: <ul>
0804: * <li>The datasets described by the two Univariates must each contain
0805: * at least 2 observations.
0806: * </li></ul>
0807: *
0808: * @param sampleStats1 StatisticalSummary describing data from the first sample
0809: * @param sampleStats2 StatisticalSummary describing data from the second sample
0810: * @return p-value for t-test
0811: * @throws IllegalArgumentException if the precondition is not met
0812: * @throws MathException if an error occurs computing the p-value
0813: */
0814: public double tTest(StatisticalSummary sampleStats1,
0815: StatisticalSummary sampleStats2)
0816: throws IllegalArgumentException, MathException {
0817: if ((sampleStats1 == null)
0818: || (sampleStats2 == null || Math.min(sampleStats1
0819: .getN(), sampleStats2.getN()) < 2)) {
0820: throw new IllegalArgumentException(
0821: "insufficient data for t statistic");
0822: }
0823: return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
0824: sampleStats1.getVariance(), sampleStats2.getVariance(),
0825: (double) sampleStats1.getN(), (double) sampleStats2
0826: .getN());
0827: }
0828:
0829: /**
0830: * Returns the <i>observed significance level</i>, or
0831: * <i>p-value</i>, associated with a two-sample, two-tailed t-test
0832: * comparing the means of the datasets described by two StatisticalSummary
0833: * instances, under the hypothesis of equal subpopulation variances. To
0834: * perform a test without the equal variances assumption, use
0835: * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
0836: * <p>
0837: * The number returned is the smallest significance level
0838: * at which one can reject the null hypothesis that the two means are
0839: * equal in favor of the two-sided alternative that they are different.
0840: * For a one-sided test, divide the returned value by 2.
0841: * <p>
0842: * See {@link #homoscedasticT(double[], double[])} for the formula used to
0843: * compute the t-statistic. The sum of the sample sizes minus 2 is used as
0844: * the degrees of freedom.
0845: * <p>
0846: * <strong>Usage Note:</strong><br>
0847: * The validity of the p-value depends on the assumptions of the parametric
0848: * t-test procedure, as discussed
0849: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
0850: * <p>
0851: * <strong>Preconditions</strong>: <ul>
0852: * <li>The datasets described by the two Univariates must each contain
0853: * at least 2 observations.
0854: * </li></ul>
0855: *
0856: * @param sampleStats1 StatisticalSummary describing data from the first sample
0857: * @param sampleStats2 StatisticalSummary describing data from the second sample
0858: * @return p-value for t-test
0859: * @throws IllegalArgumentException if the precondition is not met
0860: * @throws MathException if an error occurs computing the p-value
0861: */
0862: public double homoscedasticTTest(StatisticalSummary sampleStats1,
0863: StatisticalSummary sampleStats2)
0864: throws IllegalArgumentException, MathException {
0865: if ((sampleStats1 == null)
0866: || (sampleStats2 == null || Math.min(sampleStats1
0867: .getN(), sampleStats2.getN()) < 2)) {
0868: throw new IllegalArgumentException(
0869: "insufficient data for t statistic");
0870: }
0871: return homoscedasticTTest(sampleStats1.getMean(), sampleStats2
0872: .getMean(), sampleStats1.getVariance(), sampleStats2
0873: .getVariance(), (double) sampleStats1.getN(),
0874: (double) sampleStats2.getN());
0875: }
0876:
0877: /**
0878: * Performs a
0879: * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
0880: * two-sided t-test</a> evaluating the null hypothesis that
0881: * <code>sampleStats1</code> and <code>sampleStats2</code> describe
0882: * datasets drawn from populations with the same mean, with significance
0883: * level <code>alpha</code>. This test does not assume that the
0884: * subpopulation variances are equal. To perform the test under the equal
0885: * variances assumption, use
0886: * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
0887: * <p>
0888: * Returns <code>true</code> iff the null hypothesis that the means are
0889: * equal can be rejected with confidence <code>1 - alpha</code>. To
0890: * perform a 1-sided test, use <code>alpha * 2</code>
0891: * <p>
0892: * See {@link #t(double[], double[])} for the formula used to compute the
0893: * t-statistic. Degrees of freedom are approximated using the
0894: * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
0895: * Welch-Satterthwaite approximation.</a>
0896: * <p>
0897: * <strong>Examples:</strong><br><ol>
0898: * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
0899: * the 95%, use
0900: * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
0901: * </li>
0902: * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
0903: * at the 99% level, first verify that the measured mean of
0904: * <code>sample 1</code> is less than the mean of <code>sample 2</code>
0905: * and then use
0906: * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
0907: * </li></ol>
0908: * <p>
0909: * <strong>Usage Note:</strong><br>
0910: * The validity of the test depends on the assumptions of the parametric
0911: * t-test procedure, as discussed
0912: * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
0913: * here</a>
0914: * <p>
0915: * <strong>Preconditions</strong>: <ul>
0916: * <li>The datasets described by the two Univariates must each contain
0917: * at least 2 observations.
0918: * </li>
0919: * <li> <code> 0 < alpha < 0.5 </code>
0920: * </li></ul>
0921: *
0922: * @param sampleStats1 StatisticalSummary describing sample data values
0923: * @param sampleStats2 StatisticalSummary describing sample data values
0924: * @param alpha significance level of the test
0925: * @return true if the null hypothesis can be rejected with
0926: * confidence 1 - alpha
0927: * @throws IllegalArgumentException if the preconditions are not met
0928: * @throws MathException if an error occurs performing the test
0929: */
0930: public boolean tTest(StatisticalSummary sampleStats1,
0931: StatisticalSummary sampleStats2, double alpha)
0932: throws IllegalArgumentException, MathException {
0933: if ((alpha <= 0) || (alpha > 0.5)) {
0934: throw new IllegalArgumentException(
0935: "bad significance level: " + alpha);
0936: }
0937: return (tTest(sampleStats1, sampleStats2) < alpha);
0938: }
0939:
0940: //----------------------------------------------- Protected methods
0941:
0942: /**
0943: * Gets a DistributionFactory to use in creating TDistribution instances.
0944: * @return a distribution factory.
0945: */
0946: protected DistributionFactory getDistributionFactory() {
0947: if (distributionFactory == null) {
0948: distributionFactory = DistributionFactory.newInstance();
0949: }
0950: return distributionFactory;
0951: }
0952:
0953: /**
0954: * Computes approximate degrees of freedom for 2-sample t-test.
0955: *
0956: * @param v1 first sample variance
0957: * @param v2 second sample variance
0958: * @param n1 first sample n
0959: * @param n2 second sample n
0960: * @return approximate degrees of freedom
0961: */
0962: protected double df(double v1, double v2, double n1, double n2) {
0963: return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2)))
0964: / ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2)
0965: / (n2 * n2 * (n2 - 1d)));
0966: }
0967:
0968: /**
0969: * Computes t test statistic for 1-sample t-test.
0970: *
0971: * @param m sample mean
0972: * @param mu constant to test against
0973: * @param v sample variance
0974: * @param n sample n
0975: * @return t test statistic
0976: */
0977: protected double t(double m, double mu, double v, double n) {
0978: return (m - mu) / Math.sqrt(v / n);
0979: }
0980:
0981: /**
0982: * Computes t test statistic for 2-sample t-test.
0983: * <p>
0984: * Does not assume that subpopulation variances are equal.
0985: *
0986: * @param m1 first sample mean
0987: * @param m2 second sample mean
0988: * @param v1 first sample variance
0989: * @param v2 second sample variance
0990: * @param n1 first sample n
0991: * @param n2 second sample n
0992: * @return t test statistic
0993: */
0994: protected double t(double m1, double m2, double v1, double v2,
0995: double n1, double n2) {
0996: return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
0997: }
0998:
0999: /**
1000: * Computes t test statistic for 2-sample t-test under the hypothesis
1001: * of equal subpopulation variances.
1002: *
1003: * @param m1 first sample mean
1004: * @param m2 second sample mean
1005: * @param v1 first sample variance
1006: * @param v2 second sample variance
1007: * @param n1 first sample n
1008: * @param n2 second sample n
1009: * @return t test statistic
1010: */
1011: protected double homoscedasticT(double m1, double m2, double v1,
1012: double v2, double n1, double n2) {
1013: double pooledVariance = ((n1 - 1) * v1 + (n2 - 1) * v2)
1014: / (n1 + n2 - 2);
1015: return (m1 - m2)
1016: / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1017: }
1018:
1019: /**
1020: * Computes p-value for 2-sided, 1-sample t-test.
1021: *
1022: * @param m sample mean
1023: * @param mu constant to test against
1024: * @param v sample variance
1025: * @param n sample n
1026: * @return p-value
1027: * @throws MathException if an error occurs computing the p-value
1028: */
1029: protected double tTest(double m, double mu, double v, double n)
1030: throws MathException {
1031: double t = Math.abs(t(m, mu, v, n));
1032: TDistribution tDistribution = getDistributionFactory()
1033: .createTDistribution(n - 1);
1034: return 1.0 - tDistribution.cumulativeProbability(-t, t);
1035: }
1036:
1037: /**
1038: * Computes p-value for 2-sided, 2-sample t-test.
1039: * <p>
1040: * Does not assume subpopulation variances are equal. Degrees of freedom
1041: * are estimated from the data.
1042: *
1043: * @param m1 first sample mean
1044: * @param m2 second sample mean
1045: * @param v1 first sample variance
1046: * @param v2 second sample variance
1047: * @param n1 first sample n
1048: * @param n2 second sample n
1049: * @return p-value
1050: * @throws MathException if an error occurs computing the p-value
1051: */
1052: protected double tTest(double m1, double m2, double v1, double v2,
1053: double n1, double n2) throws MathException {
1054: double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
1055: double degreesOfFreedom = 0;
1056: degreesOfFreedom = df(v1, v2, n1, n2);
1057: TDistribution tDistribution = getDistributionFactory()
1058: .createTDistribution(degreesOfFreedom);
1059: return 1.0 - tDistribution.cumulativeProbability(-t, t);
1060: }
1061:
1062: /**
1063: * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1064: * of equal subpopulation variances.
1065: * <p>
1066: * The sum of the sample sizes minus 2 is used as degrees of freedom.
1067: *
1068: * @param m1 first sample mean
1069: * @param m2 second sample mean
1070: * @param v1 first sample variance
1071: * @param v2 second sample variance
1072: * @param n1 first sample n
1073: * @param n2 second sample n
1074: * @return p-value
1075: * @throws MathException if an error occurs computing the p-value
1076: */
1077: protected double homoscedasticTTest(double m1, double m2,
1078: double v1, double v2, double n1, double n2)
1079: throws MathException {
1080: double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1081: double degreesOfFreedom = 0;
1082: degreesOfFreedom = (double) (n1 + n2 - 2);
1083: TDistribution tDistribution = getDistributionFactory()
1084: .createTDistribution(degreesOfFreedom);
1085: return 1.0 - tDistribution.cumulativeProbability(-t, t);
1086: }
1087: }
|