001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat.regression;
017:
018: import java.util.Random;
019:
020: import junit.framework.Test;
021: import junit.framework.TestCase;
022: import junit.framework.TestSuite;
023:
024: /**
025: * Test cases for the TestStatistic class.
026: *
027: * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
028: */
029:
030: public final class SimpleRegressionTest extends TestCase {
031:
032: /*
033: * NIST "Norris" refernce data set from
034: * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
035: * Strangely, order is {y,x}
036: */
037: private double[][] data = { { 0.1, 0.2 }, { 338.8, 337.4 },
038: { 118.1, 118.2 }, { 888.0, 884.6 }, { 9.2, 10.1 },
039: { 228.1, 226.5 }, { 668.5, 666.3 }, { 998.5, 996.3 },
040: { 449.1, 448.6 }, { 778.9, 777.0 }, { 559.2, 558.2 },
041: { 0.3, 0.4 }, { 0.1, 0.6 }, { 778.1, 775.5 },
042: { 668.8, 666.9 }, { 339.3, 338.0 }, { 448.9, 447.5 },
043: { 10.8, 11.6 }, { 557.7, 556.0 }, { 228.3, 228.1 },
044: { 998.0, 995.8 }, { 888.8, 887.6 }, { 119.6, 120.2 },
045: { 0.3, 0.3 }, { 0.6, 0.3 }, { 557.6, 556.8 },
046: { 339.3, 339.1 }, { 888.0, 887.2 }, { 998.5, 999.0 },
047: { 778.9, 779.0 }, { 10.2, 11.1 }, { 117.6, 118.3 },
048: { 228.9, 229.2 }, { 668.4, 669.1 }, { 449.2, 448.9 },
049: { 0.2, 0.5 } };
050:
051: /*
052: * Correlation example from
053: * http://www.xycoon.com/correlation.htm
054: */
055: private double[][] corrData = { { 101.0, 99.2 }, { 100.1, 99.0 },
056: { 100.0, 100.0 }, { 90.6, 111.6 }, { 86.5, 122.2 },
057: { 89.7, 117.6 }, { 90.6, 121.1 }, { 82.8, 136.0 },
058: { 70.1, 154.2 }, { 65.4, 153.6 }, { 61.3, 158.5 },
059: { 62.5, 140.6 }, { 63.6, 136.2 }, { 52.6, 168.0 },
060: { 59.7, 154.3 }, { 59.5, 149.0 }, { 61.3, 165.5 } };
061:
062: /*
063: * From Moore and Mcabe, "Introduction to the Practice of Statistics"
064: * Example 10.3
065: */
066: private double[][] infData = { { 15.6, 5.2 }, { 26.8, 6.1 },
067: { 37.8, 8.7 }, { 36.4, 8.5 }, { 35.5, 8.8 }, { 18.6, 4.9 },
068: { 15.3, 4.5 }, { 7.9, 2.5 }, { 0.0, 1.1 } };
069:
070: /*
071: * Data with bad linear fit
072: */
073: private double[][] infData2 = { { 1, 1 }, { 2, 0 }, { 3, 5 },
074: { 4, 2 }, { 5, -1 }, { 6, 12 } };
075:
076: public SimpleRegressionTest(String name) {
077: super (name);
078: }
079:
080: public void setUp() {
081: }
082:
083: public static Test suite() {
084: TestSuite suite = new TestSuite(SimpleRegressionTest.class);
085: suite.setName("BivariateRegression Tests");
086: return suite;
087: }
088:
089: public void testNorris() {
090: SimpleRegression regression = new SimpleRegression();
091: for (int i = 0; i < data.length; i++) {
092: regression.addData(data[i][1], data[i][0]);
093: }
094: // Tests against certified values from
095: // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
096: assertEquals("slope", 1.00211681802045, regression.getSlope(),
097: 10E-12);
098: assertEquals("slope std err", 0.429796848199937E-03, regression
099: .getSlopeStdErr(), 10E-12);
100: assertEquals("number of observations", 36, regression.getN());
101: assertEquals("intercept", -0.262323073774029, regression
102: .getIntercept(), 10E-12);
103: assertEquals("std err intercept", 0.232818234301152, regression
104: .getInterceptStdErr(), 10E-12);
105: assertEquals("r-square", 0.999993745883712, regression
106: .getRSquare(), 10E-12);
107: assertEquals("SSR", 4255954.13232369, regression
108: .getRegressionSumSquares(), 10E-9);
109: assertEquals("MSE", 0.782864662630069, regression
110: .getMeanSquareError(), 10E-10);
111: assertEquals("SSE", 26.6173985294224, regression
112: .getSumSquaredErrors(), 10E-9);
113: // ------------ End certified data tests
114:
115: assertEquals("predict(0)", -0.262323073774029, regression
116: .predict(0), 10E-12);
117: assertEquals("predict(1)",
118: 1.00211681802045 - 0.262323073774029, regression
119: .predict(1), 10E-12);
120: }
121:
122: public void testCorr() {
123: SimpleRegression regression = new SimpleRegression();
124: regression.addData(corrData);
125: assertEquals("number of observations", 17, regression.getN());
126: assertEquals("r-square", .896123, regression.getRSquare(),
127: 10E-6);
128: assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
129: }
130:
131: public void testNaNs() {
132: SimpleRegression regression = new SimpleRegression();
133: assertTrue("intercept not NaN", Double.isNaN(regression
134: .getIntercept()));
135: assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
136: assertTrue("slope std err not NaN", Double.isNaN(regression
137: .getSlopeStdErr()));
138: assertTrue("intercept std err not NaN", Double.isNaN(regression
139: .getInterceptStdErr()));
140: assertTrue("MSE not NaN", Double.isNaN(regression
141: .getMeanSquareError()));
142: assertTrue("e not NaN", Double.isNaN(regression.getR()));
143: assertTrue("r-square not NaN", Double.isNaN(regression
144: .getRSquare()));
145: assertTrue("RSS not NaN", Double.isNaN(regression
146: .getRegressionSumSquares()));
147: assertTrue("SSE not NaN", Double.isNaN(regression
148: .getSumSquaredErrors()));
149: assertTrue("SSTO not NaN", Double.isNaN(regression
150: .getTotalSumSquares()));
151: assertTrue("predict not NaN", Double.isNaN(regression
152: .predict(0)));
153:
154: regression.addData(1, 2);
155: regression.addData(1, 3);
156:
157: // No x variation, so these should still blow...
158: assertTrue("intercept not NaN", Double.isNaN(regression
159: .getIntercept()));
160: assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
161: assertTrue("slope std err not NaN", Double.isNaN(regression
162: .getSlopeStdErr()));
163: assertTrue("intercept std err not NaN", Double.isNaN(regression
164: .getInterceptStdErr()));
165: assertTrue("MSE not NaN", Double.isNaN(regression
166: .getMeanSquareError()));
167: assertTrue("e not NaN", Double.isNaN(regression.getR()));
168: assertTrue("r-square not NaN", Double.isNaN(regression
169: .getRSquare()));
170: assertTrue("RSS not NaN", Double.isNaN(regression
171: .getRegressionSumSquares()));
172: assertTrue("SSE not NaN", Double.isNaN(regression
173: .getSumSquaredErrors()));
174: assertTrue("predict not NaN", Double.isNaN(regression
175: .predict(0)));
176:
177: // but SSTO should be OK
178: assertTrue("SSTO NaN", !Double.isNaN(regression
179: .getTotalSumSquares()));
180:
181: regression = new SimpleRegression();
182:
183: regression.addData(1, 2);
184: regression.addData(3, 3);
185:
186: // All should be OK except MSE, s(b0), s(b1) which need one more df
187: assertTrue("interceptNaN", !Double.isNaN(regression
188: .getIntercept()));
189: assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
190: assertTrue("slope std err not NaN", Double.isNaN(regression
191: .getSlopeStdErr()));
192: assertTrue("intercept std err not NaN", Double.isNaN(regression
193: .getInterceptStdErr()));
194: assertTrue("MSE not NaN", Double.isNaN(regression
195: .getMeanSquareError()));
196: assertTrue("r NaN", !Double.isNaN(regression.getR()));
197: assertTrue("r-square NaN", !Double.isNaN(regression
198: .getRSquare()));
199: assertTrue("RSS NaN", !Double.isNaN(regression
200: .getRegressionSumSquares()));
201: assertTrue("SSE NaN", !Double.isNaN(regression
202: .getSumSquaredErrors()));
203: assertTrue("SSTO NaN", !Double.isNaN(regression
204: .getTotalSumSquares()));
205: assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));
206:
207: regression.addData(1, 4);
208:
209: // MSE, MSE, s(b0), s(b1) should all be OK now
210: assertTrue("MSE NaN", !Double.isNaN(regression
211: .getMeanSquareError()));
212: assertTrue("slope std err NaN", !Double.isNaN(regression
213: .getSlopeStdErr()));
214: assertTrue("intercept std err NaN", !Double.isNaN(regression
215: .getInterceptStdErr()));
216: }
217:
218: public void testClear() {
219: SimpleRegression regression = new SimpleRegression();
220: regression.addData(corrData);
221: assertEquals("number of observations", 17, regression.getN());
222: regression.clear();
223: assertEquals("number of observations", 0, regression.getN());
224: regression.addData(corrData);
225: assertEquals("r-square", .896123, regression.getRSquare(),
226: 10E-6);
227: regression.addData(data);
228: assertEquals("number of observations", 53, regression.getN());
229: }
230:
231: public void testInference() throws Exception {
232: //---------- verified against R, version 1.8.1 -----
233: // infData
234: SimpleRegression regression = new SimpleRegression();
235: regression.addData(infData);
236: assertEquals("slope std err", 0.011448491, regression
237: .getSlopeStdErr(), 1E-10);
238: assertEquals("std err intercept", 0.286036932, regression
239: .getInterceptStdErr(), 1E-8);
240: assertEquals("significance", 4.596e-07, regression
241: .getSignificance(), 1E-8);
242: assertEquals("slope conf interval half-width", 0.0270713794287,
243: regression.getSlopeConfidenceInterval(), 1E-8);
244: // infData2
245: regression = new SimpleRegression();
246: regression.addData(infData2);
247: assertEquals("slope std err", 1.07260253, regression
248: .getSlopeStdErr(), 1E-8);
249: assertEquals("std err intercept", 4.17718672, regression
250: .getInterceptStdErr(), 1E-8);
251: assertEquals("significance", 0.261829133982, regression
252: .getSignificance(), 1E-11);
253: assertEquals("slope conf interval half-width", 2.97802204827,
254: regression.getSlopeConfidenceInterval(), 1E-8);
255: //------------- End R-verified tests -------------------------------
256:
257: //FIXME: get a real example to test against with alpha = .01
258: assertTrue("tighter means wider", regression
259: .getSlopeConfidenceInterval() < regression
260: .getSlopeConfidenceInterval(0.01));
261:
262: try {
263: double x = regression.getSlopeConfidenceInterval(1);
264: fail("expecting IllegalArgumentException for alpha = 1");
265: } catch (IllegalArgumentException ex) {
266: ;
267: }
268:
269: }
270:
271: public void testPerfect() throws Exception {
272: SimpleRegression regression = new SimpleRegression();
273: int n = 100;
274: for (int i = 0; i < n; i++) {
275: regression.addData(((double) i) / (n - 1), i);
276: }
277: assertEquals(0.0, regression.getSignificance(), 1.0e-5);
278: assertTrue(regression.getSlope() > 0.0);
279: }
280:
281: public void testPerfectNegative() throws Exception {
282: SimpleRegression regression = new SimpleRegression();
283: int n = 100;
284: for (int i = 0; i < n; i++) {
285: regression.addData(-((double) i) / (n - 1), i);
286: }
287:
288: assertEquals(0.0, regression.getSignificance(), 1.0e-5);
289: assertTrue(regression.getSlope() < 0.0);
290: }
291:
292: public void testRandom() throws Exception {
293: SimpleRegression regression = new SimpleRegression();
294: Random random = new Random(1);
295: int n = 100;
296: for (int i = 0; i < n; i++) {
297: regression.addData(((double) i) / (n - 1), random
298: .nextDouble());
299: }
300:
301: assertTrue(0.0 < regression.getSignificance()
302: && regression.getSignificance() < 1.0);
303: }
304: }
|