001: /*************************************************************************
002: * *
003: * 1) This source code file, in unmodified form, and compiled classes *
004: * derived from it can be used and distributed without restriction, *
005: * including for commercial use. (Attribution is not required *
006: * but is appreciated.) *
007: * *
008: * 2) Modified versions of this file can be made and distributed *
009: * provided: the modified versions are put into a Java package *
010: * different from the original package, edu.hws; modified *
011: * versions are distributed under the same terms as the original; *
012: * and the modifications are documented in comments. (Modification *
013: * here does not include simply making subclasses that belong to *
014: * a package other than edu.hws, which can be done without any *
015: * restriction.) *
016: * *
017: * David J. Eck *
018: * Department of Mathematics and Computer Science *
019: * Hobart and William Smith Colleges *
020: * Geneva, New York 14456, USA *
021: * Email: eck@hws.edu WWW: http://math.hws.edu/eck/ *
022: * *
023: *************************************************************************/package edu.hws.jcm.draw;
024:
025: import java.awt.*;
026: import edu.hws.jcm.data.*;
027: import edu.hws.jcm.awt.*;
028:
029: /**
030: * A ScatterPlot graphs data taken from a DataTableInput. The data
031: * in the form of a small plus sign at each (x,y) in the data.
032: * The x and y values can be taken directly from two specified
033: * columns in the table. They can also be computed by expressions
034: * that can use column names from the table as well as the special
035: * variable rowNumber. For example, if column names are X and Y,
036: * then it could plot sqrt(X) versus rowNumber*(X+Y).
037: * <p>Optionally, a ScatterPlot will also draw a regression line
038: * for the data. Certain statistical values about the data points
039: * are available as Value objects by calling the getValueObject()
040: * method.
041: */
042:
043: public class ScatterPlot extends Drawable implements Computable {
044:
045: /**
046: * A constant that can be used in the getValueObject() method to
047: * indicate which statistic the object should represent.
048: */
049: public static final int INTERCEPT = 0, SLOPE = 1, DATACT = 2,
050: MISSINGCT = 3, STANDARDERROR = 4, CORRELATION = 5;
051:
052: private DataTableInput table; // The table from which the data for the plot is taken
053: private long lastTableSN; // serial number from table when getData() was last done
054:
055: private boolean autoChangeLimits = true; // If true, then the limits on the coords will
056: // be changed so that the data pretty much
057: // fills the coordinate rect.
058:
059: private int column1, column2; // Column numbers that specify which columns from
060: // the table will be plotted. These are ignored
061: // if exp1 and exp2 are non-null.
062:
063: private Expression exp1, exp2; // Expressions that give data to be plotted, or
064: // null if column numbers are to be used.
065:
066: private boolean showRegressionLine = true; // If true, a regression is drawn
067:
068: private boolean missingValueIsError = true; // If true and if any of the data values is Double.NaN,
069: // then an error is thrown.
070:
071: private double slope = Double.NaN; // Values of statistics.
072: private double intercept = Double.NaN;
073: private int dataCt;
074: private int missingCt;
075: private double correlation = Double.NaN;
076: private double standardError = Double.NaN;
077:
078: private double[][] data; // The actual data values to be drawn (computed in getData())
079:
080: private Color lineColor = Color.black; // Color of regression line.
081: private Color dataColor = Color.red; // Color of data points.
082:
083: private static final int crossHalfSize = 2; // Size of one arm of the plus sign that is drawn
084:
085: // to represent a data point.
086:
087: /**
088: * Default constructor. A data table, at least, must be specified before anything can be drawn.
089: * The first two columns of the table will be plotted (once a table is specified).
090: */
091: public ScatterPlot() {
092: this (null, 0, 1);
093: }
094:
095: /**
096: * Create a scatter plot to plot data from the specified table. Initially, it is configured
097: * to plot data from the first two columns in the table.
098: */
099: public ScatterPlot(DataTableInput table) {
100: this (table, 0, 1);
101: }
102:
103: /**
104: * Create a scatter plot to plot data from two specified columns in a table.
105: * Columns are numbered starting from zero.
106: */
107: public ScatterPlot(DataTableInput table, int column1, int column2) {
108: this .table = table;
109: this .column1 = column1;
110: this .column2 = column2;
111: }
112:
113: /**
114: * Create scatter plot to plot specified expressions using data from a table.
115: * The expressions should include references to the column names from the table
116: * and can also refer to the special variable "rowNumber".
117: */
118: public ScatterPlot(DataTableInput table, Expression exp1,
119: Expression exp2) {
120: this .table = table;
121: this .exp1 = exp1;
122: this .exp2 = exp2;
123: column1 = 0;
124: column2 = 1;
125: }
126:
127: /**
128: * Specify the table from which the plotted data is to be taken. The data from the
129: * first two columns of the table will be plotted, unless this is changed by
130: * calling setColumns() or setExpressions().
131: */
132: public void setTable(DataTableInput table) {
133: if (table == this .table)
134: return;
135: this .table = table;
136: lastTableSN = 0;
137: column1 = 0;
138: column2 = 1;
139: checkData();
140: }
141:
142: /**
143: * Get the DataTableInput from which the plotted data is obtained.
144: */
145: public DataTableInput getTable() {
146: return table;
147: }
148:
149: /**
150: * Specify that the data to be plotted should be taken from the specified
151: * columns in the table. Note that columns are numbered starting from zero.
152: * The parameters must be within the range of column numbers in the table.
153: */
154: public void setColumns(int c1, int c2) {
155: column1 = c1;
156: column2 = c2;
157: exp1 = exp2 = null;
158: lastTableSN = 0; // force checkData to recompute
159: checkData();
160: }
161:
162: /**
163: * Specify the data for the the plot is to be obtained by evaluating
164: * the two expressions that are given as parameters. Both expressions
165: * should be non-null. The expressions can only be created by a Parser
166: * to which the variables from the table have been added by calling
167: * the method DataTableInput.addVariablesToParser(). The expressions
168: * are evaluated once for each row in the table to obtain the data to be ploted. They can include
169: * references to the column names from the table and to the special
170: * variable "rowNumber", which represents the number of the current row.
171: */
172: public void setExpressions(Expression exp1, Expression exp2) {
173: this .exp1 = exp1;
174: this .exp2 = exp2;
175: lastTableSN = 0; // force checkData to recompute
176: checkData();
177: }
178:
179: /**
180: * If the parameter is true, then a regression line for the data is drawn.
181: * The default value is true.
182: */
183: public void setShowRegressionLine(boolean line) {
184: if (line != showRegressionLine) {
185: showRegressionLine = line;
186: needsRedraw();
187: }
188: }
189:
190: /**
191: * The return value tells whether a regression line is drawn.
192: */
193: public boolean getShowRegressionLine() {
194: return showRegressionLine;
195: }
196:
197: /**
198: * If the parameter is true, then a missing data value (an empty cell or
199: * an undefined value for one of the expressions) is considered to be an
200: * error, and a JCMError is thrown when it is encountered. If the value is
201: * false, missing data are ignored, and the value of MISSINGCT gives the
202: * number of points for which the data was missing. Note that invalid
203: * data (a cell that contains text that does not represent a number) is
204: * always considered to be an error. Also note that completely empty
205: * rows at the bottom of a DataTableInput are always ignored and are
206: * never considered to be an error.
207: * The default value of missingValueIsError is true, so that missing data
208: * is considered to be an error unless you turn off this option.
209: */
210: public void setMissingValueIsError(boolean isError) {
211: missingValueIsError = isError;
212: }
213:
214: /**
215: * The return value indicates whether missing data is considered to be
216: * an error.
217: */
218: public boolean getMissingValueIsError() {
219: return missingValueIsError;
220: }
221:
222: /**
223: * If the parameter is true, then the limits on the CoordinateRect that contains
224: * this ScatterPlot are automatically adjusted whenever the data is recomputed.
225: * The default value is true.
226: */
227: public void setAutoChangeLimits(boolean set) {
228: autoChangeLimits = set;
229: }
230:
231: /**
232: * The return value indicates whether the limits on the CoordinateRect are
233: * automatically adjusted when the data is recomputed.
234: */
235: public boolean getAutoChangeLimits() {
236: return autoChangeLimits;
237: }
238:
239: /**
240: * Get the color that is used to draw the points in the data.
241: */
242: public Color getDataColor() {
243: return dataColor;
244: }
245:
246: /**
247: * Set the color that is used to draw the points in the data.
248: * The default value is red.
249: */
250: public void setDataColor(Color color) {
251: if (color != null)
252: dataColor = color;
253: }
254:
255: /**
256: * Get the color that is used to draw the regression line.
257: */
258: public Color getLineColor() {
259: return lineColor;
260: }
261:
262: /**
263: * Set the color that is used to draw the regression line.
264: * The default value is black.
265: */
266: public void setLineColor(Color color) {
267: if (color != null)
268: lineColor = color;
269: }
270:
271: /**
272: * Get a Value that represents a statistic about the data that is shown
273: * in the scatter plot. The parameter specifies which statistic is represented.
274: * It can be one of the constants defined in this class: SLOPE (of regression line),
275: * INTERCEPT (y-intercept of regression line), DATACT (number of data points),
276: * MISSINGCT (number of missing data; alwasy zero if the missingValueIsError property
277: * is true), STANDARDERROR (standard error of regression line), and CORRELATION
278: * (correlation coefficient between first and second coordintes of data points).
279: */
280: public Value getValueObject(int valueCode) {
281: if (valueCode < 0 || valueCode > 5)
282: throw new IllegalArgumentException("Unknown code ("
283: + valueCode + ") for type of value object.");
284: return new SPV(valueCode);
285: }
286:
287: /**
288: * Check data from table and recompute everything if it has changed.
289: */
290: private void checkData() {
291: if (table != null && lastTableSN == table.getSerialNumber())
292: return;
293: try {
294: compute();
295: } catch (JCMError e) {
296: canvas.setErrorMessage(null, e.getMessage());
297: }
298: }
299:
300: /**
301: * Recompute the data for the scatter plot. This is generally
302: * not called directly.
303: */
304: public void compute() {
305: double[] desiredLimits = getData();
306: if (table != null)
307: lastTableSN = table.getSerialNumber();
308: if (desiredLimits == null
309: || !needsNewLimits(desiredLimits, coords))
310: needsRedraw();
311: else
312: coords.setLimits(desiredLimits);
313: }
314:
315: /**
316: * Draw the data points and regression line. Not meant to be called directly.
317: */
318: public void draw(Graphics g, boolean coordsChanged) {
319: g.setColor(dataColor);
320: if (table == null) {
321: g.drawString("No table has been specified.", 20, 27);
322: return;
323: }
324: if (column1 < 0 || column1 >= table.getColumnCount()
325: || column2 < 0 || column2 >= table.getColumnCount()) {
326: g.drawString("Illegal column numbers.", 20, 27);
327: return;
328: }
329: if (data == null || data.length == 0) {
330: g.drawString("No data available.", 20, 27);
331: return;
332: }
333: checkData();
334: for (int i = 0; i < data.length; i++) {
335: int x = coords.xToPixel(data[i][0]);
336: int y = coords.yToPixel(data[i][1]);
337: g.drawLine(x - crossHalfSize, y, x + crossHalfSize, y);
338: g.drawLine(x, y - crossHalfSize, x, y + crossHalfSize);
339: }
340: if (showRegressionLine && !Double.isNaN(slope)) {
341: g.setColor(lineColor);
342: if (Double.isInfinite(slope)) {
343: int x = coords.xToPixel(data[0][0]);
344: g.drawLine(x, coords.getTop(), x, coords.getTop()
345: + coords.getHeight());
346: } else {
347: double x1 = coords.pixelToX(coords.getLeft());
348: double x2 = coords.pixelToX(coords.getLeft()
349: + coords.getWidth());
350: double y1 = slope * x1 + intercept;
351: double y2 = slope * x2 + intercept;
352: g.drawLine(coords.xToPixel(x1),
353: coords.yToPixel(y1) - 1, coords.xToPixel(x2),
354: coords.yToPixel(y2) - 1);
355: }
356: }
357: }
358:
359: /**
360: * Get the data for the plot, and recompute the statistics.
361: * Also, compute the appropriate limits for the CoordinateRect.
362: * The return value represents these limits.
363: */
364: private double[] getData() {
365: int rows = (table == null) ? 0 : table.getNonEmptyRowCount();
366: double[] desiredLimits = null;
367: if (table == null
368: || rows == 0
369: || ((exp1 == null || exp2 == null) && (column1 < 0
370: || column1 >= table.getColumnCount()
371: || column2 < 0 || column2 >= table
372: .getColumnCount()))) {
373: data = new double[0][2];
374: dataCt = 0;
375: missingCt = 0;
376: slope = Double.NaN;
377: intercept = Double.NaN;
378: correlation = Double.NaN;
379: standardError = Double.NaN;
380: return null;
381: }
382: data = new double[rows][2];
383: dataCt = 0;
384: missingCt = 0;
385: if (exp1 == null || exp2 == null) {
386: for (int i = 0; i < rows; i++) {
387: double x = table.getCellContents(i + 1, column1);
388: double y = table.getCellContents(i + 1, column2);
389: if (Double.isNaN(x) || Double.isNaN(y)
390: || Double.isInfinite(x) || Double.isInfinite(y)) {
391: if (missingValueIsError)
392: throw new JCMError("Missing data in row "
393: + table.getCurrentRowNumber()
394: + " of table.", this );
395: missingCt++;
396: } else {
397: data[dataCt][0] = x;
398: data[dataCt][1] = y;
399: dataCt++;
400: }
401: }
402: } else {
403: for (int i = 0; i < rows; i++) {
404: table.setCurrentRowNumber(i + 1);
405: double x = exp1.getVal();
406: double y = exp2.getVal();
407: if (Double.isNaN(x) || Double.isNaN(y)
408: || Double.isInfinite(x) || Double.isInfinite(y)) {
409: if (missingValueIsError)
410: throw new JCMError(
411: "Missing data or undefined expression value for row "
412: + table.getCurrentRowNumber()
413: + " of table.", this );
414: missingCt++;
415: } else {
416: data[dataCt][0] = x;
417: data[dataCt][1] = y;
418: dataCt++;
419: }
420: }
421: }
422: if (dataCt < data.length) {
423: double[][] d = new double[dataCt][2];
424: for (int i = 0; i < dataCt; i++)
425: d[i] = data[i];
426: data = d;
427: }
428: getRegressionStats();
429: if (autoChangeLimits)
430: desiredLimits = computeDesiredLimits();
431: return desiredLimits;
432: }
433:
434: private void getRegressionStats() {
435: // Compute statistics, based on data in data array.
436: if (dataCt == 0) {
437: slope = intercept = correlation = standardError = Double.NaN;
438: return;
439: }
440: boolean allSameX = true, allSameY = true;
441: double sumx = data[0][0], sumy = data[0][1], sumxy = data[0][0]
442: * data[0][1], sumx2 = data[0][0] * data[0][0], sumy2 = data[0][1]
443: * data[0][1];
444: for (int i = 1; i < dataCt; i++) {
445: if (data[0][0] != data[i][0])
446: allSameX = false;
447: if (data[0][1] != data[i][1])
448: allSameY = false;
449: sumx += data[i][0];
450: sumy += data[i][1];
451: sumxy += data[i][0] * data[i][1];
452: sumx2 += data[i][0] * data[i][0];
453: sumy2 += data[i][1] * data[i][1];
454: }
455: double denomx = dataCt * sumx2 - sumx * sumx;
456: double denomy = dataCt * sumy2 - sumy * sumy;
457: double numer = dataCt * sumxy - sumx * sumy;
458: if (allSameX && allSameY) {
459: slope = 0;
460: intercept = data[0][1];
461: correlation = standardError = Double.NaN;
462: } else if (allSameX) {
463: slope = Double.POSITIVE_INFINITY;
464: intercept = correlation = standardError = Double.NaN;
465: } else if (denomx == 0) {
466: slope = intercept = correlation = standardError = Double.NaN;
467: } else {
468: slope = numer / denomx;
469: intercept = (sumy - slope * sumx) / dataCt;
470: if (denomy == 0)
471: correlation = Double.NaN;
472: else
473: correlation = numer / Math.sqrt(denomx * denomy);
474: if (dataCt <= 2)
475: standardError = Double.NaN;
476: else {
477: double sum = 0;
478: for (int i = 0; i < dataCt; i++) {
479: double x = data[i][1]
480: - (slope * data[i][0] + intercept);
481: sum += x * x;
482: }
483: standardError = Math.sqrt(sum / (dataCt - 2));
484: }
485: }
486: }
487:
488: private double[] computeDesiredLimits() {
489: // Compute desired limits, based on data in data array
490: if (data.length == 0)
491: return null;
492: double xmin = Double.MAX_VALUE, xmax = -Double.MAX_VALUE, ymin = Double.MAX_VALUE, ymax = -Double.MAX_VALUE;
493: for (int i = 0; i < dataCt; i++) {
494: double x = data[i][0];
495: double y = data[i][1];
496: if (x > xmax)
497: xmax = x;
498: if (x < xmin)
499: xmin = x;
500: if (y > ymax)
501: ymax = y;
502: if (y < ymin)
503: ymin = y;
504: }
505: if (xmin > 0 && (xmax - xmin) > xmax / 2)
506: xmin = 0;
507: if (ymin > 0 && (ymax - ymin) > ymax / 2)
508: ymin = 0;
509: if (ymax < 0)
510: ymax = 0;
511: if (xmax < 0)
512: xmax = 0;
513: if (xmax == xmin) {
514: xmax += 1;
515: xmin -= 1;
516: } else {
517: double spread = (xmax - xmin) / 15;
518: xmax += spread;
519: xmin -= spread;
520: }
521: if (ymax == ymin) {
522: ymax += 1;
523: ymin -= 1;
524: } else {
525: double spread = (ymax - ymin) / 15;
526: ymax += spread;
527: ymin -= spread;
528: }
529: return new double[] { xmin, xmax, ymin, ymax };
530: }
531:
532: private boolean needsNewLimits(double[] desiredLimits,
533: CoordinateRect coords) {
534: // Check if limits should actually be changed; avoid changing them if
535: // they are close to the desired limits.
536: double[] limits = new double[] { coords.getXmin(),
537: coords.getXmax(), coords.getYmin(), coords.getYmax() };
538: return (desiredLimits[0] < limits[0]
539: || desiredLimits[1] > limits[1]
540: || desiredLimits[2] < limits[2]
541: || desiredLimits[3] > limits[3]
542: || (limits[1] - limits[0]) > 1.3 * (desiredLimits[1] - desiredLimits[0])
543: || (limits[3] - limits[2]) > 1.3 * (desiredLimits[3] - desiredLimits[2])
544: || (limits[1] - limits[0]) < (desiredLimits[1] - desiredLimits[0]) / 1.3 || (limits[3] - limits[2]) < (desiredLimits[3] - desiredLimits[2]) / 1.3);
545: }
546:
547: private class SPV implements Value {
548: // Represents one of the value objects that can
549: // be returned by the getValueObject() method.
550: private int code; // Which statisitic does this Value represent?
551:
552: SPV(int code) {
553: this .code = code;
554: }
555:
556: public double getVal() {
557: checkData();
558: switch (code) {
559: case INTERCEPT:
560: return intercept;
561: case SLOPE:
562: return slope;
563: case DATACT:
564: return dataCt;
565: case MISSINGCT:
566: return missingCt;
567: case STANDARDERROR:
568: return standardError;
569: default:
570: return correlation;
571: }
572: }
573: }
574:
575: } // end class ScatterPlot
|