001: /*
002: * GeoTools - OpenSource mapping toolkit
003: * http://geotools.org
004: * (C) 2003-2006, GeoTools Project Managment Committee (PMC)
005: * (C) 2001, Institut de Recherche pour le Développement
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation;
010: * version 2.1 of the License.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: */
017: package org.geotools.math;
018:
019: // J2SE dependencies
020: import java.io.Serializable;
021: import java.util.Locale;
022:
023: // Geotools dependencies
024: import org.geotools.io.TableWriter;
025: import org.geotools.resources.i18n.Descriptions;
026: import org.geotools.resources.i18n.DescriptionKeys;
027: import org.opengis.util.Cloneable;
028:
029: /**
030: * Hold some statistics about a series of sample values. Given a series of sample values
031: * <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>, <var>s<sub>2</sub></var>,
032: * <var>s<sub>3</sub></var>..., this class computes {@linkplain #minimum minimum},
033: * {@linkplain #maximum maximum}, {@linkplain #mean mean}, {@linkplain #rms root mean square}
034: * and {@linkplain #standardDeviation standard deviation}. Statistics are computed on the fly;
035: * the sample values are never stored in memory.
036: * <p>
037: * An instance of {@code Statistics} is initially empty (i.e. all statistical values are set
038: * to {@link Double#NaN NaN}). The statistics are updated every time an {@link #add(double)}
039: * method is invoked with a non-{@linkplain Double#NaN NaN} value. A typical usage of this
040: * class is:
041: *
042: * <blockquote><pre>
043: * double[] data = new double[1000];
044: * // (Compute some data values here...)
045: *
046: * Statistics stats = new Statistics();
047: * for (int i=0; i<data.length; i++) {
048: * stats.add(data[i]);
049: * }
050: * System.out.println(stats);
051: * </pre></blockquote>
052: *
053: * @source $URL: http://svn.geotools.org/geotools/tags/2.4.1/modules/library/metadata/src/main/java/org/geotools/math/Statistics.java $
054: * @version $Id: Statistics.java 22443 2006-10-27 20:47:22Z desruisseaux $
055: * @author Martin Desruisseaux
056: *
057: * @since 2.0
058: */
059: public class Statistics implements Cloneable, Serializable {
060: /**
061: * Serial number for compatibility with different versions.
062: */
063: private static final long serialVersionUID = -22884277805533726L;
064:
065: /**
066: * Valeur minimale qui aie été transmise à la méthode
067: * {@link #add(double)}. Lors de la construction, ce
068: * champs est initialisé à NaN.
069: */
070: private double min = Double.NaN;
071:
072: /**
073: * Valeur maximale qui aie été transmise à la méthode
074: * {@link #add(double)}. Lors de la construction, ce
075: * champs est initialisé à NaN.
076: */
077: private double max = Double.NaN;
078:
079: /**
080: * Somme de toutes les valeurs qui ont été transmises à
081: * la méthode {@link #add(double)}. Lors de la construction,
082: * ce champs est initialisé à 0.
083: */
084: private double sum = 0;
085:
086: /**
087: * Somme des carrés de toutes les valeurs qui ont été
088: * transmises à la méthode {@link #add(double)}. Lors
089: * de la construction, ce champs est initialisé à 0.
090: */
091: private double sum2 = 0;
092:
093: /**
094: * Nombre de données autres que NaN qui ont été transmises
095: * à la méthode {@link #add(double)}. Lors de la construction,
096: * ce champs est initialisé à 0.
097: */
098: private int n = 0;
099:
100: /**
101: * Nombre de données égales à NaN qui ont été transmises à
102: * la méthode {@link #add(double)}. Les NaN sont ingorés lors
103: * du calcul des statistiques, mais on les compte quand même
104: * au passage. Lors de la construction ce champs est initialisé à 0.
105: */
106: private int nNaN = 0;
107:
108: /**
109: * Construct an initially empty set of statistics.
110: * All statistical values are initialized to {@link Double#NaN}.
111: */
112: public Statistics() {
113: }
114:
115: /**
116: * Reset the statistics to their initial {@link Double#NaN NaN} values.
117: * This method reset this object state as if it was just created.
118: */
119: public void reset() {
120: min = Double.NaN;
121: max = Double.NaN;
122: sum = 0;
123: sum2 = 0;
124: n = 0;
125: nNaN = 0;
126: }
127:
128: /**
129: * Update statistics for the specified sample. This {@code add}
130: * method is usually invoked inside a {@code for} loop.
131: *
132: * @param sample The sample value. {@link Double#NaN NaN} values are ignored.
133: *
134: * @see #add(long)
135: * @see #add(Statistics)
136: */
137: public void add(final double sample) {
138: if (!Double.isNaN(sample)) {
139: /*
140: * Les deux prochaines lignes utilisent !(a>=b) au
141: * lieu de (a<b) afin de prendre en compte les NaN.
142: */
143: if (!(min <= sample))
144: min = sample;
145: if (!(max >= sample))
146: max = sample;
147: sum2 += (sample * sample);
148: sum += sample;
149: n++;
150: } else {
151: nNaN++;
152: }
153: }
154:
155: /**
156: * Update statistics for the specified sample. This {@code add}
157: * method is usually invoked inside a {@code for} loop.
158: *
159: * @param sample The sample value.
160: *
161: * @see #add(double)
162: * @see #add(Statistics)
163: */
164: public void add(final long sample) {
165: final double fdatum = sample;
166: if (!(min <= fdatum))
167: min = fdatum;
168: if (!(max >= fdatum))
169: max = fdatum;
170: sum2 += (fdatum * fdatum);
171: sum += fdatum;
172: n++;
173: }
174:
175: /**
176: * Update statistics with all samples from the specified {@code stats}. Invoking this
177: * method is equivalent (except for rounding errors) to invoking {@link #add(double) add}
178: * for all samples that were added to {@code stats}.
179: *
180: * @param stats The statistics to be added to {@code this}, or {@code null} if none.
181: */
182: public void add(final Statistics stats) {
183: if (stats != null) {
184: // "if (a<b)" équivaut à "if (!isNaN(a) && a<b)".
185: if (Double.isNaN(min) || stats.min < min)
186: min = stats.min;
187: if (Double.isNaN(max) || stats.max > max)
188: max = stats.max;
189: sum2 += stats.sum2;
190: sum += stats.sum;
191: n += stats.n;
192: nNaN += stats.nNaN;
193: }
194: }
195:
196: /**
197: * Returns the number of {@link Double#NaN NaN} samples. {@code NaN} samples are
198: * ignored in all other statitical computation. This method count them for information
199: * purpose only.
200: */
201: public int countNaN() {
202: return Math.max(nNaN, 0);
203: }
204:
205: /**
206: * Returns the number of samples, excluding {@link Double#NaN NaN} values.
207: */
208: public int count() {
209: return n;
210: }
211:
212: /**
213: * Returns the minimum sample value, or {@link Double#NaN NaN} if none.
214: *
215: * @see #maximum
216: */
217: public double minimum() {
218: return min;
219: }
220:
221: /**
222: * Returns the maximum sample value, or {@link Double#NaN NaN} if none.
223: *
224: * @see #minimum
225: */
226: public double maximum() {
227: return max;
228: }
229:
230: /**
231: * Returns the range of sample values. This is equivalent to <code>{@link #maximum maximum} -
232: * {@link #minimum minimum}</code>, except for rounding error. If no samples were added,
233: * then returns {@link Double#NaN NaN}.
234: *
235: * @see #minimum
236: * @see #maximum
237: */
238: public double range() {
239: return max - min;
240: }
241:
242: /**
243: * Returns the mean value, or {@link Double#NaN NaN} if none.
244: */
245: public double mean() {
246: return sum / n;
247: }
248:
249: /**
250: * Returns the root mean square, or {@link Double#NaN NaN} if none.
251: */
252: public double rms() {
253: return Math.sqrt(sum2 / n);
254: }
255:
256: /**
257: * Retourne l'écart type des échantillons par rapport à la moyenne. Si les données
258: * fournies aux différentes méthodes {@code add(...)} se distribuent selon une
259: * loi normale, alors l'écart type est la distance de part et d'autre de la moyenne
260: * dans lequel se trouveraient environ 84% des données. Le tableau ci-dessous donne
261: * le pourcentage approximatif des données que l'on trouve de part et d'autre de la
262: * moyenne à des distances telles que 2 ou 3 fois l'écart-type.
263: *
264: * <table align=center>
265: * <tr><td> 0.5 </td><td> 69.1% </td></tr>
266: * <tr><td> 1.0 </td><td> 84.2% </td></tr>
267: * <tr><td> 1.5 </td><td> 93.3% </td></tr>
268: * <tr><td> 2.0 </td><td> 97.7% </td></tr>
269: * <tr><td> 3.0 </td><td> 99.9% </td></tr>
270: * </table>
271: *
272: * @param allPopulation La valeur {@code true} indique que les données fournies
273: * aux différentes méthodes {@code add(...)} représentent l'ensemble de
274: * la polulation. La valeur {@code false} indique que ces données ne
275: * représentent qu'un échantillon de la population, ce qui est généralement le
276: * cas. Si le nombre de données est élevé, alors les valeurs {@code true}
277: * et {@code false} donneront sensiblement les mêmes résultats.
278: */
279: public double standardDeviation(final boolean allPopulation) {
280: return Math.sqrt((sum2 - sum * sum / n)
281: / (allPopulation ? n : n - 1));
282: }
283:
284: /**
285: * Returns a clone of this statistics.
286: */
287: public Object clone() {
288: try {
289: return super .clone();
290: } catch (CloneNotSupportedException exception) {
291: // Should not happen since we are cloneable
292: throw new AssertionError(exception);
293: }
294: }
295:
296: /**
297: * Test this statistics with the specified object for equality.
298: */
299: public boolean equals(final Object obj) {
300: if (obj != null && getClass().equals(obj.getClass())) {
301: final Statistics cast = (Statistics) obj;
302: return n == cast.n
303: && Double.doubleToLongBits(min) == Double
304: .doubleToLongBits(cast.min)
305: && Double.doubleToLongBits(max) == Double
306: .doubleToLongBits(cast.max)
307: && Double.doubleToLongBits(sum) == Double
308: .doubleToLongBits(cast.sum)
309: && Double.doubleToLongBits(sum2) == Double
310: .doubleToLongBits(cast.sum2);
311: }
312: return false;
313: }
314:
315: /**
316: * Returns a hash code value for this statistics.
317: */
318: public int hashCode() {
319: final long code = (Double.doubleToLongBits(min) + 37 * (Double
320: .doubleToLongBits(max) + 37 * (Double
321: .doubleToLongBits(sum) + 37 * (Double
322: .doubleToLongBits(sum2)))));
323: return (int) code ^ (int) (code >>> 32) ^ n;
324: }
325:
326: /**
327: * Returns a string representation of this statistics. This method invokes
328: * {@link #toString(Locale, boolean)} using the default locale and spaces
329: * separator.
330: */
331: public final String toString() {
332: return toString(null, false);
333: }
334:
335: /**
336: * Returns a localized string representation of this statistics. This string
337: * will span multiple lines, one for each statistical value. For example:
338: *
339: * <blockquote><pre>
340: * Compte: 8726
341: * Minimum: 6.853
342: * Maximum: 8.259
343: * Moyenne: 7.421
344: * RMS: 7.846
345: * Écart-type: 6.489
346: * </pre></blockquote>
347: *
348: * If {@code tabulations} is true, then labels (e.g. "Minimum") and values
349: * (e.g. "6.853") are separated by tabulations. Otherwise, they are separated
350: * by spaces.
351: */
352: public String toString(final Locale locale,
353: final boolean tabulations) {
354: String text = Descriptions.getResources(locale).getString(
355: DescriptionKeys.STATISTICS_TO_STRING_$6,
356: new Number[] { new Integer(count()),
357: new Double(minimum()), new Double(maximum()),
358: new Double(mean()), new Double(rms()),
359: new Double(standardDeviation(false)) });
360: if (!tabulations) {
361: final TableWriter tmp = new TableWriter(null, 1);
362: tmp.write(text);
363: tmp.setColumnAlignment(1, TableWriter.ALIGN_RIGHT);
364: text = tmp.toString();
365: }
366: return text;
367: }
368:
369: /**
370: * Hold some statistics about a series of sample values and the difference between them.
371: * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>,
372: * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this class computes statistics
373: * in the same way than {@link Statistics} and additionnaly computes statistics for
374: * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>,
375: * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>,
376: * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>...,
377: * which are stored in a {@link #getDeltaStatistics delta} statistics object.
378: *
379: * @version $Id: Statistics.java 22443 2006-10-27 20:47:22Z desruisseaux $
380: * @author Martin Desruisseaux
381: */
382: public static class Delta extends Statistics {
383: /**
384: * Serial number for compatibility with different versions.
385: */
386: private static final long serialVersionUID = 3464306833883333219L;
387:
388: /**
389: * Statistics about the differences between consecutive sample values.
390: */
391: private Statistics delta;
392:
393: /**
394: * Last value given to an {@link #add(double) add} method as
395: * a {@code double}, or {@link Double#NaN NaN} if none.
396: */
397: private double last = Double.NaN;
398:
399: /**
400: * Last value given to an {@link #add(long) add}
401: * method as a {@code long}, or 0 if none.
402: */
403: private long lastAsLong;
404:
405: /**
406: * Construct an initially empty set of statistics.
407: * All statistical values are initialized to {@link Double#NaN}.
408: */
409: public Delta() {
410: delta = new Statistics();
411: delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
412: }
413:
414: /**
415: * Construct an initially empty set of statistics using the specified
416: * object for {@link #getDeltaStatistics delta} statistics. This method
417: * allows chaining different kind of statistics objects. For example, one
418: * could write:
419: * <blockquote><pre>
420: * new Statistics.Delta(new Statistics.Delta());
421: * </pre></blockquote>
422: * Which would compute statistics of sample values, statistics of difference between
423: * consecutive sample values, and statistics of difference of difference between
424: * consecutive sample values. Other kinds of {@link Statistics} object could be
425: * chained as well.
426: */
427: public Delta(final Statistics delta) {
428: this .delta = delta;
429: delta.reset();
430: delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
431: }
432:
433: /**
434: * Returns the statistics about difference between consecutives values.
435: * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>,
436: * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this is statistics for
437: * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>,
438: * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>,
439: * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>...,
440: */
441: public Statistics getDeltaStatistics() {
442: return delta;
443: }
444:
445: /**
446: * Reset the statistics to their initial {@link Double#NaN NaN} values.
447: * This method reset this object state as if it was just created.
448: */
449: public void reset() {
450: super .reset();
451: delta.reset();
452: delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
453: last = Double.NaN;
454: lastAsLong = 0;
455: }
456:
457: /**
458: * Update statistics for the specified sample. The {@link #getDeltaStatistics delta}
459: * statistics are updated with <code>sample - sample<sub>last</sub></code> value,
460: * where <code>sample<sub>last</sub></code> is the last value given to the previous
461: * call of an {@code add(...)} method.
462: */
463: public void add(final double sample) {
464: super .add(sample);
465: delta.add(sample - last);
466: last = sample;
467: lastAsLong = (long) sample;
468: }
469:
470: /**
471: * Update statistics for the specified sample. The {@link #getDeltaStatistics delta}
472: * statistics are updated with <code>sample - sample<sub>last</sub></code> value,
473: * where <code>sample<sub>last</sub></code> is the last value given to the previous
474: * call of an {@code add(...)} method.
475: */
476: public void add(final long sample) {
477: super .add(sample);
478: if (last == (double) lastAsLong) {
479: // 'lastAsLong' may have more precision than 'last' since the cast to the
480: // 'double' type may loose some digits. Invoke the 'delta.add(long)' version.
481: delta.add(sample - lastAsLong);
482: } else {
483: // The sample value is either fractional, outside 'long' range,
484: // infinity or NaN. Invoke the 'delta.add(double)' version.
485: delta.add(sample - last);
486: }
487: last = sample;
488: lastAsLong = sample;
489: }
490:
491: /**
492: * Update statistics with all samples from the specified {@code stats}. Invoking this
493: * method is equivalent (except for rounding errors) to invoking {@link #add(double) add}
494: * for all samples that were added to {@code stats}. The {@code stats} argument
495: * must be an instance of {@code Statistics.Delta}.
496: *
497: * @param stats The statistics to be added to {@code this},
498: * or {@code null} if none.
499: * @throws ClassCastException If {@code stats} is not an instance of
500: * {@code Statistics.Delta}.
501: */
502: public void add(final Statistics stats)
503: throws ClassCastException {
504: if (stats != null) {
505: final Delta toAdd = (Delta) stats;
506: if (toAdd.delta.nNaN >= 0) {
507: delta.add(toAdd.delta);
508: last = toAdd.last;
509: lastAsLong = toAdd.lastAsLong;
510: super .add(stats);
511: }
512: }
513: }
514:
515: /**
516: * Returns a clone of this statistics.
517: */
518: public Object clone() {
519: Delta copy = (Delta) super .clone();
520: copy.delta = (Statistics) copy.delta.clone();
521: return copy;
522: }
523:
524: /**
525: * Test this statistics with the specified object for equality.
526: */
527: public boolean equals(final Object obj) {
528: return super .equals(obj)
529: && delta.equals(((Delta) obj).delta);
530: }
531:
532: /**
533: * Returns a hash code value for this statistics.
534: */
535: public int hashCode() {
536: return super .hashCode() + 37 * delta.hashCode();
537: }
538: }
539: }
|