01: /*
02: Copyright © 1999 CERN - European Organization for Nuclear Research.
03: Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
04: is hereby granted without fee, provided that the above copyright notice appear in all copies and
05: that both that copyright notice and this permission notice appear in supporting documentation.
06: CERN makes no representations about the suitability of this software for any purpose.
07: It is provided "as is" without expressed or implied warranty.
08: */
09:
10: package org.hammurapi.inspectors.metrics.statistics;
11:
12: //!! job: average, median
13:
14: /**
15: * @author mucbj0
16: *
17: * To change the template for this generated type comment go to
18: * Window - Preferences - Java - Code Generation - Code and Comments
19: */
20: public class DescriptiveStatistic {
21:
22: /**
23: * Computes the frequency (number of occurances, count) of each distinct value in the given sorted data.
24: * After this call returns both <tt>distinctValues</tt> and <tt>frequencies</tt> have a new size (which is equal for both), which is the number of distinct values in the sorted data.
25: * <p>
26: * Distinct values are filled into <tt>distinctValues</tt>, starting at index 0.
27: * The frequency of each distinct value is filled into <tt>frequencies</tt>, starting at index 0.
28: * As a result, the smallest distinct value (and its frequency) can be found at index 0, the second smallest distinct value (and its frequency) at index 1, ..., the largest distinct value (and its frequency) at index <tt>distinctValues.size()-1</tt>.
29: *
30: * <b>Example:</b>
31: * <br>
32: * <tt>elements = (5,6,6,7,8,8) --> distinctValues = (5,6,7,8), frequencies = (1,2,1,2)</tt>
33: *
34: * @param sortedData the data; must be sorted ascending.
35: * @param distinctValues a list to be filled with the distinct values; can have any size.
36: * @param frequencies a list to be filled with the frequencies; can have any size; set this parameter to <tt>null</tt> to ignore it.
37: */
38: public void frequencies(IntVector sortedData,
39: IntVector distinctValues, IntVector frequencies) {
40:
41: int size = sortedData.size();
42: int i = 0;
43: sortedData.sort();
44:
45: while (i < size) {
46: int element = sortedData.elementAt(i);
47: int cursor = i;
48:
49: // determine run length (number of equal elements)
50: while (++i < size && sortedData.elementAt(i) == element)
51: ;
52:
53: int runLength = i - cursor;
54: distinctValues.addElement(element);
55: if (frequencies != null)
56: frequencies.addElement(runLength);
57: }
58: }
59:
60: /**
61: * Returns the arithmetic mean of a data sequence;
62: * That is <tt>Sum( data[i] ) / data.size()</tt>.
63: */
64: public static double mean(IntVector data) {
65: if (data != null && data.size() > 0) {
66: return sum(data) / data.size();
67: } else {
68: return 0;
69: }
70: }
71:
72: public static int sum(IntVector data) {
73: int sum = 0;
74: for (int i = 0; i < data.size(); i++) {
75: sum += data.elementAt(i);
76: }
77: return sum;
78:
79: }
80: }
|