001: /*
002: * Copyright 2003-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.commons.math.stat;
017:
018: import java.io.Serializable;
019: import java.text.NumberFormat;
020: import java.util.Iterator;
021: import java.util.Comparator;
022: import java.util.TreeMap;
023:
024: /**
025: * Maintains a frequency distribution.
026: * <p>
027: * Accepts int, long, char or Object values. New values added must be
028: * comparable to those that have been added, otherwise the add method will
029: * throw an IllegalArgumentException.
030: * <p>
031: * Integer values (int, long, Integer, Long) are not distinguished by type --
032: * i.e. <code>addValue(new Long(2)), addValue(2), addValue(2l)</code> all have
033: * the same effect (similarly for arguments to <code>getCount,</code> etc.).
034: * <p>
035: * The values are ordered using the default (natural order), unless a
036: * <code>Comparator</code> is supplied in the constructor.
037: *
038: * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
039: */
040: public class Frequency implements Serializable {
041:
042: /** Serializable version identifier */
043: private static final long serialVersionUID = -3845586908418844111L;
044:
045: /** underlying collection */
046: private TreeMap freqTable = null;
047:
048: /**
049: * Default constructor.
050: */
051: public Frequency() {
052: freqTable = new TreeMap();
053: }
054:
055: /**
056: * Constructor allowing values Comparator to be specified.
057: *
058: * @param comparator Comparator used to order values
059: */
060: public Frequency(Comparator comparator) {
061: freqTable = new TreeMap(comparator);
062: }
063:
064: /**
065: * Return a string representation of this frequency
066: * distribution.
067: *
068: * @return a string representation.
069: */
070: public String toString() {
071: NumberFormat nf = NumberFormat.getPercentInstance();
072: StringBuffer outBuffer = new StringBuffer();
073: outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
074: Iterator iter = freqTable.keySet().iterator();
075: while (iter.hasNext()) {
076: Object value = iter.next();
077: outBuffer.append(value);
078: outBuffer.append('\t');
079: outBuffer.append(getCount(value));
080: outBuffer.append('\t');
081: outBuffer.append(nf.format(getPct(value)));
082: outBuffer.append('\t');
083: outBuffer.append(nf.format(getCumPct(value)));
084: outBuffer.append('\n');
085: }
086: return outBuffer.toString();
087: }
088:
089: /**
090: * Adds 1 to the frequency count for v.
091: *
092: * @param v the value to add.
093: * @throws IllegalArgumentException if <code>v</code> is not comparable.
094: */
095: public void addValue(Object v) {
096: Object obj = v;
097: if (v instanceof Integer) {
098: obj = new Long(((Integer) v).longValue());
099: }
100: try {
101: Long count = (Long) freqTable.get(obj);
102: if (count == null) {
103: freqTable.put(obj, new Long(1));
104: } else {
105: freqTable.put(obj, new Long(count.longValue() + 1));
106: }
107: } catch (ClassCastException ex) {
108: //TreeMap will throw ClassCastException if v is not comparable
109: throw new IllegalArgumentException(
110: "Value not comparable to existing values.");
111: }
112: }
113:
114: /**
115: * Adds 1 to the frequency count for v.
116: *
117: * @param v the value to add.
118: */
119: public void addValue(int v) {
120: addValue(new Long(v));
121: }
122:
123: /**
124: * Adds 1 to the frequency count for v.
125: *
126: * @param v the value to add.
127: */
128: public void addValue(Integer v) {
129: addValue(new Long(v.longValue()));
130: }
131:
132: /**
133: * Adds 1 to the frequency count for v.
134: *
135: * @param v the value to add.
136: */
137: public void addValue(long v) {
138: addValue(new Long(v));
139: }
140:
141: /**
142: * Adds 1 to the frequency count for v.
143: *
144: * @param v the value to add.
145: */
146: public void addValue(char v) {
147: addValue(new Character(v));
148: }
149:
150: /** Clears the frequency table */
151: public void clear() {
152: freqTable.clear();
153: }
154:
155: /**
156: * Returns an Iterator over the set of values that have been added.
157: * <p>
158: * If added values are itegral (i.e., integers, longs, Integers, or Longs),
159: * they are converted to Longs when they are added, so the objects returned
160: * by the Iterator will in this case be Longs.
161: *
162: * @return values Iterator
163: */
164: public Iterator valuesIterator() {
165: return freqTable.keySet().iterator();
166: }
167:
168: //-------------------------------------------------------------------------
169:
170: /**
171: * Returns the sum of all frequencies.
172: *
173: * @return the total frequency count.
174: */
175: public long getSumFreq() {
176: long result = 0;
177: Iterator iterator = freqTable.values().iterator();
178: while (iterator.hasNext()) {
179: result += ((Long) iterator.next()).longValue();
180: }
181: return result;
182: }
183:
184: /**
185: * Returns the number of values = v.
186: *
187: * @param v the value to lookup.
188: * @return the frequency of v.
189: */
190: public long getCount(Object v) {
191: if (v instanceof Integer) {
192: return getCount(((Integer) v).longValue());
193: }
194: long result = 0;
195: try {
196: Long count = (Long) freqTable.get(v);
197: if (count != null) {
198: result = count.longValue();
199: }
200: } catch (ClassCastException ex) {
201: // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
202: }
203: return result;
204: }
205:
206: /**
207: * Returns the number of values = v.
208: *
209: * @param v the value to lookup.
210: * @return the frequency of v.
211: */
212: public long getCount(int v) {
213: return getCount(new Long(v));
214: }
215:
216: /**
217: * Returns the number of values = v.
218: *
219: * @param v the value to lookup.
220: * @return the frequency of v.
221: */
222: public long getCount(long v) {
223: return getCount(new Long(v));
224: }
225:
226: /**
227: * Returns the number of values = v.
228: *
229: * @param v the value to lookup.
230: * @return the frequency of v.
231: */
232: public long getCount(char v) {
233: return getCount(new Character(v));
234: }
235:
236: //-------------------------------------------------------------
237:
238: /**
239: * Returns the percentage of values that are equal to v
240: * (as a proportion between 0 and 1).
241: * <p>
242: * Returns <code>Double.NaN</code> if no values have been added.
243: *
244: * @param v the value to lookup
245: * @return the proportion of values equal to v
246: */
247: public double getPct(Object v) {
248: if (getSumFreq() == 0) {
249: return Double.NaN;
250: }
251: return (double) getCount(v) / (double) getSumFreq();
252: }
253:
254: /**
255: * Returns the percentage of values that are equal to v
256: * (as a proportion between 0 and 1).
257: *
258: * @param v the value to lookup
259: * @return the proportion of values equal to v
260: */
261: public double getPct(int v) {
262: return getPct(new Long(v));
263: }
264:
265: /**
266: * Returns the percentage of values that are equal to v
267: * (as a proportion between 0 and 1).
268: *
269: * @param v the value to lookup
270: * @return the proportion of values equal to v
271: */
272: public double getPct(long v) {
273: return getPct(new Long(v));
274: }
275:
276: /**
277: * Returns the percentage of values that are equal to v
278: * (as a proportion between 0 and 1).
279: *
280: * @param v the value to lookup
281: * @return the proportion of values equal to v
282: */
283: public double getPct(char v) {
284: return getPct(new Character(v));
285: }
286:
287: //-----------------------------------------------------------------------------------------
288:
289: /**
290: * Returns the cumulative frequency of values less than or equal to v.
291: * <p>
292: * Returns 0 if v is not comparable to the values set.
293: *
294: * @param v the value to lookup.
295: * @return the proportion of values equal to v
296: */
297: public long getCumFreq(Object v) {
298: if (getSumFreq() == 0) {
299: return 0;
300: }
301: if (v instanceof Integer) {
302: return getCumFreq(((Integer) v).longValue());
303: }
304: Comparator c = freqTable.comparator();
305: if (c == null) {
306: c = new NaturalComparator();
307: }
308: long result = 0;
309:
310: try {
311: Long value = (Long) freqTable.get(v);
312: if (value != null) {
313: result = value.longValue();
314: }
315: } catch (ClassCastException ex) {
316: return result; // v is not comparable
317: }
318:
319: if (c.compare(v, freqTable.firstKey()) < 0) {
320: return 0; // v is comparable, but less than first value
321: }
322:
323: if (c.compare(v, freqTable.lastKey()) >= 0) {
324: return getSumFreq(); // v is comparable, but greater than the last value
325: }
326:
327: Iterator values = valuesIterator();
328: while (values.hasNext()) {
329: Object nextValue = values.next();
330: if (c.compare(v, nextValue) > 0) {
331: result += getCount(nextValue);
332: } else {
333: return result;
334: }
335: }
336: return result;
337: }
338:
339: /**
340: * Returns the cumulative frequency of values less than or equal to v.
341: * <p>
342: * Returns 0 if v is not comparable to the values set.
343: *
344: * @param v the value to lookup
345: * @return the proportion of values equal to v
346: */
347: public long getCumFreq(int v) {
348: return getCumFreq(new Long(v));
349: }
350:
351: /**
352: * Returns the cumulative frequency of values less than or equal to v.
353: * <p>
354: * Returns 0 if v is not comparable to the values set.
355: *
356: * @param v the value to lookup
357: * @return the proportion of values equal to v
358: */
359: public long getCumFreq(long v) {
360: return getCumFreq(new Long(v));
361: }
362:
363: /**
364: * Returns the cumulative frequency of values less than or equal to v.
365: * <p>
366: * Returns 0 if v is not comparable to the values set.
367: *
368: * @param v the value to lookup
369: * @return the proportion of values equal to v
370: */
371: public long getCumFreq(char v) {
372: return getCumFreq(new Character(v));
373: }
374:
375: //----------------------------------------------------------------------------------------------
376:
377: /**
378: * Returns the cumulative percentage of values less than or equal to v
379: * (as a proportion between 0 and 1).
380: * <p>
381: * Returns <code>Double.NaN</code> if no values have been added.
382: * Returns 0 if at least one value has been added, but v is not comparable
383: * to the values set.
384: *
385: * @param v the value to lookup
386: * @return the proportion of values less than or equal to v
387: */
388: public double getCumPct(Object v) {
389: if (getSumFreq() == 0) {
390: return Double.NaN;
391: }
392: return (double) getCumFreq(v) / (double) getSumFreq();
393: }
394:
395: /**
396: * Returns the cumulative percentage of values less than or equal to v
397: * (as a proportion between 0 and 1).
398: * <p>
399: * Returns 0 if v is not comparable to the values set.
400: *
401: * @param v the value to lookup
402: * @return the proportion of values less than or equal to v
403: */
404: public double getCumPct(int v) {
405: return getCumPct(new Long(v));
406: }
407:
408: /**
409: * Returns the cumulative percentage of values less than or equal to v
410: * (as a proportion between 0 and 1).
411: * <p>
412: * Returns 0 if v is not comparable to the values set.
413: *
414: * @param v the value to lookup
415: * @return the proportion of values less than or equal to v
416: */
417: public double getCumPct(long v) {
418: return getCumPct(new Long(v));
419: }
420:
421: /**
422: * Returns the cumulative percentage of values less than or equal to v
423: * (as a proportion between 0 and 1).
424: * <p>
425: * Returns 0 if v is not comparable to the values set.
426: *
427: * @param v the value to lookup
428: * @return the proportion of values less than or equal to v
429: */
430: public double getCumPct(char v) {
431: return getCumPct(new Character(v));
432: }
433:
434: /**
435: * A Comparator that compares comparable objects using the
436: * natural order. Copied from Commons Collections ComparableComparator.
437: */
438: private static class NaturalComparator implements Comparator {
439: /**
440: * Compare the two {@link Comparable Comparable} arguments.
441: * This method is equivalent to:
442: * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
443: *
444: * @param o1 the first object
445: * @param o2 the second object
446: * @return result of comparison
447: * @throws NullPointerException when <i>o1</i> is <code>null</code>,
448: * or when <code>((Comparable)o1).compareTo(o2)</code> does
449: * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
450: * or when <code>((Comparable)o1).compareTo(o2)</code> does
451: */
452: public int compare(Object o1, Object o2) {
453: return ((Comparable) o1).compareTo(o2);
454: }
455: }
456: }
|