#!/usr/bin/env python
# (C) 2000 Huaiyu Zhu <hzhu@users.sourceforge.net>. Licence: GPL
# $Id: utils.py,v 1.3 2000/10/05 01:32:27 hzhu Exp $
"""
Statistics utilities
"""
from MatPy.Matrix import Matrix,c_range,zeros,sum2,sort
def hist(y, x=10, scaled=None):
    """hist(y, x=10, scaled=None): return freq, x as col vecs

    Count how many elements of y fall into each bin.

    y      - data vector.  It must support max(), min(), len() and the
             elementwise comparison y.lt(value) used below.
    x      - either the number of equal-width bins spanning
             [min(y), max(y)] (default 10), or a Matrix of bin centers.
             Centers are sorted before use and the sorted centers are
             what is returned.
    scaled - if true, divide the counts by len(y) * bin_width, so the
             result is scaled like a probability density.

    Returns (freq, x): freq is an n-by-1 Matrix with one entry per bin,
    x holds the bin centers.

    The first bin collects everything below its upper edge and the last
    bin collects everything not counted by the earlier bins, so the
    unscaled counts always add up to len(y).

    Raises ValueError if the number of bins is not positive, or if
    scaled is requested together with explicit bin centers (bins of
    unequal width cannot be scaled yet).

    Inspired by hist.m in octave
    """
    max_val = max(y)
    min_val = min(y)
    if not isinstance(x, Matrix):
        n = x
        if not n > 0:
            raise ValueError("hist: number of bins must be positive")
        # Equal-width bins covering exactly [min_val, max_val].
        # No cushion is left at either side of the data - don't know how
        # to tell the boundaries to gnuplot.
        delta = (max_val - min_val) * 1.0 / n
        x0 = (c_range(n)) * delta + min_val     # lower edge of each bin
        x = x0 + delta / 2.                     # center of each bin
        x1 = x0 + delta                         # upper edge of each bin
    else:
        x = sort(x)
        # Bins given by their centers may have unequal widths; None marks
        # "no common width" for the scaling step below.
        delta = None
        # dividing points: midway between neighbouring centers
        d = list((x[1:] + x[:-1]) / 2.)
        n = len(d) + 1
        # upper edge of each bin; the last one is extrapolated
        x1 = Matrix(d + [2*d[-1] - d[-2]])
    # len(y) is taken as the number of data points, exactly as the
    # scaling below has always done.
    total = len(y)
    freq = zeros((n, 1))
    cum0 = 0
    for i in range(n):
        if i == n - 1:
            # The last bin takes every remaining point.  A strict
            # "less than the upper edge" test would drop max(y), which
            # sits exactly on the last edge of equal-width bins.
            cum = total
        else:
            # cumulative count of points below this bin's upper edge
            cum = sum2(y.lt(x1[i]))
        freq[i], cum0 = cum - cum0, cum
    if scaled:
        if delta is None:
            raise ValueError("Can't scale unequal bins yet")
        freq = freq * 1.0 / total / delta
    return freq, x
def histplot(y, x=10, waittime=None, g=None, scaled=None):
    """histplot(y, x, waittime, g, scaled) : plot histogram

    y is a vector, x is number of bins or centers of bins
    waittime is handed to MatPy.gplot.wait once the plot has been issued
    g is existing plot object; a new Gplot is created when none is given
    scaled - whether to scale to pdf

    Returns the plot object that was used, so it can be passed back in
    as g for later plots.
    """
    (nn, xx) = hist(y, x, scaled=scaled)
    # Imported locally, so importing this module does not by itself
    # import the plotting module.
    from MatPy.gplot import Gplot, wait
    if not g:
        g = Gplot()
    # "with" is a reserved word from Python 2.6 on and cannot be written
    # as a plain keyword argument; passing it through a dict hands the
    # very same keyword to plot().
    g.plot([nn], [xx], **{"with": "boxes"})
    wait(waittime)
    return g
|