demo.py : » Math » Modular-toolkit-for-Data-Processing » MDP-2.6 » mdp » demo » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Math » Modular toolkit for Data Processing
Modular toolkit for Data Processing » MDP 2.6 » mdp » demo » demo.py
import mdp
# --- Node basics: construction, training, execution and inversion ---
# PCA node created with all defaults.
pcanode1 = mdp.nodes.PCANode()
pcanode1
# PCANode(input_dim=None, output_dim=None, dtype=None)
# PCA node with a fixed number of output components.
pcanode2 = mdp.nodes.PCANode(output_dim = 10)
pcanode2
# PCANode(input_dim=None, output_dim=10, dtype=None)
# A fractional output_dim is reported back through desired_variance.
pcanode3 = mdp.nodes.PCANode(output_dim = 0.8)
pcanode3.desired_variance
# 0.80000000000000004
# PCA node with an explicitly requested dtype.
pcanode4 = mdp.nodes.PCANode(dtype = 'float32')
pcanode4
# PCANode(input_dim=None, output_dim=None, dtype='float32')
pcanode4.supported_dtypes
# [dtype('float32'), dtype('float64')]
expnode = mdp.nodes.PolynomialExpansionNode(3)
x = mdp.numx_rand.random((100, 25))  # 25 variables, 100 observations
# The first training call fixes input_dim and dtype (see repr below).
pcanode1.train(x)
pcanode1
# PCANode(input_dim=25, output_dim=None, dtype='float64')
# Keep feeding the same node with 100 further random batches.
for i in range(100):
    x = mdp.numx_rand.random((100, 25))
    pcanode1.train(x)
# >>>
expnode.is_trainable()
# False
# Close the training phase explicitly before reading the results.
pcanode1.stop_training()
# pcanode3 is trained on the last batch only.
pcanode3.train(x)
pcanode3.stop_training()
pcanode3.output_dim
# 16
pcanode3.explained_variance
# 0.85261144755506446
avg = pcanode1.avg            # mean of the input data
v = pcanode1.get_projmatrix() # projection matrix
# Supervised node: each batch is passed together with its label.
fdanode = mdp.nodes.FDANode()
for label in ['a', 'b', 'c']:
    x = mdp.numx_rand.random((100, 25))
    fdanode.train(x, label)
# >>>
# NOTE(review): training continues after stop_training(); presumably
# FDANode has a second training phase -- confirm against the MDP docs.
fdanode.stop_training()
for label in ['a', 'b', 'c']:
    x = mdp.numx_rand.random((100, 25))
    fdanode.train(x, label)
# >>>
# Execution: calling the node is used interchangeably with execute().
x = mdp.numx_rand.random((100, 25))
y_pca = pcanode1.execute(x)
y_pca = pcanode1(x)
x = mdp.numx_rand.random((100, 5))
y_exp = expnode(x)
x = mdp.numx_rand.random((100, 25))
y_fda = fdanode(x)
# Inversion is only attempted on the node that reports being invertible.
pcanode1.is_invertible()
# True
x = pcanode1.inverse(y_pca)
expnode.is_invertible()
# False
class TimesTwoNode(mdp.Node):
    """Stateless node that doubles its input; the inverse halves it again."""

    def _inverse(self, y):
        # undo _execute by dividing by two
        return y/2

    def _execute(self, x):
        # multiply the input by two
        return 2*x

    def is_trainable(self):
        # nothing is learned from data, so training is disabled
        return False
# ...
# >>>
# Request an integer dtype; the expected output below shows integer results
# even though the input array holds floats.
node = TimesTwoNode(dtype = 'int32')
x = mdp.numx.array([[1.0, 2.0, 3.0]])
y = node(x)
print x, '* 2 =  ', y
# [ [ 1.  2.  3.]] * 2 =   [ [2 4 6]]
# Round trip through the inverse.
print y, '/ 2 =', node.inverse(y)
# [ [2 4 6]] / 2 = [ [1 2 3]]
class PowerNode(mdp.Node):
    """Stateless, non-invertible node that raises its input to a fixed power."""

    def __init__(self, power, input_dim=None, dtype=None):
        super(PowerNode, self).__init__(input_dim=input_dim, dtype=dtype)
        # exponent used by _execute
        self.power = power

    def _get_supported_dtypes(self):
        # only these two floating-point dtypes are declared as supported
        supported = ['float32', 'float64']
        return supported

    def _execute(self, x):
        raised = x**self.power
        # hand the result to the node's _refcast helper before returning it
        return self._refcast(raised)

    def is_invertible(self):
        return False

    def is_trainable(self):
        return False
# ...
# >>>
# Cube a small sample array with the PowerNode defined above.
node = PowerNode(3)
x = mdp.numx.array([[1.0, 2.0, 3.0]])
y = node(x)
# The node is executed a second time inside the print statement.
print x, '**', node.power, '=', node(x)
# [ [ 1.  2.  3.]] ** 3 = [ [  1.   8.  27.]]
class MeanFreeNode(mdp.Node):
    """Trainable node that removes the mean of the training data.

    Training accumulates column sums; stopping the training turns them
    into the mean that is subtracted on execution and added back on
    inversion.
    """

    def __init__(self, input_dim=None, dtype=None):
        super(MeanFreeNode, self).__init__(input_dim=input_dim,
                                           dtype=dtype)
        self.tlen = 0    # number of rows accumulated so far
        self.avg = None  # column sums, converted to the mean when training stops

    def _train(self, x):
        # Create the accumulator lazily, using the node's input_dim and dtype.
        if self.avg is None:
            self.avg = mdp.numx.zeros(self.input_dim, dtype=self.dtype)
        n_rows = x.shape[0]
        self.avg += mdp.numx.sum(x, axis=0)
        self.tlen += n_rows

    def _stop_training(self):
        # turn the accumulated sums into the mean, in place
        self.avg /= self.tlen
        # default the output dimension to the input dimension when unset
        if self.output_dim is None:
            self.output_dim = self.input_dim

    def _execute(self, x):
        centered = x - self.avg
        return centered

    def _inverse(self, y):
        restored = y + self.avg
        return restored
# ...
# >>>
# Train the node on one random batch and execute it on the same data.
node = MeanFreeNode()
x = mdp.numx_rand.random((10,4))
node.train(x)
# NOTE(review): stop_training() is never called explicitly here; presumably
# executing the node closes the training phase -- confirm in the MDP docs.
y = node(x)
print 'Mean of y (should be zero): ', mdp.numx.mean(y, 0)
# Mean of y (should be zero):  [  0.00000000e+00   2.22044605e-17
# -2.22044605e-17   1.11022302e-17]
class UnitVarianceNode(mdp.Node):
    """Two-phase node that centers its input and scales it to unit variance.

    The first training phase accumulates the mean; the second accumulates
    the squared deviations around that mean, normalized by (tlen - 1).
    """

    def __init__(self, input_dim=None, dtype=None):
        super(UnitVarianceNode, self).__init__(input_dim=input_dim,
                                               dtype=dtype)
        self.tlen = 0    # rows seen in the current phase
        self.avg = None  # column sums, later the mean
        self.std = None  # squared deviations, later the standard deviation

    def _get_train_seq(self):
        # one (train, stop) pair per phase, in the order they are run
        mean_phase = (self._train_mean, self._stop_mean)
        std_phase = (self._train_std, self._stop_std)
        return [mean_phase, std_phase]

    def _train_mean(self, x):
        if self.avg is None:
            self.avg = mdp.numx.zeros(self.input_dim, dtype=self.dtype)
        n_rows = x.shape[0]
        self.avg += mdp.numx.sum(x, 0)
        self.tlen += n_rows

    def _stop_mean(self):
        # sums -> mean, in place
        self.avg /= self.tlen

    def _train_std(self, x):
        if self.std is None:
            # first chunk of the second phase: restart the row counter
            self.tlen = 0
            self.std = mdp.numx.zeros(self.input_dim, dtype=self.dtype)
        centered = x - self.avg
        self.std += mdp.numx.sum(centered**2., 0)
        self.tlen += x.shape[0]

    def _stop_std(self):
        # divide the summed squares by (tlen - 1), then take the square root
        variance = self.std/(self.tlen-1)
        self.std = mdp.numx.sqrt(variance)

    def _execute(self, x):
        centered = x - self.avg
        return centered/self.std

    def _inverse(self, y):
        rescaled = y*self.std
        return rescaled + self.avg
# >>>
node = UnitVarianceNode()
x = mdp.numx_rand.random((10,4))
# loop over phases
for phase in range(2):
    node.train(x)
    node.stop_training()
# ...
# ...
# execute
y = node(x)
print 'Standard deviation of y (should be one): ', mdp.numx.std(y, axis=0)
# Standard deviation of y (should be one):  [ 1.  1.  1.  1.]
class TwiceNode(mdp.Node):
    """Non-trainable, non-invertible node that outputs its input twice.

    The output is the input concatenated with itself along axis 1, so the
    output dimension is derived from the input dimension and cannot be set
    independently.
    """
    def is_trainable(self):
        return False
    def is_invertible(self):
        return False
    def _set_input_dim(self, n):
        # keep both dimensions in sync: the output is twice as wide
        self._input_dim = n
        self._output_dim = 2*n
    def _set_output_dim(self, n):
        # Call-style raise; the "raise Class, message" form is deprecated
        # and was removed in Python 3.
        raise mdp.NodeException("Output dim can not be set explicitly!")
    def _execute(self, x):
        # place two copies of x side by side
        return mdp.numx.concatenate((x, x), 1)
# ...
# >>>
# Execute TwiceNode on a 5x2 array of zeros; the result has 4 columns.
node = TwiceNode()
x = mdp.numx.zeros((5,2))
x
# array([[0, 0],
# [0, 0],
# [0, 0],
# [0, 0],
# [0, 0]])
node.execute(x)
# array([[0, 0, 0, 0],
# [0, 0, 0, 0],
# [0, 0, 0, 0],
# [0, 0, 0, 0],
# [0, 0, 0, 0]])
# --- Manual PCA -> ICA -> HitParade pipeline, node by node ---
# Build 20 standardized random sources, shrink all but the first five by a
# factor of ten, then mix them with a random 20x20 matrix.
inp = mdp.numx_rand.random((1000, 20))
inp = (inp - mdp.numx.mean(inp, 0))/mdp.numx.std(inp, 0)
inp[:,5:] /= 10.0
x = mdp.utils.mult(inp,mdp.numx_rand.random((20, 20)))
# Test set built the same way, with its own random mixing matrix.
inp_test = mdp.numx_rand.random((1000, 20))
inp_test = (inp_test - mdp.numx.mean(inp_test, 0))/mdp.numx.std(inp_test, 0)
inp_test[:,5:] /= 10.0
x_test = mdp.utils.mult(inp_test, mdp.numx_rand.random((20, 20)))
# Reduce to five components, then feed the PCA output to the ICA node.
pca = mdp.nodes.PCANode(output_dim=5)
pca.train(x)
out1 = pca(x)
ica = mdp.nodes.CuBICANode()
ica.train(out1)
out2 = ica(out1)
# Push the test data through both trained nodes.
out1_test = pca(x_test)
out2_test = ica(out1_test)
# Collect the maxima of the test output and their indices.
hitnode = mdp.nodes.HitParadeNode(3)
hitnode.train(out2_test)
maxima, indices = hitnode.get_maxima()
flow = mdp.Flow([mdp.nodes.PCANode(output_dim=5), mdp.nodes.CuBICANode()])
flow = mdp.nodes.PCANode(output_dim=5) + mdp.nodes.CuBICANode()
flow.train(x)
flow.append(mdp.nodes.HitParadeNode(3))
flow += mdp.nodes.HitParadeNode(3)
flow.train(x_test)
maxima, indices = flow[2].get_maxima()
out = flow.execute(x)
cov = mdp.numx.amax(abs(mdp.utils.cov2(inp[:,:5], out)), axis=1)
print cov
# [ 0.98992083  0.99244511  0.99227319  0.99663185  0.9871812 ]
out = flow(x)
rec = flow.inverse(out)
cov = mdp.numx.amax(abs(mdp.utils.cov2(x/mdp.numx.std(x,axis=0),
                                       rec/mdp.numx.std(rec,axis=0))))
print cov
# [ 0.99839606  0.99744461  0.99616208  0.99772863  0.99690947
# 0.99864056  0.99734378  0.98722502  0.98118101  0.99407939
# 0.99683096  0.99756988  0.99664384  0.99723419  0.9985529
# 0.99829763  0.9982712   0.99721741  0.99682906  0.98858858]
for node in flow:
    print repr(node)
# ...
# PCANode(input_dim=20, output_dim=5, dtype='float64')
# CuBICANode(input_dim=5, output_dim=5, dtype='float64')
# HitParadeNode(input_dim=5, output_dim=5, dtype='float64')
# HitParadeNode(input_dim=5, output_dim=5, dtype='float64')
# >>>
len(flow)
# 4
print flow[::2]
# [PCANode, HitParadeNode]
nodetoberemoved = flow.pop(-1)
nodetoberemoved
# HitParadeNode(input_dim=5, output_dim=5, dtype='float64')
len(flow)
# 3
dummyflow = flow[1:].copy()
longflow = flow + dummyflow
len(longflow)
# 4
class BogusExceptNode(mdp.Node):
    """Deliberately failing node: both train() and execute() always raise.

    train() sets ``bogus_attr`` before raising, so some state exists on
    the node at the time of the failure.
    """
    def train(self,x):
        self.bogus_attr = 1
        # Call-style raise; the "raise Class, message" form is deprecated
        # and was removed in Python 3.
        raise Exception("Bogus Exception")
    def execute(self,x):
        raise Exception("Bogus Exception")
# ...
# Flow made of a single node that always fails.
flow = mdp.Flow([BogusExceptNode()])
# NOTE(review): presumably 1 switches crash recovery on with a default dump
# location, while a string selects the dump file -- confirm in the MDP docs.
flow.set_crash_recovery(1)
# The path below is a placeholder; adjust it before running.
flow.set_crash_recovery('/home/myself/mydumps/MDPdump.pic')
class BogusNode(mdp.Node):
    """Placeholder node whose training step is a no-op."""

    def _train(self, x):
        # intentionally ignore the data
        return None
# ...
class BogusNode2(mdp.Node):
    """Placeholder node that is neither trainable nor invertible."""

    def is_invertible(self):
        return False

    def is_trainable(self):
        return False
# ...
# >>>
def gen_data(blocks):
    """Yield `blocks` identical two-column arrays.

    Each array has 500 rows: the even numbers 2..1000 in the first column
    and the odd numbers 1..999 in the second. The block counter is wrapped
    in mdp.utils.progressinfo.
    """
    for _ in mdp.utils.progressinfo(xrange(blocks)):
        evens = mdp.numx.atleast_2d(mdp.numx.arange(2,1001,2))
        odds = mdp.numx.atleast_2d(mdp.numx.arange(1,1001,2))
        # stack as two rows, then transpose so that variables are on
        # columns and observations on rows
        stacked = mdp.numx.concatenate([evens,odds])
        yield mdp.numx.transpose(stacked)
# ...
# >>>
# --- Different ways of passing training data to a flow ---
# One generator per node.
flow = mdp.Flow([BogusNode(),BogusNode()], verbose=1)
flow.train([gen_data(5000),gen_data(3000)])
# Training node #0 (BogusNode)
# [===================================100%==================================>]
# A single array instead of a list of iterables.
flow = BogusNode() + BogusNode()
block_x = mdp.numx.atleast_2d(mdp.numx.arange(2,1001,2))
block_y = mdp.numx.atleast_2d(mdp.numx.arange(1,1001,2))
single_block = mdp.numx.transpose(mdp.numx.concatenate([block_x,block_y]))
flow.train(single_block)
# None is passed in the slot of the node that is not trainable.
flow = mdp.Flow([BogusNode2(),BogusNode()], verbose=1)
flow.train([None, gen_data(5000)])
# Training node #0 (BogusNode2)
# Training finished
# Training node #1 (BogusNode)
# [===================================100%==================================>]
# The same mixed flow trained with a single array.
flow = mdp.Flow([BogusNode2(),BogusNode()], verbose=1)
flow.train(single_block)
# Training node #0 (BogusNode2)
# Training finished
# Training node #1 (BogusNode)
# Training finished
# Close the training phase of the last node
flow = mdp.Flow([BogusNode(),BogusNode()], verbose=1)
flow.train([gen_data(1), gen_data(1)])
# Training node #0 (BogusNode)
# Training finished
# Training node #1 (BogusNode)
# [===================================100%==================================>]
# Execute and invert the trained flow on the single block.
output = flow(single_block)
output = flow.inverse(single_block)
class SimpleIterable(object):
    """Iterable that produces a fixed number of data blocks per pass.

    Every call to ``__iter__`` creates a new generator, so the object can
    be iterated more than once.
    """

    def __init__(self, blocks):
        # number of blocks produced by each pass
        self.blocks = blocks

    def __iter__(self):
        # generator function: the body runs lazily, one block per step
        count = self.blocks
        for _ in range(count):
            block = generate_some_data()
            yield block
# >>>
class RandomIterable(object):
    """Iterable yielding the same two random (1, 4) arrays on every pass.

    The random generator state is saved the first time the iterable is
    consumed and restored at the start of each later pass.
    """

    def __init__(self):
        # saved generator state; None until the first pass has started
        self.state = None

    def __iter__(self):
        if self.state is not None:
            # later passes: rewind the generator to the saved state
            mdp.numx_rand.set_state(self.state)
        else:
            # first pass: remember where the generator currently stands
            self.state = mdp.numx_rand.get_state()
        for _ in range(2):
            sample = mdp.numx_rand.random((1,4))
            yield sample
# Iterating the same object twice prints identical values (see below).
iterable = RandomIterable()
for x in iterable:
    print x
# ...
# [[ 0.99586495  0.53463386  0.6306412   0.09679571]]
# [[ 0.51117469  0.46647448  0.95089738  0.94837122]]
for x in iterable:
    print x
# ...
# [[ 0.99586495  0.53463386  0.6306412   0.09679571]]
# [[ 0.51117469  0.46647448  0.95089738  0.94837122]]
def gen_data(blocks,dims):
    """Yield `blocks` arrays of 1000 random rows mixed by one shared matrix.

    A single (dims, dims) matrix of random values shifted by -0.5 is drawn
    once, before the first block is produced, and multiplied with fresh
    random data for every block.
    """
    mixing = mdp.numx_rand.random((dims,dims))-0.5
    for _ in xrange(blocks):
        # variables on columns, observations on rows
        samples = mdp.numx_rand.random((1000,dims))
        yield mdp.utils.mult(samples, mixing)
# ...
# >>>
# Nodes used by the checkpoint flow examples further down.
pca = mdp.nodes.PCANode(output_dim=0.9)
exp = mdp.nodes.PolynomialExpansionNode(2)
sfa = mdp.nodes.SFANode()
class PCADimensionExceededException(Exception):
    """Signals that a PCA node kept more output dimensions than allowed."""
# ...
# >>>
class CheckPCA(mdp.CheckpointFunction):
    def __init__(self,max_dim):
        self.max_dim = max_dim
    def __call__(self,node):
        node.stop_training()
        act_dim = node.get_output_dim()
        if act_dim > self.max_dim:
            errstr = 'PCA output dimensions exceeded maximum '+\
                     '(%d > %d)'%(act_dim,self.max_dim)
            raise PCADimensionExceededException, errstr
        else:
            print 'PCA output dimensions = %d'%(act_dim)
# ...
# >>>
# --- Checkpoint flows: one optional checkpoint function per node ---
flow = mdp.CheckpointFlow([pca, exp, sfa])
# Replace the first node with a fresh PCA node before training.
flow[0] = mdp.nodes.PCANode(output_dim=0.9)
# Only the PCA node gets a checkpoint; None is passed for the others.
flow.train([gen_data(10, 12), None, gen_data(10, 12)],
           [CheckPCA(10), None, None])
# PCA output dimensions = 6
# Second run with fresh nodes; the last node additionally gets a save
# checkpoint writing to 'dummy.pic'.
pca = mdp.nodes.PCANode(output_dim=0.9)
exp = mdp.nodes.PolynomialExpansionNode(2)
sfa = mdp.nodes.SFANode()
flow = mdp.CheckpointFlow([pca, exp, sfa])
flow.train([gen_data(10, 12), None, gen_data(10, 12)],
           [CheckPCA(10),
            None,
            mdp.CheckpointSaveFunction('dummy.pic',
                                       stop_training = 1,
                                       protocol = 0)])
# ...
# PCA output dimensions = 7
import cPickle
import os
# open() replaces the legacy file() builtin. The default text mode is kept
# on purpose: the checkpoint above was saved with protocol = 0.
fl = open('dummy.pic')
try:
    # NOTE: unpickling can execute arbitrary code; only load files that
    # this script has written itself.
    sfa_reloaded = cPickle.load(fl)
finally:
    # release the handle even when unpickling fails
    fl.close()
sfa_reloaded
# SFANode(input_dim=35, output_dim=35, dtype='d')
os.remove('dummy.pic')
# Extension-side class of the extension example: it declares the extension
# name "test" and an _execute that returns 0.
class TestExtensionNode(mdp.ExtensionNode):
    # name of the extension this class belongs to
    extension_name = "test"
    def _execute(self):
        return 0
# ...
# Plain node used by the extension example; its _execute returns 1.
# NOTE(review): _execute takes no data argument here, so this class is
# presumably only meant to illustrate the extension mechanism.
class TestNode(mdp.Node):
    def _execute(self):
        return 1
# ...
# Combines the extension class (listed first) with the plain node class;
# the body is intentionally empty.
class ExtendedTestNode(TestExtensionNode, TestNode):
    pass
# ...
# >>>
# --- Hierarchical networks: layers, flow nodes and switchboards ---
# A layer combines nodes side by side; its dimensions are the sums of the
# node dimensions (see repr below).
node1 = mdp.nodes.PCANode(input_dim=100, output_dim=10)
node2 = mdp.nodes.SFANode(input_dim=100, output_dim=20)
layer = mdp.hinet.Layer([node1, node2])
layer
# Layer(input_dim=200, output_dim=30, dtype=None)
# A whole flow can be wrapped into a single node and used inside a layer.
node1_1 = mdp.nodes.PCANode(input_dim=100, output_dim=50)
node1_2 = mdp.nodes.SFANode(input_dim=50, output_dim=10)
node1_flow = mdp.Flow([node1_1, node1_2])
node1 = mdp.hinet.FlowNode(node1_flow)
layer = mdp.hinet.Layer([node1, node2])
layer
# Layer(input_dim=200, output_dim=30, dtype=None)
# A switchboard routes input channels to outputs; each entry of
# `connections` names the input index feeding one output position.
switchboard = mdp.hinet.Switchboard(input_dim=6, connections=[0,1,2,3,4,3,4,5])
switchboard
# Switchboard(input_dim=6, output_dim=8, dtype=None)
x = mdp.numx.array([[2,4,6,8,10,12]])
switchboard.execute(x)
# array([[ 2,  4,  6,  8, 10,  8, 10, 12]])
# NOTE(review): `flow` is still the CheckpointFlow built earlier in the file.
mdp.hinet.show_flow(flow)
# --- Parallel training of a flow with a process scheduler ---
node1 = mdp.nodes.PCANode(input_dim=100, output_dim=10)
node2 = mdp.nodes.SFA2Node(input_dim=10, output_dim=10)
parallel_flow = mdp.parallel.ParallelFlow([node1, node2])
n_data_chunks = 2
# one list of data chunks per node in the flow
data_iterables = [[mdp.numx_rand.random((200, 100))
                   for _ in range(n_data_chunks)]
                   for _ in range(2)]
scheduler = mdp.parallel.ProcessScheduler(n_processes=2)
# Train exactly once and always shut the scheduler down afterwards. The
# original trained, shut the scheduler down, and then trained a second
# time with the scheduler that had already been shut down.
try:
    parallel_flow.train(data_iterables, scheduler=scheduler)
finally:
    scheduler.shutdown()
# ...
# --- Classifier nodes ---
gc = mdp.nodes.GaussianClassifierNode()
# A single label applies to the whole batch ...
gc.train(mdp.numx_rand.random((50, 3)), +1)
gc.train(mdp.numx_rand.random((50, 3)) - 0.8, -1)
# ... or one label per row can be given as a list.
gc.train(mdp.numx_rand.random((50, 3)), [+1] * 50)
test_data = mdp.numx.array([[0.1, 0.2, 0.1], [-0.1, -0.2, -0.1]])
gc.label(test_data)
# [1, -1]
gc.prob(test_data)
# [{-1: 0.21013407927789607, 1: 0.78986592072210393},
# {-1: 0.99911458988539714, 1: 0.00088541011460285866}]
gc.rank(test_data)
# [[1, -1], [-1, 1]]
# --- Slowly varying driving force for the logistic map example ---
p2 = mdp.numx.pi*2
t = mdp.numx.linspace(0,1,10000,endpoint=0) # time axis 1s, samplerate 10KHz
# sum of three sine waves at 5, 11 and 13 cycles over the time axis
dforce = mdp.numx.sin(p2*5*t) + mdp.numx.sin(p2*11*t) + mdp.numx.sin(p2*13*t)
def logistic_map(x,r):
    """Return one step of the logistic map, r*x*(1-x)."""
    scaled = r*x
    return scaled*(1-x)
# ...
# >>>
# Iterate the logistic map; its parameter r is modulated by the driving
# force at every step.
series = mdp.numx.zeros((10000,1),'d')
series[0] = 0.6
for i in range(1,10000):
    series[i] = logistic_map(series[i-1],3.6+0.13*dforce[i])
# ...
# >>>
# Flow: time embedding, polynomial expansion and SFA, with an eta
# measurement node at both ends.
flow = (mdp.nodes.EtaComputerNode() +
        mdp.nodes.TimeFramesNode(10) +
        mdp.nodes.PolynomialExpansionNode(3) +
        mdp.nodes.SFANode(output_dim=1) +
        mdp.nodes.EtaComputerNode() )
# ...
# >>>
flow.train(series)
slow = flow(series)
# Standardize the driving force and compare it with the extracted signal;
# the last 9 samples are dropped before the comparison.
resc_dforce = (dforce - mdp.numx.mean(dforce,0))/mdp.numx.std(dforce,0)
mdp.utils.cov2(resc_dforce[:-9],slow)
# 0.99992501533859179
print 'Eta value (time series): ', flow[0].get_eta(t=10000)
# Eta value (time series):  [ 3002.53380245]
print 'Eta value (slow feature): ', flow[-1].get_eta(t=9996)
# Eta value (slow feature):  [ 10.2185087]
# Fix the random seed for the code that follows.
mdp.numx_rand.seed(1266090063)
def uniform(min_, max_, dims):
    """Return an array of shape `dims` of random values between min_ and max_.

    The values come from mdp.numx_rand.random, rescaled by (max_ - min_)
    and shifted by min_.
    """
    span = max_-min_
    return mdp.numx_rand.random(dims)*span+min_
# ...
def circumference_distr(center, radius, n):
    """Return an (n, 2) array of random points on a circle's outline.

    The angle of every point is drawn with `uniform` from 0 to 2*pi; the
    circle has the given `radius` and is centered on (center[0], center[1]).
    """
    angle = uniform(0, 2*mdp.numx.pi, (n,1))
    xs = radius*mdp.numx.cos(angle)+center[0]
    ys = radius*mdp.numx.sin(angle)+center[1]
    return mdp.numx.concatenate((xs,ys), axis=1)
# ...
def circle_distr(center, radius, n):
    """Return an (n, 2) array of random points inside a filled circle.

    The angle is drawn from 0 to 2*pi, and the distance from the center is
    the square root of a value drawn from 0 to radius**2.
    """
    angle = uniform(0, 2*mdp.numx.pi, (n,1))
    squared_dist = uniform(0, radius*radius, (n,1))
    dist = mdp.numx.sqrt(squared_dist)
    xs = dist*mdp.numx.cos(angle)+center[0]
    ys = dist*mdp.numx.sin(angle)+center[1]
    return mdp.numx.concatenate((xs,ys), axis=1)
# ...
def rectangle_distr(center, w, h, n):
    """Return an (n, 2) array of random points inside a rectangle.

    The rectangle is `w` wide and `h` high and is centered on
    (center[0], center[1]).
    """
    half_w = w/2.
    half_h = h/2.
    xs = uniform(-half_w, half_w, (n,1))+center[0]
    ys = uniform(-half_h, half_h, (n,1))+center[1]
    return mdp.numx.concatenate((xs,ys), axis=1)
# ...
N = 2000
cf1 = circumference_distr([6,-0.5], 2, N)
cf2 = circumference_distr([3,-2], 0.3, N)
cl1 = circle_distr([-5,3], 0.5, N/2)
cl2 = circle_distr([3.5,2.5], 0.7, N)
r1 = rectangle_distr([-1.5,0], 1, 4, N)
r2 = rectangle_distr([+1.5,0], 1, 4, N)
r3 = rectangle_distr([0,+1.5], 2, 1, N/2)
r4 = rectangle_distr([0,-1.5], 2, 1, N/2)
x = mdp.numx.concatenate([cf1, cf2, cl1, cl2, r1,r2,r3,r4], axis=0)
x = mdp.numx.take(x,mdp.numx_rand.permutation(x.shape[0]), axis=0)
gng = mdp.nodes.GrowingNeuralGasNode(max_nodes=75)
STEP = 500
for i in range(0,x.shape[0],STEP):
    gng.train(x[i:i+STEP])
    # [...] plotting instructions
# ...
gng.stop_training()
n_obj = len(gng.graph.connected_components())
print n_obj
# 5
def s_distr(npoints, hole=False):
    """Return random points on a 3D S-shaped surface as (x, y, z, t).

    `t` is the random parameter along the S and `y` the random position
    across it (scaled by 5). If `hole` is True, only the points with t
    outside [0.3, 0.7] or y outside [1, 4] are returned, which leaves a
    hole in the middle of the surface.
    """
    t = mdp.numx_rand.random(npoints)
    y = mdp.numx_rand.random(npoints)*5.
    theta = 3.*mdp.numx.pi*(t-0.5)
    x = mdp.numx.sin(theta)
    z = mdp.numx.sign(theta)*(mdp.numx.cos(theta) - 1.)
    if not hole:
        return x, y, z, t
    # keep a point when it lies outside the central window in t or in y
    outside_t = (0.3>t) | (0.7<t)
    outside_y = (1.>y) | (4.<y)
    keep = mdp.numx.where(outside_t | outside_y)
    return x[keep], y[keep], z[keep], t[keep]
# --- Locally linear embedding of the S-shaped surface ---
# n sample points; k is passed as the first argument of the LLE nodes.
n, k = 1000, 15
# Full surface: project the 3D points to two dimensions.
x, y, z, t = s_distr(n, hole=False)
data = mdp.numx.array([x,y,z]).T
lle_projected_data = mdp.nodes.LLENode(k, output_dim=2)(data)
# Surface with a hole: compare LLENode and HLLENode on the same data.
x, y, z, t = s_distr(n, hole=True)
data = mdp.numx.array([x,y,z]).T
lle_projected_data = mdp.nodes.LLENode(k, output_dim=2)(data)
hlle_projected_data = mdp.nodes.HLLENode(k, output_dim=2)(data)
import bimdp
# --- BiMDP: nodes carry an id and can be looked up in the flow by it ---
pca_node = bimdp.nodes.PCABiNode(node_id="pca")
biflow = bimdp.BiFlow([pca_node])
biflow["pca"]
# PCABiNode(input_dim=None, output_dim=None, dtype=None, node_id="pca")
samples = mdp.numx_rand.random((100,10))
labels = mdp.numx.arange(100)
flow = bimdp.BiFlow([mdp.nodes.PCANode(), bimdp.nodes.FDABiNode()])
# The second argument carries extra per-node data: nothing for the PCA
# node, and the labels under the key "cl" for the FDA node.
flow.train([[samples],[samples]], [None,[{"cl": labels}]])
# git clone git://mdp-toolkit.git.sourceforge.net/gitroot/mdp-toolkit/mdp-toolkit
# git clone git://mdp-toolkit.git.sourceforge.net/gitroot/mdp-toolkit/docs
# git clone git://mdp-toolkit.git.sourceforge.net/gitroot/mdp-toolkit/examples
# git clone git://mdp-toolkit.git.sourceforge.net/gitroot/mdp-toolkit/contrib
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.