import sys
import subprocess
import numpy
import cPickle
import tables
from time import time
# Dimensions of the benchmark matrix: N rows by M columns
N = 10
M = 125000
# Path of the scratch file that each benchmark writes and then reads back
filename = "/tmp/dumpfile.data"
# Function to get the size of a file
def get_filesize(path=None):
    """Return the on-disk size of a file as reported by ``du -h``.

    ``sync`` is run first so that pending writes reach the disk before the
    size is measured.

    path -- file to measure; defaults to the module-level ``filename``

    Returns the first whitespace-separated field of the ``du -h`` output,
    i.e. the human-readable size (something like ``"9.6M"``).

    Raises IndexError if ``du`` produced no output (for example because the
    file does not exist).
    """
    if path is None:
        path = filename
    # Flush dirty buffers to disk so that `du` sees the final size.
    subprocess.call(["sync"])
    # Argument lists bypass the shell, so a path containing spaces or shell
    # metacharacters is handed to `du` unchanged.
    proc = subprocess.Popen(["du", "-h", path], stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and waits for the child, so
    # neither a file descriptor nor a zombie process is left behind.
    out, _ = proc.communicate()
    return out.split()[0]
# Print out some statistics
def print_stats(explain, tref, tpick):
ttime = time()-tref
print "%s %ss. " %(explain, round(ttime, 3)),
print "Speed-up over cPickle: %sx" % round(tpick/ttime, 2)
print "File size:", get_filesize()
# Print some preliminary information
print 'Python version: %s' % sys.version
print "NumPy version: %s" % numpy.__version__
print "PyTables version: %s" % tables.__version__
print "Checking with a %sx%s matrix of float64 elements (%s MB)" % \
(N, M, round(N*M*8/(1024.*1024),3))
# Start the actual benchmarks
print "***** cPickle (protocol 2) *****"
na = numpy.random.rand(N, M)
tref = time()
f = file(filename, 'w')
cPickle.dump(na, f, 2)
tpickw = time()-tref
f.close()
print "Time for writing: %ss" % round(tpickw, 3)
print "File size:", get_filesize()
tref = time()
f = file(filename, 'r')
nar = cPickle.load(f)
tpickr = time()-tref
print "Time for reading: %ss" % round(tpickr, 3)
f.close()
print "***** PyTables EArray (dump row to row) *****"
na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M))
for i in xrange(N):
a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)
tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)
print "***** PyTables EArray (dump row to row, compressed with zlib) ******"
na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M),
filters=tables.Filters(complevel=3, complib='zlib'))
for i in xrange(N):
a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)
tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)
print "***** PyTables EArray (dump row to row, compressed with lzo) *****"
na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M),
filters=tables.Filters(complevel=3, complib='lzo'))
for i in xrange(N):
a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)
tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)
print "***** PyTables EArray (complete dump) *****"
na = numpy.random.rand(N, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M))
a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)
tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)
print "***** PyTables Array *****"
na = numpy.random.rand(N, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createArray(f.root, 'array', na)
f.close()
print_stats("Time for writing:", tref, tpickw)
tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)
|