searchsorted bench.py : » Database » PyTables » tables-2.1.2 » bench » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Database » PyTables
PyTables » tables 2.1.2 » bench » searchsorted-bench.py
#!/usr/bin/env python

import copy

import time
import numarray as NA
from tables import *
import random

class Small(IsDescription):
    var1 = StringCol(itemsize=4)
    var2 = Int32Col()
    var3 = Float64Col()
    var4 = BoolCol()

# Define a user record to characterize some kind of particles
class Medium(IsDescription):
    var1        = StringCol(itemsize=16)  # 16-character String
    #float1      = Float64Col(dflt=2.3)
    #float2      = Float64Col(dflt=2.3)
    #zADCcount    = Int16Col()    # signed short integer
    var2        = Int32Col()    # signed short integer
    var3        = Float64Col()
    grid_i      = Int32Col()    # integer
    grid_j      = Int32Col()    # integer
    pressure    = Float32Col()    # float  (single-precision)
    energy      = Float64Col(shape=2)    # double (double-precision)

def createFile(filename, nrows, filters, atom, recsize, index, verbose):

    # Open a file in "w"rite mode
    fileh = openFile(filename, mode = "w", title="Searchsorted Benchmark",
                     filters=filters)
    title = "This is the IndexArray title"
    # Create an IndexArray instance
    rowswritten = 0
    # Create an entry
    klass = {"small":Small, "medium":Medium}
    table = fileh.createTable(fileh.root, 'table', klass[recsize], title,
                              None, nrows)
    for i in xrange(nrows):
        #table.row['var1'] = str(i)
        #table.row['var2'] = random.randrange(nrows)
        table.row['var2'] = i
        table.row['var3'] = i
        #table.row['var4'] = i % 2
        #table.row['var4'] = i > 2
        table.row.append()
    rowswritten += nrows
    table.flush()
    rowsize = table.rowsize
    indexrows = 0

    # Index one entry:
    if index:
        if atom == "string":
            indexrows = table.cols.var1.createIndex()
        elif atom == "bool":
            indexrows = table.cols.var4.createIndex()
        elif atom == "int":
            indexrows = table.cols.var2.createIndex()
        elif atom == "float":
            indexrows = table.cols.var3.createIndex()
        else:
            raise ValueError, "Index type not supported yet"
        if verbose:
            print "Number of indexed rows:", indexrows
    # Close the file (eventually destroy the extended type)
    fileh.close()

    return (rowswritten, rowsize)

def readFile(filename, atom, niter, verbose):
    # Open the HDF5 file in read-only mode

    fileh = openFile(filename, mode = "r")
    table = fileh.root.table
    print "reading", table
    if atom == "string":
        idxcol = table.cols.var1.index
    elif atom == "bool":
        idxcol = table.cols.var4.index
    elif atom == "int":
        idxcol = table.cols.var2.index
    else:
        idxcol = table.cols.var3.index
    if verbose:
        print "Max rows in buf:", table.nrowsinbuf
        print "Rows in", table._v_pathname, ":", table.nrows
        print "Buffersize:", table.rowsize * table.nrowsinbuf
        print "MaxTuples:", table.nrowsinbuf
        print "Chunk size:", idxcol.sorted.chunksize
        print "Number of elements per slice:", idxcol.nelemslice
        print "Slice number in", table._v_pathname, ":", idxcol.nrows

    rowselected = 0
    if atom == "string":
        for i in xrange(niter):
            #results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)]
#             results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)]
            results = [p["var1"] #p.nrow()
                       for p in table.where(table.cols.var1 == "1111")]
#                      for p in table.where("1000"<=table.cols.var1<="1010")]
            rowselected += len(results)
    elif atom == "bool":
        for i in xrange(niter):
            results = [p["var2"] #p.nrow()
                       for p in table.where(table.cols.var4==0)]
            rowselected += len(results)
    elif atom == "int":
        for i in xrange(niter):
            #results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)]
#             results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)]
            results = [p["var2"] #p.nrow()
#                        for p in table.where(110*i<=table.cols.var2<110*(i+1))]
#                       for p in table.where(1000-30<table.cols.var2<1000+60)]
                       for p in table.where(table.cols.var2<=400)]
            rowselected += len(results)
    elif atom == "float":
        for i in xrange(niter):
#         results = [(table.row.nrow(), table.row["var3"])
#                    for i in table.where(3<=table.cols.var3 < 5.)]
#             results = [(p.nrow(), p["var3"])
#                        for p in table.where(1000.-i<=table.cols.var3<1000.+i)]
            results = [p["var3"] # (p.nrow(), p["var3"])
                       for p in table.where(100*i<=table.cols.var3<100*(i+1))]
#                        for p in table
#                        if 100*i<=p["var3"]<100*(i+1)]
#             results = [ (p.nrow(), p["var3"]) for p in table
#                         if (1000.-i <= p["var3"] < 1000.+i) ]
            rowselected += len(results)
        else:
            raise ValueError, "Unsuported atom value"
    if verbose and 1:
        print "Values that fullfill the conditions:"
        print results

    rowsread = table.nrows * niter
    rowsize = table.rowsize

    # Close the file (eventually destroy the extended type)
    fileh.close()

    return (rowsread, rowselected, rowsize)


def searchFile(filename, atom, verbose, item):
    # Open the HDF5 file in read-only mode

    fileh = openFile(filename, mode = "r")
    rowsread = 0
    uncomprBytes = 0
    table = fileh.root.table
    rowsize = table.rowsize
    if atom == "int":
        idxcol = table.cols.var2.index
    elif atom == "float":
        idxcol = table.cols.var3.index
    else:
        raise ValueError, "Unsuported atom value"
    print "Searching", table, "..."
    if verbose:
        print "Chunk size:", idxcol.sorted.chunksize
        print "Number of elements per slice:", idxcol.sorted.nelemslice
        print "Slice number in", table._v_pathname, ":", idxcol.sorted.nrows

    (positions, niter) = idxcol.search(item)
    if verbose:
        print "Positions for item",item,"==>",positions
        print "Total iterations in search:", niter

    rowsread += table.nrows
    uncomprBytes += idxcol.sorted.chunksize * niter * idxcol.sorted.itemsize

    results = table.read(coords=positions)
    print "results length:", len(results)
    if verbose:
        print "Values that fullfill the conditions:"
        print results

    # Close the file (eventually destroy the extended type)
    fileh.close()

    return (rowsread, uncomprBytes, niter)


if __name__=="__main__":
    import sys
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except:
        psyco_imported = 0

    import time

    usage = """usage: %s [-v] [-p] [-R range] [-r] [-w] [-s recsize ] [-a
    atom] [-c level] [-l complib] [-S] [-F] [-i item] [-n nrows] [-x]
    [-k niter] file
            -v verbose
            -p use "psyco" if available
            -R select a range in a field in the form "start,stop,step"
            -r only read test
            -w only write test
            -s record size
            -a use [float], [int], [bool] or [string] atom
            -c sets a compression level (do not set it or 0 for no compression)
            -S activate shuffling filter
            -F activate fletcher32 filter
            -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2")
            -i item to search
            -n set the number of rows in tables
            -x don't make indexes
            -k number of iterations for reading\n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpSFR:rwxk:s:a:c:l:i:n:')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) <> 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    verbose = 0
    rng = None
    item = None
    atom = "int"
    fieldName = None
    testread = 1
    testwrite = 1
    usepsyco = 0
    complevel = 0
    shuffle = 0
    fletcher32 = 0
    complib = "zlib"
    nrows = 100
    recsize = "small"
    index = 1
    niter = 1

    # Get the options
    for option in opts:
        if option[0] == '-v':
            verbose = 1
        if option[0] == '-p':
            usepsyco = 1
        if option[0] == '-S':
            shuffle = 1
        if option[0] == '-F':
            fletcher32 = 1
        elif option[0] == '-R':
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == '-r':
            testwrite = 0
        elif option[0] == '-w':
            testread = 0
        elif option[0] == '-x':
            index = 0
        elif option[0] == '-s':
            recsize = option[1]
        elif option[0] == '-a':
            atom = option[1]
            if atom not in ["float", "int", "bool", "string"]:
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '-c':
            complevel = int(option[1])
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-i':
            item = eval(option[1])
        elif option[0] == '-n':
            nrows = int(option[1])
        elif option[0] == '-k':
            niter = int(option[1])

    # Build the Filters instance
    filters = Filters(complevel=complevel, complib=complib,
                      shuffle=shuffle, fletcher32=fletcher32)

    # Catch the hdf5 file passed as the last argument
    file = pargs[0]

    if testwrite:
        print "Compression level:", complevel
        if complevel > 0:
            print "Compression library:", complib
            if shuffle:
                print "Suffling..."
        t1 = time.time()
        cpu1 = time.clock()
        if psyco_imported and usepsyco:
            psyco.bind(createFile)
        (rowsw, rowsz) = createFile(file, nrows, filters,
                                    atom, recsize, index, verbose)
        t2 = time.time()
        cpu2 = time.clock()
        tapprows = round(t2-t1, 3)
        cpuapprows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpuapprows/tapprows, 2)*100)
        print "Rows written:", rowsw, " Row size:", rowsz
        print "Time writing rows: %s s (real) %s s (cpu)  %s%%" % \
              (tapprows, cpuapprows, tpercent)
        print "Write rows/sec: ", int(rowsw / float(tapprows))
        print "Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))

    if testread:
        if psyco_imported and usepsyco:
            psyco.bind(readFile)
            psyco.bind(searchFile)
        t1 = time.time()
        cpu1 = time.clock()
        if rng or item:
            (rowsr, uncomprB, niter) = searchFile(file, atom, verbose, item)
        else:
            for i in range(1):
                (rowsr, rowsel, rowsz) = readFile(file, atom, niter, verbose)
        t2 = time.time()
        cpu2 = time.clock()
        treadrows = round(t2-t1, 3)
        cpureadrows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpureadrows/treadrows, 2)*100)
        tMrows = rowsr/(1000*1000.)
        sKrows = rowsel/1000.
        print "Rows read:", rowsr, "Mread:", round(tMrows, 3), "Mrows"
        print "Rows selected:", rowsel, "Ksel:", round(sKrows,3), "Krows"
        print "Time reading rows: %s s (real) %s s (cpu)  %s%%" % \
              (treadrows, cpureadrows, tpercent)
        print "Read Mrows/sec: ", round(tMrows / float(treadrows), 3)
        #print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))
#       print "Uncompr MB :", int(uncomprB / (1024 * 1024))
#       print "Uncompr MB/s :", int(uncomprB / (treadrows * 1024 * 1024))
#       print "Total chunks uncompr :", int(niter)
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.