PyTables » tables 2.1.2 » contrib » make_hdf.py

#!/usr/bin/env python

from __future__ import generators

import tables, cPickle, time
#################################################################################

def is_scalar(item):
    '''Classify item: 'str' for strings, 'notstr' for non-iterable scalars
    such as numbers, and 0 for any other iterable (list-like).'''
    try:
        iter(item)
        #iterable, but it could still be a string
        try:
            item[:0]+'' #adding '' to an empty slice only works for strings
            return 'str'
        except:
            return 0
    except:
        return 'notstr'
def is_dict(item):
    '''Return 1 if item behaves like a dictionary, else 0.'''
    try:
        item.iteritems()
        return 1
    except:
        return 0
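# A quick illustration of the probing contract used throughout this module
# (a sketch; these literals are hypothetical inputs, not part of the original):
#   is_scalar(5)         -> 'notstr'  (not iterable: a numeric scalar)
#   is_scalar('abc')     -> 'str'     (a string)
#   is_scalar([1, 2, 3]) -> 0         (iterable, non-string: list-like)
#   is_dict({'a': 1})    -> 1
#   is_dict([1, 2, 3])   -> 0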

def make_col(row_type,row_name,row_item,str_len):
    '''For strings, always make the column at least 80 chars wide, or
    twice the max char size seen so far, whichever is larger.'''
    set_len=80
    if str_len:
        if 2*str_len>set_len:
            set_len=2*str_len
        row_type[row_name]=tables.StringCol(set_len)
    else:
        type_matrix={
            int:tables.Int32Col(),
            float:tables.Float32Col(),
            }
        row_type[row_name]=type_matrix[type(row_item)]
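# For example (a sketch, assuming the PyTables 2.x Col classes used above,
# with hypothetical names and values):
#   make_col(rt, 'name', 'fred', 4)  ->  rt['name'] = StringCol(80)
#   make_col(rt, 'age', 42, 0)       ->  rt['age']  = Int32Col()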
def make_row(data):
    '''Build a PyTables description dict for data, which may be a scalar,
    a flat list, or a list of lists (one table column per sublist).'''
    row_type={}
    scalar_type=is_scalar(data)
    if scalar_type:
        if scalar_type=='str':
            make_col(row_type,'scalar',data,len(data))
        else:
            make_col(row_type,'scalar',data,0)
    else: #it is list-like
        the_type=is_scalar(data[0])
        if the_type=='str':
            #get the max string length in the list
            the_max=max([len(i) for i in data])
            make_col(row_type,'col',data[0],the_max)
        elif the_type:
            make_col(row_type,'col',data[0],0)
        else: #list within the list, make many columns
            make_col(row_type,'col_depth',0,0)
            count=0
            for col in data:
                the_type=is_scalar(col[0])
                if the_type=='str':
                    #get the max string length in this column
                    the_max=max([len(i) for i in col])
                    make_col(row_type,'col_'+str(count),col[0],the_max)
                elif the_type:
                    make_col(row_type,'col_'+str(count),col[0],0)
                else:
                    raise ValueError('too many nested levels of lists')
                count+=1
    return row_type
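# The three supported shapes map onto descriptions like these (a sketch,
# with hypothetical inputs):
#   make_row('abc')            -> {'scalar': StringCol(80)}
#   make_row([1, 2, 3])        -> {'col': Int32Col()}
#   make_row([[1, 2], [3.0]])  -> {'col_depth': Int32Col(),
#                                  'col_0': Int32Col(), 'col_1': Float32Col()}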
def add_table(fileh,group_obj,data,table_name):
    #group_obj may be a Group node or its path string; createTable accepts either
    #figure out if it is a list of lists or a single list
    #get types of columns
    row_type=make_row(data)
    table1=fileh.createTable(group_obj, table_name, row_type, 'H',
                             filters=tables.Filters(complevel=1))
    row=table1.row

    if is_scalar(data):
        row['scalar']=data
        row.append()
    else:
        if is_scalar(data[0]):
            for i in data:
                row['col']=i
                row.append()
        else:
            #one table column per sublist; rows are appended column by
            #column, and col_depth records each column's length for the reader
            count=0
            for col in data:
                row['col_depth']=len(col)
                for the_row in col:
                    if is_scalar(the_row):
                        row['col_'+str(count)]=the_row
                        row.append()
                    else:
                        raise ValueError('too many levels of lists')
                count+=1
    table1.flush()
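# Minimal usage sketch (hypothetical file and group names, assuming the
# PyTables 2.x API used above):
#   fileh = tables.openFile('demo.hdf', mode='w')
#   grp = fileh.createGroup(fileh.root, 'numbers')
#   add_table(fileh, grp, [1, 2, 3], 'tbl_0')
#   fileh.close()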

def add_cache(fileh,cache):
    '''Pickle the group/leaf layout and store it in the file as a table of
    8000-char string chunks, so a reader can recover the tree cheaply.'''
    group_name='pytables_cache_v0'
    table_name='cache0'
    root=fileh.root
    group_obj=fileh.createGroup(root,group_name)
    cache_str=cPickle.dumps(cache,0)
    #protocol-0 pickles are newline-delimited text; swap newlines for
    #chr(1) so each chunk stores as a single clean string
    cache_str=cache_str.replace('\n',chr(1))
    cache_pieces=[]
    while cache_str:
        cache_part=cache_str[:8000]
        cache_str=cache_str[8000:]
        if cache_part:
            cache_pieces.append(cache_part)
    row_type={}
    row_type['col_0']=tables.StringCol(8000)
    #
    table_cache=fileh.createTable(group_obj, table_name, row_type, 'H',
                                  filters=tables.Filters(complevel=1))
    for piece in cache_pieces:
        table_cache.row['col_0']=piece
        table_cache.row.append()
    table_cache.flush()
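# The reader (Hdf_dict.get_cache below) reverses this: it concatenates the
# chunks, swaps chr(1) back to newlines, and unpickles. A sketch of the
# reassembly step over a hypothetical chunk list:
#   total = ''.join(chunks).replace(chr(1), '\n')
#   layout = cPickle.loads(total)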
def save2(hdf_file,data):
    '''Write a nested dict to hdf_file: inner dicts become HDF5 groups,
    leaves become tables, and the layout is cached for fast reads.'''
    fileh=tables.openFile(hdf_file,mode='w',title='logon history')
    cache_root=cache={}
    root_path=fileh.root._v_pathname
    stack=[(root_path,data,cache)]
    table_num=0

    while stack:
        (group_obj_path,data,cache)=stack.pop()
        #e.g. data={'wilma':{'mother':[22,23,24]}} with grp_name 'wilma'
        for grp_name in data:
            cache[grp_name]={}
            new_group_obj=fileh.createGroup(group_obj_path,grp_name)
            new_path=new_group_obj._v_pathname
            #if it is a dict, you have a bunch of sub-groups
            if is_dict(data[grp_name]): #{'mother':[22,23,24]}
                stack.append((new_path,data[grp_name],cache[grp_name]))
            #otherwise you have a table
            else:
                #e.g. data[grp_name]=[110,130,140] or [[1,2,3],[4,5,6]]
                add_table(fileh,new_path,data[grp_name],'tbl_'+str(table_num))
                table_num+=1

    add_cache(fileh,cache_root)
    fileh.close()
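# Usage sketch (hypothetical data): 'family' becomes a group with a 'kids'
# sub-group holding a table; 'ages' becomes a table at the root.
#   save2('people.hdf', {'ages': [22, 23], 'family': {'kids': [1, 2]}})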

    
########################
class Hdf_dict(dict):
    '''Read-only dict-like view over a file written by save2; keys mirror
    the group tree and leaf lookups read the stored tables back.'''
    def __init__(self,hdf_file,hdf_dict=None,stack=None):
        #None defaults avoid the shared mutable default-argument pitfall
        self.hdf_file=hdf_file
        self.stack=stack or []
        if self.stack:
            self.hdf_dict=hdf_dict
        else:
            self.hdf_dict=self.get_cache()
        self.cur_dict=self.hdf_dict
    def get_cache(self):
        '''Load the pickled layout that save2 stored via add_cache.'''
        fileh=tables.openFile(self.hdf_file,rootUEP='pytables_cache_v0')
        table=fileh.root.cache0
        total=[]
        print 'reading'
        begin=time.time()
        for i in table.iterrows():
            total.append(i['col_0'])
        fileh.close()
        total=''.join(total)
        total=total.replace(chr(1),'\n')
        print 'loaded cache len=',len(total),time.time()-begin
        begin=time.time()
        a=cPickle.loads(total)
        print 'cache',time.time()-begin
        return a
    def has_key(self, k):
        return k in self.cur_dict
    def keys(self):
        return self.cur_dict.keys()
    def get(self,key,default=None):
        try:
            return self.__getitem__(key)
        except KeyError:
            return default
    def items(self):
        return list(self.iteritems())
    def values(self):
        return list(self.itervalues())

    ###########################################
    def __len__(self):
        return len(self.cur_dict)
    def __getitem__(self, k):
        if k not in self.cur_dict:
            raise KeyError, k
        new_stack=self.stack[:]
        new_stack.append(k)
        #if k still has children it is a group: hand back a nested view
        if self.cur_dict[k]:
            return Hdf_dict(self.hdf_file,hdf_dict=self.cur_dict[k],stack=new_stack)
        #otherwise k is a leaf: read its table back out of the file
        fileh=tables.openFile(self.hdf_file,rootUEP='/'.join(new_stack))
        try:
            for table in fileh.root:
                colnames=table.colnames
                if 'scalar' in colnames:
                    #they stored a single item
                    for row in table:
                        return row['scalar']
                elif 'col' in colnames:
                    #they stored a flat list of data
                    return [ row['col'] for row in table ]
                else:
                    #a list of lists: rows were appended one column at a
                    #time, and col_depth holds the current column's length
                    cur_column=[]
                    total_columns=[]
                    col_num=0
                    cur_row=0
                    num_rows=0
                    for row in table:
                        if not num_rows:
                            num_rows=row['col_depth']
                        if cur_row==num_rows:
                            cur_row=num_rows=0
                            col_num+=1
                            total_columns.append(cur_column)
                            cur_column=[]
                        cur_column.append(row['col_'+str(col_num)])
                        cur_row+=1
                    total_columns.append(cur_column)
                    return total_columns
        finally:
            fileh.close()
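    # Reconstruction sketch for the list-of-lists case (values from the
    # write_small demo below): rows (col_depth, col_0, col_1) =
    # (3, 9110, _), (3, 9130, _), (3, 9140, _), (3, _, 91), (3, _, 92),
    # (3, _, 93) split on col_depth boundaries and come back as
    # [[9110, 9130, 9140], [91, 92, 93]].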
    def iterkeys(self):
        for key in self.keys():
            yield key
    def __iter__(self):
        return self.iterkeys()
    def itervalues(self):
        for k in self.iterkeys():
            v=self.__getitem__(k)
            yield v
    def iteritems(self):
        #yield children as (key, value) pairs
        for k in self.iterkeys():
            v=self.__getitem__(k)
            yield (k,v)
    def __repr__(self):
        return '{Hdf dict}'
    def __str__(self):
        return self.__repr__()
    #####
    def setdefault(self,key,default=None):
        try:
            return self.__getitem__(key)
        except KeyError:
            self.__setitem__(key,default)
            return default
    def update(self,d):
        for k,v in d.items():
            self.__setitem__(k, v)
    def popitem(self):
        try:
            k, v = self.iteritems().next()
            del self[k]
            return k, v
        except StopIteration:
            raise KeyError, "Hdf Dict is empty"       
    def __setitem__(self, key, value):
        raise NotImplementedError
    def __delitem__(self, key):
        raise NotImplementedError                
    def __hash__(self):
        raise TypeError, "Hdf dict objects are unhashable"
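# Usage sketch for the reader side (hypothetical file written by save2):
#   d = Hdf_dict('people.hdf')
#   d.keys()      # top-level group names
#   d['family']   # a nested Hdf_dict for the sub-group
#   d['ages']     # the list stored at that leaf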

if __name__=='__main__':

    def write_small(fname=''):
        data1={
        'fred':['a','b','c'],
        'barney':[[9110,9130,9140],[91,92,93]],
        'wilma':{'mother':{'pebbles':[22,23,24],'bambam':[67,68,69]}}
        }

        print 'saving'
        save2(fname,data1)
        print 'saved'
    def read_small(fname=''):
        #a=make_hdf.Hdf_dict(fname) #when imported as a module
        a=Hdf_dict(fname)
        print a['wilma']
        b=a['wilma']
        for i in b:
            print i

        print a.keys()
        print 'has fred',a.has_key('fred')
        print 'length a',len(a)
        print 'get',a.get('fred'),a.get('not here')
        print 'wilma keys',a['wilma'].keys()
        print 'barney',a['barney']
        print 'get items'
        print a.items()
        for i in a.iteritems():
            print 'item',i
        for i in a.itervalues():
            print i
    a=raw_input('enter y to write out test file to test.hdf')
    if a.strip()=='y':
        print 'writing'
        write_small('test.hdf')
        print 'reading'
        read_small('test.hdf')
    