ptrepack.py :  » Database » PyTables » tables-2.1.2 » tables » scripts » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Database » PyTables 
PyTables » tables 2.1.2 » tables » scripts » ptrepack.py
########################################################################
#
#       License: BSD
#       Created: February 10, 2004
#       Author:  Francesc Alted - faltet@pytables.com
#
#       $Id: ptrepack.py 3776 2008-10-02 17:50:07Z faltet $
#
########################################################################

"""This utility lets you repack your data files in a flexible way.

Pass the flag -h to this for help on usage.

"""

import sys
import os.path
import time
import getopt
import warnings

from tables.file import openFile
from tables.group import Group
from tables.leaf import Filters
from tables.exceptions import \
     OldIndexWarning, NoIndexingWarning, NoSuchNodeError, FlavorWarning

# Global variables
verbose = False
regoldindexes = True


def newdstGroup(dstfileh, dstgroup, title, filters):
    group = dstfileh.root
    # Now, create the new group. This works even if dstgroup == '/'
    for nodeName in dstgroup.split('/'):
        if nodeName == '':
            continue
        # First try if possible intermediate groups does already exist.
        try:
            group2 = dstfileh.getNode(group, nodeName)
        except NoSuchNodeError:
            # The group does not exist. Create it.
            group2 = dstfileh.createGroup(group, nodeName,
                                          title=title,
                                          filters=filters)
        group = group2
    return group


def recreateIndexes(table, dstfileh, dsttable):
    listoldindexes = table._listoldindexes
    if listoldindexes != []:
        if not regoldindexes:
            if verbose:
                print "[I]Not regenerating indexes for table: '%s:%s'" % \
                      (dstfileh.filename, dsttable._v_pathname)
            return
        # Now, recreate the indexed columns
        if verbose:
            print "[I]Regenerating indexes for table: '%s:%s'" % \
                  (dstfileh.filename, dsttable._v_pathname)
        for colname in listoldindexes:
            if verbose:
                print "[I]Indexing column: '%s'. Please wait..." % colname
            colobj = dsttable.cols._f_col(colname)
            # We don't specify the filters for the indexes
            colobj.createIndex(filters = None)


def copyLeaf(srcfile, dstfile, srcnode, dstnode, title,
             filters, copyuserattrs, overwritefile, overwrtnodes, stats,
             start, stop, step, chunkshape, sortby, forceCSI,
             propindexes, upgradeflavors):
    # Open the source file
    if forceCSI:
        srcfileh = openFile(srcfile, 'a')
    else:
        srcfileh = openFile(srcfile, 'r')
    # Get the source node (that should exist)
    srcNode = srcfileh.getNode(srcnode)

    # Get the destination node and its parent
    last_slash = dstnode.rindex('/')
    if last_slash == len(dstnode)-1:
        # print "Detected a trailing slash in destination node. Interpreting it as a destination group."
        dstgroup = dstnode[:-1]
    elif last_slash > 0:
        dstgroup = dstnode[:last_slash]
    else:
        dstgroup = "/"
    dstleaf = dstnode[last_slash+1:]
    if dstleaf == "":
        dstleaf = srcNode.name
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a')
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)

    # Finally, copy srcNode to dstNode
    try:
        dstNode = srcNode.copy(
            dstGroup, dstleaf, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcnode, dstfile, dstnode)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        # Remove original flavor in case the source file has 1.x format
        dstNode.delAttr('FLAVOR')

    # Recreate possible old indexes in destination node
    if srcNode._c_classId == "TABLE":
        recreateIndexes(srcNode, dstfileh, dstNode)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()


def copyChildren(srcfile, dstfile, srcgroup, dstgroup, title,
                 recursive, filters, copyuserattrs, overwritefile,
                 overwrtnodes, stats, start, stop, step,
                 chunkshape, sortby, forceCSI, propindexes,
                 upgradeflavors):
    "Copy the children from source group to destination group"
    # Open the source file with srcgroup as rootUEP
    if forceCSI:
        srcfileh = openFile(srcfile, 'a', rootUEP=srcgroup)
    else:
        srcfileh = openFile(srcfile, 'r', rootUEP=srcgroup)
    #  Assign the root to srcGroup
    srcGroup = srcfileh.root

    created_dstGroup = False
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a')
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
            created_dstGroup = True
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)
        created_dstGroup = True

    # Copy the attributes to dstGroup, if needed
    if created_dstGroup and copyuserattrs:
        srcGroup._v_attrs._f_copy(dstGroup)

    # Finally, copy srcGroup children to dstGroup
    try:
        srcGroup._f_copyChildren(
            dstGroup, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcgroup, dstfile, dstgroup)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired. In particular, pay attention that rootUEP is not fooling you."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        for dstNode in dstGroup._f_walkNodes("Leaf"):
            # Remove original flavor in case the source file has 1.x format
            dstNode.delAttr('FLAVOR')

    # Convert the remaining tables with old indexes (if any)
    for table in srcGroup._f_walkNodes("Table"):
        dsttable = dstfileh.getNode(dstGroup, table._v_pathname)
        recreateIndexes(table, dstfileh, dsttable)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()


def main():
    global verbose
    global regoldindexes

    usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-copyuser-attrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] [--chunkshape=value] [--upgrade-flavors] [--dont-regenerate-old-indexes] [--sortby=column] [--forceCSI] [--propindexes] sourcefile:sourcegroup destfile:destgroup
     -h -- Print usage message.
     -v -- Show more information.
     -o -- Overwite destination file.
     -R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
         during the copy of *all* the leaves.  Default values are
         "None,None,1", which means a copy of all the rows.
     --non-recursive -- Do not do a recursive copy. Default is to do it.
     --dest-title=title -- Title for the new file (if not specified,
         the source is copied).
     --dont-copy-userattrs -- Do not copy the user attrs (default is to do it)
     --overwrite-nodes -- Overwrite destination nodes if they exist. Default is
         to not overwrite them.
     --complevel=(0-9) -- Set a compression level (0 for no compression, which
         is the default).
     --complib=lib -- Set the compression library to be used during the copy.
         lib can be set to "zlib", "lzo" or "bzip2". Defaults to "zlib".
     --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
         if complevel>0).
     --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter
        (not active by default).
     --keep-source-filters -- Use the original filters in source files. The
         default is not doing that if any of --complevel, --complib, --shuffle
         or --fletcher32 option is specified.
     --chunkshape=("keep"|"auto"|int|tuple) -- Set a chunkshape.  A value
         of "auto" computes a sensible value for the chunkshape of the
         leaves copied.  The default is to "keep" the original value.
     --upgrade-flavors -- When repacking PyTables 1.x files, the flavor of
         leaves will be unset. With this, such a leaves will be serialized
         as objects with the internal flavor ('numpy' for 2.x series).
     --dont-regenerate-old-indexes -- Disable regenerating old indexes. The
         default is to regenerate old indexes as they are found.
     --sortby=column -- Do a table copy sorted by the values of "column".
         This requires an existing index in "column".  For reversing the order,
         use a negative value in the "step" part of "RANGE" (see "-R" flag).
         Only applies to table objects.
     --forceCSI -- Force the creation of a CSI index in case one is not
         available for the --sortby column (this implies the modification of
         the *source* file).  The default is to not create it.
     --propindexes -- Propagate the indexes existing in original tables.  The
         default is to not propagate them.  Only applies to table objects.
    \n""" % os.path.basename(sys.argv[0])


    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:',
                                    ['non-recursive',
                                     'dest-title=',
                                     'dont-copy-userattrs',
                                     'overwrite-nodes',
                                     'complevel=',
                                     'complib=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'keep-source-filters',
                                     'chunkshape=',
                                     'upgrade-flavors',
                                     'dont-regenerate-old-indexes',
                                     'sortby=',
                                     'forceCSI',
                                     'propindexes',
                                     ])
    except:
        (type, value, traceback) = sys.exc_info()
        print "Error parsing the options. The error was:", value
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    overwritefile = False
    keepfilters = False
    chunkshape = "keep"
    complevel = None
    complib = None
    shuffle = None
    fletcher32 = None
    title = ""
    copyuserattrs = True
    rng = None
    recursive = True
    overwrtnodes = False
    upgradeflavors = False
    sortby = None
    forceCSI = False
    propindexes = False

    # Get the options
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            verbose = True
        elif option[0] == '-o':
            overwritefile = True
        elif option[0] == '-R':
            try:
                rng = eval("slice("+option[1]+")")
            except:
                print "Error when getting the range parameter."
                (type, value, traceback) = sys.exc_info()
                print "  The error was:", value
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '--dest-title':
            title = option[1]
        elif option[0] == '--dont-copy-userattrs':
            copyuserattrs = False
        elif option[0] == '--non-recursive':
            recursive = False
        elif option[0] == '--overwrite-nodes':
            overwrtnodes = True
        elif option[0] == '--keep-source-filters':
            keepfilters = True
        elif option[0] == '--chunkshape':
             chunkshape = option[1]
             if chunkshape.isdigit() or chunkshape.startswith('('):
                 chunkshape = eval(chunkshape)
        elif option[0] == '--upgrade-flavors':
            upgradeflavors = True
        elif option[0] == '--dont-regenerate-old-indexes':
            regoldindexes = False
        elif option[0] == '--complevel':
            complevel = int(option[1])
        elif option[0] == '--complib':
            complib = option[1]
        elif option[0] == '--shuffle':
            shuffle = int(option[1])
        elif option[0] == '--fletcher32':
            fletcher32 = int(option[1])
        elif option[0] == '--sortby':
            sortby = option[1]
        elif option[0] == '--propindexes':
            propindexes = True
        elif option[0] == '--forceCSI':
            forceCSI = True
        else:
            print option[0], ": Unrecognized option"
            sys.stderr.write(usage)
            sys.exit(0)

    # if we pass a number of files different from 2, abort
    if len(pargs) <> 2:
        print "You need to pass both source and destination!."
        sys.stderr.write(usage)
        sys.exit(0)

    # Catch the files passed as the last arguments
    src = pargs[0].split(':')
    dst = pargs[1].split(':')
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)
    # Let the user be warned in case he is using ptrepack when copying
    # files with indexes
    #warnings.filterwarnings("ignore", category=NoIndexingWarning)
    # Ignore the flavors warnings during upgrading flavor operations
    if upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    if ((complevel, complib, shuffle, fletcher32) == (None,)*4 or keepfilters):
        filters = None
    else:
        if complevel is None: complevel = 0
        if shuffle is None:
            if complevel > 0:
                shuffle = True
            else:
                shuffle = False
        if complib is None: complib = "zlib"
        if fletcher32 is None: fletcher32 = False
        filters = Filters(complevel=complevel, complib=complib,
                          shuffle=shuffle, fletcher32=fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if rng:
        start, stop, step = rng.start, rng.stop, rng.step

    # Some timing
    t1 = time.time()
    cpu1 = time.clock()
    # Copy the file
    if verbose:
        print "+=+"*20
        print "Recursive copy:", recursive
        print "Applying filters:", filters
        if sortby is not None:
            print "Sorting table(s) by column:", sortby
            print "Forcing a CSI creation:", forceCSI
        if propindexes:
            print "Recreating indexes in copied table(s)"
        print "Start copying %s:%s to %s:%s" % (srcfile, srcnode,
                                                dstfile, dstnode)
        print "+=+"*20

    # Check whether the specified source node is a group or a leaf
    h5srcfile = openFile(srcfile, 'r')
    srcnodeobject = h5srcfile.getNode(srcnode)
    objectclass = srcnodeobject.__class__.__name__
    # Close the file again
    h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'bytes': 0}
    if isinstance(srcnodeobject, Group):
        copyChildren(
            srcfile, dstfile, srcnode, dstnode,
            title = title, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwritefile = overwritefile,
            overwrtnodes = overwrtnodes, stats = stats,
            start = start, stop = stop, step = step, chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)
    else:
        # If not a Group, it should be a Leaf
        copyLeaf(
            srcfile, dstfile, srcnode, dstnode,
            title = title, filters = filters, copyuserattrs = copyuserattrs,
            overwritefile = overwritefile, overwrtnodes = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = time.clock()
    tcopy = round(t2-t1, 3)
    cpucopy = round(cpu2-cpu1, 3)
    tpercent = int(round(cpucopy/tcopy, 2)*100)

    if verbose:
        ngroups = stats['groups']
        nleafs = stats['leaves']
        nbytescopied = stats['bytes']

        print "Groups copied:", ngroups, " Leaves copied:", nleafs
        if copyuserattrs:
            print "User attrs copied"
        else:
            print "User attrs not copied"
        print "KBytes copied:", round(nbytescopied/1024.,3)
        print "Time copying: %s s (real) %s s (cpu)  %s%%" % \
              (tcopy, cpucopy, tpercent)
        print "Copied nodes/sec: ", round((ngroups+nleafs) / float(tcopy),1)
        print "Copied KB/s :", int(nbytescopied / (tcopy * 1024))
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.