relalg.py :  » Database » Gadfly » gadflyZip » relalg » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Database » Gadfly 
Gadfly » gadflyZip » relalg » relalg.py

"""Simple relational algebra interpreter.

usage:

   To make the grammar

       python relalg.py make

   To run some relatoinal algebra expressions

       python relalg.py < expressions_file

"""

# EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD
INSTALLDIR = "."

## simple relational algebra using only the equality predicate
## note: string values cannot contain ;

## statement sequencing using ; handled at higher level

relalg_rules = """

statement ::

@R statementassn :: statement >> assignment

@R statementexpr :: statement >> rexpr

@R assignment1 :: assignment >> name = rexpr

@R assignmentn :: assignment >> name = assignment

@R union :: rexpr >> rexpr U rterm

@R rterm :: rexpr >> rterm

@R minus :: rexpr >> rexpr - rterm

@R intersect :: rterm >> rterm intersect rfactor

@R join :: rterm >> rterm join rfactor

@R rfactor :: rterm >> rfactor

@R projection :: rfactor >> projection [ names ] rfactor

@R names0 :: names >>

@R namesn :: names >> names1

@R names11 :: names1 >> name

@R names1n :: names1 >> names1 name

@R selection :: rfactor >> selection ( condition ) rfactor

@R conditionor :: condition >> condition | boolfactor

@R condfactor :: condition >> boolfactor

@R factorand :: boolfactor >> boolfactor & boolprimary

@R factorprime :: boolfactor >> boolprimary

@R notprimary :: boolprimary >> ~ boolprimary

@R primarycondition :: boolprimary >> ( condition )

@R primaryeq :: boolprimary >> expression = expression

@R expname :: expression >> name

@R expvalue :: expression >> value

@R rename :: rfactor >> rename [ names ] to [ names ] rfactor

@R named :: rfactor >> name

@R factorexpr :: rfactor >> ( rexpr )

@R relationval :: rfactor >> [ names ] ( rows )

@R rows0 :: rows >>

@R rowsn :: rows >> somerows

@R somerows1 :: somerows >> row

@R somerowsn :: somerows >> somerows , row

@R emptyrow :: row >> NIL

@R row1 :: row >> value

@R rown :: row >> row value

@R valuenum :: value >> number

@R valuestr :: value >> string
"""

keywords = """
  selection intersect rename projection to NIL U join
"""

puncts = """=^~|,-[]()&"""

nonterms = """
statement assignment rexpr rterm value rfactor
names names1 condition boolfactor boolprimary
expression rows somerows row
"""

try:
    from kjbuckets import *
except ImportError:
    from gadfly.kjbuckets0 import *


class relation:

   def __init__(self, names, rows):
       #print "relation init", names, rows
       names = self.names = tuple(names)
       nameset = self.nameset = kjSet(names)
       for r in rows:
           if nameset != kjSet(r.keys()):
               raise ValueError, \
                "bad names: "+`(names, r.items())`
       self.rows = kjSet(rows)

   def __repr__(self):
       from string import join
       names = self.names
       rows = self.rows.items()
       if not rows:
          nns = join(names)
          replist = [nns, "="*len(nns), " --<empty>--"]
          return join(replist, "\n")
       #print names, rows
       nnames = len(names)
       if nnames==1:
           replist = [names[0]]
       else:
           replist = [names]
       for r in rows:
           elt = r.dump(names)
           replist.append(r.dump(names))
       #print replist
       if nnames==1:
           replist = maxrep(replist)
       else:
           transpose = apply(map, tuple([None] + replist))
           adjusted = map(maxrep, transpose)
           replist = apply(map, tuple([None] + adjusted))
           replist = map(join, replist)
       replist.insert(1, "=" * len(replist[0]))
       #print replist
       return join(replist, "\n")
       
def maxrep(list):
    list = map(str, list)
    maxlen = max( map(len, list) )
    for i in range(len(list)):
        item = list[i]
        litem = len(item)
        list[i] = item + (" " * (maxlen-litem))
    return list

# context is a simple dictionary of named relations

def elt0(l, c):
    return l[0]

statementassn = elt0

def statementexpr(l, c):
    from string import split,join
    print
    print "  --- expression result ---"
    print
    data = str(l[0])
    print "   "+ join(split(data, "\n"), "\n   ")

def assignment1(l, c):
    [name, eq, val] = l
    c[name] = val
    return val

assignmentn = assignment1

def check_compat(v1, v2):
    names1, names2 = v1.names, v2.names
    if names1 != names2:
        raise ValueError, \
        "operands not union compatible "+`(names1, names2)`
    return names1, v1.rows, v2.rows

def union(l, c):
    [v1, U, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1+r2).items())

rterm = elt0

def minus(l, c):
    [v1, m, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1-r2).items())

def intersect(l, c):
    [v1, i, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1&r2).items())

def join(l, c):
    [v1, j, v2] = l
    n1, n2 = v1.names, v2.names
    r1, r2 = v1.rows.items(), v2.rows.items()
    n1s, n2s = kjSet(n1), kjSet(n2)
    common = tuple((n1s&n2s).items())
    result = kjSet()
    if common:
        # simple hashjoin
        G = kjGraph()
        for a in r1:
            G[a.dump(common)] = a
        for b in r2:
            for a in G.neighbors(b.dump(common)):
                result[a+b] = 1
    else:
        for a in r1:
            for b in r2:
                result[a+b] = 1
    return relation( (n1s+n2s).items(), result.items() )

rfactor = elt0

def projection(l, c):
    [p, b1, names, b2, val] = l
    proj = kjSet(names)
    result = kjSet()
    for row in val.rows.items():
        result[ proj * row ] = 1
    return relation( names, result.items())

def emptylist(l, c):
    return []

names0 = emptylist

namesn = elt0

def names11(l, c):
    return l

def names1n(l, c):
    [ns, n] = l
    ns.append(n)
    return ns

def selection(l, c):
    [sel, p1, cond, p2, val] = l
    return cond.filter(val)

## conditions are not optimized at all!

class conditionor:
    def __init__(self, l, c):
        [self.c1, op, self.c2] = l
    def filter(self, val):
        v1 = self.c1.filter(val)
        v2 = self.c2.filter(val)
        return relation(v1.names, (v1.rows+v2.rows).items())

condfactor = elt0

class factorand(conditionor):
    def filter(self, val):
        v1 = self.c1.filter(val)
        v2 = self.c2.filter(val)
        return relation(v1.names, (v1.rows&v2.rows).items())

factorprime = elt0

class notprimary:
    def __init__(self, l, c):
        [n, self.c1] = l
    def filter(self, val):
        v1 = self.c1.filter(val)
        return relation(v1.names, (val.rows-v1.rows).items())

def elt1(l, c):
    return l[1]

primarycondition = elt1

class primaryeq:
    def __init__(self, l, c):
        [self.e1, eq, self.e2] = l
    def filter(self, val):
        rows = val.rows.items()
        e1v = self.e1.value(rows)
        e2v = self.e2.value(rows)
        result = kjSet()
        for (r, v1, v2) in map(None, rows, e1v, e2v):
            if v1==v2:
                result[r] = 1
        return relation(val.names, result.items())

class expname:
    def __init__(self, l, c):
        self.name = l[0]
    def value(self, rows):
        name = self.name
        r = list(rows)
        for i in xrange(len(r)):
            r[i] = r[i][name]
        return r

class expvalue(expname):
    def value(self, rows):
        return [self.name] * len(rows)

def rename(l, c):
    [ren, b1, names, b2, to, b3, names2, b4, val] = l
    if len(names)!=len(names2):
        raise ValueError, "names lengths must match"+`(names1, names2)`
    remap = kjDict(map(None, names2, names))
    oldnames = kjSet(val.names)
    addnames = kjSet(names2)
    remnames = kjSet(names)
    keepnames = oldnames - remnames
    remap = remap + keepnames
    if not remnames.subset(oldnames):
        #print remnames, oldnames
        raise ValueError, "old names not present"+`(names, val.names)`
    newnames = keepnames+addnames
    rows = val.rows.items()
    for i in range(len(rows)):
        rows[i] = remap*rows[i]
    return relation(newnames.items(), rows)

def named(l, c):
    [name] = l
    return c[name]

def relationval(l, c):
    [b1, names, b2, p1, rows, p2] = l
    names = tuple(names)
    ln = len(names)
    for i in xrange(len(rows)):
        this = rows[i]
        lt = len(this)
        if lt!=ln:
           raise ValueError, "names, vals don't match"+`(names,this)`
        if len(this)==1:
           this = this[0]
        else:
           this = tuple(this)
        rows[i] = kjUndump(names, this)
    return relation(names, rows)

rows0 = emptylist

rowsn = elt0

def somerows1(l, c):
    #print "somerows1", l
    return l

def somerowsn(l, c):
    #print "somerowsn", l
    [sr, c, r] = l
    sr.append(r)
    return sr

emptyrow = emptylist

row1 = somerows1

def factorexpr(l, c):
    return l[1]

def rown(l, c):
    #print "rows", l
    [r, v] = l
    r.append(v)
    return r

valuenum = valuestr = elt0

## snarfed from sqlbind
# note: all reduction function defs must precede this assign
VARS = vars()

class punter:
   def __init__(self, name):
       self.name = name
   def __call__(self, list, context):
       print "punt:", self.name, list
       return list
       
class tracer:
   def __init__(self, name, fn):
       self.name = name
       self.fn = fn
     
   def __call__(self, list, context):
       print "tracing", self.name, list
       test = self.fn(list, context)
       print self.name, "returns", test
       return test

def BindRules(sqlg):
    for name in sqlg.RuleNameToIndex.keys():
        if VARS.has_key(name):
           #print "binding", name
           sqlg.Bind(name, VARS[name]) # nondebug
           #sqlg.Bind(name, tracer(name, VARS[name]) ) # debug
        else:
           print "unbound", name
           sqlg.Bind(name, punter(name))
    return sqlg

## snarfed from sqlgen

MARSHALMODULE = "relalg_mar"
MARSHALFILE = "%s/%s.py" % (INSTALLDIR, MARSHALMODULE)

import string
alphanum = string.letters+string.digits + "_"
userdefre = "[%s][%s]*" % (string.letters +"_", alphanum)
RACOMMENTREGEX = "COMMENT .*"

def userdeffn(str):
    return str
    
charstre = "'[^\n']*'"

def charstfn(str):
    return str[1:-1]
    
numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really...

def numlitfn(str):
    """Note: this is "safe" because regex
       filters out dangerous things."""
    return eval(str)

def DeclareTerminals(Grammar):
    Grammar.Addterm("name", userdefre, userdeffn)
    Grammar.Addterm("string", charstre, charstfn)
    Grammar.Addterm("number", numlitre, numlitfn)
    
def Buildrelalg(filename=MARSHALFILE):
    from gadfly import kjParseBuild
    SQLG = kjParseBuild.NullCGrammar()
    #SQLG.SetCaseSensitivity(0)
    DeclareTerminals(SQLG)
    SQLG.Keywords(keywords)
    SQLG.punct(puncts)
    SQLG.Nonterms(nonterms)
    # should add comments
    SQLG.comments([RACOMMENTREGEX])
    SQLG.Declarerules(relalg_rules)
    print "working..."
    SQLG.Compile()
    filename = INSTALLDIR+"/"+filename
    print "dumping to", filename
    outfile = open(filename, "wb")
    SQLG.MarshalDump(outfile)
    outfile.close()
    return SQLG
    
def reloadrelalg(modulename=MARSHALMODULE):
    from gadfly import kjParser
    #filename = INSTALLDIR+"/"+filename
    #infile = open(filename, "rb")
    SQLG = kjParser.UnMarshalGram(modulename)
    #infile.close()
    DeclareTerminals(SQLG)
    BindRules(SQLG)
    return SQLG
    
def runfile(f):
    from string import split,join
    ragram = reloadrelalg()
    context = {}
    #f = open(filename, "r")
    data = f.read()
    #f.close()
    from string import split,strip
    commands = split(data, ";")
    for c in commands:
        if not strip(c): continue
        print " COMMAND:"
        data = str(c)
        pdata = "  "+join(split(c, "\n"), "\n  ")
        print pdata
        test = ragram.DoParse1(c, context)
        print
    
def runfile2(f):
    from string import split,join
    ragram = reloadrelalg()
    context = {}
    #f = open(filename, "r")
    data = f.read()
    #f.close()
    from string import split,strip
    commands = split(data, ";")
    for c in commands:
        if not strip(c): continue
        #print " COMMAND:"
        data = str(c)
        pdata = "  "+join(split(c, "\n"), "\n  ")
        #print pdata
        test = ragram.DoParse1(c, context)
        print

# c:\python\python relalg.py ratest.txt

if __name__=="__main__":
    try:
        done = 0
        import sys
        argv = sys.argv
        if len(argv)>1:
            command = argv[1]
            if command=="make":
                print "building relational algebra grammar"
                Buildrelalg()
                done = 1
        else:
            runfile2(sys.stdin)
            done = 1
    finally:
        if not done:
            print __doc__



www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.