"""Simple relational algebra interpreter.
usage:
To make the grammar
python relalg.py make
To run some relatoinal algebra expressions
python relalg.py < expressions_file
"""
# EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD
INSTALLDIR = "."
## simple relational algebra using only the equality predicate
## note: string values cannot contain ;
## statement sequencing using ; handled at higher level
relalg_rules = """
statement ::
@R statementassn :: statement >> assignment
@R statementexpr :: statement >> rexpr
@R assignment1 :: assignment >> name = rexpr
@R assignmentn :: assignment >> name = assignment
@R union :: rexpr >> rexpr U rterm
@R rterm :: rexpr >> rterm
@R minus :: rexpr >> rexpr - rterm
@R intersect :: rterm >> rterm intersect rfactor
@R join :: rterm >> rterm join rfactor
@R rfactor :: rterm >> rfactor
@R projection :: rfactor >> projection [ names ] rfactor
@R names0 :: names >>
@R namesn :: names >> names1
@R names11 :: names1 >> name
@R names1n :: names1 >> names1 name
@R selection :: rfactor >> selection ( condition ) rfactor
@R conditionor :: condition >> condition | boolfactor
@R condfactor :: condition >> boolfactor
@R factorand :: boolfactor >> boolfactor & boolprimary
@R factorprime :: boolfactor >> boolprimary
@R notprimary :: boolprimary >> ~ boolprimary
@R primarycondition :: boolprimary >> ( condition )
@R primaryeq :: boolprimary >> expression = expression
@R expname :: expression >> name
@R expvalue :: expression >> value
@R rename :: rfactor >> rename [ names ] to [ names ] rfactor
@R named :: rfactor >> name
@R factorexpr :: rfactor >> ( rexpr )
@R relationval :: rfactor >> [ names ] ( rows )
@R rows0 :: rows >>
@R rowsn :: rows >> somerows
@R somerows1 :: somerows >> row
@R somerowsn :: somerows >> somerows , row
@R emptyrow :: row >> NIL
@R row1 :: row >> value
@R rown :: row >> row value
@R valuenum :: value >> number
@R valuestr :: value >> string
"""
keywords = """
selection intersect rename projection to NIL U join
"""
puncts = """=^~|,-[]()&"""
nonterms = """
statement assignment rexpr rterm value rfactor
names names1 condition boolfactor boolprimary
expression rows somerows row
"""
try:
from kjbuckets import *
except ImportError:
from gadfly.kjbuckets0 import *
class relation:
def __init__(self, names, rows):
#print "relation init", names, rows
names = self.names = tuple(names)
nameset = self.nameset = kjSet(names)
for r in rows:
if nameset != kjSet(r.keys()):
raise ValueError, \
"bad names: "+`(names, r.items())`
self.rows = kjSet(rows)
def __repr__(self):
from string import join
names = self.names
rows = self.rows.items()
if not rows:
nns = join(names)
replist = [nns, "="*len(nns), " --<empty>--"]
return join(replist, "\n")
#print names, rows
nnames = len(names)
if nnames==1:
replist = [names[0]]
else:
replist = [names]
for r in rows:
elt = r.dump(names)
replist.append(r.dump(names))
#print replist
if nnames==1:
replist = maxrep(replist)
else:
transpose = apply(map, tuple([None] + replist))
adjusted = map(maxrep, transpose)
replist = apply(map, tuple([None] + adjusted))
replist = map(join, replist)
replist.insert(1, "=" * len(replist[0]))
#print replist
return join(replist, "\n")
def maxrep(list):
list = map(str, list)
maxlen = max( map(len, list) )
for i in range(len(list)):
item = list[i]
litem = len(item)
list[i] = item + (" " * (maxlen-litem))
return list
# context is a simple dictionary of named relations
def elt0(l, c):
return l[0]
statementassn = elt0
def statementexpr(l, c):
from string import split,join
print
print " --- expression result ---"
print
data = str(l[0])
print " "+ join(split(data, "\n"), "\n ")
def assignment1(l, c):
[name, eq, val] = l
c[name] = val
return val
assignmentn = assignment1
def check_compat(v1, v2):
names1, names2 = v1.names, v2.names
if names1 != names2:
raise ValueError, \
"operands not union compatible "+`(names1, names2)`
return names1, v1.rows, v2.rows
def union(l, c):
[v1, U, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1+r2).items())
rterm = elt0
def minus(l, c):
[v1, m, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1-r2).items())
def intersect(l, c):
[v1, i, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1&r2).items())
def join(l, c):
[v1, j, v2] = l
n1, n2 = v1.names, v2.names
r1, r2 = v1.rows.items(), v2.rows.items()
n1s, n2s = kjSet(n1), kjSet(n2)
common = tuple((n1s&n2s).items())
result = kjSet()
if common:
# simple hashjoin
G = kjGraph()
for a in r1:
G[a.dump(common)] = a
for b in r2:
for a in G.neighbors(b.dump(common)):
result[a+b] = 1
else:
for a in r1:
for b in r2:
result[a+b] = 1
return relation( (n1s+n2s).items(), result.items() )
rfactor = elt0
def projection(l, c):
[p, b1, names, b2, val] = l
proj = kjSet(names)
result = kjSet()
for row in val.rows.items():
result[ proj * row ] = 1
return relation( names, result.items())
def emptylist(l, c):
return []
names0 = emptylist
namesn = elt0
def names11(l, c):
return l
def names1n(l, c):
[ns, n] = l
ns.append(n)
return ns
def selection(l, c):
[sel, p1, cond, p2, val] = l
return cond.filter(val)
## conditions are not optimized at all!
class conditionor:
def __init__(self, l, c):
[self.c1, op, self.c2] = l
def filter(self, val):
v1 = self.c1.filter(val)
v2 = self.c2.filter(val)
return relation(v1.names, (v1.rows+v2.rows).items())
condfactor = elt0
class factorand(conditionor):
def filter(self, val):
v1 = self.c1.filter(val)
v2 = self.c2.filter(val)
return relation(v1.names, (v1.rows&v2.rows).items())
factorprime = elt0
class notprimary:
def __init__(self, l, c):
[n, self.c1] = l
def filter(self, val):
v1 = self.c1.filter(val)
return relation(v1.names, (val.rows-v1.rows).items())
def elt1(l, c):
return l[1]
primarycondition = elt1
class primaryeq:
def __init__(self, l, c):
[self.e1, eq, self.e2] = l
def filter(self, val):
rows = val.rows.items()
e1v = self.e1.value(rows)
e2v = self.e2.value(rows)
result = kjSet()
for (r, v1, v2) in map(None, rows, e1v, e2v):
if v1==v2:
result[r] = 1
return relation(val.names, result.items())
class expname:
def __init__(self, l, c):
self.name = l[0]
def value(self, rows):
name = self.name
r = list(rows)
for i in xrange(len(r)):
r[i] = r[i][name]
return r
class expvalue(expname):
def value(self, rows):
return [self.name] * len(rows)
def rename(l, c):
[ren, b1, names, b2, to, b3, names2, b4, val] = l
if len(names)!=len(names2):
raise ValueError, "names lengths must match"+`(names1, names2)`
remap = kjDict(map(None, names2, names))
oldnames = kjSet(val.names)
addnames = kjSet(names2)
remnames = kjSet(names)
keepnames = oldnames - remnames
remap = remap + keepnames
if not remnames.subset(oldnames):
#print remnames, oldnames
raise ValueError, "old names not present"+`(names, val.names)`
newnames = keepnames+addnames
rows = val.rows.items()
for i in range(len(rows)):
rows[i] = remap*rows[i]
return relation(newnames.items(), rows)
def named(l, c):
[name] = l
return c[name]
def relationval(l, c):
[b1, names, b2, p1, rows, p2] = l
names = tuple(names)
ln = len(names)
for i in xrange(len(rows)):
this = rows[i]
lt = len(this)
if lt!=ln:
raise ValueError, "names, vals don't match"+`(names,this)`
if len(this)==1:
this = this[0]
else:
this = tuple(this)
rows[i] = kjUndump(names, this)
return relation(names, rows)
rows0 = emptylist
rowsn = elt0
def somerows1(l, c):
#print "somerows1", l
return l
def somerowsn(l, c):
#print "somerowsn", l
[sr, c, r] = l
sr.append(r)
return sr
emptyrow = emptylist
row1 = somerows1
def factorexpr(l, c):
return l[1]
def rown(l, c):
#print "rows", l
[r, v] = l
r.append(v)
return r
valuenum = valuestr = elt0
## snarfed from sqlbind
# note: all reduction function defs must precede this assign
VARS = vars()
class punter:
def __init__(self, name):
self.name = name
def __call__(self, list, context):
print "punt:", self.name, list
return list
class tracer:
def __init__(self, name, fn):
self.name = name
self.fn = fn
def __call__(self, list, context):
print "tracing", self.name, list
test = self.fn(list, context)
print self.name, "returns", test
return test
def BindRules(sqlg):
for name in sqlg.RuleNameToIndex.keys():
if VARS.has_key(name):
#print "binding", name
sqlg.Bind(name, VARS[name]) # nondebug
#sqlg.Bind(name, tracer(name, VARS[name]) ) # debug
else:
print "unbound", name
sqlg.Bind(name, punter(name))
return sqlg
## snarfed from sqlgen
MARSHALMODULE = "relalg_mar"
MARSHALFILE = "%s/%s.py" % (INSTALLDIR, MARSHALMODULE)
import string
alphanum = string.letters+string.digits + "_"
userdefre = "[%s][%s]*" % (string.letters +"_", alphanum)
RACOMMENTREGEX = "COMMENT .*"
def userdeffn(str):
return str
charstre = "'[^\n']*'"
def charstfn(str):
return str[1:-1]
numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really...
def numlitfn(str):
"""Note: this is "safe" because regex
filters out dangerous things."""
return eval(str)
def DeclareTerminals(Grammar):
Grammar.Addterm("name", userdefre, userdeffn)
Grammar.Addterm("string", charstre, charstfn)
Grammar.Addterm("number", numlitre, numlitfn)
def Buildrelalg(filename=MARSHALFILE):
from gadfly import kjParseBuild
SQLG = kjParseBuild.NullCGrammar()
#SQLG.SetCaseSensitivity(0)
DeclareTerminals(SQLG)
SQLG.Keywords(keywords)
SQLG.punct(puncts)
SQLG.Nonterms(nonterms)
# should add comments
SQLG.comments([RACOMMENTREGEX])
SQLG.Declarerules(relalg_rules)
print "working..."
SQLG.Compile()
filename = INSTALLDIR+"/"+filename
print "dumping to", filename
outfile = open(filename, "wb")
SQLG.MarshalDump(outfile)
outfile.close()
return SQLG
def reloadrelalg(modulename=MARSHALMODULE):
from gadfly import kjParser
#filename = INSTALLDIR+"/"+filename
#infile = open(filename, "rb")
SQLG = kjParser.UnMarshalGram(modulename)
#infile.close()
DeclareTerminals(SQLG)
BindRules(SQLG)
return SQLG
def runfile(f):
from string import split,join
ragram = reloadrelalg()
context = {}
#f = open(filename, "r")
data = f.read()
#f.close()
from string import split,strip
commands = split(data, ";")
for c in commands:
if not strip(c): continue
print " COMMAND:"
data = str(c)
pdata = " "+join(split(c, "\n"), "\n ")
print pdata
test = ragram.DoParse1(c, context)
print
def runfile2(f):
from string import split,join
ragram = reloadrelalg()
context = {}
#f = open(filename, "r")
data = f.read()
#f.close()
from string import split,strip
commands = split(data, ";")
for c in commands:
if not strip(c): continue
#print " COMMAND:"
data = str(c)
pdata = " "+join(split(c, "\n"), "\n ")
#print pdata
test = ragram.DoParse1(c, context)
print
# c:\python\python relalg.py ratest.txt
if __name__=="__main__":
try:
done = 0
import sys
argv = sys.argv
if len(argv)>1:
command = argv[1]
if command=="make":
print "building relational algebra grammar"
Buildrelalg()
done = 1
else:
runfile2(sys.stdin)
done = 1
finally:
if not done:
print __doc__
|