""" SQL semantics
:Author: Aaron Watters
:Maintainers: http://gadfly.sf.net/
:Copyright: Aaron Robert Watters, 1994
:Id: $Id: semantics.py,v 1.8 2005/01/05 02:28:03 aaron_watters Exp $:
"""
### trim unused methods.
### make assns use equivalence classes.
### maybe eventually implement disj-conj-eq optimizations
### note: for multithreading x.relbind(...) should ALWAYs return
### a fresh copy of structure (sometimes in-place now).
### note: binding of order by is dubious with archiving,
### should not bind IN PLACE, leave unbound elts alone!
import sys, traceback, types
from kjbuckets_select import kjbuckets
import serialize
Tuple = kjbuckets.kjDict
Graph = kjbuckets.kjGraph
Set = kjbuckets.kjSet
### debug
#sys.stderr = sys.stdin
# operations on simple tuples, mostly from kjbuckets
#def maketuple(thing):
# """try to make a tuple from thing.
# thing should be a dictionary or sequence of (name, value)
# or other tuple."""
# from types import DictType
# if type(thing)==DictType:
# return Tuple(thing.items() )
# else: return Tuple(thing)
def no_ints_nulls(list):
"""in place remove all ints, Nones from a list (for null handling)"""
tt = type
nn = None
from types import IntType
count = 0
for x in list:
if tt(x) is not IntType and x is not nn:
list[count] = x
count = count+1
del list[count:]
return list
# stuff for bound tuples.
class HashJoiner:
def __init__(self, bt, relname, attributes, relation, witness):
self.relname = relname
self.attributes = attributes
self.relation = relation
self.witness = witness
self.bt = bt
eqs = bt.eqs
#print "relname", relname
#print "attributes", attributes
#print "relation", relation
#print "witness", witness
#print "bt", bt
transform = self.transform = kjbuckets.kjDict()
rbindings = self.rbindings = kjbuckets.kjSet()
for a in attributes:
b = (relname, a)
transform[b] = a
rbindings[b] = b
self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings)
witness = witness.remap(eqs)
known = kjbuckets.kjSet(witness.keys()) & rbindings
batts = tuple(known.items())
if not batts:
atts = ()
elif len(batts)==1:
atts = ( transform[batts[0]], )
else:
atts = transform.dump(batts)
self.atts = atts
self.batts = batts
self.transform = transform
eqs = bt.eqs
#eqs = (rbindings * eqs)
self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings)
self.transformG = transformG = eqs * transform
assns = self.assns = bt.assns
self.rassns = assns.remap( ~transformG )
def relbind(self, db, atts):
rel = self.relation
#print "rel is ", rel, type(rel)
#print dir(rel)
if rel.is_view:
self.relation = rel.relbind(db, atts)
return self
def uncache(self):
rel = self.relation
if rel.is_view:
self.relation.uncache()
def join(self, subseq):
relname = self.relname
result = []
assns = self.assns
if not subseq: return result
# apply equalities to unitary subseq (embedded subq)
if len(subseq)==1:
subseq0 = subseq[0]
subseq0r = subseq0.remap(self.eqs)
if subseq0r is None:
return [] # inconsistent
subseq0 = subseq0 + subseq0r + assns
if subseq0.Clean() is None:
return [] # inconsistent
subseq = [subseq0]
rassns = self.rassns
#print "rassns", rassns
#print "subseq", subseq
if rassns is None:
#print "inconsistent btup"
return []
relation = self.relation
#print "assns", assns
transformG = self.transformG
#print "transformG", transformG
transform = self.transform
atts = self.atts
batts = self.batts
#print "batts, atts", batts, atts
if not batts:
#print "cross product", relname
tuples = relation.rows()
for t in tuples:
#print "t is", t
if rassns:
t = (t + rassns).Clean()
if t is None:
#print "preselect fails"
continue
new = t.remap(transformG)
#print "new is", new
if new is None:
#print "transform fails"
continue
for subst in subseq:
#print "subst", subst
if subst:
add = (subst + new).Clean()
else:
add = new
#print "add is", add
if add is not None:
result.append(add)
else:
# hash join
#print "hash join"
# first try to use an index
index = relation.choose_index(atts)
#print transform
if index is not None:
#print "index join", index.name, relname
#print index.index.keys()
#print "rassns", rassns
atts = index.attributes()
invtransform = ~transform
if len(atts)==1:
batts = (invtransform[atts[0]],)
else:
batts = invtransform.dump(atts)
hash_tups = 1
tindex = index.index
# memoize converted tuples
tindex0 = {}
test = tindex.has_key
test0 = tindex0.has_key
for i in xrange(len(subseq)):
subst = subseq[i]
#print "substs is", subst
its = subst.dump(batts)
#print "its", its
othersubsts = []
if test0(its):
othersubsts = tindex0[its]
elif test(its):
tups = tindex[its]
for t in tups:
#print "t before", t
t = (t+rassns).Clean()
#print "t after", t
if t is None: continue
new = t.remap(transformG)
#print "new", new
if new is None: continue
othersubsts.append(new)
tindex0[its] = othersubsts
for other in othersubsts:
#print "adding", other, subst
add = (other + subst).Clean()
if add is not None:
result.append(add)
# hash join
#print "hash join"
else:
tuples = relation.rows(0, rassns, transformG, subseq)
if len(subseq)<len(tuples):
#print "hash subseq", relname
subseqindex = {}
test = subseqindex.has_key
for i in xrange(len(subseq)):
subst = subseq[i]
its = subst.dump(batts)
#print "items1", subseq, batts, its
if test(its):
subseqindex[its].append(subst)
else:
subseqindex[its] = [ subst ]
for t in tuples:
#print "on", t
if rassns:
t = (t+rassns).Clean()
if t is None:
#print "preselect fails"
continue
its = t.dump(atts)
#print "items2", its
if test(its):
new = t.remap(transformG)
#print "...new", new
if new is None:
#print "transform fails"
continue
l = subseqindex[its]
for subst in l:
add = (subst + new).Clean()
#print "adding", add
if add is not None:
result.append(add)
else:
#print "hash tuples", relname
tindex = {}
test = tindex.has_key
for i in xrange(len(tuples)):
t = tuples[i]
if rassns:
t = (t + rassns).Clean()
if t is None:
#print "preselect fails"
continue
new = t.remap(transformG)
#print "new is", new
if new is None:
#print "transform fails"
continue
its = t.dump(atts)
#print "items3", its
if test(its):
tindex[its].append(new)
else:
tindex[its] = [ new ]
for subst in subseq:
its = subst.dump(batts)
#print "items4", its
if test(its):
n = tindex[its]
for new in n:
add = (subst + new).Clean()
if add is not None:
result.append(add)
#print "hashjoin result"
#for x in result:
#print x
#break
return result
class SimpleRecursive:
"""Simple Recursive structure, only requires initargs"""
def demarshal(self, args):
pass
def marshaldata(self):
return ()
class BoundTuple:
clean = 1 # false if inconsistent
closed = 0 # true if equality constraints inferred
def __init__(self, **bindings):
"""bindings are name>simpletuple associations."""
self.eqs = Graph()
self.assns = Tuple()
for (name, simpletuple) in bindings.items():
# XXXX TODO FIXME.
# there _is_ no 'bind()' method! Fortunately, afaics
# this constructor is never called with args.
self.bind(name, simpletuple)
def initargs(self):
return ()
def marshaldata(self):
#print "btp marshaldata", self
return (self.eqs.items(), self.assns.items(), self.clean, self.closed)
def demarshal(self, args):
(eitems, aitems, self.clean, self.closed) = args
self.eqs = kjbuckets.kjGraph(eitems)
self.assns = kjbuckets.kjDict(aitems)
def relbind(self, dict, db):
"""return bindings of self wrt dict rel>att"""
result = BoundTuple()
e2 = result.eqs
a2 = result.assns
for ((a,b), (c,d)) in self.eqs.items():
if a is None:
try:
a = dict[b]
except KeyError:
raise NameError, `b`+": ambiguous or unknown attribute"
if c is None:
try:
c = dict[d]
except KeyError:
raise NameError, `d`+": ambiguous or unknown attribute"
e2[(a,b)] = (c,d)
for ((a,b), v) in self.assns.items():
if a is None:
try:
a = dict[b]
except KeyError:
raise NameError, `b`+": ambiguous or unknown attribute"
a2[(a,b)] = v
result.closed = self.closed
result.clean = self.clean
return result
#def known(self, relname):
# """return ([(relname, a1), ...], [a1, ...])
# for attributes ai of relname known in self."""
# atts = []
# batts = []
# for x in self.assns.keys():
# (r,a) = x
# if r==relname:
# batts.append(x)
# atts.append(a)
# return (batts, atts)
def relorder(self, db, allrels):
"""based on known constraints, pick an
ordering for materializing relations.
db is database (ignored currently)
allrels is names of all relations to include (list)."""
### not very smart about indices yet!!!
if len(allrels)<2:
# doesn't matter
return allrels
order = []
eqs = self.eqs
assns = self.assns
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
pinned = kjSet()
has_index = kjSet()
needed = kjSet(allrels)
akeys = assns.keys()
for (r,a) in akeys:
pinned[r]=r # pinned if some value known
known_map = kjGraph(akeys)
for r in known_map.keys():
rknown = known_map.neighbors(r)
if db.has_key(r):
rel = db[r]
index = rel.choose_index(rknown)
if index is not None:
has_index[r] = r # has an index!
if pinned: pinned = pinned & needed
if has_index: has_index = has_index & needed
related = kjGraph()
for ( (r1, a1), (r2, a2) ) in eqs.items():
related[r1]=r2 # related if equated to other
related[r2]=r1 # redundant if closed.
if related: related = needed * related * needed
chosen = kjSet()
pr = kjSet(related) & pinned
# choose first victim
if has_index:
choice = has_index.choose_key()
elif pr:
choice = pr.choose_key()
elif pinned:
choice = pinned.choose_key()
elif related:
choice = related.choose_key()
else:
return allrels[:] # give up!
while pinned or related or has_index:
order.append(choice)
chosen[choice] = 1
if pinned.has_key(choice):
del pinned[choice]
if related.has_key(choice):
del related[choice]
if has_index.has_key(choice):
del has_index[choice]
nexts = related * chosen
if nexts:
# prefer a relation related to chosen
choice = nexts.choose_key()
elif pinned:
# otherwise one that is pinned
choice = pinned.choose_key()
elif related:
# otherwise one that relates to something...
choice = related.choose_key()
others = kjSet(allrels) - chosen
if others: order = order + others.items()
return order
def domain(self):
kjSet = kjbuckets.kjSet
return kjSet(self.eqs) + kjSet(self.assns)
def __repr__(self):
result = []
for ( (name, att), value) in self.assns.items():
result.append( "%s.%s=%s" % (name, att, `value`) )
for ( (name, att), (name2, att2) ) in self.eqs.items():
result.append( "%s.%s=%s.%s" % (name, att, name2, att2) )
if self.clean:
if not result: return "TRUE"
else:
result.insert(0, "FALSE")
result.sort()
return ' & '.join(result)
def equate(self, equalities):
"""add equalities to self, only if not closed.
equalities should be seq of ( (name, att), (name, att) )
"""
if self.closed: raise ValueError, "cannot add equalities! Closed!"
e = self.eqs
for (a, b) in equalities:
e[a] = b
def close(self):
"""infer equalities, if consistent.
only recompute equality closure if not previously closed.
return None on inconsistency.
"""
neweqs = self.eqs
if not self.closed:
self.eqs = neweqs = (neweqs + ~neweqs).tclosure() # sym, trans closure
self.closed = 1
# add trivial equalities to self
for x in self.assns.keys():
if not neweqs.member(x,x):
neweqs[x] = x
newassns = self.assns.remap(neweqs)
if newassns is not None and self.clean:
self.assns = newassns
#self.clean = 1
return self
else:
self.clean = 0
return None
def share_eqs(self):
"""make clone of self that shares equalities, closure.
note: will share future side effects to eqs too."""
result = BoundTuple()
result.eqs = self.eqs
result.closed = self.closed
return result
def __add__(self, other):
"""combine self with other, return closure."""
result = self.share_eqs()
se = self.eqs
oe = other.eqs
if (se is not oe) and (se != oe):
result.eqs = se + oe
result.closed = 0
ra= result.assns = self.assns + other.assns
result.clean = result.clean and (ra.Clean() is not None)
return result.close()
def __and__(self, other):
"""return closed constraints common to self and other."""
result = BoundTuple()
se = self.eqs
oe = other.eqs
if (se is oe) or (se == oe):
result.eqs = self.eqs
result.closed = self.closed
else:
result.eqs = self.eqs & other.eqs
result.assns = self.assns & other.assns
result.clean = self.clean and other.clean
return result.close()
def __hash__(self):
# note: equalities don't enter into hash computation!
# (some may be spurious)
self.close()
return hash(self.assns)# ^ hash(self.eqs)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
sa = self.assns
oa = other.assns
test = cmp(sa, oa)
if test: return test
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjSet
se = self.eqs
se = kjGraph(se) - kjGraph(kjSet(se))
oe = other.eqs
oe = kjGraph(oe) - kjGraph(kjSet(oe))
return cmp(se, oe)
class BoundExpression(SimpleRecursive):
"""superclass for all bound expressions.
except where overloaded expressions are binary
with self.left and self.right
"""
contains_aggregate = 0 # default
def __init__(self, left, right):
self.left = left
self.right = right
self.contains_aggregate = left.contains_aggregate or right.contains_aggregate
def initargs(self):
return (self.left, self.right)
def uncache(self):
"""prepare for execution, clear cached data."""
self.left.uncache()
self.right.uncache()
# eventually add converters...
def equate(self,other):
"""return predicate equating self and other.
Overload for special cases, please!"""
return NontrivialEqPred(self, other)
def attribute(self):
return (None, `self`)
def le(self, other):
"""predicate self<=other"""
return LessEqPred(self, other)
# these should be overridden for 2 const case someday...
def lt(self, other):
"""predicate self<other"""
return LessPred(self, other)
def __coerce__(self, other):
return (self, other)
def __add__(self, other):
return BoundAddition(self, other)
def __sub__(self, other):
return BoundSubtraction(self, other)
def __mul__(self, other):
return BoundMultiplication(self, other)
def __neg__(self):
return BoundMinus(self)
def __div__(self, other):
return BoundDivision(self, other)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.left.relbind(dict, db), self.right.relbind(dict, db))
def __repr__(self):
# XXXX not all subclasses are guaranteed to have a self.op
return "(%s)%s(%s)" % (self.left, self.op, self.right)
def domain(self):
return self.left.domain() + self.right.domain()
# always overload value
class BoundMinus(BoundExpression, SimpleRecursive):
def __init__(self, thing):
self.thing = thing
self.contains_aggregate = thing.contains_aggregate
def initargs(self):
return (self.thing,)
def __repr__(self):
return "-(%s)" % (self.thing,)
def value(self, contexts):
from types import IntType
tt = type
result = self.thing.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
result[i] = -result[i]
return result
def relbind(self, dict, db):
Class = self.__class__
return Class(self.thing.relbind(dict,db))
def uncache(self):
self.thing.uncache()
def domain(self):
return self.thing.domain()
### stuff for aggregation
class Average(BoundMinus):
contains_aggregate = 1
def __init__(self, expr, distinct=0):
self.distinct = distinct
if expr.contains_aggregate:
raise ValueError, `expr`+": aggregate in aggregate "+self.name
self.thing = expr
name = "Average"
def __repr__(self):
distinct = ""
if self.distinct:
distinct = "distinct "
return "%s(%s%s)" % (self.name, distinct, self.thing)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.thing.relbind(dict,db), self.distinct)
def value(self, contexts):
if not contexts: return [] # ???
test = contexts[0]
if not test or test.has_key(None):
return self.agg_value(contexts)
else:
return [self.all_value(contexts)]
def dvalues(self, values):
d = {}
for i in xrange(len(values)):
d[values[i]] = 1
return d.keys()
def all_value(self, contexts):
thing = self.thing
values = self.clean(thing.value(contexts), contexts)
if self.distinct:
values = self.dvalues(values)
return self.op(values)
def clean(self, values, contexts):
D = {}
from types import IntType
tt = type
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
D[i] = values[i]
return D.values()
def agg_value(self, contexts):
from types import IntType
tt = type
result = list(contexts)
for i in xrange(len(contexts)):
context = contexts[i]
if tt(context) is not IntType:
result[i] = self.all_value( context[None] )
return result
def op(self, values):
sum = 0
for x in values:
sum = sum+x
return sum/(len(values)*1.0)
class Sum(Average):
name = "Sum"
def op(self, values):
if not values: return 0
sum = values[0]
for x in values[1:]:
sum = sum+x
return sum
class Median(Average):
name = "Median"
def op(self, values):
if not values:
raise ValueError, "Median of empty set"
values = list(values)
values.sort()
lvals = len(values)
return values[int(lvals/2)]
class Maximum(Average):
name = "Maximum"
def op(self, values):
return max(values)
class Minimum(Average):
name = "Minimum"
def op(self, values):
return min(values)
class Count(Average):
name = "Count"
distinct = 0
def __init__(self, thing, distinct = 0):
if thing=="*":
self.thing = "*"
else:
Average.__init__(self, thing, distinct)
def domain(self):
thing = self.thing
if thing=="*":
return kjbuckets.kjSet()
return thing.domain()
def all_value(self, contexts):
thing = self.thing
if thing=="*" or not self.distinct:
test = self.clean(contexts, contexts)
#print "count len", len(test), contexts[0]
return len(test)
return Average.all_value(self, contexts)
def op(self, values):
return len(values)
def relbind(self, dict, db):
if self.thing=="*":
return self
return Average.relbind(self, dict, db)
def uncache(self):
if self.thing!="*": self.thing.uncache()
def aggregate(assignments, exprlist):
"""aggregates are a assignments with special
attribute None > list of subtuple"""
lexprs = len(exprlist)
if lexprs<1:
raise ValueError, "aggregate on no expressions?"
lassns = len(assignments)
pairs = list(exprlist)
for i in xrange(lexprs):
expr = exprlist[i]
attributes = [expr.attribute()]*lassns
values = expr.value(assignments)
pairs[i] = map(None, attributes, values)
#for x in pairs:
#print "pairs", x
if lexprs>1:
newassnpairs = apply(map, (None,)+tuple(pairs))
else:
newassnpairs = pairs[0]
#for x in newassnpairs:
#print "newassnpairs", x
xassns = range(lassns)
dict = {}
test = dict.has_key
for i in xrange(lassns):
thesepairs = newassnpairs[i]
thissubassn = assignments[i]
if test(thesepairs):
dict[thesepairs].append(thissubassn)
else:
dict[thesepairs] = [thissubassn]
items = dict.items()
result = list(items)
kjDict = kjbuckets.kjDict
if lexprs>1:
for i in xrange(len(items)):
(pairs, subassns) = items[i]
#print "pairs", pairs
#print "subassns", subassns
D = kjDict(pairs)
D[None] = subassns
result[i] = D
else:
for i in xrange(len(items)):
(pair, subassns) = items[i]
#print "pair", pair
#print "subassns", subassns
result[i] = kjDict( [pair, (None, subassns)] )
return result
### stuff for order_by
class DescExpr(BoundMinus):
"""special wrapper used only for order by descending
for things with no -thing operation (eg, strings)"""
def __init__(self, thing):
self.thing = thing
self.contains_aggregate = thing.contains_aggregate
def value(self, contexts):
from types import IntType,StringType
tt = type
result = self.thing.value(contexts)
allwrap = None
allnowrap = None
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
resulti = result[i]
# currently assume only value needing wrapping is string
if tt(resulti) is StringType:
if allnowrap is not None:
raise ValueError, "(%s, %s) cannot order desc" % (allnowrap, resulti)
allwrap = resulti
result[i] = descOb(resulti)
else:
if allwrap is not None:
raise ValueError, "(%s, %s) cannot order desc" % (allwrap, resulti)
allnowrap = resulti
result[i] = -resulti
return result
def __repr__(self):
return "DescExpr(%s)" % (self.thing,)
def orderbind(self, order):
"""order is list of (att, expr)."""
Class = self.__class__
return Class(self.thing.orderbind(order))
class SimpleColumn(SimpleRecursive):
"""a simple column name for application to a list of tuples"""
contains_aggregate = 0
def __init__(self, name):
self.name = name
def relbind(self, dict, db):
# already bound!
return self
def orderbind(self, whatever):
# already bound!
return self
def initargs(self):
return (self.name,)
def value(self, simpletuples):
from types import IntType
tt = type
name = self.name
result = list(simpletuples)
for i in xrange(len(result)):
ri = result[i]
if tt(ri) is not IntType:
result[i] = ri[name]
else:
result[i] = None # ???
return result
def __repr__(self):
return "<SimpleColumn %s>" % (self.name,)
class NumberedColumn(BoundMinus):
"""order by column number"""
contains_aggregate = 0
def __init__(self, num):
self.thing = num
def __repr__(self):
return "<NumberedColumn %s>" % (self.thing,)
def relbind(self, dict, db):
from types import IntType
if type(self.thing)!=IntType:
raise ValueError, `self.thing`+": not a numbered column"
return self
def orderbind(self, order):
return SimpleColumn( order[self.thing-1][0] )
class OrderExpr(BoundMinus):
"""order by expression."""
def orderbind(self, order):
expratt = self.thing.attribute()
for (att, exp) in order:
if exp.attribute()==expratt:
return SimpleColumn(att)
else:
raise NameError, `self`+": invalid ordering specification"
def __repr__(self):
return "<order expression %s>" % (self.thing,)
class descOb:
"""special wrapper only used for sorting in descending order
should only be compared with other descOb instances.
should only wrap items that cannot be easily "order inverted",
(eg, strings).
"""
def __init__(self, ob):
self.ob = ob
def __cmp__(self, other):
#test = cmp(self.__class__, other.__class__)
#if test: return test
return -cmp(self.ob,other.ob)
def __coerce__(self, other):
return (self, other)
def __hash__(self):
return hash(self.ob)
def __repr__(self):
return "descOb(%s)" % (self.ob,)
def PositionedSort(num, ord):
nc = NumberedColumn(num)
if ord=="DESC":
return DescExpr(nc)
return nc
def NamedSort(name, ord):
oe = OrderExpr(name)
if ord=="DESC":
return DescExpr(oe)
return oe
def relbind_sequence(order_list, dict, db):
result = list(order_list)
for i in xrange(len(order_list)):
result[i] = result[i].relbind(dict,db)
return result
def orderbind_sequence(order_list, order):
result = list(order_list)
for i in xrange(len(order_list)):
result[i] = result[i].orderbind(order)
return result
def order_tuples(order_list, tuples):
lorder_list = len(order_list)
ltuples = len(tuples)
if lorder_list<1:
raise ValueError, "order on empty list?"
order_map = list(order_list)
for i in xrange(lorder_list):
order_map[i] = order_list[i].value(tuples)
if len(order_map)>1:
order_vector = apply(map, (None,)+tuple(order_map) )
else:
order_vector = order_map[0]
#G = kjbuckets.kjGraph()
pairs = map(None, range(ltuples), tuples)
ppairs = map(None, order_vector, pairs)
G = kjbuckets.kjGraph(ppairs)
#for i in xrange(ltuples):
# G[ order_vector[i] ] = (i, tuples[i])
Gkeys = G.keys()
Gkeys.sort()
result = list(tuples)
index = 0
for x in Gkeys:
#print x
for (i, y) in G.neighbors(x):
#print " ", y
result[index]=y
index = index+1
if index!=ltuples:
raise ValueError, \
"TUPLE LOST IN ORDERING COMPUTATION! (%s,%s)" % (ltuples, index)
return result
class BoundAddition(BoundExpression):
"""promised addition."""
op = "+"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] + rvs[i]
return lvs
class BoundSubtraction(BoundExpression):
"""promised subtraction."""
op = "-"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] - rvs[i]
return lvs
class BoundMultiplication(BoundExpression):
"""promised multiplication."""
op = "*"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
#print lvs
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] * rvs[i]
return lvs
class BoundDivision(BoundExpression):
"""promised division."""
op = "/"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] / rvs[i]
return lvs
class BoundAttribute(BoundExpression):
"""bound attribute: initialize with relname=None if
implicit."""
contains_aggregate = 0
def __init__(self, rel, name):
self.rel = rel
self.name = name
def initargs(self):
return (self.rel, self.name)
def relbind(self, dict, db):
if self.rel is not None: return self
name = self.name
try:
rel = dict[name]
except KeyError:
raise NameError, `name` + ": unknown or ambiguous"
return BoundAttribute(rel, name)
def uncache(self):
pass
def __repr__(self):
return "%s.%s" % (self.rel, self.name)
def attribute(self):
"""return (rename, attribute) for self."""
return (self.rel, self.name)
def domain(self):
return kjbuckets.kjSet([ (self.rel, self.name) ])
def value(self, contexts):
"""return value of self in context (bound tuple)."""
#print "value of ", self, "in", contexts
from types import IntType
tt = type
result = list(contexts)
ra = (self.rel, self.name)
for i in xrange(len(result)):
if tt(result[i]) is not IntType:
result[i] = contexts[i][ra]
return result
def equate(self, other):
oc = other.__class__
if oc==BoundAttribute:
result = BoundTuple()
result.equate([(self.attribute(), other.attribute())])
return BTPredicate(result)
elif oc==Constant:
result = BoundTuple()
result.assns[ self.attribute() ] = other.value([1])[0]
return BTPredicate(result)
else:
return NontrivialEqPred(self, other)
class Constant(BoundExpression):
contains_aggregate = 0
def __init__(self, value):
self.value0 = value
def __hash__(self):
return hash(self.value0)
def initargs(self):
return (self.value0,)
def domain(self):
return kjbuckets.kjSet()
def __add__(self, other):
if other.__class__==Constant:
return Constant(self.value0 + other.value0)
return BoundAddition(self, other)
def __sub__(self, other):
if other.__class__==Constant:
return Constant(self.value0 - other.value0)
return BoundSubtraction(self, other)
def __mul__(self, other):
if other.__class__==Constant:
return Constant(self.value0 * other.value0)
return BoundMultiplication(self, other)
def __neg__(self):
return Constant(-self.value0)
def __div__(self, other):
if other.__class__==Constant:
return Constant(self.value0 / other.value0)
return BoundDivision(self, other)
def relbind(self, dict, db):
return self
def uncache(self):
pass
def value(self, contexts):
"""return the constant value associated with self."""
return [self.value0] * len(contexts)
def equate(self,other):
if other.__class__==Constant:
if other.value0 == self.value0:
return BTPredicate() #true
else:
return ~BTPredicate() #false
else:
return other.equate(self)
def attribute(self):
"""invent a pair to identify a constant"""
return ('unbound', `self`)
def __repr__(self):
return str(self.value0)
#return "<const %s at %s>" % (`self.value0`, id(self))
class TupleCollector:
"""Translate a sequence of assignments to simple tuples.
(for implementing the select list of a SELECT).
"""
contains_aggregate = 0
contains_nonaggregate = 0
def __init__(self):
self.final = None
self.order = []
self.attorder = []
self.exporder = []
def initargs(self):
return ()
def marshaldata(self):
exps = map(serialize.serialize, self.exporder)
return (self.attorder, exps,
self.contains_aggregate, self.contains_nonaggregate)
def demarshal(self, args):
(self.attorder, exps, self.contains_aggregate,
self.contains_nonaggregate) = args
exporder = self.exporder = map(serialize.deserialize, exps)
self.order = map(None, self.attorder, exporder)
def uncache(self):
for exp in self.exporder:
exp.uncache()
def domain(self):
all=[]
for e in self.exporder:
all = all+e.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
l = []
for (att, exp) in self.order:
l.append( "%s as %s" % (exp, att) )
return ', '.join(l)
def addbinding(self, attribute, expression):
"""bind att>expression."""
self.order.append((attribute, expression) )
self.attorder.append(attribute )
self.exporder.append(expression)
if expression.contains_aggregate:
self.contains_aggregate = 1
else:
self.contains_nonaggregate = 1
def map(self, assnlist):
"""remap btlist by self. return (tuplelist, attorder)"""
# DON'T eliminate nulls
from types import IntType
tt = type
values = []
for exp in self.exporder:
values.append(exp.value(assnlist))
if len(values)>1:
valtups = apply(map, (None,) + tuple(values) )
else:
valtups = values[0]
kjUndump = kjbuckets.kjUndump
undumper = tuple(self.attorder)
for i in xrange(len(valtups)):
test = assnlist[i]
if tt(test) is IntType or test is None:
valtups[i] = 0 # null/false
else:
tup = valtups[i]
valtups[i] = kjUndump(undumper, tup)
return (valtups, self.attorder)
def relbind(self, dict, db):
"""disambiguate missing rel names if possible.
also choose output names appropriately."""
# CURRENTLY THIS IS AN "IN PLACE" OPERATION
order = self.order
attorder = self.attorder
exporder = self.exporder
known = {}
for i in xrange(len(order)):
(att, exp) = order[i]
#print exp
exp = exp.relbind(dict, db)
if att is None:
# choose a name for this column
#print exp
(rname, aname) = exp.attribute()
if known.has_key(aname):
both = rname+"."+aname
att = both
count = 0
while known.has_key(att):
# crank away!
count = count+1
att = both+"."+`count`
else:
att = aname
else:
if known.has_key(att):
raise NameError, `att`+" ambiguous in select list"
order[i] = (att, exp)
exporder[i] = exp
attorder[i] = att
known[att] = att
return self
class BTPredicate(SimpleRecursive):
"""superclass for bound tuple predicates.
Eventually should be modified to use "compile" for speed
to generate an "inlined" evaluation function.
self(bt) returns bt with additional equality constraints
(possible) or None if predicate fails."""
false = 0
constraints = None
contains_aggregate = 0
def __init__(self, constraints=None):
"""default interpretation: True."""
if constraints is not None:
self.constraints = constraints.close()
def initargs(self):
return (self.constraints,)
def relbind(self, dict, db):
c = self.constraints
if c is None: return self
return BTPredicate( self.constraints.relbind(dict, db) )
def uncache(self):
pass
#def evaluate(self, db, relnames):
#"""evaluate the predicate over database bindings."""
# pretty simple strategy right now...
### need to do something about all/distinct...
#c = self.constraints
#if c is None:
# c = BoundTuple()
#order = c.relorder(db, relnames)
#if not order:
# raise ValueError, "attempt to evaluate over no relations: "+`relnames`
#result = [c]
#for r in order:
# result = hashjoin(result, r, db[r])
#if self.__class__==BTPredicate:
# # if it's just equality conjunction, we're done
# return result
#else:
# # apply additional constraints
# return self(result)
def domain(self):
c = self.constraints
kjSet = kjbuckets.kjSet
if c is None: return kjSet()
return c.domain()
def __repr__(self):
if self.false: return "FALSE"
c = self.constraints
if c is None: c = "true"
return "[pred](%s)" % c
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
if self.false: return self
if not self.constraints: return None
return self
def negated_constraints(self):
"""equality constraints always false of satisfactory tuple."""
return BoundTuple() # there aren't any
def __call__(self, assignments, toplevel=0):
"""apply self to sequence of assignments
return copy of asssignments with false results
replaced by 0! Input may have 0's!"""
# optimization
# if toplevel, the btpred has been evaluated during join.
if toplevel:
return list(assignments)
from types import IntType
tt = type
lbt = len(assignments)
if self.false: return [0] * lbt
c = self.constraints
if c is None or not c:
result = assignments[:] # no constraints
else:
assns = c.assns
eqs = c.eqs
eqsinteresting = 0
for (a,b) in eqs.items():
if a!=b:
eqsinteresting = 1
result = assignments[:]
for i in xrange(lbt):
this = assignments[i]
#print "comparing", self, "to", this
if type(this) is IntType: continue
this = (this + assns).Clean()
if this is None:
result[i] = 0
elif eqsinteresting:
this = this.remap(eqs)
if this is None:
result[i] = 0
return result
def __and__(self, other):
"""NOTE: all subclasses must define an __and__!!!"""
#print "BTPredicate.__and__", (self, other)
if self.__class__==BTPredicate and other.__class__==BTPredicate:
c = self.constraints
o = other.constraints
if c is None: return other
if o is None: return self
if self.false: return self
if other.false: return other
# optimization for simple constraints
all = (c+o)
result = BTPredicate( all ) # all constraints
if all is None: result.false = 1
else:
result = other & self
return result
def __or__(self, other):
if self.__class__==BTPredicate and other.__class__==BTPredicate:
c = self.constraints
o = other.constraints
if c is None: return self # true dominates
if o is None: return other
if other.false: return self
if self.false: return other
if self == other: return self
result = BTor_pred([self, other])
return result
def __invert__(self):
if self.false:
return BTPredicate()
if not self.constraints:
result = BTPredicate()
result.false = 1
return result
return BTnot_pred(self)
def __cmp__(self, other):
test = cmp(other.__class__, self.__class__)
if test: return test
if self.false and other.false: return 0
return cmp(self.constraints, other.constraints)
def __hash__(self):
if self.false: return 11111
return hash(self.constraints)
class BTor_pred(BTPredicate):
def __init__(self, members, *othermembers):
# replace any OR in members with its members
#print "BTor_pred", members
members = list(members) + list(othermembers)
for m in members[:]:
if m.__class__==BTor_pred:
members.remove(m)
members = members + m.members
#print "before", members
members = self.members = kjbuckets.kjSet(members).items()
#print members
for m in members[:]:
if m.false: members.remove(m)
self.constraints = None # common constraints
for m in members:
if m.contains_aggregate:
self.contains_aggregate = 1
if members:
# common constraints are those in all members
constraints = members[0].constraints
for m in members[1:]:
mc = m.constraints
if not constraints or not mc:
constraints = None
break
constraints = constraints & mc
self.constraints = constraints
#print members
def initargs(self):
return ((),) + tuple(self.members)
def relbind(self, dict, db):
ms = []
for m in self.members:
ms.append( m.relbind(dict, db) )
return BTor_pred(ms)
def uncache(self):
for m in self.members:
m.uncache()
def domain(self):
all = BTPredicate.domain(self).items()
for x in self.members:
all = all + x.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
c = self.constraints
m = self.members
mr = map(repr, m)
mr.sort()
mr = ' | '.join(mr)
if not mr: mr = "FALSE_OR"
if c:
mr = "[disj](%s and %s)" % (c, mr)
return mr
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
ms = self.members
for i in xrange(len(ms)):
ms[i] = ms[i].detrivialize()
# now suck out subordinate ors
someor = None
for m in ms:
if m.__class__== BTor_pred:
someor = m
ms.remove(m)
break
if someor is not None:
result = someor
for m in ms:
result = result + m
return result.detrivialize()
allfalse = 1
for m in ms:
if m is None: allfalse=0; break # true member
allfalse = allfalse & m.false
if allfalse: return ~BTPredicate() # boundary case
ms[:] = filter(None, ms)
if not ms: return None # all true.
ms[:] = kjbuckets.kjSet(ms).items()
if len(ms)==1: return ms[0] # or of 1
return self
def __call__(self, boundtuples, toplevel=0):
# apply common constraints first
lbt = len(boundtuples)
# boundary case for or is false
members = self.members
if not members:
return [0] * lbt
current = BTPredicate.__call__(self, boundtuples, toplevel)
# now apply first alternative
alt1 = members[0](current)
# determine questionables
questionables = current[:]
rng = xrange(len(current))
from types import IntType
for i in rng:
if not isinstance(alt1[i], IntType):
questionables[i] = 0
# now test other alternatives
for m in self.members[1:]:
passm = m(questionables)
for i in rng:
test = passm[i]
if not isinstance(test, IntType):
questionables[i] = 0
alt1[i] = test
return alt1
def negated_constraints(self):
"""the negated constraints of an OR are
the negated constraints of *all* members"""
ms = self.members
result = ms.negated_constraints()
for m in ms[1:]:
if not result: return result
mc = m.negated_constraints()
if not mc: return mc
result = result & mc
return result
def __and__(self, other):
"""push "and" down"""
newmembers = self.members[:]
for i in xrange(len(newmembers)):
newmembers[i] = newmembers[i] & other
return BTor_pred(newmembers)
def __or__(self, other):
"""collapse two ors, otherwise just add new member"""
if self.__class__==BTor_pred and other.__class__==BTor_pred:
return BTor_pred(self.members+other.members)
return BTor_pred(self.members + [other])
def __invert__(self):
"""translate to and-not"""
ms = self.members
if not ms: return BTPredicate() # boundary case
result = ~ms[0]
for m in ms[1:]:
result = result & ~m
return result
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
kjSet = kjbuckets.kjSet
test = cmp(kjSet(self.members), kjSet(other.members))
if test: return test
return BTPredicate.__cmp__(self, other)
def __hash__(self):
return hash(kjbuckets.kjSet(self.members))
class BTnot_pred(BTPredicate):
def __init__(self, thing):
self.negated = thing
self.contains_aggregate = thing.contains_aggregate
self.constraints = thing.negated_constraints()
def initargs(self):
return (self.negated,)
def relbind(self, dict, db):
return BTnot_pred( self.negated.relbind(dict, db) )
def uncache(self):
self.negated.uncache()
def domain(self):
result = BTPredicate.domain(self) + self.negated.domain()
#print "neg domain is", `self`, result
return result
def __repr__(self):
c = self.constraints
n = self.negated
r = "(NOT %s)" % n
if c: r = "[neg](%s & %s)" % (c, r)
return r
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
# first, fix or/and/not precedence
thing = self.negated
if thing.__class__ == BTnot_pred:
return thing.negated.detrivialize()
if thing.__class__ == BTor_pred:
# translate to and_not
members = thing.members[:]
for i in xrange(len(members)):
members[i] = ~members[i]
result = BTand_pred(members)
return result.detrivialize()
if thing.__class__ == BTand_pred:
# translate to or_not
members = thing.members[:]
c = thing.constraints # precondition
if c is not None:
members.append(BTPredicate(c))
for i in xrange(len(members)):
members[i] = ~members[i]
result = BTor_pred(members)
return result.detrivialize()
self.negated = thing = self.negated.detrivialize()
if thing is None: return ~BTPredicate() # uniquely false
if thing.false: return None # uniquely true
return self
def __call__(self, boundtuples, toplevel=0):
from types import IntType
tt = type
current = BTPredicate.__call__(self, boundtuples, toplevel)
omit = self.negated(current)
for i in xrange(len(current)):
if tt(omit[i]) is not IntType:
current[i]=0
return current
def negated_constraints(self):
"""the negated constraints of a NOT are the
negated constraints of the thing negated."""
return self.negated.constraints
def __and__(self, other):
"""do the obvious thing."""
return BTand_pred([self, other])
def __or__(self, other):
"""do the obvious thing"""
return BTor_pred([self, other])
def __invert__(self):
return self.negated
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.negated,other.negated)
if test: return test
return BTPredicate.__cmp__(self,other)
def __hash__(self):
return hash(self.negated)^787876^hash(self.constraints)
class BTand_pred(BTPredicate):
def __init__(self, members, precondition=None, *othermembers):
#print "BTand_pred", (members, precondition)
members = list(members) + list(othermembers)
members = self.members = kjbuckets.kjSet(members).items()
self.constraints = precondition # common constraints
if members:
# common constraints are those in any member
if precondition is not None:
constraints = precondition
else:
constraints = BoundTuple()
for i in xrange(len(members)):
m = members[i]
mc = m.constraints
if mc:
#print "constraints", constraints
constraints = constraints + mc
if constraints is None: break
if m.__class__==BTPredicate:
members[i] = None # subsumed above
members = self.members = filter(None, members)
for m in members:
if m.contains_aggregate:
self.contains_aggregate=1
### consider propagating constraints down?
self.constraints = constraints
if constraints is None: self.false = 1
def initargs(self):
#print "self.members", self.members
#print "self.constraints", self.constraints
#return (list(self.members), self.constraints)
return ((), self.constraints) + tuple(self.members)
def relbind(self, dict, db):
ms = []
for m in self.members:
ms.append( m.relbind(dict, db) )
c = self.constraints.relbind(dict, db)
return BTand_pred(ms, c)
def uncache(self):
for m in self.members:
m.uncache()
def domain(self):
all = BTPredicate.domain(self).items()
for x in self.members:
all = all + x.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
m = self.members
c = self.constraints
r = map(repr, m)
if self.false: r.insert(0, "FALSE")
r = ' AND '.join(r)
r = "(%s)" % r
if c: r = "[conj](%s and %s)" % (c, r)
return r
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
# first apply demorgan's law to push ands down
# (exponential in worst case).
#print "detrivialize"
#print self
ms = self.members
some_or = None
c = self.constraints
for m in ms:
if m.__class__==BTor_pred:
some_or = m
ms.remove(m)
break
if some_or is not None:
result = some_or
if c is not None:
some_or = some_or & BTPredicate(c)
for m in ms:
result = result & m # preserves or/and precedence
if result.__class__!=BTor_pred:
raise "what the?"
result = result.detrivialize()
#print "or detected, returning"
#print result
return result
for i in xrange(len(ms)):
ms[i] = ms[i].detrivialize()
ms[:] = filter(None, ms)
if not ms:
#print "returning boundary case of condition"
if c is None:
return None
else:
return BTPredicate(c).detrivialize()
ms[:] = kjbuckets.kjSet(ms).items()
if len(ms)==1 and c is None:
#print "and of 1, returning"
#print ms[0]
return ms[0] # and of 1
return self
def __call__(self, boundtuples, toplevel=0):
# apply common constraints first
current = BTPredicate.__call__(self, boundtuples, toplevel)
for m in self.members:
current = m(current)
return current
def negated_constraints(self):
"""the negated constraints of an AND are
the negated constraints of *any* member"""
ms = self.members
result = BoundTuple()
for m in ms:
mc = m.negated_constraints()
if mc: result = result + mc
return result
def __and__(self, other):
"""push "and" down if other is an or"""
if other.__class__==BTor_pred:
return other & self
c = self.constraints
# merge in other and
if other.__class__==BTand_pred:
allmem = self.members+other.members
oc = other.constraints
if c is None:
c = oc
elif oc is not None:
c = c+oc
return BTand_pred(allmem, c)
return BTand_pred(self.members + [other], c)
def __or__(self, other):
"""do the obvious thing."""
return BTor_pred([self, other])
def __invert__(self):
"""translate to or-not"""
ms = self.members
if not ms: return ~BTPredicate() # boundary case
result = ~ms[0]
for m in ms[1:]:
result = result | ~m
return result
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
kjSet = kjbuckets.kjSet
test = cmp(kjSet(self.members), kjSet(other.members))
if test: return test
return BTPredicate.__cmp__(self, other)
def __hash__(self):
return hash(kjbuckets.kjSet(self.members))
class NontrivialEqPred(BTPredicate):
"""equation of nontrivial expressions."""
def __init__(self, left, right):
#print "making pred", self.__class__, left, right
# maybe should used reflexivity...
self.left = left
self.right = right
self.contains_aggregate = left.contains_aggregate or right.contains_aggregate
def initargs(self):
return (self.left, self.right)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.right, other.right)
if test: return test
return cmp(other.left, other.left)
def hash(self, other):
return hash(self.left) ^ hash(self.right)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.left.relbind(dict,db), self.right.relbind(dict,db) )
def uncache(self):
self.left.uncache()
self.right.uncache()
def domain(self):
return self.left.domain() + self.right.domain()
op = "=="
def __repr__(self):
return "(%s)%s(%s)" % (self.left, self.op, self.right)
def detrivialize(self):
return self
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if type(t) is not IntType and lv[i]!=rv[i]:
result[i] = 0
return result
def negated_constraints(self):
return None
def __and__(self, other):
return BTand_pred( [self, other] )
def __or__(self, other):
return BTor_pred( [self, other] )
def __invert__(self):
return BTnot_pred(self)
class BetweenPredicate(NontrivialEqPred):
"""e1 BETWEEN e2 AND e3"""
def __init__(self, middle, lower, upper):
self.middle = middle
self.lower = lower
self.upper = upper
def initargs(self):
return (self.middle, self.lower, self.upper)
def domain(self):
return (
self.middle.domain() + self.lower.domain() + self.upper.domain())
def relbind(self, dict, db):
self.middle = self.middle.relbind(dict, db)
self.lower = self.lower.relbind(dict, db)
self.upper = self.upper.relbind(dict, db)
return self
def uncache(self):
self.middle.uncache()
self.upper.uncache()
self.lower.uncache()
def __repr__(self):
return "(%s BETWEEN %s AND %s)" % (
self.middle, self.lower, self.upper)
def __hash__(self):
return hash(self.middle)^~hash(self.lower)^hash(self.upper)^55
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.lower, other.lower)
if test: return test
test = cmp(self.middle, other.middle)
if test: return test
return cmp(self.upper, other.upper)
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lowv = self.lower.value(assigns)
upv = self.upper.value(assigns)
midv = self.middle.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if tt(t) is not IntType:
midvi = midv[i]
if lowv[i]>midvi or upv[i]<midvi:
result[i] = 0
return result
class ExistsPred(NontrivialEqPred):
"""EXISTS subquery."""
contains_aggregate = 0
def __init__(self, subq):
self.cached_result = None
self.cachable = None
self.subq = subq
def initargs(self):
return (self.subq,)
def domain(self):
result = self.subq.unbound()
# if there are no outer bindings, evaluate ONCE!
if not result:
self.cachable = 1
return result
def relbind(self, dict, db):
self.subq = self.subq.relbind(db, dict)
return self
def uncache(self):
self.cached_result = None
self.subq.uncache()
def __repr__(self):
return "\nEXISTS\n%s\n" % (self.subq,)
def __call__(self, assigns, toplevel=0):
### should optimize!!!
#print "exists"
#print self.subq
from types import IntType
tt = type
eval = self.subq.eval
result = assigns[:]
# shortcut: if cachable, eval only once and cache
if self.cachable:
test = self.cached_result
if test is None:
self.cached_result = test = eval()
#print "exists cached", self.cached_result
if test:
return result
else:
return [0] * len(result)
kjDict = kjbuckets.kjDict
for i in xrange(len(assigns)):
#print "exists uncached"
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
testbtup = BoundTuple()
testbtup.assns = kjDict(assignsi)
test = eval(outerboundtuple=testbtup).rows()
#for x in test:
#print "exists for", assignsi
#print x
#break
if not test:
result[i] = 0
return result
def __hash__(self):
return hash(self.subq)^3333
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
return cmp(self.subq, other.subq)
class QuantEQ(NontrivialEqPred):
"""Quantified equal any predicate"""
def __init__(self, expr, subq):
self.expr = expr
self.subq = subq
self.cachable = 0
self.cached_column = None
self.att = None
def initargs(self):
return (self.expr, self.subq)
def uncache(self):
self.cached_column = None
def domain(self):
first = self.subq.unbound()
if not first:
self.cachable = 1
more = self.expr.domain()
return first + more
def relbind(self, dict, db):
subq = self.subq = self.subq.relbind(db, dict)
self.expr = self.expr.relbind(dict, db)
# test that subquery is single column and determine att
sl = subq.select_list
atts = sl.attorder
if len(atts)<>1:
raise ValueError, \
"Quantified predicate requires unit select list: %s" % atts
self.att = atts[0]
return self
fmt = "(%s %s ANY %s)"
op = "="
def __repr__(self):
return self.fmt % (self.expr, self.op, self.subq)
def __call__(self, assigns, toplevel=0):
cached_column = self.cached_column
cachable = self.cachable
expr = self.expr
subq = self.subq
att = self.att
if cachable:
if cached_column is None:
subqr = subq.eval().rows()
cc = self.cached_column = dump_single_column(subqr, att)
#print self, "cached", self.cached_column
exprvals = expr.value(assigns)
kjDict = kjbuckets.kjDict
compare = self.compare
tt = type
from types import IntType
result = assigns[:]
for i in xrange(len(assigns)):
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
thisval = exprvals[i]
testbtup = BoundTuple()
testbtup.assns = kjDict(assignsi)
if not cachable:
subqr = subq.eval(outerboundtuple=testbtup).rows()
cc = dump_single_column(subqr, att)
#print self, "uncached", cc, thisval
if not compare(thisval, cc):
#print "eliminated", assignsi
result[i] = 0
return result
def compare(self, value, column):
return value in column
def __hash__(self):
return hash(self.subq) ^ ~hash(self.expr)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.expr, other.expr)
if test: return test
return cmp(self.subq, other.subq)
# "expr IN (subq)" same as "expr = ANY (subq)"
InPredicate = QuantEQ
class InLits(NontrivialEqPred):
"""expr IN literals, support dynamic bindings."""
def __init__(self, expr, lits):
self.expr = expr
self.lits = lits
self.cached_lits = None
def initargs(self):
return (self.expr, self.lits)
def uncache(self):
self.cached_lits = None
def domain(self):
d = []
for l in self.lits:
d0 = l.domain()
if d0:
d = d + d0.items()
d0 = self.expr.domain()
if d:
kjSet = kjbuckets.kjSet
return d0 + kjSet(d)
else:
return d0
def relbind(self, dict, db):
newlits = []
for l in self.lits:
newlits.append(l.relbind(dict, db))
self.lits = newlits
self.expr = self.expr.relbind(dict, db)
return self
fmt = "(%s IN %s)"
def __repr__(self):
return self.fmt % (self.expr, self.lits)
def __call__(self, assigns, toplevel=0):
# LITERALS ARE CONSTANT! NEED ONLY LOOK FOR ONE ASSIGN.
tt = type
from types import IntType
litvals = self.cached_lits
if litvals is None:
assigns0 = []
for asn in assigns:
if tt(asn) is not IntType:
assigns0.append(asn)
break
if not assigns0:
# all false/unknown
return assigns
litvals = []
for lit in self.lits:
value = lit.value(assigns0)
litvals.append(value[0])
self.cached_lits = litvals
expr = self.expr
exprvals = expr.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
thisval = exprvals[i]
if thisval not in litvals:
#print "eliminated", assignsi
result[i] = 0
return result
def compare(self, value, column):
return value in column
def __hash__(self):
return 10 ^ hash(self.expr)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.expr, other.expr)
if test: return test
return cmp(self.lits, other.lits)
class QuantNE(QuantEQ):
"""Quantified not equal any predicate"""
op = "<>"
def compare(self, value, column):
for x in column:
if value!=x: return 1
return 0
### note: faster NOT IN using QuantNE?
class QuantLT(QuantEQ):
"""Quantified less than any predicate"""
op = "<"
def uncache(self):
self.testval = self.cached = self.cached_column = None
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = max(column)
self.cached = 1
else:
testval = max(column)
return value < testval
class QuantLE(QuantLT):
"""Quantified less equal any predicate"""
op = "<="
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = max(column)
self.cached = 1
else:
testval = max(column)
return value <= testval
class QuantGE(QuantLT):
"""Quantified greater equal any predicate"""
op = ">="
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = min(column)
self.cached = 1
else:
testval = min(column)
return value >= testval
class QuantGT(QuantLT):
"""Quantified greater than any predicate"""
op = ">"
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
self.testval = testval = min(column)
self.cached = 1
else:
testval = min(column)
return value > testval
def dump_single_column(assigns, att):
"""dump single column assignment"""
result = assigns[:]
for i in xrange(len(result)):
result[i] = result[i][att]
return result
class LessPred(NontrivialEqPred):
op = "<"
def __call__(self, assigns, toplevel=0):
from types import IntType
#print '***********************************************'
#print self.left, self.right
#print assigns
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if not isinstance(t, IntType) and lv[i] >= rv[i]:
result[i] = 0
return result
def __inv__(self):
return LessEqPred(self.right, self.left)
def __hash__(self):
return hash(self.left)^hash(self.right)
class LessEqPred(LessPred):
op = "<="
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if not isinstance(t, IntType) and lv[i] > rv[i]:
result[i] = 0
return result
def __inv__(self):
return LessPred(self.right, self.left)
class SubQueryExpression(BoundMinus, SimpleRecursive):
"""sub query expression (subq), must eval to single column, single value"""
def __init__(self, subq):
self.subq = subq
self.att = self.cachable = self.cached = self.cached_value = None
def initargs(self):
return (self.subq,)
def uncache(self):
self.cached = self.cached_value = None
def domain(self):
result = self.subq.unbound()
if not result:
self.cachable = 1
#print "expr subq domain", result
return result
def relbind(self, dict, db):
subq = self.subq = self.subq.relbind(db, dict)
# test that subquery is single column and determine att
sl = subq.select_list
atts = sl.attorder
if len(atts)<>1:
raise ValueError, \
"Quantified predicate requires unit select list: %s" % atts
self.att = atts[0]
return self
def __repr__(self):
return "(%s)" % self.subq
def value(self, contexts):
subq = self.subq
att = self.att
if self.cachable:
if self.cached:
cached_value = self.cached_value
else:
self.cached = 1
seval = subq.eval().rows()
lse = len(seval)
if lse<>1:
raise ValueError, \
"const subquery expression must return 1 result: got %s" % lse
self.cached_value = cached_value = seval[0][att]
#print "const subq cached", cached_value
return [cached_value] * len(contexts)
from types import IntType
tt = type
result = contexts[:]
kjDict = kjbuckets.kjDict
for i in xrange(len(contexts)):
contextsi = contexts[i]
if tt(contextsi) is not IntType:
testbtup = BoundTuple()
testbtup.assns = kjDict(contextsi)
#print "subq exp", testbtup
seval = subq.eval(outerboundtuple=testbtup).rows()
lse = len(seval)
if lse<>1:
raise ValueError, \
"dynamic subquery expression must return 1 result: got %s" % lse
result[i] = seval[0][att]
#print "nonconst subq uncached", result[i], contextsi
return result
SELECT_TEMPLATE = """\
SELECT %s %s
FROM %s
WHERE %s
GROUP BY %s
HAVING %s %s
ORDER BY %s %s
"""
def dynamic_binding(ndynamic, dynamic):
"""create bindings from dynamic tuple for ndynamic parameters
if a tuple is given create one
if a list is given create many
"""
from types import ListType,TupleType
if not dynamic:
if ndynamic>0:
raise ValueError, `ndynamic`+" dynamic parameters unbound"
return [kjbuckets.kjDict()]
ldyn = len(dynamic)
undumper = map(None, [0]*ndynamic, range(ndynamic))
undumper = tuple(undumper)
tdyn = type(dynamic)
if tdyn is TupleType:
ldyn = len(dynamic)
if len(dynamic)!=ndynamic:
raise ValueError, "%s,%s: wrong number of dynamics" % (ldyn,ndynamic)
dynamic = [dynamic]
elif tdyn is not ListType:
raise TypeError, "dynamic parameters must be list or tuple"
else:
lens = map(len, dynamic)
ndynamic = max(lens)
if ndynamic!=min(lens):
raise ValueError, "dynamic parameters of inconsistent lengths"
undumper = map(None, [0]*ndynamic, range(ndynamic))
undumper = tuple(undumper)
result = list(dynamic)
kjUndump = kjbuckets.kjUndump
for i in xrange(len(dynamic)):
dyn = dynamic[i]
ldyn = len(dyn)
#print undumper, dyn
if ldyn==1:
dynresult = kjUndump(undumper, dyn[0])
else:
dynresult = kjUndump(undumper, dyn)
result[i] = dynresult
return result
class Selector:
"""For implementing, eg the SQL SELECT statement."""
def __init__(self, alldistinct,
select_list,
table_reference_list,
where_pred,
group_list,
having_cond,
union_select =None,
order_by_spec =None,
ndynamic=0, # number of dyn params expected
):
self.ndynamic = ndynamic
self.alldistinct = alldistinct
self.select_list = select_list
self.table_list = table_reference_list
self.where_pred = where_pred
self.group_list = group_list
self.having_cond = having_cond
self.union_select = union_select
self.order_by = order_by_spec
#self.union_spec = "DISTINCT" # default union mode
self.relbindings = None # binding of relations
self.unbound_set = None # unbound attributes
self.rel_atts = None # graph of alias>attname bound in self
self.all_aggregate = 0
if select_list!="*" and not group_list:
if select_list.contains_aggregate:
### should restore this check somewhere else!
#if select_list.contains_nonaggregate:
#raise ValueError, "aggregates/nonaggregates don't mix without grouping"
self.all_aggregate = 1
if where_pred and where_pred.contains_aggregate:
raise ValueError, "aggregate in WHERE"
self.query_plan = None
def initargs(self):
#print self.alldistinct
#print self.select_list
#print self.table_list
#print self.where_pred
#print self.having_cond
#print self.union_select
#print self.group_list
#print self.order_by
#print self.ndynamic
# note: order by requires special handling
return (self.alldistinct, self.select_list, self.table_list, self.where_pred,
None, self.having_cond, self.union_select, None,
self.ndynamic)
def marshaldata(self):
order_by = self.order_by
if order_by:
order_by = map(serialize.serialize, order_by)
group_list = self.group_list
if group_list:
group_list = map(serialize.serialize, group_list)
#print "marshaldata"
#print order_by
#print group_list
return (order_by, group_list)
def demarshal(self, data):
(order_by, group_list) = data
if order_by:
order_by = map(serialize.deserialize, order_by)
if group_list:
group_list = map(serialize.deserialize, group_list)
#print "demarshal"
#print order_by
#print group_list
self.order_by = order_by
self.group_list = group_list
def unbound(self):
result = self.unbound_set
if result is None:
raise ValueError, "binding not available"
return result
def uncache(self):
wp = self.where_pred
hc = self.having_cond
sl = self.select_list
if wp is not None: wp.uncache()
if hc is not None: hc.uncache()
sl.uncache()
qp = self.query_plan
if qp:
for joiner in qp:
joiner.uncache()
def relbind(self, db, outerbindings=None):
ad = self.alldistinct
sl = self.select_list
tl = self.table_list
wp = self.where_pred
gl = self.group_list
hc = self.having_cond
us = self.union_select
ob = self.order_by
test = db.bindings(tl)
#print len(test)
#for x in test:
#print x
(attbindings, relbindings, ambiguous, ambiguousatts) = test
if outerbindings:
# bind in outerbindings where unambiguous
for (a,r) in outerbindings.items():
if ((not attbindings.has_key(a))
and (not ambiguousatts.has_key(a)) ):
attbindings[a] = r
# fix "*" select list
if sl=="*":
sl = TupleCollector()
for (a,r) in attbindings.items():
sl.addbinding(None, BoundAttribute(r,a))
for (dotted, (r,a)) in ambiguous.items():
sl.addbinding(dotted, BoundAttribute(r,a))
sl = sl.relbind(attbindings, db)
wp = wp.relbind(attbindings, db)
if hc is not None: hc = hc.relbind(attbindings, db)
if us is not None: us = us.relbind(db, attbindings)
# bind grouping if present
if gl:
gl = relbind_sequence(gl, attbindings, db)
# bind ordering list if present
#print ob
if ob:
ob = relbind_sequence(ob, attbindings, db)
ob = orderbind_sequence(ob, sl.order)
result = Selector(ad, sl, tl, wp, gl, hc, us, ob)
result.relbindings = relbindings
result.ndynamic = self.ndynamic
result.check_domains()
result.plan_query()
query_plan = result.query_plan
for i in range(len(query_plan)):
query_plan[i] = query_plan[i].relbind(db, attbindings)
return result
def plan_query(self):
"""generate a query plan (sequence of join operators)."""
rel_atts = self.rel_atts # rel>attname
where_pred = self.where_pred.detrivialize()
#select_list = self.select_list
# shortcut
if where_pred is None:
bt = BoundTuple()
else:
bt = self.where_pred.constraints
if bt is None:
bt = BoundTuple()
eqs = kjbuckets.kjGraph(bt.eqs)
witness = kjbuckets.kjDict()
# set all known and unbound atts as witnessed
for att in bt.assns.keys():
witness[att] = 1
#print self, "self.unbound_set", self.unbound_set
for att in self.unbound_set.items():
witness[att] = 1
relbindings = self.relbindings
allrels = relbindings.keys()
#print relbindings
allrels = bt.relorder(relbindings, allrels)
#print allrels
rel_atts = self.rel_atts
plan = []
for rel in allrels:
relation = relbindings[rel]
ratts = rel_atts.neighbors(rel)
h = HashJoiner(bt, rel, ratts, relation, witness)
plan.append(h)
for a in ratts:
ra = (rel, a)
witness[ra] = 1
eqs[ra] = ra
witness = witness.remap(eqs)
self.query_plan = plan
def check_domains(self):
"""determine set of unbound names in self.
"""
relbindings = self.relbindings
sl = self.select_list
wp = self.where_pred
gl = self.group_list
hc = self.having_cond
us = self.union_select
all = sl.domain().items()
if wp is not None:
all = all + wp.domain().items()
# ignore group_list ???
if hc is not None:
all = all + hc.domain().items()
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
alldomain = kjSet(all)
rel_atts = self.rel_atts = kjGraph(all)
allnames = kjSet()
#print "relbindings", relbindings.keys()
for name in relbindings.keys():
rel = relbindings[name]
for att in rel.attributes():
allnames[ (name, att) ] = 1
# union compatibility check
if us is not None:
us.check_domains()
myatts = self.attributes()
thoseatts = us.attributes()
if myatts!=thoseatts:
if len(myatts)!=len(thoseatts):
raise IndexError, "outer %s, inner %s: union select lists lengths differ"\
% (len(myatts), len(thoseatts))
for p in map(None, myatts, thoseatts):
(x,y)=p
if x!=y:
raise NameError, "%s union names don't match" % (p,)
self.unbound_set = alldomain - allnames
def attributes(self):
return self.select_list.attorder
def eval(self, dynamic=None, outerboundtuple=None):
"""leaves a lot to be desired.
dynamic and outerboundtuple are mutually
exclusive. dynamic is only pertinent to
top levels, outerboundtuple to subqueries"""
# only uncache if outerboundtuple is None (not subquery)
# ???
if outerboundtuple is None:
self.uncache()
query_plan = self.query_plan
where_pred = self.where_pred.detrivialize()
select_list = self.select_list
# shortcut
if where_pred is not None and where_pred.false:
return store.Relation0(select_list.attorder, [])
#print "where_pred", where_pred
if where_pred is None or where_pred.constraints is None:
assn0 = assn1 = kjbuckets.kjDict()
else:
assn1 = self.where_pred.constraints.assns
assn0 = assn1 = kjbuckets.kjDict(assn1)
# erase stored results from possible previous evaluation
ndynamic = self.ndynamic
if outerboundtuple is not None:
assn1 = assn1 + outerboundtuple.assns
elif ndynamic:
dyn = dynamic_binding(ndynamic, dynamic)
if len(dyn)!=1:
raise ValueError, "only one dynamic subst for selection allowed"
dyn = dyn[0]
assn1 = assn1 + dyn
#print "dynamic", bt
#print "assn1", assn1
# check unbound names
unbound_set = self.unbound_set
#print "unbound", unbound_set
#print unbound_set
#print self.rel_atts
for pair in unbound_set.items():
if not assn1.has_key(pair):
raise KeyError, `pair`+": unbound in selection"
assn1 = (unbound_set * assn1) + assn0
#print "assn1 now", assn1
substseq = [assn1]
for h in query_plan:
#print "***"
#for x in substseq:
#print x
#print "***"
substseq = h.join(substseq)
if not substseq: break
#print "***"
#for x in substseq:
#print x
#print "***"
# apply the rest of the where predicate at top level
if substseq and where_pred is not None:
#where_pred.uncache()
substseq = where_pred(substseq, 1)
# eliminate zeros/nulls
substseq = no_ints_nulls(substseq)
# apply grouping if present
group_list = self.group_list
if substseq and group_list:
substseq = aggregate(substseq, group_list)
having_cond = self.having_cond
#print having_cond
if having_cond is not None:
#having_cond.uncache()
substseq = no_ints_nulls(having_cond(substseq))
elif self.all_aggregate:
# universal group
substseq = [kjbuckets.kjDict( [(None, substseq)] ) ]
(tups, attorder) = select_list.map(substseq)
# do UNION if present
union_select = self.union_select
if union_select is not None:
tups = union_select.eval(tups, dynamic, outerboundtuple)
# apply DISTINCT if appropriate
if self.alldistinct=="DISTINCT":
tups = kjbuckets.kjSet(tups).items()
# apply ordering if present
ob = self.order_by
if ob:
tups = order_tuples(ob, tups)
return store.Relation0(attorder, tups)
def __repr__(self):
ndyn = ""
if self.ndynamic:
ndyn = "\n[%s dynamic parameters]" % self.ndynamic
result = SELECT_TEMPLATE % (
self.alldistinct,
self.select_list,
self.table_list,
self.where_pred,
self.group_list,
self.having_cond,
#union_spec,
self.union_select,
self.order_by,
ndyn
)
return result
class Union(SimpleRecursive):
"""union clause."""
def __init__(self, alldistinct, selection):
self.alldistinct = alldistinct
self.selection = selection
def initargs(self):
return (self.alldistinct, self.selection)
def unbound(self):
return self.selection.unbound()
def relbind(self, db, outer=None):
self.selection = self.selection.relbind(db, outer)
return self
def check_domains(self):
self.selection.check_domains()
def attributes(self):
return self.selection.attributes()
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
allrows = rows + assns
if self.alldistinct=="DISTINCT":
allrows = kjbuckets.kjSet(allrows).items()
return allrows
def __repr__(self):
return "\nUNION %s %s " % (self.alldistinct, self.selection)
class Intersect(Union):
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
kjSet = kjbuckets.kjSet
allrows = (kjSet(assns) & kjSet(rows)).items()
return allrows
op = "INTERSECT"
def __repr__(self):
return "\n%s %s" % (self.op, self.selection)
class Except(Union):
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
kjSet = kjbuckets.kjSet
allrows = (kjSet(assns) - kjSet(rows)).items()
return allrows
op = "EXCEPT"
class Parse_Context:
"""contextual information for parsing
p.param() returns a new sequence number for external parameter.
"""
# not serializable
parameter_index = 0
# no __init__ yet
def param(self):
temp = self.parameter_index
self.parameter_index = temp+1
return temp
def ndynamic(self):
return self.parameter_index
# update/delete/insert statements
import operations
CreateTable = operations.CreateTable
CreateIndex = operations.CreateIndex
DropIndex = operations.DropIndex
DropTable = operations.DropTable
UpdateOp = operations.UpdateOp
DeleteOp = operations.DeleteOp
InsertOp = operations.InsertOp
InsertValues = operations.InsertValues
InsertSubSelect = operations.InsertSubSelect
ColumnDef = operations.ColumnDef
CreateView = operations.CreateView
DropView = operations.DropView
# update storage structures from the database store
import store
Add_Tuples = store.Add_Tuples
Erase_Tuples = store.Erase_Tuples
Reset_Tuples = store.Reset_Tuples
#
# $Log: semantics.py,v $
# Revision 1.8 2005/01/05 02:28:03 aaron_watters
# fixed all remaining test failures. Added xsdbXML interface API
#
# Revision 1.7 2002/05/11 02:59:05 richard
# Added info into module docstrings.
# Fixed docco of kwParsing to reflect new grammar "marshalling".
# Fixed bug in gadfly.open - most likely introduced during sql loading
# re-work (though looking back at the diff from back then, I can't see how it
# wasn't different before, but it musta been ;)
# A buncha new unit test stuff.
#
# Revision 1.6 2002/05/08 00:49:00 anthonybaxter
# El Grande Grande reindente! Ran reindent.py over the whole thing.
# Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2.
#
# Revision 1.5 2002/05/07 23:19:02 richard
# Removed circular import (at import time at least)
#
# Revision 1.4 2002/05/07 14:33:00 anthonybaxter
# added a couple of 'what the??' comments.
# fixed a spot where float division would cause breakge in future (in Median)
#
# Revision 1.3 2002/05/07 09:48:54 anthonybaxter
# argh argh bastard. Average, or any op derived from it,
# assumed that the first element would always be there.
# in the case of, say, the second part of an 'or' operation,
# it might be missing.
#
# Revision 1.2 2002/05/07 04:03:14 richard
# . major cleanup of test_gadfly
#
# Revision 1.1.1.1 2002/05/06 07:31:10 richard
#
#
#
|