# -*- coding: iso-8859-1 -*-
#-----------------------------------------------------------------------------
# Modeling Framework: an Object-Relational Bridge for python
#
# Copyright (c) 2001-2004 Sébastien Bigaret <sbigaret@users.sourceforge.net>
# All rights reserved.
#
# This file is part of the Modeling Framework.
#
# This code is distributed under a "3-clause BSD"-style license;
# see the LICENSE file for details.
#-----------------------------------------------------------------------------
"""
This module is where any Qualifier strings are interpreted and turned into
real qualifier.
It is still unfinished: built qualifiers are not optimal at all. For example,
the expression "NOT ( name=='truc' AND age>30 AND age<65 )" builds two
AndQualifier, the first one joining "name=='truc' AND age>30", the second
one joining the result of the first one with "age < 65". It would be better
to have a single AndQualifier joining all three sub-expressions.
This was primarily derived from the calculator example given in package
'spark'. For sure the big part was to handle parens... I did not manipulate
grammars for years and I did not get back to the theory before doing this,
hence purists will surely get mad looking at this. If you are one and wanna
give a helpful hand, you're welcome here!
The grammar used here is the following:
qual ::= qual operator qual
qual ::= paren_open qual paren_close
qual ::= expr
expr ::= expr comp_op expr
expr ::= paren_open expr paren_close
expr ::= term
expr ::= not_expr
term ::= number
term ::= float
term ::= string
term ::= keypath
not_expr ::= not_operator qual
comp_op ::= +
comp_op ::= *
comp_op ::= <
comp_op ::= >
comp_op ::= ==
comp_op ::= >=
comp_op ::= <=
comp_op ::= !=
comp_op ::= IN
comp_op ::= NOT IN
operator ::= AND
operator ::= OR
not_operator ::= NOT
(Refer to the QualifierScanner class for a 'regexp-description' of literals
like 'string', 'keypath', etc.)
"""
try:
import spark
except ImportError:
import sys
try: del sys.modules['Modeling.QualifierParser']
except: pass
raise
from Qualifier import KeyValueQualifier,KeyComparisonQualifier,AndQualifier,OrQualifier,NotQualifier,operatorForString,allQualifierOperators
import string
def trace(msg, *args):
    """Debug hook, currently disabled (a deliberate no-op).

    Call sites throughout the module pass a %-style format string plus
    optional arguments; re-enable printing here to trace scan/parse steps.
    """
    # Intentionally does nothing.  To debug, print msg % args when args
    # are given, else msg.
    pass
#
## Minimal token class.
class Token:
    """Minimal lexer token: a type tag plus an optional attribute.

    'type' is e.g. 'keypath', 'string', 'paren_open' or an operator name;
    'attr' carries the matched text when it matters (numbers, strings,
    keypaths), otherwise None.
    """
    def __init__(self, type, attr=None):
        self.type = type
        self.attr = attr
    def __cmp__(self, other):
        # Comparing by type tag lets the spark parser match tokens
        # against grammar symbols given as plain strings.
        return cmp(self.type, other)
    def __str__(self):
        if self.attr:
            return '<Token %s: %s>' % (self.type, self.attr)
        return '<Token %s>' % (self.type,)
    __repr__ = __str__
##
## Minimal Abstract Syntax Tree class.
class AST:
    """Minimal abstract-syntax-tree node.

    The node copies its .type/.attr from the Token (or AST) passed as
    'type'; 'left' and 'right' are optional children, also gathered in
    _kids so the node is indexable (node[0], node[1]).
    """
    def __init__(self, type, left=None, right=None):
        self.type = type.type
        self.attr = type.attr
        self.left = left
        self.right = right
        # Truthiness filter mirrors the original: only truthy children
        # are recorded as kids.
        self._kids = [kid for kid in (left, right) if kid]
    def __getitem__(self, i):
        return self._kids[i]
    def __str__(self):
        inner = ''.join([repr(kid) + ', ' for kid in self._kids])
        return '<AST %s,%s [%s]>' % (self.type, self.attr, inner)
    __repr__ = __str__
#
# ASTTraversal is a deprecated interface to GenericASTTraversal.
#
# The alias is kept because TypeCheck and Interpret below subclass it.
ASTTraversal = spark.GenericASTTraversal
from spark import GenericScanner,GenericParser
#from ast import AST, ASTTraversal
#from token import Token
#
# SCANNING
#
class QualifierScanner(GenericScanner):
    """Tokenizer for qualifier-format strings.

    Each ``t_<name>`` method's docstring is a *regular expression* that
    spark's GenericScanner collects by reflection and assembles into the
    lexer -- those docstrings are functional, not documentation.  Methods
    are considered in alphabetical order of their names, hence the
    ``t_z_keypath`` / ``t_zz_error`` names forcing them to be tried last.
    """
    def __init__(self):
        GenericScanner.__init__(self)
    def tokenize(self, input):
        """Scan 'input' and return the list of Token objects produced."""
        # Token objects are accumulated in self.rv by the t_* callbacks.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv
    def error(self):
        # NOTE(review): scan errors are silently ignored here (trace is a
        # no-op); actual error reporting happens via t_zz_error below.
        trace('error')
    def t_whitespace(self, s):
        r' \s+ '
        # Whitespace produces no token.
        pass
    # (historical, disabled attempt at matching whole parenthesized groups)
    #def t_paren(self, s):
    #  r' \( ([^\(\)])+ \) '
    #  #trace('paren_open %s'%self.rv
    #  trace('Token paren_open: %s'%s)
    #  self.rv.append(Token(type='paren'))
    def t_paren_open(self, s):
        r' \( '
        #trace('paren_close %s'%self.rv
        trace('Token paren_open: %s',s)
        self.rv.append(Token(type='paren_open'))
    def t_paren_close(self, s):
        r' \) '
        #trace('paren_close %s'%self.rv
        trace('Token paren_close: %s',s)
        self.rv.append(Token(type='paren_close'))
    def t_operator(self, s):
        r' AND[ (] | and[ (] | OR[ (] | or[ (] '
        # The match includes one trailing char (space or '('); it is
        # stripped from the uppercased token and, when it is '(', re-fed
        # as a paren_open token.
        # NOTE(review): an operator at the very end of the input (no
        # trailing space/paren) will not match -- TODO confirm intended.
        trace('Token Operator: %s',s)
        self.rv.append(Token(type=s[:-1].upper()))
        if s[-1]=='(':
            self.t_paren_open(s[-1])
    def t_not_operator(self, s):
        r' NOT[ (] | not[ (] '
        # Same trailing-char handling as t_operator.
        trace('Token NOT: %s',s)
        self.rv.append(Token(type=s[:-1].upper()))
        if s[-1]=='(':
            self.t_paren_open(s[-1])
    def t_comp_op(self, s):
        r' \+ | \* | == | >= | > | <= | < | \!= | like | caseInsensitiveLike | ilike '
        # Comparison operators; the token type is the operator text itself.
        trace('Token COMP_OP: %s',s)
        self.rv.append(Token(type=s))
    def t_comp_op2(self, s):
        r' in[ [] | IN[ [] '
        # 'in'/'IN' followed by a space or '['; the trailing '[' is
        # re-emitted as a square_bracket_open token.
        trace('Token COMP_OP2: %s',s)
        self.rv.append(Token(type=s[:-1].lower()))
        if s[-1]=='[':
            self.t_square_bracket_open(s[-1])
    def t_number(self, s):
        r' \d+ '
        # NOTE(review): relies on t_float ('float' < 'number'
        # alphabetically) being tried first so "3.14" is not split --
        # TODO confirm against spark's pattern-combination order.
        trace('Token number: %s',s)
        t = Token(type='number', attr=s)
        self.rv.append(t)
    def t_float(self, s):
        r' \d+ \. \d+ '
        trace('Token float: %s',s)
        t = Token(type='float', attr=s)
        self.rv.append(t)
    def t_string(self, s):
        r' \"[^\"]*\" '
        # Double-quoted string, no escape handling; quotes are stripped.
        trace('Token string: %s',s)
        t = Token(type='string', attr=s[1:-1])
        self.rv.append(t)
    def t_nullValue(self, s):
        r' NULL | None '
        trace('Token string: %s',s)
        t = Token(type='nullValue', attr=None)
        self.rv.append(t)
    # with a 'z' so that this is interpreted AFTER t_operator
    def t_z_keypath(self, s):
        r' [a-zA-Z][a-zA-Z0-9_\.]+ '
        # NOTE(review): the regex requires at least two characters, so a
        # single-letter key ('a') falls through to t_zz_error -- TODO
        # confirm whether that is intended.
        trace('Token keyPath: %s',s)
        t = Token(type='keypath', attr=s)
        self.rv.append(t)
    def t_square_bracket_open(self, s):
        r' \[ '
        trace('Token square_bracket_open: %s',s)
        self.rv.append(Token(type='square_bracket_open'))
    def t_square_bracket_close(self, s):
        r' \] '
        trace('Token square_bracket_close: %s',s)
        self.rv.append(Token(type='square_bracket_close'))
    def t_comma(self, s):
        r' , '
        trace('Token comma: %s',s)
        self.rv.append(Token(type='comma'))
    def t_zz_error(self, s):
        r' .* '
        # Catch-all tried last: anything unmatched is a syntax error.
        raise ValueError, "Syntax Error: %s"%s
    # TBD Implementation note:
    # TBD this is needed to detect the case where not all the string is parsed
    # TBD such as in: 'name like "you" AND isActive=1' which is NOT detected
    # TBD as a syntax error otherwise
    #def t_simplekeypath(self, s):
    #  r' \c+ '
    #  trace('Token simpleKeyPath: %s',s)
    #  t = Token(type='keypath', attr=s)
    #  self.rv.append(t)
    #
    #def t_compoundkeypath(self, s):
    #  r' \c+ ( \. \c+ )+'
    #  trace('Token compound: %s',s)
    #  t = Token(type='keypath', attr=s)
    #  self.rv.append(t)
def scan(f):
    """Read the whole of file-like object 'f' and return its token list."""
    source = f.read()
    return QualifierScanner().tokenize(source)
#
# PARSING
#
class ExprParser(GenericParser):
    """Parser for the qualifier grammar (see the module docstring).

    Each ``p_<name>`` method's docstring is a *grammar production* that
    spark's GenericParser collects by reflection -- those docstrings are
    functional, not documentation.  Each method returns the AST node for
    its production; 'args' holds the right-hand-side symbols (Token or
    AST instances) in order.
    """
    def __init__(self, start='qual'):
        # 'qual' is the grammar's start symbol.
        GenericParser.__init__(self, start)
    def error(self, token):
        trace("Syntax error sur token: `%s'",token)
        raise ValueError, "Syntax error near token: `%s'" % token
    def p_qual_1(self, args):
        r' qual ::= qual operator qual '
        # Binary AND/OR: the operator becomes the node, operands the kids.
        trace('p_qual_1 %s/%s/%s', args[0], args[1], args[2])
        return AST(type=args[1],
                   left=args[0],
                   right=args[2])
    def p_qual_2(self, args):
        r' qual ::= paren_open qual paren_close '
        # Parenthesized qualifier: wrapped in a 'paren' node whose attr
        # is the inner sub-tree (unwrapped later by Interpret.n_paren).
        trace('paren qual:%s (%s-%s-%s)',args[1],args[1].left,args[1].type,args[1].right)
        #inside=AST(args[0])
        return AST(Token('paren', args[1]))
    def p_qual_3(self, args):
        r' qual ::= expr '
        trace('qual_3: %s',args[0])
        return args[0]
    def p_expr_1(self, args):
        r' expr ::= expr comp_op expr '
        # Comparison: comp_op becomes the node, operands the kids.
        trace('p_expr_1 %s/%s/%s',args[0], args[1], args[2])
        return AST(type=args[1],
                   left=args[0],
                   right=args[2])
    def p_expr_in(self, args):
        r' expr ::= expr comp_op2 square_bracket_open comma_separated square_bracket_close '
        # 'key in [a, b, ...]': the bracket tokens are dropped, the
        # comma_separated node becomes the right kid.
        trace('p_expr_1a %s/%s/%s/%s/%s',args[0],args[1],args[2],args[3],args[4])
        return AST(type=args[1],
                   left=args[0],
                   right=args[3])
    def p_expr_not_in(self, args):
        r' expr ::= expr not_in square_bracket_open comma_separated square_bracket_close '
        # 'key NOT in [a, b, ...]': same shape, with a 'not_in' operator.
        trace('p_expr_1a %s/%s/%s/%s/%s',args[0],args[1],args[2],args[3],args[4])
        return AST(type=args[1],
                   left=args[0],
                   right=args[3])
    def p_not_in(self, args):
        r' not_in ::= not_operator comp_op2 '
        # NOTE(review): the NOT and IN symbols are discarded and a bare
        # 'not_in' node returned; presumably 'not_in' is an operator
        # string that operatorForString understands -- TODO confirm.
        trace('p_not_in %s %s',args[0],args[1])
        return AST(Token('not_in'))
    def p_comma_separated_1(self, args):
        r' comma_separated ::= term '
        # Single-element list: the node's attr is a list of term sub-trees.
        trace('p_comma_separated_1 %s',args[0])
        return AST(Token('comma_separated', [args[0]]))
    def p_comma_separated_2(self, args):
        r' comma_separated ::= comma_separated comma term'
        # Left-recursive accumulation: copy the existing term list and
        # append the new term.
        trace('p_comma_separated_2 %s %s',args[0], args[2])
        l=args[0].attr[:]
        l.append(args[2])
        return AST(Token('comma_separated', l))
    def p_expr_2(self, args):
        r' expr ::= paren_open expr paren_close '
        # Parenthesized expression, same wrapping as p_qual_2.
        trace('paren expr_2:%s (%s-%s-%s)',args[1],args[1].left,args[1].type,args[1].right)
        return AST(Token('paren', args[1]))
    def p_expr_3(self, args):
        r' expr ::= term '
        trace('expr_3: %s',args[0])
        return args[0]
    def p_expr_4(self, args):
        r' expr ::= not_expr '
        trace('expr_4: %s',args[0])
        return AST(args[0])
        #return AST(Token('NOT', args[0].left))
    def p_term_1(self, args):
        r' term ::= number '
        trace('term_1: %s',args[0])
        return AST(type=args[0])
    def p_term_2(self, args):
        r' term ::= float '
        trace('term_2: %s',args[0])
        return AST(type=args[0])
    def p_term_3(self, args):
        r' term ::= string '
        trace('term_3: %s',args[0])
        return AST(type=args[0])
    def p_term_4(self, args):
        r' term ::= keypath '
        trace('term_4: %s',args[0])
        return AST(type=args[0])
    def p_term_5(self, args):
        r' term ::= nullValue '
        trace('term_5: %s',args[0])
        return AST(type=args[0])
    def p_not_expr(self, args):
        r' not_expr ::= not_operator qual '
        trace('not_expr: %s/%s',args[0],args[1])
        # The negated sub-tree is stashed in the NOT token's attr so that
        # Interpret.n_NOT can find it.  The local 'type' is unused.
        type=args[0]
        args[0].attr=args[1]
        return AST(type=args[0],
                   left=args[1])
    def p_comp_op(self, args):
        '''
        comp_op ::= +
        comp_op ::= *
        comp_op ::= <
        comp_op ::= >
        comp_op ::= ==
        comp_op ::= >=
        comp_op ::= <=
        comp_op ::= !=
        comp_op ::= like
        comp_op ::= caseInsensitiveLike
        comp_op ::= ilike
        '''
        trace('comp_op: %s',args[0])
        return AST(type=args[0])
    def p_comp_op2(self, args):
        '''
        comp_op2 ::= in
        '''
        trace('comp_op2: %s',args[0])
        return AST(type=args[0])
    def p_operator(self, args):
        '''
        operator ::= AND
        operator ::= OR
        '''
        trace('Operator: %s',args[0])
        return AST(type=args[0])
    def p_not_operator(self, args):
        r' not_operator ::= NOT '
        trace('Operator: %s',args[0])
        return AST(type=args[0])
    # (historical, disabled identity productions for parens)
    #def p_paren_open(self, args):
    #  r' paren_open ::= paren_open '
    #  trace('0')
    #  return AST()
    #
    #def p_paren_close(self, args):
    #  r' paren_close ::= paren_close '
    #  return AST()
def parse(tokens):
    """Parse the token stream produced by scan() into an AST."""
    qualifier_parser = ExprParser()
    trace(tokens)
    return qualifier_parser.parse(tokens)
#
# SEMANTIC CHECKING
#
class TypeCheck(ASTTraversal):
    """Semantic pass: walks the AST bottom-up tagging nodes with exprType.

    GenericASTTraversal dispatches on node.type: a 'number' node is
    handled by n_number, etc.; anything without a dedicated handler falls
    through to default(), where the original consistency check is now
    disabled.
    """
    def __init__(self, ast):
        ASTTraversal.__init__(self, ast)
        # Post-order so children are typed before their parents.
        self.postorder()
    def n_number(self, node):
        node.exprType = 'number'
    def n_float(self, node):
        node.exprType = 'float'
    def n_string(self, node):
        node.exprType = 'string'
    def n_keypath(self, node):
        node.exprType = 'keypath'
    def n_nullValue(self, node):
        node.exprType = 'nullValue'
    def n_paren(self, node):
        trace('N_PAREN ######%s %s',node.type,repr(node))
        node.exprType = 'paren'
    def n_comma_separated(self, node):
        trace('COMMA_SEPARATED ######%s %s',node.type,repr(node))
        node.exprType = 'comma_separated'
    def default(self, node):
        # this handles + and * nodes
        trace("TypeCheck default: node: %s %s",node,node.type)
        # The type-consistency check below is intentionally disabled:
        #leftType = node.left.exprType
        #rightType = node.right.exprType
        #if leftType != rightType:
        #  pass#raise 'Type error.'
        #node.exprType = leftType
def semantic(ast):
    """Run the semantic-checking passes over 'ast' and return it.

    TypeCheck annotates nodes in place (exprType); further
    ASTTraversal-based checks would be chained here.
    """
    TypeCheck(ast)
    return ast
#
# CODE GENERATION
#
class Interpret(ASTTraversal):
    """Code-generation pass: walks the AST bottom-up building a Qualifier.

    Every node gets a 'value' attribute: literal leaves get their Python
    value (int/float/str/None), operator nodes get a Qualifier instance.
    The finished result ends up in self.value (taken from the root node).
    """
    def __init__(self, ast):
        trace('Interpret.init() ast: %s',ast)
        ASTTraversal.__init__(self, ast)
        self.postorder()
        self.value=ast.value
        trace('Interpret returns: %s',self.value)
    def n_number(self, node):
        node.value = int(node.attr)
    def n_float(self, node):
        node.value = float(node.attr)
    def n_keypath(self, node):
        trace(repr(node))
        node.value = str(node.attr)
    def n_string(self, node):
        node.value = str(node.attr)
    def n_nullValue(self, node):
        node.value = None
    def n_paren(self, node):
        # A 'paren' node stores its inner sub-tree in node.attr (see
        # ExprParser.p_qual_2/p_expr_2): interpret it recursively.
        trace('## Interpret_PAREN ###### %s',repr(node))
        _node=Interpret(node.attr)
        node.value = _node.value
        #node.attr = _node.attr
    def n_comma_separated(self, node):
        # node.attr is a list of term sub-trees (an IN list); each is
        # interpreted independently and the values collected in order.
        trace('## Interpret comma_separated ###### %s',repr(node))
        trace('## node.attr: %s',node.attr)
        #_node=Interpret(node.attr)
        l=[]
        for attr in node.attr:
            l.append(Interpret(attr).value)
        #node.value = list(node.attr)
        node.value = l
    def n_NOT(self, node):
        # node.attr holds the negated sub-tree (stashed in p_not_expr).
        _node=Interpret(node.attr)
        trace('NOT node :%s',_node.value)
        node.value = NotQualifier(_node.value)
    def default(self, node):
        # Handles AND/OR nodes and all comparison operators.
        left = right = None
        trace('default: node: %s',node)
        # NOTE(review): the bare excepts tolerate kids without a .value;
        # 'left'/'right' are only used for tracing below.
        try: left = node.left.value
        except: pass
        try: right = node.right.value
        except: pass
        # This is the original calculator!
        #if node.type == '+':
        #  node.value = left + right
        #else:
        #  node.value = left * right
        trace('left: %s type: %s right: %s',left, node.type, right)
        if node.type == 'AND':
            node.value=AndQualifier(map(lambda o: o.value, node._kids))
            trace("node.value: %s",node.value)
        if node.type == 'OR':
            node.value=OrQualifier(map(lambda o: o.value, node._kids))
            trace("node.value: %s",node.value)
        if node.type in allQualifierOperators():
            # Comparison node: node[0] is the left operand, node[1] the
            # right.  A keypath on the right makes a key-to-key
            # comparison; a literal (or IN list) a key/value qualifier.
            if node[1].type=='keypath':
                node.value=KeyComparisonQualifier(node[0].value,
                                                  operatorForString(node.type),
                                                  node[1].value)
                trace("node.value(keypath): %s",node.value)
            if node[1].type in ('string', 'number', 'float', 'nullValue',
                                'comma_separated'):
                node.value=KeyValueQualifier(node[0].value,
                                             operatorForString(node.type),
                                             node[1].value)
                trace("node.value(other): %s",node.value)
def generate(ast):
    """Interpret 'ast' and return the resulting Qualifier object."""
    # Interpret annotates every node with .value as a side effect; the
    # final qualifier is left on the root node.
    Interpret(ast)
    return ast.value
def qualifierWithQualifierFormat(anExpression):
"Used by Qualifier.qualifierFromQualifierFormat"
if not anExpression:
return None
from cStringIO import StringIO
strIO=StringIO(anExpression)
sc=scan(strIO)
trace("\nEND scan: %s\n\n",sc)
_parse=parse(sc)
trace("\nEND parse: %s\n\n",_parse)
sem=semantic(_parse)
trace("\nEND sem: %s\n\n",sem)
result=generate(sem)
# The following ensures that the result is of the correct type
# e.g. if you supply 'age=30 AND ...', the result will be string 'age'
# This should normally be corrected within the parser, not here.
try:
if result.__class__ not in (KeyValueQualifier, KeyComparisonQualifier,
AndQualifier, OrQualifier, NotQualifier):
raise ValueError, 'Syntax error'
except:
raise ValueError, 'Syntax error'
return result
#
# MAIN
#
if __name__ == '__main__':
    # Command-line driver: tokenize, parse, check and interpret the
    # qualifier string given as argv[1], then print the result.
    import sys
    #filename = sys.argv[1]
    #f = open(filename)
    from StringIO import StringIO
    strIO=StringIO(sys.argv[1])
    sc=scan(strIO)
    trace("\nEND scan: %s\n\n",sc)
    # NOTE(review): this rebinds the module-level function 'parse' to its
    # own result; harmless here since it is not called again.
    parse=parse(sc)
    trace("\nEND parse: %s\n\n",parse)
    sem=semantic(parse)
    trace("\nEND sem: %s\n\n",sem)
    print str(generate(sem))
    #f.close()
# Tests
#python ./QualifierParser.py "((3*2)+5*(2+1))"
#python ./QualifierParser.py "((3*2)+5*(2+1)) AND (2 == 4) AND (name=='truc')"
#python ./QualifierParser.py "(((3*2)+5*(2+1)==21) AND (2 == 4) AND name=='truc') OR glop==pasglop"
#python ./QualifierParser.py "(age>70) AND (firstName >= 'Victor') "
# python ./QualifierParser.py "NOT (3*2)"
#python ./QualifierParser.py "name=='truc' OR NOT glop==pasglop"
#python ./QualifierParser.py "name=='truc' OR NOT glop==pasglop"
#python ./QualifierParser.py "NOT name=='truc'"
#python ./QualifierParser.py "(name=='truc')"
#python ./QualifierParser.py "name=='truc'"
#python ./QualifierParser.py "NOT ( name=='truc' OR NOT age>30 ) "
|