# rules for python
# based on grammar given in Programming Python by Mark Lutz
# EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE
# GRAMMAR DATA STRUCTURES.
#
ARCHIVE = "."
marshalfilename = ARCHIVE + "/pygram.mar"
pyrules = """
all ::
## input terminates with "fake" dedent (forces read of all file)
@R all1 :: all >> file_input DEDENT
## 1 term newline
##@R lead_blank :: file_input >> NEWLINE file_input
@R top_stmt :: file_input >> file_input stmt
@R file_input :: file_input >> stmt
## 2
@R simple :: stmt >> simple_stmt
@R compound :: stmt >> compound_stmt
## 3 punct ; term NEWLINE
@R one_small :: simple_stmt >> small_stmt NEWLINE
@R more_small :: simple_stmt >> small_stmt ; simple_stmt
@R small_semi :: simple_stmt >> small_stmt ; NEWLINE
## 4 kw pass
@R smexpr :: small_stmt >> expr_stmt
@R smassn :: small_stmt >> assn
@R smprint :: small_stmt >> print_stmt
@R smdel :: small_stmt >> del_stmt
@R smpass :: small_stmt >> pass
@R smflow :: small_stmt >> flow_stmt
@R smimport :: small_stmt >> import_stmt
@R smglobal :: small_stmt >> global_stmt
## access ignored
@R smexec :: small_stmt >> exec_stmt
## 5
@R cmif :: compound_stmt >> if_stmt
@R cmwhile :: compound_stmt >> while_stmt
@R cmfor :: compound_stmt >> for_stmt
@R cmtry :: compound_stmt >> try_stmt
@R cmdef :: compound_stmt >> funcdef
@R cmclass :: compound_stmt >> classdef
##6
@R exprlist :: expr_stmt >> testlist
##@R assignment :: expr_stmt >> assn
@R assn1 :: assn >> testlist = testlist
@R assnn :: assn >> testlist = assn
@R assn1c :: assn >> testlist , = testlist
@R assn1c2 :: assn >> testlist , = testlist ,
@R assnnc :: assn >> testlist , = assn
##testing @R exprassn :: expr_stmt >> expr_stmt = testlist
@R exprlistc :: expr_stmt >> testlist ,
##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,
##7 kw print
@R rprint0 :: print_stmt >> print
@R rprint :: print_stmt >> print testlist
@R rprintc :: print_stmt >> print testlist ,
##8 kw del
@R rdel :: del_stmt >> del exprlist
##9 trivially handled in #4
##10 kw raise continue break return
## eliminates 11 12 13 14
@R rbreak :: flow_stmt >> break
@R rcontinue :: flow_stmt >> continue
@R rreturn0 :: flow_stmt >> return
@R rreturn :: flow_stmt >> return testlist
@R rreturnc :: flow_stmt >> return testlist ,
@R rraise1 :: flow_stmt >> raise test
@R rraise2 :: flow_stmt >> raise test , test
@R rraise3 :: flow_stmt >> raise test , test , test
## 11 12 13 14 skipped
## 15 kw import from
@R rimport :: import_stmt >> import dotted_name_list
@R rimportc :: import_stmt >> import dotted_name_list ,
@R dnlist1 :: dotted_name_list >> dotted_name
@R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
@R rfrom :: import_stmt >> from dotted_name import name_list
@R rfroms :: import_stmt >> from dotted_name import *
@R rfromc :: import_stmt >> from dotted_name import name_list ,
@R nlistn :: name_list >> name_list , NAME
@R nlist1 :: name_list >> NAME
##16 nt NAME
@R dn1 :: dotted_name >> NAME
@R dnn :: dotted_name >> dotted_name . NAME
##17 kw global
@R global1 :: global_stmt >> global NAME
@R globaln :: global_stmt >> global_stmt , NAME
## 18 19 ignored
##20 kw exec in
@R exec1 :: exec_stmt >> exec expr
@R exec2 :: exec_stmt >> exec expr in test
@R exec3 :: exec_stmt >> exec expr in test , test
##21 kw if elif else punct :
@R ifr :: if_stmt >> if test : suite elifs
@R elifs0 :: elifs >>
@R relse :: elifs >> else : suite
@R elifsn :: elifs >> elif test : suite elifs
##22 kw while
@R while1 :: while_stmt >>
while test :
suite
@R while2 :: while_stmt >>
while test :
suite
else :
suite
##23 kw for
@R for1 :: for_stmt >>
for exprlist in testlist :
suite
@R for2 :: for_stmt >>
for exprlist in testlist :
suite
else :
suite
##24 kw try
@R tryr :: try_stmt >> try : suite excepts
@R excepts1 :: excepts >> except_clause : suite
@R excepts2 :: excepts >> except_clause : suite else : suite
@R exceptsn :: excepts >> except_clause : suite excepts
@R tryf :: try_stmt >> try : suite finally : suite
##25 kw except
@R except0 :: except_clause >> except
@R except1 :: except_clause >> except test
@R except2 :: except_clause >> except test , test
##26
@R class1 :: classdef >> class NAME : suite
@R class2 :: classdef >> class NAME ( testlist ) : suite
##27 kw def
@R rdef :: funcdef >> def NAME parameters : suite
##28, 29 punct = *
## (modified from grammar presented)
@R params1 :: parameters >> ( varargslist )
@R params1c :: parameters >> ( varargslist , )
@R params2 :: varargslist >>
## this is way too permissive: fix at semantic level
@R params3 :: varargslist >> arg
@R params4 :: varargslist >> varargslist , arg
@R argd :: arg >> NAME = test
@R arg2 :: arg >> fpdef
@R arg3 :: arg >> * NAME
@R arg4 :: arg >> ** NAME
## 30
@R fpdef1 :: fpdef >> NAME
@R fpdef2 :: fpdef >> ( fplist )
@R fpdef2c :: fpdef >> ( fplist , )
##31
@R fplist1 :: fplist >> fpdef
@R fplistn :: fplist >> fplist , fpdef
##32 t INDENT DEDENT
@R ssuite :: suite >> simple_stmt
@R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
@R stmtseq1 :: stmtseq >> stmt
@R stmtseqn :: stmtseq >> stmtseq stmt
##33 kw or cancels 53
@R testor :: test >> or_test
@R testand :: or_test >> and_test
@R testor1 :: or_test >> or_test or and_test
## @R testlambda0 :: test >> lambda : test REDUNDANT
@R testlambda1 :: test >> lambda varargslist : test
##34 kw and
@R andnot :: and_test >> not_test
@R andand :: and_test >> and_test and not_test
##35 kw not
@R notnot :: not_test >> not not_test
@R notcmp :: not_test >> comparison
##36 NOTE KWS == >= <= <> !=
@R cmpexpr :: comparison >> expr
@R cmplt :: comparison >> comparison < expr
@R cmpgt :: comparison >> comparison > expr
@R cmpeq :: comparison >> comparison == expr
@R cmpge :: comparison >> comparison >= expr
@R cmple :: comparison >> comparison <= expr
@R cmpnep :: comparison >> comparison <> expr
@R cmpne :: comparison >> comparison != expr
@R cmpin :: comparison >> comparison in expr
@R cmpnotin :: comparison >> comparison not in expr
@R cmpis :: comparison >> comparison is expr
@R cmpisnot :: comparison >> comparison is not expr
##37 kw is not punct > < ! (eliminated)
##38 p |
@R expr_xor :: expr >> xor_expr
@R expr_lor :: expr >> expr | xor_expr
##39 p ^
@R xor_and :: xor_expr >> and_expr
@R xor_xor :: xor_expr >> xor_expr ^ and_expr
##40
@R and_shift :: and_expr >> shift_expr
@R and_and :: and_expr >> and_expr & shift_expr
##41 note kw's << >x> note goofy x to avoid confusing the grammar
@R shift_arith :: shift_expr >> arith_expr
@R shift_left :: shift_expr >> shift_expr << arith_expr
@R shift_right :: shift_expr >> shift_expr >x> arith_expr
##42
@R arith_term :: arith_expr >> term
@R arith_plus :: arith_expr >> arith_expr + term
@R arith_minus :: arith_expr >> arith_expr - term
##43 p */%
@R termfactor :: term >> factor
@R termmul :: term >> term * factor
@R termdiv :: term >> term / factor
@R termmod :: term >> term % factor
## stuff for power
@R factorpower :: factor >> power
@R factorexp :: factor >> factor ** power
##44 p ~
@R powera :: power >> atom trailerlist
@R trailerlist0 :: trailerlist >>
@R trailerlistn :: trailerlist >> trailer trailerlist
@R powerp :: power >> + power
@R powerm :: power >> - power
@R poweri :: power >> ~ power
##45 t NUMBER STRING
@R nulltup :: atom >> ( )
@R parens :: atom >> ( testlist )
@R parensc :: atom >> ( testlist , )
@R nulllist :: atom >> [ ]
@R list :: atom >> [ testlist ]
@R listc :: atom >> [ testlist , ]
@R nulldict :: atom >> { }
@R dict :: atom >> { dictmaker }
@R dictc :: atom >> { dictmaker , }
@R repr :: atom >> ` testlist `
## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
@R aname :: atom >> NAME
## note number to be broken out into FLOAT OCTINT HEXINT INT
@R anumber :: atom >> NUMBER
@R astring :: atom >> stringseq
@R stringseq0 :: stringseq >> STRING
@R stringseqn :: stringseq >> stringseq STRING
##46
@R nullcall :: trailer >> ( )
@R call :: trailer >> ( arglist )
@R callc :: trailer >> ( arglist , )
@R index :: trailer >> [ subscriptdots ]
@R getattr :: trailer >> . NAME
##47
@R arg1 :: arglist >> argument
@R argn :: arglist >> arglist , argument
##@R argn1 :: arglist >> arglist , NAME = test
##48 ( !!!! is this wrong in PP?)
@R posarg :: argument >> test
## here the left test should be a NAME always, but parser doesn't like it
@R namearg :: argument >> test = test
##49 this IS wrong in PP (numeric ext)
@R nodots :: subscriptdots >> subscriptseq
@R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
@R subscript1 :: subscriptseq >> subscript
@R subscriptn :: subscriptseq >> subscriptseq , subscript
@R subscriptt :: subscript >> test
@R subscripts0 :: subscript >> :
@R subscriptsL :: subscript >> test :
@R subscriptsR :: subscript >> : test
@R subscripts :: subscript >> test : test
##50
@R exprlist1 :: exprlist >> expr
@R exprlistn :: exprlist >> exprlist , expr
##51
@R testlist0 :: testlist >> test
@R testlistn :: testlist >> testlist , test
##52
@R dictmaker1 :: dictmaker >> test : test
@R dictmaker2 :: dictmaker >> dictmaker , test : test
"""
nonterms = """
subscriptdots subscript arg
argument arglist subscriptseq params trailerlist
factor atom trailer dictmaker stringseq power
xor_expr and_expr shift_expr arith_expr term
and_test or_test not_test comparison comp_op expr
fplist stmtseq varargslist assn
expr elifs suite excepts parameters pbasic pdefault pspecial
testlist exprlist test dotted_name_list dotted_name name_list
if_stmt while_stmt for_stmt try_stmt funcdef classdef
expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
small_stmt compound_stmt stmt simple_stmt exec_stmt
file_input except_clause fpdef cmp_op
all
"""
import string
# python needs special handling for the lexical stuff
NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*"
NUMBERre = "[" + string.digits + "]+" # temporary!
STRINGre = '"[^"\n]*"' # to be overridden in lexdict
#NEWLINEre = "\n" # to be overridden in lexdict
INDENTre = "#" # a fake! to be overridden
DEDENTre = "#" # a fake! to be overridden
def echo(str):
return str
def DeclareTerminals(Grammar):
Grammar.Addterm("NAME", NAMEre, echo)
Grammar.Addterm("NUMBER", NUMBERre, echo)
Grammar.Addterm("STRING", STRINGre, echo)
#Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw!
Grammar.Addterm("INDENT", INDENTre, echo)
Grammar.Addterm("DEDENT", DEDENTre, echo)
# not >x> is a fake!
keywords = """
and break class continue def del elif else except exec
finally for from global if import in is lambda not or pass
print raise return try while == >= <= <> != >x> << NEWLINE
**
"""
from gadfly import kjParser
import string, re
from gadfly.kjParser import KEYFLAG,ENDOFFILETERM
alphanumunder = string.letters+string.digits+"_"
alpha = string.letters + "_"
# components that are part of a identifier (cannot be next to kw).
id_letters = map(None, alphanumunder)
# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = re.compile(nametermre)
# terminator re for numbers (same as above but allow "." in num).
numtermre = "[^" + alphanumunder + "\.]"
numterm = re.compile(numtermre)
parseerror = "parseerror"
pycommentre = r"(#.*)"
# whitespace regex outside of brackets
# white followed by (comment\n maybe repeated)
# DON'T EAT NEWLINE!!
pywhiteoutre = r"([ \t\r\014]|[\\]\n)*%s?" % pycommentre
pywhiteout = re.compile(pywhiteoutre)
# whitespace regex inside brackets
# white or newline possibly followed by comment, all maybe repeated
pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
pywhitein = re.compile(pywhiteinre)
# totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre
#allblank = re.compile(allblankre)
# re for indentation (might accept empty string)
indentp = re.compile(r"[\t ]*")
# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]
# >two char kws as map of first 3 chars to others
char3k_data = """
and break class continue def del elif else except
finally for from global import lambda not pass print
raise return try while exec
"""
char3kw = string.split(char3k_data)
char3kwdict = {}
for x in char3kw:
char3kwdict[x[:3]] = x
# NOTE: newline is treated same as a punctuation
# NOTE: "' ARE NOT PUNCTS
punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
punctlist = map(None, punct)
kwmap = {}
for x in char2kw + punct2 + char3kw + map(None, punct):
# everything parses as length 1 to the outer world.
kwmap[x] = (((KEYFLAG, x), x), 1)
# special hack
kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)
newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
#finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)
# Python lexical dictionary.
### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!
def RMATCH(re, key, start=0):
group = re.match(key, start)
if group is None: return -1
return group.end() - group.start()
class pylexdict(kjParser.LexDictionary):
def __init__(self):
kjParser.LexDictionary.__init__(self)
# need to add special map for >>
self.brackets = 0 # count of active brackets
self.realindex = 0 # where to start
self.indents = [""] # stack of indents (start with a fake one)
self.lineno = 0
self.atdedent = 0
### handle multiple dedents correctly!!!
### translate tabs to 8 spaces...
from gadfly.kjParser import TERMFLAG
self.NAMEflag = (TERMFLAG, "NAME")
self.STRINGflag = (TERMFLAG, "STRING")
self.NEWLINEflag = (TERMFLAG, "NEWLINE")
self.INDENTflag = (TERMFLAG, "INDENT")
self.DEDENTflag = (TERMFLAG, "DEDENT")
self.NUMBERflag = (TERMFLAG, "NUMBER")
def endoffile(self, String):
# pop off all indentations!
indents = self.indents
#lastresult = self.lastresult
self.realindex = len(String)
if not indents:
# pop indents
#print "eof after dedent"
result = self.lastresult = (ENDOFFILETERM, 0)
else:
#print "eof as dedent after", self.lastresult
del indents[-1]
if indents:
dedent = indents[-1]
else:
dedent = ""
result = self.lastresult = ((self.DEDENTflag, dedent), 1)
#print "returning eof", result, "after", lastresult
return result
def Token(self, String, StartPosition):
#print "Token", (StartPosition,
# `String[self.realindex:self.realindex+20]`, self.lastresult)
# HAVE TO FAKE OUT LEXER FOR DEDENTS
# STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
# STRING POSITION IS MAINTAINED IN LexDict object.
lastindex = self.lastindex
lastresult = self.lastresult
if self.laststring is not String:
#print "parsing new string"
self.laststring = String
# special hack: skip lead whitespace
cursor = 0
self.lineno = 1
while 1:
test = RMATCH(pywhitein,String, cursor)
if test<0: break
next = cursor + test
#print "lead skip:", next, String[cursor:next]
if String[next]!="\n": break
#skipped = String[cursor:next]
#if "\n" in skipped:
# self.lineno = (
# self.lineno + len(string.splitfields(skipped, "\n")))
#self.lineno = self.lineno+1
cursor = next + 1
self.realindex = cursor
self.saveindex = 0
self.indents = [""] # stack of indents (start with a fake one)
# pretend we saw a newline
self.lastresult = newlineresult
if StartPosition!=0:
self.laststring = None
raise ValueError, "python lexical parsing must start at zero"
lastindex = self.lastindex
lastresult = None
elif lastindex == StartPosition:
#print "returning lastresult ", lastresult
return lastresult
elif lastindex != StartPosition-1:
raise ValueError, "python lexer can't skip tokens"
#print "parsing", StartPosition, lastresult
# do newline counting here!
delta = String[self.saveindex: self.realindex]
#print "delta", `delta`
if "\n" in delta:
#print self.lineno, self.saveindex, self.realindex, `delta`
self.lineno = self.lineno + len(
string.splitfields(delta, "\n")) - 1
realindex = self.saveindex = self.realindex
self.lastindex = StartPosition
# skip whitespace (including comments)
### needs to be improved to parse blank lines, count line numbers...
# skip all totally blank lines (don't eat last newline)
atlineend = (String[realindex:realindex+1] == "\n"
or lastresult is newlineresult
or self.atdedent)
skipnewlines = (lastresult is newlineresult or
self.atdedent or
self.brackets>0)
if atlineend: #String[realindex:realindex+1]=="\n":
#print "trying to skip blank lines", String[realindex:realindex+10]
while 1:
#if String[realindex:realindex+1]=="\n":
# start = realindex+1 # move past current newline
# self.lineno = self.lineno + 1
#else:
# start = realindex
start = realindex
if skipnewlines:
while String[start:start+1]=="\n":
start = start+1
#self.lineno = self.lineno+1
#print "matching", `String[start:start+10]`
skip = RMATCH(pywhitein,String, start)
#print "skip=", skip
if skip<0: break
rs = skip + realindex + (start-realindex)
if rs==realindex: break
#print "at", rs, `String[rs]`
if (rs<len(String) and
(String[rs] == "\n" or
(skipnewlines and String[rs-1:rs]=="\n"))):
#print "skipping blank line"
#if lastresult is newlineresult or self.brackets>0:
# rs = rs + 1
#skipped = String[start:rs]
#if "\n" in skipped:
#self.lineno = self.lineno + len(
# string.splitfields(skipped, "\n"))
self.realindex = realindex = rs
#self.lineno = self.lineno+1
else:
if skipnewlines: self.realindex = realindex = start
break
#print "after skipping blank lines", `String[realindex:realindex+20]`
skipto = realindex
skip = 0
if self.brackets>0:
while 1:
#print "skipping white in brackets", skipto
if realindex>len(String):
break
if String[skipto]=="\n":
#self.lineno = self.lineno+1
skipto = skipto + 1
self.realindex = realindex = skipto
continue
skip = RMATCH(pywhiteout,String, skipto)
nextskipto = skipto+skip
#skipped = String[skipto:nextskipto]
#if "\n" in skipped:
# self.lineno = self.lineno+len(
# string.splitfields(skipped, "\n"))
if skip>0:
skipto = nextskipto
else: break
skip = skipto - realindex
elif not atlineend:
skip = RMATCH(pywhitein,String, realindex)
if skip<=0:
skip = 0
else:
#print "skipping", skip
nextri = realindex + skip
#skipped = String[realindex:nextri]
#if "\n" in skipped:
# self.lineno = self.lineno + len(
# string.splitfields(skipped, "\n"))
realindex = self.realindex = nextri
if realindex>=len(String):
return self.endoffile(String)
# now look for a keyword, name, number, punctuation,
# INDENT, DEDENT, NEWLINE
first = String[realindex]
#if last parse was newline and not in brackets:
# look for indent/dedent
if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
and first != "\n"):
#print "looking for dent", realindex, `String[realindex:realindex+20]`
match = RMATCH(indentp,String, realindex)
if match>=0:
dent = String[realindex: realindex+match]
#print "dent match", match, `dent`
oldindex = realindex
self.realindex = realindex = realindex+match
# replace tabs with 8 spaces
dent = string.joinfields(string.splitfields(dent, "\t"),
" ")
dents = self.indents
lastdent = dents[-1]
ldl = len(lastdent)
dl = len(dent)
#print "last", ldl, dents
if ldl<dl:
self.atdedent = 0
result = self.lastresult = ((self.INDENTflag, dent), 1)
dents.append(dent)
#print "indent ", result, dents
return result
if ldl>dl:
self.realindex = oldindex # back up, may have to see it again!
self.atdedent = 1
result = self.lastresult = ((self.DEDENTflag, dent), 1)
del dents[-1]
#print "dedent ", result, dl, dents
return result
# otherwise, indentation is same, keep looking
# might be at eof now:
if realindex>=len(String):
#print "returning eof"
return self.endoffile(String)
first = String[realindex]
self.atdedent = 0
from string import digits#, letters
if(firstinpunctlistand
# special case for .123 numbers (yuck!)
(first!="." or String[realindex+1] not in digits)):
# is it a 2 char punct?
first2 = String[realindex:realindex+2]
if first2 in punct2:
result = self.lastresult = kwmap[first2]
self.realindex = realindex+2
#print "2 digit punct", result
return result
# otherwise, just return normal punct
result = self.lastresult = kwmap[first]
self.realindex = self.realindex + 1
### special bookkeeping
if first=="\n":
result = newlineresult
#print "newline!"
#self.lineno = self.lineno+1
elif first in "[{(":
#print "bracket!"
self.brackets = self.brackets + 1
elif first in "]})":
#print "close bracket!"
self.brackets = self.brackets - 1
#print "1 digit punct", result
return result
if first in digits or first==".":
# parse a number...
skip = numterm.search(String, realindex)
if skip<=realindex:
raise parseerror, "number length<1 (!)"
thenumber = String[realindex:skip]
self.realindex = skip
### note don't interpret number here!!
result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
#print "number", result
return result
if first in alpha:
# try keyword...
first2 = String[realindex: realindex+2]
if first2 in char2kw:
if String[realindex+2:realindex+3] not in id_letters:
# parse a 2 char kw first2
result = self.lastresult = kwmap[first2]
self.realindex = self.realindex+2
#print "keyword 2", result
return result
first3 = String[realindex: realindex+3]
if char3kwdict.has_key(first3):
the_kw = char3kwdict[first3]
the_end = realindex+len(the_kw)
if ((the_end<len(String)) and
(String[the_end] not in id_letters) and
(String[realindex:the_end]==the_kw)):
# parse the_kw
self.realindex = the_end
result = self.lastresult = kwmap[the_kw]
#print "keyword +", result
return result
#otherwise parse an identifier
#print "looking for name:", `String[realindex:realindex+10]`
skip = nameterm.search(String, realindex)
if skip<=realindex:
raise parseerror, "identifier length<1 (!)"
theid = String[realindex:skip]
self.realindex = skip
### note don't interpret number here!!
result = self.lastresult = ((self.NAMEflag, theid), 1)
#print "id", result
return result
if first in "\"'":
# check for triplequotes
first3 = first*3
if String[realindex: realindex+3] == first3:
# parse triple quotes
start = place = realindex+3
while 1:
last = string.find(String, first3, place)
if last<0:
raise parseerror, "failed to terminate triple quotes"
if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
place = last+1
else: break
the_string = String[start: last]
self.realindex = last+3
result = self.lastresult = ((self.STRINGflag, the_string), 1)
#print "3q string", result
# count the newlines!
#newlinecount = len(string.splitfields(the_string, "\n"))
#self.lineno = self.lineno+newlinecount
#print "triple quotes", result
return result
else:
# parse single quotes
sanity = start = place = realindex+1
done = 0
while 1:
sanity = min(string.find(String, "\n", sanity), len(String))
if sanity<start:
sanity=len(String)
break
if String[sanity-1]!="\\":
break
else:
#self.lineno = self.lineno+1
sanity = sanity + 1
while 1:
last = string.find(String, first, place)
if last<0 or last>sanity:
raise parseerror, "failed to terminate single quotes"
if String[last-1:last]=="\\":
# are we at the end of an odd number of backslashes? (yuck!)
bplace = last-1
while String[bplace:bplace+1]=="\\":
bplace = bplace-1
if (last-bplace)%2==1:
break # the end quote is real!
place = last+1
else: break
the_string = String[start:last]
self.realindex = last+1
result = self.lastresult = ((self.STRINGflag, the_string), 1)
#print "1q string", result
return result
#print (String[realindex-20:realindex-1], String[realindex],
# String[realindex+1:realindex+20])
raise parseerror, "invalid first: " + `first`
# use a modified lexstringwalker
class pylexstringwalker(kjParser.LexStringWalker):
def DUMP(self):
kjParser.DumpStringWindow(self.String, self.LexDict.realindex)
## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
## to give Python line numbers. RELIES ON GLOBAL pyg
##
def hackDoParse(String, Context=None, DoReductions=1):
import sys
from gadfly import kjParser
try:
# construct the ParserObj
# add a newline to front to avoid problem with leading comment
#String = "\n%s\n" % String
Stream = pylexstringwalker( String, pyg.LexD )
Stack = [] # {-1:0} #Walkers.SimpleStack()
ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
DoReductions, Context )
# do the parse
ParseResult = ParseOb.GO()
# return final result of reduction and the context
return (ParseResult[1], Context)
#return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
except: ### for testing!!
t, v = sys.exc_type, sys.exc_value
v = ("near line", pyg.LexD.lineno, v)
raise t, v
buildinfo = """
Please edit the ARCHIVE parameter of this module (%s)
to place the python grammar archive in a standard
directory to prevent the module from rebuilding
the python grammar over and over and over...
""" % __name__
def GrammarBuild():
global pyg
from gadfly import kjParseBuild
pyg = kjParseBuild.NullCGrammar()
pyg.DoParse = hackDoParse
# override lexical dict here
pyg.LexD = pylexdict()
DeclareTerminals(pyg)
pyg.Keywords(keywords)
pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
pyg.Nonterms(nonterms)
pyg.Declarerules(pyrules)
print buildinfo
print "compiling... this may take a while..."
pyg.Compile()
print "dumping"
outfile = open(marshalfilename, "wb")
pyg.MarshalDump(outfile)
outfile.close()
print "self testing the grammar"
test(pyg)
print "\n\ndone with regeneration"
return pyg
def unMarshalpygram():
global pyg
from gadfly import kjParser
print "loading"
try:
infile = open(marshalfilename, "rb")
except IOError:
print marshalfilename, "not found, attempting creation"
pyg = GrammarBuild()
else:
pyg = kjParser.UnMarshalGram(infile)
infile.close()
pyg.DoParse = hackDoParse
# lexical override
pyg.LexD = pylexdict()
DeclareTerminals(pyg)
# BindRules(pyg)
if dotest:
print "self testing the grammar"
test(pyg)
return pyg
# not used, commented
#### interpretation rules/classes
#
#def zeroth(list, Context):
# return list[0] # eg, for all1, ignore all but first
#
## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
#def append(list, Context):
# "eg, for top_stmt, conjoin two smt lists"
# return list[0] + list[1]
#
## file_input >zeroth
#
## simple, compound, one_small, small_semi: echol
#def echol(list, Context):
# return list
#
## more_small > seq_sep
#def seq_sep(list, Context):
# list[0].append(list[2])
# return list[0]
#
## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec
## > zeroth
#
## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth
#
#
#def BindRules(pyg):
# for name in string.split("""
# all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
# smexpr smassn smprint smdel smflow smimport smglobal smexec
# """):
# pyg.Bind(name, zeroth)
# for name in string.split("""
# simple compound one_small small_semi
# """):
# pyg.Bind(name, echol)
# pyg.Bind("top_stmt", append)
# pyg.Bind("more_small", seq_sep)
teststring = """#
#
# a test string
#
from string import join, split
'''
import re
for a in l:
a.attr, a[x], b = c
else:
d = b
'''
class zzz:
'''
#doc string
'''
'''
global re, join
d = {}
for i in range(10): d[i] = i
'''
def test(c,s):
return "this"
while not done:
print done
break
list = [1,2,3]
# comment
return 5
n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8
if x==5:
while y:
for i in range(6):
raise SystemError, "oops"
"""
#teststring ="""\
## comment
#if x in y: print z
#elif 1: print w
#"""
'''
teststring="""
exec "print 1"
"""
'''
def test(grammar, context=None, teststring=teststring):
from time import time
now = time()
x = grammar.DoParse1(teststring, context)
elapsed = time()-now
print x
print elapsed
return x
regen = 0
dotest = 0
if __name__ == "__main__" :
if regen: GrammarBuild()
unMarshalpygram()
|