pygram.py : » Database » Gadfly » gadflyZip » doc » demo » kjParsing » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Database » Gadfly
Gadfly » gadflyZip » doc » demo » kjParsing » pygram.py
# rules for python
# based on grammar given in Programming Python by Mark Lutz

# EDIT THIS: the directory in which the marshalled grammar data
# structures are kept.
#
ARCHIVE = "."

# Path of the marshal file inside ARCHIVE.
marshalfilename = "%s/pygram.mar" % ARCHIVE

# Grammar rules for Python, handed to pyg.Declarerules() in GrammarBuild().
# Each rule reads "@R <rule name> :: <nonterminal> >> <body>"; a rule body
# may continue over following lines (see the while/for rules).  Lines that
# start with "##" inside the string carry notes and disabled rules
# (presumably comments in the rule notation -- confirm against kjParseBuild).
# The string is data: do not reformat it.
pyrules = """

all ::

## input terminates with "fake" dedent (forces read of all file)

@R all1 :: all >> file_input DEDENT

## 1 term newline

##@R lead_blank :: file_input >> NEWLINE file_input

@R top_stmt :: file_input >> file_input stmt
@R file_input :: file_input >> stmt


## 2
@R simple :: stmt >> simple_stmt
@R compound :: stmt >> compound_stmt

## 3 punct ; term NEWLINE
@R one_small :: simple_stmt >> small_stmt NEWLINE
@R more_small :: simple_stmt >> small_stmt ; simple_stmt
@R small_semi :: simple_stmt >> small_stmt ; NEWLINE

## 4 kw pass
@R smexpr :: small_stmt >> expr_stmt
@R smassn :: small_stmt >> assn
@R smprint :: small_stmt >> print_stmt
@R smdel :: small_stmt >> del_stmt
@R smpass :: small_stmt >> pass
@R smflow :: small_stmt >> flow_stmt
@R smimport :: small_stmt >> import_stmt
@R smglobal :: small_stmt >> global_stmt
## access ignored
@R smexec :: small_stmt >> exec_stmt

## 5
@R cmif :: compound_stmt >> if_stmt
@R cmwhile :: compound_stmt >> while_stmt
@R cmfor :: compound_stmt >> for_stmt
@R cmtry :: compound_stmt >> try_stmt
@R cmdef :: compound_stmt >> funcdef
@R cmclass :: compound_stmt >> classdef

##6
@R exprlist :: expr_stmt >> testlist
##@R assignment :: expr_stmt >> assn
@R assn1 :: assn >> testlist = testlist

@R assnn :: assn >> testlist = assn

@R assn1c :: assn >> testlist , = testlist

@R assn1c2 :: assn >> testlist , = testlist ,

@R assnnc :: assn >> testlist , = assn

##testing @R exprassn :: expr_stmt >> expr_stmt = testlist

@R exprlistc :: expr_stmt >> testlist ,

##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,

##7 kw print
@R rprint0 :: print_stmt >> print
@R rprint :: print_stmt >> print testlist
@R rprintc :: print_stmt >> print testlist ,

##8 kw del
@R rdel :: del_stmt >> del exprlist

##9 trivially handled in #4

##10 kw raise continue break return

## eliminates 11 12 13 14
@R rbreak  :: flow_stmt >> break
@R rcontinue :: flow_stmt >> continue
@R rreturn0 :: flow_stmt >> return
@R rreturn :: flow_stmt >> return testlist
@R rreturnc :: flow_stmt >> return testlist ,
@R rraise1 :: flow_stmt >> raise test
@R rraise2 :: flow_stmt >> raise test , test
@R rraise3 :: flow_stmt >> raise test , test , test

## 11 12 13 14 skipped

## 15 kw import from
@R rimport :: import_stmt >> import dotted_name_list
@R rimportc :: import_stmt >> import dotted_name_list ,
@R dnlist1 :: dotted_name_list >> dotted_name
@R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
@R rfrom :: import_stmt >> from dotted_name import name_list
@R rfroms :: import_stmt >> from dotted_name import *
@R rfromc :: import_stmt >> from dotted_name import name_list ,
@R nlistn :: name_list >> name_list  , NAME
@R nlist1 :: name_list >> NAME

##16 nt NAME
@R dn1 :: dotted_name >> NAME
@R dnn :: dotted_name >> dotted_name . NAME

##17 kw global
@R global1 :: global_stmt >> global NAME
@R globaln :: global_stmt >> global_stmt , NAME

## 18 19 ignored

##20 kw exec in
@R exec1 :: exec_stmt >> exec expr
@R exec2 :: exec_stmt >> exec expr in test
@R exec3 :: exec_stmt >> exec expr in test , test

##21  kw if elif else punct :
@R ifr :: if_stmt >> if test : suite elifs
@R elifs0 :: elifs >>
@R relse :: elifs >> else : suite
@R elifsn :: elifs >> elif test : suite elifs

##22 kw while
@R while1 :: while_stmt >>
while test :
    suite
@R while2 :: while_stmt >>
while test :
   suite
else :
   suite

##23 kw for
@R for1 :: for_stmt >>
for exprlist in testlist  :
     suite
@R for2 :: for_stmt >>
for exprlist in testlist  :
     suite
else :
     suite

##24 kw try
@R tryr :: try_stmt >> try : suite excepts
@R excepts1 :: excepts >> except_clause : suite
@R excepts2 :: excepts >> except_clause : suite else : suite
@R exceptsn :: excepts >> except_clause : suite excepts
@R tryf :: try_stmt >> try : suite finally : suite

##25 kw except
@R except0 :: except_clause >> except
@R except1 :: except_clause >> except test
@R except2 :: except_clause >> except test , test

##26
@R class1 :: classdef  >> class NAME : suite
@R class2 :: classdef  >> class NAME ( testlist ) : suite

##27 kw def
@R rdef :: funcdef >> def NAME parameters : suite

##28, 29 punct = *

## (modified from grammar presented)
@R params1 :: parameters >> ( varargslist )
@R params1c :: parameters >> ( varargslist , )
@R params2 :: varargslist >>

## this is way too permissive: fix at semantic level
@R params3 :: varargslist >> arg
@R params4 :: varargslist >> varargslist , arg
@R argd :: arg >> NAME = test
@R arg2 :: arg >> fpdef
@R arg3 :: arg >> * NAME
@R arg4 :: arg >> ** NAME

## 30
@R fpdef1 :: fpdef  >> NAME
@R fpdef2 :: fpdef  >>  ( fplist )
@R fpdef2c :: fpdef  >>  ( fplist , )

##31
@R fplist1 :: fplist >> fpdef
@R fplistn :: fplist >> fplist , fpdef

##32 t INDENT DEDENT
@R ssuite :: suite >> simple_stmt
@R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
@R stmtseq1 :: stmtseq >> stmt
@R stmtseqn :: stmtseq >> stmtseq stmt

##33 kw or cancels 53
@R testor :: test >> or_test
@R testand :: or_test >> and_test
@R testor1 :: or_test >> or_test or and_test
## @R testlambda0 :: test >> lambda : test REDUNDANT
@R testlambda1 :: test >> lambda varargslist : test

##34 kw and
@R andnot :: and_test >> not_test
@R andand :: and_test >> and_test and not_test

##35 kw not
@R notnot :: not_test >> not not_test
@R notcmp :: not_test >> comparison

##36 NOTE KWS == >= <= <> !=
@R cmpexpr :: comparison >> expr
@R cmplt :: comparison >> comparison < expr
@R cmpgt :: comparison >> comparison > expr
@R cmpeq :: comparison >> comparison == expr
@R cmpge :: comparison >> comparison >= expr
@R cmple :: comparison >> comparison <=  expr
@R cmpnep :: comparison >> comparison <> expr
@R cmpne :: comparison >> comparison != expr
@R cmpin :: comparison >> comparison in expr
@R cmpnotin :: comparison >> comparison not in expr
@R cmpis :: comparison >> comparison is expr
@R cmpisnot :: comparison >> comparison is not expr

##37 kw is not punct > < ! (eliminated)

##38 p |
@R expr_xor :: expr >> xor_expr
@R expr_lor :: expr >> expr | xor_expr

##39 p ^
@R xor_and :: xor_expr >> and_expr
@R xor_xor :: xor_expr >> xor_expr ^ and_expr

##40
@R and_shift :: and_expr >> shift_expr
@R and_and :: and_expr >> and_expr & shift_expr

##41 note kw's << >x> note goofy x to avoid confusing the grammar
@R shift_arith :: shift_expr >> arith_expr
@R shift_left :: shift_expr >> shift_expr << arith_expr
@R shift_right :: shift_expr >> shift_expr >x> arith_expr

##42
@R arith_term :: arith_expr >> term
@R arith_plus :: arith_expr >> arith_expr + term
@R arith_minus :: arith_expr >> arith_expr - term

##43 p */%
@R termfactor :: term >> factor
@R termmul :: term >> term * factor
@R termdiv :: term >> term / factor
@R termmod :: term >> term % factor

## stuff for power
@R factorpower :: factor >> power
@R factorexp :: factor >> factor ** power

##44 p ~
@R powera :: power >> atom trailerlist
@R trailerlist0 :: trailerlist >>
@R trailerlistn :: trailerlist >> trailer trailerlist
@R powerp :: power >> + power
@R powerm :: power >> - power
@R poweri :: power >> ~ power

##45 t NUMBER STRING
@R nulltup :: atom >> ( )
@R parens :: atom >> ( testlist )
@R parensc :: atom >> ( testlist , )
@R nulllist :: atom >> [ ]
@R list :: atom >> [ testlist  ]
@R listc :: atom >> [ testlist , ]
@R nulldict :: atom >> { }
@R dict :: atom >> { dictmaker   }
@R dictc :: atom >> { dictmaker , }
@R repr :: atom >> ` testlist  `
## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
@R aname :: atom >> NAME
## note number to be broken out into FLOAT OCTINT HEXINT INT
@R anumber :: atom >> NUMBER
@R astring :: atom >> stringseq
@R stringseq0 :: stringseq >> STRING
@R stringseqn :: stringseq >> stringseq STRING

##46
@R nullcall :: trailer >> ( )
@R call :: trailer >> ( arglist  )
@R callc :: trailer >> ( arglist , )
@R index :: trailer >> [ subscriptdots ]
@R getattr :: trailer >> . NAME

##47
@R arg1 :: arglist >> argument
@R argn :: arglist >> arglist , argument
##@R argn1 :: arglist >> arglist , NAME = test

##48 ( !!!! is this wrong in PP?)

@R posarg :: argument >> test

## here the left test should be a NAME always, but parser doesn't like it
@R namearg :: argument >> test = test

##49 this IS wrong in PP (numeric ext)
@R nodots :: subscriptdots >> subscriptseq
@R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
@R subscript1 :: subscriptseq >> subscript
@R subscriptn :: subscriptseq >> subscriptseq , subscript
@R subscriptt :: subscript >> test
@R subscripts0 :: subscript >> :
@R subscriptsL :: subscript >> test :
@R subscriptsR :: subscript >> : test
@R subscripts :: subscript >> test : test

##50
@R exprlist1 :: exprlist >> expr
@R exprlistn :: exprlist >> exprlist , expr

##51
@R testlist0 :: testlist >> test
@R testlistn :: testlist >> testlist , test

##52
@R dictmaker1 :: dictmaker >> test : test
@R dictmaker2 :: dictmaker >> dictmaker , test : test

"""

# Whitespace-separated names of the grammar's nonterminals, handed to
# pyg.Nonterms() in GrammarBuild().
# NOTE(review): params, comp_op, cmp_op, pbasic, pdefault and pspecial have
# no rule in pyrules, and expr is listed twice -- presumably harmless
# leftovers; confirm against kjParseBuild before pruning.
nonterms = """
subscriptdots subscript arg
argument arglist subscriptseq params trailerlist
factor atom trailer dictmaker stringseq power
xor_expr and_expr shift_expr arith_expr term
and_test or_test not_test comparison comp_op expr
fplist stmtseq varargslist assn
expr elifs suite excepts parameters pbasic pdefault pspecial
testlist exprlist test dotted_name_list dotted_name name_list
if_stmt while_stmt for_stmt try_stmt funcdef classdef
expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
small_stmt compound_stmt stmt simple_stmt exec_stmt
file_input except_clause fpdef cmp_op
all
"""

import string
# python needs special handling for the lexical stuff: these are the
# patterns registered for the terminals by DeclareTerminals().
#
# NAME: a letter or underscore followed by letters, digits or underscores.
# ascii_letters is used because Python 2 identifiers are ASCII-only, whereas
# string.letters varies with the locale; the continuation class now includes
# "_" so that names such as a_b match in full.
NAMEre = ("[" + string.ascii_letters + "_]"
          "[" + string.ascii_letters + string.digits + "_]*")
NUMBERre = "[" + string.digits + "]+" # temporary!
STRINGre = '"[^"\n]*"' # to be overridden in lexdict
#NEWLINEre = "\n" # to be overridden in lexdict
INDENTre = "#" # a fake! to be overridden
DEDENTre = "#" # a fake! to be overridden

def echo(str):
    """Identity function: hand back the argument unchanged."""
    unchanged = str
    return unchanged

def DeclareTerminals(Grammar):
    """Register the NAME, NUMBER, STRING, INDENT and DEDENT terminals.

    Each terminal is added through Grammar.Addterm with its module-level
    pattern, passing echo as the third argument.  NEWLINE is not declared
    here because newline is a keyword.
    """
    terminals = (
        ("NAME", NAMEre),
        ("NUMBER", NUMBERre),
        ("STRING", STRINGre),
        ("INDENT", INDENTre),
        ("DEDENT", DEDENTre),
    )
    for termname, pattern in terminals:
        Grammar.Addterm(termname, pattern, echo)

# Whitespace-separated keywords of the grammar, handed to pyg.Keywords() in
# GrammarBuild().  Multi-character operators and NEWLINE are listed here too.
# note: >x> is a fake!  It stands in for the right-shift operator, because
# ">>" is the separator of the rule notation itself.
keywords = """
and break class continue def del elif else except exec
finally for from global if import in is lambda not or pass
print raise return try while == >= <= <> != >x> << NEWLINE
**
"""

from gadfly import kjParser
import string, re
from gadfly.kjParser import KEYFLAG,ENDOFFILETERM

# Characters that may appear in an identifier / may start one.
# ascii_letters (not the locale-dependent string.letters) is used because
# Python 2 identifiers are ASCII-only.
alphanumunder = string.ascii_letters + string.digits + "_"
alpha = string.ascii_letters + "_"

# components that are part of an identifier (cannot be next to kw).
# This must stay a list of single characters rather than a string: the
# lexer tests possibly-empty slices against it, and "" is contained in
# every string but in no list.
id_letters = list(alphanumunder)

# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = re.compile(nametermre)

# terminator re for numbers (same as above but allow "." in num).
# "\\." spells the same two characters as the former "\." without relying
# on an unrecognized string escape.
numtermre = "[^" + alphanumunder + "\\.]"
numterm = re.compile(numtermre)

class parseerror(Exception):
    """Error raised by the Python lexer for input it cannot tokenize.

    This used to be the string "parseerror"; string exceptions can no
    longer be raised from Python 2.6 on.  The lowercase name is kept so that
    existing ``raise parseerror, "message"`` and ``except parseerror:``
    statements keep working.
    """

# a comment: "#" up to (not including) the end of the line
pycommentre = r"(#.*)"

# whitespace regex outside of brackets:
#  any run of blanks, tabs, carriage returns, form feeds and
#  backslash-newline continuations, optionally followed by one comment.
#  DON'T EAT NEWLINE!!
pywhiteoutre = r"([ \t\r\014]|[\\]\n)*" + pycommentre + "?"
pywhiteout = re.compile(pywhiteoutre)

# whitespace regex inside brackets: currently the very same pattern as
# outside of brackets.
pywhiteinre = pywhiteoutre
pywhitein = re.compile(pywhiteinre)

# re for indentation (might accept empty string)
indentp = re.compile(r"[\t ]*")

# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]

# >two char kws as map of first 3 chars to others
char3k_data = """
  and break class continue def del elif else except
  finally for from global import lambda not pass print
  raise return try while exec
"""

# str.split() replaces the deprecated function form string.split(...)
char3kw = char3k_data.split()

# first three characters of each longer keyword -> the whole keyword
# (the three-character prefixes are all distinct)
char3kwdict = {}
for x in char3kw:
    char3kwdict[x[:3]] = x

# NOTE: newline is treated same as a punctuation
# NOTE: "' ARE NOT PUNCTS
punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
# list(...) replaces the Python-2-only idiom map(None, ...)
punctlist = list(punct)

# Map from the text of every keyword and punctuation mark to the token
# result the lexer returns for it.
kwmap = {}
for x in char2kw + punct2 + char3kw + punctlist:
    # everything parses as length 1 to the outer world.
    kwmap[x] = (((KEYFLAG, x), x), 1)

# special hack: ">>" is reported as the fake keyword ">x>", and a newline
# character as the keyword NEWLINE.  newlineresult is also compared by
# identity in the lexer to detect "the previous token was a newline".
kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)
newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)

#finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)

# Python lexical dictionary.

### MUST HANDLE WHOLLY BLANK LINES CORRECTLY!

def RMATCH(re, key, start=0):
    """Return the length of the match of compiled pattern `re` on `key`
    beginning at offset `start`, or -1 when the pattern does not match there.
    """
    found = re.match(key, start)
    if found is not None:
        return found.end() - found.start()
    return -1

class pylexdict(kjParser.LexDictionary):
    def __init__(self):
        kjParser.LexDictionary.__init__(self)
        # need to add special map for >>
        self.brackets = 0 # count of active brackets
        self.realindex = 0 # where to start
        self.indents = [""] # stack of indents (start with a fake one)
        self.lineno = 0
        self.atdedent = 0
        ### handle multiple dedents correctly!!!
        ### translate tabs to 8 spaces...
        from gadfly.kjParser import TERMFLAG
        self.NAMEflag = (TERMFLAG, "NAME")
        self.STRINGflag = (TERMFLAG, "STRING")
        self.NEWLINEflag = (TERMFLAG, "NEWLINE")
        self.INDENTflag = (TERMFLAG, "INDENT")
        self.DEDENTflag = (TERMFLAG, "DEDENT")
        self.NUMBERflag = (TERMFLAG, "NUMBER")

    def endoffile(self, String):
        # pop off all indentations!
        indents = self.indents
        #lastresult = self.lastresult
        self.realindex = len(String)
        if not indents:
            # pop indents
            #print "eof after dedent"
            result = self.lastresult = (ENDOFFILETERM, 0)
        else:
            #print "eof as dedent after", self.lastresult
            del indents[-1]
            if indents:
                dedent = indents[-1]
            else:
                dedent = ""
            result = self.lastresult = ((self.DEDENTflag, dedent), 1)
        #print "returning eof", result, "after", lastresult
        return result

    def Token(self, String, StartPosition):
        #print "Token", (StartPosition,
        #  `String[self.realindex:self.realindex+20]`, self.lastresult)
        # HAVE TO FAKE OUT LEXER FOR DEDENTS
        # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
        # STRING POSITION IS MAINTAINED IN LexDict object.
        lastindex = self.lastindex
        lastresult = self.lastresult
        if self.laststring is not String:
            #print "parsing new string"
            self.laststring = String
            # special hack: skip lead whitespace
            cursor = 0
            self.lineno = 1
            while 1:
                test = RMATCH(pywhitein,String, cursor)
                if test<0: break
                next = cursor + test
                #print "lead skip:", next, String[cursor:next]
                if String[next]!="\n": break
                #skipped = String[cursor:next]
                #if "\n" in skipped:
                #   self.lineno = (
                #    self.lineno + len(string.splitfields(skipped, "\n")))
                #self.lineno = self.lineno+1
                cursor = next + 1
            self.realindex = cursor
            self.saveindex = 0
            self.indents = [""] # stack of indents (start with a fake one)
            # pretend we saw a newline
            self.lastresult = newlineresult
            if StartPosition!=0:
                self.laststring = None
                raise ValueError, "python lexical parsing must start at zero"
            lastindex = self.lastindex
            lastresult = None
        elif lastindex == StartPosition:
            #print "returning lastresult ", lastresult
            return lastresult
        elif lastindex != StartPosition-1:
            raise ValueError, "python lexer can't skip tokens"

        #print "parsing", StartPosition, lastresult
        # do newline counting here!
        delta = String[self.saveindex: self.realindex]
        #print "delta", `delta`
        if "\n" in delta:
            #print self.lineno, self.saveindex, self.realindex, `delta`
            self.lineno = self.lineno + len(
              string.splitfields(delta, "\n")) - 1
        realindex = self.saveindex = self.realindex
        self.lastindex = StartPosition

        # skip whitespace (including comments)
        ### needs to be improved to parse blank lines, count line numbers...
        # skip all totally blank lines (don't eat last newline)
        atlineend = (String[realindex:realindex+1] == "\n"
                     or lastresult is newlineresult
                     or self.atdedent)
        skipnewlines = (lastresult is newlineresult or
                        self.atdedent or
                        self.brackets>0)
        if atlineend: #String[realindex:realindex+1]=="\n":
            #print "trying to skip blank lines", String[realindex:realindex+10]
            while 1:
                #if String[realindex:realindex+1]=="\n":
                #   start = realindex+1 # move past current newline
                #   self.lineno = self.lineno + 1
                #else:
                #   start = realindex
                start = realindex
                if skipnewlines:
                    while String[start:start+1]=="\n":
                        start = start+1
                        #self.lineno = self.lineno+1
                #print "matching", `String[start:start+10]`
                skip = RMATCH(pywhitein,String, start)
                #print "skip=", skip
                if skip<0: break
                rs = skip + realindex + (start-realindex)
                if rs==realindex: break
                #print "at", rs, `String[rs]`
                if (rs<len(String) and
                    (String[rs] == "\n" or
                     (skipnewlines and String[rs-1:rs]=="\n"))):
                    #print "skipping blank line"
                    #if lastresult is newlineresult or self.brackets>0:
                    #   rs = rs + 1
                    #skipped = String[start:rs]
                    #if "\n" in skipped:
                        #self.lineno = self.lineno + len(
                        #   string.splitfields(skipped, "\n"))
                    self.realindex = realindex = rs
                    #self.lineno = self.lineno+1
                else:
                    if skipnewlines: self.realindex = realindex = start
                    break
        #print "after skipping blank lines", `String[realindex:realindex+20]`
        skipto = realindex
        skip = 0
        if self.brackets>0:
            while 1:
                #print "skipping white in brackets", skipto
                if realindex>len(String):
                    break
                if String[skipto]=="\n":
                    #self.lineno = self.lineno+1
                    skipto = skipto + 1
                    self.realindex = realindex = skipto
                    continue
                skip = RMATCH(pywhiteout,String, skipto)
                nextskipto = skipto+skip
                #skipped = String[skipto:nextskipto]
                #if "\n" in skipped:
                #   self.lineno = self.lineno+len(
                #       string.splitfields(skipped, "\n"))
                if skip>0:
                    skipto = nextskipto
                else: break
            skip = skipto - realindex
        elif not atlineend:
            skip = RMATCH(pywhitein,String, realindex)
        if skip<=0:
            skip = 0
        else:
            #print "skipping", skip
            nextri = realindex + skip
            #skipped = String[realindex:nextri]
            #if "\n" in skipped:
            #   self.lineno = self.lineno + len(
            #    string.splitfields(skipped, "\n"))
            realindex = self.realindex = nextri
        if realindex>=len(String):
            return self.endoffile(String)
        # now look for a keyword, name, number, punctuation,
        # INDENT, DEDENT, NEWLINE
        first = String[realindex]
        #if last parse was newline and not in brackets:
        #   look for indent/dedent
        if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
            and first != "\n"):
            #print "looking for dent", realindex, `String[realindex:realindex+20]`
            match = RMATCH(indentp,String, realindex)
            if match>=0:
                dent = String[realindex: realindex+match]
                #print "dent match", match, `dent`
                oldindex = realindex
                self.realindex = realindex = realindex+match
                # replace tabs with 8 spaces
                dent = string.joinfields(string.splitfields(dent, "\t"),
                                         "        ")
                dents = self.indents
                lastdent = dents[-1]
                ldl = len(lastdent)
                dl = len(dent)
                #print "last", ldl, dents
                if ldl<dl:
                    self.atdedent = 0
                    result = self.lastresult = ((self.INDENTflag, dent), 1)
                    dents.append(dent)
                    #print "indent ", result, dents
                    return result
                if ldl>dl:
                    self.realindex = oldindex # back up, may have to see it again!
                    self.atdedent = 1
                    result = self.lastresult = ((self.DEDENTflag, dent), 1)
                    del dents[-1]
                    #print "dedent ", result, dl, dents
                    return result
                # otherwise, indentation is same, keep looking
                # might be at eof now:
                if realindex>=len(String):
                    #print "returning eof"
                    return self.endoffile(String)
                first = String[realindex]
        self.atdedent = 0
        from string import digits#, letters
if(firstinpunctlistand
            # special case for .123 numbers (yuck!)
            (first!="." or String[realindex+1] not in digits)):
            # is it a 2 char punct?
            first2 = String[realindex:realindex+2]
            if first2 in punct2:
                result = self.lastresult = kwmap[first2]
                self.realindex = realindex+2
                #print "2 digit punct", result
                return result
            # otherwise, just return normal punct
            result = self.lastresult = kwmap[first]
            self.realindex = self.realindex + 1
            ### special bookkeeping
            if first=="\n":
                result = newlineresult
                #print "newline!"
                #self.lineno = self.lineno+1
            elif first in "[{(":
                #print "bracket!"
                self.brackets = self.brackets + 1
            elif first in "]})":
                #print "close bracket!"
                self.brackets = self.brackets - 1
            #print "1 digit punct", result
            return result
        if first in digits or first==".":
            # parse a number...
            skip = numterm.search(String, realindex)
            if skip<=realindex:
                raise parseerror, "number length<1 (!)"
            thenumber = String[realindex:skip]
            self.realindex = skip
            ### note don't interpret number here!!
            result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
            #print "number", result
            return result
        if first in alpha:
            # try keyword...
            first2 = String[realindex: realindex+2]
            if first2 in char2kw:
                if String[realindex+2:realindex+3] not in id_letters:
                    # parse a 2 char kw first2
                    result = self.lastresult = kwmap[first2]
                    self.realindex = self.realindex+2
                    #print "keyword 2", result
                    return result
            first3 = String[realindex: realindex+3]
            if char3kwdict.has_key(first3):
                the_kw = char3kwdict[first3]
                the_end = realindex+len(the_kw)
                if ((the_end<len(String)) and
                    (String[the_end] not in id_letters) and
                    (String[realindex:the_end]==the_kw)):
                    # parse the_kw
                    self.realindex = the_end
                    result = self.lastresult = kwmap[the_kw]
                    #print "keyword +", result
                    return result
            #otherwise parse an identifier
            #print "looking for name:", `String[realindex:realindex+10]`
            skip = nameterm.search(String, realindex)
            if skip<=realindex:
                raise parseerror, "identifier length<1 (!)"
            theid = String[realindex:skip]
            self.realindex = skip
            ### note don't interpret number here!!
            result = self.lastresult = ((self.NAMEflag, theid), 1)
            #print "id", result
            return result
        if first in "\"'":
            # check for triplequotes
            first3 = first*3
            if String[realindex: realindex+3] == first3:
                # parse triple quotes
                start = place = realindex+3
                while 1:
                    last = string.find(String, first3, place)
                    if last<0:
                        raise parseerror, "failed to terminate triple quotes"
                    if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
                        place = last+1
                    else: break
                the_string = String[start: last]
                self.realindex = last+3
                result = self.lastresult = ((self.STRINGflag, the_string), 1)
                #print "3q string", result
                # count the newlines!
                #newlinecount = len(string.splitfields(the_string, "\n"))
                #self.lineno = self.lineno+newlinecount
                #print "triple quotes", result
                return result
            else:
                # parse single quotes
                sanity = start = place = realindex+1
                done = 0
                while 1:
                    sanity = min(string.find(String, "\n", sanity), len(String))
                    if sanity<start:
                        sanity=len(String)
                        break
                    if String[sanity-1]!="\\":
                        break
                    else:
                        #self.lineno = self.lineno+1
                        sanity = sanity + 1
                while 1:
                    last = string.find(String, first, place)
                    if last<0 or last>sanity:
                        raise parseerror, "failed to terminate single quotes"
                    if String[last-1:last]=="\\":
                        # are we at the end of an odd number of backslashes? (yuck!)
                        bplace = last-1
                        while String[bplace:bplace+1]=="\\":
                            bplace = bplace-1
                        if (last-bplace)%2==1:
                            break # the end quote is real!
                        place = last+1
                    else: break
                the_string = String[start:last]
                self.realindex = last+1
                result = self.lastresult = ((self.STRINGflag, the_string), 1)
                #print "1q string", result
                return result
        #print (String[realindex-20:realindex-1], String[realindex],
        #       String[realindex+1:realindex+20])
        raise parseerror, "invalid first: " + `first`

# use a modified lexstringwalker
class pylexstringwalker(kjParser.LexStringWalker):
    """LexStringWalker whose DUMP uses the lexer's own character offset."""

    def DUMP(self):
        """Pass the input string and the lexer's current character offset
        (LexDict.realindex) to kjParser.DumpStringWindow."""
        source = self.String
        position = self.LexDict.realindex
        kjParser.DumpStringWindow(source, position)

## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
## to give Python line numbers.  RELIES ON GLOBAL pyg
##
def hackDoParse(String, Context=None, DoReductions=1):
    import sys
    from gadfly import kjParser
    try:
        # construct the ParserObj
        # add a newline to front to avoid problem with leading comment
        #String = "\n%s\n" % String
        Stream = pylexstringwalker( String, pyg.LexD )
        Stack = [] # {-1:0} #Walkers.SimpleStack()
        ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
                         DoReductions, Context )
        # do the parse
        ParseResult = ParseOb.GO()
        # return final result of reduction and the context
        return (ParseResult[1], Context)
        #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
    except: ### for testing!!
        t, v = sys.exc_type, sys.exc_value
        v = ("near line", pyg.LexD.lineno, v)
        raise t, v

# Notice printed by GrammarBuild(); the placeholder is filled with the name
# of this module.
buildinfo = """
Please edit the ARCHIVE parameter of this module (%s)
to place the python grammar archive in a standard
directory to prevent the module from rebuilding
the python grammar over and over and over...
""" % (__name__,)

def GrammarBuild():
    """Build the Python grammar from scratch, marshal it and self-test it.

    Creates a kjParseBuild.NullCGrammar, installs hackDoParse and a fresh
    pylexdict on it, declares terminals, keywords, punctuation, nonterminals
    and rules, compiles it and dumps it to marshalfilename.  The grammar is
    stored in the global pyg and also returned.
    """
    global pyg
    from gadfly import kjParseBuild
    pyg = kjParseBuild.NullCGrammar()
    pyg.DoParse = hackDoParse
    # override lexical dict here
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    pyg.Keywords(keywords)
    pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
    pyg.Nonterms(nonterms)
    pyg.Declarerules(pyrules)
    print(buildinfo)
    print("compiling... this may take a while...")
    pyg.Compile()
    print("dumping")
    outfile = open(marshalfilename, "wb")
    try:
        pyg.MarshalDump(outfile)
    finally:
        # close the file even when the dump fails
        outfile.close()
    print("self testing the grammar")
    test(pyg)
    print("\n\ndone with regeneration")
    return pyg

def unMarshalpygram():
    """Load the Python grammar from marshalfilename, building it if missing.

    When the marshal file cannot be opened the grammar is regenerated with
    GrammarBuild().  In both cases hackDoParse and a fresh pylexdict are
    installed and the terminals declared; when the global dotest is true the
    grammar is self-tested.  The grammar is stored in the global pyg and
    also returned.
    """
    global pyg
    from gadfly import kjParser
    print("loading")
    try:
        infile = open(marshalfilename, "rb")
    except IOError:
        print("%s not found, attempting creation" % marshalfilename)
        pyg = GrammarBuild()
    else:
        try:
            pyg = kjParser.UnMarshalGram(infile)
        finally:
            # close the file even when unmarshalling fails
            infile.close()
    pyg.DoParse = hackDoParse
    # lexical override
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    # BindRules(pyg)
    if dotest:
        print("self testing the grammar")
        test(pyg)
    return pyg


# not used, commented
#### interpretation rules/classes
#
#def zeroth(list, Context):
#    return list[0] # eg, for all1, ignore all but first
#
## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
#def append(list, Context):
#    "eg, for top_stmt, conjoin two smt lists"
#    return list[0] + list[1]
#
## file_input >zeroth
#
## simple, compound, one_small, small_semi: echol
#def echol(list, Context):
#    return list
#
## more_small > seq_sep
#def seq_sep(list, Context):
#    list[0].append(list[2])
#    return list[0]
#
## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec
##  > zeroth
#
## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth
#
#
#def BindRules(pyg):
#    for name in string.split("""
#        all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
#        smexpr smassn smprint smdel smflow smimport smglobal smexec
#        """):
#        pyg.Bind(name, zeroth)
#    for name in string.split("""
#        simple compound one_small small_semi
#        """):
#        pyg.Bind(name, echol)
#    pyg.Bind("top_stmt", append)
#    pyg.Bind("more_small", seq_sep)

# Sample Python source used as the default input of test().  It is data:
# the odd indentation, the blank lines and the ''' string statements are
# part of what the lexer and grammar are exercised with.
teststring = """#
#
# a test string
#
from string import join, split
'''
import re

for a in l:
    a.attr, a[x], b = c
else:
    d = b
'''
class zzz:
   '''
   #doc string
   '''
   '''
   global re, join

   d = {}
   for i in range(10): d[i] = i
   '''
   def test(c,s):
       return "this"
       while not done:
             print done
             break
       list = [1,2,3]
         # comment
       return 5


   n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8

if x==5:
   while y:
     for i in range(6):
         raise SystemError, "oops"


"""

#teststring ="""\
## comment
#if x in y: print z
#elif 1: print w
#"""

'''
teststring="""
exec "print 1"
"""
'''

def test(grammar, context=None, teststring=teststring):
    """Parse teststring with grammar.DoParse1 and time it.

    Prints the parse result followed by the elapsed seconds, then returns
    the parse result.
    """
    import time
    started = time.time()
    parsed = grammar.DoParse1(teststring, context)
    seconds = time.time() - started
    print(parsed)
    print(seconds)
    return parsed

# Script switches: a true regen rebuilds the grammar before loading it, a
# true dotest makes unMarshalpygram() self-test the loaded grammar.
regen = 0
dotest = 0

if __name__ == "__main__":
    if regen:
        GrammarBuild()
    unMarshalpygram()
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.