parserutils.py : » Development » Bicycle-Repair-Man » bicyclerepair-0.9 » bike » parsing » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML

Python Open Source » Development » Bicycle Repair Man

Bicycle Repair Man » bicyclerepair 0.9 » bike » parsing » parserutils.py

from __future__ import generators
import re

# Matches one of the two-character escape sequences: backslash-backslash,
# backslash-double-quote or backslash-single-quote.
escapedQuotesRE = re.compile(r"(\\\\|\\\"|\\\')")


# Changes every escaped backslash, escaped double quote and escaped single
# quote into "**", so that text searches for " and ' won't hit escaped ones.
# Each two-character escape becomes two characters, so the length of the
# text (and therefore every column position) is unchanged.
def maskEscapedQuotes(src):
    masked = escapedQuotesRE.sub("**", src)
    return masked

# Pattern used to split source text into code and string/comment pieces.
# Its single capturing group matches, in this order of preference: a
# """...""" string, a '''...''' string, a "..." string, a '...' string, or
# a comment running from '#' up to and including the next newline.
# re.DOTALL lets the triple-quoted alternatives span several lines.
# NOTE: a comment is only matched when it is followed by a newline, so a
# comment on a final line without a trailing newline is not matched.
stringsAndCommentsRE = re.compile(
    "(\"\"\".*?\"\"\"|'''.*?'''|\"[^\"]*\"|\'[^\']*\'|#.*?\n)",
    re.DOTALL)

import string
#transtable = string.maketrans('classdefifforwhiletry', "*********************")

# Upper-cases the text of every string and comment so that text searches
# for (lower-case) python keywords won't accidentally find a keyword
# inside a string or comment.  Escaped quotes and backslashes are first
# replaced by "**" (see escapedQuotesRE) so they cannot be mistaken for
# string delimiters.  The result has the same length as src.
def maskPythonKeywordsInStringsAndComments(src):
    src = escapedQuotesRE.sub("**", src)
    pieces = stringsAndCommentsRE.split(src)
    # The pattern has exactly one capturing group, so split() alternates
    # code, match, code, ...: every odd element is a string or comment.
    # A slice assignment avoids the Python-2-only xrange().
    pieces[1::2] = [piece.upper() for piece in pieces[1::2]]
    return "".join(pieces)


# Translation table that turns every character except newline into '*'.
# string.maketrans only exists on Python 2; on Python 3 fall back to
# str.maketrans so that importing this module does not fail.
try:
    _maketrans = string.maketrans          # Python 2
    # the identity table: all 256 byte values in order
    allchars = _maketrans("", "")
except AttributeError:
    _maketrans = str.maketrans             # Python 3
    allchars = "".join([chr(i) for i in range(256)])

# '\n' occurs exactly once in allchars, so replace() drops just that char
allcharsExceptNewline = allchars.replace('\n', '')
# NOTE: on Python 3 the table only covers code points 0-255; characters
# above that range are left unchanged by translate().
allcharsExceptNewlineTranstable = _maketrans(allcharsExceptNewline,
                                             '*' * len(allcharsExceptNewline))


# Replaces all chars in a string or a comment with * (except newlines).
# This ensures that text searches don't mistake string/comment text for
# keywords, and that all matches stay on the same line as the original.
# The delimiters themselves are kept: the three quote characters at each
# end of a triple-quoted string, otherwise the first and last character of
# the piece (the quotes of a string, or the '#' and newline of a comment).
def maskStringsAndComments(src):
    src = escapedQuotesRE.sub("**", src)
    pieces = stringsAndCommentsRE.split(src)
    # every odd element is a string or comment (range() rather than the
    # Python-2-only xrange())
    for i in range(1, len(pieces), 2):
        token = pieces[i]
        if token[:3] in ("'''", '"""'):
            edge = 3
        else:
            edge = 1
        pieces[i] = (token[:edge]
                     + token[edge:-edge].translate(allcharsExceptNewlineTranstable)
                     + token[-edge:])
    return "".join(pieces)


# Replaces all chars inside a string with * (except newlines) and replaces
# every comment with a single newline.  String delimiters are kept: the
# three quote characters at each end of a triple-quoted string, otherwise
# the first and last character of the string.  Because comments are
# removed, the result can be shorter than src, but the number of newlines
# is unchanged.
def maskStringsAndRemoveComments(src):
    src = escapedQuotesRE.sub("**", src)
    pieces = stringsAndCommentsRE.split(src)
    # every odd element is a string or comment (range() rather than the
    # Python-2-only xrange())
    for i in range(1, len(pieces), 2):
        token = pieces[i]
        if token.startswith("#"):
            # a matched comment always ends with its newline; keep only that
            pieces[i] = '\n'
            continue
        if token[:3] in ("'''", '"""'):
            edge = 3
        else:
            edge = 1
        pieces[i] = (token[:edge]
                     + token[edge:-edge].translate(allcharsExceptNewlineTranstable)
                     + token[-edge:])
    return "".join(pieces)
        

# (opener, closer) pairs; a line with an unclosed opener continues onto the
# next physical line.
implicitContinuationChars = (
    ('(', ')'),
    ('[', ']'),
    ('{', '}'),
)
# Template for the per-pair nesting counters.  It has five slots although
# only three pairs are defined above; the extra slots simply stay at 0.
emptyHangingBraces = [0] * 5
# A backslash at the end of the line, optionally followed by whitespace and
# a comment: explicit line continuation.
linecontinueRE = re.compile(r"\\\s*(#.*)?$")
# Text containing a complete triple-quoted string (DOTALL, so it may span
# several lines).
multiLineStringsRE = re.compile(
    "(^.*?\"\"\".*?\"\"\".*?$|^.*?'''.*?'''.*?$)",
    re.DOTALL)

#def splitLogicalLines(src):
#    src = multiLineStringsRE.split(src)

# Splits the string into a list of logical lines.  This requires the
# comments to be removed, and strings masked (see other fns in this
# module).  Line endings are kept on the physical lines, so each logical
# line is the plain concatenation of the physical lines it is made of.
def splitLogicalLines(src):
    return list(generateLogicalLines(src.splitlines(True)))


# Raised by generateLogicalLines when a line closes more (, [ or { than
# have been opened so far.  Derives from Exception so that it can be raised
# on Python 3 as well (only BaseException subclasses may be raised there).
class UnbalancedBracesException(Exception):
    pass

# Generator: joins physical lines into logical lines and yields each
# logical line as one string.  This requires the strings to be masked (see
# other fns in this module).
# Physical Lines *Must* start on a non-continued non-in-a-comment line
# (although a closing brace without a matching opener is detected and
# raises UnbalancedBracesException).
# A physical line continues the current logical line when it ends with a
# backslash continuation, leaves a (, [ or { unclosed, or leaves a
# triple-quoted string open.
# NOTE: physical lines still pending when the input runs out (i.e. the
# last logical line never terminates) are not yielded.
def generateLogicalLines(physicallines):
    pending = []
    depths = list(emptyHangingBraces)
    insideTripleQuote = 0
    for physical in physicallines:
        # update the nesting depth of each kind of brace
        for idx, (opener, closer) in enumerate(implicitContinuationChars):
            depths[idx] = depths[idx] + physical.count(opener) - \
                          physical.count(closer)

        # an odd number of triple quotes on a line flips the open/closed state
        insideTripleQuote ^= physical.count('"""') % 2
        insideTripleQuote ^= physical.count("'''") % 2

        if depths[0] < 0 or depths[1] < 0 or depths[2] < 0:
            raise UnbalancedBracesException()

        pending.append(physical)
        continued = (linecontinueRE.search(physical)
                     or depths != emptyHangingBraces
                     or insideTripleQuote)
        if not continued:
            yield "".join(pending)
            pending = []
            tmp = []
    

# Like generateLogicalLines, but yields (line, linenum) tuples where
# linenum is the linenum attribute of the first physical line of the
# logical line.
#   needs physicallines to have linenum attribute
#   unlike generateLogicalLines, no exception is raised here when a brace
#   count becomes negative
#   TODO: refactor with generateLogicalLines
def generateLogicalLinesAndLineNumbers(physicallines):
    pending = []
    depths = list(emptyHangingBraces)
    insideTripleQuote = 0
    firstLinenum = None
    for physical in physicallines:
        if not pending:
            # this physical line starts a new logical line
            firstLinenum = physical.linenum

        # update the nesting depth of each kind of brace
        for idx, (opener, closer) in enumerate(implicitContinuationChars):
            depths[idx] = depths[idx] + physical.count(opener) - \
                          physical.count(closer)

        # an odd number of triple quotes on a line flips the open/closed state
        insideTripleQuote ^= physical.count('"""') % 2
        insideTripleQuote ^= physical.count("'''") % 2

        pending.append(physical)
        continued = (linecontinueRE.search(physical)
                     or depths != emptyHangingBraces
                     or insideTripleQuote)
        if not continued:
            yield "".join(pending), firstLinenum
            pending = []
            tmp = []
        



# takes a line of code, and decorates it with noops so that it can be
# parsed by the python compiler.
# e.g.  "if foo:"  -> "if foo: pass"
# returns the decorated line only (no column adjustment is returned)
# line must have strings and comments masked
#
# N.B. it only inserts keywords whitespace and 0's
# Optional leading whitespace followed by the first non-whitespace char
# (captured).  Raw string: "\s" and "\S" are not valid string escapes.
notSpaceRE = re.compile(r"\s*(\S)")
# A comment: from '#' to the end of the line.
commentRE = re.compile(r"#.*$")

def makeLineParseable(line):
    # strip a trailing comment first, then decorate what is left
    uncommented = commentRE.sub("", line)
    return makeLineParseableWhenCommentsRemoved(uncommented)

# A leading keyword that needs special treatment, matched as a whole word
# so that identifiers which merely start with a keyword (tryAgain,
# elsewhere, exceptions, yielded, ...) are left alone.
_leadingKeywordRE = re.compile(r"(try|except|finally|else|elif|yield)\b")


# Decorates a single, comment-free line of code with noops so that it can
# be parsed on its own, and returns the decorated line:
#   "if foo:"      -> "if foo: pass"
#   "try:"         -> "try: pass\nexcept: pass"
#   "except Foo:"  -> "try: pass\nexcept Foo: pass"   (same for finally)
#   "else:"        -> "if 0: pass\nelse: pass"        (same for elif)
#   "yield x"      -> "return x"   (only when the line contains no ':')
# Leading and trailing whitespace is stripped.  Any other line is returned
# unchanged apart from the stripping.
def makeLineParseableWhenCommentsRemoved(line):
    line = line.strip()
    found = _leadingKeywordRE.match(line)
    if found:
        keyword = found.group(1)
    else:
        keyword = None

    if ":" not in line:
        if keyword == "yield":
            return "return" + line[len("yield"):]
        return line

    # a block header with no body on the same line gets a dummy body
    if line.endswith(":"):
        line += " pass"

    if keyword == "try":
        # a try needs at least one handler
        return line + "\nexcept: pass"
    if keyword in ("except", "finally"):
        # a handler needs a preceding try
        return "try: pass\n" + line
    if keyword in ("else", "elif"):
        # an else/elif needs a preceding if
        return "if 0: pass\n" + line
    return line

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.