scanner.py :  » Development » PyObjC » trunk » pyobjc » pyobjc-metadata » Lib » PyObjCMetaData » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Development » PyObjC 
PyObjC » trunk » pyobjc » pyobjc metadata » Lib » PyObjCMetaData » scanner.py
import sre_parse, sre_compile, sre_constants
from sre_constants import BRANCH,SUBPATTERN
from re import VERBOSE,MULTILINE,DOTALL
import re 


__all__ = ['Scanner', 'Token', 'IgnoreToken', 'ScanningToken', 'InsignificantWhitespace']

class Scanner(object):
    """Tokenizer that tries every pattern of a lexicon through one regex.

    Each lexicon entry is expected to provide:

    * ``pattern`` -- regex source for the token,
    * ``example`` -- sample text used for self-verification, or ``None``,
    * ``__name__`` -- used in diagnostics,
    * a call interface ``entry(match)`` returning ``(value, next_pos)``.

    Constructing a scanner also stores a compiled ``regex`` attribute on
    every lexicon entry.
    """

    def __init__(self, lexicon, flags=(VERBOSE | MULTILINE | DOTALL), verify=True):
        """Compile *lexicon* into one alternation and optionally verify it.

        lexicon -- iterable of token classes (see the class docstring).
        flags   -- ``re`` flags used for every pattern.
        verify  -- when true, every token with a non-None ``example`` is
                   scanned against that example; text that can not be
                   matched raises ValueError.

        Re-raises ``sre_constants.error`` (after printing the token name)
        when a pattern can not be parsed.
        """
        # The lexicon is walked twice (compile, then verify); materialise it
        # so that a one-shot iterable is not silently skipped by the second
        # pass.
        lexicon = list(lexicon)

        # Index 0 is a placeholder so that actions[m.lastindex] lines up
        # with the 1-based group number given to each token below.
        self.actions = [None]

        # combine phrases into a compound pattern
        state = sre_parse.Pattern()
        state.flags = flags
        branches = []
        inner_groups = 0
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            try:
                parsed = sre_parse.parse(phrase, flags)
            except sre_constants.error:
                print("Can't parse %s" % (token.__name__,))
                raise
            # Each phrase is parsed with its own group numbering; remember
            # the largest counter seen so the compound pattern can account
            # for those groups too.
            inner_groups = max(inner_groups, parsed.pattern.groups)
            token.regex = re.compile(phrase, flags)
            branches.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (idx + 1, parsed))]))
            self.actions.append(token)

        # The groups were created by hand, so the parser state does not know
        # about them.  The standard library's re.Scanner sets the counter the
        # same way (``s.groups = len(p)+1``).
        # NOTE(review): without this, interpreters that validate compiled
        # SRE code are expected to reject the pattern -- confirm on the
        # targeted Python versions.
        state.groups = max(len(branches) + 1, inner_groups)

        compound = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(compound)

        if verify:
            for token in lexicon:
                if token.example is not None:
                    self._verify_token(token)

    def _verify_token(self, token):
        """Scan ``token.example`` and fail loudly on any unmatched text."""

        def dead(string, i, j):
            print("%s %s %s" % (token.__name__, i, j))
            print('--- PATTERN ---')
            print(token.pattern)
            print('--- PARSED EXAMPLE ---')
            print(string[:i])
            print('--- UNMATCHED CHUNK ---')
            print(repr(string[i:j]))
            raise ValueError("Token %s can not be verified" % token.__name__)

        # verify=False keeps the helper scanner from verifying recursively.
        checker = Scanner([token, InsignificantWhitespace], verify=False)
        example = token.example
        try:
            for _ in checker.iterscan(example, dead=dead):
                pass
        except:
            # Deliberately broad: show the offending example for any failure,
            # then let the original exception propagate unchanged.
            print(example)
            raise

    def iterscan(self, string, dead=None, idx=0):
        """Yield the values produced by the tokens found in *string*.

        string -- text to scan.
        dead   -- optional callable ``dead(string, start, stop)`` invoked for
                  text that no token matches.  A non-None result for a gap
                  between tokens is yielded; the result for trailing
                  unmatched text is always yielded.
        idx    -- offset at which scanning starts.

        Every token action is called with the match object and must return
        ``(value, next_pos)``.  ``value`` is yielded as is (it may be None);
        a ``next_pos`` that is neither None nor the end of the match makes
        the scanner resume from that position.
        """
        search = self.scanner.scanner(string, idx).search
        actions = self.actions
        # i is the end of the text consumed so far, [k, j) the current match.
        # Starting at idx (not 0) keeps the text before the starting offset
        # from being reported as dead space.
        i, j, k = idx, idx, idx
        end = len(string)
        while True:
            m = search()
            if m is None:
                break
            k, j = m.span()
            if i == j:
                # The match ends where the consumed text ends: no progress.
                break
            # yield for dead space
            if k != i and dead is not None:
                rval = dead(string, i, k)
                if rval is not None:
                    yield rval
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m)
                yield rval
                if next_pos is not None and next_pos != j:
                    # "fast forward" the scanner
                    j = next_pos
                    search = self.scanner.scanner(string, j).search
            i = j
        if i != end and dead is not None:
            rval = dead(string, i, end)
            yield rval
            

class Token(object):
    """Base class for lexicon entries.

    Calling the class with a match object does not return a plain instance:
    it returns whatever ``found`` returns, which for this class is the pair
    ``(token, None)``.
    """

    # Regex source for the token.
    pattern = None
    # Sample text for the token.
    example = None
    # Compiled regex; when set, ``found`` re-matches with it.
    regex = None
    # Match object recorded by ``found``.
    match = None
    # Cache for ``groupdict``.
    _groupdict = None

    def __new__(cls, match):
        """Create an instance and hand back the result of ``found(match)``."""
        instance = super(Token, cls).__new__(cls)
        return instance.found(match)

    def groupdict(self):
        """Return the named groups of the recorded match, or None.

        The dictionary is computed once and cached on the instance.
        """
        if self._groupdict is None:
            if self.match is None:
                return None
            self._groupdict = self.match.groupdict()
        return self._groupdict

    def __getitem__(self, item):
        """Look up one named group of the recorded match."""
        named_groups = self.groupdict()
        return named_groups[item]

    def found(self, match):
        """Record *match* and return ``(self, None)``.

        When the class carries a compiled ``regex``, the matched span is
        matched again with that regex and the new match is recorded instead.
        """
        own_regex = self.regex
        if own_regex is None:
            self.match = match
        else:
            start, stop = match.span()
            self.match = own_regex.match(match.string, start, stop)
        return self, None

    def __repr__(self):
        class_name = type(self).__name__
        return '%s(%r)' % (class_name, self.groupdict())

class ScanningToken(Token):
    """Token that opens a region scanned with its own nested lexicon.

    Subclasses must set ``lexicon`` (the token classes allowed inside the
    region) and ``endtoken`` (the token class that closes it; it must not be
    part of ``lexicon``).
    """

    # Scanner used for the nested region; built on demand when None.
    scanner = None
    # Optional dead-space handler passed to the nested scan.
    dead = None
    # Token class that terminates the nested region.
    endtoken = None
    # Token classes recognised inside the nested region.
    lexicon = None
    # Values collected by the nested scan (see ``matches``).
    _matches = None

    def found(self, match):
        """Record *match*, then scan onwards until ``endtoken`` is found.

        Returns ``(self, end)`` where ``end`` is the end offset of the
        match recorded by the end token, so the outer scanner resumes
        after the nested region.

        Raises ValueError when ``lexicon`` or ``endtoken`` is missing, when
        ``endtoken`` is listed in ``lexicon``, or when the nested scan runs
        out of input without producing an ``endtoken`` instance.
        """
        if self.lexicon is None:
            raise ValueError("missing lexicon")
        if self.endtoken is None:
            raise ValueError("missing endtoken")
        if self.endtoken in self.lexicon:
            raise ValueError("endtoken is present in lexicon")
        end = match.end()

        if self.regex is not None:
            self.match = self.regex.match(match.string, *match.span())
        else:
            self.match = match

        if self.scanner is None:
            # NOTE(review): this assignment lands on the instance, so unless
            # a class-level ``scanner`` is provided a new Scanner is built
            # for every occurrence of the token -- confirm whether caching
            # per class was intended.
            lex = [self.endtoken]
            lex.extend(self.lexicon)
            self.scanner = Scanner(lex)

        scanner = self.scanner.iterscan(match.string, dead=self.dead, idx=end)
        matches = self._matches = []
        for match in scanner:
            if match is None:
                # Ignored tokens are yielded as None by the scanner.
                continue
            matches.append(match)
            if isinstance(match, self.endtoken):
                return self, match.match.end()

        # The input ran out before the end token showed up.  Nothing at all
        # may have been collected, so do not index into an empty list.
        last = None
        if matches:
            last = matches[-1]
        raise ValueError(
            "EndToken not matched %r : %r" % (type(self), last))

    def matches(self):
        """Return the values collected by the nested scan."""
        return self._matches

    def __repr__(self):
        return '%s(%r, %r)' % (
            type(self).__name__,
            self.groupdict(),
            self.matches(),
        )

class IgnoreToken(Token):
    """Token whose occurrences are matched but produce no value."""

    def found(self, match):
        """Return ``(None, None)``: no value and no new scan position."""
        return (None, None)

class InsignificantWhitespace(IgnoreToken):
    """Run of whitespace; as an IgnoreToken its ``found`` yields no value."""
    # Regex source: one or more whitespace characters.
    pattern = r'\s+'
    # Sample text for the pattern: spaces, a tab, a newline and a carriage
    # return.
    example = '  \t \n \r   '
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.