simpleparsegrammar.py : » Parser » SimpleParse » SimpleParse-2.1.1a2 » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Parser » SimpleParse
SimpleParse » SimpleParse 2.1.1a2 » simpleparsegrammar.py
'''Default SimpleParse EBNF grammar as a generator with productions

This module defines the original SimpleParse
grammar.  It uses the generator objects directly
as this is the first grammar being written.
'''
from simpleparse.objectgenerator import *
from simpleparse import generator,baseparser
import string
from simpleparse.dispatchprocessor import *
# Feature probe: merely referencing the ``unicode`` builtin raises NameError
# on interpreters that do not provide it.
try:
  unicode
except NameError:
  HAVE_UNICODE = 0
else:
  HAVE_UNICODE = 1

# Element tokens shared by the production definitions further down.
# Historical note carried over from the original author: whitespace is handled
# slightly differently because of a bug with NULL-matching repeating groups,
# so references to ts are meant to behave as "ts?".
whitespace = Name(value="ts", report=0)
element_token = Name(value="element_token")
literal = Name(value="literal")
group = Name(value="group")
characterrange = Name(value="range")
name = Name(value="name")


# Generator which collects the productions of the bootstrap grammar.
SPGenerator = generator.Generator()

# declarationset := declaration+
SPGenerator.addDefinition(
  "declarationset",
  Name(value="declaration", repeating=1),
)

# declaration := ts, (unreportedname/expandedname/name), ts, ':', ':'?, '=', seq_group
SPGenerator.addDefinition(
  "declaration",
  SequentialGroup(children=[
    whitespace,
    FirstOfGroup(children=[
      Name(value="unreportedname"),
      Name(value="expandedname"),
      Name(value="name"),
    ]),
    whitespace,
    Literal(value=":"),
    Literal(value=":", optional=1),
    Literal(value="="),
    Name(value="seq_group"),
  ]),
)

# >group< := '(', seq_group, ')'
SPGenerator.addDefinition(
  "group",
  SequentialGroup(
    expanded=1,
    children=[
      Literal(value="("),
      Name(value="seq_group"),
      Literal(value=")"),
    ],
  ),
)

# Alternatives accepted at every position of a seq_group; the same object is
# used for the first item and for each item following a seq_indicator.
_seq_children = FirstOfGroup(children=[
  Name(value="error_on_fail"),
  Name(value="fo_group"),
  Name(value="element_token"),
])

# seq_group := ts, (error_on_fail/fo_group/element_token),
#              (ts, seq_indicator, ts, (error_on_fail/fo_group/element_token))*, ts
SPGenerator.addDefinition(
  "seq_group",
  SequentialGroup(children=[
    whitespace,
    _seq_children,
    SequentialGroup(
      optional=1, repeating=1,
      children=[
        whitespace,
        Name(value="seq_indicator"),
        whitespace,
        _seq_children,
      ],
    ),
    whitespace,
  ]),
)

# fo_group := element_token, (ts, fo_indicator, ts, element_token)+
SPGenerator.addDefinition(
  "fo_group",
  SequentialGroup(children=[
    element_token,
    SequentialGroup(
      repeating=1,
      children=[
        whitespace,
        Name(value="fo_indicator"),
        whitespace,
        element_token,
      ],
    ),
  ]),
)

# <seq_indicator> := ','
SPGenerator.addDefinition("seq_indicator", Literal(value=",", report=0))

# <fo_indicator> := "/"
SPGenerator.addDefinition("fo_indicator", Literal(value="/", report=0))

# element_token := lookahead_indicator?, ts, negpos_indicator?, ts,
#                  (literal/range/group/name), ts, occurence_indicator?,
#                  ts, error_on_fail?
SPGenerator.addDefinition(
  "element_token",
  SequentialGroup(children=[
    Name(value="lookahead_indicator", optional=1),
    whitespace,
    Name(value="negpos_indicator", optional=1),
    whitespace,
    FirstOfGroup(children=[literal, characterrange, group, name]),
    whitespace,
    Name(value="occurence_indicator", optional=1),
    whitespace,
    Name(value="error_on_fail", optional=1),
  ]),
)

# negpos_indicator := [-+]
SPGenerator.addDefinition("negpos_indicator", Range(value="+-"))

# lookahead_indicator := "?"
SPGenerator.addDefinition("lookahead_indicator", Literal(value="?"))

# occurence_indicator := [+*?]
SPGenerator.addDefinition("occurence_indicator", Range(value="+*?"))

# error_on_fail := "!", (ts, literal)?
SPGenerator.addDefinition(
  "error_on_fail",
  SequentialGroup(children=[
    Literal(value="!"),
    SequentialGroup(
      optional=1,
      children=[whitespace, Name(value="literal")],
    ),
  ]),
)

# unreportedname := '<', ts, name, ts, '>'
SPGenerator.addDefinition(
  "unreportedname",
  SequentialGroup(children=[
    Literal(value="<"),
    whitespace,
    name,
    whitespace,
    Literal(value=">"),
  ]),
)

# expandedname := '>', ts, name, ts, '<'
SPGenerator.addDefinition(
  "expandedname",
  SequentialGroup(children=[
    Literal(value=">"),
    whitespace,
    name,
    whitespace,
    Literal(value="<"),
  ]),
)

# name := [a-zA-Z_], [a-zA-Z0-9_]*
SPGenerator.addDefinition(
  "name",
  SequentialGroup(children=[
    Range(value='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'),
    Range(
      value='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789',
      repeating=1, optional=1,
    ),
  ]),
)

# ts := ( [ \011-\015]+ / comment )*
SPGenerator.addDefinition(
  "ts",
  FirstOfGroup(
    optional=1, repeating=1,
    children=[
      Range(value=' \011\012\013\014\015', repeating=1),
      Name(value="comment"),
    ],
  ),
)

# comment := '#', -'\n'*, '\n'
SPGenerator.addDefinition(
  "comment",
  SequentialGroup(children=[
    Literal(value="#"),
    Literal(value="\n", negative=1, repeating=1, optional=1),
    Literal(value="\n"),
  ]),
)

# literalDecorator := [c]
SPGenerator.addDefinition("literalDecorator", Range(value='c'))

# literal := literalDecorator?,
#            ( ("'", (CHARNOSNGLQUOTE/ESCAPEDCHAR)*, "'") /
#              ('"', (CHARNODBLQUOTE/ESCAPEDCHAR)*, '"') )
SPGenerator.addDefinition(
  "literal",
  SequentialGroup(children=[
    Name(value='literalDecorator', optional=1),
    FirstOfGroup(children=[
      # single-quoted form
      SequentialGroup(children=[
        Literal(value="'"),
        FirstOfGroup(
          repeating=1, optional=1,
          children=[
            Name(value="CHARNOSNGLQUOTE"),
            Name(value="ESCAPEDCHAR"),
          ],
        ),
        Literal(value="'"),
      ]),
      # double-quoted form
      SequentialGroup(children=[
        Literal(value='"'),
        FirstOfGroup(
          repeating=1, optional=1,
          children=[
            Name(value="CHARNODBLQUOTE"),
            Name(value="ESCAPEDCHAR"),
          ],
        ),
        Literal(value='"'),
      ]),
    ]),
  ]),
)

# range := '[', CHARBRACE?, CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?, ']'
SPGenerator.addDefinition(
  "range",
  SequentialGroup(children=[
    Literal(value="["),
    Name(value="CHARBRACE", optional=1),
    Name(value="CHARDASH", optional=1),
    FirstOfGroup(
      repeating=1, optional=1,
      children=[
        Name(value="CHARRANGE"),
        Name(value="CHARNOBRACE"),
      ],
    ),
    Name(value="CHARDASH", optional=1),
    Literal(value="]"),
  ]),
)

# CHARBRACE := ']'
SPGenerator.addDefinition("CHARBRACE", Literal(value="]"))

# CHARDASH := '-'
SPGenerator.addDefinition("CHARDASH", Literal(value="-"))

# CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE
SPGenerator.addDefinition(
  "CHARRANGE",
  SequentialGroup(children=[
    Name(value="CHARNOBRACE"),
    Literal(value="-"),
    Name(value="CHARNOBRACE"),
  ]),
)

# CHARNOBRACE := ESCAPEDCHAR/CHAR
SPGenerator.addDefinition(
  "CHARNOBRACE",
  FirstOfGroup(children=[
    Name(value="ESCAPEDCHAR"),
    Name(value="CHAR"),
  ]),
)

# CHAR := -[]]   (expressed here as a negated "]" literal)
SPGenerator.addDefinition("CHAR", Literal(value="]", negative=1))

# ESCAPEDCHAR := '\\', ( SPECIALESCAPEDCHAR / ([xX], HEXESCAPEDCHAR) / OCTALESCAPEDCHAR )
# Note: this programmatic definition has no "u"/"U" unicode-escape
# alternatives, unlike the ESCAPEDCHAR rule in the ``declaration`` text
# later in this module.
SPGenerator.addDefinition(
  "ESCAPEDCHAR",
  SequentialGroup(children=[
    Literal(value="\\"),
    FirstOfGroup(children=[
      Name(value="SPECIALESCAPEDCHAR"),
      SequentialGroup(children=[
        Range(value='xX'),
        Name(value="HEXESCAPEDCHAR"),
      ]),
      Name(value="OCTALESCAPEDCHAR"),
    ]),
  ]),
)

# SPECIALESCAPEDCHAR := [\\abfnrtv"']
SPGenerator.addDefinition(
  "SPECIALESCAPEDCHAR",
  Range(value='\\abfnrtv"\''),
)

# OCTALESCAPEDCHAR := [0-7], [0-7]?, [0-7]?
SPGenerator.addDefinition(
  "OCTALESCAPEDCHAR",
  SequentialGroup(children=[
    Range(value="01234567"),
    Range(value="01234567", optional=1),
    Range(value="01234567", optional=1),
  ]),
)

# HEXESCAPEDCHAR := [0-9a-fA-F], [0-9a-fA-F]
SPGenerator.addDefinition(
  "HEXESCAPEDCHAR",
  SequentialGroup(children=[
    Range(value="0123456789abcdefABCDEF"),
    Range(value="0123456789abcdefABCDEF"),
  ]),
)

# CHARNODBLQUOTE := -[\\"]+
SPGenerator.addDefinition(
  "CHARNODBLQUOTE",
  Range(value='\\"', negative=1, repeating=1),
)

# CHARNOSNGLQUOTE := -[\\']+
SPGenerator.addDefinition(
  "CHARNOSNGLQUOTE",
  Range(value="\\'", negative=1, repeating=1),
)

# Textual statement of the grammar in SimpleParse EBNF, held in a raw string
# so that the backslash escapes reach the EBNF text unchanged.  This name is
# not referenced by any other code visible in this module.
# NOTE(review): the text is not identical to the productions registered on
# SPGenerator above -- e.g. ESCAPEDCHAR here lists "u"/"U" unicode escapes and
# only a lowercase 'x' hex marker, and unreportedname/expandedname have no ts
# around the name -- confirm which form is authoritative before relying on it.
declaration = r"""declarationset      :=  declaration+
declaration         :=  ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group

element_token       :=  lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail?

negpos_indicator    :=  [-+]
lookahead_indicator :=  "?"
occurence_indicator :=  [+*?]
error_on_fail       :=  "!", (ts,literal)?

>group<             :=  '(',seq_group, ')'
seq_group           :=  ts,(error_on_fail/fo_group/element_token),
                          (ts, seq_indicator, ts,
                              (error_on_fail/fo_group/element_token)
                          )*, ts

fo_group            :=  element_token, (ts, fo_indicator, ts, element_token)+


# following two are likely something peoples might want to
# replace in many instances...
<fo_indicator>      :=  "/"
<seq_indicator>     :=  ','

unreportedname      :=  '<', name, '>'
expandedname        :=  '>', name, '<'
name                :=  [a-zA-Z_],[a-zA-Z0-9_]*
<ts>                :=  ( [ \011-\015]+ / comment )*
comment             :=  '#',-'\n'*,'\n'
literal             :=  literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
literalDecorator    :=  [c]



range               :=  '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE           :=  ']'
CHARDASH            :=  '-'
CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
CHAR                :=  -[]]
ESCAPEDCHAR         :=  '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ("u",UNICODEESCAPEDCHAR_16) /("U",UNICODEESCAPEDCHAR_32)/OCTALESCAPEDCHAR  )
SPECIALESCAPEDCHAR  :=  [\\abfnrtv"']
OCTALESCAPEDCHAR    :=  [0-7],[0-7]?,[0-7]?
HEXESCAPEDCHAR      :=  [0-9a-fA-F],[0-9a-fA-F]
CHARNODBLQUOTE      :=  -[\\"]+
CHARNOSNGLQUOTE     :=  -[\\']+
UNICODEESCAPEDCHAR_16 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
UNICODEESCAPEDCHAR_32 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
"""

### Now the interpreter objects...
class Parser(baseparser.BaseParser):
  """Builds a SimpleParse generator from the text of an EBNF grammar

  The EBNF is handed to the constructor, parsed from the
  'declarationset' production with an SPGrammarProcessor, and the
  generator built by that processor is published as self.generator.

  An instance is therefore tied to the one EBNF it was created with,
  which is why its API differs from that of other Parsers.
  """
  _rootProduction = 'declarationset'

  def __init__( self, ebnf, prebuilts=(), methodSource=None, definitionSources=() ):
    """Parse ebnf and store the resulting generator as self.generator

    prebuilts and definitionSources are passed straight through to
    SPGrammarProcessor; methodSource is accepted but not used here.

    Raises ValueError when parsing stops before the end of ebnf; the
    message gives the line, the offset reached and up to 100 characters
    of the unparsed remainder.
    """
    grammarProcessor = SPGrammarProcessor( prebuilts, definitionSources )
    _success, _tags, stopped = self.parse(
      ebnf, self._rootProduction, processor=grammarProcessor,
    )
    if stopped != len(ebnf):
      template = (
        "Unable to complete parsing of the EBNF, stopped at line %s (%s chars of %s)\n"
        "Unparsed:\n%s..."
      )
      lineNumber = lines(0, stopped, ebnf)
      remainder = ebnf[stopped:stopped+100]
      raise ValueError( template % (lineNumber, stopped, len(ebnf), remainder) )
    self.generator = grammarProcessor.generator

  def buildTagger( self, name=None, processor = None ):
    """Return the tag-table used to parse the EBNF, built from SPGenerator"""
    return SPGenerator.buildParser( name, processor )

class SPGrammarProcessor( DispatchProcessor ):
  """Processing object for post-processing an EBNF into a new generator

  Handler methods are named after the productions of the EBNF grammar.
  Each handler receives the result tuple for one match, laid out as
  (tag, left, right, children), plus the buffer which was parsed, and
  returns either an element token or the string the matched text
  stands for.

  Handlers take the result tuple as one named parameter and unpack it
  in the body: tuple parameters in a signature are not valid syntax on
  Python 3 (PEP 3113), while this form works on Python 2 as well.
  """
  ### top level
  def __init__( self, prebuilts=(), definitionSources=() ):
    """Create the generator which will receive the EBNF's productions

    prebuilts -- sequence of (name, table) pairs; ElementToken instances
      are registered directly, anything else is wrapped in Prebuilt
    definitionSources -- sequence of objects handed to the generator's
      addDefinitionSource
    """
    self.generator = generator.Generator()
    for (name, table) in prebuilts:
      if isinstance( table, ElementToken):
        self.generator.addDefinition( name, table)
      else:
        self.generator.addDefinition( name, Prebuilt(value=table))
    for source in definitionSources:
      self.generator.addDefinitionSource( source )

  def declaration( self, tup, buffer):
    '''Base declaration from the grammar, a "production" or "rule"

    Registers the production's content on self.generator under the
    declared name; <name> turns reporting off, >name< marks the
    content as expanded.  self.currentProduction holds the name while
    the content is being processed.
    '''
    tag, left, right, sublist = tup
    name = sublist[0]
    expanded = 0
    if name[0] == "unreportedname":
      name = name[3][0]
      # note that the info is stored in the wrong place :(
      report = 0
    elif name[0] == 'expandedname':
      report = 1
      expanded = 1
      name = name[3][0]
    else:
      report = 1
    name = getString( name, buffer )
    self.currentProduction = name
    content = dispatch( self, sublist[1], buffer )
    content.report = report
    content.expanded = expanded
    self.generator.addDefinition(
      name,
      content,
    )
    del self.currentProduction

  ### element configuration
  def element_token( self, tup, buffer):
    '''get the children, then configure

    Returns the base element (literal, range, group or name) with its
    optional/negative/repeating/lookahead flags set from the indicator
    children, and errorOnFail attached when one was given.
    '''
    tag, left, right, sublist = tup
    base = None
    negative = 0
    optional = 0
    repeating = 0
    lookahead = 0
    errorOnFail = None
    for child in sublist:
      result = dispatch( self, child, buffer )
      if child[0] == 'negpos_indicator':
        negative = result
      elif child[0] == 'occurence_indicator':
        optional, repeating = result
      elif child[0] == 'lookahead_indicator':
        lookahead = result
      elif child[0] == 'error_on_fail':
        # we do some extra work here: the "expected" text runs from the
        # start of this element token up to the start of the "!" marker
        errorOnFail = result
        self._config_error_on_fail( errorOnFail, (tag,left,child[1],[]), buffer )
      else:
        base = result
    base.optional = optional
    base.negative = negative
    base.repeating = repeating
    base.lookahead = lookahead
    if errorOnFail:
      base.errorOnFail = errorOnFail
    return base

  ### generator-node-builders
  def seq_group( self, tup, buffer):
    """Process a sequential-group into a SequentialGroup element token

    A stand-alone error_on_fail child is not kept as an item; a
    configured copy of it is attached to every item which follows it.
    A group with a single item returns that item directly.

    Raises ValueError if no items remain.
    """
    tag, left, right, sublist = tup
    children = dispatchList( self, sublist, buffer )
    errorOnFail = None
    result = []
    # children holds one entry per entry of sublist, so pair them up
    for (item, childTup) in zip(children, sublist):
      if isinstance( item, ErrorOnFail ):
        errorOnFail = item
      else:
        if errorOnFail:
          item.errorOnFail = errorOnFail.copy()
          self._config_error_on_fail(
            item.errorOnFail,
            childTup,
            buffer
          )
        result.append( item )
    if len(result) == 1:
      # single-item sequential group (very common)
      return result[0]
    elif not result:
      raise ValueError( """SequentialGroup on line %s doesn't have an element-token child! grammar was %s"""%( lines(0,left, buffer), buffer[left:left+25]))
    base = SequentialGroup(
      children = result,
    )
    return base
  def fo_group( self, tup, buffer):
    """Process a first-of-group into a FirstOf element token"""
    tag, left, right, sublist = tup
    children = dispatchList( self, sublist, buffer )
    if len(children) == 1:
      # this should never happen, but if it does, we can deal with it I suppose...
      return children[0]
    base = FirstOfGroup(
      children = children
    )
    return base

  def literal( self, tup, buffer):
    '''Turn a literal result into a literal generator

    A leading literalDecorator child selects CILiteral instead of
    Literal; the remaining children are joined to form the value.
    '''
    tag, left, right, sublist = tup
    if sublist and sublist[0][0] == 'literalDecorator':
      # right now only have the one decorator...
      sublist = sublist[1:]
      classObject = CILiteral
    else:
      classObject = Literal
    elements = dispatchList( self, sublist, buffer)
    ### Should check for CILiteral with non-CI string or single-character value!
    return classObject( value = "".join(elements) )

  def range( self, tup, buffer):
    """Build a Range whose value is the joined results of the children"""
    tag, left, right, sublist = tup
##    if hasattr( Range, 'requiresExpandedSet') and Range.requiresExpandedSet:
    return Range(
      value = "".join(dispatchList( self, sublist, buffer)),
    )
##    else:
##      # need to build up a new-syntax version of the range...
##      # escape ^ to \^
##      # escape \ to \\
##      # escape - to \-
##      # make sure range-sets are in proper order...
##      raise NotImplementedError( """Haven't got the new CharSet version implemented yet""")
  def name( self, tup, buffer):
    """Build a Name reference to the production named by the matched text"""
    return Name(
      value = getString(tup, buffer),
    )
  ### simple translators
  # indicator character -> (optional, repeating)
  occurenceIndicatorMap = {
    '*': (1,1),
    '+': (0,1),
    '?': (1,0),
  }
  def occurence_indicator( self, tup, buffer):
    '''Return optional, repeating as a tuple of true/false values'''
    value = getString(tup, buffer)
    return self.occurenceIndicatorMap[value]
  def lookahead_indicator( self, tup, buffer ):
    """If present, the lookahead indictor just says "yes", so just return 1"""
    return 1
  def error_on_fail( self, tup, buffer ):
    """If present, we are going to make the current object an errorOnFail type,

    If there's a string literal child, then we use it to create the
    "message" attribute of the errorOnFail object.
    """
    tag, left, right, children = tup
    err = ErrorOnFail()
    if children:
      # the only possible child is the literal; join its parts
      (tag,left,right,children) = children[0]
      message = "".join( dispatchList( self, children, buffer) )
      err.message = message
    return err
  def _config_error_on_fail( self, errorOnFail, tup, buffer ):
    """Configure an error-on-fail instance for a given child tuple"""
    # what we expected to find...
    errorOnFail.expected = buffer[tup[1]:tup[2]]
    # currentProduction only exists while a declaration is being processed
    if hasattr( self, "currentProduction"):
      errorOnFail.production = self.currentProduction


  # indicator character -> negative flag
  negposIndicatorMap = {
    '+': 0,
    '-': 1,
  }
  def negpos_indicator( self, tup, buffer ):
    '''return whether indicates negative'''
    value = getString(tup, buffer)
    return self.negposIndicatorMap[value]

  def CHARNODBLQUOTE( self, tup, buffer):
    """Return the matched text unchanged"""
    return getString(tup, buffer)
  CHAR = CHARNOSNGLQUOTE = CHARNODBLQUOTE
  def ESCAPEDCHAR( self, tup, buffer):
    """Return the joined results of the escape's children"""
    tag, left, right, sublist = tup
    return "".join(dispatchList( self, sublist, buffer))
  # escape letter -> the character it stands for
  specialescapedmap = {
  'a':'\a',
  'b':'\b',
  'f':'\f',
  'n':'\n',
  'r':'\r',
  't':'\t',
  'v':'\v',
  '\\':'\\',
  '"':'"',
  "'":"'",
  }
  def SPECIALESCAPEDCHAR( self, tup, buffer):
    """Translate the matched escape letter via specialescapedmap"""
    return self.specialescapedmap[ getString(tup, buffer)]
  def OCTALESCAPEDCHAR(self, tup, buffer):
    """Return the character whose code is the matched octal digits"""
    return chr(int( getString(tup, buffer), 8 ))
  def HEXESCAPEDCHAR( self, tup , buffer):
    """Return the character whose code is the matched hex digits"""
    return chr(int( getString(tup, buffer), 16 ))
  def CHARNOBRACE( self, tup, buffer):
    """Return the joined results of the children"""
    tag, left, right, sublist = tup
    return "".join(dispatchList( self, sublist, buffer))
  def CHARRANGE( self, tup, buffer):
    '''Create a string from first to second item

    The end-points may be given in either order; the result holds every
    character from the lower to the higher, inclusive.

    Raises ValueError if an end-point cannot be passed to ord().
    '''
    tag, left, right, sublist = tup
    endpoints = dispatchList( self, sublist, buffer)
    try:
      first, second = [ord(item) for item in endpoints]
    except TypeError:
      # report the problem to the caller rather than stopping in a debugger
      raise ValueError(
        """Character range on line %s has an end-point which is not a single character: %r"""%(
          lines(0, left, buffer), buffer[left:right],
        )
      )
    if second < first:
      second, first = first, second
    return "".join([chr(code) for code in range(first, second+1)])
  def CHARDASH( self, tup , buffer):
    """A dash inside a range stands for itself"""
    return '-'
  def CHARBRACE( self, tup , buffer):
    """A closing brace inside a range stands for itself"""
    return ']'

  if HAVE_UNICODE:
    def UNICODEESCAPEDCHAR_16( self, tup, buffer):
      """Only available in unicode-aware Python versions"""
      tag, left, right, sublist = tup
      char = unichr(int( buffer[left:right], 16 ))
      return char
    ### Only available in wide-unicode Python versions (rare)
    UNICODEESCAPEDCHAR_32 = UNICODEESCAPEDCHAR_16
  else:
    # ignore unicode-specific characters, though this isn't a particularly
    # useful approach, I don't see a better option at the moment...
    def UNICODEESCAPEDCHAR_16( self, tup, buffer):
      """Only available in unicode-aware Python versions"""
      return ""

    def UNICODEESCAPEDCHAR_32( self, tup, buffer):
      """Only available in wide-unicode Python versions (rare)"""
      return ""
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.