# sexpParser.py
#
# Demonstration of the pyparsing module, implementing a simple S-expression
# parser.
#
# Copyright 2007, by Paul McGuire
#
"""
BNF reference: http://theory.lcs.mit.edu/~rivest/sexp.txt
<sexp> :: <string> | <list>
<string> :: <display>? <simple-string> ;
<simple-string> :: <raw> | <token> | <base-64> | <hexadecimal> |
<quoted-string> ;
<display> :: "[" <simple-string> "]" ;
<raw> :: <decimal> ":" <bytes> ;
<decimal> :: <decimal-digit>+ ;
-- decimal numbers should have no unnecessary leading zeros
<bytes> -- any string of bytes, of the indicated length
<token> :: <tokenchar>+ ;
<base-64> :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
<hexadecimal> :: "#" ( <hex-digit> | <white-space> )* "#" ;
<quoted-string> :: <decimal>? <quoted-string-body>
<quoted-string-body> :: "\"" <bytes> "\""
<list> :: "(" ( <sexp> | <whitespace> )* ")" ;
<whitespace> :: <whitespace-char>* ;
<token-char> :: <alpha> | <decimal-digit> | <simple-punc> ;
<alpha> :: <upper-case> | <lower-case> | <digit> ;
<lower-case> :: "a" | ... | "z" ;
<upper-case> :: "A" | ... | "Z" ;
<decimal-digit> :: "0" | ... | "9" ;
<hex-digit> :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
<simple-punc> :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
<base-64-char> :: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
<null> :: "" ;
"""
from pyparsing import *
from base64 import b64decode
import pprint
def verifyLen(t):
t = t[0]
if t.len is not None:
t1len = len(t[1])
if t1len != t.len:
raise ParseFatalException, \
"invalid data of length %d, expected %s" % (t1len, t.len)
return t[1]
# define punctuation literals
LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR = map(Suppress, "()[]{}|")
decimal = Word("123456789",nums).setParseAction(lambda t: int(t[0]))
bytes = Word(printables)
raw = Group(decimal.setResultsName("len") + Suppress(":") + bytes).setParseAction(verifyLen)
token = Word(alphanums + "-./_:*+=")
base64_ = Group(Optional(decimal,default=None).setResultsName("len") + VBAR
+ OneOrMore(Word( alphanums +"+/=" )).setParseAction(lambda t: b64decode("".join(t)))
+ VBAR).setParseAction(verifyLen)
hexadecimal = ("#" + OneOrMore(Word(hexnums)) + "#")\
.setParseAction(lambda t: int("".join(t[1:-1]),16))
qString = Group(Optional(decimal,default=None).setResultsName("len") +
dblQuotedString.setParseAction(removeQuotes)).setParseAction(verifyLen)
simpleString = raw | token | base64_ | hexadecimal | qString
display = LBRK + simpleString + RBRK
string_ = Optional(display) + simpleString
sexp = Forward()
sexpList = Group(LPAR + ZeroOrMore(sexp) + RPAR)
sexp << ( string_ | sexpList )
######### Test data ###########
test00 = """(snicker "abc" (#03# |YWJj|))"""
test01 = """(certificate
(issuer
(name
(public-key
rsa-with-md5
(e |NFGq/E3wh9f4rJIQVXhS|)
(n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
aid-committee))
(subject
(ref
(public-key
rsa-with-md5
(e |NFGq/E3wh9f4rJIQVXhS|)
(n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
tom
mother))
(not-before "1997-01-01_09:00:00")
(not-after "1998-01-01_09:00:00")
(tag
(spend (account "12345678") (* numeric range "1" "1000"))))
"""
test02 = """(lambda (x) (* x x))"""
test03 = """(def length
(lambda (x)
(cond
((not x) 0)
( t (+ 1 (length (cdr x))))
)
)
)
"""
test04 = """(2:XX "abc" (#30# |YWJj|))"""
test05 = """(if (is (window_name) "XMMS") (set_workspace 2))"""
test06 = """(if
(and
(is (application_name) "Firefox")
(or
(contains (window_name) "Enter name of file to save to")
(contains (window_name) "Save As")
(contains (window_name) "Save Image")
)
)
(geometry "+140+122")
)
"""
test07 = """(defun factorial (x)
(if (zerop x) 1
(* x (factorial (- x 1)))))
"""
test51 = """(3:XX "abc" (#30# |YWJj|))"""
# Run tests
t = None
alltests = [ locals()[t] for t in sorted(locals()) if t.startswith("test") ]
for t in alltests:
print '-'*50
print t
try:
sexpr = sexp.parseString(t)
pprint.pprint(sexpr.asList())
except ParseFatalException, pfe:
print "Error:", pfe.msg
print line(pfe.loc,t)
print pfe.markInputline()
print
|