# $Id: sltokenize.py,v 1.3 2006/04/27 16:55:10 mbaas Exp $
"""RenderMan Shading Language Tokenizer."""
import re, os.path
# Token type codes handed to the *tokeater* callback as its first argument.
# The values follow the order in which the types are listed in the
# tokenize() docstring; NAME and TYPE keep their existing values.
WHITESPACE = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
OPERATOR = 5
CHARACTER = 6
TYPE = 7
# tokenize
def tokenize(readline, tokeater):
    """Reads a Shading Language input stream and creates tokens.

    The first parameter, *readline*, must be a callable object which
    provides the same interface as the :meth:`readline()` method of built-in
    file objects. Each call to the function should return one line of
    input as a string. An empty return value marks the end of the input.

    The second parameter, *tokeater*, must also be a callable object.
    It is called with six parameters: the token type, the token
    string, a tuple (*srow*, *scol*) specifying the row and column where
    the token begins in the source, a tuple (*erow*, *ecol*) giving the
    ending position of the token, the line on which the token was
    found and the filename of the current file.

    The token type can be one of:

    * ``WHITESPACE``: This is a series of blanks and/or tabs
    * ``NAME``: A valid identifier name or keyword
    * ``NUMBER``: An integer or float
    * ``STRING``: A string enclosed in ``'"'``.
    * ``NEWLINE``: A newline character.
    * ``OPERATOR``: An operator such as ``'+', '-', '!', '==', '!='``, etc.
    * ``CHARACTER``: A single character that doesn't fit anything else.
    * ``TYPE``: A Shading Language type (float, point, vector, normal, matrix, color)

    Rows start at 1 for the first line read; columns start at 0 and the
    end column is exclusive.

    By default, the filename argument is an empty string. It will only
    be the actual filename if you provide a preprocessed file stream
    as input (so you should first run ``cpp`` on any shader). The
    tokenizer actually expects preprocessed data as it doesn't handle
    comments.

    Lines starting with ``#`` are taken as preprocessor line markers of
    the form ``# <linenr> "<filename>"``. They update the reported row
    and filename and produce no tokens. Markers that cannot be parsed
    are skipped.
    """
    types = ("float", "point", "vector", "normal", "matrix", "color")

    # Every pattern starts with a different class of character, so at most
    # one of them can match at any given position.
    # In the operator pattern the two-character operators are listed before
    # their one-character prefixes: regex alternation picks the first
    # alternative that matches, so "<" ahead of "<=" would split "<=" in two.
    regs = (
        (WHITESPACE, re.compile(r"[ \t]+")),
        (NAME, re.compile(r"[A-Za-z_][A-Za-z_0-9]*")),
        (NUMBER, re.compile(r"[0-9]+(\.[0-9]+)?(E(\+|-)?[0-9]+)?")),
        (STRING, re.compile(r"\"[^\"]*\"")),
        (OPERATOR, re.compile(r"<=|>=|==|!=|&&|\|\||\+|-|!|\.|\*|/|\^|<|>|\?|:|=|\(|\)")),
        (NEWLINE, re.compile(r"\n")),
    )

    linenr = 0
    filename = ""
    while True:
        # Read next line
        line = readline()
        # No more lines? then finish
        if not line:
            break

        linenr += 1

        # Process preprocessor lines...
        if line[0] == "#":
            try:
                f = line.strip().split(" ")
                # The marker names the number of the *following* line, which
                # is incremented again when that line is read.
                linenr = int(f[1]) - 1
                # Strip the surrounding quotes from the file name
                filename = f[2][1:-1]
            except (IndexError, ValueError):
                # Not a line marker (e.g. "#pragma ..."): nothing to update
                pass
            continue

        # Create tokens...
        pos = 0
        length = len(line)
        while pos < length:
            # Check all regular expressions...
            for typ, rx in regs:
                m = rx.match(line, pos)
                # Does it match? then the token is found
                if m is not None:
                    ecol = m.end()
                    tok = m.group()
                    if typ == NAME and tok in types:
                        typ = TYPE
                    tokeater(typ, tok, (linenr, pos), (linenr, ecol), line, filename)
                    pos = ecol
                    break
            else:
                # No match? then report a single character...
                tokeater(CHARACTER, line[pos], (linenr, pos), (linenr, pos+1), line, filename)
                pos += 1
def _tokeater(type, s, start, end, line, filename):
    """Debugging token callback that prints one line per token.

    The parameters are those passed by :func:`tokenize` to its *tokeater*
    callback. ``WHITESPACE`` and ``NEWLINE`` tokens are not printed. For
    every other token the token string, the name of its type, the start
    and end positions and the base name of the file are printed. *line*
    is accepted for interface compatibility but not used.
    """
    if type==WHITESPACE or type==NEWLINE:
        return

    # Printable names indexed by token type code. This relies on the codes
    # running from 0 to 7 in the order listed in the tokenize() docstring
    # (NAME is 1 and TYPE is 7).
    typs = ("WHITESPACE", "NAME", "NUMBER", "STRING",
            "NEWLINE", "OPERATOR", "CHARACTER", "TYPE")

    # A single parenthesized argument prints identically on Python 2 and 3
    print("%-30s %-10s %s %s %s"%(s, typs[type], start, end, os.path.basename(filename)))
if __name__=="__main__":
    import sys

    # Tokenize the file named by the first command line argument and print
    # its tokens. Without an argument the input is read from stdin.
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as f:
            tokenize(f.readline, _tokeater)
    else:
        tokenize(sys.stdin.readline, _tokeater)