import re
import types
from string import whitespace
import StringIO
from fontTools.misc import eexec
from psOperators import *
ps_special = '()<>[]{}%' # / is one too, but we take care of that one differently
skipwhiteRE = re.compile("[%s]*" % whitespace)
endofthingPat = "[^][(){}<>/%%%s]*" % whitespace
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile("%[^\n\r]*")
# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
stringPat = r"""
\(
(
(
[^()]* \ [()]
)
|
(
[^()]* \( [^()]* \)
)
)*
[^()]*
\)
"""
stringPat = "".join(stringPat.split())
stringRE = re.compile(stringPat)
hexstringRE = re.compile("<[%s0-9A-Fa-f]*>" % whitespace)
class PSTokenError(Exception): pass
class PSError(Exception): pass
class PSTokenizer(StringIO.StringIO):
def getnexttoken(self,
# localize some stuff, for performance
len=len,
ps_special=ps_special,
stringmatch=stringRE.match,
hexstringmatch=hexstringRE.match,
commentmatch=commentRE.match,
endmatch=endofthingRE.match,
whitematch=skipwhiteRE.match):
_, nextpos = whitematch(self.buf, self.pos).span()
self.pos = nextpos
if self.pos >= self.len:
return None, None
pos = self.pos
buf = self.buf
char = buf[pos]
if char in ps_special:
if char in '{}[]':
tokentype = 'do_special'
token = char
elif char == '%':
tokentype = 'do_comment'
_, nextpos = commentmatch(buf, pos).span()
token = buf[pos:nextpos]
elif char == '(':
tokentype = 'do_string'
m = stringmatch(buf, pos)
if m is None:
raise PSTokenError, 'bad string at character %d' % pos
_, nextpos = m.span()
token = buf[pos:nextpos]
elif char == '<':
tokentype = 'do_hexstring'
m = hexstringmatch(buf, pos)
if m is None:
raise PSTokenError, 'bad hexstring at character %d' % pos
_, nextpos = m.span()
token = buf[pos:nextpos]
else:
raise PSTokenError, 'bad token at character %d' % pos
else:
if char == '/':
tokentype = 'do_literal'
m = endmatch(buf, pos+1)
else:
tokentype = ''
m = endmatch(buf, pos)
if m is None:
raise PSTokenError, 'bad token at character %d' % pos
_, nextpos = m.span()
token = buf[pos:nextpos]
self.pos = pos + len(token)
return tokentype, token
def skipwhite(self, whitematch=skipwhiteRE.match):
_, nextpos = whitematch(self.buf, self.pos).span()
self.pos = nextpos
def starteexec(self):
self.pos = self.pos + 1
#self.skipwhite()
self.dirtybuf = self.buf[self.pos:]
self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
self.len = len(self.buf)
self.pos = 4
def stopeexec(self):
if not hasattr(self, 'dirtybuf'):
return
self.buf = self.dirtybuf
del self.dirtybuf
def flush(self):
if self.buflist:
self.buf = self.buf + "".join(self.buflist)
self.buflist = []
class PSInterpreter(PSOperators):
def __init__(self):
systemdict = {}
userdict = {}
self.dictstack = [systemdict, userdict]
self.stack = []
self.proclevel = 0
self.procmark = ps_procmark()
self.fillsystemdict()
def fillsystemdict(self):
systemdict = self.dictstack[0]
systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
systemdict[']'] = ps_operator(']', self.do_makearray)
systemdict['true'] = ps_boolean(1)
systemdict['false'] = ps_boolean(0)
systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
systemdict['FontDirectory'] = ps_dict({})
self.suckoperators(systemdict, self.__class__)
def suckoperators(self, systemdict, klass):
for name in dir(klass):
attr = getattr(self, name)
if callable(attr) and name[:3] == 'ps_':
name = name[3:]
systemdict[name] = ps_operator(name, attr)
for baseclass in klass.__bases__:
self.suckoperators(systemdict, baseclass)
def interpret(self, data, getattr = getattr):
tokenizer = self.tokenizer = PSTokenizer(data)
getnexttoken = tokenizer.getnexttoken
do_token = self.do_token
handle_object = self.handle_object
try:
while 1:
tokentype, token = getnexttoken()
#print token
if not token:
break
if tokentype:
handler = getattr(self, tokentype)
object = handler(token)
else:
object = do_token(token)
if object is not None:
handle_object(object)
tokenizer.close()
self.tokenizer = None
finally:
if self.tokenizer is not None:
if 0:
print 'ps error:\n- - - - - - -'
print self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos]
print '>>>'
print self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]
print '- - - - - - -'
def handle_object(self, object):
if not (self.proclevel or object.literal or object.type == 'proceduretype'):
if object.type <> 'operatortype':
object = self.resolve_name(object.value)
if object.literal:
self.push(object)
else:
if object.type == 'proceduretype':
self.call_procedure(object)
else:
object.function()
else:
self.push(object)
def call_procedure(self, proc):
handle_object = self.handle_object
for item in proc.value:
handle_object(item)
def resolve_name(self, name):
dictstack = self.dictstack
for i in range(len(dictstack)-1, -1, -1):
if dictstack[i].has_key(name):
return dictstack[i][name]
raise PSError, 'name error: ' + str(name)
def do_token(self, token,
int=int,
float=float,
ps_name=ps_name,
ps_integer=ps_integer,
ps_real=ps_real):
try:
num = int(token)
except (ValueError, OverflowError):
try:
num = float(token)
except (ValueError, OverflowError):
if '#' in token:
hashpos = token.find('#')
try:
base = int(token[:hashpos])
num = int(token[hashpos+1:], base)
except (ValueError, OverflowError):
return ps_name(token)
else:
return ps_integer(num)
else:
return ps_name(token)
else:
return ps_real(num)
else:
return ps_integer(num)
def do_comment(self, token):
pass
def do_literal(self, token):
return ps_literal(token[1:])
def do_string(self, token):
return ps_string(token[1:-1])
def do_hexstring(self, token):
hexStr = "".join(token[1:-1].split())
if len(hexStr) % 2:
hexStr = hexStr + '0'
cleanstr = []
for i in range(0, len(hexStr), 2):
cleanstr.append(chr(int(hexStr[i:i+2], 16)))
cleanstr = "".join(cleanstr)
return ps_string(cleanstr)
def do_special(self, token):
if token == '{':
self.proclevel = self.proclevel + 1
return self.procmark
elif token == '}':
proc = []
while 1:
topobject = self.pop()
if topobject == self.procmark:
break
proc.append(topobject)
self.proclevel = self.proclevel - 1
proc.reverse()
return ps_procedure(proc)
elif token == '[':
return self.mark
elif token == ']':
return ps_name(']')
else:
raise PSTokenError, 'huh?'
def push(self, object):
self.stack.append(object)
def pop(self, *types):
stack = self.stack
if not stack:
raise PSError, 'stack underflow'
object = stack[-1]
if types:
if object.type not in types:
raise PSError, 'typecheck, expected %s, found %s' % (`types`, object.type)
del stack[-1]
return object
def do_makearray(self):
array = []
while 1:
topobject = self.pop()
if topobject == self.mark:
break
array.append(topobject)
array.reverse()
self.push(ps_array(array))
def close(self):
"""Remove circular references."""
del self.stack
del self.dictstack
def unpack_item(item):
tp = type(item.value)
if tp == types.DictionaryType:
newitem = {}
for key, value in item.value.items():
newitem[key] = unpack_item(value)
elif tp == types.ListType:
newitem = [None] * len(item.value)
for i in range(len(item.value)):
newitem[i] = unpack_item(item.value[i])
if item.type == 'proceduretype':
newitem = tuple(newitem)
else:
newitem = item.value
return newitem
def suckfont(data):
import re
m = re.search(r"/FontName\s+/([^ \t\n\r]+)\s+def", data)
if m:
fontName = m.group(1)
else:
fontName = None
interpreter = PSInterpreter()
interpreter.interpret("/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
interpreter.interpret(data)
fontdir = interpreter.dictstack[0]['FontDirectory'].value
if fontdir.has_key(fontName):
rawfont = fontdir[fontName]
else:
# fall back, in case fontName wasn't found
fontNames = fontdir.keys()
if len(fontNames) > 1:
fontNames.remove("Helvetica")
fontNames.sort()
rawfont = fontdir[fontNames[0]]
interpreter.close()
return unpack_item(rawfont)
if __name__ == "__main__":
import macfs
fss, ok = macfs.StandardGetFile("LWFN")
if ok:
from fontTools import t1Lib
data, kind = t1Lib.read(fss.as_pathname())
font = suckfont(data)
|