from __future__ import nested_scopes
import os, sys, re, htmlentitydefs, struct, bisect
__metaclass__ = type
tmpfile = '~tmpfile.tmp'
# the disassembler to use. 'objdump' writes GNU-style instructions.
# 'ndisasm' uses Intel syntax.
objdump = 'objdump -b binary -m i386 --adjust-vma=%(origin)d -D %(file)s'
if sys.platform == "win32":
try:
from xam import __file__
except ImportError:
raise ImportError, "could not import xam module"
_win32_path = os.path.join(os.path.split(_xamfile)[0], "win32")
objdump = os.path.join(_win32_path, objdump)
_objdumpexe = objdump.split()[0]+".exe"
# test whether it works:
if os.system(_objdumpexe + " -v"):
raise IOError, "file %s and cygwin1.dll must exist" % _objdumpexe
#objdump = 'ndisasm -o %(origin)d -u %(file)s'
# the files from which symbols are loaded.
# the order and number of files must match
# psyco_dump_code_buffers() in psyco.c.
symbolfiles = [sys.executable]
try:
from psyco import _psyco
symbolfiles.append(_psyco.__file__)
except ImportError:
pass
# the program that lists symbols, and the output it gives
symbollister = 'nm %s'
re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
if sys.platform == "win32":
# no way to get full info into the executables by
# VC7. /PDB:NONE no longer supported.
# so we have to read the map files.
if sys.executable.lower().endswith("_d.exe"):
_mapfiles = ("python23_d.map", "_psyco_d.map")
else:
_mapfiles = ("python23.map", "_psyco.map")
symbolfiles = [os.path.join(_win32_path, x) for x in _mapfiles]
for _filepath in symbolfiles:
if not os.path.exists(_filepath):
raise IOError, "please make sure that '%s' exists" % _filepath
class symbollister:
def __init__(self, filename):
self.file = file(filename)
self.generator = self._readline()
def _readline(self):
for line in self.file:
# 0001:000661e0 _PyEval_CallFunction 1e0671e0 f modsupport.obj
# 0003:0000e770 _PyClass_Type 1e0d8770 classobject.obj
pieces = line.split()
if len(pieces) == 5:
colonadr, name, adr, dummy, obj = pieces
elif len(pieces) == 4:
colonadr, name, adr, obj = pieces
dummy = "d"
else:
continue
if colonadr.count(":") == 1 and obj.endswith(".obj"):
yield "%s %s %s\n" % (adr, dummy, name[1:])
def readline(self):
try:
return self.generator.next()
except StopIteration:
return ""
def close(self):
self.file.close()
def __iter__(self):
return self.generator
re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F]+)')
re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
symbols = {}
#rawtargets = {}
codeboundary = []
try:
from xamsupport import any_pointer
except ImportError:
def any_pointer(addr0, data, start, end, unpack=struct.unpack):
for i in range(4, len(data)+1):
offset, = unpack('l', data[i-4:i])
if start <= addr0+i+offset < end or start <= offset < end:
return 1
return 0
def machine_code_dump(data, originaddr, format):
if format == 'ivm':
import ivmdump
result = ivmdump.dump(data, originaddr)
elif format == 'i386':
f = open(tmpfile, 'wb')
f.write(data)
f.close()
try:
g = os.popen(objdump % {'file': tmpfile, 'origin': originaddr}, 'r')
result = g.readlines()
g.close()
finally:
os.unlink(tmpfile)
return result
def load_symbol_file(filename, symb1, addr1):
d = {}
if type(symbollister) is str:
g = os.popen(symbollister % filename, "r")
else:
g = symbollister(filename)
while 1:
line = g.readline()
if not line:
break
match = re_symbolentry.match(line)
if match:
d[match.group(2)] = long(match.group(1), 16)
g.close()
if d.has_key(symb1):
delta = addr1 - d[symb1]
else:
delta = 0
print >> sys.stderr,"Warning: no symbol '%s' in '%s'" % (symb1, filename)
for key, value in d.items():
symbols[value + delta] = key
def symtext(sym, addr, inbuf=None, lineaddr=None):
if isinstance(sym, CodeBuf):
if sym is inbuf:
name = 'top'
else:
name = '%s codebuf 0x%x' % (sym.mode, sym.addr)
if addr > sym.addr:
name += ' + %d' % (addr-sym.addr)
return name
else:
return sym
revmap = {}
for key, value in htmlentitydefs.entitydefs.items():
if type(value) is type(' '):
revmap[value] = '&%s;' % key
def htmlquote(text):
return ''.join([revmap.get(c,c) for c in text])
def lineaddresses(line):
result = []
i = 0
while 1:
match = re_addr.search(line, i)
if not match:
break
i = match.end()
addr = long(match.group(1), 16)
result.append(addr)
return result
def codeat(addr):
i = bisect.bisect(codeboundary, (addr, None))
if i>0:
addrend, codebuf = codeboundary[i-1]
if isinstance(codebuf, CodeBuf):
return codebuf
re_int = re.compile(r"(\-?\d+)$")
re_ctvinfo = re.compile(r"ct (\d+) (\-?\d+)$")
re_rtvinfo = re.compile(r"rt (\-?\d+)$")
re_vtvinfo = re.compile(r"vt 0x([0-9a-fA-F]+)$")
LOC_LOCALS_PLUS = 3
class CodeBuf:
__slots__ = ['mode', 'co_filename', 'co_name', 'nextinstr', 'addr',
'stackdepth', 'specdict', 'data', 'cache_text',
'disass_text', 'reverse_lookup', 'vlocals',
'complete_list', 'dumpfile', 'vlocalsofs', 'codemap']
machine_code_format = '?'
def __init__(self, mode, co_filename, co_name, nextinstr,
addr, stackdepth):
self.mode = mode
self.co_filename = co_filename
self.co_name = co_name
self.nextinstr = nextinstr
self.addr = addr
#self.data = data
self.stackdepth = stackdepth
#self.reverse_lookup = [] # list of (offset, codebuf pointing there)
self.specdict = []
if self.mode != "proxy":
codeboundary.append((self.addr-0.5, self))
else:
self.data = ""
#for i in range(4, len(data)+1):
# offset, = struct.unpack('l', data[i-4:i])
# rawtargets.setdefault(addr+i+offset, {})[self] = 1
def getboundary(self):
i = bisect.bisect(codeboundary, (self.addr-0.5, self))
prev = codeboundary[i-1][1]
next = codeboundary[i][1]
#while not isinstance(next, BigBuffer) and next.addr == self.addr:
# i = i + 1
# next = codeboundary[i][1]
while not isinstance(codeboundary[i][1], BigBuffer):
i = i + 1
bigbuf = codeboundary[i][1]
return prev, next, bigbuf
def splitheader(self):
data = self.data
addr = self.addr
k = 0
while data[k:k+1] == '\xCC':
k = k + 1
if data[k:k+4] == '\x66\x66\x66\x66':
# detected a rt_local_buf_t structure
next, key = struct.unpack('LL', data[k+4:k+12])
data = data[k+12:]
addr += k+12
else:
next = key = None
return data, addr, next, key
def __getattr__(self, attr):
if attr == 'data':
prev, next, bigbuf = self.getboundary()
assert prev is self
self.data = data = bigbuf.load(self.addr, next.addr)
return data
if attr == 'cache_text':
# produce the disassembly listing
data, addr, next, key = self.splitheader()
self.cache_text = []
if key is not None:
self.cache_text.append(
'Created by promotion of the value 0x%x\n' % key)
if next is not None:
self.cache_text.append(
'Next promoted value at buffer 0x%x\n' % next)
self.cache_text += machine_code_dump(data, addr,
CodeBuf.machine_code_format)
return self.cache_text
if attr == 'disass_text':
txt = self.cache_text
if self.specdict:
txt.append('\n')
txt.append("'do_promotion' dictionary:\n")
for key, value in self.specdict:
txt.append('.\t%s:\t\t\n' % htmlquote(key))
txt.append('.\t\t0x%x\t\t\n' % value)
self.disass_text = txt
return txt
if attr == 'reverse_lookup':
# 'reverse_lookup' is a list of (offset, codebuf pointing there)
self.reverse_lookup = []
start = self.addr
end = start + len(self.data)
for codebuf in self.complete_list:
if any_pointer(codebuf.addr, codebuf.data, start, end):
for line in codebuf.disass_text:
for addr in lineaddresses(line):
if start <= addr < end:
self.reverse_lookup.append((addr-start, codebuf))
return self.reverse_lookup
if attr == 'vlocals':
self.dumpfile.seek(self.vlocalsofs)
self.vlocals = self.load_vi_array({0: None})
return self.vlocals
raise AttributeError, attr
def load_vi_array(self, d):
dumpfile = self.dumpfile
match = re_int.match(dumpfile.readline())
assert match
count = int(match.group(1))
a = []
for i in range(count):
line = dumpfile.readline()
match = re_int.match(line)
assert match
addr = long(match.group(1))
if d.has_key(addr):
vi = d[addr]
else:
line = dumpfile.readline()
match = re_ctvinfo.match(line)
if match:
vi = CompileTimeVInfo(int(match.group(1)),
long(match.group(2)))
else:
match = re_rtvinfo.match(line)
if match:
vi = RunTimeVInfo(long(match.group(1)), self.stackdepth)
else:
match = re_vtvinfo.match(line)
assert match
vi = VirtualTimeVInfo(long(match.group(1), 16))
d[addr] = vi
vi.addr = addr
vi.array = self.load_vi_array(d)
a.append(vi)
a.reverse()
return a
def get_next_instr(self):
if self.nextinstr >= 0:
return self.nextinstr
def spec_dict(self, key, value):
self.specdict.append((key, value))
#rawtargets.setdefault(value, {})[self] = 1
try:
del self.disass_text
except:
pass
try:
del self.reverse_lookup
except:
pass
## def build_reverse_lookup(self):
## for line in self.disass_text:
## for addr in lineaddresses(line):
## sym = symbols.get(addr)
## if isinstance(sym, CodeBuf):
## sym.reverse_lookup.append((addr-sym.addr, self))
def disassemble(self, symtext=symtext, linetext=None, snapshot=None):
seen = {}
data = []
for line in self.disass_text:
if line.endswith('\n'):
line = line[:-1]
match = re_lineaddr.match(line)
if match:
lineaddr = long(match.group(1), 16)
if not seen.has_key(lineaddr):
if self.codemap.has_key(lineaddr) and snapshot:
for proxy in self.codemap[lineaddr]:
data.append(snapshot(proxy))
seen[lineaddr] = 1
ofs = lineaddr - self.addr
sources = [c for o, c in self.reverse_lookup if o == ofs]
if sources and linetext:
line = linetext(line, lineaddr)
if sources != [self]*len(sources):
data.append('\n')
else:
lineaddr = None
for addr in lineaddresses(line):
sym = symbols.get(addr) or codeat(addr)
if sym:
line = '%s\t(%s)' % (line, symtext(sym, addr, self,lineaddr))
break
data.append(line + '\n')
return ''.join(data)
class BigBuffer:
__slots__ = ['file', 'offset', 'start', 'length', 'addr', 'priority']
def __init__(self, file, start, length):
#if sys.stderr.softspace:
# print >> sys.stderr
#print >> sys.stderr, 'BigBuffer:', hex(start), hex(start+length),
#print >> sys.stderr, '(%d)' % length
self.file = file
self.offset = file.tell()
self.start = start
self.length = length
self.addr = start + length # end address
self.priority = -len(codeboundary)
codeboundary.append((self.addr-0.25, self))
file.seek(self.length, 1)
def load(self, begin, end):
assert self.start <= begin <= self.addr, \
(hex(self.start), hex(begin), hex(end), hex(self.addr))
self.file.seek(self.offset + (begin-self.start))
return self.file.read(min(self.addr, end) - begin)
class VInfo:
__slots__ = ['addr', 'array']
class CompileTimeVInfo(VInfo):
__slots__ = ['flags', 'value']
def __init__(self, flags, value):
self.flags = flags
self.value = value
def gettext(self):
text = "Compile-time value 0x%x" % self.value
if self.flags & 1:
text += ", fixed"
if self.flags & 2:
text += ", reference"
return text
def getsummarytext(self):
text = "Compile-time"
if self.flags & 1:
text += " fixed"
text += " 0x%x" % self.value
return text
class RunTimeVInfo(VInfo):
__slots__ = ['source', 'stackdepth']
REG_NAMES = ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"]
def __init__(self, source, stackdepth=None):
self.source = source
self.stackdepth = stackdepth
def gettext(self):
text = "Run-time source,"
reg = self.source >> 28
stack = self.source & 0x03FFFFFC
if CodeBuf.machine_code_format == 'ivm':
if reg:
text += " in a register ??????"
if not stack:
text += " not in stack ??????"
else:
text += " in stack [%d] or from top #%d" % (
(self.stackdepth-stack)/4,
stack/4)
else:
if 0 <= reg < 8:
text += " in register %s" % self.REG_NAMES[reg].upper()
if stack:
text += " and"
if stack:
if self.stackdepth is None:
sd = ""
else:
sd = "[ESP+0x%x] or " % (self.stackdepth - stack)
text += " in stack %sfrom top %d" % (sd, stack)
if not (self.source & 0x08000000):
text += " holding a reference"
if self.source & 0x04000000:
text += " >=0"
return text
def getsummarytext(self):
return "Run-time"
class VirtualTimeVInfo(VInfo):
__slots__ = ['vs']
def __init__(self, vs):
self.vs = vs
def gettext(self):
return "Virtual-time source (%x)" % self.vs
def getsummarytext(self):
return "Virtual-time (%x)" % self.vs
def readdump(filename = 'psyco.dump'):
del codeboundary[:]
re_header = re.compile(r"Psyco dump [[](\w+?)[]]")
re_symb1 = re.compile(r"(\w+?)[:]\s0x([0-9a-fA-F]+)")
re_codebuf = re.compile(r"CodeBufferObject 0x([0-9a-fA-F]+) (\-?\d+) \'(.*?)\' \'(.*?)\' (\-?\d+) \'(.*?)\'$")
re_specdict = re.compile(r"spec_dict 0x([0-9a-fA-F]+)")
re_vinfo_array = re.compile(r"vinfo_array")
re_spec1 = re.compile(r"0x([0-9a-fA-F]+)\s(.*)$")
re_bigbuffer = re.compile(r"BigBuffer 0x([0-9a-fA-F]+) (\d+)$")
codebufs = []
dumpfile = open(filename, 'rb')
match = re_header.match(dumpfile.readline())
if not match:
raise ValueError, "'%s' does not look like a Psyco dump" % filename
CodeBuf.machine_code_format = match.group(1)
bufcount, = struct.unpack("i", dumpfile.read(4))
buftable = list(struct.unpack("l"*bufcount, dumpfile.read(4*bufcount)))
buftable.reverse()
if buftable:
filesize = buftable[-1]
else:
filesize = sys.maxint
filesize *= 1.0
nextp = 0.1
cbsortedsize = 0
for filename in symbolfiles:
line = dumpfile.readline()
match = re_symb1.match(line)
assert match
load_symbol_file(filename, match.group(1), long(match.group(2), 16))
while 1:
line = dumpfile.readline()
if not line:
print "Note: unexpected end of file"
break
#print line.strip()
match = re_codebuf.match(line)
if match:
percent = dumpfile.tell() / filesize
if percent >= nextp:
print >> sys.stderr, '%d%%...' % int(100*percent),
nextp += 0.1
#size = int(match.group(2))
#data = dumpfile.read(size)
#assert len(data) == size
codebuf = CodeBuf(match.group(6), match.group(3), match.group(4),
int(match.group(5)), long(match.group(1), 16),
int(match.group(2)))
codebuf.dumpfile = dumpfile
codebuf.vlocalsofs = buftable.pop()
codebufs.append(codebuf)
else:
match = re_specdict.match(line)
if match:
addr = long(match.group(1), 16)
if len(codeboundary) != cbsortedsize:
codeboundary.sort()
cbsortedsize = len(codeboundary)
codebuf = codeat(addr-4)
if codebuf is None:
raise "spec_dict with no matching code buffer", line
while 1:
line = dumpfile.readline()
if len(line)<=1:
break
match = re_spec1.match(line)
assert match
codebuf.spec_dict(match.group(2), long(match.group(1), 16))
elif re_vinfo_array.match(line):
assert len(codebufs) == bufcount
break
else:
match = re_bigbuffer.match(line)
if match:
BigBuffer(dumpfile, long(match.group(1), 16),
int(match.group(2)))
else:
raise "invalid line", line
print >> sys.stderr, 'sorting...',
if len(codeboundary) != cbsortedsize:
codeboundary.sort()
codemap = {}
#cblist = []
codebufs.reverse()
for codebuf in codebufs:
codebuf.complete_list = codebufs
codebuf.codemap = codemap
codemap.setdefault(codebuf.addr, []).insert(0, codebuf)
#prev, next, bigbuf = codebuf.getboundary()
#cblist.append((bigbuf.priority, codebuf.addr, codebuf))
#cblist.sort()
#codebufs[:] = [codebuf for priority, addr, codebuf in cblist]
print >> sys.stderr, '100%'
return codebufs
if __name__ == '__main__':
if len(sys.argv) > 1:
codebufs = readdump(sys.argv[1])
else:
codebufs = readdump()
for codebuf in codebufs:
print codebuf.disassemble()
|