# uses pyexpat to build the tree. Yuk, globals,
# but want a quick speed comparison with pyRXP
import xml.parsers.expat
class ExpatTreeParser:
    """Crude and incomplete tree-builder based on expat.

    Only start-element, end-element and character-data events are
    handled, so this is not a faithful representation of every XML
    construct, but it is close enough for benchmark comparisons.
    It (like expat) returns Unicode strings; we don't want to
    penalize it for this, so they are left as Unicode.

    Each element becomes a 4-tuple ``(name, attrs, children, None)``
    where ``children`` is a list mixing child element tuples and
    character-data strings in document order.  The fourth slot is
    always None here (presumably kept so the layout matches the
    pyRXP tuple tree -- not confirmed by this file).
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Discard any previous tree and start from an empty fake root."""
        # A fake top node means the start-element handler never has to
        # special-case "there is no parent yet".
        self.curNode = ('_FAKE_ROOT_', {}, [], None)
        self.nodestack = [self.curNode]

    def handleStartElement(self, name, attrs):
        """Open a new element node and make it the current node."""
        newNode = (name, attrs, [], None)
        self.nodestack.append(newNode)
        self.curNode[2].append(newNode)
        self.curNode = newNode

    def handleCharData(self, data):
        """Append a chunk of character data to the current node's children."""
        self.curNode[2].append(data)

    def handleEndElement(self, name):
        """Close the current element and step back to its parent."""
        self.nodestack.pop()
        self.curNode = self.nodestack[-1]

    def parse(self, data):
        """Parse a complete XML document and return its root element tuple.

        data -- the whole document, passed to expat in a single call.

        Raises xml.parsers.expat.ExpatError if the document is not
        well-formed, including when it is empty or truncated.
        """
        # Start from a clean slate so one instance can parse several
        # documents; otherwise later roots pile up behind the first one.
        self._reset()
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.handleStartElement
        p.EndElementHandler = self.handleEndElement
        p.CharacterDataHandler = self.handleCharData
        # The second argument marks this as the final chunk, so expat
        # reports unclosed elements instead of waiting for more input.
        p.Parse(data, True)
        # The document root is the first child of our fake top node.
        return self.nodestack[0][2][0]
def expattree(data):
    """Build a tree from the XML in data using a throwaway ExpatTreeParser.

    Returns whatever ExpatTreeParser.parse returns for the document.
    """
    builder = ExpatTreeParser()
    return builder.parse(data)
|