"""This module demonstates an entity resolver that makes it possible
to process XML files stored in zip archives.
File types other than zip archives are handled normally. Zip files
must end in a '.zip' extension. For archives containing more than
one file, the exact file in the zip archive must be specified.
This is done by appending a '?' and then the name of the file e.g.
<xsl:value-of select="document('archive.zip?data.xml')"/>
This example would be better if the Python ZipFile object presented
a real stream-oriented interface.
"""
import Pyana
import sys
from StringIO import StringIO
from urlparse import urlparse
from urllib import urlopen
from zipfile import ZipFile
from re import match
class StringSource:
    """Input source backed by an in-memory string.

    The text is kept on the ``string`` attribute and handed out through
    makeStream() as a file-like object.
    """

    def __init__(self, string):
        """Remember *string* as the content of this source."""
        self.string = string

    def makeStream(self):
        """Return a new StringIO wrapping the stored string.

        Every call builds a fresh stream, so each one starts at the
        beginning of the text.
        """
        content = self.string
        stream = StringIO(content)
        return stream
class ZipEntityResolver:
def __init__(self, reportExceptions = 1):
# Failed document() calls are not reported by Xalan,
# so we'll use reportExceptions for debugging
# purposes
self.reportExceptions = reportExceptions
def resolveEntity(self, public, system):
try:
# Xalan presents file URIs as file:///C:/file.txt
# Python expects file URIs as file:///C|/file.txt
filematch = match('file:///(\w):(.*)', system)
if filematch:
system = 'file:///%s|%s' % (
filematch.group(1),
filematch.group(2)
)
# Lot of code duplication here. If I weren't the laziest
# person in the world, I'd fix that.
if system.lower().endswith('.zip'):
zipRead = StringIO(urlopen(system).read())
zipfile = ZipFile(zipRead)
if len(zipfile.namelist()) != 1:
raise ValueError('Zip archive must contain a single '\
'file or the query notation must '\
'be used.')
else:
return StringSource(
zipfile.read(zipfile.namelist()[0])
)
else:
ziploc = system.lower().find('.zip?')
if ziploc != -1:
zipRead = StringIO(urlopen(system[:ziploc + 4]).read())
zipfile = ZipFile(zipRead)
return StringSource(
zipfile.read(system[ziploc + 5:])
)
else:
return None
except:
if self.reportExceptions:
import traceback
traceback.print_exc()
raise
def main(xml, xsl):
    """Transform the document *xml* with the stylesheet *xsl* and print it.

    Each argument is first offered to a ZipEntityResolver; when the
    resolver does not handle it, the argument is wrapped in a Pyana.URI
    instead. The same resolver is then installed on the transformer
    before the transformation runs.
    """
    transformer = Pyana.Transformer()
    zipResolver = ZipEntityResolver()

    # Resolve the XML input first, then the stylesheet.
    xmlInput = zipResolver.resolveEntity('', xml)
    if not xmlInput:
        xmlInput = Pyana.URI(xml)

    xslInput = zipResolver.resolveEntity('', xsl)
    if not xslInput:
        xslInput = Pyana.URI(xsl)

    transformer.setEntityResolver(zipResolver)
    output = transformer.transform2String(xmlInput, xslInput)
    print(output)
if __name__ == '__main__':
    # Script entry point: the first argument is the XML source and the
    # second is the XSL stylesheet. Exit with a usage message instead of
    # an IndexError traceback when either one is missing. Extra
    # arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <xml> <xsl>' % sys.argv[0])
    main(sys.argv[1], sys.argv[2])
|