# -*- coding: iso-8859-1 -*-
#-----------------------------------------------------------------------------
# Modeling Framework: an Object-Relational Bridge for python
#
# Copyright (c) 2001-2004 Sébastien Bigaret <sbigaret@users.sourceforge.net>
# All rights reserved.
#
# This file is part of the Modeling Framework.
#
# This code is distributed under a "3-clause BSD"-style license;
# see the LICENSE file for details.
#-----------------------------------------------------------------------------
"""
XMLutils
XMLutils provides standard imports along with error messages when these
imports fail.
Import statement should be: 'from XMLutils import *'
The import statements declared are (exhaustively):
from xml.dom.ext.reader import Sax2
from xml.dom.ext.reader.Sax import FromXmlStream
from xml import xpath
Other addition to the namespace:
- Exception XMLImportError
CVS information
$Id: XMLutils.py 932 2004-07-20 06:21:57Z sbigaret $
"""
__version__='$Revision: 932 $'[11:-2]
class XMLImportError(Exception):
    """Signals that one of the XML packages this module needs failed to import."""
# xml.xpath is provided by the PyXML package; fail early, with an explicit
# message, when it is not available.
try:
    from xml import xpath
except ImportError:
    # String exceptions (the former "raise 'ImportError', msg") are not
    # supported by current interpreters; raise the module's dedicated
    # exception class instead. Only ImportError is intercepted so that any
    # other failure keeps its original traceback.
    raise XMLImportError(
        'PyXML is not installed: failed to import xml.xpath')
import codecs, encodings
def strToUnicode(aString, encoding='iso-8859-1'):
    """
    Decodes 'aString' with the codec registered under 'encoding' and returns
    the decoded text.

    Parameters:

      aString -- the encoded string to decode

      encoding -- name of the codec to use, defaults to 'iso-8859-1'

    Raises LookupError (from codecs.lookup) if 'encoding' is unknown.
    """
    # codecs.lookup() yields (encode, decode, reader, writer)
    _encode, decode, _reader, _writer = codecs.lookup(encoding)
    # a decoder returns (decoded object, length consumed)
    decoded_text, _consumed = decode(aString)
    return decoded_text
def unicodeToStr(unicode, encoding='iso-8859-1'):
    """
    Encodes the text 'unicode' with the codec registered under 'encoding' and
    returns the encoded string.

    Parameters:

      unicode -- the text to encode

      encoding -- name of the codec to use, defaults to 'iso-8859-1'

    Raises LookupError (from codecs.lookup) if 'encoding' is unknown.
    """
    # codecs.lookup() yields (encode, decode, reader, writer)
    encode, _decode, _reader, _writer = codecs.lookup(encoding)
    # an encoder returns (encoded object, length consumed)
    encoded_text, _consumed = encode(unicode)
    return encoded_text
def createDOMDocumentObject(param):
    """
    Creates and returns a new DOM document object.

    A DOMImplementation is instantiated, a document type is created with all
    three arguments set to None, and both are used to create the document.

    Parameters:

      param -- handed over as the second argument of createDocument();
        presumably the (qualified) name of the document's root element --
        to be confirmed against the PyXML documentation
    """
    # NOTE(review): 'ext' is not referenced below; the import is preserved
    # as found.
    from xml.dom import ext
    from xml.dom.DOMImplementation import DOMImplementation
    dom_impl = DOMImplementation()
    doctype = dom_impl.createDocumentType(None, None, None)
    return dom_impl.createDocument(None, param, doctype)
class XMLCapability:
    """
    Mix-in class describing how an object's attributes map to XML.

    Classes using this mix-in are expected to override xmlAttributesDict(),
    getXMLNodeName() and getXMLDOM(): the versions defined here only raise
    NotImplementedError. The three accessors xmlAttributeType(),
    xmlSetAttribute() and xmlGetAttribute() simply read the dictionary
    returned by xmlAttributesDict().
    """

    def xmlAttributeType(self, attributeName):
        """
        Returns the first item of the entry registered for 'attributeName' in
        xmlAttributesDict() (the type name, e.g. 'string', in the example
        given in xmlAttributesDict()).
        """
        return self.xmlAttributesDict()[attributeName][0]

    def xmlSetAttribute(self, attributeName):
        """
        Returns the second item of the entry registered for 'attributeName'
        in xmlAttributesDict() (the setter in the example given in
        xmlAttributesDict()). The item is returned, not called.
        """
        return self.xmlAttributesDict()[attributeName][1]

    def xmlGetAttribute(self, attributeName):
        """
        Returns the third item of the entry registered for 'attributeName' in
        xmlAttributesDict() (the getter in the example given in
        xmlAttributesDict()). The item is returned, not called.
        """
        return self.xmlAttributesDict()[attributeName][2]

    def xmlAttributesDict(self):
        """
        Abstract method: returns the mapping consumed by xmlAttributeType(),
        xmlSetAttribute() and xmlGetAttribute(). Each key is an attribute
        name, each value a 3-item sequence. Example of a possible
        implementation::

            return {'name': ('string',
                             lambda self=None,p=None: None,
                             self.name ),
                    'isAbstract': ( 'boolean',
                                    self.setIsAbstract,
                                    self.isAbstract ),
                    'typeName': ( 'string',
                                  self.setTypeName,
                                  self.typeName )
                    }

        Raises NotImplementedError unless overridden.
        """
        # String exceptions are not supported by current interpreters:
        # NotImplementedError is the standard way to flag an abstract method.
        raise NotImplementedError(
            'xmlAttributesDict is an abstract method which should be defined '
            'in classes using the mix-in class XMLCapability')

    def getXMLNodeName(self):
        """
        Abstract method: returns the node name corresponding to the receiver.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError(
            'getXMLNodeName is an abstract method which should be defined '
            'in classes using the mix-in class XMLCapability')

    def getXMLDOM(self, doc=None, parentNode=None):
        """
        Abstract method: returns the (DOM) DocumentObject for the receiver.

        Parameters 'doc' and 'parentNode' should be both omitted or supplied.
        If they are omitted, a new DocumentObject is created.
        If they are supplied, elements are added to the parentNode.

        Returns: the (possibly new) DocumentObject.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError(
            'getXMLDOM is an abstract method which should be defined '
            'in classes using the mix-in class XMLCapability')
###
### The following taken from
### http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52257
#"""Caller will hand this library a buffer and ask it to either convert
#it or auto-detect the type."""
# None represents a potentially variable byte. "##" in the XML spec...
# Maps the first four bytes of an XML stream to an encoding name.
# None stands for a byte which may take any value.
# NOTE: not every name is necessarily a codec known to Python (see
# autoDetectXMLEncoding() for how missing codecs are handled).
autodetect_dict = {  # byte pattern : encoding name
    (0x00, 0x00, 0xFE, 0xFF): "ucs4_be",
    (0xFF, 0xFE, 0x00, 0x00): "ucs4_le",
    (0xFE, 0xFF, None, None): "utf_16_be",
    (0xFF, 0xFE, None, None): "utf_16_le",
    (0x00, 0x3C, 0x00, 0x3F): "utf_16_be",
    (0x3C, 0x00, 0x3F, 0x00): "utf_16_le",
    (0x3C, 0x3F, 0x78, 0x6D): "utf_8",
    (0x4C, 0x6F, 0xA7, 0x94): "EBCDIC",
}


def autoDetectXMLEncoding(buffer):
    """
    buffer -> encoding_name

    Guesses the encoding of the XML document whose first bytes are in
    'buffer' (a byte string).

    The first four bytes are looked up in 'autodetect_dict', first as they
    are, then with the last two bytes considered variable. When a pattern
    matches, the buffer is decoded with the corresponding codec and, if its
    first line is an XML declaration with an 'encoding' pseudo-attribute, the
    declared value is returned in place of the guess.

    Returns:

      - "utf_8" (the default according to the XML spec) when no pattern
        matches; this includes buffers shorter than two bytes,

      - the name found in 'autodetect_dict' when the declaration gives no
        usable encoding, or when no codec is installed for that name
        (e.g. EBCDIC): the returned name might thus have no installed decoder,

      - the encoding declared in the XML declaration otherwise.
    """
    # a more efficient implementation would not decode the whole
    # buffer at once but otherwise we'd have to decode a character at
    # a time looking for the quote character...that's a pain
    default = "utf_8"  # according to the XML spec, this is the default

    # Items of a byte string are integers or 1-char strings depending on the
    # interpreter: normalize them to integers.
    head = tuple([b if isinstance(b, int) else ord(b) for b in buffer[0:4]])

    guess = None
    if len(head) == 4:
        guess = autodetect_dict.get(head)
    if not guess and len(head) >= 2:
        # try autodetection again removing potentially variable bytes
        guess = autodetect_dict.get((head[0], head[1], None, None))
    if not guess:
        return default

    # try to find a more precise encoding using the xml declaration
    try:
        decode = codecs.lookup(guess)[1]
    except LookupError:
        # no decoder installed for the guess: we cannot read the declaration
        return guess
    decoded = decode(buffer, 'replace')[0]
    first_line = decoded.split(u"\n")[0]
    if not first_line.startswith(u"<?xml"):
        return guess
    encoding_pos = first_line.find(u"encoding")
    if encoding_pos == -1:
        return guess

    # The value is delimited by the first quote, single or double, following
    # 'encoding'. Taking the nearest one prevents a later pseudo-attribute
    # using the other kind of quote from being mistaken for the encoding.
    candidates = [first_line.find(quote, encoding_pos)
                  for quote in (u'"', u"'")]
    candidates = [pos for pos in candidates if pos != -1]
    if not candidates:
        return guess
    quote_pos = min(candidates)
    closing_pos = first_line.find(first_line[quote_pos], quote_pos + 1)
    if closing_pos == -1:
        # unterminated value (e.g. truncated buffer): keep the guess
        return guess
    # an empty declared value is of no use either: keep the guess
    return first_line[quote_pos + 1:closing_pos] or guess
|