InputSource.py :  » XML » 4Suite » 4Suite-XML-1.0.2 » Ft » Xml » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » XML » 4Suite 
4Suite » 4Suite XML 1.0.2 » Ft » Xml » InputSource.py
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Xml/InputSource.py,v 1.49 2005/09/15 00:06:15 jkloth Exp $
"""
Classes providing a standard interface and encapsulation of metadata for
document/entity streams intended for input to various XML processors.

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import os, cStringIO, types, warnings, mimetools

from Ft import FtWarning
from Ft.Lib import Uri,Uuid

# Names exported by a star-import of this module.
__all__ = ['InputSource', 'NullInputSource',
           'InputSourceFactory',
           'DefaultFactory', 'NoCatalogFactory']

# Methods an InputSource instance should expose from its underlying stream.
# InputSource.__init__ copies each of these from the stream onto the
# instance when the stream provides it.
_file_methods = ('read', 'readline', 'readlines', 'close')

class InputSource:
    """
    An input source is an encapsulation of a source of content.
    It includes a stream (Python file-like object) from which the
    content can be read, a URI to identify the stream and facilitate
    resolution of relative URI references / system IDs encountered
    within the stream, and parameters used by the processors of the
    stream (XML parsers, XSLT processors).

    It is designed to be overridden as applications need different
    functionality from sources.
    """

    # Values passed as the 'hint' argument of _openStream() and clone()
    # to say why a URI is being resolved.
    RESOLVE_ENTITY_HINT = "EXTERNAL ENTITY"
    RESOLVE_URI_HINT = "RESOLVE URI"
    CATALOG_URI_HINT = "CATALOG URI"

    def __init__(self, stream, uri=None, processIncludes=True,
                 stripElements=None, factory=None,
                 resolver=Uri.BASIC_RESOLVER, catalog=None, encoding=None):
        """
        InputSource constructor

        source = InputSource(...)

        stream - the stream associated with this input source
        uri - the absolute URI of the input source; when omitted or
              empty, a unique 'urn:uuid:' URI is generated
        processIncludes - Whether or not XIncludes should be expanded
        stripElements - Space stripping rules
        factory - The factory that created this instance
        resolver - URI resolver; defaults to Ft.Lib.Uri.BASIC_RESOLVER
        catalog - TR9401/XML Catalog object for resolving public IDs
        encoding - a string externally declaring the stream's encoding;
                   only used when no encoding can be determined from
                   the stream's own metadata
        """
        if uri:
            self.uri = uri
        else:
            self.uri = 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())
        self.stream = stream
        self.processIncludes = processIncludes
        self.stripElements = stripElements or []
        self.factory = factory
        self.fragment = Uri.SplitFragment(self.uri)[1]
        self._resolver = resolver
        self._catalog = catalog
        # Encoding deduced from the stream metadata takes precedence over
        # the externally declared one.
        enc = self._getStreamEncoding(stream)
        if enc is None:
            enc = encoding
        self.encoding = enc
        self.name = self.uri
        # Expose the stream's own file methods directly on this object.
        for name in _file_methods:
            method = getattr(stream, name, None)
            if method:
                setattr(self, name, method)


    def _getStreamEncoding(self, stream):
        """
        Returns the encoding of the given stream, if this info can be
        determined from metadata in the stream object with a reasonable
        degree of confidence. Returns None otherwise.

        Adheres to RFC 3023, which requires the charset value in the
        Content-Type header to take precedence, or if no value is
        available, to assume us-ascii in the case of certain text/*
        media types. For other text/* media types, adheres to RFC 2616
        sec. 3.7.1, which requires the assumption of iso-8859-1, when
        the entity was transmitted by HTTP. Media type and charset info
        is ignored for streams believed to originate from a local file,
        in accordance with XML 1.0 Third Edition appendix F.2.
        """
        # We should never try to deduce the encoding when the stream is
        # a local file, in order to conform with XML 1.0 Third Edition
        # appendix F.2, and also because urllib.urlopen() uses
        # mimetypes.guess_type() to set the media type on both local
        # files and FTP resources, thus causing '*.xml' files to tend to
        # get a 'text/xml' mapping, which is bad because RFC 3023
        # requires them to be assumed to be us-ascii. Therefore, we must
        # look for clues that assure us that the stream is not likely to
        # be wrapping a file or FTP resource. The way to tell is to look
        # for the 'url' attribute on the stream object. urllib.urlopen()
        # MAY create this attribute and set it to the URL that was
        # passed in. Note that this 'URL' may have just been a local
        # filesystem path or partial URL or junk like 'C:/x/y/z'
        stream_url = getattr(stream, 'url', None)
        if stream_url is None:
            return None
        scheme = Uri.GetScheme(stream_url)
        # A one-character "scheme" is most likely a Windows drive letter.
        if scheme is None or len(scheme) == 1:
            return None
        # Scheme names are case-insensitive; normalize once so that every
        # comparison below behaves the same way.
        scheme = scheme.lower()
        if scheme in ('file', 'ftp'):
            return None
        # Get the stream metadata.
        # Streams created by urllib.urlopen() MAY have an info() method
        # that MAY return a mimetools.Message object. We can trust this
        # as a source of metadata since we have already ruled out the
        # likelihood of it being a local file or FTP resource.
        info = None
        info_method = getattr(stream, 'info', None)
        if isinstance(info_method, types.MethodType):
            info = info_method()
        if isinstance(info, mimetools.Message):
            # use explicit charset if present and not empty string.
            charset = info.getparam('charset')
            if charset:
                return charset
            # charset empty or not present, so examine media type
            # and protocol.
            maintype = getattr(info, 'maintype', None)
            # Fall back to '' so a missing subtype cannot break endswith().
            subtype = getattr(info, 'subtype', None) or ''
            if maintype == 'text':
                if subtype in ('xml', 'xml-external-parsed-entity') or \
                   subtype.endswith('+xml'):
                    return 'us-ascii'
                elif scheme == 'http':
                    return 'iso-8859-1'
        # If we reach this point, the stream metadata was of no use,
        # so we'll let the parser determine the encoding from
        # the entity itself.
        return None


    def resolveEntity(self, publicId, systemId):
        """
        Resolve an external entity to a new InputSource.

        Presented with an optional public identifier and a system identifier,
        this function attempts to locate a mapping in the catalog, if one is
        defined.  If no mapping is found, the system identifier will be
        dereferenced as a URL, using this input source's URI as the base.
        """
        hint = InputSource.RESOLVE_ENTITY_HINT
        if self._catalog:
            new_uri = self._catalog.resolveEntity(publicId, systemId)
            if new_uri is not None:
                systemId = new_uri
                hint = InputSource.CATALOG_URI_HINT
        return self._resolve(systemId, None, hint)


    def resolve(self, uri, base=None, hint=None):
        """
        Resolve a URI reference into a new InputSource.

        This function is used when a URI reference is encountered in the
        original stream and needs to be resolved (e.g. xi:include,
        xsl:include, xsl:import, document(), etc.).  When a catalog is
        available, its URI entries are used first.  If no entry is found,
        the URI is resolved against the given base (or, when base is
        None, the URI of this input source) and then opened.

        The hint parameter is used to give a hint as to what the
        resolution will be used for. It defaults to RESOLVE_URI_HINT and
        is replaced by CATALOG_URI_HINT when the catalog supplied the URI.

        Errors during resolution (such as "file not found") are raised
        to the caller.
        """
        if self._catalog:
            new_uri = self._catalog.resolveURI(uri)
            if new_uri is not None:
                uri = new_uri
                hint = InputSource.CATALOG_URI_HINT
        if hint is None:
            hint = InputSource.RESOLVE_URI_HINT
        return self._resolve(uri, base, hint)


    def getUriResolver(self):
        """
        This method returns the URI resolver that is used by this
        input source to normalize (resolve to absolute form) and
        resolve (dereference) URI references. This is the public method
        to use if just URI resolution is needed.
        """
        return self._resolver

    #Helper Methods
    def _resolve(self, uri, base, hint, ignoreErrors=False):
        """
        Resolves a system identifier (fragmentless URI reference) into a
        new input source.

        The hint parameter is used to give a hint as to what the
        resolution will be used for.

        If the ignoreErrors flag is set, an error during resolution
        (such as "file not found") will result in a NullInputSource
        being returned, rather than a raised exception.
        """
        uri = self._normalize(uri, base)
        stream = self._openStream(uri, ignoreErrors, hint)
        # clone() turns a None stream into a NullInputSource.
        return self.clone(stream, uri, hint)

    def _normalize(self, uriref, base=None):
        """
        Normalize (resolve to absolute form) a given URI reference,
        using the given base or, when base is None, the URI of this
        input source.

        The default implementation will just use the default URI resolver.

        If your input source is working with non-standard or not supported
        URIs, then you will need to override this or the getUriResolver method.
        """
        if base is None:
            base = self.uri
        return self.getUriResolver().normalize(uriref, base)

    def _openStream(self, uri, ignoreErrors=False, hint=None):
        """
        Returns a representation of a resource as a stream by
        resolving the given URI. If ignoreErrors is set, failure to
        obtain the stream will result in None being returned, rather
        than an exception (e.g. "file not found") being raised.
        The hint argument is not used by this default implementation.

        Default behaviour is to use the resolver associated with this
        InputSource. If your custom InputSource needs to open URIs
        that are not supported natively by this InputSource (e.g.,
        repository objects, or objects from a database), then you
        should override this method and do whatever it takes to
        resolve the URI into a readable stream.
        """
        try:
            return self.getUriResolver().resolve(uri)
        except (KeyboardInterrupt, SystemExit):
            # A user interrupt or interpreter exit is not a resolution
            # failure; never hide it behind ignoreErrors.
            raise
        except:
            # Deliberately broad: resolvers may raise anything.
            if ignoreErrors:
                return None
            raise

    def clone(self, stream, uri=None, hint=None):
        """
        Clones this input source, creating a new instance with
        the known params.

        stream - the stream for the new instance; if None, a
                 NullInputSource is returned instead
        uri - the URI for the new instance; defaults to this one's URI
        hint - when not None, the clone does not inherit this input
               source's encoding

        If your derived InputSource requires additional state information
        then you have to override how it is cloned and pickled.
        """
        if uri is None:
            uri = self.uri
        if stream is None:
            return NullInputSource(uri)
        if hint is not None:
            # don't inherit encoding when cloning for self.resolve()
            encoding = None
        else:
            encoding = self.encoding
        return self.__class__(stream, uri,
                              processIncludes=self.processIncludes,
                              stripElements=self.stripElements,
                              factory=self.factory, resolver=self._resolver,
                              catalog=self._catalog, encoding=encoding)

    #Pickle routines.  We need to be able to pickle an input source
    #but cannot pickle a stream
    def __getstate__(self):
        """
        Returns a copy of the instance dictionary with the stream, and
        the stream methods that __init__ copied onto the instance,
        removed.
        """
        state = self.__dict__.copy()
        state['stream'] = None
        # The delegated file methods are bound to the stream; leaving
        # them in the state would keep the stream alive and drag it
        # (or an unpicklable method object) into the pickle.
        for name in _file_methods:
            if name in state:
                del state[name]
        return state


class NullInputSource(InputSource):
    """
    An InputSource that simulates an empty stream.

    The constructor only accepts a URI; every other InputSource
    parameter keeps its default value.
    """
    def __init__(self, uri=None):
        """
        uri - the URI to associate with the empty stream (optional)
        """
        InputSource.__init__(self, cStringIO.StringIO(), uri)

    def clone(self, stream, uri=None, hint=None):
        """
        Creates a new input source for the given stream.

        The inherited implementation instantiates self.__class__ with
        the full InputSource constructor arguments, which this class's
        one-argument constructor cannot accept. A plain InputSource is
        therefore created for a real stream, and another NullInputSource
        when stream is None.

        stream - the stream for the new instance, or None
        uri - the URI for the new instance; defaults to this one's URI
        hint - when not None, the clone does not inherit this input
               source's encoding
        """
        if uri is None:
            uri = self.uri
        if stream is None:
            return NullInputSource(uri)
        if hint is not None:
            encoding = None
        else:
            encoding = self.encoding
        return InputSource(stream, uri,
                           processIncludes=self.processIncludes,
                           stripElements=self.stripElements,
                           factory=self.factory, resolver=self._resolver,
                           catalog=self._catalog, encoding=encoding)


class InputSourceFactory:
    """
    Builds input source objects from URIs, strings or open streams,
    handing each new instance this factory's resolver and catalog
    unless the caller supplies its own.
    """
    FACTORY_URI_HINT = 'FACTORY URI'

    def __init__(self, inputSourceClass=None, resolver=Uri.BASIC_RESOLVER,
                 catalog=None):
        """
        inputSourceClass - class to instantiate; InputSource when omitted
        resolver - URI resolver handed to created instances by default
        catalog - catalog handed to created instances by default
        """
        if inputSourceClass:
            self._klass = inputSourceClass
        else:
            self._klass = InputSource
        self.resolver = resolver
        self.catalog = catalog

    def fromUri(self, uri, *v_args, **kw_args):
        """
        Builds an input source by dereferencing the given URI. When the
        factory has a catalog, the catalog is consulted first and its
        mapping, if any, replaces the URI.

        uri - a URI from which the input will be read.  Important: a file
              path is generally not a URI. To be safe, if you wish to read
              from a file, use the following pattern:
              from Ft.Lib import Uri
              uri = Uri.OsPathToUri("/path/to/file.ext")
              OR uri = Uri.OsPathToUri("C:\\path\\to\\file.ext")

        Extra positional and keyword arguments are forwarded to
        fromStream().
        """
        target = uri
        hint = InputSourceFactory.FACTORY_URI_HINT
        if self.catalog:
            mapped = self.catalog.resolveURI(uri)
            if mapped is not None:
                target = mapped
                hint = InputSource.CATALOG_URI_HINT
        # A stream-less instance is created first; its _resolve() then
        # opens the URI and returns the real input source.
        seed = self.fromStream(None, target, *v_args, **kw_args)
        return seed._resolve(target, None, hint)

    def fromString(self, st, uri=None, *v_args, **kw_args):
        """
        Builds an input source that reads from the given string.
        The uri argument is the URI to use for the stream
        (one should always be given, even if it's bogus).

        Raises ValueError when st is not a str instance.
        """
        if isinstance(st, str):
            buf = cStringIO.StringIO(st)
            return self.fromStream(buf, uri, *v_args, **kw_args)
        if st is None:
            type_name = 'None'
        else:
            type_name = type(st).__name__
        raise ValueError("String must be of type string, not %s" % type_name)

    def fromStream(self, stream, uri=None, *v_args, **kw_args):
        """
        Builds an input source around the given stream.
        The uri argument is the URI to use for the stream
        (one should always be given, even if it's bogus); an FtWarning
        is issued when it is missing.

        The 'factory' keyword is always set to this factory; 'resolver'
        and 'catalog' default to the factory's own unless given.
        """
        if not uri:
            warnings.warn("Creation of InputSource without a URI",
                          FtWarning, 2)
        kw_args['factory'] = self
        kw_args.setdefault('resolver', self.resolver)
        kw_args.setdefault('catalog', self.catalog)
        return self._klass(stream, uri, *v_args, **kw_args)

# Shared factory whose input sources are created without a catalog.
NoCatalogFactory = InputSourceFactory(catalog=None)

# NOTE(review): this import sits at the bottom of the module rather than
# with the other imports -- presumably to avoid a circular import with
# Ft.Xml.Catalog; confirm before moving it.
from Ft.Xml.Catalog import GetDefaultCatalog
# Shared factory whose input sources use the catalog returned by
# GetDefaultCatalog() at import time.
DefaultFactory = InputSourceFactory(catalog=GetDefaultCatalog())
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.