Uri.py : » XML » 4Suite » 4Suite-XML-1.0.2 » Ft » Lib » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » XML » 4Suite
4Suite » 4Suite XML 1.0.2 » Ft » Lib » Uri.py
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Lib/Uri.py,v 1.106.2.1 2006/10/22 04:20:36 jkloth Exp $
"""
Classes and functions related to URI validation and resolution

APIs that currently differentiate between Unicode and byte strings are
considered to be experimental; do not count on their uniformity between
releases.

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

from __future__ import generators
import urllib, urllib2, os, sys, re, mimetools, cStringIO
from string import ascii_letters

from Ft.Lib import UriException,ImportUtil

__all__ = [ # RFC 3986 implementation
            'MatchesUriRefSyntax', 'MatchesUriSyntax',
            'PercentEncode', 'PercentDecode',
            'SplitUriRef', 'UnsplitUriRef',
            'SplitAuthority', 'SplitFragment',
            'Absolutize', 'Relativize', 'RemoveDotSegments',
            'NormalizeCase', 'NormalizePercentEncoding',
            'NormalizePathSegments', 'NormalizePathSegmentsInUri',
            # URI resolution
            'UriResolverBase', 'FtUriResolver',
            'BASIC_RESOLVER',
            'DEFAULT_URI_SCHEMES',
            'UrlOpen',
            # RFC 3151 implementation
            'UrnToPublicId', 'PublicIdToUrn',
            # Miscellaneous
            'IsAbsolute', 'GetScheme', 'StripFragment',
            'OsPathToUri', 'UriToOsPath', 'BaseJoin',
            'MakeUrllibSafe',
            'WINDOWS_SLASH_COMPAT',
            # 4Suite-specific
            'UriDict', 'PathResolve',
            ]

# whether OsPathToUri should treat "/" same as "\" in a Windows path
WINDOWS_SLASH_COMPAT = True

# URI schemes supported by UriResolverBase
DEFAULT_URI_SCHEMES = ['http', 'https', 'file', 'ftp', 'data', 'pep302']
if not hasattr(urllib2, 'HTTPSHandler'):
    DEFAULT_URI_SCHEMES.remove('https')
DEFAULT_URI_SCHEMES = tuple(DEFAULT_URI_SCHEMES)

#=============================================================================
# Functions that implement aspects of RFC 3986
#
_validationSetupCompleted = False
def _initUriValidationRegex():
    """
    Called internally to compile the regular expressions needed by
    URI validation functions, just once, the first time a function
    that needs them is called.
    """
    global _validationSetupCompleted
    if _validationSetupCompleted:
        return

    #-------------------------------------------------------------------------
    # Regular expressions for determining the non-URI-ness of strings
    #
    # A given string's designation as a URI or URI reference comes from the
    # context in which it is being used, not from its syntax; a regular
    # expression can at most only determine whether a given string COULD be a
    # URI or URI reference, based on its lexical structure.
    #
    # 1. Altova's regex (in the public domain; courtesy Altova)
    #
    # # based on the BNF grammar in the original RFC 2396
    # ALTOVA_REGEX = r"(([a-zA-Z][0-9a-zA-Z+\-\.]*:)?/{0,2}" + \
    #                r"[0-9a-zA-Z;/?:@&=+$\.\-_!~*'()%]+)?" + \
    #                r"(#[0-9a-zA-Z;/?:@&=+$\.\-_!~*'()%]+)?"
    #
    # This regex matches URI references, and thus URIs as well. It is also
    # lenient; some strings that are not URI references can falsely match.
    #
    # It is also not very useful as-is, because it essentially has the form
    # (group1)?(group2)? -- this matches the empty string, and in fact any
    # string or substring can be said to match this pattern. To be useful,
    # this regex (and any like it) must be changed so that it only matches
    # an entire string. This is accomplished in Python by using the \A and \Z
    # delimiters around the pattern:
    #
    # BETTER_ALTOVA_REGEX = r"\A(?!\n)%s\Z" % ALTOVA_REGEX
    #
    # The (?!\n) takes care of an edge case where a string consisting of a
    # sole linefeed character would falsely match.
    #
    # 2. Python regular expressions for strict validation of URIs and URI
    #    references (in the public domain; courtesy Fourthought, Inc.)
    #
    # Note that we do not use any \d or \w shortcuts, as these are
    # potentially locale or Unicode sensitive.
    #
    # # based on the ABNF in RFC 3986,
    # # "Uniform Resource Identifier (URI): Generic Syntax"
    pchar           = r"(?:[0-9A-Za-z\-_\.!~*'();:@&=+$,]|(?:%[0-9A-Fa-f]{2}))"
    fragment        = r"(?:[0-9A-Za-z\-_\.!~*'();:@&=+$,/?]|(?:%[0-9A-Fa-f]{2}))*"
    query           = fragment
    segment_nz_nc   = r"(?:[0-9A-Za-z\-_\.!~*'();@&=+$,]|(?:%[0-9A-Fa-f]{2}))+"
    segment_nz      = r'%s+' % pchar
    segment         = r'%s*' % pchar
    #path_empty      = r''  # zero characters
    path_rootless   = r'%s(?:/%s)*' % (segment_nz, segment)   # begins with a segment
    path_noscheme   = r'%s(?:/%s)*' % (segment_nz_nc, segment)  # begins with a non-colon segment
    path_absolute   = r'/(?:%s)?' % path_rootless  # begins with "/" but not "//"
    path_abempty    = r'(?:/%s)*' % segment   # begins with "/" or is empty
    #path            = r'(?:(?:%s)|(?:%s)|(?:%s)|(?:%s))?' % (path_abempty, path_absolute, path_noscheme, path_rootless)
    domainlabel     = r'[0-9A-Za-z](?:[0-9A-Za-z\-]{0,61}[0-9A-Za-z])?'
    qualified       = r'(?:\.%s)*\.?' % domainlabel
    reg_name        = r"(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)"
    dec_octet       = r'(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
    IPv4address     = r'(?:%s\.){3}(?:%s)' % (dec_octet, dec_octet)
    h16             = r'[0-9A-Fa-f]{1,4}'
    ls32            = r'(?:(?:%s:%s)|%s)' % (h16, h16, IPv4address)
    IPv6address     = r'(?:' + \
                      r'(?:(?:%s:){6}%s)' % (h16, ls32) + \
                      r'|(?:::(?:%s:){5}%s)' % (h16, ls32) + \
                      r'|(?:%s?::(?:%s:){4}%s)' % (h16, h16, ls32) + \
                      r'|(?:(?:(?:%s:)?%s)?::(?:%s:){3}%s)' % (h16, h16, h16, ls32) + \
                      r'|(?:(?:(?:%s:)?%s){0,2}::(?:%s:){2}%s)' % (h16, h16, h16, ls32) + \
                      r'|(?:(?:(?:%s:)?%s){0,3}::%s:%s)' % (h16, h16, h16, ls32) + \
                      r'|(?:(?:(?:%s:)?%s){0,4}::%s)' % (h16, h16, ls32) + \
                      r'|(?:(?:(?:%s:)?%s){0,5}::%s)' % (h16, h16, h16) + \
                      r'|(?:(?:(?:%s:)?%s){0,6}::)' % (h16, h16) + \
                      r')'
    IPvFuture       = r"(?:v[0-9A-Fa-f]+\.[0-9A-Za-z\-\._~!$&'()*+,;=:]+)"
    IP_literal      = r'\[(?:%s|%s)\]' % (IPv6address, IPvFuture)
    port            = r'[0-9]*'
    host            = r'(?:%s|%s|%s)?' % (IP_literal, IPv4address, reg_name)
    userinfo        = r"(?:[0-9A-Za-z\-_\.!~*'();:@&=+$,]|(?:%[0-9A-Fa-f]{2}))*"
    authority       = r'(?:%s@)?%s(?::%s)?' % (userinfo, host, port)
    scheme          = r'[A-Za-z][0-9A-Za-z+\-\.]*'
    #absolute_URI    = r'%s:%s(?:\?%s)?' % (scheme, hier_part, query)
    relative_part   = r'(?:(?://%s%s)|(?:%s)|(?:%s))?' % (authority, path_abempty,
                                                          path_absolute, path_noscheme)
    relative_ref    = r'%s(?:\?%s)?(?:#%s)?' % (relative_part, query, fragment)
    hier_part       = r'(?:(?://%s%s)|(?:%s)|(?:%s))?' % (authority, path_abempty,
                                                          path_absolute, path_rootless)
    URI             = r'%s:%s(?:\?%s)?(?:#%s)?' % (scheme, hier_part, query, fragment)
    URI_reference   = r'(?:%s|%s)' % (URI, relative_ref)

    STRICT_URI_PYREGEX = r"\A%s\Z" % URI
    STRICT_URIREF_PYREGEX = r"\A(?!\n)%s\Z" % URI_reference

    global URI_PATTERN, URI_REF_PATTERN
    URI_PATTERN = re.compile(STRICT_URI_PYREGEX)        # strict checking for URIs
    URI_REF_PATTERN = re.compile(STRICT_URIREF_PYREGEX) # strict checking for URI refs
    _validationSetupCompleted = True
    return


def MatchesUriRefSyntax(s):
    """
    This function returns true if the given string could be a URI reference,
    as defined in RFC 3986, just based on the string's syntax.

    A URI reference can be a URI or certain portions of one, including the
    empty string, and it can have a fragment component.
    """
    if not _validationSetupCompleted:
        _initUriValidationRegex()
    return URI_REF_PATTERN.match(s) is not None


def MatchesUriSyntax(s):
    """
    This function returns true if the given string could be a URI, as defined
    in RFC 3986, just based on the string's syntax.

    A URI is by definition absolute (begins with a scheme) and does not end
    with a #fragment. It also must adhere to various other syntax rules.
    """
    if not _validationSetupCompleted:
        _initUriValidationRegex()
    return URI_PATTERN.match(s) is not None


_splitUriRefSetupCompleted = False
def _initSplitUriRefPattern():
    """
    Called internally to compile the regular expression used by
    SplitUriRef() just once, the first time the function is called.
    """
    global _splitUriRefSetupCompleted
    if _splitUriRefSetupCompleted:
        return

    # Like the others, this regex is also in the public domain.
    # It is based on this one, from RFC 3986 appendix B
    # (unchanged from RFC 2396 appendix B):
    # ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    regex = r"^(?:(?P<scheme>[^:/?#]+):)?(?://(?P<authority>[^/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$"
    global SPLIT_URI_REF_PATTERN
    SPLIT_URI_REF_PATTERN = re.compile(regex)
    _splitUriRefSetupCompleted = True
    return


def SplitUriRef(uriref):
    """
    Given a valid URI reference as a string, returns a tuple representing the
    generic URI components, as per RFC 3986 appendix B. The tuple's structure
    is (scheme, authority, path, query, fragment).

    All values will be strings (possibly empty) or None if undefined.

    Note that per RFC 3986, there is no distinction between a path and
    an "opaque part", as there was in RFC 2396.
    """
    if not _splitUriRefSetupCompleted:
        _initSplitUriRefPattern()
    # the pattern will match every possible string, so it's safe to
    # assume there's a groupdict method to call.
    g = SPLIT_URI_REF_PATTERN.match(uriref).groupdict()
    scheme      = g['scheme']
    authority   = g['authority']
    path        = g['path']
    query       = g['query']
    fragment    = g['fragment']
    return (scheme, authority, path, query, fragment)


def UnsplitUriRef(uriRefSeq):
    """
    Given a sequence as would be produced by SplitUriRef(), assembles and
    returns a URI reference as a string.
    """
    if not isinstance(uriRefSeq, (tuple, list)):
        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
    (scheme, authority, path, query, fragment) = uriRefSeq
    uri = ''
    if scheme is not None:
        uri += scheme + ':'
    if authority is not None:
        uri += '//' + authority
    uri += path
    if query is not None:
        uri += '?' + query
    if fragment is not None:
        uri += '#' + fragment
    return uri


_splitAuthoritySetupCompleted = False
def _initSplitAuthorityPattern():
    """
    Called internally to compile the regular expression used by
    SplitAuthority() just once, the first time the function is called.
    """
    global _splitAuthoritySetupCompleted
    if _splitAuthoritySetupCompleted:
        return
    global SPLIT_AUTHORITY_PATTERN
    regex = r'(?:(?P<userinfo>[^@]*)@)?(?P<host>[^:]*)(?::(?P<port>.*))?'
    SPLIT_AUTHORITY_PATTERN = re.compile(regex)
    _splitAuthoritySetupCompleted = True
    return


def SplitAuthority(authority):
    """
    Given a string representing the authority component of a URI, returns
    a tuple consisting of the subcomponents (userinfo, host, port). No
    percent-decoding is performed.
    """
    if not _splitAuthoritySetupCompleted:
        _initSplitAuthorityPattern()
    m = SPLIT_AUTHORITY_PATTERN.match(authority)
    if m:
        return m.groups()
    else:
        return (None, authority, None)


def SplitFragment(uri):
    """
    Given a URI or URI reference, returns a tuple consisting of
    (base, fragment), where base is the portion before the '#' that
    precedes the fragment component.
    """
    # The only '#' in a legit URI will be the fragment separator,
    # but in the wild, people get sloppy. Assume the last '#' is it.
    pos = uri.rfind('#')
    if pos == -1:
        return (uri, uri[:0])
    else:
        return (uri[:pos], uri[pos+1:])


# "unreserved" characters are allowed in a URI, and do not have special
# meaning as delimiters of URI components or subcomponents. They may
# appear raw or percent-encoded, but percent-encoding is discouraged.
# This set of characters is sufficiently long enough that using a
# compiled regex is faster than using a string with the "in" operator.
#UNRESERVED_PATTERN = re.compile(r"[0-9A-Za-z\-\._~!*'()]") # RFC 2396
UNRESERVED_PATTERN = re.compile(r'[0-9A-Za-z\-\._~]') # RFC 3986

# "reserved" characters are allowed in a URI, but they may or always do
# have special meaning as delimiters of URI components or subcomponents.
# When being used as delimiters, they must be raw, and when not being
# used as delimiters, they must be percent-encoded.
# This set of characters is sufficiently short enough that using a
# string with the "in" operator is faster than using a compiled regex.
# The characters in the string are ordered according to how likely they
# are to be found (approximately), for faster operation with "in".
#RESERVED = "/&=+?;@,:$[]" # RFC 2396 + RFC 2732
RESERVED = "/=&+?#;@,:$!*[]()'" # RFC 3986

# workaround for Py 2.2 bytecode issue; see
# http://mail.python.org/pipermail/python-list/2005-March/269948.html
SURR_DC00 = unichr(0xdc00)

def _chars(s):
    """
    This generator function helps iterate over the characters in a
    string. When the string is unicode and a surrogate pair is
    encountered, the pair is returned together, regardless of whether
    Python was built with 32-bit ('wide') or 16-bit code values for
    its internal representation of unicode. This function will raise a
    ValueError if it detects an illegal surrogate pair.

    For example, given s = u'\ud800\udc00\U00010000',
    with narrow-char unicode, "for c in s" normally iterates 4 times,
    producing u'\ud800', u'\udc00', 'u\ud800', u'\udc00', while
    "for c in _chars(s)" will iterate 2 times: producing
    u'\ud800\udc00' both times; and with wide-char unicode,
    "for c in s" iterates 3 times, producing u'\ud800', u'\udc00',
    and u'\U00010000', while "for c in _chars(s)" will iterate 2 times,
    producing u'\U00010000' both times.

    With this function, the value yielded in each iteration is thus
    guaranteed to represent a single abstract character, allowing for
    ideal encoding by the built-in codecs, as is necessary when
    percent-encoding.
    """
    if isinstance(s, str):
        for i in s:
            yield i
        return
    s = iter(s)
    for i in s:
        if u'\ud7ff' < i < SURR_DC00:
            try:
                j = s.next()
            except StopIteration:
                raise ValueError("Bad pair: string ends after %r" % i)
            if SURR_DC00 <= j < u'\ue000':
                yield i + j
            else:
                raise ValueError("Bad pair: %r (bad second half)" % (i+j))
        elif SURR_DC00 <= i < u'\ue000':
                raise ValueError("Bad pair: %r (no first half)" % i)
        else:
            yield i


def PercentEncode(s, encoding='utf-8', encodeReserved=True, spaceToPlus=False,
                     nlChars=None, reservedChars=RESERVED):
    """
    [*** Experimental API ***] This function applies percent-encoding, as
    described in RFC 3986 sec. 2.1, to the given string, in order to prepare
    the string for use in a URI. It replaces characters that are not allowed
    in a URI. By default, it also replaces characters in the reserved set,
    which normally includes the generic URI component delimiters ":" "/"
    "?" \"#\" "[" "]" "@" and the subcomponent delimiters "!" "$" "&" "\'" "("
    ")" "*" "+" "," ";" "=".

    Ideally, this function should be used on individual components or
    subcomponents of a URI prior to assembly of the complete URI, not
    afterward, because this function has no way of knowing which characters
    in the reserved set are being used for their reserved purpose and which
    are part of the data. By default it assumes that they are all being used
    as data, thus they all become percent-encoded.

    The characters in the reserved set can be overridden from the default by
    setting the reservedChars argument. The percent-encoding of characters
    in the reserved set can be disabled by unsetting the encodeReserved flag.
    Do this if the string is an already-assembled URI or a URI component,
    such as a complete path.

    If the given string is Unicode, the name of the encoding given in the
    encoding argument will be used to determine the percent-encoded octets
    for characters that are not in the U+0000 to U+007F range. The codec
    identified by the encoding argument must return a byte string.

    If the given string is not Unicode, the encoding argument is ignored and
    the string is interpreted to represent literal octets, rather than
    characters. Octets above \\x7F will be percent-encoded as-is, e.g., \\xa0
    becomes %A0, not, say, %C2%A0.

    The spaceToPlus flag controls whether space characters are changed to
    "+" characters in the result, rather than being percent-encoded.
    Generally, this is not required, and given the status of "+" as a
    reserved character, is often undesirable. But it is required in certain
    situations, such as when generating application/x-www-form-urlencoded
    content or RFC 3151 public identifier URNs, so it is supported here.

    The nlChars argument, if given, is a sequence type in which each member
    is a substring that indicates a "new line". Occurrences of this substring
    will be replaced by '%0D%0A' in the result, as is required when generating
    application/x-www-form-urlencoded content.

    This function is similar to urllib.quote(), but is more conformant and
    Unicode-friendly. Suggestions for improvements welcome.
    """
    res = ''
    is_unicode = isinstance(s, unicode)
    if nlChars is not None:
        for c in nlChars:
            s.replace(c, '\r\n')
    for c in _chars(s):
        # surrogates? -> percent-encode according to given encoding
        if is_unicode and len(c) - 1:
            for octet in c.encode(encoding):
                res += '%%%02X' % ord(octet)
        # not unreserved?
        elif UNRESERVED_PATTERN.match(c) is None:
            cp = ord(c)
            # ASCII range?
            if cp < 128:
                # space? -> plus if desired
                if spaceToPlus and c == ' ':
                    res += '+'
                # reserved? -> percent-encode if desired
                elif c in reservedChars:
                    if encodeReserved:
                        res += '%%%02X' % cp
                    else:
                        res += c
                # not unreserved or reserved, so percent-encode
                # FIXME: should percent-encode according to given encoding;
                # ASCII range is not special!
                else:
                    res += '%%%02X' % cp
            # non-ASCII-range unicode?
            elif is_unicode:
                # percent-encode according to given encoding
                for octet in c.encode(encoding):
                    res += '%%%02X' % ord(octet)
            # non-ASCII str; percent-encode the bytes
            else:
                for octet in c:
                    res += '%%%02X' % ord(octet)

        # unreserved -> safe to use as-is
        else:
            res += c
    return res


def PercentDecode(s, encoding='utf-8', decodable=None):
    """
    [*** Experimental API ***] Reverses the percent-encoding of the given
    string.

    This function is similar to urllib.unquote(), but can also process a
    Unicode string, not just a regular byte string.

    By default, all percent-encoded sequences are decoded, but if a byte
    string is given via the 'decodable' argument, only the sequences
    corresponding to those octets will be decoded.

    If the string is Unicode, the percent-encoded sequences are converted to
    bytes, then converted back to Unicode according to the encoding given in
    the encoding argument. For example, by default, u'abc%E2%80%A2' will be
    converted to u'abc\u2022', because byte sequence E2 80 A2 represents
    character U+2022 in UTF-8.

    If the string is not Unicode, the percent-encoded octets are just
    converted to bytes, and the encoding argument is ignored. For example,
    'abc%E2%80%A2' will be converted to 'abc\xe2\x80\xa2'.

    This function is intended for use on the portions of a URI that are
    delimited by reserved characters (see PercentEncode), or on a value from
    data of media type application/x-www-form-urlencoded.
    """
    # Most of this comes from urllib.unquote().
    # urllib.unquote(), if given a unicode argument, does not decode
    # percent-encoded octets above %7F.
    is_unicode = isinstance(s, unicode)
    if is_unicode:
        mychr = unichr
    else:
        mychr = chr
    list_ = s.split('%')
    res = [list_[0]]
    myappend = res.append
    del list_[0]
    for item in list_:
        if item[1:2]:
            try:
                c = mychr(int(item[:2], 16))
                if decodable is None:
                    myappend(c + item[2:])
                elif c in decodable:
                    myappend(c + item[2:])
                else:
                    myappend('%' + item)
            except ValueError:
                myappend('%' + item)
        else:
            myappend('%' + item)
    s = ''.join(res)
    # If the original input was unicode, then we assume it represented
    # characters; e.g., u'%E2%80%A2' -> '\xe2\x80\xa2' -> u'\u2022'
    # (assuming UTF-8 was the basis for percent-encoding). However,
    # at this point in the implementation, variable s would actually be
    # u'\u00e2\u0080\u00a2', so we first convert it to bytes (via an
    # iso-8859-1 encode) in order to get '\xe2\x80\xa2'. Then we decode back
    # to unicode according to the desired encoding (UTF-8 by default) in
    # order to produce u'\u2022'.
    if is_unicode:
        s = s.encode('iso-8859-1').decode(encoding)
    return s


def Absolutize(uriRef, baseUri):
    """
    Resolves a URI reference to absolute form, effecting the result of RFC
    3986 section 5. The URI reference is considered to be relative to the
    given base URI.

    It is the caller's responsibility to ensure that the base URI matches
    the absolute-URI syntax rule of RFC 3986, and that its path component
    does not contain '.' or '..' segments if the scheme is hierarchical.
    Unexpected results may occur otherwise.

    This function only conducts a minimal sanity check in order to determine
    if relative resolution is possible: it raises a UriException if the base
    URI does not have a scheme component. While it is true that the base URI
    is irrelevant if the URI reference has a scheme, an exception is raised
    in order to signal that the given string does not even come close to
    meeting the criteria to be usable as a base URI.

    It is the caller's responsibility to make a determination of whether the
    URI reference constitutes a "same-document reference", as defined in RFC
    2396 or RFC 3986. As per the spec, dereferencing a same-document
    reference "should not" involve retrieval of a new representation of the
    referenced resource. Note that the two specs have different definitions
    of same-document reference: RFC 2396 says it is *only* the cases where the
    reference is the empty string, or \"#\" followed by a fragment; RFC 3986
    requires making a comparison of the base URI to the absolute form of the
    reference (as is returned by the spec), minus its fragment component,
    if any.

    This function is similar to urlparse.urljoin() and urllib.basejoin().
    Those functions, however, are (as of Python 2.3) outdated, buggy, and/or
    designed to produce results acceptable for use with other core Python
    libraries, rather than being earnest implementations of the relevant
    specs. Their problems are most noticeable in their handling of
    same-document references and 'file:' URIs, both being situations that
    come up far too often to consider the functions reliable enough for
    general use.
    """
    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
    # - Both are partial implementations of long-obsolete specs.
    # - Both accept relative URLs as the base, which no spec allows.
    # - urllib.basejoin() mishandles the '' and '..' references.
    # - If the base URL uses a non-hierarchical or relative path,
    #    or if the URL scheme is unrecognized, the result is not
    #    always as expected (partly due to issues in RFC 1808).
    # - If the authority component of a 'file' URI is empty,
    #    the authority component is removed altogether. If it was
    #    not present, an empty authority component is in the result.
    # - '.' and '..' segments are not always collapsed as well as they
    #    should be (partly due to issues in RFC 1808).
    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
    #    but urlparse.urljoin() is still based on RFC 1808.

    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
    #
    # ensure base URI is absolute
    if not baseUri or not IsAbsolute(baseUri):
        raise UriException(UriException.RELATIVE_BASE_URI,
                           base=baseUri, ref=uriRef)
    # shortcut for the simplest same-document reference cases
    if uriRef == '' or uriRef[0] == '#':
        return baseUri.split('#')[0] + uriRef
    # ensure a clean slate
    tScheme = tAuth = tPath = tQuery = None
    # parse the reference into its components
    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)
    # if the reference is absolute, eliminate '.' and '..' path segments
    # and skip to the end
    if rScheme is not None:
        tScheme = rScheme
        tAuth = rAuth
        tPath = RemoveDotSegments(rPath)
        tQuery = rQuery
    else:
        # the base URI's scheme, and possibly more, will be inherited
        (bScheme, bAuth, bPath, bQuery, bFrag) = SplitUriRef(baseUri)
        # if the reference is a net-path, just eliminate '.' and '..' path
        # segments; no other changes needed.
        if rAuth is not None:
            tAuth = rAuth
            tPath = RemoveDotSegments(rPath)
            tQuery = rQuery
        # if it's not a net-path, we need to inherit pieces of the base URI
        else:
            # use base URI's path if the reference's path is empty
            if not rPath:
                tPath = bPath
                # use the reference's query, if any, or else the base URI's,
                tQuery = rQuery is not None and rQuery or bQuery
            # the reference's path is not empty
            else:
                # just use the reference's path if it's absolute
                if rPath[0] == '/':
                    tPath = RemoveDotSegments(rPath)
                # merge the reference's relative path with the base URI's path
                else:
                    if bAuth is not None and not bPath:
                        tPath = '/' + rPath
                    else:
                        tPath = bPath[:bPath.rfind('/')+1] + rPath
                    tPath = RemoveDotSegments(tPath)
                # use the reference's query
                tQuery = rQuery
            # since the reference isn't a net-path,
            # use the authority from the base URI
            tAuth = bAuth
        # inherit the scheme from the base URI
        tScheme = bScheme
    # always use the reference's fragment (but no need to define another var)
    #tFrag = rFrag

    # now compose the target URI (RFC 3986 sec. 5.3)
    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))


def Relativize(targetUri, againstUri, subPathOnly=False):
    """
    This method returns a relative URI that is consistent with `targetURI`
    when resolved against `againstUri`.  If no such relative URI exists, for
    whatever reason, this method returns `None`.

    To be precise, if a string called `rel` exists such that
    ``Absolutize(rel, againstUri) == targetUri``, then `rel` is returned by
    this function.  In these cases, `Relativize` is in a sense the inverse
    of `Absolutize`.  In all other cases, `Relativize` returns `None`.

    The following idiom may be useful for obtaining compliant relative
    reference strings (e.g. for `path`) for use in other methods of this
    package::

      path = Relativize(OsPathToUri(path), OsPathToUri('.'))

    If `subPathOnly` is `True`, then this method will only return a relative
    reference if such a reference exists relative to the last hierarchical
    segment of `againstUri`.  In particular, this relative reference will
    not start with '/' or '../'.
    """

    # We might want to change the semantics slightly to allow a relative
    # target URI to be a valid "relative path" (and just return it).  For
    # now, though, absolute URIs only.
    #
    # TODO: We also need to decide if/when we want to use exceptions instead
    # of returning `None`.
    if not IsAbsolute(targetUri) or not IsAbsolute(againstUri):
        return None

    targetUri = NormalizePathSegmentsInUri(targetUri)
    againstUri = NormalizePathSegmentsInUri(againstUri)

    splitTarget = list(SplitUriRef(Absolutize(targetUri, targetUri)))
    splitAgainst = list(SplitUriRef(Absolutize(againstUri, againstUri)))

    if not splitTarget[:2] == splitAgainst[:2]:
        return None

    subPathSplit = [None, None] + splitTarget[2:]

    targetPath = splitTarget[2]
    againstPath = splitAgainst[2]

    leadingSlash = False
    if targetPath[:1] == '/' or againstPath[:1] == '/':
        if targetPath[:1] == againstPath[:1]:
            targetPath = targetPath[1:]
            againstPath = againstPath[1:]
            leadingSlash = True
        else:
            return None

    targetPathSegments = targetPath.split('/')
    againstPathSegments = againstPath.split('/')

    # Count the number of path segments in common.
    i = 0
    while True:
        # Stop if we get to the end of either segment list.
        if not(len(targetPathSegments) > i and
               len(againstPathSegments) > i):
            break

        # Increment the count when the lists agree, unless we are at the
        # last segment of either list and that segment is an empty segment.
        # We bail on this case because an empty ending segment in one path
        # must not match a mid-path empty segment in the other.
        if (targetPathSegments[i] == againstPathSegments[i]
            and not (i + 1 == len(againstPathSegments) and
                     '' == againstPathSegments[i])
            and not (i + 1 == len(targetPathSegments) and
                     '' == targetPathSegments[i])):
            i = i + 1
        # Otherwise stop.
        else:
            break

    # The target path has `i` segments in common with the basis path, and
    # the last segment (after the final '/') doesn't matter; we'll need to
    # traverse the rest.
    traverse = len(againstPathSegments) - i - 1

    relativePath = None
    # If the two paths do not agree on any segments, we have two special
    # cases.
    if i == 0 and leadingSlash:
        # First, if the ruling path only had one segment, then our result
        # can be a relative path.
        if len(againstPathSegments) == 1:
            relativePath = targetPath
        # Otherwise, the ruling path had a number of segments, so our result
        # must be an absolute path (unless we only want a subpath result, in
        # which case none exists).
        elif subPathOnly:
            return None
        else:
            relativePath = '/' + targetPath
    elif traverse > 0:
        if subPathOnly:
            return None
        relativePath = (("../" * traverse) +
                        '/'.join(targetPathSegments[i:]))
    # If the ith segment of the target path is empty and that is not the
    # final segment, then we need to precede the path with "./" to make it a
    # relative path.
    elif (len(targetPathSegments) > i + 1 and
          '' == targetPathSegments[i]):
        relativePath = "./" + '/'.join(targetPathSegments[i:])
    else:
        relativePath = '/'.join(targetPathSegments[i:])

    return UnsplitUriRef([None, None, relativePath] + splitTarget[3:])


def RemoveDotSegments(path):
    """
    Supports Absolutize() by implementing the remove_dot_segments function
    described in RFC 3986 sec. 5.2.  It collapses most of the '.' and '..'
    segments out of a path without eliminating empty segments. It is intended
    to be used during the path merging process and may not give expected
    results when used independently. Use NormalizePathSegments() or
    NormalizePathSegmentsInUri() if more general normalization is desired.
    """
    # return empty string if entire path is just "." or ".."
    if path == '.' or path == '..':
        return path[0:0] # preserves string type
    # remove all "./" or "../" segments at the beginning
    while path:
        if path[:2] == './':
            path = path[2:]
        elif path[:3] == '../':
            path = path[3:]
        else:
            break
    # We need to keep track of whether there was a leading slash,
    # because we're going to drop it in order to prevent our list of
    # segments from having an ambiguous empty first item when we call
    # split().
    leading_slash = False
    if path[:1] == '/':
        path = path[1:]
        leading_slash = True
    # replace a trailing "/." with just "/"
    if path[-2:] == '/.':
        path = path[:-1]
    # convert the segments into a list and process each segment in
    # order from left to right.
    segments = path.split('/')
    keepers = []
    segments.reverse()
    while segments:
        seg = segments.pop()
        # '..' means drop the previous kept segment, if any.
        # If none, and if the path is relative, then keep the '..'.
        # If the '..' was the last segment, ensure
        # that the result ends with '/'.
        if seg == '..':
            if keepers:
                keepers.pop()
            elif not leading_slash:
                keepers.append(seg)
            if not segments:
                keepers.append('')
        # ignore '.' segments and keep all others, even empty ones
        elif seg != '.':
            keepers.append(seg)
    # reassemble the kept segments
    return leading_slash * '/' + '/'.join(keepers)


def NormalizeCase(uriRef, doHost=False):
    """
    Returns the given URI reference with the case of the scheme,
    percent-encoded octets, and, optionally, the host, all normalized,
    implementing section 6.2.2.1 of RFC 3986. The normal form of
    scheme and host is lowercase, and the normal form of
    percent-encoded octets is uppercase.

    The URI reference can be given as either a string or as a sequence as
    would be provided by the SplitUriRef function. The return value will
    be a string or tuple.
    """
    if not isinstance(uriRef, (tuple, list)):
        uriRef = SplitUriRef(uriRef)
        tup = None
    else:
        tup = True
    # normalize percent-encoded octets
    newRef = []
    for component in uriRef:
        if component:
            newRef.append(re.sub('%([0-9a-f][0-9a-f])',
                          lambda m: m.group(0).upper(), component))
        else:
            newRef.append(component)
    # normalize scheme
    scheme = newRef[0]
    if scheme:
        scheme = scheme.lower()
    # normalize host
    authority = newRef[1]
    if doHost:
        if authority:
            userinfo, host, port = SplitAuthority(authority)
            authority = ''
            if userinfo is not None:
                authority += '%s@' % userinfo
            authority += host.lower()
            if port is not None:
                authority += ':%s' % port

    res = (scheme, authority, newRef[2], newRef[3], newRef[4])
    if tup:
        return res
    else:
        return UnsplitUriRef(res)


def NormalizePercentEncoding(s):
    """
    Given a string representing a URI reference or a component thereof,
    returns the string with all percent-encoded octets that correspond to
    unreserved characters decoded, implementing section 6.2.2.2 of RFC
    3986.
    """
    return PercentDecode(s, decodable='0123456789%s-._~' % ascii_letters)


def NormalizePathSegments(path):
    """
    Given a string representing the path component of a URI reference having a
    hierarchical scheme, returns the string with dot segments ('.' and '..')
    removed, implementing section 6.2.2.3 of RFC 3986. If the path is
    relative, it is returned with no changes.
    """
    if not path or path[:1] != '/':
        return path
    else:
        return RemoveDotSegments(path)


def NormalizePathSegmentsInUri(uri):
    """
    Given a string representing a URI or URI reference having a hierarchical
    scheme, returns the string with dot segments ('.' and '..') removed from
    the path component, implementing section 6.2.2.3 of RFC 3986. If the
    path is relative, the URI or URI reference is returned with no changes.
    """
    components = list(SplitUriRef(uri))
    components[2] = NormalizePathSegments(components[2])
    return UnsplitUriRef(components)


#=============================================================================
# Extendable normalization and resolution functions for URI references
#
class UriResolverBase:
    """
    This is class provides a set of functions related to the resolution of
    URIs, including the resolution to absolute form of URI references, and
    the retrieval of a representation of a resource that is identified by a
    URI.

    The object attribute supportedSchemes is a list of URI schemes supported
    for dereferencing (representation retrieval). Schemes supported by
    default are: %s.
    """ % ', '.join(DEFAULT_URI_SCHEMES)
    def __init__(self):
        self.supportedSchemes = list(DEFAULT_URI_SCHEMES)

    def normalize(self, uriRef, baseUri):
        """
        Resolves a URI reference to absolute form, effecting the result of RFC
        3986 section 5. The URI reference is considered to be relative to
        the given base URI.

        Also verifies that the resulting URI reference has a scheme that
        resolve() supports, raising a UriException if it doesn't.

        The default implementation does not perform any validation on the base
        URI beyond that performed by Absolutize().
        """
        # since we know how Absolutize works, we can anticipate the scheme of
        # its return value and verify that it's supported first
        scheme = GetScheme(uriRef) or GetScheme(baseUri)
        if scheme in self.supportedSchemes:
            return Absolutize(uriRef, baseUri)
        else:
            if scheme is None:
                raise ValueError('When the URI to resolve is a relative '
                    'reference, it must be accompanied by a base URI.')
            else:
                raise UriException(UriException.UNSUPPORTED_SCHEME,
                                   scheme=scheme,
                                   resolver=self.__class__.__name__)

    def resolve(self, uri, baseUri=None):
        """
        This function takes a URI or a URI reference plus a base URI, produces
        a normalized URI using the normalize function if a base URI was given,
        then attempts to obtain access to an entity representing the resource
        identified by the resulting URI, returning the entity as a stream (a
        Python file-like object).

        Raises a UriException if the URI scheme is unsupported or if a stream
        could not be obtained for any reason.
        """
        if baseUri is not None:
            uri = self.normalize(uri, baseUri)
            scheme = GetScheme(uri)
        else:
            scheme = GetScheme(uri)
            # since we didn't use normalize(), we need to verify here
            if scheme not in self.supportedSchemes:
                if scheme is None:
                    raise ValueError('When the URI to resolve is a relative '
                        'reference, it must be accompanied by a base URI.')
                else:
                    raise UriException(UriException.UNSUPPORTED_SCHEME,
                                       scheme=scheme,
                                       resolver=self.__class__.__name__)

        # Bypass urllib for opening local files. This means we don't get all
        # the extra metadata that urllib adds to the stream (Last-modified,
        # Content-Length, a poorly guessed Content-Type, and the URI), but
        # we also avoid its potentially time-consuming socket.gethostbyname()
        # calls, which aren't even warranted and are part of urllib's dubious
        # interpretation of RFC 1738.
        if scheme == 'file':
            path = UriToOsPath(uri, attemptAbsolute=False)
            try:
                stream = open(path, 'rb')
            except IOError, e:
                raise UriException(UriException.RESOURCE_ERROR,
                                   loc='%s (%s)' % (uri, path),
                                   uri=uri, msg=str(e))
        else:
            # urllib2.urlopen, wrapped by us, will suffice for http, ftp,
            # data and gopher
            try:
                stream = UrlOpen(uri)
            except IOError, e:
                raise UriException(UriException.RESOURCE_ERROR,
                                   uri=uri, loc=uri, msg=str(e))
        return stream

    def generate(self, hint=None):
        """
        This function generates and returns a URI.
        The hint is an object that helps decide what to generate.
        The default action is to generate a random UUID URN.
        """
        import Uuid
        return 'urn:uuid:'+Uuid.UuidAsString(Uuid.GenerateUuid())


class FtUriResolver(UriResolverBase):
    """
    The URI resolver class used by most of 4Suite, outside of the repository.

    Adds support for lenient processing of base URIs.
    """
    def normalize(self, uriRef, baseUri):
        """
        This function differs from UriResolverBase.normalize() in the
        following manner:

        This function allows for the possibility of the base URI beginning
        with a '/', in which case the argument is assumed to be an absolute
        path component of 'file' URI that has no authority component.
        """
        # assume file: if leading "/"
        if baseUri[:1] == '/':
            baseUri = 'file://' + baseUri

        return UriResolverBase.normalize(self, uriRef, baseUri)


BASIC_RESOLVER = FtUriResolver()

_urlopener = None
class _DataHandler(urllib2.BaseHandler):
    """
    A class to handle 'data' URLs.

    The actual handling is done by urllib.URLopener.open_data() method.
    """
    def data_open(self, request):
        global _urlopener
        if _urlopener is None:
            _urlopener = urllib.URLopener()
        return _urlopener.open_data(self, request.get_full_url())

def ResourceToUri(package, resource):
    """Return a PEP 302 pseudo-URL for the specified resource.

    'package' is a Python module name (dot-separated module names) and
    'resource' is a '/'-separated pathname.
    """
    provider, resource_name = ImportUtil.NormalizeResource(package, resource)
    if provider.loader:
        # Use a 'pep302' pseudo-URL
        segments = resource_name.split('/')
        if not resource.startswith('/'):
            dirname = provider.module_path[len(provider.zip_pre):]
            segments[0:0] = dirname.split(os.sep)
        path = '/'.join(map(PercentEncode, segments))
        uri = 'pep302://%s/%s' % (package, path)
    else:
        # Use a 'file' URL
        filename = ImportUtil.GetResourceFilename(package, resource)
        uri = OsPathToUri(filename)
    return uri

class _Pep302Handler(urllib2.FileHandler):
    """
    A class to handler opening of PEP 302 pseudo-URLs.

    The syntax for this pseudo-URL is:
        url    := "pep302://" module "/" path
        module := <Python module name>
        path   := <'/'-separated pathname>

    The "path" portion of the URL will be passed to the get_data() method
    of the loader identified by "module" with '/'s converted to the OS
    native path separator.
    """
    def pep302_open(self, request):
        import mimetypes
        import mimetools
        import rfc822

        # get the package and resource components
        package = request.get_host()
        resource = request.get_selector()
        resource = PercentDecode(re.sub('%2[fF]', '\\/', resource))

        # get the stream associated with the resource
        try:
            stream = ImportUtil.GetResourceStream(package, resource)
        except EnvironmentError, error:
            raise urllib2.URLError(str(error))

        # compute some simple header fields
        try:
            stream.seek(0, 2) # go to the end of the stream
        except IOError:
            data = stream.read()
            stream = cStringIO.StringIO(data)
            length = len(data)
        else:
            length = stream.tell()
            stream.seek(0, 0) # go to the start of the stream
        mtime = ImportUtil.GetResourceLastModified(package, resource)
        mtime = rfc822.formatdate(mtime)
        mtype = mimetypes.guess_type(resource) or 'text/plain'
        headers = ("Content-Type: %s\n"
                   "Content-Length: %d\n"
                   "Last-Modified: %s\n" % (mtype, length, mtime))
        headers = mimetools.Message(cStringIO.StringIO(headers))
        return urllib.addinfourl(stream, headers, request.get_full_url())

_opener = None
def UrlOpen(url, *args, **kwargs):
    """
    A replacement/wrapper for urllib2.urlopen().

    Simply calls MakeUrllibSafe() on the given URL and passes the result
    and all other args to urllib2.urlopen().
    """
    global _opener
    if _opener is None:
        _opener = urllib2.build_opener(_DataHandler, _Pep302Handler)

    # work around urllib's intolerance for proper URIs, Unicode, IDNs
    stream = _opener.open(MakeUrllibSafe(url), *args, **kwargs)
    stream.name = url
    return stream


#=============================================================================
# RFC 3151 implementation
#

def UrnToPublicId(urn):
    """
    Converts a URN that conforms to RFC 3151 to a public identifier.

    For example, the URN
    "urn:publicid:%2B:IDN+example.org:DTD+XML+Bookmarks+1.0:EN:XML"
    will be converted to the public identifier
    "+//IDN example.org//DTD XML Bookmarks 1.0//EN//XML"

    Raises a UriException if the given URN cannot be converted.
    Query and fragment components, if present, are ignored.
    """
    if urn is not None and urn:
        (scheme, auth, path, query, frag) = SplitUriRef(urn)
        if scheme is not None and scheme.lower() == 'urn':
            pp = path.split(':', 1)
            if len(pp) > 1:
                urn_scheme = PercentDecode(pp[0])
                if urn_scheme == 'publicid':
                    publicid = pp[1].replace('+', ' ')
                    publicid = publicid.replace(':', '//')
                    publicid = publicid.replace(';', '::')
                    publicid = PercentDecode(publicid)
                    return publicid

    raise UriException(UriException.INVALID_PUBLIC_ID_URN, urn=urn)


def PublicIdToUrn(publicid):
    """
    Converts a public identifier to a URN that conforms to RFC 3151.
    """
    # 1. condense whitespace, XSLT-style
    publicid = re.sub('[ \t\r\n]+', ' ', publicid.strip())
    # 2. // -> :
    #    :: -> ;
    #    space -> +
    #    + ; ' ? # % / : -> percent-encode
    #    (actually, the intent of the RFC is to not conflict with RFC 2396,
    #     so any character not in the unreserved set must be percent-encoded)
    r = ':'.join([';'.join([PercentEncode(dcpart, spaceToPlus=True)
                            for dcpart in dspart.split('::')])
                  for dspart in publicid.split('//')])
    return 'urn:publicid:%s' % r


#=============================================================================
# Miscellaneous public functions
#

SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')
def GetScheme(uriRef):
    """
    Obtains, with optimum efficiency, just the scheme from a URI reference.
    Returns a string, or if no scheme could be found, returns None.
    """
    # Using a regex seems to be the best option. Called 50,000 times on
    # different URIs, on a 1.0-GHz PIII with FreeBSD 4.7 and Python
    # 2.2.1, this method completed in 0.95s, and 0.05s if there was no
    # scheme to find. By comparison,
    #   urllib.splittype()[0] took 1.5s always;
    #   Ft.Lib.Uri.SplitUriRef()[0] took 2.5s always;
    #   urlparse.urlparse()[0] took 3.5s always.
    m = SCHEME_PATTERN.match(uriRef)
    if m is None:
        return None
    else:
        return m.group(1)


def StripFragment(uriRef):
    """
    Returns the given URI or URI reference with the fragment component, if
    any, removed.
    """
    return SplitFragment(uriRef)[0]


def IsAbsolute(identifier):
    """
    Given a string believed to be a URI or URI reference, tests that it is
    absolute (as per RFC 3986), not relative -- i.e., that it has a scheme.
    """
    # We do it this way to avoid compiling another massive regex.
    return GetScheme(identifier) is not None


_ntPathToUriSetupCompleted = False
def _initNtPathPattern():
    """
    Called internally to compile the regular expression used by
    OsPathToUri() on Windows just once, the first time the function is
    called.
    """
    global _ntPathToUriSetupCompleted
    if _ntPathToUriSetupCompleted:
        return
    # path variations we try to handle:
    #
    # a\b\c (a relative path)
    #    file:a/b/c is the best we can do.
    #    Dot segments should not be collapsed in the final URL.
    #
    # \a\b\c
    #    file:///a/b/c is correct
    #
    # C:\a\b\c
    #    urllib.urlopen() requires file:///C|/a/b/c or ///C|/a/b/c
    #     because it currently relies on urllib.url2pathname().
    #    Windows resolver will accept the first or file:///C:/a/b/c
    #
    # \\host\share\x\y\z
    #    Windows resolver accepts file://host/share/x/y/z
    #    Netscape (4.x?) accepts file:////host/share/x/y/z
    #
    # If an entire drive is shared, the share name might be
    #  $drive$, like this: \\host\$c$\a\b\c
    #  We could recognize it as a drive letter, but it's probably
    #  best not to treat it specially, since it will never occur
    #  without a host. It's just another share name.
    #
    # There's also a weird C:\\host\share\x\y\z convention
    #  that is hard to find any information on. Presumably the C:
    #  is ignored, but the question is do we put it in the URI?
    #
    # So the format, in ABNF, is roughly:
    # [ drive ":" ] ( [ "\\" host "\" share ] abs-path ) / rel-path
    drive         = r'(?P<drive>[A-Za-z])'
    host          = r'(?P<host>[^\\]*)'
    share         = r'(?P<share>[^\\]+)'
    abs_path      = r'(?P<abspath>\\(?:[^\\]+\\?)*)'
    rel_path      = r'(?P<relpath>(?:[^\\]+\\?)*)'
    NT_PATH_REGEX = r"^(?:%s:)?(?:(?:(?:\\\\%s\\%s)?%s)|%s)$" % (
                        drive,
                        host,
                        share,
                        abs_path,
                        rel_path)
    global NT_PATH_PATTERN
    NT_PATH_PATTERN = re.compile(NT_PATH_REGEX)
    # We can now use NT_PATH_PATTERN.match(path) to parse the path and use
    #  the returned object's .groupdict() method to get a dictionary of
    #  path subcomponents. For example,
    #  NT_PATH_PATTERN.match(r"\\h\$c$\x\y\z").groupdict()
    #  yields
    #  {'abspath': r'\x\y\z',
    #   'share': '$c$',
    #   'drive': None,
    #   'host': 'h',
    #   'relpath': None
    #  }
    # Note that invalid paths such as r'\\foo\bar'
    #  (a UNC path with no trailing '\') will not match at all.
    _ntPathToUriSetupCompleted = True
    return


def _splitNtPath(path):
    """
    Called internally to get a tuple representing components of the given
    Windows path.
    """
    if not _ntPathToUriSetupCompleted:
        _initNtPathPattern()
    m = NT_PATH_PATTERN.match(path)
    if not m:
        raise ValueError("Path %s is not a valid Windows path.")
    components = m.groupdict()
    (drive, host, share, abspath, relpath) = (
        components['drive'],
        components['host'],
        components['share'],
        components['abspath'],
        components['relpath'],
        )
    return (drive, host, share, abspath, relpath)


def _getDriveLetter(s):
    """
    Called internally to get a drive letter from a string, if the string
    is a drivespec.
    """
    if len(s) == 2 and s[1] in ':|' and s[0] in ascii_letters:
        return s[0]
    return


def OsPathToUri(path, attemptAbsolute=True, osname=None):
    r"""This function converts an OS-specific file system path to a URI of
    the form 'file:///path/to/the/file'.

    In addition, if the path is absolute, any dot segments ('.' or '..') will
    be collapsed, so that the resulting URI can be safely used as a base URI
    by functions such as Absolutize().

    The given path will be interpreted as being one that is appropriate for
    use on the local operating system, unless a different osname argument is
    given.

    If the given path is relative, an attempt may be made to first convert
    the path to absolute form by interpreting the path as being relative
    to the current working directory.  This is the case if the attemptAbsolute
    flag is True (the default).  If attemptAbsolute is False, a relative
    path will result in a URI of the form file:relative/path/to/a/file .

    attemptAbsolute has no effect if the given path is not for the
    local operating system.

    On Windows, the drivespec will become the first step in the path component
    of the URI. If the given path contains a UNC hostname, this name will be
    used for the authority component of the URI.

    Warning: Some libraries, such as urllib.urlopen(), may not behave as
    expected when given a URI generated by this function. On Windows you may
    want to call re.sub('(/[A-Za-z]):', r'\1|', uri) on the URI to prepare it
    for use by functions such as urllib.url2pathname() or urllib.urlopen().

    This function is similar to urllib.pathname2url(), but is more featureful
    and produces better URIs.
    """
    # Problems with urllib.pathname2url() on all platforms include:
    # - the generated URL has no scheme component;
    # - percent-encoding is incorrect, due to urllib.quote() issues.
    #
    # Problems with urllib.pathname2url() on Windows include:
    # - trailing backslashes are ignored;
    # - all leading backslashes are considered part of the absolute
    #    path, so UNC paths aren't properly converted (assuming that
    #    a proper conversion would be to use the UNC hostname in the
    #    hostname component of the resulting URL);
    # - non-leading, consecutive backslashes are collapsed, which may
    #    be desirable but is correcting what is, arguably, user error;
    # - the presence of a letter followed by ":" is believed to
    #    indicate a drivespec, no matter where it occurs in the path,
    #    which may have been considered a safe assumption since the
    #    drivespec is the only place where ":" can legally, but there's
    #    no need to search the whole string for it;
    # - the ":" in a drivespec is converted to "|", a convention that
    #    is becoming increasingly less common. For compatibility, most
    #    web browser resolvers will accept either "|" or ":" in a URL,
    #    but urllib.urlopen(), relying on url2pathname(), expects "|"
    #    only. In our opinion, the callers of those functions should
    #    ensure that the arguments are what are expected. Our goal
    #    here is to produce a quality URL, not a URL designed to play
    #    nice with urllib's bugs & limitations.
    # - it treats "/" the same as "\", which results in being able to
    #    call the function with a posix-style path, a convenience
    #    which allows the caller to get sloppy about whether they are
    #    really passing a path that is apprropriate for the desired OS.
    #    We do this a lot in 4Suite.
    #
    # There is some disagreement over whether a drivespec should be placed in
    # the authority or in the path. Placing it in the authority means that
    # ":", which has a reserved purpose in the authority, cannot be used --
    # this, along with the fact that prior to RFC 3986, percent-encoded
    # octets were disallowed in the authority, is presumably a reason why "|"
    # is a popular substitute for ":". Using the authority also allows for
    # the drive letter to be retained whe resolving references like this:
    #   reference '/a/b/c' + base 'file://C|/x/y/z' = 'file://C|/a/b/c'
    # The question is, is that really the ideal result? Should the drive info
    # be inherited from the base URI, if it is unspecified in a reference
    # that is otherwise representing an absolute path? Using the authority
    # for this purpose means that it would be overloaded if we also used it
    # to represent the host part of a UNC path. The alternative is to put the
    # UNC host in the path (e.g. 'file:////host/share/path'), but when such a
    # URI is used as a base URI, relative reference resolution often returns
    # unexpected results.
    #
    osname = osname or os.name

    if osname == 'nt':
        if WINDOWS_SLASH_COMPAT:
            path = path.replace('/','\\')
        (drive, host, share, abspath, relpath) = _splitNtPath(path)
        if attemptAbsolute and relpath is not None and osname == os.name:
            path = os.path.join(os.getcwd(), relpath)
            (drive, host, share, abspath, relpath) = _splitNtPath(path)
        path = abspath or relpath
        path = '/'.join([PercentEncode(seg) for seg in path.split('\\')])
        uri = 'file:'
        if host:
            uri += '//%s' % PercentEncode(host)
        elif abspath:
            uri += '//'
        if drive:
            uri += '/%s:' % drive.upper()
        if share:
            uri += '/%s' % PercentEncode(share)
        if abspath:
            path = RemoveDotSegments(path)
        uri += path

    elif osname == 'posix':
        try:
            from posixpath import isabs
        except ImportError:
            isabs = lambda p: p[:1] == '/'
        pathisabs = isabs(path)
        if pathisabs:
            path = RemoveDotSegments(path)
        elif attemptAbsolute and osname == os.name:
            path = os.path.join(os.getcwd(), path)
            pathisabs = isabs(path)
        path = '/'.join([PercentEncode(seg) for seg in path.split('/')])
        if pathisabs:
            uri = 'file://%s' % path
        else:
            uri = 'file:%s' % path

    else:
        # 4Suite only supports posix and nt, so we're not going to worry about
        # improving upon urllib.pathname2url() for other OSes.
        if osname == os.name:
            from urllib import pathname2url
            if attemptAbsolute and not os.path.isabs(path):
                path = os.path.join(os.getcwd(), path)
        else:
            try:
                module = '%surl2path' % osname
                exec 'from %s import pathname2url' % module
            except ImportError:
                raise UriException(UriException.UNSUPPORTED_PLATFORM,
                                   osname, OsPathToUri)
        uri = 'file:' + pathname2url(path)

    return uri


def UriToOsPath(uri, attemptAbsolute=True, encoding='utf-8', osname=None):
    r"""
    This function converts a URI reference to an OS-specific file system path.

    If the URI reference is given as a Unicode string, then the encoding
    argument determines how percent-encoded components are interpreted, and
    the result will be a Unicode string. If the URI reference is a regular
    byte string, the encoding argument is ignored and the result will be a
    byte string in which percent-encoded octets have been converted to the
    bytes they represent. For example, the trailing path segment of
    u'file:///a/b/%E2%80%A2' will by default be converted to u'\u2022',
    because sequence E2 80 A2 represents character U+2022 in UTF-8. If the
    string were not Unicode, the trailing segment would become the 3-byte
    string '\xe2\x80\xa2'.

    The osname argument determines for what operating system the resulting
    path is appropriate. It defaults to os.name and is typically the value
    'posix' on Unix systems (including Mac OS X and Cygwin), and 'nt' on
    Windows NT/2000/XP.

    This function is similar to urllib.url2pathname(), but is more featureful
    and produces better paths.

    If the given URI reference is not relative, its scheme component must be
    'file', and an exception will be raised if it isn't.

    In accordance with RFC 3986, RFC 1738 and RFC 1630, an authority
    component that is the string 'localhost' will be treated the same as an
    empty authority.

    Dot segments ('.' or '..') in the path component are NOT collapsed.

    If the path component of the URI reference is relative and the
    attemptAbsolute flag is True (the default), then the resulting path
    will be made absolute by considering the path to be relative to the
    current working directory. There is no guarantee that such a result
    will be an accurate interpretation of the URI reference.

    attemptAbsolute has no effect if the
    result is not being produced for the local operating system.

    Fragment and query components of the URI reference are ignored.

    If osname is 'posix', the authority component must be empty or just
    'localhost'. An exception will be raised otherwise, because there is no
    standard way of interpreting other authorities. Also, if '%2F' is in a
    path segment, it will be converted to r'\/' (a backslash-escaped forward
    slash). The caller may need to take additional steps to prevent this from
    being interpreted as if it were a path segment separator.

    If osname is 'nt', a drivespec is recognized as the first occurrence of a
    single letter (A-Z, case-insensitive) followed by '|' or ':', occurring as
    either the first segment of the path component, or (incorrectly) as the
    entire authority component. A UNC hostname is recognized as a non-empty,
    non-'localhost' authority component that has not been recognized as a
    drivespec, or as the second path segment if the first path segment is
    empty. If a UNC hostname is detected, the result will begin with
    '\\<hostname>\'. If a drivespec was detected also, the first path segment
    will be '$<driveletter>$'. If a drivespec was detected but a UNC hostname
    was not, then the result will begin with '<driveletter>:'.

    Windows examples:
    'file:x/y/z' => r'x\y\z';
    'file:/x/y/z' (not recommended) => r'\x\y\z';
    'file:///x/y/z' => r'\x\y\z';
    'file:///c:/x/y/z' => r'C:\x\y\z';
    'file:///c|/x/y/z' => r'C:\x\y\z';
    'file:///c:/x:/y/z' => r'C:\x:\y\z' (bad path, valid interpretation);
    'file://c:/x/y/z' (not recommended) => r'C:\x\y\z';
    'file://host/share/x/y/z' => r'\\host\share\x\y\z';
    'file:////host/share/x/y/z' => r'\\host\share\x\y\z'
    'file://host/x:/y/z' => r'\\host\x:\y\z' (bad path, valid interp.);
    'file://localhost/x/y/z' => r'\x\y\z';
    'file://localhost/c:/x/y/z' => r'C:\x\y\z';
    'file:///C:%5Cx%5Cy%5Cz' (not recommended) => r'C:\x\y\z'
    """
    (scheme, authority, path) = SplitUriRef(uri)[0:3]
    if scheme and scheme != 'file':
        raise UriException(UriException.NON_FILE_URI, uri)
    # enforce 'localhost' URI equivalence mandated by RFCs 1630, 1738, 3986
    if authority == 'localhost':
        authority = None
    osname = osname or os.name

    if osname == 'nt':
        # Get the drive letter and UNC hostname, if any. Fragile!
        unchost = None
        driveletter = None
        if authority:
            authority = PercentDecode(authority, encoding=encoding)
            if _getDriveLetter(authority):
                driveletter = authority[0]
            else:
                unchost = authority
        if not (driveletter or unchost):
            # Note that we have to treat %5C (backslash) as a path separator
            # in order to catch cases like file:///C:%5Cx%5Cy%5Cz => C:\x\y\z
            # We will also treat %2F (slash) as a path separator for
            # compatibility.
            if WINDOWS_SLASH_COMPAT:
                regex = '%2[fF]|%5[cC]'
            else:
                regex = '%5[cC]'
            path = re.sub(regex, '/', path)
            segs = path.split('/')
            if not segs[0]:
                # //host/... => [ '', '', 'host', '...' ]
                if len(segs) > 2 and not segs[1]:
                    unchost = PercentDecode(segs[2], encoding=encoding)
                    path = len(segs) > 3 and '/' + '/'.join(segs[3:]) or ''
                # /C:/...    => [ '', 'C:', '...' ]
                elif len(segs) > 1:
                    driveletter = _getDriveLetter(PercentDecode(segs[1],
                                                   encoding=encoding))
                    if driveletter:
                        path = len(segs) > 2 and '/' + '/'.join(segs[2:]) or ''
            else:
                # C:/...     => [ 'C:', '...' ]
                driveletter = _getDriveLetter(PercentDecode(segs[0],
                                                encoding=encoding))
                if driveletter:
                    path = len(segs) > 1 and path[2:] or ''



        # Do the conversion of the path part
        sep = '\\' # we could try to import from ntpath,
                   # but at this point it would just waste cycles.
        path = PercentDecode(path.replace('/', sep), encoding=encoding)

        # Assemble and return the path
        if unchost:
            # It's a UNC path of the form \\host\share\path.
            # driveletter is ignored.
            path = r'%s%s%s' % (sep * 2, unchost, path)
        elif driveletter:
            # It's an ordinary Windows path of the form C:\x\y\z
            path = r'%s:%s' % (driveletter.upper(), path)
        # It's an ordinary Windows path of the form \x\y\z or x\y\z.
        # We need to make sure it doesn't end up looking like a UNC
        # path, so we discard extra leading backslashes
        elif path[:1] == '\\':
            path = re.sub(r'^\\+', '\\\\', path)
        # It's a relative path. If the caller wants it absolute, attempt to comply
        elif attemptAbsolute and osname == os.name:
            path = os.path.join(os.getcwd(), path)

        return path

    elif osname == 'posix':
        # a non-empty, non-'localhost' authority component is ambiguous on Unix
        if authority:
            raise UriException(UriException.UNIX_REMOTE_HOST_FILE_URI, uri)
        # %2F in a path segment would indicate a literal '/' in a
        # filename, which is possible on posix, but there is no
        # way to consistently represent it. We'll backslash-escape
        # the literal slash and leave it to the caller to ensure it
        # gets handled the way they want.
        path = PercentDecode(re.sub('%2[fF]', '\\/', path), encoding=encoding)
        # If it's relative and the caller wants it absolute, attempt to comply
        if attemptAbsolute and osname == os.name and not os.path.isabs(path):
            path = os.path.join(os.getcwd(), path)

        return path

    else:
        # 4Suite only supports posix and nt, so we're not going to worry about
        # improving upon urllib.pathname2url() for other OSes.
        if osname == os.name:
            from urllib import url2pathname
        else:
            try:
                module = '%surl2path' % osname
                exec 'from %s import url2pathname' % module
            except ImportError:
                raise UriException(UriException.UNSUPPORTED_PLATFORM,
                                   osname, UriToOsPath)
        # drop the scheme before passing to url2pathname
        if scheme:
            uri = uri[len(scheme)+1:]
        return url2pathname(uri)

REG_NAME_HOST_PATTERN = re.compile(r"^(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)$")

def MakeUrllibSafe(uriRef):
    """
    Makes the given RFC 3986-conformant URI reference safe for passing
    to legacy urllib functions. The result may not be a valid URI.

    As of Python 2.3.3, urllib.urlopen() does not fully support
    internationalized domain names, it does not strip fragment components,
    and on Windows, it expects file URIs to use '|' instead of ':' in the
    path component corresponding to the drivespec. It also relies on
    urllib.unquote(), which mishandles unicode arguments. This function
    produces a URI reference that will work around these issues, although
    the IDN workaround is limited to Python 2.3 only. May raise a
    UnicodeEncodeError if the URI reference is Unicode and erroneously
    contains non-ASCII characters.
    """
    # IDN support requires decoding any percent-encoded octets in the
    # host part (if it's a reg-name) of the authority component, and when
    # doing DNS lookups, applying IDNA encoding to that string first.
    # As of Python 2.3, there is an IDNA codec, and the socket and httplib
    # modules accept Unicode strings and apply IDNA encoding automatically
    # where necessary. However, urllib.urlopen() has not yet been updated
    # to do the same; it raises an exception if you give it a Unicode
    # string, and does no conversion on non-Unicode strings, meaning you
    # have to give it an IDNA string yourself. We will only support it on
    # Python 2.3 and up.
    #
    # see if host is a reg-name, as opposed to IPv4 or IPv6 addr.
    (scheme, auth, path, query, frag) = SplitUriRef(uriRef)
    if auth and auth.find('@') > -1:
        userinfo, hostport = auth.split('@')
    else:
        userinfo = None
        hostport = auth
    if hostport and hostport.find(':') > -1:
        host, port = hostport.split(':')
    else:
        host = hostport
        port = None
    if host and REG_NAME_HOST_PATTERN.match(host):
        # percent-encoded hostnames will always fail DNS lookups
        host = PercentDecode(host)
        # IDNA-encode if possible.
        # We shouldn't do this for schemes that don't need DNS lookup,
        # but are there any (that you'd be calling urlopen for)?
        if sys.version_info[0:2] >= (2,3):
            if isinstance(host, str):
                host = host.decode('utf-8')
            host = host.encode('idna')
        # reassemble the authority with the new hostname
        # (percent-decoded, and possibly IDNA-encoded)
        auth = ''
        if userinfo:
            auth += userinfo + '@'
        auth += host
        if port:
            auth += ':' + port

    # On Windows, ensure that '|', not ':', is used in a drivespec.
    if os.name == 'nt' and scheme == 'file':
        path = path.replace(':','|',1)

    # Note that we drop fragment, if any. See RFC 3986 sec. 3.5.
    uri = UnsplitUriRef((scheme, auth, path, query, None))

    # parts of urllib are not unicode safe
    if isinstance(uri, unicode):
        try:
            # should work if IDNA encoding was applied (Py 2.3+)
            uri = uri.encode('us-ascii')
        except UnicodeError:
            raise UriException(Error.IDNA_UNSUPPORTED, uri=uriRef)
    return uri


def PathResolve(paths):
    """
    This function takes a list of file URIs.  The first can be
    absolute or relative to the URI equivalent of the current working
    directory. The rest must be relative to the first.
    The function converts them all to OS paths appropriate for the local
    system, and then creates a single final path by resolving each path
    in the list against the following one. This final path is returned
    as a URI.
    """
    if not paths: return paths
    paths = [UriToOsPath(p, attemptAbsolute=False) for p in paths]
    if not os.path.isabs(paths[0]):
        paths[0] = os.path.join(os.getcwd(), paths[0])
    resolved = reduce(lambda a, b: \
                       BaseJoin(os.path.isdir(a)
                                 and OsPathToUri(
                                      os.path.join(a, ''),
                                      attemptAbsolute=False,
                                     ) or OsPathToUri(a, attemptAbsolute=False),
                                OsPathToUri(b, attemptAbsolute=False)[5:]),
                       paths)
    return resolved


def BaseJoin(base, uriRef):
    """
    Merges a base URI reference with another URI reference, returning a
    new URI reference.

    It behaves exactly the same as Absolutize(), except the arguments
    are reversed, and it accepts any URI reference (even a relative URI)
    as the base URI. If the base has no scheme component, it is
    evaluated as if it did, and then the scheme component of the result
    is removed from the result, unless the uriRef had a scheme. Thus, if
    neither argument has a scheme component, the result won't have one.

    This function is named BaseJoin because it is very much like
    urllib.basejoin(), but it follows the current RFC 3986 algorithms
    for path merging, dot segment elimination, and inheritance of query
    and fragment components.

    WARNING: This function exists for 2 reasons: (1) because of a need
    within the 4Suite repository to perform URI reference absolutization
    using base URIs that are stored (inappropriately) as absolute paths
    in the subjects of statements in the RDF model, and (2) because of
    a similar need to interpret relative repo paths in a 4Suite product
    setup.xml file as being relative to a path that can be set outside
    the document. When these needs go away, this function probably will,
    too, so it is not advisable to use it.
    """
    if IsAbsolute(base):
        return Absolutize(uriRef, base)
    else:
        dummyscheme = 'basejoin'
        res = Absolutize(uriRef, '%s:%s' % (dummyscheme, base))
        if IsAbsolute(uriRef):
            # scheme will be inherited from uriRef
            return res
        else:
            # no scheme in, no scheme out
            return res[len(dummyscheme)+1:]


class UriDict(dict):
    """
    A dictionary that uses URIs as keys. It attempts to observe some degree of
    URI equivalence as defined in RFC 3986 section 6. For example, if URIs
    A and B are equivalent, a dictionary operation involving key B will return
    the same result as one involving key A, and vice-versa.

    This is useful in situations where retrieval of a new representation of a
    resource is undesirable for equivalent URIs, such as "file:///x" and
    "file://localhost/x" (see RFC 1738), or "http://spam/~x/",
    "http://spam/%7Ex/" and "http://spam/%7ex" (see RFC 3986).

    Normalization performed includes case normalization on the scheme and
    percent-encoded octets, percent-encoding normalization (decoding of
    octets corresponding to unreserved characters), and the reduction of
    'file://localhost/' to 'file:///', in accordance with both RFC 1738 and
    RFC 3986 (although RFC 3986 encourages using 'localhost' and doing
    this for all schemes, not just file).

    An instance of this class is used by Ft.Xml.Xslt.XsltContext for caching
    documents, so that the XSLT function document() will return identical
    nodes, without refetching/reparsing, for equivalent URIs.
    """
    # RFC 3986 requires localhost to be the default host no matter
    # what the scheme, but, being descriptive of existing practices,
    # leaves it up to the implementation to decide whether to use this
    # and other tests of URI equivalence in the determination of
    # same-document references. So our implementation results in what
    # is arguably desirable, but not strictly required, behavior.
    #
    #FIXME: make localhost the default for all schemes, not just file
    def _normalizekey(self, key):
        key = NormalizeCase(NormalizePercentEncoding(key))
        if key[:17] == 'file://localhost/':
            return 'file://' + key[16:]
        else:
            return key

    def __getitem__(self, key):
        return super(UriDict, self).__getitem__(self._normalizekey(key))

    def __setitem__(self, key, value):
        return super(UriDict, self).__setitem__(self._normalizekey(key), value)

    def __delitem__(self, key):
        return super(UriDict, self).__delitem__(self._normalizekey(key))

    def has_key(self, key):
        return super(UriDict, self).has_key(self._normalizekey(key))

    def __contains__(self, key):
        return super(UriDict, self).__contains__(self._normalizekey(key))

    def __iter__(self):
        return iter(self.keys())

    iterkeys = __iter__
    def iteritems(self):
        for key in self.iterkeys():
            yield key, self.__getitem__(key)


def FileUrl(filepath):
    import warnings
    warnings.warn("FileUrl() deprecated; use OsPathToUri()",
                  DeprecationWarning, 2)
    return OsPathToUri(filepath, attemptAbsolute=True)


#=======================================================================
#
# Further reading re: percent-encoding
#
# http://lists.w3.org/Archives/Public/ietf-http-wg/2004JulSep/0009.html
#
#=======================================================================
#
# 'file:' URI notes
#
# 'file:' URI resolution is difficult to get right, because the 'file'
# URL scheme is underspecified, and is handled by resolvers in very
# lenient and inconsistent ways.
#
# RFC 3986 provides definitive clarification on how all URIs,
# including the quirky 'file:' ones, are to be interpreted for purposes
# of resolution to absolute form, so that is what we implement to the
# best of our ability.
#
#-----------------------------------------------------------------------
#
# Notes from our previous research on 'file:' URI resolution:
#
# According to RFC 2396 (original), these are valid absolute URIs:
#  file:/autoexec.bat     (scheme ":" abs_path)
#  file:///autoexec.bat   (scheme ":" net_path)
#
# This one is valid but is not what you'd expect it to be:
#
#  file://autoexec.bat    (authority = autoexec.bat, no abs_path)
#
# If you have any more than 3 slashes, it's OK because each path segment
# can be an empty string.
#
# This one is valid too, although everything after 'file:' is
# considered an opaque_part (note that RFC 3986 changes this):
#
#   file:etc/passwd
#
# Unescaped backslashes are NOT allowed in URIs, ever.
# It is not possible to use them as path segment separators.
# Yet... Windows Explorer will accept these:
#   file:C:\WINNT\setuplog.txt
#   file:/C:\WINNT\setuplog.txt
#   file:///C:\WINNT\setuplog.txt
# However, it will also accept "|" in place of the colon after the drive:
#   file:C|/WINNT/setuplog.txt
#   file:/C|/WINNT/setuplog.txt
#   file:///C|/WINNT/setuplog.txt
#
# RFC 1738 says file://localhost/ and file:/// are equivalent;
# localhost in this case is always the local machine, no matter what
# your DNS says.
#
# Basically, any file: URI is valid. Good luck resolving them, though.
#
# Jeremy's idea is to not use open() or urllib.urlopen() on Windows;
# instead, use a C function that wraps Windows' generic open function,
# which resolves any path or URI exactly as Explorer would (he thinks).
#
#-----------------------------------------------------------------------
#
# References for further research on 'file:' URI resolution:
#  http://mail.python.org/pipermail/xml-sig/2001-February/004572.html
#  http://mail.python.org/pipermail/xml-sig/2001-March/004791.html
#  http://mail.python.org/pipermail/xml-sig/2002-August/008236.html
#  http://www.perldoc.com/perl5.8.0/lib/URI/file.html
#  http://lists.w3.org/Archives/Public/uri/2004Jul/0013.html
#
#=======================================================================
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.