Strings.py :  » XML » 4Suite » 4Suite-XML-1.0.2 » Ft » Xml » Xslt » Exslt » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » XML » 4Suite 
4Suite » 4Suite XML 1.0.2 » Ft » Xml » Xslt » Exslt » Strings.py
########################################################################
# $Source: /var/local/cvsroot/4Suite/Ft/Xml/Xslt/Exslt/Strings.py,v $ $Revision: 1.25.4.2 $ $Date: 2006/12/17 21:29:26 $
"""
EXSLT - Strings

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import re, codecs
from Ft.Xml.XPath import Conversions,XPathTypes,NAMESPACE_NODE
from Ft.Xml.Xslt import XsltRuntimeException,Error
from Ft.Xml.Xslt.CopyOfElement import CopyNode

EXSL_STRINGS_NS = "http://exslt.org/strings"

def Align(context, target, padding, alignment=''):
    """
    The str:align function aligns a string within another string.

    See http://exslt.org/str/functions/align/str.align.html for further
    explanation.
    """
    target = Conversions.StringValue(target)
    padding = Conversions.StringValue(padding)
    alignment = alignment and Conversions.StringValue(alignment)

    # If the target string is longer than the padding string, then it is
    # truncated to be the same length as the padding string and returned.
    if len(target) > len(padding):
        return target[:len(padding)]

    # If no third argument is given or if it is not one of 'left', 'right'
    # or 'center', then it defaults to left alignment.
    if alignment == 'right':
        result = padding[:-len(target)] + target
    elif alignment == 'center':
        # With center alignment, the range of characters replaced by the target
        # string is in the middle of the padding string, such that either the
        # number of unreplaced characters on either side of the range is the
        # same or there is one less on the left than there is on the right.
        left = (len(padding) - len(target)) / 2
        right = left + len(target)
        result = padding[:left] + target + padding[right:]
    else:
        result = target + padding[len(target):]
    return result

def Concat(context, nodeset):
    """
    The str:concat function takes a node set and returns the concatenation of
    the string values of the nodes in that node set. If the node set is empty,
    it returns an empty string.
    """
    if type(nodeset) != type([]):
        raise XsltRuntimeException(Error.WRONG_ARGUMENT_TYPE,
                                   context.currentInstruction)

    strings = map(Conversions.StringValue, nodeset)
    return u''.join(strings)

def DecodeUri(context, uri, encoding=u'UTF-8'):
    """
    The str:decode-uri function decodes a percent-encoded string, such as
    one would find in a URI.
    """
    uri = Conversions.StringValue(uri)
    encoding = Conversions.StringValue(encoding)
    try:
        decoder = codecs.lookup(encoding)[1]
    except LookupError:
        # Unsupported encoding
        return u''

    def repl(match, decoder=decoder):
        # Remove the leading '%'
        sequence = match.group()[1:]
        # There may be multiple encoded characters that are required
        # to produce a single Unicode character.
        ordinals = sequence.split('%')
        characters = [ chr(int(ordinal, 16)) for ordinal in ordinals ]

        # Ignore any invalid sequences in this encoding
        return decoder(''.join(characters), 'ignore')[0]

    return re.sub('(?:%[0-9a-fA-F]{2})+', repl, uri)


_unreserved = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789'
               "-_.!~*'()"
               '%') # not really unreserved, but handled specially before these

_reserved = ';/?:@&=+$,[]'

def EncodeUri(context, uri, escapeReserved, encoding=u'UTF-8'):
    """
    The str:encode-uri function percent-encodes a string for embedding in a URI.
    The second argument is a boolean indicating whether to escape reserved characters;
    if true, the given string can be a URI already, with just some of its characters
    needing to be escaped (not recommended, but users who don't understand the nuances
    of the URI syntax tend to prefer it over assembling a URI piece-by-piece).
    """
    uri = Conversions.StringValue(uri)
    escape_reserved = Conversions.BooleanValue(escapeReserved)
    encoding = Conversions.StringValue(encoding)

    try:
        encoder = codecs.lookup(encoding)[0]
    except LookupError:
        return u''

    # The "%" is escaped only if it is not followed by two hexadecimal digits.
    uri = re.sub('%(?![0-9A-Fa-f]{2})', u'%25', uri)

    safe = _unreserved
    if not escape_reserved:
        safe += _reserved

    res = list(uri)
    for i in xrange(len(res)):
        c = res[i]
        if c not in safe:
            try:
                if ord(c) > 127:
                    encoded = encoder(c, 'strict')[0]
                else:
                    encoded = chr(ord(c))
            except UnicodeError:
                # Not valid in this encoding
                encoded = '%3F'
            else:
                # The Unicode character could map to multiple bytes
                encoded = u''.join([ '%%%02X' % ord(c) for c in encoded ])
            res[i] = encoded
    return u''.join(res)


def Padding(context, length, chars=None):
    """
    The str:padding function creates a padding string of a certain length.

    The second argument gives a string to be used to create the padding.
    This string is repeated as many times as is necessary to create a string
    of the length specified by the first argument; if the string is more than
    a character long, it may have to be truncated to produce the required
    length. If no second argument is specified, it defaults to a space (' ').
    """
    length = int(Conversions.NumberValue(length))
    chars = chars and Conversions.StringValue(chars) or u' '
    return (chars*length)[:length]

def Replace(context, s, searchNodes, replNodes):
    """
    The str:replace function converts a string to a node-set, with
    each instance of a substring from a given list (obtained from the
    string-values of nodes in the second argument) replaced by the
    node at the corresponding position of the node-set given as the
    third argument. Unreplaced substrings become text nodes.

    The second and third arguments can be any type of object; if
    either is not a node-set, it is treated as if it were a node-set
    of just one text node, formed from the object's string-value.

    Attribute and namespace nodes in the replacement set are
    erroneous but are treated as empty text nodes.

    All occurrences of the longest substrings are replaced first,
    and once a replacement is made, that span of the original string
    is no longer eligible for future replacements.

    An empty search string matches between every character of the
    original string.

    See http://exslt.org/str/functions/replace/str.replace.html for details.
    """
    #FIXME: http://www.exslt.org/str/functions/replace/ doesn't say we have
    #to convert the first arg to a string, but should we, anyway?
    #If not, we should at least check and flag non-strings with a clear error?
    # prepare a list of strings to search for (based on searchNodeSet)
    s = Conversions.StringValue(s)
    if not isinstance(searchNodes, XPathTypes.NodesetType):
        search_set = [Conversions.StringValue(searchNodes)]
    else:
        search_set = map(Conversions.StringValue, searchNodes)

    # prepare a list of replacement nodes for each search string (based on replNodes)
    if type(replNodes) is not type([]):
        replace_set = [context.node.rootNode.createTextNode(Conversions.StringValue(replNodes))]
    else:
        # use replNodes but replace attr, ns nodes with empty text nodes
        replace_set = [(n.nodeType == n.ATTRIBUTE_NODE or
                        n.nodeType == NAMESPACE_NODE) and
                       context.node.createTextNode(u'') or n
                       for n in replNodes]

    # make a list of tuples that map each search string to a replacement node or None
    replacements = map(None, search_set, replace_set)
    replacements = [tup for tup in replacements if tup[0]]

    # Sort the tuples in ascending order by length of string.
    # So that the longest search strings will be replaced first,
    # we will process it in reverse order (it may be more efficient to
    # pop items off the end of a list; see
    # http://groups.google.com/groups?selm=3DE41EBE.B60BA9FE%40alcyone.com
    replacements.sort(lambda a, b: cmp(len(a[0]), len(b[0])))

    # generate a result tree fragment
    processor = context.processor
    processor.pushResultTree(context.currentInstruction.baseUri)
    try:
        _replace(s, replacements, processor)
    finally:
        rtf = processor.popResult()

    # return it as a node-set
    return rtf.childNodes


def _replace(s, replmap, processor):
    """
    Supports str:replace(). s is a string. replmap is a list of tuples,
    where each tuple is a search string and a replacement node or None.
    This recursive function will cause the original string to have
    occurrences of the search strings replaced with the corresponding
    node or deleted. When a replacement is made, that portion of the
    original string is no longer available for further replacements.
    All replacements are made for each search string before moving on
    to the next. Empty search strings match in between every character
    of the original string.
    """
    # rm is a locally-scoped copy of replmap
    rm = replmap[:]
    if rm:
        sr = rm.pop()
        if sr[0]:
            nms = s.split(sr[0])
        else:
            nms = [c for c in s] #FIXME nms = list(s) as soon as 2.3 is minimum
        last_i = len(nms) - 1
        for i in xrange(len(nms)):
            if nms[i]:
                _replace(nms[i], rm, processor)
            if i < last_i and sr[1]:
                CopyNode(processor, sr[1])
    else:
        processor.writer.text(s)
    return


def Split(context, string, pattern=u' '):
    """
    The str:split function splits up a string and returns a node set of
    token elements, each containing one token from the string.

    The first argument is the string to be split. The second argument is a
    pattern string (default=' '). The string given by the first argument is
    split at any occurrence of this pattern. An empty string pattern will
    result in a split on every character in the string.
    """
    string = Conversions.StringValue(string)
    pattern = Conversions.StringValue(pattern)
    processor = context.processor
    processor.pushResultTree(context.currentInstruction.baseUri)
    try:
        if pattern:
            for token in string.split(pattern):
                processor.writer.startElement(u'token')
                processor.writer.text(token)
                processor.writer.endElement(u'token')
        else:
            for ch in string:
                processor.writer.startElement(u'token')
                processor.writer.text(ch)
                processor.writer.endElement(u'token')
    finally:
        rtf = processor.popResult()
    return rtf.childNodes

def Tokenize(context, string, delimiters='\t\n\r '):
    """
    The str:tokenize function splits up a string and returns a node set of
    'token' elements, each containing one token from the string.

    The first argument is the string to be tokenized. The second argument
    is a string consisting of a number of characters. Each character in
    this string is taken as a delimiting character. The string given by the
    first argument is split at any occurrence of any of these characters.
    """
    string = Conversions.StringValue(string)
    if delimiters:
        delimiters = Conversions.StringValue(delimiters)
        tokens = re.split('[%s]' % delimiters, string)
    else:
        tokens = string

    processor = context.processor
    processor.pushResultTree(context.currentInstruction.baseUri)
    try:
        for token in tokens:
            processor.writer.startElement(u'token')
            processor.writer.text(token)
            processor.writer.endElement(u'token')
    finally:
        rtf = processor.popResult()
    return rtf.childNodes

ExtNamespaces = {
    EXSL_STRINGS_NS : 'str',
    }

ExtFunctions = {
    (EXSL_STRINGS_NS, 'align') : Align,
    (EXSL_STRINGS_NS, 'concat') : Concat,
    (EXSL_STRINGS_NS, 'decode-uri') : DecodeUri,
    (EXSL_STRINGS_NS, 'encode-uri') : EncodeUri,
    (EXSL_STRINGS_NS, 'padding') : Padding,
    (EXSL_STRINGS_NS, 'replace') : Replace,
    (EXSL_STRINGS_NS, 'split') : Split,
    (EXSL_STRINGS_NS, 'tokenize') : Tokenize,
}

ExtElements = {}

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.