#!/usr/bin/env python
# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: uri_utils.py 4008 2008-08-22 03:26:34Z dhiraj $
"""
Utilities for working with URIs.
"""
import fnmatch
from snaplogic.common.snap_exceptions import SnapValueError
def _match_segments(pattern_segments, uri_segments):
    """
    Test whether the leading segments of a URI match a list of glob pattern segments.

    Pattern segments are compared position by position against the URI segments using the
    case-sensitive fnmatch.fnmatchcase. URI segments beyond the length of the pattern are ignored,
    so this is a prefix match; a caller that needs an exact match must also compare segment counts.

    @param pattern_segments: Glob patterns, one per path segment.
    @type pattern_segments: sequence

    @param uri_segments: Path segments of the URI being tested.
    @type uri_segments: sequence

    @return: True if every pattern segment matches the URI segment at the same position. False
             otherwise, including when the URI has fewer segments than the pattern.
    @rtype: bool

    """
    # A URI with fewer segments than the pattern can never satisfy it. Without this guard the
    # positional comparison would run off the end of uri_segments.
    if len(uri_segments) < len(pattern_segments):
        return False

    for pattern_segment, uri_segment in zip(pattern_segments, uri_segments):
        if not fnmatch.fnmatchcase(uri_segment, pattern_segment):
            return False
    return True


def path_glob_match(pattern, uri_list, recursive=False):
    """
    Runs a globbing match over a URI list and returns matches.

    A glob pattern much like those used in a shell against a filesystem is possible with this function. It uses
    the fnmatch python module to perform glob matches on the segments of a URI path. Like filesystem globbing,
    the glob pattern will not span URI segments as divided by the "/" character. For example the pattern "/foo*" could
    match "/foo" and "/foobar" but not "/foo/bar".

    If the recursive flag is given and True, the match will become recursive. If a pattern matches a prefix of segments
    in a URI and the recursive flag is True, that URI will be included. With the flag, the pattern "/foo*"
    would match "/foo", "/foobar", and "/foo/bar". A URI with fewer segments than the pattern never matches.
    The pattern "/" combined with the recursive flag matches every URI in the list.

    Matching is case-sensitive. Matches are returned in the order they appear in uri_list.

    @param pattern: A glob pattern.
    @type pattern: str

    @param uri_list: List of URIs to perform search against.
    @type uri_list: sequence

    @param recursive: A flag indicating if recursive pattern matching should be used.
    @type recursive: bool

    @return: List of URIs that match glob pattern.
    @rtype: list

    @raise SnapValueError: The pattern or uri_list are invalidly formatted.

    """
    if not pattern.startswith('/'):
        raise SnapValueError("URI path pattern '%s' must begin with '/'." % pattern)

    # A recursive match rooted at "/" selects everything, so no per-segment work is needed.
    include_all = recursive and pattern == '/'

    # Chop off the first segment that will just be the empty string
    pattern_segments = pattern.split('/')[1:]

    matched_uris = []
    for uri in uri_list:
        # Every URI is validated, even when include_all makes the match itself trivial.
        if not uri.startswith('/'):
            raise SnapValueError("URI path '%s' must begin with '/'." % uri)

        if include_all:
            matched_uris.append(uri)
            continue

        # Chop leading empty string again.
        uri_segments = uri.split('/')[1:]

        # If the recursive flag is not set, the number of segments in the pattern and the URI must match.
        if not recursive and len(uri_segments) != len(pattern_segments):
            continue

        # With the recursive flag, the number of URI segments must be >= the number of pattern segments;
        # _match_segments rejects URIs that are too short and otherwise performs a prefix match.
        if _match_segments(pattern_segments, uri_segments):
            matched_uris.append(uri)

    return matched_uris
def extract_comp_name_from_uri(uri):
    """
    Extract component name from URI.

    This is a very application specific method used both by CC and server. It parses URIs to extract component
    name from them. The component name is taken to be the last "/"-separated segment of the URI, after a single
    trailing "/" (if present) has been dropped.

    @param uri: The URI being parsed
    @type uri: str

    @return: component name
    @rtype: str

    @raise SnapException: The URI contains no "/" separator once a trailing "/" has been dropped.

    """
    if uri.endswith("/"):
        uri = uri[:-1]

    segments = uri.split("/")
    if len(segments) <= 1:
        # This cannot be a valid URI
        # SnapException is not among the names imported at the top of this module, so it is
        # brought into scope here; otherwise this branch fails with NameError instead of
        # raising the intended exception.
        # NOTE(review): assumes SnapException lives in snaplogic.common.snap_exceptions
        # alongside SnapValueError - confirm.
        from snaplogic.common.snap_exceptions import SnapException
        raise SnapException("URI for component is invalid")

    return segments[-1]
|