# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: __init__.py 5682 2008-12-05 18:14:13Z grisha $
"""
This package contains utility functions for components, which
need to work with files and URLs.
"""
__docformat__ = "epytext en"
import sys, os, re, sets, urlparse
from snaplogic.common.snap_exceptions import *
from snaplogic.common.SnapReader import SnapReader
from snaplogic.common.SnapReader import SnapFtpReader
from snaplogic.common.SnapReader import SnapHttpReader
from snaplogic.common.SnapReader import SnapFileReader
from snaplogic.cc import component_api
# URI schemes listed for reading (presumably matching the SnapReader
# implementations imported above -- verify against SnapReader.create).
reader_schemes = ["file", "ftp", "http", "https"]

# URI schemes listed for writing.
writer_schemes = ["file"]

# Every scheme qualify_filename() treats as "already qualified".
# Plain list concatenation, so "file" appears twice; the list is only used
# for membership tests in this module, where the duplicate is harmless.
known_schemes = reader_schemes + writer_schemes

# Key looked up in a component's configuration dictionary by
# get_file_location() to find the directory files are confined to.
CONFIG_ROOT_DIRECTORY = "root_directory"
def init_file_component(fileComp, propName='filename', fileList=None):
    """
    Confine a file-based component to its registration-time 'Root' directory.

    The file name is read from the component's module property named by
    propName, a leading 'file://' is removed, and the result is passed
    through check_root() together with the root. Whatever check_root()
    returns is stored on the component as its _filename attribute. Every
    entry of fileList, when given, is passed through check_root() as well
    (the return values are discarded).

    NOTE(review): check_root() is not defined in the visible part of this
    module; presumably it is provided elsewhere and raises when a path lies
    outside the root -- confirm.

    @param fileComp: The file-reading/writing Component to check. Its
                     reg_time_args and moduleProperties mappings are read.
    @type fileComp:  Component

    @param propName: The name of the resource property for the file name/url.
    @type propName:  str

    @param fileList: Optional list of additional files to check against the
                     root. Used when the property names a list of files.
    @type fileList:  list

    @raise SnapComponentError: If the root is not an existing directory.
    """
    # Any failure to obtain 'Root' (missing key, missing attribute, ...)
    # falls back to the file system root instead of raising.
    try:
        root = fileComp.reg_time_args['Root']
    except Exception:
        root = "/"

    if not os.path.isdir(root):
        raise SnapComponentError('Root (' + root + ') is not a directory')

    # Only a leading 'file://' is stripped; the remainder is handed to
    # check_root() as-is (no realpath resolution is done here).
    raw_name = fileComp.moduleProperties[propName]
    fileComp._filename = check_root(root, re.sub('^file://', '', raw_name))

    # A missing or empty fileList simply means there is nothing more to check.
    for entry in fileList or ():
        check_root(root, entry)
def get_file_location(filename, config):
    """
    Gets the absolute filename by prepending the value of 'root_directory'
    key in config to the filename, and verifies the result stays inside
    that root.

    File names that qualify_filename() does not turn into a 'file://' URL
    (e.g. http or ftp URLs) are returned unchanged, without consulting the
    configuration. If the incoming name carried a 'file://' prefix, the
    returned name is passed through qualify_filename() again so it keeps
    a scheme prefix.

    NOTE(review): the early return relies on a literal, case-sensitive
    'file://' prefix. Names such as 'FILE:///x' or 'file:/x' appear to be
    returned unchanged, i.e. without the root check -- confirm whether
    that is intended.

    NOTE(review): the indentation of this file was lost. The root == '/'
    shortcut is placed inside the NT branch because it reads `drive`, which
    is only bound there -- confirm against the original layout.

    @param filename: name of the file. Can be a file:// qualified URL or
                     not.
    @type filename:  string

    @param config: a dictionary containing a component's configuration.
                   'root_directory' key is expected to be defined.
    @type config:  dict

    @return: absolute filename within the root
    @rtype:  string

    @raise SnapComponentError: If filename is None, if (on NT) a drive is
                               given with a relative path, or if the
                               resolved file lies outside the root.
    @raise SnapComponentConfigError: If config is None or has no
                                     'root_directory' entry.
    """
    # Kept for error messages; `filename` is rewritten several times below.
    orig_filename = filename
    if filename is None:
        raise SnapComponentError("No filename specified")
    if config is None:
        raise SnapComponentConfigError("No configuration specified")

    # Only local files are subject to the root restriction.
    if not qualify_filename(filename).startswith("file://"):
        return filename

    # Figure out the root:
    if CONFIG_ROOT_DIRECTORY in config:
        root = config[CONFIG_ROOT_DIRECTORY]
    else:
        raise SnapComponentConfigError("No %s specified in configuration" % CONFIG_ROOT_DIRECTORY)

    # Remember whether the caller used a file:// URL so the same form can be
    # handed back, then reduce the name to a path without its leading slash.
    qualified = False
    if filename.startswith("file://"):
        qualified = True
        filename = filename[7:]
    if filename.startswith("/"):
        filename = filename[1:]

    if os.name == 'nt':
        # `drive` is bound only on NT; the later use is guarded by an
        # os.name test that short-circuits on other platforms.
        (drive,path) = os.path.splitdrive(filename)
        path = path.replace("\\","/")
        if drive:
            # A drive letter followed by a relative path is rejected.
            if not path.startswith("/"):
                raise SnapComponentError("Cannot find %s, please specify absolute path" % orig_filename)
        if root == '/':
            # Root '/' shortcut: the name is only made absolute and
            # normalized; no containment check is performed.
            if drive:
                filename = os.path.abspath(filename)
            else:
                filename = os.path.abspath("/" + filename)
            filename = os.path.normpath(filename)
            filename = filename.replace("\\","/")
            if qualified:
                filename = qualify_filename(filename)
            return filename

    # Normalize the root and make sure it ends with '/', so the prefix
    # comparison below cannot match a sibling such as '/rootdir2'.
    root = os.path.normpath(os.path.abspath(root))
    if os.name == 'nt':
        root = root.replace("\\","/")
        root = root.lower()
    if not root.endswith("/"):
        root += "/"

    # A name that carries its own drive (NT only) is taken as already
    # absolute; everything else is interpreted relative to the root.
    if os.name != 'nt' or not drive:
        filename = root + filename

    # Collapse '..' and similar before comparing against the root.
    filename = os.path.normpath(os.path.abspath(filename))
    if os.name == 'nt':
        # The comparison is case-insensitive on NT (root was lowercased
        # above); the returned name keeps its original case.
        filename = filename.replace("\\","/")
        filename_to_compare = filename.lower()
    else:
        filename_to_compare = filename
    if os.path.commonprefix([filename_to_compare, root]) != root:
        raise SnapComponentError("Access denied for file: " + orig_filename + " (not in " + root + ")")
    if qualified:
        filename = qualify_filename(filename)
    return filename
def qualify_filename(filename):
    """
    Make sure a file name always starts with a scheme prefix.

    A name whose URL scheme (as parsed by urlparse) is one of known_schemes
    is returned unchanged. Any other name -- no scheme, or a scheme that is
    not in known_schemes -- is treated as a plain path: a leading '/' is
    added when missing and 'file://' is prepended.

    E.g. "/tmp/foo" becomes "file:///tmp/foo".

    @param filename: file name or URL to qualify
    @type filename:  string

    @return: the name, guaranteed to start with a scheme prefix
    @rtype:  string
    """
    # Only the scheme component of the parsed URL matters here.
    scheme = urlparse.urlparse(filename)[0]
    if scheme is not None and scheme in known_schemes:
        # Already carries a supported scheme: hand it back untouched.
        return filename

    # No scheme or unsupported scheme: treat as an unqualified file name.
    prefix = "file://"
    if not filename.startswith("/"):
        prefix += "/"
    return prefix + filename
def _as_scheme_set(schemes):
    """Return schemes as a set; a single bare scheme becomes a one-element set."""
    if schemes is None:
        return set()
    if isinstance(schemes, (list, tuple, set, frozenset)):
        return set(schemes)
    # Anything else (typically a string) is one scheme, not a sequence of
    # characters.
    return set([schemes])


def validate_schemes(config_schemes, component_schemes):
    """
    Validate the schemes declared in the component configuration file
    against the list of schemes actually supported by this component.
    If an unsupported scheme is declared in the configuration file
    throw an exception.

    Either argument may be a single scheme, or a list/tuple/set of schemes.
    None means "no schemes", so a config_schemes of None always validates.

    @param config_schemes: schemes as declared in the component configuration file
    @type config_schemes:  list

    @param component_schemes: list of schemes supported by component
    @type component_schemes:  list

    @raise SnapComponentConfigError: if a declared scheme is not supported
    """
    # The built-in set type replaces the deprecated 'sets' module.
    unsupported_schemes = _as_scheme_set(config_schemes) - _as_scheme_set(component_schemes)
    if unsupported_schemes:
        # Sorted (by string form, so mixed types cannot break the sort) to
        # keep the message stable from run to run.
        raise SnapComponentConfigError("Unsupported scheme(s) in the component configuration file: " +
                                       str(sorted(unsupported_schemes, key=str)))
def validate_filename_property(filename, prop, err_obj, config_schemes, default_schemes):
    """
    Check that a file name uses an allowed URI scheme and record any problem.

    The actual check is delegated to validate_file_scheme(). When it reports
    an error, the message is stored on the error entry that err_obj returns
    for the given property. File names for which component_api.has_param()
    is true (e.g. containing ${var}) are skipped, because the parameter
    cannot be expanded at this stage.

    @param filename: filename to validate
    @type filename:  string

    @param prop: property name that should be flagged with error
                 if filename doesn't validate
    @type prop:  string

    @param err_obj: error object where error is reported
                    if filename doesn't validate
    @type err_obj:  L{SimplePropErr} or L{ListPropErr} or L{DictPropErr}

    @param config_schemes: URI schemes specified for this component
                           in the component configuration file.
    @type config_schemes:  string or list

    @param default_schemes: URI schemes supported by this component.
                            Used as default if no schemes are specified
                            in the configuration file.
    @type default_schemes:  string or list
    """
    # Parameterized names can only be judged once the parameters are known.
    if component_api.has_param(filename):
        return

    problem = validate_file_scheme(filename, config_schemes, default_schemes)
    if problem is None:
        return

    # Attach the message to the property's entry in the error object.
    err_obj.get_property_err(prop).set_message(problem)
def validate_file_scheme(filename, configSchemes, defaultSchemes):
    """
    Validate the scheme used by filename to ensure it's one of the allowed schemes.
    If filename is not qualified with a URI prefix, we prepend file:// to it
    before validating.

    The schemes may be given as a single scheme or as a list, tuple or set
    of schemes. A defaultSchemes of None is treated as "no schemes", which
    makes every file name fail validation instead of raising TypeError.

    @param filename: filename to validate
    @type filename:  string

    @param configSchemes: list of URI schemes specified in the config file
    @type configSchemes:  list

    @param defaultSchemes: default list of URI schemes,
                           that takes effect if no configSchemes are specified
    @type defaultSchemes:  list

    @return: error string if any, None if no errors
    @rtype:  string
    """
    # Qualify the filename with file:// prefix if necessary
    filename = qualify_filename(filename)

    # If schemes are specified in the config file use them,
    # otherwise use the default list of schemes
    usingDefaultSchemes = configSchemes is None or len(configSchemes) == 0
    if usingDefaultSchemes:
        schemes = defaultSchemes
    else:
        schemes = configSchemes

    # Always work on a list: a tuple or set of schemes is converted rather
    # than wrapped whole (which made 'scheme + "://"' fail), and a single
    # bare scheme becomes a one-element list.
    if schemes is None:
        schemes = []
    elif isinstance(schemes, (list, tuple, set, frozenset)):
        schemes = list(schemes)
    else:
        schemes = [schemes]

    # See if the "filename" begins with any of the "approved" schemes
    for scheme in schemes:
        if filename.startswith(scheme + "://"):
            return None

    # Depending on whether they are using default or config schemes return a different error
    if usingDefaultSchemes:
        return "File name '%s' must use one of the schemes supported by the component: %s" % (filename, schemes)
    else:
        return "File name '%s' must use one of the schemes specified in the component configuration file: %s" % (filename, schemes)
def read_input_list(filename, username, password):
    """
    Read a list of input sources from the resource named by filename.

    The resource is opened through SnapReader.create() with the given
    credentials. Blank lines and lines whose first non-blank character is
    '#' are ignored. Every other line holds one or two whitespace-separated
    fields: a file name/URL, optionally followed by credentials written as
    username:password (the password part may be omitted).

    NOTE(review): the reader is opened but not closed in this function;
    presumably cleanup happens elsewhere -- confirm.

    @param filename: location of the list to read
    @type filename:  string

    @param username: user name used to open the list itself
    @type username:  string

    @param password: password used to open the list itself
    @type password:  string

    @return: list of (url, username, password) tuples, one per entry, with
             the url passed through qualify_filename() and blank strings
             for credentials that were not given
    @rtype:  list

    @raise SnapValueError: If a line has more than two fields or its
                           credentials contain more than one colon.
    """
    # Create the reader for the filename ...
    rdr = SnapReader.create(filename, username, password)
    rdr.open()

    inputs = []
    for raw in rdr.readlines():
        entry = raw.strip()
        # Skip blank lines and comments.
        if not entry or entry[0] == '#':
            continue

        parts = entry.split()
        count = len(parts)
        if count < 1 or count > 2:
            # Incorrect number of fields: too few or too many
            raise SnapValueError('Invalid input source (%s).' % entry)

        # Per-entry credentials default to blanks when none are given.
        entry_user = ""
        entry_password = ""
        if count == 2:
            # The second field is in the form of username:password. Split it.
            creds = parts[1].split(':')
            if len(creds) > 2:
                # There should be at most one colon in the credentials
                raise SnapValueError("Invalid credentials given, must be username:password. Check to make sure there isn't an extra colon.")
            entry_user = creds[0]
            if len(creds) == 2:
                entry_password = creds[1]

        inputs.append((qualify_filename(parts[0]), entry_user, entry_password))

    return inputs
|