# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008-2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: cc_proxy.py 7423 2009-04-28 18:54:56Z grisha $
"""
This module handles requests that are typically forwarded to the CC. The module
also provides some methods which, under some circumstances, are invoked instead
of forwarding the request to the CC. These circumstances are:
1) When the server needs to do some resdef operation before forwarding
the request to the CC.
2) When the component has not implemented optional methods like create_template,
suggest_resource_values or validate, and the server needs to provide some kind of
default behavior for such situations.
"""
import urlparse
from snaplogic.common import prop_err,uri_utils,uri_prefix,snap_http_lib
from snaplogic.common.headers import CC_TOKEN_HEADER,INVOKER_HEADER
from snaplogic.snapi_base.resdef import ResDef
from snaplogic import server
from snaplogic import rp
from snaplogic.server import product_prefix,RhResponse,cc_list
from snaplogic.server import component_list
from snaplogic.snapi_base import keys
from snaplogic.common.snap_exceptions import *
from snaplogic.server import repository
from snaplogic.common import snap_log
from snaplogic import snapi_base
from snaplogic.common import headers
from snaplogic.common.config import snap_config
from snaplogic.server.auth import ADMIN_USER
def cc_uri_mapper(http_req, raw_text=False, title=None):
    """
    Map component related URIs from main-server to CC.

    When we populated the component list of the main server, we also
    translated CC-specific URIs into main-server URIs. A mapping
    dictionary was created along the way. Along the same line, to
    access the log URIs of the CC, we have created similar mappings
    and have added them to the list as well.

    This function then takes an incoming request, looks up the
    actual URI to which it should go, and re-issues the request
    to that URI. In effect, this is a very simple proxy. We send
    the original message to the CC as a binary blob (because we
    don't have to modify the data and thus can save us some cycles).
    The return data, however, is read through the JSON RP and converted
    to a Python object, because we need to make some changes to it.

    If no mapping exists for the request path, the request is answered
    locally: paths for create-template / suggest-values / validate are
    served by the generic implementations in this module, and everything
    else gets a 404.

    Note: This is NOT suitable for streams and such, only for the
    normal interactions with components through edit/create time.

    @param http_req:    HTTP request object.
    @type  http_req:    L{HttpRequest}

    @param raw_text:    A flag indicating whether the return data is raw
                        text (True) or a JSON object (False).
    @type  raw_text:    bool

    @param title:       A title suggestion for the output page.
    @type  title:       string

    @return:            RhResponse object with data and code to be written
                        to the client, or None if a multi-object response
                        was already written directly to the client.
    @rtype:             L{RhResponse}

    @raise SnapObjNotFoundError: If no CC token is known for the mapped URI.

    """
    if not title:
        title = product_prefix.SNAPLOGIC_PRODUCT + ': Component template'

    # Let's see if we have this URI mapped.
    mapped_uri = server.component_list.map_uri(http_req.path)
    if mapped_uri is None:
        # So there is no CC URI for this entry in the map. If these are URIs for
        # create_resource_template/suggest_resource_values/validate, then this
        # means that the component has not implemented these methods and the server
        # should handle the request with generic responses.
        if http_req.path.startswith(uri_prefix.COMPONENT_RESOURCE_TEMPLATE):
            return create_resource_template(http_req, title)
        elif http_req.path.startswith(uri_prefix.COMPONENT_SUGGEST_RESOURCE_VALUES):
            return suggest_resource_values(http_req, title)
        elif http_req.path.startswith(uri_prefix.COMPONENT_VALIDATE_RESOURCE):
            return validate(http_req, title)
        else:
            # It's none of those URIs, the request has earned a 404.
            # We don't want to log it if the URI is not found, since this
            # can happen quite often and can also be triggered by user
            # interactions.
            return RhResponse(http_req.NOT_FOUND, "Not found")

    # We want to read the raw data, so even though an RP was created for us
    # previously, we will ignore it and just read from the raw input. By
    # using the raw data, we can save ourselves some effort and CPU cycles,
    # since we do not first have to translate the data into a Python object
    # and vice versa.
    # We also start the process of assembling the request headers. The dict
    # is deliberately not called 'headers', which is an imported module.
    req_headers = {}
    if http_req.content_length:
        d = http_req._input.read(http_req.content_length)
        req_headers['Content-length'] = http_req.content_length
    else:
        d = http_req._input.read()

    # The new URI, even though translated to a different server, still needs
    # to contain any parameters that were specified.
    if http_req.params:
        new_uri = "%s?%s" % (mapped_uri, http_req.raw_params)
    else:
        new_uri = mapped_uri

    # Reassemble a (sub-)section of the HTTP headers for the proxied request.
    # We can't just blindly copy them all.
    if http_req.http_content_type:
        req_headers['Content-type'] = http_req.http_content_type
    if not raw_text:
        if http_req.http_accept:
            # We are forcing the CC to reply with JSON. We can't take the binary
            # equivalent, because we need to mess with the response a little bit.
            req_headers['Accept'] = 'application/json'
    else:
        # We are forcing the 'text/html' header if requested by the caller.
        req_headers['Accept'] = 'text/html'
    if http_req.http_cache_control:
        req_headers['Cache-control'] = http_req.http_cache_control

    # Fetch the token for the CC.
    cc_token = cc_list.get_token_by_uri(new_uri)
    if cc_token is None:
        # The exception used to be constructed without being raised, which
        # silently sent the request on without any token.
        raise SnapObjNotFoundError("No CC token found for URI %s" % new_uri)
    req_headers[CC_TOKEN_HEADER] = cc_token

    # The auth layer sets username to the value specified in the invoker HTTP header
    # of the client request. Here, we pass on the username (if any) as the invoker
    # of the request to component. In this manner, the original invoker id is
    # propagated to the component.
    if http_req.username is not None:
        # Never set header to value None, it gets stringified.
        req_headers[INVOKER_HEADER] = http_req.username

    # Here now we manually issue the request to the CC. We do all of this
    # with the raw data, rather than SnAPI functions, because we don't want
    # to have to de-serialize and serialize again, rather unnecessarily.
    resp = snap_http_lib.urlopen(http_req.method, new_uri, d, req_headers)
    status = resp.getStatus()
    # The returned headers are not used. The call itself is kept in case it
    # advances the response stream -- TODO confirm and drop if it does not.
    resp.getHeaders()

    # The response, however, is handled via an RP, because we need to perform
    # a couple of modifications in the returned data, and also allow the HTML
    # RP (if it should be used for communication with the client) to add some
    # headers and footers to the output, if we are in human-readable mode.
    if not raw_text:
        jrp = rp.get_rp('application/json')
        jrp_reader = jrp.Reader(resp)
        try:
            d = jrp_reader.next()
            # In a few cases (log files, for example), we are getting more than
            # one object back here. So for that we are collecting all we get
            # into a list, which is then processed separately later on.
            items = [d]
            try:
                while True:
                    n = jrp_reader.next()
                    if n:
                        items.append(n)
                    else:
                        break
            except Exception:
                # The reader has nothing more for us; keep what we collected.
                # (Not a bare 'except', so that KeyboardInterrupt and
                # SystemExit are not swallowed here.)
                pass

            if len(items) == 1:
                # We are replacing CC-specific portions of URIs with main server
                # specific portions. The goal is to hide CC-specific URIs from the
                # client.
                # We only do this if we don't get a list of things (which is used for
                # log files). This logic works because the meta data objects where
                # such replacement may be necessary are always returned as a single
                # object.
                cc_parts = new_uri.split("/")
                cc_host_portion = cc_parts[0] + "//" + cc_parts[2]
                # This used to go through the undefined name 'comp_list'; the
                # resulting NameError was swallowed below and the reply dropped.
                server.component_list.make_uri_mapping(d, cc_host_portion, server.public_uri)
            else:
                # Lists of things cannot be just passed to the JSON output RP, since
                # we would then get "[ [ 'foo', 'bar' ] ]" on the receiving end. But
                # we just want "[ 'foo', 'bar' ]". So in that case then we need to
                # actually handle our own output stream and write each list entry
                # individually.
                http_req.send_response_headers(200, None, True)
                for item in items:
                    http_req.output.write(item)
                http_req.output.end()
                server.rlog(http_req, 200)
                return None
        except Exception:
            # We may also not have gotten anything back, in case this was a
            # request without response data. That is not an error, and needs
            # to just be silently handled.
            d = None
    else:
        # The output is not JSON or some other kind of format that needs to
        # be read by an RP. Instead, it was just raw text. We can read that
        # straight from the socket.
        d = resp.sock.read()

    return RhResponse(status, d, None, {'title': title})
def validate(http_req, title):
    """
    Do server side validation of a resdef.

    Before any component can validate a resdef, the server first checks
    that the resdef is fundamentally sound in its structure. This function
    carries out that server side check on the resdef read from the request
    body. It is used when the component has no validate method of its own.

    @param http_req:    HTTP request object carrying the resdef dictionary.
    @type  http_req:    L{HttpRequest}

    @param title:       A title suggestion for the output page.
    @type  title:       string

    @return:            BAD_REQUEST response if the resdef is missing or
                        structurally invalid (the body then holds the error
                        information), otherwise an empty OK response.
    @rtype:             L{RhResponse}

    """
    http_req.make_input_rp()
    try:
        submitted_resdef = http_req.input.next()
    except StopIteration:
        return RhResponse(http_req.BAD_REQUEST, "Resource definition missing")

    # A non-None error dictionary means the structure is invalid. The errors
    # go back right away; there is no need to contact the CC.
    structure_errors = prop_err.ComponentResourceErr(submitted_resdef)._to_resdef()
    if structure_errors is None:
        return RhResponse(http_req.OK, None, None, {'title': title})
    return RhResponse(http_req.BAD_REQUEST, structure_errors)
def create_resource_template(http_req, title):
    """
    Return a resdef template for the component, based on the information in
    the component list.

    This is the generic server side implementation, used when the component
    has not implemented create_resource_template itself.

    @param http_req:    HTTP request object. The component name is taken
                        from its path.
    @type  http_req:    L{HttpRequest}

    @param title:       A title suggestion for the output page.
    @type  title:       string

    @return:            OK response whose body is the resdef template
                        dictionary.
    @rtype:             L{RhResponse}

    @raise SnapObjNotFoundError: If the component info lacks a URI for one
                        of the create-template / suggest-values / validate
                        capabilities.

    """
    # First, read everything the client is sending, even though we don't care
    # for it here. Not doing so resulted in the client not being able to send
    # an empty list and caused the client to raise "connection reset" or
    # "connection abort" errors.
    http_req._input.read()

    comp_name = uri_utils.extract_comp_name_from_uri(http_req.path)
    # This lookup used to go through the undefined name 'comp_list', which
    # raised a NameError. Use the server's component list like the rest of
    # this module does.
    cinfo = server.component_list.get_component_info(comp_name)

    for c in (keys.CAPABILITY_CREATE_RESOURCE_TEMPLATE,
              keys.CAPABILITY_SUGGEST_RESOURCE_VALUES,
              keys.CAPABILITY_VALIDATE):
        # A quick sanity check to make sure all URIs are present.
        if cinfo[keys.CAPABILITIES].get(c) is None:
            # This should never happen.
            raise SnapObjNotFoundError("Server did not find URI for capability '%s' in component '%s'" % (c, comp_name))

    resdef_dict = {}
    resdef_dict[keys.COMPONENT_NAME] = comp_name
    resdef_dict[keys.DESCRIPTION] = cinfo[keys.DESCRIPTION]
    resdef_dict[keys.LABEL] = cinfo[keys.LABEL]
    resdef_dict[keys.CAPABILITIES] = cinfo[keys.CAPABILITIES]
    resdef = ResDef(resdef_dict)

    return RhResponse(http_req.OK, resdef.dict, None, {'title': title})
def suggest_resource_values(http_req, title):
    """
    Echo the submitted resdef back for a suggest resource values request.

    The server calls this when the component has not implemented anything
    component specific for suggest_resource_values. Nothing is done to the
    resdef; it is returned to the client 'as is'.

    @param http_req:    HTTP request object carrying the resdef dictionary.
    @type  http_req:    L{HttpRequest}

    @param title:       A title suggestion for the output page.
    @type  title:       string

    @return:            OK response with the unchanged resdef dictionary, or
                        a BAD_REQUEST response if no resdef was sent.
    @rtype:             L{RhResponse}

    """
    http_req.make_input_rp()
    try:
        echoed_resdef = http_req.input.next()
    except StopIteration:
        reply = RhResponse(http_req.BAD_REQUEST, "Resource definition missing")
    else:
        reply = RhResponse(http_req.OK, echoed_resdef, None, {'title': title})
    return reply
def _upgrade_summarize_resources(input_list, credentials):
    """
    Summarize a list of resources to prepare for the upgrade.

    @param input_list:  URIs of the resources to summarize. May be a list of
                        URIs, a single URI, or None to summarize the entire
                        repository. URIs that start with the server's public
                        URI are converted to relative URIs.
    @type  input_list:  list, string or None

    @param credentials: Credentials passed through unchanged to the
                        repository's summarize_resources() call.

    @return:            Dictionary where relative resource URI is mapped to
                        summary.
    @rtype:             dict

    """
    rep = repository.get_instance()

    if input_list is None:
        # Upgrade the entire repository
        uri_list = None
    else:
        # Upgrade of specific resources was requested.
        # isinstance() rather than an exact type comparison, so that list
        # subclasses are treated as lists instead of being wrapped.
        if not isinstance(input_list, list):
            input_list = [input_list]

        # Convert absolute server URIs to relative ones (strip off the
        # http://server prefix); leave all other URIs untouched.
        prefix = server.public_uri
        prefix_len = len(prefix)
        uri_list = [resource[prefix_len:] if resource.startswith(prefix) else resource
                    for resource in input_list]

    # Get a summary for each resource that is to be upgraded
    return rep.summarize_resources(uri_list,
                                   (keys.COMPONENT_NAME, keys.COMPONENT_VERSION),
                                   credentials)
def resource_version_matches_component_version(resdef_dict, comp_name):
    """
    Tell whether a resource was created for the current component version.

    A version entry that is missing or None counts as version '1.0', for
    the resource as well as for the component.

    @param resdef_dict: Dictionary that may hold the resource's component
                        version.
    @type  resdef_dict: dict

    @param comp_name:   Name of the component to compare against.
    @type  comp_name:   string

    @return:            True if both versions are equal.
    @rtype:             bool

    """
    def _declared_version(mapping):
        # Missing or None version entries fall back to 1.0.
        if keys.COMPONENT_VERSION not in mapping:
            return '1.0'
        declared = mapping[keys.COMPONENT_VERSION]
        if declared is None:
            return '1.0'
        return declared

    # The resource side is evaluated first, the component lookup second.
    version_of_resource = _declared_version(resdef_dict)
    component_info = server.component_list.get_component_info(comp_name)
    version_of_component = _declared_version(component_info)

    return version_of_resource == version_of_component
def _upgrade_analyze_resources(resources):
    """
    Work out which of the summarized resources need an upgrade.

    @param resources:   Dictionary mapping relative resource URI to its
                        summary entry (or to None if no summary was found).
    @type  resources:   dict

    @return: Dictionary where relative resource URI is mapped to a dictionary
             describing the upgrade state, e.g.:
             {uri1 : {'needs_upgrade': True,            # Does this resource need an upgrade?
                      'upgraded'     : False,           # Did this resource get upgraded?
                      'error'        : 'description',   # Present only if there was an error
                      'uri'   : 'http://cc/comp/upgrade',  # Component upgrade URI on the CC
                      'token' : '123134512'}}              # CC token to send with the request
             'uri' and 'token' are only present for resources needing an upgrade.
    @rtype:  dict

    """
    results = {}

    for resource_uri in resources:
        entry = resources[resource_uri]

        # Every resource gets a result record, registered before any of the
        # early exits below.
        outcome = {'upgraded': False}
        results[resource_uri] = outcome

        if entry is None:
            # No summary was returned, maybe this resource doesn't exist.
            outcome['error'] = 'Cannot find resource %s (summarize resource returned None) ' % resource_uri
            continue

        summary = entry[keys.SUMMARY]
        component_name = summary[keys.COMPONENT_NAME]

        if component_name == keys.PIPELINE_COMPONENT_NAME:
            # Pipelines are not upgraded yet.
            outcome['needs_upgrade'] = False
            continue

        try:
            versions_match = resource_version_matches_component_version(summary, component_name)
        except Exception:
            # Any failure of the version comparison is reported as a
            # component that could not be found.
            outcome['error'] = 'Cannot upgrade resource: component %s cannot be located' % component_name
            continue

        outcome['needs_upgrade'] = not versions_match
        if versions_match:
            continue

        # This resource needs an upgrade: work out the upgrade URI and the
        # CC token used to call the CC with the upgrade request.
        component = server.component_list.get_component_info(component_name)
        cc_uri = component[keys.URI]
        cc_scheme, cc_netloc = urlparse.urlparse(cc_uri)[:2]

        # The upgrade URI in the component list has had the CC host:port
        # replaced by the data server's. Since we go to the CC directly, put
        # the CC's protocol and host:port back.
        upgrade_parts = list(urlparse.urlparse(component[keys.CAPABILITIES][keys.CAPABILITY_UPGRADE]))
        upgrade_parts[0] = cc_scheme
        upgrade_parts[1] = cc_netloc

        outcome['uri'] = urlparse.urlunparse(upgrade_parts)
        outcome['token'] = cc_list.get_token_by_uri(cc_uri)

    return results
def _upgrade_one_resource(resource_uri, resource, result):
    """
    Upgrade one resource via its component container and save it.

    The outcome is recorded in 'result': 'upgraded' is set to True on
    success; on failure 'error' is set to the exception text and the
    exception is logged. No exception from the CC call or from the save
    is propagated to the caller.

    @param resource_uri: URI of the resource in the repository.
    @type  resource_uri: string

    @param resource:     The resource as read from the repository. Its
                         resdef is replaced by the upgraded one.
    @type  resource:     dict

    @param result:       Result record for this resource. Must contain the
                         'uri' (upgrade URI on the CC) and 'token' (CC token)
                         entries.
    @type  result:       dict

    """
    old_resdef = resource[keys.RESDEF]
    upgrade_uri = result['uri']
    cc_token = result['token']

    # Never set a header to the value None, it gets stringified. Only pass
    # the token along if there actually is one.
    req_headers = {}
    if cc_token is not None:
        req_headers[headers.CC_TOKEN_HEADER] = cc_token

    # Call the component container to upgrade the resource
    try:
        new_resdef = snapi_base.send_req("POST", upgrade_uri, old_resdef, req_headers)
    except Exception as e:
        # CC wasn't able to upgrade this resource
        server.elog(e, str(e))
        result['error'] = str(e)
        return

    # This resource was successfully upgraded:
    # replace the resdef in the resource and save it
    resource[keys.RESDEF] = new_resdef

    try:
        rep = repository.get_instance()
        rep.update_resource(resource_uri, resource[keys.GUID], resource[keys.GEN_ID], new_resdef)
    except Exception as e:
        # It is possible that this resource was updated by another request while
        # we were upgrading it: that'd cause an exception,
        # or it could be that something else went wrong during the update.
        server.elog(e, str(e))
        result['error'] = str(e)
        return

    # Now that we've upgraded and saved the resource set the upgraded flag
    result['upgraded'] = True
def upgrade_resources(http_req):
    """
    Upgrade resources.

    The request body holds a list of URIs of the resources to be upgraded.
    This function determines which ones need an upgrade, if any, and
    delegates the upgrade to the component container. The server then saves
    the upgraded resources.

    An empty list requests an upgrade of the entire repository. That is only
    allowed for the admin user, and only if the server runs in admin_mode.

    @param http_req:    HTTP request object carrying the URI list.
    @type  http_req:    L{HttpRequest}

    @return:            BAD_REQUEST if no URI list was sent, UNAUTHORIZED if
                        a full repository upgrade was requested but is not
                        permitted, otherwise an OK response whose body is a
                        dictionary with the upgrade results where:
                            - key is the resource URI
                            - value is a dictionary with the following keys:
                                * needs_upgrade  # Does this resource require an upgrade?
                                                 # (absent if that could not be determined)
                                * upgraded       # Did this resource get upgraded successfully?
                                * error          # Error information, present only on failure
    @rtype:             L{RhResponse}

    """
    http_req.make_input_rp()
    try:
        uri_list = http_req.input.next()
    except StopIteration:
        return RhResponse(http_req.BAD_REQUEST, "URI list missing")

    if len(uri_list) == 0:
        # If they send in an empty list it tells us to upgrade the entire repository.
        # Only admin can upgrade the entire repository
        if http_req.username != ADMIN_USER:
            return RhResponse(http_req.UNAUTHORIZED,
                              "Only user '%s' is allowed to upgrade the entire repository" % ADMIN_USER)

        # Server must be started in admin_mode to allow upgrading the entire repository
        if not snap_config.get_instance().get_section('main')['admin_mode']:
            return RhResponse(http_req.UNAUTHORIZED,
                              "Server must be started in admin_mode to allow upgrading "
                              "the entire repository")

        # Set uri_list to None to tell the summarize function to get summaries
        # for all resources in the repository.
        uri_list = None

    # Get summary information for the resources
    summaries = _upgrade_summarize_resources(uri_list, (http_req.username, http_req.groups))

    # Analyze the resources:
    # e.g. look at resdef version vs component version
    # to see which ones need to be upgraded.
    # We get a dict back with some helpful information for each resource.
    results = _upgrade_analyze_resources(summaries)

    # Make a list of resources that we need to fetch in their entirety.
    resource_uris_to_fetch = [uri for uri in results if results[uri].get('needs_upgrade')]

    # Now fetch the entire resdefs for all the resources needing an upgrade.
    resources = repository.get_instance().read_resources(resource_uris_to_fetch)

    # Now upgrade the resources that were fetched successfully
    fetched = resources[keys.SUCCESS]
    for resource_uri in fetched:
        _upgrade_one_resource(resource_uri, fetched[resource_uri], results[resource_uri])

    # If there were errors during fetching make sure they are reported.
    # Each resource gets its own error entry. Previously the whole error
    # collection was attached to every failed resource.
    # NOTE(review): assumes the error collection is a dict keyed by resource
    # URI, like the success collection -- confirm against read_resources().
    fetch_errors = resources[keys.ERROR]
    for resource_uri in fetch_errors:
        result = results[resource_uri]
        result['upgraded'] = False
        result['error'] = fetch_errors[resource_uri]

    # Remove some internal information from the dictionary
    # before returning the dictionary to the user
    # (upgrade uri and cc token)
    for resource_uri in results:
        results[resource_uri].pop('uri', None)
        results[resource_uri].pop('token', None)

    return RhResponse(http_req.OK, results)
def cc_log_uri_mapper(http_req):
    """
    Proxy a CC log request through the normal proxy function.

    @param http_req:    HTTP request object.
    @type  http_req:    L{HttpRequest}

    @return:            Whatever cc_uri_mapper() returns for this request.

    """
    # The HTTP-Accept header tells us what the client wants: anything other
    # than JSON means the log should be handed back as raw text.
    wants_raw_text = http_req.http_accept != "application/json"
    log_title = "SnapLogic component container log"
    return cc_uri_mapper(http_req, wants_raw_text, log_title)
|