# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008 - 2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: TypeConverter.py 10330 2009-12-24 22:13:38Z grisha $
"""
Type conversion component. The values are converted from input
fields and are assigned to the output field of the same name. It
is assumed that the input and output views have the same number of
fields. When applicable, a format string can be provided to guide
conversion of datetime and numeric values. The conversion rules are
as follows:
datetime to string
string to datetime
number to string
in accordance with format (see TypeConverterResDef.FormatString
below).
string to number - interpret string as number, if possible.
number to datetime
datetime to number
for simplicity, disallowed. Whether this should be allowed,
treating number as number of seconds since (or before, for
negative values) the epoch can be discussed for the next release.
"""
# Declares the docstring markup of this module as epytext, written in English.
__docformat__ = "epytext en"
from datetime import datetime
import time
from decimal import Decimal
import sys
from snaplogic.common.data_types import Record
from snaplogic.common import version_info
from snaplogic.cc import component_api
from snaplogic.cc.component_api import ComponentAPI
from snaplogic.common.snap_exceptions import SnapComponentError
import snaplogic.cc.prop as prop
from snaplogic import components
from snaplogic.snapi_base import keys
# Public names
# Keys of one conversion spec dictionary: the input field to convert and the
# format string that guides the conversion of that field.
FIELD_NAME = 'field_name'
FIELD_FORMAT = 'field_format'
# Name given to the dictionary property that describes a single conversion.
CONVERT_SPEC = 'conv_spec'
# Name of the component property holding the list of conversion specs.
CONVERT_SPECS = 'conv_specs'
# Default format used by the datetime <-> string converters when the caller
# passes no format string.
DefaultDateTimeFormatString = "%Y-%m-%dT%H:%M:%S"
"""ISO 8601 format."""
# Default format used by the number -> string converter when the caller
# passes no format string.
DefaultNumberFormatString = "%s"
"""No formatting."""
class TypeConverter(ComponentAPI):
    """
    The type conversion component.

    Records are read from the single input view. Every field is converted to
    the type declared for the field of the same name in the single output
    view, and the resulting record is written to that output view. The
    supported conversions are number to string, string to number, datetime
    to string and string to datetime; conversions between number and
    datetime are rejected.
    """

    api_version = '1.0'
    component_version = '1.2'

    capabilities = {
        ComponentAPI.CAPABILITY_INPUT_VIEW_LOWER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_INPUT_VIEW_UPPER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_LOWER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_UPPER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_ALLOW_PASS_THROUGH        : True
    }

    component_description = "Converts types of fields with same names."
    component_label = "Type Converter"
    component_doc_uri = "https://www.snaplogic.org/trac/wiki/Documentation/%s/ComponentRef/TypeConverter" % \
        version_info.doc_uri_version

    def _define_convert_specs_property(self):
        """
        Define the CONVERT_SPECS property of this component.

        The property is a list of dictionaries. Each dictionary has exactly
        two entries: FIELD_NAME (required, constrained to the input view
        fields) and FIELD_FORMAT (the format string for that field).

        Shared by L{create_resource_template} and L{upgrade_1_0_to_1_1} so
        that both always produce the same definition.
        """
        field_name = prop.SimpleProp(FIELD_NAME, "string", "Input field to convert",
                                     {'lov': [keys.CONSTRAINT_LOV_INPUT_FIELD]}, required=True)
        field_format = prop.SimpleProp(FIELD_FORMAT, "string", "Format string")
        # The default value here (field_name) is only given because a default
        # value is required by DictProp.__init__(). field_format would have
        # served equally well.
        convert_spec = prop.DictProp(CONVERT_SPEC, field_name, "Description of field conversion",
                                     min_size=2, max_size=2, fixed_keys=True, required=False)
        convert_spec[FIELD_NAME] = field_name
        convert_spec[FIELD_FORMAT] = field_format
        convert_specs = prop.ListProp("Conversion specifications", convert_spec, required=False)
        self.set_property_def(CONVERT_SPECS, convert_specs)

    def create_resource_template(self):
        """
        Create TypeConverter resource definition template.

        Defines the CONVERT_SPECS property: a list of dictionaries, each
        naming an input field and the format string for its conversion.
        """
        self._define_convert_specs_property()

    def _get_view_map(self, view):
        """
        Create a name->type dictionary for a specified view.

        @param view: View definition whose 'fields' entry is a sequence of
            (name, type, description) triples.
        @return: Dictionary mapping field name to field type. If a name
            contains a dot, only the part after the first dot is used as key.
        """
        type_by_name = {}
        for (name, field_type, _desc) in view['fields']:
            dot = name.find('.')
            if dot >= 0:
                name = name[dot + 1:]
            type_by_name[name] = field_type
        return type_by_name

    def _is_unsupported_conversion(self, in_type, out_type):
        """
        Tell whether converting in_type to out_type is disallowed.

        Only the two number/datetime combinations are disallowed.
        """
        return ((in_type == 'datetime' and out_type == 'number') or
                (in_type == 'number' and out_type == 'datetime'))

    def validate(self, err_obj):
        """
        Validate correctness of the presented resdef for this component.

        Checks that the input and output views carry the same field names,
        that no field requires a number/datetime conversion, and that every
        conversion spec refers to a known field and carries a format string
        only where one is allowed and well-formed.

        @param err_obj: Error object on which validation messages are set.
        """
        input_view_names = self.list_input_view_names()
        output_view_names = self.list_output_view_names()

        # Can't validate view fields if the views aren't there yet...
        if len(input_view_names) != 1 or len(output_view_names) != 1:
            return

        # Input and output views must have the same fields
        inp_view_desc = self.get_input_view_def(input_view_names[0])
        out_view_desc = self.get_output_view_def(output_view_names[0])

        inp_view_field_names = set(d[0] for d in inp_view_desc['fields'])
        out_view_field_names = set(d[0] for d in out_view_desc['fields'])

        not_in_out = sorted(inp_view_field_names - out_view_field_names)
        not_in_in = sorted(out_view_field_names - inp_view_field_names)
        if not_in_out:
            err_obj.get_output_view_err().set_message(
                "Input view fields missing from output view: %s" % ', '.join(not_in_out))
        if not_in_in:
            err_obj.get_input_view_err().set_message(
                "Output view fields missing from input view: %s" % ', '.join(not_in_in))

        # Create a type dictionary for each view for easy lookup...
        inp_types = self._get_view_map(inp_view_desc)
        out_types = self._get_view_map(out_view_desc)

        # ... and use it to do sanity checking on the proposed conversion
        spec_err = err_obj.get_property_err(CONVERT_SPECS)
        in_view_err = err_obj.get_input_view_err()[input_view_names[0]]

        # Walk the field list itself (not a set of names) so that field_idx is
        # the real position of the field in the input view and the message is
        # attached to the field it is about.
        for (field_idx, field_def) in enumerate(inp_view_desc['fields']):
            field_name = field_def[0]
            # Validation continues even when the two views differ, so a field
            # may be unknown to one of the maps. Such fields were already
            # reported above; just move on to the next one.
            if field_name not in inp_types or field_name not in out_types:
                continue
            in_type = inp_types[field_name]
            out_type = out_types[field_name]
            if self._is_unsupported_conversion(in_type, out_type):
                in_view_err['fields'][field_idx][1].set_message(
                    "Cannot convert %s to %s." % (in_type, out_type))

        # Can't do anymore until we have the conv specs or they are not parameterized anymore
        cs = self.get_property_value(CONVERT_SPECS)
        if component_api.has_param(cs) or not cs:
            return

        for (spec_idx, spec) in enumerate(cs):
            speced_field = spec[FIELD_NAME]
            speced_format = spec.get(FIELD_FORMAT, None)

            # A spec naming an unknown field is a validation error, not a crash.
            if speced_field not in inp_types or speced_field not in out_types:
                spec_err[spec_idx][FIELD_NAME].set_message(
                    "Field '%s' must be present in both the input and the output view." % speced_field)
                continue

            in_type = inp_types[speced_field]
            out_type = out_types[speced_field]

            if in_type == out_type and speced_format:
                spec_err[spec_idx][FIELD_FORMAT].set_message(
                    "No format string allowed for identical field types.")
            elif in_type == 'string' and out_type == 'number' and speced_format:
                spec_err[spec_idx][FIELD_FORMAT].set_message(
                    "No format string allowed for conversion of string to number.")
            elif self._is_unsupported_conversion(in_type, out_type):
                spec_err[spec_idx][FIELD_NAME].set_message(
                    "Cannot convert %s to %s." % (in_type, out_type))
            elif in_type == 'number' and speced_format:
                # Validate the number format string by trying to format an
                # arbitrary number -- if it throws an exception, we add it to
                # the messages. A missing format is fine: the default format
                # is used at execution time. We don't need to validate the
                # datetime format, because unsupported format characters do
                # not result in error in that case.
                try:
                    speced_format % 13.13
                except Exception:
                    msg = str(sys.exc_info()[1])
                    spec_err[spec_idx][FIELD_FORMAT].set_message(
                        "Incorrect format '%s' (%s)" % (speced_format, msg))

    def _number_string_conv(self, num, fmt=DefaultNumberFormatString):
        """
        Format a number as a unicode string.

        @param num: The number to format.
        @param fmt: '%'-style format string applied to the number.
        @return: The formatted unicode string.
        @raise SnapComponentError: If the number cannot be formatted.
        """
        try:
            return unicode(fmt % num)
        except Exception:
            raise SnapComponentError("Could not format '%s' with format '%s': %s" %
                                     (num, fmt, sys.exc_info()[1]))

    def _string_number_conv(self, s):
        """
        Interpret a string as a Decimal number.

        @param s: The string to convert.
        @return: The Decimal value.
        @raise SnapComponentError: If the string is not a valid number.
        """
        try:
            return Decimal(s)
        except Exception:
            raise SnapComponentError("Could not convert '%s' to number: %s" % (s, sys.exc_info()[1]))

    def _string_datetime_conv(self, str, fmt=DefaultDateTimeFormatString):
        """
        Parse a string into a datetime.

        Only the first six parsed components (year through second) are used.

        @param str: The string to parse. The name shadows the builtin and is
            kept only for interface compatibility.
        @param fmt: time.strptime() format string.
        @return: The parsed datetime.
        @raise SnapComponentError: If the string does not match the format.
        """
        try:
            return datetime(*(time.strptime(str, fmt)[0:6]))
        except Exception:
            raise SnapComponentError("Could not convert string '%s' to datetime with format '%s': %s" %
                                     (str, fmt, sys.exc_info()[1]))

    def _datetime_string_conv(self, dt, fmt=DefaultDateTimeFormatString):
        """
        Format a datetime as a unicode string.

        @param dt: The datetime to format.
        @param fmt: strftime() format string.
        @return: The formatted unicode string.
        @raise SnapComponentError: If the datetime cannot be formatted.
        """
        try:
            return unicode(dt.strftime(fmt))
        except Exception:
            raise SnapComponentError("Could not format date %s to with format '%s': %s" %
                                     (str(dt), fmt, sys.exc_info()[1]))

    def execute(self, input_views, output_views):
        """
        Perform the record processing.

        For every input record each field is copied or converted into the
        output record, pass-through fields are transferred, and the record
        is written to the output view. None values are passed on as None.

        @param input_views: Dictionary of connected input views.
        @param output_views: Dictionary of connected output views.
        @raise SnapComponentError: If a view is not connected, an input field
            has no counterpart in the output view, a conversion is not
            supported, or a value cannot be converted.
        """
        try:
            output_view = output_views.values()[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No output view connected.")

        try:
            input_view = input_views.values()[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No input view connected.")

        # Convert the specs to a dictionary for fast lookup. Specs for fields
        # that are not part of the input view are never looked up below, so
        # they are effectively ignored.
        formats = {}
        for spec in self.get_property_value(CONVERT_SPECS) or []:
            formats[spec[FIELD_NAME]] = spec.get(FIELD_FORMAT)

        converters = {
            'number_string'   : self._number_string_conv,
            'datetime_string' : self._datetime_string_conv,
            'string_datetime' : self._string_datetime_conv,
            'string_number'   : self._string_number_conv
        }

        # Output field name -> type; the first field of a given name wins.
        out_types = {}
        for (name, field_type) in zip(output_view.field_names, output_view.field_types):
            out_types.setdefault(name, field_type)

        # Work out once, per input field, which converter and format to use,
        # instead of repeating the lookups for every value of every record.
        # A converter of None means the value is copied unchanged.
        plan = {}
        for (i, field_name) in enumerate(input_view.field_names):
            in_type = input_view.field_types[i]
            if field_name not in out_types:
                raise SnapComponentError("Input field '%s' is missing from the output view." % field_name)
            out_type = out_types[field_name]
            if in_type == out_type:
                plan[field_name] = (None, None)
                continue
            try:
                converter = converters[in_type + "_" + out_type]
            except KeyError:
                raise SnapComponentError("Cannot convert %s to %s." % (in_type, out_type))
            fmt = formats.get(field_name)
            if fmt:
                # See also ticket 676
                fmt = str(fmt)
            else:
                fmt = None
            plan[field_name] = (converter, fmt)

        # The same output record object is refilled for every input record.
        out_rec = output_view.create_record()
        record = input_view.read_record()
        while record:
            for field in record.field_names:
                value = record[field]
                if value is None:
                    out_rec[field] = None
                    continue
                (converter, fmt) = plan[field]
                if converter is None:
                    out_rec[field] = value
                elif fmt:
                    out_rec[field] = converter(value, fmt)
                else:
                    # No format given: the converter's own default applies.
                    out_rec[field] = converter(value)
            out_rec.transfer_pass_through_fields(record)
            output_view.write_record(out_rec)
            record = input_view.read_record()

        output_view.completed()

    def upgrade_1_0_to_1_1(self):
        """
        Add source constraint to Field property
        """
        # Save the property value.
        # We need to recreate the property, which resets the value
        property_value = self.get_property_value(CONVERT_SPECS)

        self._define_convert_specs_property()

        # Restore the value
        self.set_property_value(CONVERT_SPECS, property_value)

    def upgrade_1_1_to_1_2(self):
        """
        No-op upgrade.

        It exists only so that the component doc URI gets changed during the
        upgrade; according to the original note that change is made by
        cc_info before this method is called.
        """
        pass
|