# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008 - 2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: FormatFields.py 10330 2009-12-24 22:13:38Z grisha $
"""
FormatFields Module and Resource Definition.
The FormatFields component provides functions for formatting field values in the output view. Users specify
the format of each output field in the 'Field format expressions' property of the resource definition.
See FormatFieldsResDef code documentation for examples.
"""
# Imports
import re
from sets import Set
from snaplogic.common.data_types import Record
from snaplogic.common.data_types import SnapString,SnapNumber,SnapDateTime
from snaplogic.common import version_info
from snaplogic.cc.component_api import ComponentAPI
import snaplogic.cc.prop as prop
from snaplogic import components
from snaplogic.components.computils import convert_utils
from snaplogic.common.snap_exceptions import *
from snaplogic.snapi_base import keys
# Public names
__all__ = [ "FormatFields" ]

# Property names used in the FormatFields resource definition.
# The top-level property (PROP_FORMAT_SPECS) is a list property
# (PROP_EXPRESSIONS_LIST) whose entries are dictionaries
# (PROP_EXPRESSION_DICT); each dictionary holds an output field name
# (PROP_OUTPUT_FIELD_NAME) and the format expression for it (PROP_EXPRESSION).

# Key of the format expression inside each specification dictionary.
PROP_EXPRESSION = "Format expression"
# Key of the target output field name inside each specification dictionary.
PROP_OUTPUT_FIELD_NAME = "Field name"
# Name given to the dictionary property describing one specification.
PROP_EXPRESSION_DICT = "Expression dictionary"
# Name given to the list property that holds the specification dictionaries.
PROP_EXPRESSIONS_LIST = "Output field format specifications"
# Name under which the list of specifications is registered on the component.
PROP_FORMAT_SPECS = "Field format expressions"
class FormatFields(ComponentAPI):
    """
    Component that builds output string fields from format expressions.

    Each entry of the 'Field format expressions' property pairs an output
    field name with an expression. Inside an expression, every ${FieldName}
    token is replaced with the value of the input field of that name; all
    other text is copied to the output field unchanged.

    Output fields that have no expression but share a name with an input
    field receive the input value unchanged.
    """

    api_version = '1.0'
    component_version = '1.3'

    # Exactly one input view and one output view; pass-through is allowed.
    capabilities = {
        ComponentAPI.CAPABILITY_INPUT_VIEW_LOWER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_INPUT_VIEW_UPPER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_LOWER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_UPPER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_ALLOW_PASS_THROUGH        : True
    }

    component_description = "Performs output field formatting."
    component_label = "Format Fields"
    component_doc_uri = "https://www.snaplogic.org/trac/wiki/Documentation/%s/ComponentRef/FormatFields" % \
                        version_info.doc_uri_version

    # This component only outputs strings
    supported_output_datatypes = [SnapString,]

    # For finding input field name references in the rhs of expressions in the form ${Field001}
    _subst_re = re.compile(r'\$[{]([\w]+)[}]')

    def _define_properties(self):
        """
        Define the component properties.

        Factored out because it is used by the upgrade methods as well as by
        create_resource_template().
        """
        format_expression = prop.SimpleProp(PROP_EXPRESSION,
                                            SnapString,
                                            "Format expression",
                                            None,
                                            True)
        # The field name is constrained to the list of output view fields.
        output_field_name = prop.SimpleProp(PROP_OUTPUT_FIELD_NAME,
                                            SnapString,
                                            "What output field the result corresponds to",
                                            {'lov' : [keys.CONSTRAINT_LOV_OUTPUT_FIELD] },
                                            True)
        # NOTE(review): the two 2s are presumably the min/max number of
        # entries in the dictionary -- confirm against prop.DictProp.
        format_spec = prop.DictProp(PROP_EXPRESSION_DICT,
                                    None,
                                    "Format specification dictionary",
                                    2,
                                    2,
                                    True,
                                    True)
        format_spec[PROP_OUTPUT_FIELD_NAME] = output_field_name
        format_spec[PROP_EXPRESSION] = format_expression
        format_spec_list = prop.ListProp(PROP_EXPRESSIONS_LIST,
                                         format_spec,
                                         "Format specification properties",
                                         1,
                                         required=True)
        self.set_property_def(PROP_FORMAT_SPECS, format_spec_list)

    def create_resource_template(self):
        """
        Create FormatFields resource definition template.
        """
        self._define_properties()

    def validate(self, err_obj):
        """
        Validate a proposed resource definition for this component.

        Problems are reported by setting messages on err_obj; nothing is
        raised for an invalid definition. The checks are:

        1. Every output field named in a specification exists in the output
           view and has a supported (string) datatype.
        2. No output field is named by more than one specification.
        3. Every ${name} reference in an expression names an input view field.
        4. Every output view field either has an expression or has the same
           name as an input view field.

        @param err_obj: Error object on which validation messages are set.
        """
        in_views = self.list_input_view_names()
        out_views = self.list_output_view_names()

        out_view_name = out_views[keys.SINGLE_VIEW]
        out_view = self.get_output_view_def(out_view_name)
        output_view_fields = [ d[keys.FIELD_NAME] for d in out_view[keys.VIEW_FIELDS] ]
        output_field_types = [ d[keys.FIELD_TYPE] for d in out_view[keys.VIEW_FIELDS] ]

        in_view = self.get_input_view_def(in_views[keys.SINGLE_VIEW])
        input_view_fields = [ d[keys.FIELD_NAME] for d in in_view[keys.VIEW_FIELDS] ]

        # Get the list of format expressions.
        # There needs to be one for each output view field.
        # The field names need to be valid. (the order doesn't matter though)
        format_specs = self.get_property_value(PROP_FORMAT_SPECS)
        format_output_field_names = []
        for i, spec in enumerate(format_specs):
            format_expression = spec[PROP_EXPRESSION]
            format_output_field_name = spec[PROP_OUTPUT_FIELD_NAME]

            if format_output_field_name in output_view_fields:
                # 1. Check that the datatype of the output field is string
                field_index = output_view_fields.index(format_output_field_name)
                field_type = output_field_types[field_index]
                if field_type not in self.supported_output_datatypes:
                    err_obj.get_output_view_err()[out_view_name][keys.VIEW_FIELDS][field_index].set_message(
                        "Output field '%s' datatype '%s' is not supported.  Must be one of: %s" %
                        (format_output_field_name, field_type, str(self.supported_output_datatypes)))

                # 2. While we are here, make sure that the field name isn't a duplicate.
                if format_output_field_name in format_output_field_names:
                    err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_OUTPUT_FIELD_NAME].set_message(
                        "Output field name (%s) referenced more than once." % format_output_field_name)
                else:
                    format_output_field_names.append(format_output_field_name)
            else:
                # The named field does not exist in the output view. Report it
                # as a validation error rather than letting list.index() raise
                # ValueError out of validate().
                err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_OUTPUT_FIELD_NAME].set_message(
                    "Output field name (%s) not present in output view." % format_output_field_name)

            # 3. Check that any rhs input view field references match input view field names.
            #    Find the rhs ${fieldname} references.
            for input_field in self._subst_re.findall(format_expression):
                if input_field not in input_view_fields:
                    err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_EXPRESSION].set_message(
                        "Input field name (%s) not present in input view." % input_field)
                    # We can only flag one error on the expression so break as soon we set one.
                    break

        # 4. Make sure all output view fields are covered by an expression,
        #    except for output fields with the same names as input fields:
        #    for these we transfer values from inputs to outputs unchanged.
        missing_output_fields = set(output_view_fields) - set(format_output_field_names) - set(input_view_fields)
        if len(missing_output_fields) > 0:
            # Sorted so that the message is the same from run to run.
            err_obj.get_property_err(PROP_FORMAT_SPECS).set_message(
                "Expression missing for output view fields (%s)." % ",".join(sorted(missing_output_fields)))

    def _format_expression(self, expr, record, field_types):
        """
        Expand every ${FieldName} token of an expression for one record.

        The expression is scanned exactly once and each token is replaced by
        the literal field value. Values are therefore never interpreted as
        regular expression replacement templates, and a value that happens to
        contain ${...} is not expanded a second time.

        @param expr: The format expression.
        @param record: The input record supplying the field values.
        @param field_types: Dictionary mapping input field name to field type.

        @return: The expression with all field references substituted.

        @raise SnapComponentError: If the expression references a field that
                                   is not in the input view.
        """
        def substitute(match):
            fld = match.group(1)
            try:
                field_type = field_types[fld]
            except KeyError:
                raise SnapComponentError(
                    "No input field name '%s' found. The following expression is invalid: %s" %
                    (fld, expr))

            value = record[fld]
            if value is None:
                # Null fields are rendered as an empty string.
                return ''
            if field_type == SnapDateTime:
                return convert_utils.datetime_to_string(value)
            if field_type == SnapNumber:
                return convert_utils.number_to_string(value)
            return value

        return self._subst_re.sub(substitute, expr)

    def execute(self, input_views, output_views):
        """
        Process the input records.

        For every input record one output record is written. Pass-through
        fields and fields with matching names are copied over, then each
        configured expression is expanded and stored in its output field.

        @param input_views: Dictionary of input views; only one is used.
        @param output_views: Dictionary of output views; only one is used.

        @raise SnapComponentError: If no input or output view is connected, or
                                   if an expression references an unknown
                                   input field.
        """
        try:
            output_view = list(output_views.values())[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No output view connected.")
        try:
            input_view = list(input_views.values())[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No input view connected.")

        # Map each input field name to its type once, up front, so the
        # per-record work is a dictionary lookup instead of a list search.
        # setdefault keeps the first occurrence of a name, as list.index() did.
        field_types = {}
        for name, field_type in zip(input_view.field_names, input_view.field_types):
            field_types.setdefault(name, field_type)

        # Build an dictionary indexed by output field name of all the
        # expressions.  Since validate checked everything, we can trust that
        # the output fields are valid.
        expressions = {}
        for spec in self.get_property_value(PROP_FORMAT_SPECS):
            expressions[spec[PROP_OUTPUT_FIELD_NAME]] = spec[PROP_EXPRESSION]

        # Make a list of common fields: fields with same names in the input and output views.
        # These fields will be copied as is from input to output.
        common_fields = (set(output_view.field_names) - set(expressions)) & set(input_view.field_names)

        record = input_view.read_record()
        while record:
            out_rec = output_view.create_record()

            # Handle pass-through fields
            out_rec.transfer_pass_through_fields(record)

            # Transfer fields matched by name
            out_rec.transfer_matching_fields(record, common_fields)

            for out_field, expr in expressions.items():
                out_rec[out_field] = self._format_expression(expr, record, field_types)

            output_view.write_record(out_rec)
            record = input_view.read_record()

        output_view.completed()

    def upgrade_1_0_to_1_1(self):
        """
        Change 1.
        The properties "expression" and "separator" were replaced by a
        list of dicts comprised of an "output field name" and a "format expression".

        The old "expression" value is split on the separator (when one is
        set) and each piece is split on its first '=' into the output field
        name and the format expression.
        """
        sep = self.get_property_value('separator')
        exp = self.get_property_value('expression')
        if sep:
            expr_list = exp.split(sep)
        else:
            expr_list = [ exp ]

        expression_list = []
        for expr in expr_list:
            expression = {}
            tokens = expr.split('=', 1)
            if len(tokens) != 2:
                # If the expression is malformed just shove the whole piece in
                # the output field prop. Only this piece is used, not the
                # complete separator-joined string.
                # NOTE(review): no format expression is set in this case, as
                # in the original code -- confirm the property accepts that.
                expression[PROP_OUTPUT_FIELD_NAME] = expr
            else:
                expression[PROP_OUTPUT_FIELD_NAME] = tokens[0]
                expression[PROP_EXPRESSION] = tokens[1]
            expression_list.append(expression)

        # Create the new properties
        self._define_properties()

        # Set the new properties
        self.set_property_value(PROP_FORMAT_SPECS, expression_list)

        # Cleanout the old properties from this resdef.
        self.del_property_def('separator')
        self.del_property_def('expression')

    def upgrade_1_1_to_1_2(self):
        """
        Add LOV constraint to field property
        """
        # Save property value. We need to recreate the property, which resets its value.
        saved_value = self.get_property_value(PROP_FORMAT_SPECS)

        # Delete the property
        self.del_property_def(PROP_FORMAT_SPECS)

        # Recreate the property
        self._define_properties()

        # Restore property value
        self.set_property_value(PROP_FORMAT_SPECS, saved_value)

    def upgrade_1_2_to_1_3(self):
        """
        No-op upgrade only to change component doc URI during the upgrade,
        which will be done by cc_info before calling this method.
        """
        pass
|