FormatFields.py :  » Development » SnapLogic » snaplogic » components » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Development » SnapLogic 
SnapLogic » snaplogic » components » FormatFields.py
# $SnapHashLicense:
# 
# SnapLogic - Open source data services
# 
# Copyright (C) 2008 - 2009, SnapLogic, Inc.  All rights reserved.
# 
# See http://www.snaplogic.org for more information about
# the SnapLogic project. 
# 
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
# 
# "SnapLogic" is a trademark of SnapLogic, Inc.
# 
# 
# $

# $Id: FormatFields.py 10330 2009-12-24 22:13:38Z grisha $

"""
FormatFields Module and Resource Definition.

The FormatFields component provides functions for formatting field values in the output view.  Users specify
the format of each output field in the 'Field format expressions' property of the resource definition.

See FormatFieldsResDef code documentation for examples.

"""

# Imports
import re
from sets import Set

from snaplogic.common.data_types import Record
from snaplogic.common.data_types import SnapString,SnapNumber,SnapDateTime
from snaplogic.common import version_info
from snaplogic.cc.component_api import ComponentAPI
import snaplogic.cc.prop as prop
from snaplogic import components
from snaplogic.components.computils import convert_utils
from snaplogic.common.snap_exceptions import *
from snaplogic.snapi_base import keys


# Public names
__all__ = [ "FormatFields" ]

PROP_EXPRESSION        = "Format expression"
PROP_OUTPUT_FIELD_NAME = "Field name"
PROP_EXPRESSION_DICT   = "Expression dictionary"
PROP_EXPRESSIONS_LIST  = "Output field format specifications"
PROP_FORMAT_SPECS      = "Field format expressions"

class FormatFields(ComponentAPI):
    """
    Component that builds output field values from format expressions.

    Each entry of the 'Field format expressions' property pairs an output field name
    with a format expression.  Tokens of the form ${name} inside an expression refer
    to fields of the input view.

    """
    
    api_version = '1.0'
    component_version = '1.3'
    
    # Exactly one input view and one output view; pass-through is allowed.
    capabilities = {
        ComponentAPI.CAPABILITY_INPUT_VIEW_LOWER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_INPUT_VIEW_UPPER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_LOWER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_UPPER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_ALLOW_PASS_THROUGH        : True
    }
    
    component_description = "Performs output field formatting."
    component_label       = "Format Fields"
    component_doc_uri = "https://www.snaplogic.org/trac/wiki/Documentation/%s/ComponentRef/FormatFields" % \
                                                        version_info.doc_uri_version

    # This component only outputs strings
    supported_output_datatypes = [SnapString,]

    # For finding input field name references in the rhs of expressions in the form ${Field001}.
    # Raw string: '\$' and '\w' are regex escapes, not string escapes.
    _subst_re = re.compile(r'\$[{]([\w]+)[}]')

    def _define_properties(self):
        """
        Register the component's property definitions.

        Kept as a separate method because the upgrade methods call it as well.
        """
        # One list entry: a dictionary holding the output field name and its format expression.
        spec_entry = prop.DictProp(PROP_EXPRESSION_DICT,
                                   None,
                                   "Format specification dictionary",
                                   2,
                                   2,
                                   True,
                                   True)
        spec_entry[PROP_OUTPUT_FIELD_NAME] = prop.SimpleProp(
            PROP_OUTPUT_FIELD_NAME,
            SnapString,
            "What output field the result corresponds to",
            {'lov' : [keys.CONSTRAINT_LOV_OUTPUT_FIELD] },
            True)
        spec_entry[PROP_EXPRESSION] = prop.SimpleProp(
            PROP_EXPRESSION,
            SnapString,
            "Format expression",
            None,
            True)

        # The property itself is a list of such entries.
        spec_entries = prop.ListProp(PROP_EXPRESSIONS_LIST,
                                     spec_entry,
                                     "Format specification properties",
                                     1,
                                     required=True)
        self.set_property_def(PROP_FORMAT_SPECS, spec_entries)

    def create_resource_template(self):
        """
        Build the FormatFields resource definition template.

        The template consists solely of the property definitions registered
        by _define_properties().

        """
        self._define_properties()


    def validate(self, err_obj):
        """
        Validate a proposed resource definition for this component.

        For every entry of the format specification list:
          1. the output field name must exist in the output view and have a supported datatype,
          2. the output field name must not be used by more than one entry,
          3. every ${name} reference in the expression must name an input view field.
        Finally:
          4. every output view field must either have an expression or share its name
             with an input view field.

        Problems are recorded on err_obj; nothing is returned.

        """
        in_views  = self.list_input_view_names()
        out_views = self.list_output_view_names()
        out_view = self.get_output_view_def(out_views[keys.SINGLE_VIEW])
        output_view_fields = [ d[keys.FIELD_NAME] for d in out_view[keys.VIEW_FIELDS] ]
        output_field_types = [ d[keys.FIELD_TYPE] for d in out_view[keys.VIEW_FIELDS] ]
        in_view = self.get_input_view_def(in_views[keys.SINGLE_VIEW])
        input_view_fields = [ d[keys.FIELD_NAME] for d in in_view[keys.VIEW_FIELDS] ]

        # Get the list of format expressions. 
        # There needs to be one for each output view field.
        # The field names need to be valid. (the order doesn't matter though)
        format_specs = self.get_property_value(PROP_FORMAT_SPECS)

        format_output_field_names = []
        for i, spec in enumerate(format_specs):
            format_expression = spec[PROP_EXPRESSION]
            format_output_field_name = spec[PROP_OUTPUT_FIELD_NAME]

            if format_output_field_name not in output_view_fields:
                # 1a. Report an unknown output field instead of letting list.index() raise ValueError.
                err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_OUTPUT_FIELD_NAME].set_message(
                    "Output field name (%s) not present in output view." % format_output_field_name)
            else:
                # 1b. Check that the datatype of the output field is string
                field_index = output_view_fields.index(format_output_field_name)
                field_type = output_field_types[field_index]
                if field_type not in self.supported_output_datatypes:
                    err_obj.get_output_view_err()[out_views[keys.SINGLE_VIEW]][keys.VIEW_FIELDS][field_index].set_message(
                            "Output field '%s' datatype '%s' is not supported.  Must be one of: %s" %
                                (format_output_field_name, field_type, str(self.supported_output_datatypes)))

                # 2. While we are here, make sure that the field name isn't a duplicate.
                if format_output_field_name in format_output_field_names:
                    err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_OUTPUT_FIELD_NAME].set_message(
                        "Output field name (%s) referenced more than once." % format_output_field_name)
                else:
                    format_output_field_names.append(format_output_field_name)

            # 3. Check that any rhs input view field references match input view field names.
            # Find the rhs ${fieldname} references.
            for input_field in self._subst_re.findall(format_expression):
                if input_field not in input_view_fields:
                    err_obj.get_property_err(PROP_FORMAT_SPECS)[i][PROP_EXPRESSION].set_message(
                        "Input field name (%s) not present in input view." % input_field)
                    # We can only flag one error on the expression so break as soon we set one.
                    break

        # 4. Make sure all output view fields are covered by an expression,
        # except for output fields with the same names as input fields:
        # for these we transfer values from inputs to outputs unchanged.
        missing_output_fields = set(output_view_fields) - set(format_output_field_names) - set(input_view_fields)
        if missing_output_fields:
            # Sorted so the message is the same from run to run.
            err_obj.get_property_err(PROP_FORMAT_SPECS).set_message(
                "Expression missing for output view fields (%s)." % ",".join(sorted(missing_output_fields)))

    def execute(self, input_views, output_views):
        """
        Process the input records.

        For every record read from the input view an output record is created.
        Pass-through fields are transferred, as are fields that have the same name in
        both views and no expression of their own.  Every output field that has a
        format expression receives that expression with each ${name} token replaced
        by the text of the named input field: None becomes an empty string, and
        SnapDateTime and SnapNumber values are converted with convert_utils.

        Field values are inserted literally and in a single pass, so backslashes or
        '${...}' text inside a value are never reinterpreted.

        Raises SnapComponentError if no input or output view is connected, or if an
        expression references a field that is not present in the input view.

        """
        try:
            output_view = list(output_views.values())[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No output view connected.")
        try:
            input_view = list(input_views.values())[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No input view connected.")

        # Map each input field name to its type.  setdefault keeps the first
        # occurrence of a name, matching a list.index() lookup.
        input_field_types = {}
        for name, field_type in zip(input_view.field_names, input_view.field_types):
            input_field_types.setdefault(name, field_type)

        # Build a dictionary indexed by output field name of all the
        # expressions. Since validate checked everything, we can trust that
        # the output fields are valid.
        expressions = {}
        for spec in self.get_property_value(PROP_FORMAT_SPECS):
            expressions[spec[PROP_OUTPUT_FIELD_NAME]] = spec[PROP_EXPRESSION]

        # Make a list of common fields: fields with same names in the input and output views.
        # These fields will be copied as is from input to output.
        common_fields = (Set(output_view.field_names) - Set(expressions.keys())) & Set(input_view.field_names)

        record = input_view.read_record()
        while record:
            out_rec = output_view.create_record()
            # Handle pass-through fields
            out_rec.transfer_pass_through_fields(record)

            # Transfer fields matched by name
            out_rec.transfer_matching_fields(record, common_fields)

            for out_field, expr in expressions.items():
                # Walk the ${token} references once, stitching literal text and
                # field values together.  Values are never passed through re.sub
                # as a replacement template.
                pieces = []
                pos = 0
                for match in self._subst_re.finditer(expr):
                    fld = match.group(1)
                    try:
                        field_type = input_field_types[fld]
                    except KeyError:
                        raise SnapComponentError(
                                    "No input field name '%s' found. The following expression is invalid: %s" %
                                    (fld, expr))

                    # Render the field value as text.
                    value = record[fld]
                    if value is None:
                        text = ''
                    elif field_type == SnapDateTime:
                        text = convert_utils.datetime_to_string(value)
                    elif field_type == SnapNumber:
                        text = convert_utils.number_to_string(value)
                    else:
                        text = value

                    pieces.append(expr[pos:match.start()])
                    pieces.append(text)
                    pos = match.end()
                pieces.append(expr[pos:])
                out_rec[out_field] = ''.join(pieces)

            output_view.write_record(out_rec)

            record = input_view.read_record()

        output_view.completed()

    def upgrade_1_0_to_1_1(self):
        """
        Upgrade the resource definition from component version 1.0 to 1.1.

        Change 1.
        The properties "expression" and "separator" were replaced by a
        list of dicts, each comprised of an "output field name" and a "format expression".

        The old "expression" value is split on "separator" (when one is set); every
        piece of the form 'name=expression' becomes one entry of the new list.
        """

        sep = self.get_property_value('separator')
        exp = self.get_property_value('expression')

        if sep:
            expr_list = exp.split(sep)
        else:
            expr_list = [ exp ]

        expression_list = []
        for expr in expr_list:
            tokens = expr.split('=', 1)
            if len(tokens) != 2:
                # If the piece is malformed just shove the whole piece in the output field prop,
                # so that each entry carries its own fragment rather than the entire old value.
                expression = { PROP_OUTPUT_FIELD_NAME : expr }
            else:
                expression = { PROP_OUTPUT_FIELD_NAME : tokens[0],
                               PROP_EXPRESSION        : tokens[1] }
            expression_list.append(expression)

        # Create the new properties
        self._define_properties()

        # Set the new properties
        self.set_property_value(PROP_FORMAT_SPECS, expression_list)

        # Cleanout the old properties from this resdef.
        self.del_property_def('separator')
        self.del_property_def('expression')

    def upgrade_1_1_to_1_2(self):
        """
        Upgrade from 1.1 to 1.2: add the LOV constraint to the field name property.

        The property definition has to be dropped and recreated, which resets its
        value, so the current value is stashed first and written back afterwards.
        """
        current_specs = self.get_property_value(PROP_FORMAT_SPECS)

        # Drop the old definition, then register the current one in its place.
        self.del_property_def(PROP_FORMAT_SPECS)
        self._define_properties()

        # Put the stashed value back.
        self.set_property_value(PROP_FORMAT_SPECS, current_specs)
    
    def upgrade_1_2_to_1_3(self):
        """
        No-op upgrade from 1.2 to 1.3.

        It exists only so that the component doc URI gets changed during the
        upgrade, which will be done by cc_info before this method is called.

        """
        return None
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.