# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008-2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: HtmlFormatter.py 10330 2009-12-24 22:13:38Z grisha $
"""
HtmlFormatter Module and Resource Definition
This component provides HTML formatting for input records. Outputs are delivered
in a single-field view, so that it is suitable for easy digestion by PipeToHttp.
HTML formatting options, so far:
* Headers (arbitrary HTML to be sent before the first record)
* Footers (arbitrary HTML to be sent after the last record)
* Per-record HTML templating
Note that you probably really want this to be the last component in the pipeline,
and that it's probably only suitable for pipelines that are meant for consumption
by a web-browser. After all, the HTML for the header and footer will appear as
output records! And even those original input records will have been garbled by the
HTML templating that takes place.
Can look pretty, though...
"""
__docformat__ = "epytext en"
import sys, cgi
import re
from string import Template
from snaplogic.common.data_types import Record
from snaplogic.common import version_info
from snaplogic.cc.component_api import ComponentAPI
import snaplogic.cc.prop as prop
from snaplogic import components
from snaplogic.common.snap_exceptions import SnapComponentError
from snaplogic.snapi_base import keys
# Public names
__all__ = [ "HtmlFormatter" ]
class HtmlFormatter(ComponentAPI):
    """
    A component to produce properly formatted HTML output for a browser.

    The HTML formatter prepends the specified header and appends the
    specified footer to the record stream. In addition, each record is
    formatted via a simple HTML templating function, which in turn uses
    string.Template to do its work. The template syntax for that class
    looks pretty good and generic, so we just use this directly.

    A template string may include arbitrary HTML and text, as well as replacement
    tokens.

    A replacement token is defined as ${<field_name>}. If it is encountered
    in the template string then the occurrence of this token is replaced with
    the (HTML-escaped) value of the field that was specified via <field_name>.
    A replacement token for a field may appear more than once in the template
    string.

    So, for example, if the input view has the fields 'name' and 'address'
    then you can write a template string like this:

        "<big>${name} lives at <i>${address}</i>. Nothing else is known about ${name}.</big><br>"

    Note that not all input view fields need to be present in the template.

    A special token is defined to represent the entire line: ${}. If this token
    is used, there must not be anything else in the template string. The template
    string must be exactly: '${}'. In that mode every field of the input view is
    written out, HTML-escaped and followed by a single blank.

    If you want to represent a '$' sign in the output then just write '$$' in the
    template string. The '$$' represents an escape sequence for the dollar sign.

    The template string that is going to be used for each line needs to be
    presented via the 'template' property.

    The raw HTML that is to be used for header and footer needs to be presented
    via the 'header' and 'footer' properties respectively.

    Headers and footers can be any arbitrary HTML.

    """

    api_version = '1.0'
    component_version = '1.1'

    capabilities = {
        ComponentAPI.CAPABILITY_INPUT_VIEW_LOWER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_INPUT_VIEW_UPPER_LIMIT    : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_LOWER_LIMIT   : 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_UPPER_LIMIT   : 1
    }

    component_description = "Outputs HTML, one HTML-table row per output record."
    component_label       = "HTML Formatter"
    component_doc_uri     = "https://www.snaplogic.org/trac/wiki/Documentation/%s/ComponentRef/HtmlFormatter" % \
                                                        version_info.doc_uri_version

    # For finding input field name references in the template of the form ${Field001}.
    # Only the braced form is recognized here; a raw string keeps the backslashes literal.
    _subst_re = re.compile(r'\$[{]([\w]+)[}]')

    @staticmethod
    def _escape_field(value):
        """
        Convert a single field value into HTML-escaped unicode text.

        None becomes the empty string. Byte strings are decoded as UTF-8,
        unicode strings are used as they are, and any other value is
        converted with unicode().

        NOTE: cgi.escape() is called without quote=True, so only '&', '<'
        and '>' are escaped. Templates that place a field inside a quoted
        HTML attribute should be reviewed with that in mind.

        @param value: The field value taken from an input record.

        @return: The escaped text as a unicode string.

        """
        if value is None:
            return u""
        if isinstance(value, str):
            # Python 2 byte string: decode explicitly instead of relying on
            # the implicit ASCII codec, which fails on any non-ASCII byte.
            text = value.decode("utf-8")
        elif isinstance(value, unicode):
            text = value
        else:
            text = unicode(value)
        return cgi.escape(text)

    def create_resource_template(self):
        """
        Create HtmlFormatter resource definition template. It consists of:

        header:   Some HTML to be output at the start.
        template: The per-line template for the output.
        footer:   Some HTML to be output at the end.

        In addition, the fixed output view 'html_out' with the single
        string field 'line' is defined.

        """
        self.set_property_def('header',   prop.SimpleProp("Header", "string", "Some HTML to be output at the start, before the table."))
        self.set_property_def('template', prop.SimpleProp("Template", "string", "The per-table-line template for the output.", required=True))
        self.set_property_def('footer',   prop.SimpleProp("Footer", "string", "Some HTML to be output at the end, after the table."))

        # The header and footer have default values (empty string), the template has not.
        self.set_property_value('header', "")
        self.set_property_value('footer', "")

        # We also set a non-modifiable default output view.
        self.add_record_output_view_def("html_out", [ ("line", "string", "One line of HTML output.") ], "HTML output", False)

    def validate(self, err_obj):
        """
        Validate a proposed resource definition for this component.

        Check that the pre-defined output view is still there and that
        any input field references in the template are valid.

        @param err_obj: Error object on which validation problems are flagged,
                        via get_output_view_err() and get_property_err().

        """
        in_views = self.list_input_view_names()
        in_view = self.get_input_view_def(in_views[keys.SINGLE_VIEW])
        input_view_fields = [ d[keys.FIELD_NAME] for d in in_view[keys.VIEW_FIELDS] ]

        out_views = self.list_output_view_names()
        # Guard against an empty list so that a missing output view is
        # reported as a validation error instead of raising IndexError.
        if not out_views or out_views[0] != "html_out":
            err_obj.get_output_view_err().set_message("HTML Formatter needs the pre-defined 'html_out' view.")

        # 0. The template is a required property, so no need to check for None or ""
        template_prop = self.get_property_value("template")

        # 1. Check that any input view field references match input view field names.
        #    Find the ${fieldname} references.
        for input_field in self._subst_re.findall(template_prop):
            if input_field not in input_view_fields:
                err_obj.get_property_err("template").set_message(
                    "Input field name (%s) not present in input view." % input_field)
                # We can only flag one error on the template so break as soon we set one.
                break

    def execute(self, input_views, output_views):
        """
        Execute the HTML conversion of the input records.

        The header (if set) is written first, then one output record per
        input record, then the footer (if set). Every output record carries
        its text in the single field 'line'.

        @param input_views:  Dictionary of connected input views.
        @param output_views: Dictionary of connected output views.

        @raise SnapComponentError: If no input or output view is connected,
                                   if the template string is illegal, or if
                                   the template substitution fails for a record.

        """
        try:
            input_view = input_views.values()[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No input view connected.")

        try:
            output_view = output_views.values()[keys.SINGLE_VIEW]
        except IndexError:
            raise SnapComponentError("No output view connected.")

        # We prepare the template that we will use for outputting the records.
        # Besides the documented '${}' token, a template consisting of a single
        # blank is also treated as "whole record"; kept for backward compatibility.
        template_prop = self.get_property_value('template')
        all_template = template_prop in ('${}', ' ')

        template = None
        if not all_template:
            try:
                if isinstance(template_prop, str):
                    # Field values are substituted as unicode, so a byte-string
                    # template has to be decoded up front to avoid an implicit
                    # ASCII decode during substitution.
                    template_prop = template_prop.decode("utf-8")
                template = Template(template_prop)
            except (TypeError, ValueError):
                # UnicodeDecodeError is a ValueError and ends up here as well.
                raise SnapComponentError("Illegal template string...")

        # We only need to pre-create the record once
        out_rec = output_view.create_record()

        # Writing the header
        header = self.get_property_value("header")
        if header:
            out_rec['line'] = header
            output_view.write_record(out_rec)

        # Reading all the records, writing HTML as output
        escape_field = self._escape_field
        record = input_view.read_record()
        while record is not None:
            if all_template:
                # The special template token for the entire line is processed
                # separately: every field of the view, each followed by a blank.
                # Only None is blanked out, so values such as 0 are preserved.
                line = u"".join([ escape_field(record[f]) + u" " for f in input_view.field_names ])
            else:
                # We iterate through the field names and assemble the name/value
                # dictionary, which we will then use for the template replacement
                values = dict((f, escape_field(record[f])) for f in record)
                try:
                    line = template.substitute(values)
                except (KeyError, ValueError):
                    # KeyError: the template references a field that is not in the record.
                    # ValueError: the template contains an invalid placeholder.
                    raise SnapComponentError("Cannot perform template substitution.")

            out_rec['line'] = line
            output_view.write_record(out_rec)
            record = input_view.read_record()

        # Writing the footer
        footer = self.get_property_value("footer")
        if footer:
            out_rec['line'] = footer
            output_view.write_record(out_rec)

        # Closing the output stream
        output_view.completed()

    def upgrade_1_0_to_1_1(self):
        """
        No-op upgrade only to change component doc URI during the upgrade
        which will be done by cc_info before calling this method.

        """
        pass
|