# -*- coding: utf-8 -*-
# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008-2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
#$Id: html_rp.py 7781 2009-05-29 01:29:12Z dhiraj $
"""
Simple HTML RP module.
Note that there is only a writer, not a reader. We cannot read
HTML.
"""
import urllib, cgi
from snaplogic.rp import _RPWriter
from snaplogic.common.snap_exceptions import *
from snaplogic.common import uri_prefix
from snaplogic.common.config import snap_config
from snaplogic.common.snap_crypt import output_obfuscate
# MIME content type of the documents this module's Writer produces (HTML).
# NOTE(review): presumably read by the RP infrastructure when it selects or
# labels this module -- confirm against the caller, it is not used in this file.
CONTENT_TYPE = 'text/html'
class Writer(_RPWriter):
    """
    Writes out Python objects as simply formatted HTML. All records are sent in a single
    HTML document representing a table.

    The Writer for the HTML RP understands these options:

        { "font"           : "fixed-width" }
        { "table"          : "no" }
        { "title"          : <title of page> }
        { "enclosing_tags" : ( <HTML tags at the start>, <HTML tags at the end> ) }
        { "record_stream"  : "yes" }
        { "request_uri"    : <URI of the request> }
        { "type"           : "resdef" }
        { "obfuscate_dict" : <dictionary of parameter names and obfuscation constraints> }
        { "mode_indicator" : <mode indicator string> }
        { "first_field"    : <name of the dictionary field to print first> }

    If 'font' is specified as 'fixed-width', then the elements in the table output are
    rendered with a fixed-width font.

    If 'table' is specified as 'no', then the output records are not written into individual
    table rows, but instead 'raw' into a single table cell. This is useful if huge lists are
    produced, which may just as well be printed in their raw form.

    If 'title' is specified, then the page title in the browser is set, and (for human
    readable output) a single header line with that title is printed above the output table.

    If 'enclosing_tags' is specified, then the output is preceded by the specified start
    HTML code and ends with the specified end HTML code.

    If 'record_stream' is specified as 'yes', then list objects are rendered the same way
    as tuples: as table cells, all in one row. This is needed when we look at record stream
    outputs (for example with pipe_to_http). Without that, record streams cannot easily be
    imported into Excel, for example. But for meta data browsing it is actually nice to have
    list elements rendered as bulleted items, so both renderings are supported. Record
    streams are always rendered as plain HTML, without the human readable page decorations.

    The 'request_uri' option is mandatory if the output is human readable, because it is
    needed to build the link to the explorer. Normally, the infrastructure code (especially
    the HttpReq object) sets this automatically. It normally just consists of the 'http://'
    prefix and the host name, plus a trailing slash. For example: "http://foo.bar.com/"

    The 'type' option currently only understands 'resdef' as a value. This tells the RP that
    it needs to interpret certain key names (currently 'prop_values') in a special way.

    The 'obfuscate_dict' is a dictionary of parameter names and their obfuscation constraint
    value. It is only applied in the context of resdefs.

    The 'mode_indicator' string is a suffix that is added to the title.

    The 'first_field' option names a dictionary field that is printed before all other
    fields whenever a dictionary is rendered by write().
    """

    def __init__(self, stream, version=None, human_req=False, options=None):
        """
        Create a new HTML writer.

        @param stream:    Output stream. This class only calls its write() method.
        @type  stream:    file-like object

        @param version:   Accepted for interface compatibility; not used by this writer.

        @param human_req: Passed through to the _RPWriter base class.
                          NOTE(review): presumably the base class derives
                          self.human_readable from it -- confirm in _RPWriter.
        @type  human_req: bool

        @param options:   Writer options, see the class docstring. Unicode option
                          values are stored UTF-8 encoded.
        @type  options:   dict
        """
        super(Writer, self).__init__(stream, human_req, None, options)
        self.fixed = False
        self.element_class = ""
        self.make_table = True
        self.title = ""
        # Defaults for state that initialize() recomputes, so that the encoder
        # methods do not fail with AttributeError when used before initialize().
        self.start_tags = self.end_tags = ""
        self.record_stream = False
        self.obfuscate_dict = None
        self.is_resdef = False
        if options:
            # Encode option values: everything written to the stream is a byte string.
            encoded_opts = {}
            for opt_key, opt_val in options.items():
                if isinstance(opt_val, unicode):
                    opt_val = opt_val.encode('utf-8')
                encoded_opts[opt_key] = opt_val
            self.options = encoded_opts
            self.is_resdef = encoded_opts.get("type") == "resdef"
            self.obfuscate_dict = encoded_opts.get("obfuscate_dict")

    def initialize(self, header=""):
        """
        Prepare the underlying stream for sending the data.

        Evaluates the writer options, then writes the start of the HTML document,
        the opening table tag and an optional header row.

        @param header:  An optional header for the output stream. If it is empty
                        and the output is human readable, the title is used instead.
        @type  header:  string

        @return:        String representation of what we wrote to
                        the output stream (could be used by the
                        caller for caching, for example).
        @rtype:         string
        """
        self._apply_options()
        self._setup_layout()

        parts = [self._render_page_start(), "<table>\n"]
        if header:
            caption = self._encode(header)
        elif self.title and self.human_readable:
            caption = self._encode(self.title)
        else:
            caption = None
        if caption:
            parts.append('<tr><th align="left"><b><i>%s</i></b></th></tr>\n' % caption)
        parts.append(self.table_prefix)

        s = "".join(parts)
        self.stream.write(s)
        return s

    def _apply_options(self):
        """Copy the rendering related entries of self.options into instance attributes."""
        self.start_tags = self.end_tags = ""
        self.record_stream = False
        opts = self.options
        if not opts:
            return
        if opts.get('font') == 'fixed-width':
            self.fixed = True
            self.element_class = " class=\"sf\""
        if opts.get('table') == 'no':
            self.make_table = False
        if opts.get('record_stream') == 'yes':
            self.record_stream = True
            # Record streams are never rendered with our special decorations.
            # Just plain HTML.
            self.human_readable = False
        if 'title' in opts:
            self.title = opts['title']
        if 'enclosing_tags' in opts:
            (self.start_tags, self.end_tags) = opts['enclosing_tags']
        if 'mode_indicator' in opts:
            self.title += " (%s)" % opts['mode_indicator']

    def _setup_layout(self):
        """Compute the HTML fragments that surround the whole output and each record."""
        if self.make_table:
            self.table_prefix = "%s" % self.start_tags
            self.table_postfix = "%s" % self.end_tags
            self.row_prefix = "<tr>"
            self.row_postfix = "</tr>\n"
            if not self.record_stream:
                # If we are outputting a record stream then each record will
                # be its own table row. But if not, we don't know yet how
                # each element will be rendered, so we need to prepare each
                # row for just being dumped (and rendered). Therefore, we are
                # adding table-cell tags here.
                self.row_prefix += '<td valign="top" %s>' % self.element_class
                self.row_postfix = "</td>" + self.row_postfix
        else:
            # Everything goes 'raw' into one single table cell.
            self.table_prefix = '<tr><td valign="top" %s>%s' % (self.element_class, self.start_tags)
            self.table_postfix = "%s</td></tr>" % self.end_tags
            self.row_prefix = ""
            if "<pre>" in self.start_tags.lower():
                # Pre-formatted output keeps its own line breaks.
                self.row_postfix = ""
            else:
                self.row_postfix = "<br>"

    def _explorer_uri(self):
        """Return the configured explorer URI with this server's request URI as 'dataserver' parameter."""
        explorer_uri = snap_config.get_instance().get_section('main')['explorer_uri']
        return explorer_uri + "&dataserver=" + urllib.quote(self.options['request_uri'])

    def _render_page_start(self):
        """Return the HTML from the opening html tag up to (excluding) the opening table tag."""
        s = "<html><head>\n"
        s += '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />\n'
        if not self.human_readable:
            return s + "</head><body>"
        # The title is escaped, because it is taken from the options and must
        # not be able to close the title element and inject markup.
        page_title = cgi.escape("%s" % self.title)
        return s + """
<title>%s</title>
<style>
a, td {
margin: 0;
padding: 0;
font-weight: normal;
font-family: Helvetica, Arial, sans-serif;
}
th {
margin: 5;
padding-bottom: 20;
font-weight: normal;
font-family: Helvetica, Arial, sans-serif;
font-size: 1.2em;
}
td.sf {
margin: 0;
padding: 0;
font-weight: normal;
font-family: courier;
font-size: 0.8em;
}
h1 {
margin: 5;
padding-bottom: 15;
font-weight: bold;
font-style: italic;
font-family: Helvetica, Arial, sans-serif;
font-size: 1.4em;
}
h2 {
margin: 5;
padding-bottom: 15;
font-weight: bold;
font-style: italic;
font-family: Helvetica, Arial, sans-serif;
font-size: 1.3em;
}
h3 {
margin: 5;
padding-bottom: 10;
font-weight: bold;
font-style: italic;
font-family: Helvetica, Arial, sans-serif;
font-size: 1.1em;
}
h4 {
margin: 5;
padding-bottom: 10;
font-weight: bold;
font-style: italic;
font-family: Helvetica, Arial, sans-serif;
font-size: 1.0em;
}
</style></head>
<body>
<img src=\"%s/snaplogic_logo.gif\"><br> <br>
<a href=\"/\">Home</a>
<img src=\"%s/SnapLogic_gear.gif\">
<a href=\"%s\">SnapLogicExplorer</a><br><hr><br>""" % (page_title, uri_prefix.STATIC, uri_prefix.STATIC, self._explorer_uri())

    def _dict_encode(self, o, first_field=None, obfuscate_list=None):
        """
        Encode a dictionary into HTML.

        This is recursive in that every single element in the dictionary
        is being separately encoded by the top-level _encode() function.

        Note that we perform an ordering of the returned elements. We want
        to display first:

            1. name
            2. label
            3. description
            4. overview

        The 'name' field is not always called 'name'. It could be 'resource_name',
        'component_name', 'input_view_name' or just 'name'. Thus, we first search
        for the shortest key that ends with 'name' and display that first.
        All remaining fields follow in alphabetical order.

        @param o:               A dictionary object.
        @type  o:               dict

        @param first_field:     If this field is present in the dictionary,
                                it is printed before everything else.
        @type  first_field:     string

        @param obfuscate_list:  Dictionary of field names whose value needs to be obfuscated,
                                mapped to the obfuscation constraint. Only simple values
                                (no list, tuple or dict) are obfuscated.
        @type  obfuscate_list:  dict

        @return:                Encoding of the dictionary in HTML, or None if the
                                dictionary is 'private' (its description starts with
                                "__") and must not be displayed.
        @rtype:                 string
        """
        # Determine the order in which the elements are displayed.
        keys = list(o.keys())
        fields = []

        # If the first_field was specified, we look for it and if present print
        # it first.
        if first_field and first_field in keys:
            fields.append(first_field)
            keys.remove(first_field)

        # Get all the '*name' entries, shortest first. The shortest name is the
        # winner. Non-string keys cannot be names and are skipped here, so that
        # they do not cause an AttributeError.
        names_keys = sorted((k for k in keys
                             if isinstance(k, (str, unicode)) and k.endswith('name')),
                            key=len)
        if names_keys:
            fields.append(names_keys[0])

        if 'label' in keys:
            fields.append('label')

        if 'description' in keys:
            try:
                is_private = o['description'].startswith("__")
            except (AttributeError, TypeError):
                # The description is not always a string.
                is_private = False
            if is_private:
                # Items that have a description string starting with "__" are
                # considered 'private' and should not be displayed in this
                # output at all.
                return None
            fields.append('description')

        if 'overview' in keys:
            fields.append('overview')

        # All the remaining fields follow in alphabetical order.
        fields.extend(sorted(k for k in keys if k not in fields))

        if not obfuscate_list:
            obfuscate_list = {}

        rows = []
        for k in fields:
            val = o[k]
            if self.is_resdef and k == "prop_values":
                # The property values of a resdef are the only place where
                # the obfuscation dictionary of the writer is applied.
                encoded_val = self._encode(val, obfuscate_list=self.obfuscate_dict)
            else:
                if k in obfuscate_list and not isinstance(val, (list, tuple, dict)):
                    # Only simple types are obfuscated
                    val = output_obfuscate(val, obfuscate_list[k])
                encoded_val = self._encode(val)
            encoded_key = self._encode(k)
            # A value of None marks a private item, which is left out entirely.
            if encoded_key and encoded_val is not None:
                rows.append("<tr><td valign=\"top\"><b><i>%s:</i></b></td><td valign=\"top\">%s</td></tr>\n"
                            % (encoded_key, encoded_val))

        return "<table>\n" + "".join(rows) + "</table>\n"

    def _list_encode(self, o, vertical=True):
        """
        Encode a list into HTML.

        @param o:           An iterable object to be encoded.
        @type  o:           list or tuple

        @param vertical:    Flag indicating whether this iterable item
                            should be encoded as a bullet list, or as
                            a one-line table, each in its own table cell.
                            Note that if self.record_stream is True then
                            this flag here will be ignored, and the list
                            will always be encoded as a row of table cells.
        @type  vertical:    bool

        @return:            Encoding of the list in HTML.
        @rtype:             string
        """
        if vertical and not self.record_stream:
            # Bullet list: empty and private items are left out.
            items = []
            for element in o:
                encoded_data = self._encode(element)
                if encoded_data:
                    items.append("<li>%s</li>\n" % encoded_data)
            return "<ul>\n" + "".join(items) + "</ul>\n"

        cells = []
        for element in o:
            encoded_data = self._encode(element)
            if encoded_data is None:
                # A private item must not be shown. The cell itself is kept
                # (empty), so that the remaining columns do not shift.
                encoded_data = ''
            cells.append('<td valign="top">%s</td>' % encoded_data)
        row = "".join(cells)

        if self.record_stream:
            # Records are rendered directly as rows into the outer table.
            return row
        # Everything else is rendered in different ways, and thus
        # needs to be enclosed in its own table tags.
        return "<table><tr>%s</tr></table>\n" % row

    def _encode(self, o, first_field=None, obfuscate_list=None):
        """
        Encode an object into HTML.

        This recursively calls specialized encoders for lists or
        dictionaries, as necessary. Everything else is converted
        to a string and HTML-escaped. Strings that look like a
        hyperlink or a path are rendered as links.

        @param o:               Any object.
        @type  o:               object

        @param first_field:     If a dictionary is to be printed, then the
                                field specified here is printed first.
        @type  first_field:     string

        @param obfuscate_list:  Dictionary of field names whose value needs to be obfuscated.
                                This is only passed on to dictionaries.
        @type  obfuscate_list:  dict

        @return:                Encoded object in HTML. None is returned for a
                                'private' dictionary that must not be displayed.
        @rtype:                 string
        """
        if isinstance(o, list):
            # A list is encoded differently...
            return self._list_encode(o, True)
        if isinstance(o, tuple):
            # ... than a tuple
            return self._list_encode(o, False)
        if isinstance(o, dict):
            return self._dict_encode(o, first_field, obfuscate_list)
        if o is None:
            return ''

        if isinstance(o, unicode):
            o = o.encode('utf-8')
        elif not isinstance(o, str):
            o = str(o)
        s = cgi.escape(o)
        # Do something special for stuff that looks like a hyperlink or path.
        if s.startswith(("http://", "https://", "/")):
            # Inside the attribute the double quote has to be escaped as well,
            # otherwise the value could break out of the href attribute.
            s = '<a href="%s">%s</a>' % (cgi.escape(o, True), s)
        return s

    def write(self, raw_record, options=None):
        """
        Write an object to the underlying stream.

        A 'private' record (a dictionary whose description starts with "__")
        is not written at all; an empty string is returned in that case.

        @param raw_record:  A python object that needs to be encoded.
        @type  raw_record:  object

        @param options:     A per object options dictionary. The HTML RP understands
                            the 'hlevel' option, which renders the record as a headline
                            inside <hN> tags, where N is the option value. Styles are
                            defined for the levels 1 (largest) to 4 (smallest).
        @type  options:     dict

        @return:            String representation of what we wrote to
                            the output stream (could be used by the
                            caller for caching, for example).
        @rtype:             string
        """
        # The 'first_field' option influences the sort order.
        # If the field name that's specified here is in a
        # dictionary that needs to be printed, then this field
        # will be printed first.
        first_field = self.options.get('first_field') if self.options else None

        h_start_tag = h_end_tag = ""
        if options and 'hlevel' in options:
            h_start_tag = "<h%s>" % options['hlevel']
            h_end_tag = "</h%s>" % options['hlevel']
            if "<pre>" in self.start_tags.lower():
                # Sometimes, bulk output is delivered as pre-formatted data.
                # Especially log files. Therefore, when we do a headline,
                # we need to get out and then back into the pre-formatted
                # mode.
                h_start_tag = "</pre>" + h_start_tag
                h_end_tag += "\n<pre>"
            # Some modules produce headlines that are enclosed in some number of '='
            # characters ("== Title ==\n"). Since we are using human-readable HTML
            # formatting for headlines, we remove these characters. The markers are
            # stripped from both ends independently, so that a headline without
            # trailing markers or newline does not lose any of its text.
            if isinstance(raw_record, (str, unicode)) and raw_record.startswith("="):
                raw_record = raw_record.rstrip().strip("=")

        encoded = self._encode(raw_record, first_field)
        if encoded is None:
            # Private records are not displayed (and must not show up as 'None').
            return ""

        s = "%s%s%s%s%s" % (self.row_prefix, h_start_tag, encoded, h_end_tag, self.row_postfix)
        self.stream.write(s)
        return s

    def end(self, footer=""):
        """
        End the output stream.

        This can also write an optional footer as a table row.

        @param footer:  An optional footer for the output stream.
        @type  footer:  string

        @return:        String representation of what we wrote to
                        the output stream (could be used by the
                        caller for caching, for example).
        @rtype:         string
        """
        parts = [self.table_postfix]
        if footer:
            parts.append("<tr><td><b><i>%s</i></b></td></tr>" % (self._encode(footer) or ''))
        parts.append("</table>\n")
        if self.human_readable:
            parts.append("<br><hr><a href=\"/\">Home</a> <img src=\"%s/SnapLogic_gear.gif\">" % uri_prefix.STATIC)
            # Link to the explorer, mapped back to this server via the request URI.
            parts.append("<a href=\"%s\">SnapLogicExplorer</a><br>" % self._explorer_uri())
        parts.append("</body></html>")

        s = "".join(parts)
        self.stream.write(s)
        return s
|