# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2008 - 2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
#
# $
# $Id: RssWrite.py 10330 2009-12-24 22:13:38Z grisha $
"""
RssWrite module for the RSS Writer component.
This module contains the RssWrite component class, which defines the resource template (properties and views),
declares the component capabilities, and implements the component operation.
"""
import re, socket
import sys, string, time
from snaplogic.common.snap_exceptions import *
from snaplogic.common import snap_log
from snaplogic.common.data_types import SnapString,SnapDateTime
from snaplogic.common import version_info
import snaplogic.components as components
from snaplogic.cc import component_api
from snaplogic.cc.component_api import ComponentAPI
import snaplogic.cc.prop as prop
from snaplogic.common.Rss.RssItem import RssItem
from snaplogic.common.Rss.RssFeed import RssFeed
# Public names
__all__ = [ "RssWrite"]
class RssWrite(ComponentAPI):
    """
    RSS Writer component.

    Records are read from the defined input views and assembled into a single
    RSS document, which is written to the output view.

    The 'feed' and 'item' input views carry enough information for a basic
    document. The remaining input views let upstream components supply the
    extra detail needed for a more complete and useful document.
    """

    # API and component version identifiers.
    api_version = '1.0'
    component_version = '1.1'

    # Limits on the number of views, and whether binary output is allowed.
    capabilities = {
        ComponentAPI.CAPABILITY_INPUT_VIEW_LOWER_LIMIT: 2,
        ComponentAPI.CAPABILITY_INPUT_VIEW_UPPER_LIMIT: 9,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_LOWER_LIMIT: 1,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_UPPER_LIMIT: 2,
        ComponentAPI.CAPABILITY_OUTPUT_VIEW_ALLOW_BINARY: True,
    }

    # Human-readable metadata for this component.
    component_description = (
        "This component reads information from input views and write a RSS document to the output view"
    )
    component_label = "RSS Writer"

    # The documentation link embeds the documentation version of this release.
    component_doc_uri = (
        "https://www.snaplogic.org/trac/wiki/Documentation/%s/ComponentRef/RssWrite"
        % version_info.doc_uri_version
    )
def create_resource_template(self):
    """
    Define the property, input views and output views of this resource.

    Property:
      - Protocol: The protocol for the feed document to write. Valid values
        are 'rss20' and 'atom10'.

    Input views are record views with pre-defined field names: feed, item,
    link, author, contributor, category, content, namespace and generator.
    Upstream components can choose to ignore some of them. However, the
    first two views, feed and item, are the core contents of the feed.

    Output views:
      - record: A record view carrying the encoding and the feed document.
      - rss:    A binary view carrying the feed document as "text/xml".
    """
    self.set_property_def(
        "Protocol",
        prop.SimpleProp("Protocol", "string", "The protocol to be used for the feed",
                        { "lov" : [ 'rss20', 'atom10' ] }, required=True))

    # One entry per input view: (view name, field definitions, description).
    # Each field definition is (field name, field type, description).
    inputs = [
        (
            'feed',
            (
                ( 'id', SnapString, 'The channel/feed ID.' ),
                ( 'link', SnapString, 'The link to this channel/feed.' ),
                ( 'title', SnapString, 'The channel/feed title.' ),
                # Description fixed: the word "of" was missing.
                ( 'version', SnapString, 'The protocol version of this channel/feed.' ),
                ( 'updated', SnapDateTime, 'The last updated time of this channel/feed.' ),
                ( 'encoding', SnapString, 'The encoding scheme.' ),
                ( 'language', SnapString, 'The language of this channel/feed.' ),
                ( 'copyright', SnapString, 'The copyright of this channel/feed.' ),
                ( 'description', SnapString, 'The description of this channel/feed.' ),
            ),
            "feed view",
        ),
        (
            'item',
            (
                ( 'id', SnapString, 'The item/entry ID.' ),
                ( 'link', SnapString, 'The link to this item/entry.' ),
                ( 'title', SnapString, 'The item/entry title.' ),
                ( 'published', SnapDateTime, 'The published time of this item/entry.' ),
                ( 'updated', SnapDateTime, 'The last updated time of this item/entry.' ),
                ( 'summary', SnapString, 'The description of this item/entry.' ),
            ),
            "item view",
        ),
        (
            'link',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'href', SnapString, 'The IRI reference of the link.' ),
                ( 'rel', SnapString, 'The relation type of the link.' ),
                ( 'type', SnapString, 'The advisory media type.' ),
                ( 'hreflang', SnapString, 'The language of the resource pointed to by href.' ),
                ( 'title', SnapString, 'The human-readable information about the link.' ),
                # Description fixed: "dvisory" was a typo for "advisory".
                ( 'length', SnapString, 'The advisory length of the linked content in octets.' ),
            ),
            "link view",
        ),
        (
            'author',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'name', SnapString, 'The human-readable name of the person.' ),
                ( 'href', SnapString, 'The IRI associated with the person.' ),
                ( 'email', SnapString, 'The email address associated with the person.' ),
            ),
            "author view",
        ),
        (
            'contributor',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'name', SnapString, 'The human-readable name of the person.' ),
                ( 'href', SnapString, 'The IRI associated with the person.' ),
                ( 'email', SnapString, 'The email address associated with the person.' ),
            ),
            "contributor view",
        ),
        (
            'category',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'term', SnapString, 'The string identifies the category.' ),
                ( 'scheme', SnapString, 'The IRI that identifies a categorization.' ),
                ( 'label', SnapString, 'The human-readable label for display in end-user applications.' ),
            ),
            "category view",
        ),
        (
            'content',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'type', SnapString, 'The type of the content, e.g. text, xhtml, etc.' ),
                ( 'value', SnapString, 'The content value string.' ),
                ( 'xml_base', SnapString, 'The xml:base attribute.' ),
                ( 'xml_lang', SnapString, 'The xml:lang attribute.' ),
            ),
            "content view",
        ),
        (
            'namespace',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'uri', SnapString, 'The uri of the namespace.' ),
                ( 'prefix', SnapString, 'The prefix of the namespace.' ),
            ),
            "namespace view",
        ),
        (
            'generator',
            (
                ( 'id', SnapString, 'The channel/feed or item/entry ID.' ),
                ( 'href', SnapString, 'The uri of the generator.' ),
                ( 'name', SnapString, 'The generator name.' ),
                ( 'version', SnapString, 'The version of the generator.' ),
            ),
            "generator view",
        ),
    ]

    # Input views
    for (name, fields, desc) in inputs:
        self.add_record_input_view_def(name, fields, desc)

    # Output views: the document as a record, and as a binary stream.
    self.add_record_output_view_def(
        'record',
        (
            ( 'encoding', SnapString, 'The encoding of feed document.' ),
            ( 'document', SnapString, 'The feed document.' ),
        ),
        "RSS feed view")
    self.add_binary_output_view_def('rss', ("text/xml",), "RSS feed view")
def validate(self, err_obj):
    """
    Validate the resource; there is nothing to check at this level.

    The only validation possible here concerns the Protocol property, and
    that is expressed as property constraints, which the CC enforces.

    @param err_obj: The error object (unused).
    """
    return None
def execute(self, input_views, output_views):
    """
    Read all input records, build the feed and write the feed document.

    Each record is handled according to the view it arrives on:
      - feed:        Create a feed structure
      - item:        Create an item and append to the feed structure
      - link:        Add to the corresponding feed or item structure
      - author:      Add to the corresponding feed or item structure
      - contributor: Add to the corresponding feed or item structure
      - category:    Add to the corresponding feed or item structure
      - content:     Add to the corresponding item structure
      - namespace:   Add to the corresponding feed structure
      - generator:   Add to the corresponding feed structure

    No assumption is made about the order in which views are read. Records
    that arrive before the feed or item they refer to are saved and replayed
    once all input has been consumed.

    @param input_views: The connected input views, keyed by view name.
    @type input_views: dict
    @param output_views: The connected output views, keyed by view name.
    @type output_views: dict
    @raise SnapComponentError: If the 'feed' or 'item' input view is not
        connected, or if no feed record was received.
    """
    # View name -> handler, listed in the order saved records are replayed.
    view_handlers = (
        ('feed', self._readFeed),
        ('item', self._readItem),
        ('link', self._readLink),
        ('author', self._readAuthor),
        ('contributor', self._readContributor),
        ('category', self._readCategory),
        ('content', self._readContent),
        ('namespace', self._readNamespace),
        ('generator', self._readGenerator),
    )

    # Check that the mandatory input views (feed, item) have been connected.
    connected_view_names = list(input_views.keys())
    for mandatory in ('feed', 'item'):
        if mandatory not in connected_view_names:
            raise SnapComponentError("\"%s\" input view not connected." % mandatory)

    # Build the input view to handler map for our connected input views.
    handler_by_name = dict(view_handlers)
    connected_input_handlers = {}
    for view_name in connected_view_names:
        connected_input_handlers[input_views[view_name]] = handler_by_name[view_name]

    self._feed = None
    self._proto = self.get_property_value('Protocol')

    # Structure to save received records. Every known view gets a list,
    # connected or not: the optional views may legitimately be left
    # unconnected, and the replay below must not fail on them.
    self._savedRecords = dict((view_name, []) for view_name, _handler in view_handlers)

    # While input is being consumed, records that cannot be placed yet are
    # saved; during replay they are dropped instead of being saved again.
    self.saveit = True
    self.process_input_views(connected_input_handlers)
    self.saveit = False

    if not self._feed:
        raise SnapComponentError('RssWrite', 'missing feed record')

    # Replay saved records: items first, so that records referring to an
    # item can find it. Feed records are never saved, so 'feed' is skipped.
    for view_name, handler in view_handlers:
        if view_name == 'feed':
            continue
        for record in self._savedRecords[view_name]:
            handler(record, None, None)

    encoding = self._feed.getAttribute('encoding')
    document = self._feed.serialize(protocol=self._proto)

    if 'record' in output_views:
        out_view = output_views['record']
        r = out_view.create_record()
        r['encoding'] = encoding
        r['document'] = document.decode(encoding)
        out_view.write_record(r)
        out_view.completed()

    if 'rss' in output_views:
        out_view = output_views['rss']
        out_view.write_binary(document, "text/xml")
        out_view.completed()
def _readFeed(self, record, input_view, active_input_views):
"""
Read the 'feed' record and set up the feed structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
if self._feed:
#SnapLog.ServerLog().warning("More than one record received in 'feed' view")
return
# A new feed
self._feed = RssFeed()
self._feed.setAttributes({ 'id': record['id'] })
self._feed.setAttributes({ 'link': record['link'] })
self._feed.setAttributes({ 'title': record['title'] })
self._feed.setAttributes({ 'rights': record['copyright'] })
self._feed.setAttributes({ 'updated': record['updated'] })
self._feed.setAttributes({ 'encoding': record['encoding'] })
self._feed.setAttributes({ 'language': record['language'] })
self._feed.setAttributes({ 'subtitle': record['description'] })
#SnapLog.ServerLog().debug("RssWrite: Feed ID - " + record['id'])
#SnapLog.ServerLog().debug("RssWrite: Feed Title - " + record['title'])
def _readItem(self, record, input_view, active_input_views):
"""
Read the 'item' record and add an item to the feed structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
if not self._isFeedReady(record):
return
# A new item.
item = RssItem()
item.setAttributes({ 'id': record['id'] })
item.setAttributes({ 'link': record['link'] })
item.setAttributes({ 'title': record['title'] })
item.setAttributes({ 'updated': record['updated'] })
item.setAttributes({ 'published': record['published'] })
item.setAttributes({ 'summary': record['summary'] })
# Add to the feed
self._feed.addItem(item)
#SnapLog.ServerLog().debug("RssWrite: Item ID - " + record['id'])
def _readLink(self, record, input_view, active_input_views):
"""
Read the 'link' record and add to the feed or item structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
if not self._isFeedReady(record):
return
if self._feed.getAttribute('id') == record['id']:
item = self._feed
else:
item = self._isItemReady(record)
if not item:
return
links = {}
if record['href']:
links['href'] = record['href']
if record['rel']:
links['rel'] = record['rel']
if record['type']:
links['type'] = record['type']
if record['hreflang']:
links['hreflang'] = record['hreflang']
if record['title']:
links['title'] = record['title']
if record['length']:
links['length'] = record['length']
# Save to the item
attrs = { 'links': [ links ] }
item.setAttributes(attrs)
def _readAuthor(self, record, input_view, active_input_views):
"""
Read the 'author' record and add to the item structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
item = self._isItemReady(record)
if not item:
return
author = record['name']
if record['email']:
author += "<" + record['email'] + ">"
author_detail = { 'name': record['name'] }
if record['href']:
author_detail['href'] = record['href']
if record['email']:
author_detail['email'] = record['email']
# Save to the item
attrs = { 'author': author, 'author_detail': author_detail }
item.setAttributes(attrs)
def _readContributor(self, record, input_view, active_input_views):
"""
Read the 'contributor' record and add to the item structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
item = self._isItemReady(record)
if not item:
return
contributor = { 'name': record['name'] }
if record['href']:
contributor['href'] = record['href']
if record['email']:
contributor['email'] = record['email']
# Save to the item
attrs = { 'contributors': [ contributor ] }
item.setAttributes(attrs)
def _readCategory(self, record, input_view, active_input_views):
"""
Read the 'category' record and add to the item structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
item = self._isItemReady(record)
if not item:
return
# Need more research on this ...
# - Have not had enough sample sites for testing
category = { 'term': record['term'], 'scheme': record['scheme'], 'label': record['label'] }
# Save to the item
attrs = { 'tags': [ category ] }
item.setAttributes(attrs)
def _readContent(self, record, input_view, active_input_views):
"""
Read the 'content' record and add to the item structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
item = self._isItemReady(record)
if not item:
return
content = { 'type': record['type'], 'value': record['value'] }
if record['xml_base']:
content['base'] = record['xml_base']
if record['xml_lang']:
content['language'] = record['xml_lang']
# Save to the item
attrs = { 'content': [ content ] }
item.setAttributes(attrs)
def _readNamespace(self, record, input_view, active_input_views):
"""
Read the 'namespace' record and add to the feed structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
if not self._isFeedReady(record):
return
namespace = { record['prefix']: record['uri'] }
# Save to the feed
attrs = { 'namespaces': namespace }
self._feed.setAttributes(attrs)
def _readGenerator(self, record, input_view, active_input_views):
"""
Read the 'generator' record and add to the feed structure.
@param record: The record.
@type record: snaplogic.common.Record
"""
if record is None:
return
if not self._isFeedReady(record):
return
generator = record['name']
generator_detail = { 'name': record['name'] }
if record['href']:
generator_detail['href'] = record['href']
if record['version']:
generator_detail['version'] = record['version']
# Save to the feed
attrs = { 'generator': generator, 'generator_detail': generator_detail }
self._feed.setAttributes(attrs)
def _isFeedReady(self, record):
"""
Check if feed structure is ready, i.e. if the feed record has been received and processed.
@param record: The record.
@type record: snaplogic.common.Record
@return: The feed object, if available; NOne, otherwise.
"""
if not self._feed:
# Save the record until feed header is received.
if self.saveit:
self._savedRecords[record.view_name].append(record)
return None
return self._feed
def _isItemReady(self, record):
"""
Check if the item structure is ready, i.e. if the item record has been received and processed.
@param record: The record.
@type record: snaplogic.common.Record
@return: The item object, if available; NOne, otherwise.
"""
if not self._isFeedReady(record):
return None
item = self._feed.getItemByID(record['id'])
if not item:
# Save the record until the item is received.
if self.saveit:
self._savedRecords[record.view_name].append(record)
return None
return item
def upgrade_1_0_to_1_1(self):
    """
    Upgrade from component version 1.0 to 1.1; nothing to do here.

    This upgrade exists only so that the component doc URI changes during
    the upgrade, which is done by cc_info before this method is called.
    """
    return None
|