RssDocument.py : » Development » SnapLogic » snaplogic » common » Rss » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Development » SnapLogic
SnapLogic » snaplogic » common » Rss » RssDocument.py
# $SnapHashLicense:
# 
# SnapLogic - Open source data services
# 
# Copyright (C) 2008, SnapLogic, Inc.  All rights reserved.
# 
# See http://www.snaplogic.org for more information about
# the SnapLogic project. 
# 
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
# 
# "SnapLogic" is a trademark of SnapLogic, Inc.
# 
# 
# $

# $Id: RssDocument.py 1438 2008-03-10 19:02:16Z dhiraj $
"""
Module for RSS Document object.

This module contains a RssDocument object for use in creating and formatting XML DOM documents conforming to
the rss20 and atom10 protocols for a given channel/feed or item/entry element.

"""

import xml.dom.minidom

from feedparser import FeedParserDict

import RssUtils
from snaplogic.common.snap_exceptions import *


class RssDocument:

    """
    This class provides interfaces for formatting an XML DOM document conforming to the rss20 or atom10 protocol.
    This class takes the feedparser object and creates an XML DOM based on the feedparser object values.
    The protocols supported are 'rss20' and 'atom10'.  Although the feedparser object contains mostly
    normalized object attributes, there are protocol-specific attributes that are used differently when producing
    the resulting document in different protocols.
    
    """
    
    def __init__(self):
        """
        Initialization.
        
        """
        pass
    
    
    def createFeed(self, feed, proto):
        """
        Create an XML DOM document for a Feed element.
        
        @param feed: The feedparser top-level object.
        @param feed: FeedParserDict
        
        @param proto: The protocol of the feed.
        @type proto: string
        
        @return: The document (of xml.dom.minidom.Document type) of the feed.
        
        """
        if proto == 'rss20':
            return self.createRssChannel(feed)
        if proto == 'atom10':
            return self.createAtomFeed(feed)
        raise SnapValueError('RssDocument', 'createFeed', 'protocol', proto)
    
    
    def createItem(self, item, proto, feeddoc = None):
        """
        Create an XML DOM document for an Item element.
        
        @param item: The feedparser entry object.
        @param item: FeedParserDict
        
        @param proto: The protocol of the feed.
        @type proto: string
        
        @param feeddoc: The feed document that this item element is appended to.  If it is None, a new Document
            is created and appends this item to it.  When it is not None, the top level element of the feed,
            the child of the feed document for Atom10, the grandchild of the feed document for Rss20, is
            appending the new item element.
        @type feeddoc: xml.dom.minidom.Document
        
        @return: The document (of xml.dom.minidom.Document type) of the entry.
        
        """
        if proto == 'rss20':
            return self.createRssItem(item, feeddoc)
        if proto == 'atom10':
            return self.createAtomEntry(item, feeddoc)
        raise SnapValueError('RssDocument', 'createItem', 'protocol', proto)

    
    def createRssChannel(self, feed):
        """
        Create an XML DOM document for a Feed element of rss20 protocol.
        
        @param feed: The feedparser top-level object.
        @param feed: FeedParserDict
        
        @return: The document (of xml.dom.minidom.Document type) of the feed.
        
        """
        feeddoc = xml.dom.minidom.Document()

        # First level element - 'rss'
        elem_rss = feeddoc.createElement('rss')
        elem_rss.setAttribute('version', '2.0')
        feeddoc.appendChild(elem_rss)
        
        # Second level element - 'channel'
        elem_ch = feeddoc.createElement('channel')
        elem_rss.appendChild(elem_ch)

        # Child elements of 'channel'
        for k, v in feed.iteritems():
            # A few attributes are list
            if k == 'namespaces':
                for p, u in v.iteritems():
                    # Just be cautious not to include the obvious invalid namespace
                    if u == 'http://www.w3.org/2005/Atom': continue    # Document won't be displayed by browsers
                    if p: elem_rss.setAttribute('xmlns:' + p, u)
                    # else: elem_rss.setAttribute('xmlns', u)
            elif k == 'links':
                # Use it only when link attribute is not here
                if not feed.has_key('link'):
                    # The value (v) is a list, take the first one, or the one with relation of 'self'
                    m = None
                    for a in v:
                        # Find the link that is of self relation
                        if a.has_key('rel') and a['rel'] == 'self':
                            # Make sure there is href
                            if a.has_key('href') and a['href']:
                                m = a
                                break
                    if not m:
                        # Take the first link
                        for a in v:
                            # Make sure there is href
                            if a.has_key('href') and a['href']:
                                m = a
                                break
                    if m:
                        # Found a link, create the element
                        e = feeddoc.createElement('link')
                        c = feeddoc.createTextNode(m['href'])
                        e.appendChild(c)
                        elem_ch.appendChild(e)
            elif k == 'generator':
                # Use generator_detail if it exists
                if not feed.has_key('generator_detail'):
                    e = feeddoc.createElement('generator')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_ch.appendChild(e)
            elif k == 'generator_detail':
                if v.has_key('name'):
                    e = feeddoc.createElement('generator')
                    c = feeddoc.createTextNode(v['name'])
                    if v.has_key('href'):
                        e.setAttribute('uri', v['href'])
                    if v.has_key('version'):
                        e.setAttribute('version', v['version'])
                    e.appendChild(c)
                    elem_ch.appendChild(e)
            elif k == 'subtitle':
                e = feeddoc.createElement('description')
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_ch.appendChild(e)
            elif k == 'rights':
                e = feeddoc.createElement('copyright')
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_ch.appendChild(e)
            elif k == 'published' and v:
                e = feeddoc.createElement('pubDate')
                t = RssUtils.timeDateTimeToString (v, 'rss20')
                if t: t += ' GMT'
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_ch.appendChild(e)
            elif k == 'updated' and v:
                e = feeddoc.createElement('lastBuildDate')
                t = RssUtils.timeDateTimeToString (v, 'rss20')
                if t: t += ' GMT'
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_ch.appendChild(e)
            # elif k == 'id':
                # Ignore this attribute
                # pass
            # TODO: filtered out rss20-undefined attributes
            elif v:
                e = feeddoc.createElement(k)
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_ch.appendChild(e)

        return feeddoc
    
    
    def createAtomFeed(self, feed):
        """
        Create an XML DOM document for a Feed element of atom10 protocol.
        
        @param feed: The feedparser top-level object.
        @param feed: FeedParserDict
        
        @return: The document (of xml.dom.minidom.Document type) of the feed.
        
        """
        feeddoc = xml.dom.minidom.Document()

        # First level element - 'feed'
        elem_feed = feeddoc.createElement('feed')
        elem_feed.setAttribute('xmlns', 'http://www.w3.org/2005/Atom')
        feeddoc.appendChild(elem_feed)

        # Child elements of 'feed'
        for k, v in feed.iteritems():
            # A few attributes are list
            if k == 'namespaces':
                for p, u in v.iteritems():
                    # Do not set xmlns attribute, previously set already
                    if p: elem_feed.setAttribute('xmlns:' + p, u)
            elif k == 'links':
                for a in v:
                    if a.has_key('href') and a['href']:
                        e = feeddoc.createElement('link')
                        for m, n in a.iteritems():
                            e.setAttribute(m, n)
                        elem_feed.appendChild(e)
            elif k == 'generator':
                if not feed.has_key('generator_detail'):
                    e = feeddoc.createElement('generator')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_feed.appendChild(e)
            elif k == 'generator_detail':
                if v.has_key('name'):
                    e = feeddoc.createElement('generator')
                    c = feeddoc.createTextNode(v['name'])
                    if v.has_key('href'):
                        e.setAttribute('uri', v['href'])
                    if v.has_key('version'):
                        e.setAttribute('version', v['version'])
                    e.appendChild(c)
                    elem_feed.appendChild(e)
            elif k == 'published' and v:
                e = feeddoc.createElement(k)
                t = RssUtils.timeDateTimeToString (v, 'atom10')
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_feed.appendChild(e)
            elif k == 'updated' and v:
                e = feeddoc.createElement('updated')
                t = RssUtils.timeDateTimeToString (v, 'atom10')
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_feed.appendChild(e)
            # TODO: filtered out atom10-undefined attributes
            elif v:
                e = feeddoc.createElement(k)
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_feed.appendChild(e)

        return feeddoc

    
    def createRssItem(self, item, feeddoc = None):
        """
        Create an XML DOM document for an Item element of rss20 protocol.
        
        @param item: The feedparser entry object.
        @param item: FeedParserDict
        
        @param feeddoc: The feed document that this item element is appended to.  If it is None, a new Document
            is created and appends this item to it.  When it is not None, the top level element of the feed,
            the child of the feed document for Atom10, the grandchild of the feed document for Rss20, is
            appending the new item element.
        @type feeddoc: xml.dom.minidom.Document

        @return: The document (of xml.dom.minidom.Document type) of the entry.
        
        """
        if not feeddoc:
            feeddoc = xml.dom.minidom.Document()
            root = feeddoc
        else:
            # Get the 'channel' element, child element of the top 'rss' element
            root = feeddoc.getElementsByTagName('channel')[0]
        
        # The 'item' element.
        elem_item = feeddoc.createElement('item')
        root.appendChild(elem_item)
        
        # Child elements of 'item'
        for k, v in item.iteritems():
            if k == 'link':
                if not item.has_key('links'):
                    e = feeddoc.createElement(k)
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_item.appendChild(e)
            elif k == 'links':
                for a in v:
                    e = feeddoc.createElement('link')
                    c = feeddoc.createTextNode(a.get('href', ''))
                    e.appendChild(c)
                    elem_item.appendChild(e)
                    # Allow only one 'link' element, take the first one.
                    break
            elif k == 'author':
                if not item.has_key('author_detail'):
                    e = feeddoc.createElement('author')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_item.appendChild(e)
            elif k == 'author_detail':
                if v.has_key('name'):
                    e = feeddoc.createElement('author')
                    c = feeddoc.createElement('name')
                    t = feeddoc.createTextNode(v['name'])
                    c.appendChild(t)
                    e.appendChild(c)
                    if v.has_key('href'):
                        c = feeddoc.createElement('uri')
                        t = feeddoc.createTextNode(v['href'])
                        c.appendChild(t)
                        e.appendChild(c)
                    if v.has_key('email'):
                        c = feeddoc.createElement('email')
                        t = feeddoc.createTextNode(v['email'])
                        c.appendChild(t)
                        e.appendChild(c)
                    e.appendChild(c)
                    elem_item.appendChild(e)
            elif k == 'contributors':
                # Ignore it.
                pass
            elif (k == 'published' or k == 'updated') and v:
                e = feeddoc.createElement('pubDate')
                t = RssUtils.timeDateTimeToString (v, 'rss20')
                if t: t += ' GMT'
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_item.appendChild(e)
            elif k == 'content':
                e = feeddoc.createElement('description')
                if type(v) == list: d = v[0]
                elif type(v) == dict: d = v
                elif isinstance(v, FeedParserDict): d = v
                t = d.get('type', 'text/plain')
                if t == 'text' or t == 'text/plain':
                    c = xml.dom.minidom.CDATASection()
                    c.data = d['value']
                else:
                    c = feeddoc.createTextNode(d['value'])
                # Some browser doesn't display 'text/html'
                if t == 'text/html': t = 'html'
                e.appendChild(c)
                e.setAttribute('type', t)
                elem_item.appendChild(e)
            elif k == 'summary' and v:
                if not item.has_key('content'):
                    e = feeddoc.createElement('description')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_item.appendChild(e)
            elif k == 'summary_detail':
                # Ignore it
                pass
            elif k == 'tags':
                for t in v:
                    e = feeddoc.createElement('category')
                    c = feeddoc.createTextNode(t.get('term', ''))
                    if t.get('scheme', ''):
                        e.setAttribute('domain', t.get('scheme', ''))
                    e.appendChild(c)
                    elem_item.appendChild(e)
            # TODO: filtered out atom10-undefined attributes
            elif v:
                e = feeddoc.createElement(k)
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_item.appendChild(e)
        
        return feeddoc

    
    def createAtomEntry(self, item, feeddoc = None):
        """
        Create an XML DOM document for an Entry element of atom10 protocol.
        
        @param item: The feedparser entry object.
        @param item: FeedParserDict
        
        @param feeddoc: The feed document that this item element is appended to.  If it is None, a new Document
            is created and appends this item to it.  When it is not None, the top level element of the feed,
            the child of the feed document for Atom10, the grandchild of the feed document for Rss20, is
            appending the new item element.
        @type feeddoc: xml.dom.minidom.Document
        
        @return: The document (of xml.dom.minidom.Document type) of the entry.
        
        """
        if not feeddoc:
            feeddoc = xml.dom.minidom.Document()
            root = feeddoc
        else:
            # Get the top 'feed' element
            root = feeddoc.documentElement
        
        # The 'entry' element.
        elem_ent = feeddoc.createElement('entry')
        root.appendChild(elem_ent)
        
        # Child elements of 'item'
        for k, v in item.iteritems():
            if k == 'link':
                e = feeddoc.createElement(k)
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_ent.appendChild(e)
            elif k == 'links':
                for a in v:
                    e = feeddoc.createElement('link')
                    for m, n in a.iteritems():
                        e.setAttribute(m, n)
                    elem_ent.appendChild(e)
            elif k == 'author':
                if not item.has_key('author_detail'):
                    e = feeddoc.createElement('author')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_ent.appendChild(e)
            elif k == 'author_detail':
                if v.has_key('name'):
                    e = feeddoc.createElement('author')
                    c = feeddoc.createElement('name')
                    t = feeddoc.createTextNode(v['name'])
                    c.appendChild(t)
                    e.appendChild(c)
                    if v.has_key('href'):
                        c = feeddoc.createElement('uri')
                        t = feeddoc.createTextNode(v['href'])
                        c.appendChild(t)
                        e.appendChild(c)
                    if v.has_key('email'):
                        c = feeddoc.createElement('email')
                        t = feeddoc.createTextNode(v['email'])
                        c.appendChild(t)
                        e.appendChild(c)
                    e.appendChild(c)
                    elem_ent.appendChild(e)
            elif k == 'contributors':
                for a in v:
                    if a.has_key('name'):
                        e = feeddoc.createElement('contributor')
                        c = feeddoc.createElement('name')
                        t = feeddoc.createTextNode(a['name'])
                        c.appendChild(t)
                        e.appendChild(c)
                        if a.has_key('href'):
                            c = feeddoc.createElement('uri')
                            t = feeddoc.createTextNode(a['href'])
                            c.appendChild(t)
                            e.appendChild(c)
                        if a.has_key('email'):
                            c = feeddoc.createElement('email')
                            t = feeddoc.createTextNode(a['email'])
                            c.appendChild(t)
                            e.appendChild(c)
                        e.appendChild(c)
                        elem_ent.appendChild(e)
            elif (k == 'published' or k == 'updated') and v:
                e = feeddoc.createElement(k)
                t = RssUtils.timeDateTimeToString (v, 'atom10')
                c = feeddoc.createTextNode(t)
                e.appendChild(c)
                elem_ent.appendChild(e)
            elif k == 'content':
                e = feeddoc.createElement(k)
                # Support only one content field for now ...
                if type(v) == list: d = v[0]
                elif type(v) == dict: d = v
                elif isinstance(v, FeedParserDict): d = v
                t = d.get('type', 'text/plain')
                if t == 'text' or t == 'text/plain':
                    c = xml.dom.minidom.CDATASection()
                    c.data = d['value']
                else:
                    c = feeddoc.createTextNode(d['value'])
                # Some browser doesn't display 'text/html'
                if t == 'text/html': t = 'html'
                e.appendChild(c)
                e.setAttribute('type', t)
                b = d.get('base', '')
                if b: e.setAttribute('xml:base', b)
                g = d.get('language', '')
                if g: e.setAttribute('xml:lang', g)
                elem_ent.appendChild(e)
            elif k == 'summary' and v:
                if not item.has_key('summary_detail') and not item.has_key('content'):
                    e = feeddoc.createElement('content')
                    c = feeddoc.createTextNode(v)
                    e.appendChild(c)
                    elem_ent.appendChild(e)
            elif k == 'summary_detail':
                e = feeddoc.createElement('content')
                t = v.get('type', 'text/plain')
                if t == 'text' or t == 'text/plain':
                    c = xml.dom.minidom.CDATASection()
                    c.data = d['value']
                else:
                    c = feeddoc.createTextNode(d['value'])
                e.appendChild(c)
                e.setAttribute('type', t)
                b = v.get('base', '')
                if b: e.setAttribute('base', b)
                elem_ent.appendChild(e)
            elif k == 'tags':
                for t in v:
                    e = feeddoc.createElement('category')
                    e.setAttribute('term', t.get('term', ''))
                    e.setAttribute('label', t.get('label', ''))
                    e.setAttribute('scheme', t.get('scheme', ''))
                    elem_ent.appendChild(e)
            # TODO: filtered out atom10-undefined attributes
            elif v:
                e = feeddoc.createElement(k)
                c = feeddoc.createTextNode(v)
                e.appendChild(c)
                elem_ent.appendChild(e)
        
        return feeddoc
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.