itunes.py : » RSS » PenguinTV » PenguinTV-4.1.0 » penguintv » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML

Python Open Source » RSS » PenguinTV

PenguinTV » PenguinTV 4.1.0 » penguintv » itunes.py

# itunes.py
# Written by Owen Williams, (c) 2007
# see LICENSE for license information
#
# iTunes has very strange weblinks, but they are not that hard to read.
# A "viewPodcast" link returns a web page that contains a link that
# iTunes can load.  Although the protocol of this link is itms://, we can
# load it with http.  This time we get an xml file, and toward the
# bottom of the file is a simple key / value pair for feedURL.  This
# url is what the podcast author has told itunes to use, and it'll be regular
# RSS (we hope).
#
# NOTE: both responses were originally described as gzipped; the gzip
# step is commented out in the code below, so they are read as-is.


import sys
import gzip
import urllib
import HTMLParser
import logging

from xml.sax import saxutils,make_parser
from xml.sax.handler import feature_namespaces

def is_itms_url(url):
  """Return True if url starts with the itms:// scheme, else False.

  The comparison is case-insensitive.  Always returns a real bool; the
  previous version fell off the end and returned None for non-matches.
  """
  return url.lower().startswith("itms://")

def is_itunes_url(url):
  """Report whether url looks like an itunes podcast link.

  A url is accepted when any one of these holds:
    * it starts with "itms://" (any letter case),
    * it contains "apple.com/" (any letter case) together with the
      exact, case-sensitive text "viewPodcast", as in
      http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewPodcast?id=207870198
    * it contains "itunes.com/podcast" (any letter case), as in
      http://www.itunes.com/podcast?id=207870198

  Returns True or False.
  """
  lowered = url.lower()
  return (
    lowered.startswith("itms://")
    or ("apple.com/" in lowered and "viewPodcast" in url)
    or "itunes.com/podcast" in lowered
  )

def get_rss_from_itunes(url):
  """Resolve an itunes link to the podcast feed url it points at.

  url must satisfy is_itunes_url().  An itms:// link is loaded directly
  over http; any other accepted link is first passed to get_itms_url()
  to find the itms link embedded in the page.  The resulting url is then
  handed to get_podcast_url().

  Returns whatever get_podcast_url() returns.
  Raises ItunesError if url is not an itunes url; errors raised by
  get_itms_url() / get_podcast_url() propagate unchanged.
  """
  if not is_itunes_url(url):
    raise ItunesError("not an itunes url")

  if is_itms_url(url):
    # is_itms_url() matches the scheme case-insensitively, so swap the
    # prefix by position: str.replace("itms://", ...) would leave a url
    # such as "ITMS://..." untouched.
    page_url = "http://" + url[len("itms://"):]
  else:
    page_url = get_itms_url(url)
  return get_podcast_url(page_url)
    
def get_itms_url(url):
  """Download the itunes "webpage" at url and return the link inside it.

  The page is fed to viewPodcastParser, which extracts the itms link
  from the <body onload=...> attribute and rewrites it to http.

  Returns the extracted http url.
  Raises ItunesError if the parser did not find a link.
  """
  # Part 1, get the itunes "webpage" for this feed
  # we have to save the file because urlopen doesn't support seeking
  filename, _headers = urllib.urlretrieve(url)
  # The gzip step (gzip.GzipFile(filename=filename, mode='r')) is
  # disabled; the page is read as a plain file.
  page = open(filename, 'r')
  try:
    contents = page.read()
  finally:
    # always release the file handle, even if the read fails
    page.close()

  parser = viewPodcastParser()
  parser.feed(contents)

  if parser.url is None:
    raise ItunesError("error getting viewpodcast url from itunes")
  return parser.url

def get_podcast_url(url):
  """Download the itunes xml at url and return the feed url it names.

  The document is parsed with a SAX parser (namespace processing off)
  using itunesHandler, which records the <string> value that follows a
  <key>feedURL</key>.

  Returns the feed url.
  Raises ItunesError if no feed url was found.
  """
  # Part 2, find the actual rss link in the itunes "webpage"
  filename, _headers = urllib.urlretrieve(url)

  handler = itunesHandler()
  parser = make_parser()
  parser.setFeature(feature_namespaces, 0)
  parser.setContentHandler(handler)

  # The gzip step (gzip.GzipFile(filename=filename, mode='r')) is
  # disabled; the document is parsed as a plain file.
  document = open(filename, 'r')
  try:
    parser.parse(document)
  finally:
    # always release the file handle, even if parsing fails
    document.close()

  # itunesHandler starts with url == "", so test for emptiness rather
  # than None; otherwise a missing feedURL silently returns "".
  if not handler.url:
    raise ItunesError("error finding podcast url")

  return handler.url

class viewPodcastParser(HTMLParser.HTMLParser):
  """HTML parser that pulls the itms link out of <body onload="...">.

  After feed(), self.url holds the link with its "itms" scheme replaced
  by "http", or None if no such link was seen.
  """

  def __init__(self):
    HTMLParser.HTMLParser.__init__(self)
    self.url = None

  def handle_starttag(self, tag, attrs):
    """Inspect BODY tags for an onload attribute containing itms://."""
    if tag.upper() != "BODY":
      return
    for attr, val in attrs:
      # val is None for an attribute written without a value
      if attr != "onload" or not val:
        continue
      start = val.find("itms://")
      if start == -1:
        # no itms link here; leave self.url alone instead of slicing
        # from a bogus offset and storing garbage
        continue
      # drop the "itms" scheme name but keep "://..."
      link = val[start + len("itms"):]
      # the link normally ends at the closing single quote of the
      # javascript string; if there is none, keep the whole remainder
      end = link.find("'")
      if end != -1:
        link = link[:end]
      self.url = "http" + link

# Pick the base class for itunesHandler: prefer xml.sax.handler's
# ContentHandler and fall back to xml.sax.saxutils.DefaultHandler when
# the former cannot be imported.  Only ImportError is treated as "not
# available"; anything else is a real error and is left to propagate.
try:
  from xml.sax.handler import ContentHandler
  def_handler = ContentHandler
except ImportError:
  try:
    from xml.sax.saxutils import DefaultHandler
    def_handler = DefaultHandler
  except ImportError:
    logging.error("couldn't get xml parsing")
    # bare raise keeps the original traceback
    raise
    
class itunesHandler(def_handler):
  """SAX handler that extracts the feedURL value from key/string pairs.

  It remembers the text of the most recently closed <key> element; when
  a <string> element closes while that key is 'feedURL', the string's
  text is stored in self.url.  self.url is "" until a match is found.
  """

  def __init__(self):
    # let the base handler set up its own state
    def_handler.__init__(self)
    self.url = ""
    # text collected for the <key> / <string> currently open, or None
    # when no such element is open
    self._in_key = None
    self._in_value = None
    # text of the last <key> that was closed
    self._last_key = None

  def startElement(self, name, attrs):
    """Start collecting text when a <key> or <string> opens."""
    if name == 'key':
      self._in_key = ""
    elif name == 'string':
      self._in_value = ""

  def endElement(self, name):
    """Commit the collected text when a <key> or <string> closes."""
    if name == 'key':
      self._last_key = self._in_key
      self._in_key = None
    elif name == 'string':
      if self._last_key == 'feedURL':
        self.url = self._in_value
      self._in_value = None

  def characters(self, ch):
    """Append character data to whichever element is being collected."""
    # the parser may deliver one element's text in several chunks
    if self._in_key is not None:
      self._in_key += ch
    elif self._in_value is not None:
      self._in_value += ch
      
class ItunesError(Exception):
  """Raised when an itunes link cannot be recognised or resolved.

  The message is available as the attribute m and via str().
  """

  def __init__(self, m):
    # populate Exception.args as well
    Exception.__init__(self, m)
    self.m = m

  def __str__(self):
    # was "return m", which raised NameError whenever the error was printed
    return str(self.m)

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.