Fix broken YouTube search function (website changed)
[gpodder.git] / src / gpodder / resolver.py
blob e0a2825399fae10815e18f22126c9bdf5fd641ec
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2008 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # resolver.py -- YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
22 # TODO:
24 # * Channel covers.
25 # * Support for Vimeo, maybe blip.tv and others.
27 import re
28 import urllib
29 import urllib2
30 import gtk
31 import gobject
32 from xml.sax import saxutils
33 from gpodder.liblogger import log
34 from gpodder.util import proxy_request
def get_real_download_url(url, proxy=None):
    """Resolve a YouTube .swf embed URL to a direct video download URL.

    Scrapes the watch page for the "t" token and builds a
    get_video URL requesting format 18 (MP4). Any URL that is not a
    YouTube /v/<id>.swf embed link is returned unchanged.
    """
    embed_match = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if embed_match is None:
        return url

    video_id = embed_match.group(1)
    page = proxy_request('http://www.youtube.com/watch?v=' + video_id, proxy, method='GET').read()

    # The watch page embeds a session token as "t": "..." in its JS config
    token_match = re.compile('.*"t"\:\s+"([^"]+)".*').search(page)
    if token_match:
        resolved = 'http://www.youtube.com/get_video?video_id=' + video_id + '&t=' + token_match.group(1) + '&fmt=18'
        log('YouTube link resolved: %s => %s', url, resolved)
        return resolved

    return url
def get_real_channel_url(url):
    """Convert a YouTube user or profile page URL into the user's
    videos RSS feed URL.

    Returns the original URL unchanged when it is not a recognized
    YouTube user/profile link.
    """
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    # BUGFIX: the "?" must be escaped — unescaped, the regex read
    # "profile?" as "optional e" and never matched the literal query
    # separator in ".../profile?user=NAME" URLs.
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
def get_real_cover(url):
    """Scrape a YouTube user's profile page for the channel userpic URL.

    Only acts on feed URLs of the form
    http://www.youtube.com/rss/user/<name>/videos.rss; returns None
    when the URL does not match or no image URL could be extracted.
    """
    log('Cover: %s', url)

    feed_re = re.compile('http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    match = feed_re.match(url)

    if match is None:
        return None

    # Narrow the page down to the src attribute of the profile image tag
    html = urllib2.urlopen('http://www.youtube.com/user/'+ match.group(1)).read()
    html = html[html.find('id="user-profile-image"'):]
    html = html[html.find('src="') + 5:]

    userpic = html[:html.find('"')]
    if not userpic.strip():
        return None

    log('YouTube userpic for %s is: %s', url, userpic)
    return userpic
def get_real_episode_length(episode):
    """Return the server-reported Content-Length of the episode's
    resolved download URL, or 0 when the episode is not a resolvable
    YouTube link or the header is unavailable.
    """
    resolved = get_real_download_url(episode.url)

    # Only probe URLs we actually rewrote; plain feeds keep their size
    if resolved == episode.url:
        return 0

    try:
        headers = urllib2.urlopen(resolved).info()
        if 'content-length' in headers:
            return headers['content-length']
    except urllib2.HTTPError:
        pass

    return 0
def find_youtube_channels(string):
    """Search YouTube for users matching the given query string.

    Screen-scrapes the YouTube user-search results page and returns a
    gtk.ListStore with one row per unique user:
    (selected: bool, username: str, videos.rss feed URL: str).
    """
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query='+ urllib.quote(string, '') +'&search_type=search_users&aq=f'

    # Collapse whitespace between tags so the user-link regex matches
    r = re.compile('>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    # De-duplicate users (a set gives O(1) membership tests) while
    # preserving first-seen order; the unused '\s+' regex was removed.
    found_users = set()
    for (name, title) in m1:
        if name not in found_users:
            found_users.add(name)
            link = 'http://www.youtube.com/rss/user/'+ name +'/videos.rss'
            model.append([False, name, link])

    return model