Update copyright info from 2005-2008 to 2005-2009
[gpodder.git] / src / gpodder / resolver.py
blob748e5610b1402c79a56a4ba192c7bf53dc16a848
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # resolver.py -- YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
22 # TODO:
24 # * Channel covers.
25 # * Support for Vimeo, maybe blip.tv and others.
27 import re
28 import urllib
29 import urllib2
30 import gtk
31 import gobject
33 import gpodder
34 from xml.sax import saxutils
35 from gpodder.liblogger import log
36 from gpodder.util import proxy_request
def get_real_download_url(url, proxy=None):
    """Resolve a YouTube .swf embed link to a direct video URL.

    URLs that are not YouTube .swf links are returned unchanged.
    Format IDs from http://forum.videohelp.com/topic336882-1800.html#1912972
    """
    if gpodder.interface == gpodder.MAEMO:
        fmt_id = 17  # 3GP with AAC on Maemo
    else:
        fmt_id = 18  # MP4 with AAC by default

    swf_match = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if swf_match is None:
        return url

    video_id = swf_match.group(1)
    page = proxy_request('http://www.youtube.com/watch?v=' + video_id, proxy, method='GET').read()

    # Scrape the session token ("t" parameter) out of the watch page
    token_match = re.compile('.*"t"\:\s+"([^"]+)".*').search(page)
    if token_match:
        resolved = 'http://www.youtube.com/get_video?video_id=' + video_id + '&t=' + token_match.group(1) + '&fmt=%d' % fmt_id
        log('YouTube link resolved: %s => %s', url, resolved)
        return resolved

    return url
def get_real_channel_url(url):
    """Map a YouTube user or profile page URL to its RSS feed URL.

    URLs that are not recognized as YouTube channel pages are
    returned unchanged.
    """
    # /user/<name> style channel URL
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    # /profile?user=<name> style channel URL.  The '?' must be escaped:
    # unescaped, it made the 'e' of 'profile' optional, so the pattern
    # could never match a real profile URL.
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
def get_real_cover(url):
    """Scrape a YouTube user page for the channel's cover image URL.

    Returns the userpic URL, or None if the given feed URL is not a
    YouTube channel feed or no userpic can be found on the page.
    """
    log('Cover: %s', url)

    r = re.compile('http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    m = r.match(url)

    if m is None:
        return None

    data = urllib2.urlopen('http://www.youtube.com/user/'+ m.group(1)).read()

    # Walk to the profile image markup.  Check each find() result so
    # that a missing marker returns None instead of slicing with a -1
    # offset, which would silently produce garbage.
    pos = data.find('id="user-profile-image"')
    if pos < 0:
        return None
    data = data[pos:]

    pos = data.find('src="')
    if pos < 0:
        return None
    data = data[pos + 5:]

    end = data.find('"')
    if end < 0:
        return None
    next = data[:end]

    if next.strip() == '':
        return None

    log('YouTube userpic for %s is: %s', url, next)
    return next
def get_real_episode_length(episode):
    """Return the size in bytes of an episode's real download URL.

    Only episodes whose URL can be resolved (YouTube links) cause a
    network request; for everything else 0 is returned immediately.
    Returns 0 whenever the length cannot be determined.
    """
    url = get_real_download_url(episode.url)

    if url != episode.url:
        try:
            info = urllib2.urlopen(url).info()
            if 'content-length' in info:
                # The header value is a string; convert it so callers
                # always get a number (matching the 0 failure value)
                return int(info['content-length'])
        except urllib2.HTTPError:
            pass
        except ValueError:
            # Malformed Content-Length header
            pass

    return 0
def find_youtube_channels(string):
    """Search YouTube for channels matching the given query string.

    Returns a gtk.ListStore with one (selected, username, feed URL)
    row per unique user found on the search results page.
    """
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query='+ urllib.quote(string, '') +'&search_type=search_users&aq=f'

    # Collapse whitespace between tags so the link regex below can
    # match markup that was spread over several lines
    r = re.compile('>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    # De-duplicate usernames with a set for O(1) membership tests
    found_users = set()
    for (name, title) in m1:
        if name not in found_users:
            found_users.add(name)
            link = 'http://www.youtube.com/rss/user/'+ name +'/videos.rss'
            model.append([False, name, link])

    return model