Fixed the YouTube support.
[gpodder.git] / src/gpodder/resolver.py
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# resolver.py -- YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
# TODO:
#
#  * Channel covers.
#  * Support for Vimeo, maybe blip.tv and others.
import re
import urllib
import urllib2
import gtk
import gobject

import gpodder
from xml.sax import saxutils
from gpodder.liblogger import log
from gpodder.util import proxy_request
def get_real_download_url(url, proxy=None):
    # IDs from http://forum.videohelp.com/topic336882-1800.html#1912972
    if gpodder.interface == gpodder.MAEMO:
        # Use 3GP with AAC on Maemo
        fmt_id = 17
    else:
        # Use MP4 with AAC by default
        fmt_id = 18

    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/watch?v=' + vid

        # Follow HTTP redirects by hand until the watch page is fetched
        while page is None:
            req = proxy_request(url, proxy, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # The "t" token scraped from the watch page is required to
        # build the get_video URL for the direct download
        r2 = re.compile(r'.*"t":\s+"([^"]+)".*').search(page)
        if r2:
            next = 'http://www.youtube.com/get_video?video_id=' + vid + '&t=' + r2.group(1) + '&fmt=%d' % fmt_id
            log('YouTube link resolved: %s => %s', url, next)
            return next

    return url
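
# Example (hypothetical video ID; the 't' token in the result is whatever
# the regex above finds on the watch page):
#
#   get_real_download_url('http://www.youtube.com/watch?v=abc123')
#   # => 'http://www.youtube.com/get_video?video_id=abc123&t=...&fmt=18'
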
def get_youtube_id(url):
    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return None
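
# Examples (hypothetical IDs) for the two URL shapes matched above:
#
#   get_youtube_id('http://www.youtube.com/v/abc123.swf')    # -> 'abc123'
#   get_youtube_id('http://www.youtube.com/watch?v=abc123')  # -> 'abc123'
#   get_youtube_id('http://example.com/video.mp4')           # -> None
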
def get_real_channel_url(url):
    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
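
# Example (hypothetical user name): both user-page styles map to the same
# per-user RSS feed, and unrecognized URLs pass through unchanged:
#
#   get_real_channel_url('http://www.youtube.com/user/somename')
#   # => 'http://www.youtube.com/rss/user/somename/videos.rss'
#   get_real_channel_url('http://example.com/feed.xml')
#   # => 'http://example.com/feed.xml'
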
def get_real_cover(url):
    log('Cover: %s', url)

    r = re.compile(r'http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        data = urllib2.urlopen('http://www.youtube.com/user/' + m.group(1)).read()
        data = data[data.find('id="user-profile-image"'):]
        data = data[data.find('src="') + 5:]

        next = data[:data.find('"')]

        if next.strip() == '':
            return None

        log('YouTube userpic for %s is: %s', url, next)
        return next

    return None
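
# Note: this scrapes the HTML user page for the src="..." attribute of the
# id="user-profile-image" element, so the result is whatever image URL the
# page currently embeds, or None if the markup changes or the user has no
# picture. Example (hypothetical user name):
#
#   get_real_cover('http://www.youtube.com/rss/user/somename/videos.rss')
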
def get_real_episode_length(episode):
    url = get_real_download_url(episode.url)

    if url != episode.url:
        try:
            info = urllib2.urlopen(url).info()
            if 'content-length' in info:
                return info['content-length']
        except urllib2.HTTPError:
            pass

    return 0
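
# Note: on success this returns the raw Content-Length header (a string);
# 0 is the fallback. Callers doing arithmetic should coerce, e.g.
# (assuming 'episode' is an object with a .url attribute, as elsewhere
# in gPodder):
#
#   length = int(get_real_episode_length(episode) or 0)
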
def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query=' + urllib.quote(string, '') + '&search_type=search_users&aq=f'

    r = re.compile(r'>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    found_users = []
    for (name, title) in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/' + name + '/videos.rss'
            model.append([False, name, link])

    return model
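
# Example (arbitrary search term): each row of the returned ListStore is
# (selected, username, feed URL), ready for display in a gtk.TreeView of a
# channel-search dialog:
#
#   model = find_youtube_channels('linux')
#   for row in model:
#       print row[1], row[2]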