YouTube: Parse error messages, improve downloading
[gpodder.git] / src / gpodder / youtube.py
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# gpodder.youtube - YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
import gpodder

from gpodder import util
from gpodder.liblogger import log

import re
import urllib

try:
    # Python >= 2.6
    from urlparse import parse_qs
except ImportError:
    # Python < 2.6
    from cgi import parse_qs
# See http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
# Currently missing: the WebM 480p and 720p formats; 3GP profile
# Each entry is a (fmt_id, fmt string, description) tuple, ordered from
# highest to lowest quality.
supported_formats = [
    (37, '37/1920x1080/9/0/115', '1920x1080 (HD)'),
    (22, '22/1280x720/9/0/115', '1280x720 (HD)'),
    (35, '35/854x480/9/0/115', '854x480'),
    (34, '34/640x360/9/0/115', '640x360'),
    (18, '18/640x360/9/0/115', '640x360 (iPod)'),
    (18, '18/480x360/9/0/115', '480x360 (iPod)'),
    (5, '5/320x240/7/0/0', '320x240 (FLV)'),
]
class YouTubeError(Exception): pass
def get_real_download_url(url, preferred_fmt_id=18):
    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

        while page is None:
            req = util.http_request(url, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # Try to find the best video format available for this video
        # (http://forum.videohelp.com/topic336882-1800.html#1912972)
        def find_urls(page):
            r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
            if r4 is not None:
                fmt_url_map = urllib.unquote(r4.group(1))
                for fmt_url_encoded in fmt_url_map.split(','):
                    video_info = parse_qs(fmt_url_encoded)
                    yield int(video_info['itag'][0]), video_info['url'][0]
            else:
                error_info = parse_qs(page)
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)

        fmt_id_url_map = sorted(find_urls(page), reverse=True)

        # Default to the highest fmt_id if we don't find a match below
        if fmt_id_url_map:
            default_fmt_id, default_url = fmt_id_url_map[0]
        else:
            raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

        formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
        fmt_id_url_map = dict(fmt_id_url_map)

        if gpodder.ui.diablo:
            # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
            # also use 13 and 17 here, but the quality is very low then. There
            # seems to also be a 6, but I could not find a video with that yet.
            fmt_id = 5
        elif gpodder.ui.fremantle:
            # This provides good quality video, seems to be always available
            # and is playable fluently in Media Player
            if preferred_fmt_id == 5:
                fmt_id = 5
            else:
                fmt_id = 18
        else:
            # As a fallback, use fmt_id 18 (seems to be always available)
            fmt_id = 18

        # This will be set to True if the search below has already "seen"
        # our preferred format, but has not yet found a suitable available
        # format for the given video.
        seen_preferred = False

        for id, wanted, description in supported_formats:
            # If we see our preferred format, accept formats below
            if id == preferred_fmt_id:
                seen_preferred = True

            # If the format is available and preferred (or lower),
            # use the given format for our fmt_id
            if id in formats_available and seen_preferred:
                log('Found available YouTube format: %s (fmt_id=%d)', \
                        description, id)
                fmt_id = id
                break

        url = fmt_id_url_map.get(fmt_id, None)
        if url is None:
            url = default_url

    return url
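
# Illustrative usage (not part of the module): resolve an episode link to a
# direct video URL before handing it to the downloader. The fmt_id 22 below is
# just an example preference taken from supported_formats above.
#
#     if is_video_link(episode_url):
#         download_url = get_real_download_url(episode_url, preferred_fmt_id=22)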
def get_youtube_id(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return None
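
# Illustrative examples (not part of the module) of URLs the patterns above
# recognize; the video ID 'dQw4w9WgXcQ' is only a placeholder:
#
#     get_youtube_id('http://www.youtube.com/watch?v=dQw4w9WgXcQ')  # -> 'dQw4w9WgXcQ'
#     get_youtube_id('http://www.youtube.com/v/dQw4w9WgXcQ.swf')    # -> 'dQw4w9WgXcQ'
#     get_youtube_id('http://example.com/video')                    # -> None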
def is_video_link(url):
    return (get_youtube_id(url) is not None)
def get_real_channel_url(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next
    # Note: the '?' before 'user=' must be escaped, otherwise it acts as a
    # regex quantifier and the pattern never matches real profile URLs.
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
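
# Illustrative example (not part of the module); 'someuser' is a placeholder:
#
#     get_real_channel_url('http://www.youtube.com/user/someuser')
#     # -> 'http://www.youtube.com/rss/user/someuser/videos.rss'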
def get_real_cover(url):
    r = re.compile('http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss', \
            re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        username = m.group(1)
        api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
        data = util.urlopen(api_url).read()
        match = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
        if match is not None:
            log('YouTube userpic for %s is: %s', url, match.group(1))
            return match.group(1)

    return None
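
# Illustrative example (not part of the module): this function expects the
# feed URL format produced by get_real_channel_url() above.
#
#     cover = get_real_cover('http://www.youtube.com/rss/user/someuser/videos.rss')
#     # -> thumbnail URL reported by the GData API, or None if not found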
def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query='+ urllib.quote(string, '') +'&search_type=search_users&aq=f'

    r = re.compile('>\s+<')
    data = r.sub('><', util.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"?]+)[^"]+"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    r2 = re.compile('\s+')

    class FakeImporter(object):
        def __init__(self):
            self.items = []

    result = FakeImporter()
    found_users = []
    for name, title in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/'+ name +'/videos.rss'
            result.items.append({'title': name, 'url': link, 'description': title})

    return result
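
# Illustrative usage (not part of the module): search for channels and print
# the resulting subscription URLs; 'linux' is just an example query.
#
#     channels = find_youtube_channels('linux')
#     for item in channels.items:
#         print item['title'], item['url']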