src/gpodder/youtube.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19 #  gpodder.youtube - YouTube and related magic
  20 #  Justin Forest <justin.forest@gmail.com> 2008-10-13
  21 #
  22
  23
  24 import gpodder
  25
  26 from gpodder import util
  27
  28 import os.path
  29
  30 import logging
  31 logger = logging.getLogger(__name__)
  32
  33 try:
  34     import simplejson as json
  35 except ImportError:
  36     import json
  37
  38 import re
  39 import urllib
  40
  41 try:
  42     # Python >= 2.6
  43     from urlparse import parse_qs
  44 except ImportError:
  45     # Python < 2.6
  46     from cgi import parse_qs
  47
# Table of known YouTube stream formats ("fmt_id" / "itag" values).
# http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
#
# Each entry is a pair: (format id, (preferred ids, path(?), description)),
# followed by a trailing comment giving video bitrate, audio bitrate.
#   * preferred ids: format ids to try in order, starting with the format
#     itself and followed by its fallbacks
#   * path: not read by any code visible in this file (it is unpacked and
#     discarded); its meaning is not established here
#   * description: human-readable label, used for logging
#
# NOTE(review): three path strings do not agree with their own row -- the
# entry for 46 carries the same path as 45, the entry for 38 says 1920x1080
# while its description says 4096x3072, and the entry for 6 starts with "5".
# They look like copy/paste leftovers; confirm before relying on "path".
formats = [
    # WebM VP8 video, Vorbis audio
    # Fallback to an MP4 version of same quality.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5], '45/1280x720/99/0/0', 'WebM 1080p (1920x1080)')), # N/A,      192 kbps
    (45, ([45, 22, 44, 35, 43, 18, 6, 34, 5],         '45/1280x720/99/0/0', 'WebM 720p (1280x720)')),   # 2.0 Mbps, 192 kbps
    (44, ([44, 35, 43, 18, 6, 34, 5],                 '44/854x480/99/0/0',  'WebM 480p (854x480)')),    # 1.0 Mbps, 128 kbps
    (43, ([43, 18, 6, 34, 5],                         '43/640x360/99/0/0',  'WebM 360p (640x360)')),    # 0.5 Mbps, 128 kbps

    # MP4 H.264 video, AAC audio
    # Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's no MP4 480p.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (38, ([38, 37, 22, 35, 18, 34, 6, 5], '38/1920x1080/9/0/115', 'MP4 4K 3072p (4096x3072)')), # 5.0 - 3.5 Mbps, 192 kbps
    (37, ([37, 22, 35, 18, 34, 6, 5],     '37/1920x1080/9/0/115', 'MP4 HD 1080p (1920x1080)')), # 4.3 - 3.0 Mbps, 192 kbps
    (22, ([22, 35, 18, 34, 6, 5],         '22/1280x720/9/0/115',  'MP4 HD 720p (1280x720)')),   # 2.9 - 2.0 Mbps, 192 kbps
    (18, ([18, 34, 6, 5],                 '18/640x360/9/0/115',   'MP4 360p (640x360)')),       #       0.5 Mbps,  96 kbps

    # FLV H.264 video, AAC audio
    # Does not check for 360p MP4.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (35, ([35, 34, 6, 5], '35/854x480/9/0/115',   'FLV 480p (854x480)')), # 1 - 0.80 Mbps, 128 kbps
    (34, ([34, 6, 5],     '34/640x360/9/0/115',   'FLV 360p (640x360)')), #     0.50 Mbps, 128 kbps

    # FLV Sorenson H.263 video, MP3 audio
    (6, ([6, 5],         '5/480x270/7/0/0',      'FLV 270p (480x270)')), #     0.80 Mbps,  64 kbps
    (5, ([5],            '5/320x240/7/0/0',      'FLV 240p (320x240)')), #     0.25 Mbps,  64 kbps
]
# Same data keyed by format id, for direct lookup: id -> (ids, path, description)
formats_dict = dict(formats)
  80
  81 class YouTubeError(Exception): pass
  82
  83
  84 def get_fmt_ids(youtube_config):
  85     fmt_ids = youtube_config.preferred_fmt_ids
  86     if not fmt_ids:
  87         format = formats_dict.get(youtube_config.preferred_fmt_id)
  88         if format is None:
  89             fmt_ids = []
  90         else:
  91             fmt_ids, path, description = format
  92
  93     return fmt_ids
  94
  95 def get_real_download_url(url, preferred_fmt_ids=None):
  96     if not preferred_fmt_ids:
  97         preferred_fmt_ids, _, _ = formats_dict[22] # MP4 720p
  98
  99     vid = get_youtube_id(url)
 100     if vid is not None:
 101         page = None
 102         url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid
 103
 104         while page is None:
 105             req = util.http_request(url, method='GET')
 106             if 'location' in req.msg:
 107                 url = req.msg['location']
 108             else:
 109                 page = req.read()
 110
 111         # Try to find the best video format available for this video
 112         # (http://forum.videohelp.com/topic336882-1800.html#1912972)
 113         def find_urls(page):
 114             r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
 115             if r4 is not None:
 116                 fmt_url_map = urllib.unquote(r4.group(1))
 117                 for fmt_url_encoded in fmt_url_map.split(','):
 118                     video_info = parse_qs(fmt_url_encoded)
 119                     yield int(video_info['itag'][0]), video_info['url'][0] + "&signature=" + video_info['sig'][0]
 120             else:
 121                 error_info = parse_qs(page)
 122                 error_message = util.remove_html_tags(error_info['reason'][0])
 123                 raise YouTubeError('Cannot download video: %s' % error_message)
 124
 125         fmt_id_url_map = sorted(find_urls(page), reverse=True)
 126
 127         if not fmt_id_url_map:
 128             raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)
 129
 130         # Default to the highest fmt_id if we don't find a match below
 131         _, url  = fmt_id_url_map[0]
 132
 133         formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
 134         fmt_id_url_map = dict(fmt_id_url_map)
 135
 136         # This provides good quality video, seems to be always available
 137         # and is playable fluently in Media Player
 138         if gpodder.ui.harmattan:
 139             preferred_fmt_ids = [18]
 140
 141         for id in preferred_fmt_ids:
 142             id = int(id)
 143             if id in formats_available:
 144                 format = formats_dict.get(id)
 145                 if format is not None:
 146                     _, _, description = format
 147                 else:
 148                     description = 'Unknown'
 149
 150                 logger.info('Found YouTube format: %s (fmt_id=%d)',
 151                         description, id)
 152                 url = fmt_id_url_map[id]
 153                 break
 154
 155     return url
 156
 157 def get_youtube_id(url):
 158     r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
 159     if r is not None:
 160         return r.group(1)
 161
 162     r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
 163     if r is not None:
 164         return r.group(1)
 165
 166     r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
 167     if r is not None:
 168         return r.group(1)
 169
 170     return None
 171
 172 def is_video_link(url):
 173     return (get_youtube_id(url) is not None)
 174
 175 def is_youtube_guid(guid):
 176     return guid.startswith('tag:youtube.com,2008:video:')
 177
 178 def get_real_channel_url(url):
 179     r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
 180     m = r.match(url)
 181
 182     if m is not None:
 183         next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
 184         logger.debug('YouTube link resolved: %s => %s', url, next)
 185         return next
 186
 187     r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)', re.IGNORECASE)
 188     m = r.match(url)
 189
 190     if m is not None:
 191         next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
 192         logger.debug('YouTube link resolved: %s => %s', url, next)
 193         return next
 194
 195     return url
 196
 197 def get_real_cover(url):
 198     r = re.compile('http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss', \
 199             re.IGNORECASE)
 200     m = r.match(url)
 201
 202     if m is not None:
 203         username = m.group(1)
 204         api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
 205         data = util.urlopen(api_url).read()
 206         match = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
 207         if match is not None:
 208             logger.debug('YouTube userpic for %s is: %s', url, match.group(1))
 209             return match.group(1)
 210
 211     return None
 212
 213 def find_youtube_channels(string):
 214     url = 'http://gdata.youtube.com/feeds/api/videos?alt=json&q=%s' % urllib.quote(string, '')
 215     data = json.load(util.urlopen(url))
 216
 217     class FakeImporter(object):
 218         def __init__(self):
 219             self.items = []
 220
 221     result = FakeImporter()
 222
 223     seen_users = set()
 224     for entry in data['feed']['entry']:
 225         user = os.path.basename(entry['author'][0]['uri']['$t'])
 226         title = entry['title']['$t']
 227         url = 'http://www.youtube.com/rss/user/%s/videos.rss' % user
 228         if user not in seen_users:
 229             result.items.append({
 230                 'title': user,
 231                 'url': url,
 232                 'description': title
 233             })
 234             seen_users.add(user)
 235
 236     return result
 237