YouTube: Parse error messages, improve downloading
[gpodder.git] / src / gpodder / youtube.py
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# gpodder.youtube - YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
import gpodder

from gpodder import util
from gpodder.liblogger import log

import re
import urllib

try:
    # Python >= 2.6
    from urlparse import parse_qs
except ImportError:
    # Python < 2.6
    from cgi import parse_qs
# See http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
# Currently missing: the WebM 480p and 720p formats; 3GP profile
# Each entry is a (fmt_id, fmt string, description) tuple, ordered from
# highest to lowest quality.
supported_formats = [
    (37, '37/1920x1080/9/0/115', '1920x1080 (HD)'),
    (22, '22/1280x720/9/0/115', '1280x720 (HD)'),
    (35, '35/854x480/9/0/115', '854x480'),
    (34, '34/640x360/9/0/115', '640x360'),
    (18, '18/640x360/9/0/115', '640x360 (iPod)'),
    (18, '18/480x360/9/0/115', '480x360 (iPod)'),
    (5, '5/320x240/7/0/0', '320x240 (FLV)'),
]
class YouTubeError(Exception): pass
def get_real_download_url(url, preferred_fmt_id=18):
    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

        while page is None:
            req = util.http_request(url, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # Try to find the best video format available for this video
        # (http://forum.videohelp.com/topic336882-1800.html#1912972)
        def find_urls(page):
            r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
            if r4 is not None:
                fmt_url_map = urllib.unquote(r4.group(1))
                for fmt_url_encoded in fmt_url_map.split(','):
                    video_info = parse_qs(fmt_url_encoded)
                    yield int(video_info['itag'][0]), video_info['url'][0]
            else:
                error_info = parse_qs(page)
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)

        fmt_id_url_map = sorted(find_urls(page), reverse=True)

        # Default to the highest fmt_id if we don't find a match below
        if fmt_id_url_map:
            default_fmt_id, default_url = fmt_id_url_map[0]
        else:
            raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

        formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
        fmt_id_url_map = dict(fmt_id_url_map)

        if gpodder.ui.diablo:
            # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
            # also use 13 and 17 here, but the quality is very low then. There
            # seems to also be a 6, but I could not find a video with that yet.
            fmt_id = 5
        elif gpodder.ui.fremantle:
            # This provides good quality video, seems to be always available
            # and is playable fluently in Media Player
            if preferred_fmt_id == 5:
                fmt_id = 5
            else:
                fmt_id = 18
        else:
            # As a fallback, use fmt_id 18 (seems to be always available)
            fmt_id = 18

        # This will be set to True if the search below has already "seen"
        # our preferred format, but has not yet found a suitable available
        # format for the given video.
        seen_preferred = False

        for id, wanted, description in supported_formats:
            # If we see our preferred format, accept formats below
            if id == preferred_fmt_id:
                seen_preferred = True

            # If the format is available and preferred (or lower),
            # use the given format for our fmt_id
            if id in formats_available and seen_preferred:
                log('Found available YouTube format: %s (fmt_id=%d)', \
                        description, id)
                fmt_id = id
                break

        url = fmt_id_url_map.get(fmt_id, None)
        if url is None:
            url = default_url

    return url
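
# Illustrative usage (not part of the module): resolve an episode link to a
# direct video URL before handing it to the downloader. The fmt_id 22 below is
# just an example preference taken from supported_formats above.
#
#     if is_video_link(episode_url):
#         download_url = get_real_download_url(episode_url, preferred_fmt_id=22)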
def get_youtube_id(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return None
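
# Illustrative examples (not part of the module) of URLs the patterns above
# recognize; the video ID 'dQw4w9WgXcQ' is only a placeholder:
#
#     get_youtube_id('http://www.youtube.com/watch?v=dQw4w9WgXcQ')  # -> 'dQw4w9WgXcQ'
#     get_youtube_id('http://www.youtube.com/v/dQw4w9WgXcQ.swf')    # -> 'dQw4w9WgXcQ'
#     get_youtube_id('http://example.com/video')                    # -> None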
def is_video_link(url):
    return (get_youtube_id(url) is not None)
def get_real_channel_url(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next
    # Note: the '?' before 'user=' must be escaped, otherwise it acts as a
    # regex quantifier and the pattern never matches real profile URLs.
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
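
# Illustrative example (not part of the module); 'someuser' is a placeholder:
#
#     get_real_channel_url('http://www.youtube.com/user/someuser')
#     # -> 'http://www.youtube.com/rss/user/someuser/videos.rss'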
def get_real_cover(url):
    r = re.compile('http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss', \
            re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        username = m.group(1)
        api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
        data = util.urlopen(api_url).read()
        match = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
        if match is not None:
            log('YouTube userpic for %s is: %s', url, match.group(1))
            return match.group(1)

    return None
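
# Illustrative example (not part of the module): this function expects the
# feed URL format produced by get_real_channel_url() above.
#
#     cover = get_real_cover('http://www.youtube.com/rss/user/someuser/videos.rss')
#     # -> thumbnail URL reported by the GData API, or None if not found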
def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query='+ urllib.quote(string, '') +'&search_type=search_users&aq=f'

    r = re.compile('>\s+<')
    data = r.sub('><', util.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"?]+)[^"]+"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    r2 = re.compile('\s+')

    class FakeImporter(object):
        def __init__(self):
            self.items = []

    result = FakeImporter()
    found_users = []
    for name, title in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/'+ name +'/videos.rss'
            result.items.append({'title': name, 'url': link, 'description': title})

    return result
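
# Illustrative usage (not part of the module): search for channels and print
# the resulting subscription URLs; 'linux' is just an example query.
#
#     channels = find_youtube_channels('linux')
#     for item in channels.items:
#         print item['title'], item['url']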