YouTube: Fix directory search (bug 1760)
[gpodder.git] / src / gpodder / youtube.py
blob4a6fa044da1f9031f41ac08591480e2d840e417c
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # gpodder.youtube - YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
24 import gpodder
26 from gpodder import util
28 import os.path
30 import logging
31 logger = logging.getLogger(__name__)
33 try:
34 import simplejson as json
35 except ImportError:
36 import json
38 import re
39 import urllib
41 try:
42 # Python >= 2.6
43 from urlparse import parse_qs
44 except ImportError:
45 # Python < 2.6
46 from cgi import parse_qs
# Table of known YouTube stream formats, see
# http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
#
# Every entry has the shape
#     (format id, (preferred ids, path(?), description))
# where "preferred ids" is the chain of format ids to try, in order.
# The trailing comment of an entry reads: video bitrate, audio bitrate
formats = [
    # --- WebM: VP8 video, Vorbis audio ---
    # Fallback to an MP4 version of same quality.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5],
          '45/1280x720/99/0/0',
          'WebM 1080p (1920x1080)')),  # N/A, 192 kbps
    (45, ([45, 22, 44, 35, 43, 18, 6, 34, 5],
          '45/1280x720/99/0/0',
          'WebM 720p (1280x720)')),  # 2.0 Mbps, 192 kbps
    (44, ([44, 35, 43, 18, 6, 34, 5],
          '44/854x480/99/0/0',
          'WebM 480p (854x480)')),  # 1.0 Mbps, 128 kbps
    (43, ([43, 18, 6, 34, 5],
          '43/640x360/99/0/0',
          'WebM 360p (640x360)')),  # 0.5 Mbps, 128 kbps

    # --- MP4: H.264 video, AAC audio ---
    # Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's
    # no MP4 480p.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (38, ([38, 37, 22, 35, 18, 34, 6, 5],
          '38/1920x1080/9/0/115',
          'MP4 4K 3072p (4096x3072)')),  # 5.0 - 3.5 Mbps, 192 kbps
    (37, ([37, 22, 35, 18, 34, 6, 5],
          '37/1920x1080/9/0/115',
          'MP4 HD 1080p (1920x1080)')),  # 4.3 - 3.0 Mbps, 192 kbps
    (22, ([22, 35, 18, 34, 6, 5],
          '22/1280x720/9/0/115',
          'MP4 HD 720p (1280x720)')),  # 2.9 - 2.0 Mbps, 192 kbps
    (18, ([18, 34, 6, 5],
          '18/640x360/9/0/115',
          'MP4 360p (640x360)')),  # 0.5 Mbps, 96 kbps

    # --- FLV: H.264 video, AAC audio ---
    # Does not check for 360p MP4.
    # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
    (35, ([35, 34, 6, 5],
          '35/854x480/9/0/115',
          'FLV 480p (854x480)')),  # 1 - 0.80 Mbps, 128 kbps
    (34, ([34, 6, 5],
          '34/640x360/9/0/115',
          'FLV 360p (640x360)')),  # 0.50 Mbps, 128 kbps

    # --- FLV: Sorenson H.263 video, MP3 audio ---
    (6, ([6, 5],
         '5/480x270/7/0/0',
         'FLV 270p (480x270)')),  # 0.80 Mbps, 64 kbps
    (5, ([5],
         '5/320x240/7/0/0',
         'FLV 240p (320x240)')),  # 0.25 Mbps, 64 kbps
]

# Same table, keyed by format id for direct lookup
formats_dict = dict(formats)
class YouTubeError(Exception):
    """Error raised by this module when a YouTube video cannot be resolved."""
def get_fmt_ids(youtube_config):
    """Return the format ids to try for a download, in order of preference.

    A non-empty ``youtube_config.preferred_fmt_ids`` is returned as is.
    Otherwise the chain of preferred ids stored in ``formats_dict`` for
    ``youtube_config.preferred_fmt_id`` is returned, or an empty list if
    that format id is not in the table.
    """
    explicit_ids = youtube_config.preferred_fmt_ids
    if explicit_ids:
        return explicit_ids

    table_entry = formats_dict.get(youtube_config.preferred_fmt_id)
    if table_entry is None:
        return []

    chain, _path, _description = table_entry
    return chain
def get_real_download_url(url, preferred_fmt_ids=None):
    """Resolve a YouTube video link to a direct download URL.

    url -- the link to resolve; if get_youtube_id() does not recognize
           it, it is returned unchanged
    preferred_fmt_ids -- format ids to try, in order; when empty or None
           the chain stored for format 22 (MP4 720p) is used

    The first preferred format that the video offers is picked. If none
    of them is offered, the URL of the highest offered format id is
    returned instead.

    Raises YouTubeError if the video info page carries no stream map
    (the message then contains the "reason" given by the page) or if no
    stream could be extracted from it.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _path, _description = formats_dict[22]  # MP4 720p

    video_id = get_youtube_id(url)
    if video_id is None:
        # Not a YouTube video link - nothing to resolve
        return url

    info_url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + video_id

    # Follow "location" headers by hand until a response body is available
    info_page = None
    while info_page is None:
        response = util.http_request(info_url, method='GET')
        if 'location' in response.msg:
            info_url = response.msg['location']
        else:
            info_page = response.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def iter_streams(content):
        """Yield (fmt_id, download_url) for each stream listed in content."""
        stream_map = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', content)
        if stream_map is None:
            # Without a stream map, the page is expected to say why
            failure = parse_qs(content)
            reason = util.remove_html_tags(failure['reason'][0])
            raise YouTubeError('Cannot download video: %s' % reason)

        for encoded_stream in urllib.unquote(stream_map.group(1)).split(','):
            stream = parse_qs(encoded_stream)
            fmt_id = int(stream['itag'][0])
            yield fmt_id, stream['url'][0] + "&signature=" + stream['sig'][0]

    # Highest format id first
    streams = sorted(iter_streams(info_page), reverse=True)
    if not streams:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % video_id)

    # Default to the highest fmt_id if we don't find a match below
    chosen_url = streams[0][1]
    url_by_fmt_id = dict(streams)

    # This provides good quality video, seems to be always available
    # and is playable fluently in Media Player
    if gpodder.ui.harmattan:
        preferred_fmt_ids = [18]

    for candidate in preferred_fmt_ids:
        fmt_id = int(candidate)
        if fmt_id not in url_by_fmt_id:
            continue

        table_entry = formats_dict.get(fmt_id)
        if table_entry is None:
            description = 'Unknown'
        else:
            description = table_entry[2]

        logger.info('Found YouTube format: %s (fmt_id=%d)',
                description, fmt_id)
        chosen_url = url_by_fmt_id[fmt_id]
        break

    return chosen_url
def get_youtube_id(url):
    """Extract the video id from a YouTube video URL.

    The following URL shapes are recognized (http or https, optionally
    with one subdomain such as "www."), and are tried in this order:

      youtube.com/v/<id>.swf
      youtube.com/watch?v=<id>   ("v" may also follow other parameters,
                                  e.g. watch?feature=related&v=<id>)
      youtube.com/v/<id>?...

    Returns the id as a string, or None if url has none of these shapes.
    """
    patterns = (
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf',
        # The lazy "(?:[^&]+&)*?" skips query parameters in front of "v=";
        # being lazy, it still picks the first "v=" if there are several
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?(?:[^&]+&)*?v=([^&]*)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]',
    )

    for pattern in patterns:
        r = re.match(pattern, url, re.IGNORECASE)
        if r is not None:
            return r.group(1)

    return None
def is_video_link(url):
    """Tell whether get_youtube_id() finds a video id in url."""
    video_id = get_youtube_id(url)
    return video_id is not None
def is_youtube_guid(guid):
    """Tell whether guid begins with the YouTube video tag prefix."""
    youtube_prefix = 'tag:youtube.com,2008:video:'
    return guid.startswith(youtube_prefix)
def get_real_channel_url(url):
    """Turn a YouTube user page URL into the URL of that user's video feed.

    Recognized inputs (http or https, optionally with one subdomain such
    as "www."; user names made of letters and digits):

      youtube.com/user/<name>
      youtube.com/profile?user=<name>

    For these, 'http://www.youtube.com/rss/user/<name>/videos.rss' is
    returned. Any other url is returned unchanged.
    """
    patterns = (
        r'https?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
        # The "?" has to be escaped - unescaped it would make the "e" of
        # "profile" optional and never match the literal question mark
        r'https?://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)',
    )

    for pattern in patterns:
        m = re.match(pattern, url, re.IGNORECASE)
        if m is not None:
            feed_url = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
            logger.debug('YouTube link resolved: %s => %s', url, feed_url)
            return feed_url

    return url
def get_real_cover(url):
    """Find the user picture that belongs to a YouTube user video feed.

    url is expected to look like
    'http://www.youtube.com/rss/user/<name>/videos.rss'. For such a URL
    the user entry is fetched from gdata.youtube.com and the URL of the
    first <media:thumbnail> element found in it is returned.

    Returns None if url is not such a feed URL or if the fetched data
    contains no thumbnail.
    """
    feed_match = re.match(r'http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss',
            url, re.IGNORECASE)
    if feed_match is None:
        return None

    username = feed_match.group(1)
    api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
    data = util.urlopen(api_url).read()

    thumbnail = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
    if thumbnail is None:
        return None

    cover_url = thumbnail.group(1)
    logger.debug('YouTube userpic for %s is: %s', url, cover_url)
    return cover_url
def find_youtube_channels(string):
    """Search YouTube videos for string and list the users who posted them.

    The search is sent to the gdata.youtube.com videos feed (JSON). Each
    distinct author of a returned video becomes one item, in feed order:

      'title'       -- the user name (last component of the author URI)
      'url'         -- that user's videos.rss feed URL
      'description' -- title of the first returned video of that user

    Returns an object whose "items" attribute is the list of these
    dicts; the list is empty if the search yields no entries.
    """
    url = 'http://gdata.youtube.com/feeds/api/videos?alt=json&q=%s' % urllib.quote(string, '')
    data = json.load(util.urlopen(url))

    class FakeImporter(object):
        """Minimal result container that only offers an "items" list."""
        def __init__(self):
            self.items = []

    result = FakeImporter()

    seen_users = set()
    # A search without results may come back without any 'entry' key in
    # the feed; treat that as "nothing found" instead of a KeyError
    for entry in data['feed'].get('entry', []):
        user = os.path.basename(entry['author'][0]['uri']['$t'])
        title = entry['title']['$t']
        if user in seen_users:
            continue

        seen_users.add(user)
        result.items.append({
            'title': user,
            'url': 'http://www.youtube.com/rss/user/%s/videos.rss' % user,
            'description': title,
        })

    return result