1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # gpodder.youtube - YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
import re
import urllib

try:
    # Python >= 2.6
    from urlparse import parse_qs
except ImportError:
    # Python < 2.6
    from cgi import parse_qs

import gpodder
from gpodder import util
from gpodder.liblogger import log
# See http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
# Currently missing: the WebM 480p and 720p formats; 3GP profile
#
# Each entry is (fmt_id, fmt_map_entry, description). The list is ordered
# from the highest resolution to the lowest; get_real_download_url() walks
# it top-down and relies on that ordering when it falls back from the
# preferred format to the next lower one that is available.
supported_formats = [
    (37, '37/1920x1080/9/0/115', '1920x1080 (HD)'),
    (22, '22/1280x720/9/0/115', '1280x720 (HD)'),
    (35, '35/854x480/9/0/115', '854x480'),
    (34, '34/640x360/9/0/115', '640x360'),
    (18, '18/640x360/9/0/115', '640x360 (iPod)'),
    (18, '18/480x360/9/0/115', '480x360 (iPod)'),
    (5, '5/320x240/7/0/0', '320x240 (FLV)'),
]
class YouTubeError(Exception):
    """Raised when a YouTube video cannot be resolved to a download URL."""
def get_real_download_url(url, preferred_fmt_id=18):
    """Resolve a YouTube video link to a direct media URL.

    If ``url`` is not recognised by get_youtube_id() it is returned
    unchanged. Otherwise the ``get_video_info`` page for the video is
    fetched, the advertised (fmt_id, url) pairs are extracted and the
    URL of the selected format is returned.

    ``preferred_fmt_id`` is the format to aim for; when it is not
    offered for the video, the next lower entry of ``supported_formats``
    that is offered is used instead. If the chosen format has no URL,
    the URL of the highest advertised fmt_id is returned.

    Raises YouTubeError when the info page carries no stream map.
    """
    vid = get_youtube_id(url)
    if vid is None:
        # Not a YouTube video link - nothing to resolve
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Redirects are followed by hand: keep requesting the "location"
    # target until a response without such a header is received
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) pairs found in the video info page."""
        r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
        if r4 is not None:
            fmt_url_map = urllib.unquote(r4.group(1))
            for fmt_url_encoded in fmt_url_map.split(','):
                video_info = parse_qs(fmt_url_encoded)
                yield int(video_info['itag'][0]), video_info['url'][0]
        else:
            # No stream map: report the "reason" field of the page
            error_info = parse_qs(page)
            error_message = util.remove_html_tags(error_info['reason'][0])
            raise YouTubeError('Cannot download video: %s' % error_message)

    fmt_id_url_map = sorted(find_urls(page), reverse=True)
    # Default to the highest fmt_id if we don't find a match below
    if fmt_id_url_map:
        default_fmt_id, default_url = fmt_id_url_map[0]
    else:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    formats_available = set(fmt_id for fmt_id, _ in fmt_id_url_map)
    fmt_id_url_map = dict(fmt_id_url_map)

    # NOTE(review): the condition of this first branch was not present in
    # the reviewed text; "gpodder.ui.diablo" is assumed from the Maemo
    # comment below and the following "elif" - confirm against upstream.
    if gpodder.ui.diablo:
        # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
        # also use 13 and 17 here, but the quality is very low then. There
        # seems to also be a 6, but I could not find a video with that yet.
        fmt_id = 5
    elif gpodder.ui.fremantle:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        if preferred_fmt_id == 5:
            fmt_id = 5
        else:
            fmt_id = 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

        # This will be set to True if the search below has already "seen"
        # our preferred format, but has not yet found a suitable available
        # format for the given video.
        seen_preferred = False

        for candidate_id, _wanted, description in supported_formats:
            # If we see our preferred format, accept formats below
            if candidate_id == preferred_fmt_id:
                seen_preferred = True

            # If the format is available and preferred (or lower),
            # use the given format for our fmt_id
            if candidate_id in formats_available and seen_preferred:
                log('Found available YouTube format: %s (fmt_id=%d)',
                        description, candidate_id)
                fmt_id = candidate_id
                break

    url = fmt_id_url_map.get(fmt_id, None)
    if url is None:
        # The selected format is not offered - use the best one instead
        url = default_url

    return url
# URL shapes that carry a YouTube video ID; group 1 is the ID. Checked in
# order, so the ".swf" form is tried before the generic "/v/...?" form.
_YOUTUBE_ID_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE) for pattern in (
        r'http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf',
        r'http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)',
        r'http://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]',
    )
)


def get_youtube_id(url):
    """Return the video ID embedded in a YouTube URL, or None.

    Recognised forms (matched case-insensitively from the start of
    ``url``, with an optional subdomain such as "www."):

      http://youtube.com/v/<id>.swf
      http://youtube.com/watch?v=<id>
      http://youtube.com/v/<id>?...

    Any other URL yields None.
    """
    for pattern in _YOUTUBE_ID_PATTERNS:
        found = pattern.match(url)
        if found is not None:
            return found.group(1)

    return None
def is_video_link(url):
    """Tell whether a video ID can be extracted from ``url``."""
    video_id = get_youtube_id(url)
    return video_id is not None
def get_real_channel_url(url):
    """Map a YouTube user/profile page URL to that user's video feed.

    Handles ``http://youtube.com/user/<name>`` and
    ``http://youtube.com/profile?user=<name>`` (case-insensitive,
    optional subdomain). A recognised URL is logged and rewritten to
    ``http://www.youtube.com/rss/user/<name>/videos.rss``; any other
    URL is returned unchanged.
    """
    channel_patterns = (
        r'http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
        # The "?" must be escaped: unescaped it makes the preceding "e"
        # optional, so real "profile?user=..." links would never match
        r'http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)',
    )

    for pattern in channel_patterns:
        m = re.compile(pattern, re.IGNORECASE).match(url)
        if m is not None:
            feed_url = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
            log('YouTube link resolved: %s => %s', url, feed_url)
            return feed_url

    return url
def get_real_cover(url):
    """Return the thumbnail URL for a YouTube user feed, or None.

    ``url`` must have the form
    ``http://www.youtube.com/rss/user/<name>/videos.rss``. For such a
    URL the user's API entry is downloaded and the ``url`` attribute of
    the first ``<media:thumbnail .../>`` element found in it is
    returned. None is returned when ``url`` has a different form or no
    thumbnail element is found.
    """
    r = re.compile(r'http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss',
            re.IGNORECASE)
    m = r.match(url)
    if m is None:
        return None

    username = m.group(1)
    api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
    data = util.urlopen(api_url).read()
    match = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
    if match is not None:
        log('YouTube userpic for %s is: %s', url, match.group(1))
        return match.group(1)

    return None
def find_youtube_channels(string):
    """Search YouTube for user channels matching ``string``.

    Returns an object whose ``items`` attribute is a list of dicts with
    the keys 'title' (the user name), 'url' (the user's videos.rss feed)
    and 'description' (the link text from the result page). Each user
    name appears at most once, in the order found on the page.
    """
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query=' + urllib.quote(string, '') + '&search_type=search_users&aq=f'

    # Drop whitespace between tags so the link pattern below can match
    r = re.compile(r'>\s+<')
    data = r.sub('><', util.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"?]+)[^"]+"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    class FakeImporter(object):
        """Minimal result container exposing an ``items`` list."""
        def __init__(self):
            self.items = []

    result = FakeImporter()
    # A set gives constant-time duplicate detection; output order is
    # still determined by the order of matches in the page
    found_users = set()
    for name, title in m1:
        if name not in found_users:
            found_users.add(name)
            link = 'http://www.youtube.com/rss/user/' + name + '/videos.rss'
            result.items.append({'title': name, 'url': link, 'description': title})

    return result