1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # gpodder.youtube - YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
26 from gpodder
import util
31 logger
= logging
.getLogger(__name__
)
34 import simplejson
as json
43 from urlparse
import parse_qs
46 from cgi
import parse_qs
48 # http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
49 # format id, (preferred ids, path(?), description) # video bitrate, audio bitrate
51 # WebM VP8 video, Vorbis audio
52 # Fallback to an MP4 version of same quality.
53 # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
54 # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
55 (46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5], '45/1280x720/99/0/0', 'WebM 1080p (1920x1080)')), # N/A, 192 kbps
56 (45, ([45, 22, 44, 35, 43, 18, 6, 34, 5], '45/1280x720/99/0/0', 'WebM 720p (1280x720)')), # 2.0 Mbps, 192 kbps
57 (44, ([44, 35, 43, 18, 6, 34, 5], '44/854x480/99/0/0', 'WebM 480p (854x480)')), # 1.0 Mbps, 128 kbps
58 (43, ([43, 18, 6, 34, 5], '43/640x360/99/0/0', 'WebM 360p (640x360)')), # 0.5 Mbps, 128 kbps
60 # MP4 H.264 video, AAC audio
61 # Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's no MP4 480p.
62 # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
63 # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
64 (38, ([38, 37, 22, 35, 18, 34, 6, 5], '38/1920x1080/9/0/115', 'MP4 4K 3072p (4096x3072)')), # 5.0 - 3.5 Mbps, 192 kbps
65 (37, ([37, 22, 35, 18, 34, 6, 5], '37/1920x1080/9/0/115', 'MP4 HD 1080p (1920x1080)')), # 4.3 - 3.0 Mbps, 192 kbps
66 (22, ([22, 35, 18, 34, 6, 5], '22/1280x720/9/0/115', 'MP4 HD 720p (1280x720)')), # 2.9 - 2.0 Mbps, 192 kbps
67 (18, ([18, 34, 6, 5], '18/640x360/9/0/115', 'MP4 360p (640x360)')), # 0.5 Mbps, 96 kbps
69 # FLV H.264 video, AAC audio
70 # Does not check for 360p MP4.
71 # Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
72 (35, ([35, 34, 6, 5], '35/854x480/9/0/115', 'FLV 480p (854x480)')), # 1 - 0.80 Mbps, 128 kbps
73 (34, ([34, 6, 5], '34/640x360/9/0/115', 'FLV 360p (640x360)')), # 0.50 Mbps, 128 kbps
75 # FLV Sorenson H.263 video, MP3 audio
76 (6, ([6, 5], '5/480x270/7/0/0', 'FLV 270p (480x270)')), # 0.80 Mbps, 64 kbps
77 (5, ([5], '5/320x240/7/0/0', 'FLV 240p (320x240)')), # 0.25 Mbps, 64 kbps
# Index the format table by format id, so that the spec tuple
# (preferred ids, path, description) of a format can be looked up directly.
formats_dict = dict(formats)
class YouTubeError(Exception):
    """Error raised when a YouTube video cannot be resolved for download."""
84 def get_fmt_ids(youtube_config
):
85 fmt_ids
= youtube_config
.preferred_fmt_ids
87 format
= formats_dict
.get(youtube_config
.preferred_fmt_id
)
91 fmt_ids
, path
, description
= format
95 def get_real_download_url(url
, preferred_fmt_ids
=None):
96 if not preferred_fmt_ids
:
97 preferred_fmt_ids
, _
, _
= formats_dict
[22] # MP4 720p
99 vid
= get_youtube_id(url
)
102 url
= 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid
105 req
= util
.http_request(url
, method
='GET')
106 if 'location' in req
.msg
:
107 url
= req
.msg
['location']
111 # Try to find the best video format available for this video
112 # (http://forum.videohelp.com/topic336882-1800.html#1912972)
114 r4
= re
.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page
)
116 fmt_url_map
= urllib
.unquote(r4
.group(1))
117 for fmt_url_encoded
in fmt_url_map
.split(','):
118 video_info
= parse_qs(fmt_url_encoded
)
119 yield int(video_info
['itag'][0]), video_info
['url'][0] + "&signature=" + video_info
['sig'][0]
121 error_info
= parse_qs(page
)
122 error_message
= util
.remove_html_tags(error_info
['reason'][0])
123 raise YouTubeError('Cannot download video: %s' % error_message
)
125 fmt_id_url_map
= sorted(find_urls(page
), reverse
=True)
127 if not fmt_id_url_map
:
128 raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid
)
130 # Default to the highest fmt_id if we don't find a match below
131 _
, url
= fmt_id_url_map
[0]
133 formats_available
= set(fmt_id
for fmt_id
, url
in fmt_id_url_map
)
134 fmt_id_url_map
= dict(fmt_id_url_map
)
136 # This provides good quality video, seems to be always available
137 # and is playable fluently in Media Player
138 if gpodder
.ui
.harmattan
:
139 preferred_fmt_ids
= [18]
141 for id in preferred_fmt_ids
:
143 if id in formats_available
:
144 format
= formats_dict
.get(id)
145 if format
is not None:
146 _
, _
, description
= format
148 description
= 'Unknown'
150 logger
.info('Found YouTube format: %s (fmt_id=%d)',
152 url
= fmt_id_url_map
[id]
157 def get_youtube_id(url
):
158 r
= re
.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re
.IGNORECASE
).match(url
)
162 r
= re
.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re
.IGNORECASE
).match(url
)
166 r
= re
.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re
.IGNORECASE
).match(url
)
def is_video_link(url):
    """Return True when a YouTube video ID can be extracted from url."""
    video_id = get_youtube_id(url)
    return video_id is not None
def is_youtube_guid(guid):
    """Return True when guid starts with the YouTube video tag prefix."""
    youtube_tag_prefix = 'tag:youtube.com,2008:video:'
    return guid.startswith(youtube_tag_prefix)
178 def get_real_channel_url(url
):
179 r
= re
.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re
.IGNORECASE
)
183 next
= 'http://www.youtube.com/rss/user/'+ m
.group(1) +'/videos.rss'
184 logger
.debug('YouTube link resolved: %s => %s', url
, next
)
187 r
= re
.compile('http://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)', re
.IGNORECASE
)
191 next
= 'http://www.youtube.com/rss/user/'+ m
.group(1) +'/videos.rss'
192 logger
.debug('YouTube link resolved: %s => %s', url
, next
)
197 def get_real_cover(url
):
198 r
= re
.compile('http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss', \
203 username
= m
.group(1)
204 api_url
= 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
205 data
= util
.urlopen(api_url
).read()
206 match
= re
.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data
)
207 if match
is not None:
208 logger
.debug('YouTube userpic for %s is: %s', url
, match
.group(1))
209 return match
.group(1)
213 def find_youtube_channels(string
):
214 url
= 'http://gdata.youtube.com/feeds/api/videos?alt=json&q=%s' % urllib
.quote(string
, '')
215 data
= json
.load(util
.urlopen(url
))
217 class FakeImporter(object):
221 result
= FakeImporter()
224 for entry
in data
['feed']['entry']:
225 user
= os
.path
.basename(entry
['author'][0]['uri']['$t'])
226 title
= entry
['title']['$t']
227 url
= 'http://www.youtube.com/rss/user/%s/videos.rss' % user
228 if user
not in seen_users
:
229 result
.items
.append({