1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2018 The gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # gpodder.youtube - YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
28 import xml
.etree
.ElementTree
29 from functools
import lru_cache
30 from html
.parser
import HTMLParser
31 from urllib
.parse
import parse_qs
34 from gpodder
import registry
, util
36 logger
= logging
.getLogger(__name__
)
42 # http://en.wikipedia.org/wiki/YouTube#Quality_and_formats
43 # https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L447
45 # adaptive audio formats
51 # formats and fallbacks of same quality: WebM -> MP4 -> FLV
57 mp4_240
= ['133+140'] + flv_240
58 mp4_360
= [18, '134+140'] + flv_360
59 mp4_480
= ['135+140'] + flv_480
60 mp4_720
= [22, '136+140']
61 mp4_1080
= [37, '137+140']
62 mp4_1440
= ['264+140']
63 mp4_2160
= ['266+140']
65 mp4_4320
= ['138+140']
66 webm_144
= ['278+250'] + mp4_144
67 webm_240
= ['242+250'] + mp4_240
68 webm_360
= [43, '243+251'] + mp4_360
69 webm_480
= [44, '244+251'] + mp4_480
70 webm_720
= [45, '247+251'] + mp4_720
71 webm_1080
= [46, '248+251'] + mp4_1080
72 webm_1440
= ['271+251'] + mp4_1440
73 webm_2160
= ['313+251'] + mp4_2160
74 webm_4320
= ['272+251'] + mp4_4320
75 # fallbacks to lower quality
77 webm_360
+= flv_270
+ webm_240
81 webm_1440
+= webm_1080
82 webm_2160
+= webm_1440
83 webm_4320
+= mp4_3072
+ webm_2160
85 mp4_360
+= flv_270
+ mp4_240
96 # format id, (preferred ids, path(?), description) # video bitrate, audio bitrate
98 # WebM VP8, VP9 or VP9 HFR video, Vorbis or Opus audio
99 # Fallback to MP4 or FLV
100 (272, (webm_4320
, '272/7680x4320/99/0/0', 'WebM 4320p 8K (7680x4320) youtube-dl')), # N/A, 160 kbps
101 (313, (webm_2160
, '313/3840x2160/99/0/0', 'WebM 2160p 4K (3840x2160) youtube-dl')), # N/A, 160 kbps
102 (271, (webm_1440
, '271/2560x1440/99/0/0', 'WebM 1440p (2560x1440) youtube-dl')), # N/A, 160 kbps
103 (46, (webm_1080
, '46/1920x1080/99/0/0', 'WebM 1080p (1920x1080) youtube-dl')), # N/A, 192 kbps
104 (45, (webm_720
, '45/1280x720/99/0/0', 'WebM 720p (1280x720) youtube-dl')), # 2.0 Mbps, 192 kbps
105 (44, (webm_480
, '44/854x480/99/0/0', 'WebM 480p (854x480) youtube-dl')), # 1.0 Mbps, 128 kbps
106 (43, (webm_360
, '43/640x360/99/0/0', 'WebM 360p (640x360)')), # 0.5 Mbps, 128 kbps
107 (242, (webm_240
, '242/426x240/99/0/0', 'WebM 240p (426x240) youtube-dl')), # N/A, 70 kbps
108 (278, (webm_144
, '278/256x144/99/0/0', 'WebM 144p (256x144) youtube-dl')), # N/A, 70 kbps
110 # MP4 H.264 video, AAC audio
112 (138, (mp4_4320
, '138/7680x4320/9/0/115', 'MP4 4320p 8K (7680x4320) youtube-dl')), # N/A, 128 kbps
113 (38, (mp4_3072
, '38/4096x3072/9/0/115', 'MP4 3072p 4K (4096x3072)')), # 5.0 - 3.5 Mbps, 192 kbps
114 (266, (mp4_2160
, '266/3840x2160/9/0/115', 'MP4 2160p 4K (3840x2160) youtube-dl')), # N/A, 128 kbps
115 (264, (mp4_1440
, '264/2560x1440/9/0/115', 'MP4 1440p (2560x1440) youtube-dl')), # N/A, 128 kbps
116 (37, (mp4_1080
, '37/1920x1080/9/0/115', 'MP4 1080p (1920x1080) youtube-dl')), # 4.3 - 3.0 Mbps, 192 kbps
117 (22, (mp4_720
, '22/1280x720/9/0/115', 'MP4 720p (1280x720)')), # 2.9 - 2.0 Mbps, 192 kbps
118 (135, (mp4_480
, '135/854x480/9/0/115', 'MP4 480p (854x480) youtube-dl')), # N/A, 128 kbps
119 (18, (mp4_360
, '18/640x360/9/0/115', 'MP4 360p (640x360)')), # 0.5 Mbps, 96 kbps
120 (133, (mp4_240
, '133/426x240/9/0/115', 'MP4 240p (426x240) youtube-dl')), # N/A, 128 kbps
121 (160, (mp4_144
, '160/256x144/9/0/115', 'MP4 144p (256x144) youtube-dl')), # N/A, 128 kbps
123 # FLV H.264 video, AAC audio
124 # Fallback to FLV 6 or 5
125 (35, (flv_480
, '35/854x480/9/0/115', 'FLV 480p (854x480)')), # 1 - 0.80 Mbps, 128 kbps
126 (34, (flv_360
, '34/640x360/9/0/115', 'FLV 360p (640x360)')), # 0.50 Mbps, 128 kbps
128 # FLV Sorenson H.263 video, MP3 audio
129 (6, (flv_270
, '6/480x270/7/0/0', 'FLV 270p (480x270)')), # 0.80 Mbps, 64 kbps
130 (5, (flv_240
, '5/320x240/7/0/0', 'FLV 240p (320x240)')), # 0.25 Mbps, 64 kbps
132 formats_dict
= dict(formats
)
134 # streaming formats and fallbacks to lower quality
136 hls_240
= [92] + hls_144
137 hls_360
= [93] + hls_240
138 hls_480
= [94] + hls_360
139 hls_720
= [95] + hls_480
140 hls_1080
= [96] + hls_720
142 (96, (hls_1080
, '9/1920x1080/9/0/115', 'MP4 1080p (1920x1080)')), # N/A, 256 kbps
143 (95, (hls_720
, '9/1280x720/9/0/115', 'MP4 720p (1280x720)')), # N/A, 256 kbps
144 (94, (hls_480
, '9/854x480/9/0/115', 'MP4 480p (854x480)')), # N/A, 128 kbps
145 (93, (hls_360
, '9/640x360/9/0/115', 'MP4 360p (640x360)')), # N/A, 128 kbps
146 (92, (hls_240
, '9/426x240/9/0/115', 'MP4 240p (426x240)')), # N/A, 48 kbps
147 (91, (hls_144
, '9/256x144/9/0/115', 'MP4 144p (256x144)')), # N/A, 48 kbps
149 hls_formats_dict
= dict(hls_formats
)
151 CHANNEL_VIDEOS_XML
= 'https://www.youtube.com/feeds/videos.xml'
152 WATCH_ENDPOINT
= 'https://www.youtube.com/watch?bpctr=9999999999&has_verified=1&v='
# The page may contain "};" sequences inside the initial player response.
# Try a greedy match anchored on the closing script tag first, then fall back to a non-greedy match without it.
156 INITIAL_PLAYER_RESPONSE_RE1
= r
'ytInitialPlayerResponse\s*=\s*({.+})\s*;\s*</script'
157 INITIAL_PLAYER_RESPONSE_RE2
= r
'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
161 for regex
in (INITIAL_PLAYER_RESPONSE_RE1
, INITIAL_PLAYER_RESPONSE_RE2
):
162 ipr
= re
.search(regex
, page
)
168 class YouTubeError(Exception):
172 def get_fmt_ids(youtube_config
, allow_partial
):
174 if youtube_config
.preferred_hls_fmt_id
== 0:
175 hls_fmt_ids
= (youtube_config
.preferred_hls_fmt_ids
if youtube_config
.preferred_hls_fmt_ids
else [])
177 fmt
= hls_formats_dict
.get(youtube_config
.preferred_hls_fmt_id
)
181 hls_fmt_ids
, path
, description
= fmt
185 if youtube_config
.preferred_fmt_id
== 0:
186 return (youtube_config
.preferred_fmt_ids
+ hls_fmt_ids
if youtube_config
.preferred_fmt_ids
else hls_fmt_ids
)
188 fmt
= formats_dict
.get(youtube_config
.preferred_fmt_id
)
191 fmt_ids
, path
, description
= fmt
192 return fmt_ids
+ hls_fmt_ids
@registry.download_url.register
def youtube_real_download_url(config, episode, allow_partial):
    """Resolve the real media URL for a YouTube episode.

    Registered with ``registry.download_url``.

    :param config: configuration object; when truthy, its ``youtube``
        section is passed to get_fmt_ids() to pick the preferred formats
    :param episode: episode whose ``url`` is resolved; its ``total_time``
        is updated when a duration is reported
    :param allow_partial: forwarded unchanged to get_fmt_ids() and
        get_real_download_url()
    :return: the resolved URL, or None when it equals ``episode.url``
    """
    preferred_ids = None
    if config:
        preferred_ids = get_fmt_ids(config.youtube, allow_partial)

    resolved_url, duration_ms = get_real_download_url(episode.url, allow_partial, preferred_ids)

    if duration_ms is not None:
        # The reported duration is divided by 1000 before being stored
        # (the stream data exposes it as 'approxDurationMs').
        episode.total_time = int(int(duration_ms) / 1000)

    if resolved_url == episode.url:
        # Nothing was resolved; signal "not handled" to the caller.
        return None
    return resolved_url
204 def youtube_get_old_endpoint(vid
):
205 # TODO: changing 'detailpage' to 'embedded' allows age-restricted content
206 url
= 'https://www.youtube.com/get_video_info?html5=1&c=TVHTML5&cver=6.20180913&el=detailpage&video_id=' + vid
207 r
= util
.urlopen(url
)
209 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
214 def youtube_get_new_endpoint(vid
):
215 url
= WATCH_ENDPOINT
+ vid
216 r
= util
.urlopen(url
)
218 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
220 ipr
= get_ipr(r
.text
)
223 url
= get_gdpr_consent_url(r
.text
)
224 except YouTubeError
as e
:
225 raise YouTubeError('Youtube "%s": No ytInitialPlayerResponse found and %s' % (url
, str(e
)))
226 r
= util
.urlopen(url
)
228 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
230 ipr
= get_ipr(r
.text
)
232 raise YouTubeError('Youtube "%s": No ytInitialPlayerResponse found' % url
)
234 return None, ipr
.group(1)
237 def get_total_time(episode
):
239 vid
= get_youtube_id(episode
.url
)
243 url
= WATCH_ENDPOINT
+ vid
244 r
= util
.urlopen(url
)
248 ipr
= get_ipr(r
.text
)
250 url
= get_gdpr_consent_url(r
.text
)
251 r
= util
.urlopen(url
)
255 ipr
= get_ipr(r
.text
)
259 player_response
= json
.loads(ipr
.group(1))
260 return int(player_response
['videoDetails']['lengthSeconds']) # 0 if live
265 def get_real_download_url(url
, allow_partial
, preferred_fmt_ids
=None):
266 if not preferred_fmt_ids
:
267 preferred_fmt_ids
, _
, _
= formats_dict
[22] # MP4 720p
271 vid
= get_youtube_id(url
)
274 old_page
, new_page
= youtube_get_new_endpoint(vid
)
275 except YouTubeError
as e
:
277 old_page
, new_page
= youtube_get_old_endpoint(vid
)
279 def find_urls(old_page
, new_page
):
280 # streamingData is preferable to url_encoded_fmt_stream_map
281 # streamingData.formats are the same as url_encoded_fmt_stream_map
282 # streamingData.adaptiveFormats are audio-only and video-only formats
284 x
= parse_qs(old_page
) if old_page
else json
.loads(new_page
)
285 player_response
= json
.loads(x
['player_response'][0]) if old_page
and 'player_response' in x
else x
289 # TODO: unknown if this is valid for new_page
290 error_message
= util
.remove_html_tags(x
['reason'][0])
291 elif 'playabilityStatus' in player_response
:
292 playabilityStatus
= player_response
['playabilityStatus']
294 if 'reason' in playabilityStatus
:
295 error_message
= util
.remove_html_tags(playabilityStatus
['reason'])
296 elif 'liveStreamability' in playabilityStatus \
297 and not playabilityStatus
['liveStreamability'].get('liveStreamabilityRenderer', {}).get('displayEndscreen', False):
298 # playabilityStatus.liveStreamability -- video is or was a live stream
299 # playabilityStatus.liveStreamability.liveStreamabilityRenderer.displayEndscreen -- video has ended if present
301 if allow_partial
and 'streamingData' in player_response
and 'hlsManifestUrl' in player_response
['streamingData']:
302 r
= util
.urlopen(player_response
['streamingData']['hlsManifestUrl'])
304 raise YouTubeError('HLS Manifest: %d %s' % (r
.status_code
, r
.reason
))
305 manifest
= r
.text
.splitlines()
307 urls
= [line
for line
in manifest
if line
[0] != '#']
308 itag_re
= re
.compile(r
'/itag/([0-9]+)/')
310 itag
= itag_re
.search(url
).group(1)
311 yield int(itag
), [url
, None]
314 error_message
= 'live stream'
315 elif 'streamingData' in player_response
:
316 if 'formats' in player_response
['streamingData']:
317 for f
in player_response
['streamingData']['formats']:
318 if 'url' in f
: # DRM videos store url inside a signatureCipher key
319 yield int(f
['itag']), [f
['url'], f
.get('approxDurationMs')]
320 if 'adaptiveFormats' in player_response
['streamingData']:
321 for f
in player_response
['streamingData']['adaptiveFormats']:
322 if 'url' in f
: # DRM videos store url inside a signatureCipher key
323 yield int(f
['itag']), [f
['url'], f
.get('approxDurationMs')]
326 if error_message
is not None:
327 raise YouTubeError(('Cannot stream video: %s' if allow_partial
else 'Cannot download video: %s') % error_message
)
330 r4
= re
.search(r
'url_encoded_fmt_stream_map=([^&]+)', old_page
)
332 fmt_url_map
= urllib
.parse
.unquote(r4
.group(1))
333 for fmt_url_encoded
in fmt_url_map
.split(','):
334 video_info
= parse_qs(fmt_url_encoded
)
335 yield int(video_info
['itag'][0]), [video_info
['url'][0], None]
337 fmt_id_url_map
= sorted(find_urls(old_page
, new_page
), reverse
=True)
339 if not fmt_id_url_map
:
340 drm
= re
.search(r
'(%22(cipher|signatureCipher)%22%3A|"signatureCipher":)', old_page
or new_page
)
342 raise YouTubeError('Unsupported DRM content')
343 raise YouTubeError('No formats found')
345 formats_available
= {fmt_id
for fmt_id
, url
in fmt_id_url_map
}
346 fmt_id_url_map
= dict(fmt_id_url_map
)
348 for fmt_id
in preferred_fmt_ids
:
349 if not re
.search(r
'^[0-9]+$', str(fmt_id
)):
350 # skip non-integer formats 'best', '136+140' or twitch '720p'
353 if fmt_id
in formats_available
:
354 fmt
= formats_dict
.get(fmt_id
) or hls_formats_dict
.get(fmt_id
)
356 _
, _
, description
= fmt
358 description
= 'Unknown'
360 logger
.info('Found YouTube format: %s (fmt_id=%d)',
362 url
, duration
= fmt_id_url_map
[fmt_id
]
365 raise YouTubeError('No preferred formats found')
371 def get_youtube_id(url
):
372 r
= re
.compile(r
'http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re
.IGNORECASE
).match(url
)
376 r
= re
.compile(r
'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re
.IGNORECASE
).match(url
)
380 r
= re
.compile(r
'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re
.IGNORECASE
).match(url
)
384 return for_each_feed_pattern(lambda url
, channel
: channel
, url
, None)
def is_video_link(url):
    """Return True when get_youtube_id() can extract an id from *url*."""
    extracted_id = get_youtube_id(url)
    return extracted_id is not None
def is_youtube_guid(guid):
    """Return True when *guid* starts with the YouTube video tag prefix."""
    youtube_guid_prefix = 'tag:youtube.com,2008:video:'
    return guid.startswith(youtube_guid_prefix)
def for_each_feed_pattern(func, url, fallback_result):
    """
    Try to find the username for all possible YouTube feed/webpage URLs
    Will call func(url, channel) for each match, and if func() returns
    a result other than None, returns this. If no match is found or
    func() returns None, return fallback_result.

    @param func: callable taking (url, channel), where channel is the
                 first capture group of the matching pattern
    @param url: the URL to test; patterns are anchored at its start and
                matched case-insensitively
    @param fallback_result: value returned when nothing produced a result
    """
    # Literal '?' and '.' characters are escaped: an unescaped '?' after
    # "profile" would make the "e" optional instead of matching the query
    # separator, so "profile?user=NAME" URLs would never match.
    CHANNEL_MATCH_PATTERNS = [
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos\.xml\?user=([a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos\.xml\?channel_id=([-_a-z0-9]+)',
        r'http[s]?://gdata\.youtube\.com/feeds/users/([^/]+)/uploads',
        r'http[s]?://gdata\.youtube\.com/feeds/base/users/([^/]+)/uploads',
    ]

    for pattern in CHANNEL_MATCH_PATTERNS:
        m = re.match(pattern, url, re.IGNORECASE)
        if m is not None:
            result = func(url, m.group(1))
            if result is not None:
                return result

    return fallback_result
423 def get_real_channel_url(url
):
424 def return_user_feed(url
, channel
):
425 result
= 'https://gdata.youtube.com/feeds/users/{0}/uploads'.format(channel
)
426 logger
.debug('YouTube link resolved: %s => %s', url
, result
)
429 return for_each_feed_pattern(return_user_feed
, url
, url
)
433 def get_channel_id_url(url
, feed_data
=None):
434 if 'youtube.com' in url
:
435 # URL may contain channel ID, avoid a network request
436 m
= re
.search(r
'channel_id=([^"]+)', url
)
438 # old versions of gpodder allowed newlines and whitespace in feed URLs, strip here to avoid a 404
439 channel_id
= m
.group(1).strip()
440 channel_url
= 'https://www.youtube.com/channel/{}'.format(channel_id
)
444 if feed_data
is None:
445 r
= util
.urlopen(url
, cookies
={'SOCS': 'CAI'})
447 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
450 # video page may contain corrupt HTML/XML, search for tag to avoid exception
451 m
= re
.search(r
'(channel_id=([^"]+)">|"channelId":"([^"]+)")', r
.text
)
453 channel_id
= m
.group(2) or m
.group(3)
455 raw_xml_data
= io
.BytesIO(r
.content
)
456 xml_data
= xml
.etree
.ElementTree
.parse(raw_xml_data
)
457 channel_id
= xml_data
.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
458 if channel_id
is None:
459 # check entries if feed has an empty channelId
460 m
= re
.search(r
'<yt:channelId>([^<]+)</yt:channelId>', r
.text
)
462 channel_id
= m
.group(1)
463 if channel_id
is None:
464 raise Exception('Could not retrieve YouTube channel ID for URL %s.' % url
)
466 # feeds no longer contain the required "UC" prefix on channel ID
467 if len(channel_id
) == 22:
468 channel_id
= "UC" + channel_id
469 channel_url
= 'https://www.youtube.com/channel/{}'.format(channel_id
)
473 logger
.warning('Could not retrieve YouTube channel ID for URL %s.' % url
, exc_info
=True)
475 raise Exception('Could not retrieve YouTube channel ID for URL %s.' % url
)
478 def get_cover(url
, feed_data
=None):
479 if 'youtube.com' in url
:
481 class YouTubeHTMLCoverParser(HTMLParser
):
482 """This custom html parser searches for the youtube channel thumbnail/avatar"""
487 def handle_starttag(self
, tag
, attributes
):
488 attribute_dict
= {attribute
[0]: attribute
[1] for attribute
in attributes
}
490 # Look for 900x900px image first.
492 and 'rel' in attribute_dict \
493 and attribute_dict
['rel'] == 'image_src':
494 self
.url
.append(attribute_dict
['href'])
496 # Fallback to image that may only be 100x100px.
498 and 'class' in attribute_dict \
499 and attribute_dict
['class'] == "channel-header-profile-image":
500 self
.url
.append(attribute_dict
['src'])
503 channel_url
= get_channel_id_url(url
, feed_data
)
504 r
= util
.urlopen(channel_url
)
506 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
507 html_data
= util
.response_text(r
)
508 parser
= YouTubeHTMLCoverParser()
509 parser
.feed(html_data
)
511 logger
.debug('Youtube cover art for {} is: {}'.format(url
, parser
.url
))
515 logger
.warning('Could not retrieve cover art', exc_info
=True)
518 def get_gdpr_consent_url(html_data
):
520 Creates the URL for automatically accepting GDPR consents
521 EU GDPR redirects to a form that needs to be posted to be redirected to a get request
522 with the form data as input to the youtube video URL. This extracts that form data from
523 the GDPR form and builds up the URL the posted form results.
525 class ConsentHTML(HTMLParser
):
529 self
.consentForm
= False
531 def handle_starttag(self
, tag
, attributes
):
532 attribute_dict
= {attribute
[0]: attribute
[1] for attribute
in attributes
}
533 if tag
== 'form' and attribute_dict
['action'] == 'https://consent.youtube.com/s':
534 self
.consentForm
= True
535 self
.url
= 'https://consent.google.com/s?'
536 # Get GDPR form elements
537 if self
.consentForm
and tag
== 'input' and attribute_dict
['type'] == 'hidden':
538 self
.url
+= '&' + attribute_dict
['name'] + '=' + urllib
.parse
.quote_plus(attribute_dict
['value'])
540 def handle_endtag(self
, tag
):
542 self
.consentForm
= False
545 parser
= ConsentHTML()
546 parser
.feed(html_data
)
548 raise YouTubeError('Could not retrieve GDPR accepted consent URL')
551 logger
.debug('YouTube GDPR accept consent URL is: %s', parser
.url
)
554 logger
.debug('YouTube GDPR accepted consent URL could not be resolved.')
555 raise YouTubeError('No acceptable GDPR consent URL')
558 def get_channel_desc(url
, feed_data
=None):
559 if 'youtube.com' in url
:
561 class YouTubeHTMLDesc(HTMLParser
):
562 """This custom html parser searches for the YouTube channel description."""
565 self
.description
= ''
567 def handle_starttag(self
, tag
, attributes
):
568 attribute_dict
= {attribute
[0]: attribute
[1] for attribute
in attributes
}
570 # Get YouTube channel description.
572 and 'name' in attribute_dict \
573 and attribute_dict
['name'] == "description":
574 self
.description
= attribute_dict
['content']
577 channel_url
= get_channel_id_url(url
, feed_data
)
578 r
= util
.urlopen(channel_url
)
580 raise YouTubeError('Youtube "%s": %d %s' % (url
, r
.status_code
, r
.reason
))
581 html_data
= util
.response_text(r
)
582 parser
= YouTubeHTMLDesc()
583 parser
.feed(html_data
)
584 if parser
.description
:
585 logger
.debug('YouTube description for %s is: %s', url
, parser
.description
)
586 return parser
.description
588 logger
.debug('YouTube description for %s is not provided.', url
)
589 return _('No description available')
592 logger
.warning('Could not retrieve YouTube channel description for %s.' % url
, exc_info
=True)
595 def parse_youtube_url(url
):
597 Youtube Channel Links are parsed into youtube feed links
598 >>> parse_youtube_url("https://www.youtube.com/channel/CHANNEL_ID")
599 'https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID'
601 Youtube User Links are parsed into youtube feed links
602 >>> parse_youtube_url("https://www.youtube.com/user/USERNAME")
603 'https://www.youtube.com/feeds/videos.xml?user=USERNAME'
605 Youtube Playlist Links are parsed into youtube feed links
606 >>> parse_youtube_url("https://www.youtube.com/playlist?list=PLAYLIST_ID")
607 'https://www.youtube.com/feeds/videos.xml?playlist_id=PLAYLIST_ID'
609 >>> parse_youtube_url(None)
612 @param url: the path to the channel, user or playlist
613 @return: the feed url if successful or the given url if not
617 scheme
, netloc
, path
, query
, fragment
= urllib
.parse
.urlsplit(url
)
618 logger
.debug("Analyzing URL: {}".format(" ".join([scheme
, netloc
, path
, query
, fragment
])))
620 if 'youtube.com' in netloc
:
621 if path
== '/feeds/videos.xml' and re
.search(r
'^(user|channel|playlist)_id=.*', query
):
624 if '/user/' in path
or '/channel/' in path
or 'list=' in query
:
625 logger
.debug("Valid Youtube URL detected. Parsing...")
627 if path
.startswith('/user/'):
628 user_id
= path
.split('/')[2]
629 query
= 'user={user_id}'.format(user_id
=user_id
)
631 if path
.startswith('/channel/'):
632 channel_id
= path
.split('/')[2]
633 query
= 'channel_id={channel_id}'.format(channel_id
=channel_id
)
636 playlist_query
= [query_value
for query_value
in query
.split("&") if 'list=' in query_value
][0]
637 playlist_id
= playlist_query
[5:]
638 query
= 'playlist_id={playlist_id}'.format(playlist_id
=playlist_id
)
640 path
= '/feeds/videos.xml'
642 new_url
= urllib
.parse
.urlunsplit((scheme
, netloc
, path
, query
, fragment
))
643 logger
.debug("New Youtube URL: {}".format(new_url
))
646 # look for channel URL in page
647 logger
.debug("Unknown Youtube URL, trying to extract channel ID...")
648 new_url
= get_channel_id_url(url
)
650 logger
.debug("New Youtube URL: {}".format(new_url
))
651 return parse_youtube_url(new_url
)
653 logger
.debug("Not a valid Youtube URL: {}".format(url
))