1 # -*- coding: utf-8 -*-
2 # Manage YouTube subscriptions using youtube-dl (https://github.com/ytdl-org/youtube-dl)
3 # Requirements: youtube-dl module (pip install youtube_dl)
4 # (c) 2019-08-17 Eric Le Lay <elelay.fr:contact>
5 # Released under the same license terms as gPodder itself.
# Prefer yt-dlp and fall back to youtube-dl when yt-dlp cannot be imported.
# program_name records which of the two is bound to the youtube_dl name:
# the download code branches on it because both tools name their output
# files differently.
try:
    import yt_dlp as youtube_dl
    program_name = 'yt-dlp'
    want_ytdl_version = '2023.02.17'
except ImportError:
    import youtube_dl
    program_name = 'youtube-dl'
    want_ytdl_version = '2023.02.17'  # youtube-dl has been patched, but not yet released
23 from gpodder
import download
, feedcore
, model
, registry
, util
, youtube
25 import gi
# isort:skip
26 gi
.require_version('Gtk', '3.0') # isort:skip
27 from gi
.repository
import Gtk
# isort:skip
# Logger of this module, named after it.
logger = logging.getLogger(__name__)

# Metadata describing this extension
# (presumably read by gPodder's extension manager -- confirm).
__title__ = 'youtube-dl'
__description__ = _('Manage YouTube subscriptions using youtube-dl (pip install youtube_dl) or yt-dlp (pip install yt-dlp)')
__only_for__ = 'gtk, cli'
__authors__ = 'Eric Le Lay <elelay.fr:contact>'
__doc__ = 'https://gpodder.github.io/docs/extensions/youtubedl.html'

# Translated template of the warning issued when the installed
# youtube-dl/yt-dlp is older than want_ytdl_version.
# It expects the have_version and want_version keys.
want_ytdl_version_msg = _('Your version of youtube-dl/yt-dlp %(have_version)s has known issues, please upgrade to %(want_version)s or newer.')
# Default values of the settings read through the extension's config object
# (manage_channel, manage_downloads, embed_subtitles).
# NOTE(review): the DefaultConfig name follows the usual gPodder extension
# convention -- confirm it is the name the extension manager looks up.
DefaultConfig = {
    # youtube-dl downloads and parses each video page to get information about it, which is very slow.
    # Set to False to fall back to the fast but limited (only 15 episodes) gpodder code
    'manage_channel': True,
    # If for some reason youtube-dl download doesn't work for you, you can fall back to gpodder code.
    # Set to False to fall back to default gpodder code (less available formats).
    'manage_downloads': True,
    # Embed all available subtitles to downloaded videos. Needs ffmpeg.
    'embed_subtitles': False,
}
# youtube feed still preprocessed by youtube.py (compat)
# Each pattern matches one flavour of YouTube feed URL and captures the
# channel id, user name or playlist id in group 1.
# Dots are escaped so that they only match a literal '.', not any character.
CHANNEL_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?channel_id=(.+)''')
USER_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?user=(.+)''')
PLAYLIST_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?playlist_id=(.+)''')
def youtube_parsedate(s):
    """Parse a string into a unix timestamp

    Only strings provided by youtube-dl API are
    parsed with this function (20170920).

    :param s: date formatted as YYYYMMDD, or None / empty string when
              youtube-dl gave no upload date
    :return: seconds since the epoch (local time, as computed by time.mktime),
             or 0 when no date was given
    :raises ValueError: if s is set but isn't formatted as YYYYMMDD
    """
    if not s:
        # entries without an upload date must not abort the whole feed update
        return 0
    return time.mktime(time.strptime(s, "%Y%m%d"))
def video_guid(video_id):
    """
    Build the episode guid of a video, the same way youtube does:
    the video id prefixed with 'yt:video:'.
    """
    return 'yt:video:%s' % (video_id,)
class YoutubeCustomDownload(download.CustomDownload):
    """
    Represents the download of a single episode using youtube-dl.

    Actual youtube-dl interaction via gPodderYoutubeDL.
    """
    def __init__(self, ytdl, url, episode):
        """
        :param ytdl: object providing fetch_info() and fetch_video() (a gPodderYoutubeDL)
        :param url: URL handed to youtube-dl for this episode
        :param episode: episode being downloaded; its total_time is updated
                        when youtube-dl reports a duration
        """
        self._ytdl = ytdl
        self._url = url
        self._reporthook = None
        # bytes of the files already completed for this episode
        # (several files are downloaded when formats get merged)
        self._prev_dl_bytes = 0
        self._episode = episode
        self._partial_filename = None

    @property
    def partial_filename(self):
        """Name of the file youtube-dl is writing to, None until retrieve_resume() computed it."""
        return self._partial_filename

    @partial_filename.setter
    def partial_filename(self, val):
        self._partial_filename = val

    def retrieve_resume(self, tempname, reporthook=None):
        """
        called by download.DownloadTask to perform the download.

        :param str tempname: path where the downloaded file must end up
        :param reporthook: optional callable(downloaded, blocksize, total) notified of the progress
        :return: (headers, url) -- headers is a dict holding a 'content-type'
                 when one could be deduced from the extension reported by youtube-dl,
                 url is the one reported by youtube-dl or else the requested URL
        :raises Exception: when yt-dlp doesn't report where it saved the file
        """
        self._reporthook = reporthook
        # outtmpl: use given tempname by DownloadTask
        # (escape % because outtmpl used as a string template by youtube-dl)
        outtmpl = tempname.replace('%', '%%')
        info, opts = self._ytdl.fetch_info(self._url, outtmpl, self._my_hook)
        if program_name == 'yt-dlp':
            # outtmpl is either a single template or a dict of templates
            template = opts['outtmpl']
            if isinstance(template, dict):
                template = template['default']
            self.partial_filename = os.path.join(opts['paths']['home'], template) % info
        elif program_name == 'youtube-dl':
            self.partial_filename = opts['outtmpl'] % info

        res = self._ytdl.fetch_video(info, opts)
        if program_name == 'yt-dlp':
            # yt-dlp downloads to whatever file name it wants, so rename
            downloads = res.get('requested_downloads') or [{}]
            filepath = downloads[0].get('filepath')
            if not filepath:
                raise Exception("Could not determine youtube-dl output file")
            if filepath != tempname:
                self._move_to_tempname(filepath, tempname)

        if res.get('duration'):
            self._episode.total_time = res['duration']

        headers = {}
        # youtube-dl doesn't return a content-type but an extension
        if res.get('ext'):
            dot_ext = '.{}'.format(res['ext'])
            if program_name == 'youtube-dl':
                # See #673 when merging multiple formats, the extension is appended to the tempname
                # by youtube-dl resulting in empty .partial file + .partial.mp4 exists
                # and #796 .mkv is chosen by ytdl sometimes
                for try_ext in (dot_ext, ".mp4", ".m4a", ".webm", ".mkv"):
                    tempname_with_ext = tempname + try_ext
                    if os.path.isfile(tempname_with_ext):
                        self._move_to_tempname(tempname_with_ext, tempname)
                        # the content-type must describe the file actually kept
                        dot_ext = try_ext
                        break

            ext_filetype = util.mimetype_from_extension(dot_ext)
            if ext_filetype:
                # YouTube weba formats have a webm extension and get a video/webm mime-type
                # but audio content has no width or height, so change it to audio/webm for correct icon and player
                if ext_filetype.startswith('video/') and res.get('height') is None:
                    ext_filetype = ext_filetype.replace('video/', 'audio/')
                headers['content-type'] = ext_filetype
        return headers, res.get('url', self._url)

    @staticmethod
    def _move_to_tempname(filepath, tempname):
        """Move the file written by youtube-dl at filepath to tempname, replacing a file already there."""
        logger.debug('%s downloaded to "%s" instead of "%s", moving',
                     program_name,
                     os.path.basename(filepath),
                     os.path.basename(tempname))
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when tempname exists
        os.replace(filepath, tempname)

    def _my_hook(self, d):
        """
        Progress hook given to youtube-dl: forwards the progress to the reporthook.

        :param dict d: status reported by youtube-dl; 'status' selects the
                       handling, byte counts are read from 'downloaded_bytes',
                       'total_bytes' and 'total_bytes_estimate'
        """
        status = d['status']
        if status == 'downloading':
            # retrieve_resume() may have been called without a reporthook
            if self._reporthook:
                dl_bytes = d['downloaded_bytes']
                total_bytes = d.get('total_bytes') or d.get('total_bytes_estimate') or 0
                self._reporthook(self._prev_dl_bytes + dl_bytes,
                                 1,
                                 self._prev_dl_bytes + total_bytes)
        elif status == 'finished':
            # remember the size of this file: another one may follow
            self._prev_dl_bytes += d['downloaded_bytes']
            if self._reporthook:
                self._reporthook(self._prev_dl_bytes, 1, self._prev_dl_bytes)
        elif status == 'error':
            logger.error('download hook error: %r', d)
        else:
            logger.debug('unknown download hook status: %r', d)
class YoutubeFeed(model.Feed):
    """
    Represents the youtube feed for model.PodcastChannel
    """
    def __init__(self, url, cover_url, description, max_episodes, ie_result, downloader):
        """
        :param url: URL of the channel or playlist given to youtube-dl
        :param cover_url: cover of the channel
        :param description: description of the channel
        :param max_episodes: maximum number of episodes to keep, falsy for no limit
        :param dict ie_result: youtube-dl result for the channel or playlist;
                               its 'entries' are filtered in place
        :param downloader: object providing refresh_entries() (a gPodderYoutubeDL)
        """
        self._url = url
        self._cover_url = cover_url
        self._description = description
        self._max_episodes = max_episodes
        ie_result['entries'] = self._process_entries(ie_result.get('entries', []))
        self._ie_result = ie_result
        self._downloader = downloader

    def _process_entries(self, entries):
        """
        Keep the youtube videos among entries, without duplicates, up to max_episodes.

        Each kept entry gets a 'guid' key.

        :param entries: iterable of entry dicts (may be a generator)
        :return list: the entries to keep
        """
        filtered_entries = []
        seen_guids = set()
        for e in entries:  # consumes the generator!
            if e.get('_type', 'video') in ('url', 'url_transparent') and e.get('ie_key') == 'Youtube':
                guid = video_guid(e['id'])
                e['guid'] = guid
                if guid in seen_guids:
                    logger.debug('dropping already seen entry %s title="%s"', guid, e.get('title'))
                else:
                    filtered_entries.append(e)
                    seen_guids.add(guid)
            else:
                logger.debug('dropping entry not youtube video %r', e)
            # a falsy max_episodes means no limit: without this test the
            # enumeration would stop as soon as a leading entry is dropped
            if self._max_episodes and len(filtered_entries) >= self._max_episodes:
                # entries is a generator: stopping now prevents it to download more pages
                logger.debug('stopping entry enumeration')
                break
        return filtered_entries

    def get_title(self):
        return '{} (YouTube)'.format(self._ie_result.get('title') or self._ie_result.get('id') or self._url)

    def get_link(self):
        return self._ie_result.get('webpage_url')

    def get_description(self):
        return self._description

    def get_cover_url(self):
        return self._cover_url

    def get_http_etag(self):
        """ :return str: optional -- last HTTP etag header, for conditional request next time """
        # youtube-dl doesn't provide it!
        return None

    def get_http_last_modified(self):
        """ :return str: optional -- last HTTP Last-Modified header, for conditional request next time """
        # youtube-dl doesn't provide it!
        return None

    def get_new_episodes(self, channel, existing_guids):
        """
        Create the episodes of the entries that are not known yet.

        :param channel: object whose episode_factory() creates an episode from a dict
        :param existing_guids: guids of the already known episodes
        :return: (episodes, all_seen_guids) -- the list of created episodes and
                 the set of guids of all the entries of the feed
        """
        # entries are already sorted by decreasing date
        # trim guids to max episodes
        entries = list(self._ie_result['entries'])
        if self._max_episodes:
            entries = entries[:self._max_episodes]
        all_seen_guids = {e['guid'] for e in entries}
        # only fetch new ones from youtube since they are so slow to get
        new_entries = [e for e in entries if e['guid'] not in existing_guids]
        logger.debug('%i/%i new entries', len(new_entries), len(all_seen_guids))
        self._ie_result['entries'] = new_entries
        self._downloader.refresh_entries(self._ie_result)
        # episodes from entries
        episodes = []
        for en in self._ie_result['entries']:
            guid = video_guid(en['id'])
            if en.get('ext'):
                mime_type = util.mimetype_from_extension('.{}'.format(en['ext']))
            else:
                mime_type = 'application/octet-stream'
            if en.get('filesize'):
                filesize = int(en['filesize'] or 0)
            else:
                # no global size: add up the sizes of the formats to download
                filesize = sum(int(f.get('filesize') or 0)
                               for f in en.get('requested_formats', []))
            ep = {
                'title': en.get('title', guid),
                'link': en.get('webpage_url'),
                'episode_art_url': en.get('thumbnail'),
                'description': util.remove_html_tags(en.get('description') or ''),
                'description_html': '',
                'url': en.get('webpage_url'),
                'file_size': filesize,
                'mime_type': mime_type,
                'guid': guid,
                'published': youtube_parsedate(en.get('upload_date', None)),
                'total_time': int(en.get('duration') or 0),
            }
            episode = channel.episode_factory(ep)
            # NOTE(review): presumably the new episode has to be persisted
            # here, as other gPodder feed handlers do -- confirm
            episode.save()
            episodes.append(episode)
        return episodes, all_seen_guids

    def get_next_page(self, channel, max_episodes):
        """
        Paginated feed support (RFC 5005).
        If the feed is paged, return the next feed page.
        Returned page will in turn be asked for the next page, until None is returned.
        :return feedcore.Result: the next feed's page,
                                 as a fully parsed Feed or None
        """
        # this feed is never paged
        return None
class gPodderYoutubeDL(download.CustomDownloader):
    """
    Gateway to youtube-dl / yt-dlp: fetches channels, playlists and videos.
    """
    def __init__(self, gpodder_config, my_config, force=False):
        """
        :param gpodder_config: gPodder configuration (its youtube section selects the formats)
        :param my_config: configuration of this extension
                          (manage_channel, manage_downloads, embed_subtitles)
        :param force: force using this downloader even if config says don't manage downloads
        """
        self.gpodder_config = gpodder_config
        self.my_config = my_config
        self.force = force
        # cachedir is not much used in youtube-dl, but set it anyway
        cachedir = os.path.join(gpodder.home, 'youtube-dl')
        os.makedirs(cachedir, exist_ok=True)
        # options shared by every YoutubeDL instance created by this object
        self._ydl_opts = {
            'cachedir': cachedir,
            'no_color': True,  # prevent escape codes in desktop notifications on errors
            'noprogress': True,  # prevent progress bar from appearing in console
        }
        # NOTE(review): the condition is presumably gPodder's own verbose flag -- confirm
        if gpodder.verbose:
            self._ydl_opts['verbose'] = True
        else:
            self._ydl_opts['quiet'] = True
        # Don't create downloaders for URLs supported by these youtube-dl extractors
        self.ie_blacklist = ["Generic"]
        # Cache URL regexes from youtube-dl matches here, seed with youtube regex
        self.regex_cache = [re.compile(r'https://www\.youtube\.com/watch\?v=.+')]
        # #686 on windows without a console, sys.stdout is None, causing exceptions
        # when adding podcasts.
        # See https://docs.python.org/3/library/sys.html#sys.__stderr__ Note
        if not sys.stdout:
            logger.debug('no stdout, setting youtube-dl logger')
            self._ydl_opts['logger'] = logger

    def add_format(self, gpodder_config, opts, fallback=None):
        """ construct youtube-dl -f argument from configured format. """
        # You can set a custom format or custom formats by editing the config for key
        # `youtube.preferred_fmt_ids`
        #
        # It takes a list of format strings separated by comma: bestaudio, 18
        # they are translated to youtube dl format bestaudio/18, meaning preferably
        # the best audio quality (audio-only) and MP4 360p if it's not available.
        #
        # See https://github.com/ytdl-org/youtube-dl#format-selection for details
        # about youtube-dl format specification.
        fmt_ids = youtube.get_fmt_ids(gpodder_config.youtube, False)
        opts['format'] = '/'.join(str(fmt) for fmt in fmt_ids)
        if fallback:
            opts['format'] += '/' + fallback
        logger.debug('format=%s', opts['format'])

    def fetch_info(self, url, tempname, reporthook):
        """
        Extract the information about a video, without downloading it.

        :param url: URL of the video
        :param tempname: path of the file to download to (used to build the output template)
        :param reporthook: progress hook to hand to youtube-dl
        :return: (info, opts) -- the extracted information and the options
                 to give back to fetch_video()
        """
        subs = self.my_config.embed_subtitles
        if program_name == 'yt-dlp':
            # yt-dlp gets the directory through 'paths'
            outtmpl = os.path.basename(tempname)
        else:
            outtmpl = tempname
        opts = {
            'paths': {'home': os.path.dirname(tempname)},
            # Postprocessing in yt-dlp breaks without ext
            'outtmpl': outtmpl + '.%(ext)s',
            'nopart': True,  # don't append .part (already .partial)
            'retries': 3,  # retry a few times
            'progress_hooks': [reporthook],  # to notify UI
            'writesubtitles': subs,
            'subtitleslangs': ['all'] if subs else [],
            'postprocessors': [{'key': 'FFmpegEmbedSubtitle'}] if subs else [],
        }
        opts.update(self._ydl_opts)
        self.add_format(self.gpodder_config, opts)
        with youtube_dl.YoutubeDL(opts) as ydl:
            info = ydl.extract_info(url, download=False)
            return info, opts

    def fetch_video(self, info, opts):
        """
        Download a video.

        :param info: information returned by fetch_info()
        :param opts: options returned by fetch_info()
        :return: the result of youtube-dl's process_video_result()
        """
        with youtube_dl.YoutubeDL(opts) as ydl:
            return ydl.process_video_result(info, download=True)

    def refresh_entries(self, ie_result):
        """
        Fetch the metadata of each entry of ie_result.

        ie_result['entries'] is replaced by the list of the entries that
        could be processed; the ones youtube-dl failed on are skipped.
        """
        # only interested in video metadata
        opts = {
            'skip_download': True,  # don't download the video
            'youtube_include_dash_manifest': False,  # don't download the DASH manifest
        }
        self.add_format(self.gpodder_config, opts, fallback='18')
        opts.update(self._ydl_opts)
        new_entries = []
        # everything but the entries is the same for each video
        base = {k: v for k, v in ie_result.items() if k != 'entries'}
        # refresh videos one by one to catch single videos blocked by youtube
        for e in ie_result.get('entries', []):
            tmp = dict(base, entries=[e])
            try:
                with youtube_dl.YoutubeDL(opts) as ydl:
                    ydl.process_ie_result(tmp, download=False)
                    new_entries.extend(tmp.get('entries') or [])
            except youtube_dl.utils.DownloadError as ex:
                # exc_info may be unset on the error
                exc_type, exc_value = (ex.exc_info or (None, None))[:2]
                if exc_type is not None and issubclass(exc_type, youtube_dl.utils.ExtractorError):
                    # for instance "This video contains content from xyz, who has blocked it on copyright grounds"
                    logger.warning('Skipping %s: %s', e.get('title', ''), exc_value)
                else:
                    logger.exception('Skipping %r: %s', tmp, ex.exc_info)
        ie_result['entries'] = new_entries

    def refresh(self, url, channel_url, max_episodes):
        """
        Fetch a channel or playlist contents.

        Doesn't yet fetch video entry informations, so we only get the video id and title.

        :param url: URL of the channel or playlist for youtube-dl
        :param channel_url: URL of the feed, used to get the cover and the description
        :param max_episodes: maximum number of episodes of the feed
        :return feedcore.Result: an UPDATED_FEED result holding a YoutubeFeed
        :raises Exception: when youtube-dl returns something else than an url or a playlist
        """
        # Duplicate a bit of the YoutubeDL machinery here because we only
        # want to parse the channel/playlist first, not to fetch video entries.
        # We call YoutubeDL.extract_info(process=False), so we
        # have to call extract_info again ourselves when we get a result of type 'url'.
        def extract_type(ie_result):
            result_type = ie_result.get('_type', 'video')
            if result_type not in ('url', 'playlist', 'multi_video'):
                raise Exception('Unsuported result_type: {}'.format(result_type))
            has_playlist = result_type in ('playlist', 'multi_video')
            return result_type, has_playlist

        opts = {
            'youtube_include_dash_manifest': False,  # only interested in video title and id
        }
        opts.update(self._ydl_opts)
        with youtube_dl.YoutubeDL(opts) as ydl:
            ie_result = ydl.extract_info(url, download=False, process=False)
            result_type, has_playlist = extract_type(ie_result)
            while not has_playlist:
                if result_type in ('url', 'url_transparent'):
                    ie_result['url'] = youtube_dl.utils.sanitize_url(ie_result['url'])
                if result_type == 'url':
                    logger.debug("extract_info(%s) to get the video list", ie_result['url'])
                    # We have to add extra_info to the results because it may be
                    # contained in a playlist
                    ie_result = ydl.extract_info(ie_result['url'],
                                                 download=False,
                                                 process=False,
                                                 ie_key=ie_result.get('ie_key'))
                result_type, has_playlist = extract_type(ie_result)
        cover_url = youtube.get_cover(channel_url)  # youtube-dl doesn't provide the cover url!
        description = youtube.get_channel_desc(channel_url)  # youtube-dl doesn't provide the description!
        return feedcore.Result(feedcore.UPDATED_FEED,
                               YoutubeFeed(url, cover_url, description, max_episodes, ie_result, self))

    def fetch_channel(self, channel, max_episodes=0):
        """
        called by model.gPodderFetcher to get a custom feed.
        :returns feedcore.Result: a YoutubeFeed or None if channel is not a youtube channel or playlist
        """
        if not self.my_config.manage_channel:
            return None
        # feed URL pattern => URL understood by youtube-dl, first match wins
        url_templates = (
            (CHANNEL_RE, 'https://www.youtube.com/channel/{}/videos'),
            (USER_RE, 'https://www.youtube.com/user/{}/videos'),
            (PLAYLIST_RE, 'https://www.youtube.com/playlist?list={}'),
        )
        for pattern, template in url_templates:
            m = pattern.match(channel.url)
            if m:
                url = template.format(m.group(1))
                logger.info('youtube-dl handling %s => %s', channel.url, url)
                return self.refresh(url, channel.url, max_episodes)
        return None

    def is_supported_url(self, url):
        """
        :return bool: True if url is a youtube video or is handled by a
                      youtube-dl extractor that isn't in ie_blacklist
        """
        if not url:
            return False
        if self.regex_cache[0].match(url) is not None:
            return True
        for r in self.regex_cache[1:]:
            if r.match(url) is not None:
                # move the regex to the front: it is tried first next time
                self.regex_cache.remove(r)
                self.regex_cache.insert(0, r)
                return True
        with youtube_dl.YoutubeDL(self._ydl_opts) as ydl:
            # youtube-dl returns a list, yt-dlp returns a dict
            ies = ydl._ies
            if isinstance(ies, dict):
                ies = ies.values()
            for ie in ies:
                if ie.suitable(url) and ie.ie_key() not in self.ie_blacklist:
                    # _VALID_URL_RE is private to the extractor: only cache
                    # it when it can be used like a compiled regex
                    valid_url_re = getattr(ie, '_VALID_URL_RE', None)
                    if hasattr(valid_url_re, 'match'):
                        self.regex_cache.insert(0, valid_url_re)
                    return True
        return False

    def custom_downloader(self, unused_config, episode):
        """
        called from registry.custom_downloader.resolve

        :return: a YoutubeCustomDownload for the episode, or None when
                 downloads aren't managed by this extension, the URL links to
                 a known media file or the URL isn't supported by youtube-dl
        """
        if not self.force and not self.my_config.manage_downloads:
            return None

        try:  # Reject URLs linking to known media files
            (_filename, ext) = util.filename_from_url(episode.url)
            if util.file_type_by_extension(ext) is not None:
                return None
        except Exception:
            # best effort: fall through and let youtube-dl decide
            logger.debug('could not get the file type of %s', episode.url, exc_info=True)

        if self.is_supported_url(episode.url):
            return YoutubeCustomDownload(self, episode.url, episode)

        return None
class gPodderExtension:
    """
    The extension itself: registers the youtube-dl feed handler and
    downloader, and provides the context menu entry and the preferences.
    """
    def __init__(self, container):
        """
        :param container: gives access to the configuration of this extension
                          (container.config) and to gPodder's (container.manager.core.config)
        """
        self.container = container
        self.ytdl = None  # gPodderYoutubeDL, created by on_load()
        self.gpodder = None  # main window, set by on_ui_object_available()
        self.infobar = None  # "ffmpeg not found" message, created by show_preferences()

    @staticmethod
    def _old_version_message():
        """Return the upgrade warning if youtube-dl/yt-dlp is older than want_ytdl_version, else None."""
        have_version = youtube_dl.version.__version__
        version_tuple = youtube_dl.utils.version_tuple
        if version_tuple(have_version) < version_tuple(want_ytdl_version):
            return want_ytdl_version_msg % {'have_version': have_version, 'want_version': want_ytdl_version}
        return None

    def on_load(self):
        """Register the feed handler and the downloader, and log a warning if youtube-dl is old."""
        self.ytdl = gPodderYoutubeDL(self.container.manager.core.config, self.container.config)
        logger.info('Registering youtube-dl. (using %s %s)', program_name, youtube_dl.version.__version__)
        registry.feed_handler.register(self.ytdl.fetch_channel)
        registry.custom_downloader.register(self.ytdl.custom_downloader)

        message = self._old_version_message()
        if message:
            logger.error(message)

    def on_unload(self):
        """Unregister what on_load() registered."""
        logger.info('Unregistering youtube-dl.')
        if self.ytdl is None:
            # on_load() didn't run: nothing to unregister
            return
        # best effort: a callback may not be registered, for instance when
        # on_load() failed part-way
        # NOTE(review): presumably unregister() raises ValueError then -- confirm
        try:
            registry.feed_handler.unregister(self.ytdl.fetch_channel)
        except ValueError:
            pass
        try:
            registry.custom_downloader.unregister(self.ytdl.custom_downloader)
        except ValueError:
            pass
        self.ytdl = None

    def on_ui_object_available(self, name, ui_object):
        """Keep a reference to the main window and notify the user if youtube-dl is old."""
        if name == 'gpodder-gtk':
            self.gpodder = ui_object

            message = self._old_version_message()
            if message:
                ui_object.notification(message, _('Old youtube-dl'),
                                       important=True, widget=ui_object.main_window)

    def on_episodes_context_menu(self, episodes):
        """
        :return: the "Download with youtube-dl" menu entry when downloads
                 aren't managed by youtube-dl and an episode can be downloaded,
                 else None
        """
        if not self.container.config.manage_downloads and any(e.can_download() for e in episodes):
            return [(_("Download with youtube-dl"), self.download_episodes)]
        return None

    def download_episodes(self, episodes):
        """Download the episodes that can be downloaded with youtube-dl."""
        episodes = [e for e in episodes if e.can_download()]

        # create a new gPodderYoutubeDL to force using it even if manage_downloads is False
        downloader = gPodderYoutubeDL(self.container.manager.core.config, self.container.config, force=True)
        self.gpodder.download_episode_list(episodes, downloader=downloader)

    def toggle_manage_channel(self, widget):
        self.container.config.manage_channel = widget.get_active()

    def toggle_manage_downloads(self, widget):
        self.container.config.manage_downloads = widget.get_active()

    def toggle_embed_subtitles(self, widget):
        """Store the embed_subtitles setting; refuse to enable it when ffmpeg isn't found."""
        enabled = widget.get_active()
        if enabled and not util.find_command('ffmpeg'):
            # tell why the checkbox gets unchecked
            if self.infobar is not None:
                self.infobar.show()
            widget.set_active(False)
            enabled = False
        self.container.config.embed_subtitles = enabled

    def show_preferences(self):
        """
        Build the preferences of the extension.

        :return Gtk.Box: the widget holding the settings
        """
        def add_separator():
            box.pack_start(Gtk.Separator(orientation=Gtk.Orientation.HORIZONTAL), False, False, 0)

        def add_checkbox(text, active, on_toggled):
            checkbox = Gtk.CheckButton(label=text)
            # set the state before connecting: the handler isn't called for it
            checkbox.set_active(active)
            checkbox.connect('toggled', on_toggled)
            box.pack_start(checkbox, False, False, 0)

        config = self.container.config
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(10)

        label = Gtk.Label(label='%s %s' % (program_name, youtube_dl.version.__version__))
        box.pack_start(label, False, False, 0)

        add_separator()

        add_checkbox(_('Parse YouTube channel feeds with youtube-dl to access more than 15 episodes'),
                     config.manage_channel, self.toggle_manage_channel)

        add_separator()

        add_checkbox(_('Download all supported episodes with youtube-dl'),
                     config.manage_downloads, self.toggle_manage_downloads)
        note = Gtk.Label(use_markup=True, wrap=True, label=_(
            'youtube-dl provides access to additional YouTube formats and DRM content.'
            ' Episodes from non-YouTube channels, that have youtube-dl support, will <b>fail</b> to download unless you manually'
            ' <a href="https://gpodder.github.io/docs/youtube.html#formats">add custom formats</a> for each site.'
            ' <b>Download with youtube-dl</b> appears in the episode menu when this option is disabled,'
            ' and can be used to manually download from supported sites.'))
        note.connect('activate-link', lambda label, url: util.open_website(url))
        note.set_property('xalign', 0.0)
        box.add(note)

        add_separator()

        add_checkbox(_('Embed all available subtitles in downloaded video'),
                     config.embed_subtitles, self.toggle_embed_subtitles)

        infobar = Gtk.InfoBar()
        infobar.get_content_area().add(Gtk.Label(wrap=True, label=_(
            'The "ffmpeg" command was not found. FFmpeg is required for embedding subtitles.')))
        self.infobar = infobar
        box.pack_end(infobar, False, False, 0)

        box.show_all()
        # only shown by toggle_embed_subtitles() when ffmpeg is missing
        infobar.hide()
        return box

    def on_preferences(self):
        """:return: the (title, callback building the widget) pairs of the preferences of the extension"""
        return [(_('youtube-dl'), self.show_preferences)]