Fix typo in label.
[gpodder.git] / share / gpodder / extensions / youtube-dl.py
blob610d4ab1a751d399daf203096101dff8e0366ffc
1 # -*- coding: utf-8 -*-
2 # Manage YouTube subscriptions using youtube-dl (https://github.com/ytdl-org/youtube-dl)
3 # Requirements: youtube-dl module (pip install youtube_dl)
4 # (c) 2019-08-17 Eric Le Lay <elelay.fr:contact>
5 # Released under the same license terms as gPodder itself.
7 import logging
8 import os
9 import re
10 import sys
11 import time
# Prefer yt-dlp when it is installed and fall back to the original youtube-dl.
# Both are exposed under the name `youtube_dl`; `program_name` records which
# one was actually imported so the rest of the module can adapt to it.
# Only ImportError triggers the fallback: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit raised while importing.
try:
    import yt_dlp as youtube_dl
    program_name = 'yt-dlp'
    want_ytdl_version = '2023.02.17'
except ImportError:
    import youtube_dl
    program_name = 'youtube-dl'
    want_ytdl_version = '2023.02.17'  # youtube-dl has been patched, but not yet released
22 import gpodder
23 from gpodder import download, feedcore, model, registry, util, youtube
25 import gi # isort:skip
26 gi.require_version('Gtk', '3.0') # isort:skip
27 from gi.repository import Gtk # isort:skip
# Shortcut to gPodder's gettext function, used for the user-visible strings below
_ = gpodder.gettext


logger = logging.getLogger(__name__)


# Extension metadata (presumably read by gPodder's extension loader -- TODO confirm)
__title__ = 'youtube-dl'
__description__ = _('Manage YouTube subscriptions using youtube-dl (pip install youtube_dl) or yt-dlp (pip install yt-dlp)')
__only_for__ = 'gtk, cli'
__authors__ = 'Eric Le Lay <elelay.fr:contact>'
__doc__ = 'https://gpodder.github.io/docs/extensions/youtubedl.html'

# Template of the warning emitted when the imported youtube-dl/yt-dlp is older
# than want_ytdl_version; filled with `have_version` and `want_version`.
want_ytdl_version_msg = _('Your version of youtube-dl/yt-dlp %(have_version)s has known issues, please upgrade to %(want_version)s or newer.')
# Default values for this extension's settings.
DefaultConfig = {
    # youtube-dl downloads and parses each video page to get information about it, which is very slow.
    # Set to False to fall back to the fast but limited (only 15 episodes) gpodder code
    'manage_channel': True,
    # If for some reason youtube-dl download doesn't work for you, you can fall back to gpodder code.
    # Set to False to fall back to default gpodder code (less available formats).
    'manage_downloads': True,
    # Embed all available subtitles to downloaded videos. Needs ffmpeg.
    'embed_subtitles': False,
}
# youtube feed still preprocessed by youtube.py (compat)
# Each pattern captures the identifier that follows the query parameter.
# Dots are escaped so that only the literal host and path match.
CHANNEL_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?channel_id=(.+)''')
USER_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?user=(.+)''')
PLAYLIST_RE = re.compile(r'''https://www\.youtube\.com/feeds/videos\.xml\?playlist_id=(.+)''')
def youtube_parsedate(s):
    """Convert a youtube-dl date string into a unix timestamp.

    The only input this function is meant for is the ``YYYYMMDD`` form
    returned by the youtube-dl API (e.g. 20170920); it is interpreted as
    midnight in local time. A falsy value (None, '') yields 0.
    """
    if not s:
        return 0
    parsed = time.strptime(s, "%Y%m%d")
    return time.mktime(parsed)
def video_guid(video_id):
    """
    Build the episode guid for a video id, in the same shape youtube uses
    (``yt:video:<id>``).
    """
    prefix = 'yt:video:'
    return prefix + format(video_id)
class YoutubeCustomDownload(download.CustomDownload):
    """
    Represents the download of a single episode using youtube-dl.

    Actual youtube-dl interaction via gPodderYoutubeDL.
    """
    def __init__(self, ytdl, url, episode):
        """
        :param ytdl: object providing fetch_info() and fetch_video()
        :param url: URL handed over to youtube-dl
        :param episode: episode being downloaded; its total_time is updated
                        when youtube-dl reports a duration
        """
        self._ytdl = ytdl
        self._url = url
        self._reporthook = None
        # bytes of the files already completed during this download
        # (accumulated by _my_hook on each 'finished' status)
        self._prev_dl_bytes = 0
        self._episode = episode
        self._partial_filename = None

    @property
    def partial_filename(self):
        return self._partial_filename

    @partial_filename.setter
    def partial_filename(self, val):
        self._partial_filename = val

    def retrieve_resume(self, tempname, reporthook=None):
        """
        called by download.DownloadTask to perform the download.

        :param tempname: path the downloaded file must end up at
        :param reporthook: optional progress callback, called with three
                           positional numbers (see _my_hook)
        :return: (headers, url) -- headers holds a 'content-type' guessed from
                 the file extension when one is known; url is the 'url' of the
                 youtube-dl result, or the requested URL if there is none
        :raises Exception: if yt-dlp doesn't report the file it wrote
        """
        self._reporthook = reporthook
        # outtmpl: use given tempname by DownloadTask
        # (escape % because outtmpl used as a string template by youtube-dl)
        outtmpl = tempname.replace('%', '%%')
        info, opts = self._ytdl.fetch_info(self._url, outtmpl, self._my_hook)
        if program_name == 'yt-dlp':
            template = opts['outtmpl']
            # outtmpl can be a plain string or a dict with a 'default' entry
            default = template['default'] if isinstance(template, dict) else template
            self.partial_filename = os.path.join(opts['paths']['home'], default) % info
        elif program_name == 'youtube-dl':
            self.partial_filename = opts['outtmpl'] % info

        res = self._ytdl.fetch_video(info, opts)
        if program_name == 'yt-dlp':
            # yt-dlp downloads to whatever file name it wants, so rename
            # (`or [{}]` also covers a present-but-empty list)
            downloads = res.get('requested_downloads') or [{}]
            filepath = downloads[0].get('filepath')
            if filepath is None:
                raise Exception("Could not determine youtube-dl output file")
            if filepath != tempname:
                logger.debug('yt-dlp downloaded to "%s" instead of "%s", moving',
                             os.path.basename(filepath),
                             os.path.basename(tempname))
                # os.replace overwrites tempname in one step, whether or not it exists
                os.replace(filepath, tempname)

        if 'duration' in res and res['duration']:
            self._episode.total_time = res['duration']
        headers = {}
        # youtube-dl doesn't return a content-type but an extension
        if 'ext' in res:
            dot_ext = '.{}'.format(res['ext'])
            if program_name == 'youtube-dl':
                # See #673 when merging multiple formats, the extension is appended to the tempname
                # by youtube-dl resulting in empty .partial file + .partial.mp4 exists
                # and #796 .mkv is chosen by ytdl sometimes
                for try_ext in (dot_ext, ".mp4", ".m4a", ".webm", ".mkv"):
                    tempname_with_ext = tempname + try_ext
                    if os.path.isfile(tempname_with_ext):
                        logger.debug('youtube-dl downloaded to "%s" instead of "%s", moving',
                                     os.path.basename(tempname_with_ext),
                                     os.path.basename(tempname))
                        os.replace(tempname_with_ext, tempname)
                        dot_ext = try_ext
                        break

            ext_filetype = util.mimetype_from_extension(dot_ext)
            if ext_filetype:
                # YouTube weba formats have a webm extension and get a video/webm mime-type
                # but audio content has no width or height, so change it to audio/webm for correct icon and player
                if ext_filetype.startswith('video/') and res.get('height') is None:
                    ext_filetype = ext_filetype.replace('video/', 'audio/')
                headers['content-type'] = ext_filetype
        return headers, res.get('url', self._url)

    def _my_hook(self, d):
        """
        Progress hook given to youtube-dl; forwards progress to the reporthook.

        The reporthook is called as reporthook(bytes_so_far, 1, total_bytes),
        counting the bytes of previously finished files in both numbers.

        :param d: status dict from youtube-dl, read for 'status',
                  'downloaded_bytes', 'total_bytes' and 'total_bytes_estimate'
        """
        status = d['status']
        if status == 'downloading':
            if self._reporthook:
                dl_bytes = d['downloaded_bytes']
                total_bytes = d.get('total_bytes') or d.get('total_bytes_estimate') or 0
                self._reporthook(self._prev_dl_bytes + dl_bytes,
                                 1,
                                 self._prev_dl_bytes + total_bytes)
        elif status == 'finished':
            dl_bytes = d['downloaded_bytes']
            self._prev_dl_bytes += dl_bytes
            if self._reporthook:
                self._reporthook(self._prev_dl_bytes, 1, self._prev_dl_bytes)
        elif status == 'error':
            logger.error('download hook error: %r', d)
        else:
            logger.debug('unknown download hook status: %r', d)
class YoutubeFeed(model.Feed):
    """
    Represents the youtube feed for model.PodcastChannel
    """
    def __init__(self, url, cover_url, description, max_episodes, ie_result, downloader):
        """
        :param url: URL the feed was fetched from (last-resort title)
        :param cover_url: value returned by get_cover_url()
        :param description: value returned by get_description()
        :param max_episodes: maximum number of entries to keep; a falsy value means no limit
        :param ie_result: youtube-dl result dict; its 'entries' are filtered in place
        :param downloader: object providing refresh_entries(ie_result)
        """
        self._url = url
        self._cover_url = cover_url
        self._description = description
        self._max_episodes = max_episodes
        ie_result['entries'] = self._process_entries(ie_result.get('entries', []))
        self._ie_result = ie_result
        self._downloader = downloader

    def _process_entries(self, entries):
        """
        Keep the youtube video entries, without duplicates, up to max_episodes.

        Every kept entry gets a 'guid' key.

        :param entries: iterable of entry dicts (possibly a generator)
        :return list: the filtered entries
        """
        filtered_entries = []
        seen_guids = set()
        for e in entries:  # consumes the generator!
            if e.get('_type', 'video') not in ('url', 'url_transparent') or e.get('ie_key') != 'Youtube':
                logger.debug('dropping entry not youtube video %r', e)
                continue
            guid = video_guid(e['id'])
            e['guid'] = guid
            if guid in seen_guids:
                logger.debug('dropping already seen entry %s title="%s"', guid, e.get('title'))
                continue
            filtered_entries.append(e)
            seen_guids.add(guid)
            # Only check the limit after an entry was kept, and only when a limit
            # is set: otherwise max_episodes == 0 would stop the enumeration as
            # soon as the first entry is dropped.
            if self._max_episodes and len(filtered_entries) >= self._max_episodes:
                # entries is a generator: stopping now prevents it to download more pages
                logger.debug('stopping entry enumeration')
                break
        return filtered_entries

    def get_title(self):
        return '{} (YouTube)'.format(self._ie_result.get('title') or self._ie_result.get('id') or self._url)

    def get_link(self):
        return self._ie_result.get('webpage_url')

    def get_description(self):
        return self._description

    def get_cover_url(self):
        return self._cover_url

    def get_http_etag(self):
        """ :return str: optional -- last HTTP etag header, for conditional request next time """
        # youtube-dl doesn't provide it!
        return None

    def get_http_last_modified(self):
        """ :return str: optional -- last HTTP Last-Modified header, for conditional request next time """
        # youtube-dl doesn't provide it!
        return None

    def get_new_episodes(self, channel, existing_guids):
        """
        Create and save episodes for the entries whose guid is not known yet.

        :param channel: object providing episode_factory(dict)
        :param existing_guids: container of the guids already known
        :return: (episodes, all_seen_guids) -- the newly created episodes and
                 the guids of all entries considered, new or not
        """
        # entries are already sorted by decreasing date
        # trim guids to max episodes
        entries = [e for i, e in enumerate(self._ie_result['entries'])
                   if not self._max_episodes or i < self._max_episodes]
        all_seen_guids = {e['guid'] for e in entries}
        # only fetch new ones from youtube since they are so slow to get
        new_entries = [e for e in entries if e['guid'] not in existing_guids]
        logger.debug('%i/%i new entries', len(new_entries), len(all_seen_guids))
        self._ie_result['entries'] = new_entries
        self._downloader.refresh_entries(self._ie_result)
        # episodes from entries
        episodes = []
        for en in self._ie_result['entries']:
            guid = video_guid(en['id'])
            if en.get('ext'):
                mime_type = util.mimetype_from_extension('.{}'.format(en['ext']))
            else:
                mime_type = 'application/octet-stream'
            if en.get('filesize'):
                filesize = int(en['filesize'] or 0)
            else:
                # no global size: add up the sizes of the individual formats
                filesize = sum(int(f.get('filesize') or 0)
                               for f in en.get('requested_formats') or [])
            ep = {
                # `or` also covers a title that is present but None
                'title': en.get('title') or guid,
                'link': en.get('webpage_url'),
                'episode_art_url': en.get('thumbnail'),
                'description': util.remove_html_tags(en.get('description') or ''),
                'description_html': '',
                'url': en.get('webpage_url'),
                'file_size': filesize,
                'mime_type': mime_type,
                'guid': guid,
                'published': youtube_parsedate(en.get('upload_date', None)),
                'total_time': int(en.get('duration') or 0),
            }
            episode = channel.episode_factory(ep)
            episode.save()
            episodes.append(episode)
        return episodes, all_seen_guids

    def get_next_page(self, channel, max_episodes):
        """
        Paginated feed support (RFC 5005).
        If the feed is paged, return the next feed page.
        Returned page will in turn be asked for the next page, until None is returned.
        :return feedcore.Result: the next feed's page,
                                 as a fully parsed Feed or None
        """
        # this feed is never paged
        return None
class gPodderYoutubeDL(download.CustomDownloader):
    """
    Feed handler and custom downloader backed by youtube-dl or yt-dlp.
    """
    def __init__(self, gpodder_config, my_config, force=False):
        """
        :param gpodder_config: gPodder configuration (its `youtube` section is read by add_format)
        :param my_config: this extension's configuration
                          (manage_channel, manage_downloads, embed_subtitles)
        :param force: force using this downloader even if config says don't manage downloads
        """
        self.gpodder_config = gpodder_config
        self.my_config = my_config
        self.force = force
        # cachedir is not much used in youtube-dl, but set it anyway
        cachedir = os.path.join(gpodder.home, 'youtube-dl')
        os.makedirs(cachedir, exist_ok=True)
        self._ydl_opts = {
            'cachedir': cachedir,
            'no_color': True,  # prevent escape codes in desktop notifications on errors
            'noprogress': True,  # prevent progress bar from appearing in console
        }
        if gpodder.verbose:
            self._ydl_opts['verbose'] = True
        else:
            self._ydl_opts['quiet'] = True
        # Don't create downloaders for URLs supported by these youtube-dl extractors
        self.ie_blacklist = ["Generic"]
        # Cache URL regexes from youtube-dl matches here, seed with youtube regex
        self.regex_cache = [re.compile(r'https://www\.youtube\.com/watch\?v=.+')]
        # #686 on windows without a console, sys.stdout is None, causing exceptions
        # when adding podcasts.
        # See https://docs.python.org/3/library/sys.html#sys.__stderr__ Note
        if not sys.stdout:
            logger.debug('no stdout, setting youtube-dl logger')
            self._ydl_opts['logger'] = logger

    def add_format(self, gpodder_config, opts, fallback=None):
        """ construct youtube-dl -f argument from configured format. """
        # You can set a custom format or custom formats by editing the config for key
        # `youtube.preferred_fmt_ids`
        #
        # It takes a list of format strings separated by comma: bestaudio, 18
        # they are translated to youtube dl format bestaudio/18, meaning preferably
        # the best audio quality (audio-only) and MP4 360p if it's not available.
        #
        # See https://github.com/ytdl-org/youtube-dl#format-selection for details
        # about youtube-dl format specification.
        fmt_ids = youtube.get_fmt_ids(gpodder_config.youtube, False)
        opts['format'] = '/'.join(str(fmt) for fmt in fmt_ids)
        if fallback:
            opts['format'] += '/' + fallback
        logger.debug('format=%s', opts['format'])

    def fetch_info(self, url, tempname, reporthook):
        """
        Extract the video information for url, without downloading it.

        :param url: video URL
        :param tempname: output path, already escaped for use as an output template
        :param reporthook: progress hook to register with youtube-dl
        :return: (info, opts) -- the extracted info and the options used,
                 both to be handed to fetch_video()
        """
        subs = self.my_config.embed_subtitles
        opts = {
            'paths': {'home': os.path.dirname(tempname)},
            # Postprocessing in yt-dlp breaks without ext
            'outtmpl': (os.path.basename(tempname) if program_name == 'yt-dlp'
                        else tempname) + '.%(ext)s',
            'nopart': True,  # don't append .part (already .partial)
            'retries': 3,  # retry a few times
            'progress_hooks': [reporthook],  # to notify UI
            'writesubtitles': subs,
            'subtitleslangs': ['all'] if subs else [],
            'postprocessors': [{'key': 'FFmpegEmbedSubtitle'}] if subs else [],
        }
        opts.update(self._ydl_opts)
        self.add_format(self.gpodder_config, opts)
        with youtube_dl.YoutubeDL(opts) as ydl:
            info = ydl.extract_info(url, download=False)
            return info, opts

    def fetch_video(self, info, opts):
        """ Download the video described by info, using the opts returned by fetch_info(). """
        with youtube_dl.YoutubeDL(opts) as ydl:
            return ydl.process_video_result(info, download=True)

    def refresh_entries(self, ie_result):
        """
        Fetch the metadata of every entry of ie_result.

        ie_result['entries'] is replaced by the entries that could be
        processed; entries failing with a DownloadError are skipped.
        """
        # only interested in video metadata
        opts = {
            'skip_download': True,  # don't download the video
            'youtube_include_dash_manifest': False,  # don't download the DASH manifest
        }
        self.add_format(self.gpodder_config, opts, fallback='18')
        opts.update(self._ydl_opts)
        new_entries = []
        # everything but the entries is shared by all the single-entry results
        common = {k: v for k, v in ie_result.items() if k != 'entries'}
        # refresh videos one by one to catch single videos blocked by youtube
        for e in ie_result.get('entries', []):
            tmp = dict(common)
            tmp['entries'] = [e]
            try:
                with youtube_dl.YoutubeDL(opts) as ydl:
                    ydl.process_ie_result(tmp, download=False)
                    new_entries.extend(tmp.get('entries'))
            except youtube_dl.utils.DownloadError as ex:
                # exc_info may be unset on the exception: don't index None
                exc_info = ex.exc_info or (None, None, None)
                if exc_info[0] is youtube_dl.utils.ExtractorError:
                    # for instance "This video contains content from xyz, who has blocked it on copyright grounds"
                    logger.warning('Skipping %s: %s', e.get('title', ''), exc_info[1])
                    continue
                logger.exception('Skipping %r: %s', tmp, ex.exc_info)
        ie_result['entries'] = new_entries

    def refresh(self, url, channel_url, max_episodes):
        """
        Fetch a channel or playlist contents.

        Doesn't yet fetch video entry informations, so we only get the video id and title.

        :param url: youtube channel/user/playlist page given to youtube-dl
        :param channel_url: feed URL, used to get the cover and the description
        :param max_episodes: passed on to YoutubeFeed
        :return feedcore.Result: UPDATED_FEED result wrapping a YoutubeFeed
        :raises Exception: if youtube-dl returns a result which is neither
                           an url nor a playlist
        """
        # Duplicate a bit of the YoutubeDL machinery here because we only
        # want to parse the channel/playlist first, not to fetch video entries.
        # We call YoutubeDL.extract_info(process=False), so we
        # have to call extract_info again ourselves when we get a result of type 'url'.
        def extract_type(ie_result):
            result_type = ie_result.get('_type', 'video')
            if result_type not in ('url', 'playlist', 'multi_video'):
                raise Exception('Unsuported result_type: {}'.format(result_type))
            has_playlist = result_type in ('playlist', 'multi_video')
            return result_type, has_playlist

        opts = {
            'youtube_include_dash_manifest': False,  # only interested in video title and id
        }
        opts.update(self._ydl_opts)
        with youtube_dl.YoutubeDL(opts) as ydl:
            ie_result = ydl.extract_info(url, download=False, process=False)
            result_type, has_playlist = extract_type(ie_result)
            while not has_playlist:
                if result_type in ('url', 'url_transparent'):
                    ie_result['url'] = youtube_dl.utils.sanitize_url(ie_result['url'])
                if result_type == 'url':
                    logger.debug("extract_info(%s) to get the video list", ie_result['url'])
                    # We have to add extra_info to the results because it may be
                    # contained in a playlist
                    ie_result = ydl.extract_info(ie_result['url'],
                                                 download=False,
                                                 process=False,
                                                 ie_key=ie_result.get('ie_key'))
                result_type, has_playlist = extract_type(ie_result)
        cover_url = youtube.get_cover(channel_url)  # youtube-dl doesn't provide the cover url!
        description = youtube.get_channel_desc(channel_url)  # youtube-dl doesn't provide the description!
        return feedcore.Result(feedcore.UPDATED_FEED,
                               YoutubeFeed(url, cover_url, description, max_episodes, ie_result, self))

    def fetch_channel(self, channel, max_episodes=0):
        """
        called by model.gPodderFetcher to get a custom feed.
        :returns feedcore.Result: a YoutubeFeed or None if channel is not a youtube channel or playlist
        """
        if not self.my_config.manage_channel:
            return None
        # feed URL pattern => page given to youtube-dl, tried in this order
        page_templates = (
            (CHANNEL_RE, 'https://www.youtube.com/channel/{}/videos'),
            (USER_RE, 'https://www.youtube.com/user/{}/videos'),
            (PLAYLIST_RE, 'https://www.youtube.com/playlist?list={}'),
        )
        for pattern, template in page_templates:
            m = pattern.match(channel.url)
            if m:
                url = template.format(m.group(1))
                logger.info('youtube-dl handling %s => %s', channel.url, url)
                return self.refresh(url, channel.url, max_episodes)
        return None

    def is_supported_url(self, url):
        """
        Tell whether youtube-dl has a non-blacklisted extractor for url.

        Regexes which matched are kept at the front of regex_cache, so that
        the extractors don't have to be enumerated again for similar URLs.
        """
        if url is None:
            return False
        if self.regex_cache[0].match(url) is not None:
            return True
        for r in self.regex_cache[1:]:
            if r.match(url) is not None:
                # move the matching regex to the front of the cache
                self.regex_cache.remove(r)
                self.regex_cache.insert(0, r)
                return True
        with youtube_dl.YoutubeDL(self._ydl_opts) as ydl:
            # youtube-dl returns a list, yt-dlp returns a dict
            ies = ydl._ies
            if isinstance(ies, dict):
                ies = ies.values()
            for ie in ies:
                if ie.suitable(url) and ie.ie_key() not in self.ie_blacklist:
                    self.regex_cache.insert(0, ie._VALID_URL_RE)
                    return True
        return False

    def custom_downloader(self, unused_config, episode):
        """
        called from registry.custom_downloader.resolve

        :return: a YoutubeCustomDownload for the episode, or None when this
                 downloader is disabled or doesn't apply to the episode URL
        """
        if not self.force and not self.my_config.manage_downloads:
            return None

        try:  # Reject URLs linking to known media files
            # (not named `_`, which would shadow the module's gettext alias here)
            _unused_name, ext = util.filename_from_url(episode.url)
            if util.file_type_by_extension(ext) is not None:
                return None
        except Exception:
            # best effort only: when in doubt, let youtube-dl decide below
            pass

        if self.is_supported_url(episode.url):
            return YoutubeCustomDownload(self, episode.url, episode)

        return None
class gPodderExtension:
    """
    Extension entry point: registers the youtube-dl feed handler and
    downloader, and provides the preferences panel and context menu entry.
    """
    def __init__(self, container):
        self.container = container
        self.ytdl = None
        self.infobar = None
        # main UI object, set by on_ui_object_available()
        self.gpodder = None

    @staticmethod
    def _outdated_ytdl_message():
        """Return the upgrade warning if the imported youtube-dl/yt-dlp is
        older than want_ytdl_version, else None."""
        have_version = youtube_dl.version.__version__
        version_tuple = youtube_dl.utils.version_tuple
        if version_tuple(have_version) < version_tuple(want_ytdl_version):
            return want_ytdl_version_msg % {'have_version': have_version,
                                            'want_version': want_ytdl_version}
        return None

    def on_load(self):
        self.ytdl = gPodderYoutubeDL(self.container.manager.core.config, self.container.config)
        logger.info('Registering youtube-dl. (using %s %s)', program_name, youtube_dl.version.__version__)
        registry.feed_handler.register(self.ytdl.fetch_channel)
        registry.custom_downloader.register(self.ytdl.custom_downloader)

        warning = self._outdated_ytdl_message()
        if warning is not None:
            logger.error(warning)

    def on_unload(self):
        logger.info('Unregistering youtube-dl.')
        if self.ytdl is None:
            # nothing was registered (on_load not run, or it failed early)
            return
        try:
            registry.feed_handler.unregister(self.ytdl.fetch_channel)
        except ValueError:
            pass
        try:
            registry.custom_downloader.unregister(self.ytdl.custom_downloader)
        except ValueError:
            pass
        self.ytdl = None

    def on_ui_object_available(self, name, ui_object):
        if name == 'gpodder-gtk':
            self.gpodder = ui_object

            warning = self._outdated_ytdl_message()
            if warning is not None:
                ui_object.notification(warning, _('Old youtube-dl'),
                                       important=True, widget=ui_object.main_window)

    def on_episodes_context_menu(self, episodes):
        """Offer a manual youtube-dl download when downloads aren't managed automatically."""
        if not self.container.config.manage_downloads and any(e.can_download() for e in episodes):
            return [(_("Download with youtube-dl"), self.download_episodes)]
        return None

    def download_episodes(self, episodes):
        episodes = [e for e in episodes if e.can_download()]

        # create a new gPodderYoutubeDL to force using it even if manage_downloads is False
        downloader = gPodderYoutubeDL(self.container.manager.core.config, self.container.config, force=True)
        self.gpodder.download_episode_list(episodes, downloader=downloader)

    def toggle_manage_channel(self, widget):
        self.container.config.manage_channel = widget.get_active()

    def toggle_manage_downloads(self, widget):
        self.container.config.manage_downloads = widget.get_active()

    def toggle_embed_subtitles(self, widget):
        """Store the subtitles setting; refuse to enable it when ffmpeg is not found."""
        enabled = widget.get_active()
        if enabled and not util.find_command('ffmpeg'):
            # show why the box gets unticked again
            self.infobar.show()
            widget.set_active(False)
            enabled = False
        self.container.config.embed_subtitles = enabled

    def show_preferences(self):
        """Build and return the preferences panel of the extension."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(10)

        label = Gtk.Label(label='%s %s' % (program_name, youtube_dl.version.__version__))
        box.pack_start(label, False, False, 0)

        box.pack_start(Gtk.HSeparator(), False, False, 0)

        checkbox = Gtk.CheckButton(label=_('Parse YouTube channel feeds with youtube-dl to access more than 15 episodes'))
        checkbox.set_active(self.container.config.manage_channel)
        checkbox.connect('toggled', self.toggle_manage_channel)
        box.pack_start(checkbox, False, False, 0)

        box.pack_start(Gtk.HSeparator(), False, False, 0)

        checkbox = Gtk.CheckButton(label=_('Download all supported episodes with youtube-dl'))
        checkbox.set_active(self.container.config.manage_downloads)
        checkbox.connect('toggled', self.toggle_manage_downloads)
        box.pack_start(checkbox, False, False, 0)
        note = Gtk.Label(use_markup=True, wrap=True, label=_(
            'youtube-dl provides access to additional YouTube formats and DRM content.'
            '  Episodes from non-YouTube channels, that have youtube-dl support, will <b>fail</b> to download unless you manually'
            ' <a href="https://gpodder.github.io/docs/youtube.html#formats">add custom formats</a> for each site.'
            '  <b>Download with youtube-dl</b> appears in the episode menu when this option is disabled,'
            ' and can be used to manually download from supported sites.'))
        note.connect('activate-link', lambda label, url: util.open_website(url))
        note.set_property('xalign', 0.0)
        box.add(note)

        box.pack_start(Gtk.HSeparator(), False, False, 0)

        checkbox = Gtk.CheckButton(label=_('Embed all available subtitles in downloaded video'))
        checkbox.set_active(self.container.config.embed_subtitles)
        checkbox.connect('toggled', self.toggle_embed_subtitles)
        box.pack_start(checkbox, False, False, 0)

        infobar = Gtk.InfoBar()
        infobar.get_content_area().add(Gtk.Label(wrap=True, label=_(
            'The "ffmpeg" command was not found. FFmpeg is required for embedding subtitles.')))
        self.infobar = infobar
        box.pack_end(infobar, False, False, 0)

        box.show_all()
        # only revealed by toggle_embed_subtitles() when ffmpeg is missing
        infobar.hide()
        return box

    def on_preferences(self):
        return [(_('youtube-dl'), self.show_preferences)]