yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import copy
   4 import datetime as dt
   5 import errno
   6 import fileinput
   7 import http.cookiejar
   8 import io
   9 import itertools
  10 import json
  11 import locale
  12 import operator
  13 import os
  14 import random
  15 import re
  16 import shutil
  17 import string
  18 import subprocess
  19 import sys
  20 import tempfile
  21 import time
  22 import tokenize
  23 import traceback
  24 import unicodedata
  25
  26 from .cache import Cache
  27 from .compat import functools, urllib  # isort: split
  28 from .compat import compat_os_name, urllib_req_to_req
  29 from .cookies import LenientSimpleCookie, load_cookies
  30 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  31 from .downloader.rtmp import rtmpdump_version
  32 from .extractor import gen_extractor_classes, get_info_extractor
  33 from .extractor.common import UnsupportedURLIE
  34 from .extractor.openload import PhantomJSwrapper
  35 from .minicurses import format_text
  36 from .networking import HEADRequest, Request, RequestDirector
  37 from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
  38 from .networking.exceptions import (
  39     HTTPError,
  40     NoSupportingHandlers,
  41     RequestError,
  42     SSLError,
  43     network_exceptions,
  44 )
  45 from .networking.impersonate import ImpersonateRequestHandler
  46 from .plugins import directories as plugin_directories
  47 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  48 from .postprocessor import (
  49     EmbedThumbnailPP,
  50     FFmpegFixupDuplicateMoovPP,
  51     FFmpegFixupDurationPP,
  52     FFmpegFixupM3u8PP,
  53     FFmpegFixupM4aPP,
  54     FFmpegFixupStretchedPP,
  55     FFmpegFixupTimestampPP,
  56     FFmpegMergerPP,
  57     FFmpegPostProcessor,
  58     FFmpegVideoConvertorPP,
  59     MoveFilesAfterDownloadPP,
  60     get_postprocessor,
  61 )
  62 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  63 from .update import (
  64     REPOSITORY,
  65     _get_system_deprecation,
  66     _make_label,
  67     current_git_head,
  68     detect_variant,
  69 )
  70 from .utils import (
  71     DEFAULT_OUTTMPL,
  72     IDENTITY,
  73     LINK_TEMPLATES,
  74     MEDIA_EXTENSIONS,
  75     NO_DEFAULT,
  76     NUMBER_RE,
  77     OUTTMPL_TYPES,
  78     POSTPROCESS_WHEN,
  79     STR_FORMAT_RE_TMPL,
  80     STR_FORMAT_TYPES,
  81     ContentTooShortError,
  82     DateRange,
  83     DownloadCancelled,
  84     DownloadError,
  85     EntryNotInPlaylist,
  86     ExistingVideoReached,
  87     ExtractorError,
  88     FormatSorter,
  89     GeoRestrictedError,
  90     ISO3166Utils,
  91     LazyList,
  92     MaxDownloadsReached,
  93     Namespace,
  94     PagedList,
  95     PlaylistEntries,
  96     Popen,
  97     PostProcessingError,
  98     ReExtractInfo,
  99     RejectedVideoReached,
 100     SameFileError,
 101     UnavailableVideoError,
 102     UserNotLive,
 103     YoutubeDLError,
 104     age_restricted,
 105     bug_reports_message,
 106     date_from_str,
 107     deprecation_warning,
 108     determine_ext,
 109     determine_protocol,
 110     encode_compat_str,
 111     encodeFilename,
 112     error_to_compat_str,
 113     escapeHTML,
 114     expand_path,
 115     extract_basic_auth,
 116     filter_dict,
 117     float_or_none,
 118     format_bytes,
 119     format_decimal_suffix,
 120     format_field,
 121     formatSeconds,
 122     get_compatible_ext,
 123     get_domain,
 124     int_or_none,
 125     iri_to_uri,
 126     is_path_like,
 127     join_nonempty,
 128     locked_file,
 129     make_archive_id,
 130     make_dir,
 131     number_of_digits,
 132     orderedSet,
 133     orderedSet_from_options,
 134     parse_filesize,
 135     preferredencoding,
 136     prepend_extension,
 137     remove_terminal_sequences,
 138     render_table,
 139     replace_extension,
 140     sanitize_filename,
 141     sanitize_path,
 142     sanitize_url,
 143     shell_quote,
 144     str_or_none,
 145     strftime_or_none,
 146     subtitles_filename,
 147     supports_terminal_sequences,
 148     system_identifier,
 149     filesize_from_tbr,
 150     timetuple_from_msec,
 151     to_high_limit_path,
 152     traverse_obj,
 153     try_call,
 154     try_get,
 155     url_basename,
 156     variadic,
 157     version_tuple,
 158     windows_enable_vt_mode,
 159     write_json_file,
 160     write_string,
 161 )
 162 from .utils._utils import _YDLLogger
 163 from .utils.networking import (
 164     HTTPHeaderDict,
 165     clean_headers,
 166     clean_proxies,
 167     std_headers,
 168 )
 169 from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
 170
 171 if compat_os_name == 'nt':
 172     import ctypes
 173
 174
 175 class YoutubeDL:
 176     """YoutubeDL class.
 177
 178     YoutubeDL objects are the ones responsible for downloading the
 179     actual video file and writing it to disk if the user has requested
 180     it, among some other tasks. In most cases there should be one per
 181     program. As, given a video URL, the downloader doesn't know how to
 182     extract all the needed information, task that InfoExtractors do, it
 183     has to pass the URL to one of them.
 184
 185     For this, YoutubeDL objects have a method that allows
 186     InfoExtractors to be registered in a given order. When it is passed
 187     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 188     finds that reports being able to handle it. The InfoExtractor extracts
 189     all the information about the video or videos the URL refers to, and
 190     YoutubeDL processes the extracted information, possibly using a File
 191     Downloader to download the video.
 192
 193     YoutubeDL objects accept a lot of parameters. In order not to saturate
 194     the object constructor with arguments, it receives a dictionary of
 195     options instead. These options are available through the params
 196     attribute for the InfoExtractors to use. The YoutubeDL also
 197     registers itself as the downloader in charge for the InfoExtractors
 198     that are added to it, so this is a "mutual registration".
 199
 200     Available options:
 201
 202     username:          Username for authentication purposes.
 203     password:          Password for authentication purposes.
 204     videopassword:     Password for accessing a video.
 205     ap_mso:            Adobe Pass multiple-system operator identifier.
 206     ap_username:       Multiple-system operator account username.
 207     ap_password:       Multiple-system operator account password.
 208     usenetrc:          Use netrc for authentication instead.
 209     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
 210     netrc_cmd:         Use a shell command to get credentials
 211     verbose:           Print additional info to stdout.
 212     quiet:             Do not print messages to stdout.
 213     no_warnings:       Do not print out anything for warnings.
 214     forceprint:        A dict with keys WHEN mapped to a list of templates to
 215                        print to stdout. The allowed keys are video or any of the
 216                        items in utils.POSTPROCESS_WHEN.
 217                        For compatibility, a single list is also accepted
 218     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 219                        a list of tuples with (template, filename)
 220     forcejson:         Force printing info_dict as JSON.
 221     dump_single_json:  Force printing the info_dict of the whole playlist
 222                        (or video) as a single JSON line.
 223     force_write_download_archive: Force writing download archive regardless
 224                        of 'skip_download' or 'simulate'.
 225     simulate:          Do not download the video files. If unset (or None),
 226                        simulate only if listsubtitles, listformats or list_thumbnails is used
 227     format:            Video format code. see "FORMAT SELECTION" for more details.
 228                        You can also pass a function. The function takes 'ctx' as
 229                        argument and returns the formats to download.
 230                        See "build_format_selector" for an implementation
 231     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 232     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 233                        extracting metadata even if the video is not actually
 234                        available for download (experimental)
 235     format_sort:       A list of fields by which to sort the video formats.
 236                        See "Sorting Formats" for more details.
 237     format_sort_force: Force the given format_sort. see "Sorting Formats"
 238                        for more details.
 239     prefer_free_formats: Whether to prefer video formats with free containers
 240                        over non-free ones of same quality.
 241     allow_multiple_video_streams:   Allow multiple video streams to be merged
 242                        into a single file
 243     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 244                        into a single file
 245     check_formats:     Whether to test if the formats are downloadable.
 246                        Can be True (check all), False (check none),
 247                        'selected' (check selected formats),
 248                        or None (check only if requested by extractor)
 249     paths:             Dictionary of output paths. The allowed keys are 'home',
 250                        'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
 251     outtmpl:           Dictionary of templates for output names. Allowed keys
 252                        are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
 253                        For compatibility with youtube-dl, a single string can also be used
 254     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 255     restrictfilenames: Do not allow "&" and spaces in file names
 256     trim_file_name:    Limit length of filename (extension excluded)
 257     windowsfilenames:  Force the filenames to be windows compatible
 258     ignoreerrors:      Do not stop on download/postprocessing errors.
 259                        Can be 'only_download' to ignore only download errors.
 260                        Default is 'only_download' for CLI, but False for API
 261     skip_playlist_after_errors: Number of allowed failures until the rest of
 262                        the playlist is skipped
 263     allowed_extractors:  List of regexes to match against extractor names that are allowed
 264     overwrites:        Overwrite all video and metadata files if True,
 265                        overwrite only non-video files if None
 266                        and don't overwrite any file if False
 267     playlist_items:    Specific indices of playlist to download.
 268     playlistrandom:    Download playlist items in random order.
 269     lazy_playlist:     Process playlist entries as they are received.
 270     matchtitle:        Download only matching titles.
 271     rejecttitle:       Reject downloads for matching titles.
 272     logger:            Log messages to a logging.Logger instance.
 273     logtostderr:       Print everything to stderr instead of stdout.
 274     consoletitle:      Display progress in console window's titlebar.
 275     writedescription:  Write the video description to a .description file
 276     writeinfojson:     Write the video metadata to a .info.json file
 277     clean_infojson:    Remove internal metadata from the infojson
 278     getcomments:       Extract video comments. This will not be written to disk
 279                        unless writeinfojson is also given
 280     writeannotations:  Write the video annotations to a .annotations.xml file
 281     writethumbnail:    Write the thumbnail image to a file
 282     allow_playlist_files: Whether to write playlists' description, infojson etc
 283                        also to disk when using the 'write*' options
 284     write_all_thumbnails:  Write all thumbnail formats to files
 285     writelink:         Write an internet shortcut file, depending on the
 286                        current platform (.url/.webloc/.desktop)
 287     writeurllink:      Write a Windows internet shortcut file (.url)
 288     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 289     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 290     writesubtitles:    Write the video subtitles to a file
 291     writeautomaticsub: Write the automatically generated subtitles to a file
 292     listsubtitles:     Lists all available subtitles for the video
 293     subtitlesformat:   The format code for subtitles
 294     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 295                        The list may contain "all" to refer to all the available
 296                        subtitles. The language can be prefixed with a "-" to
 297                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 298     keepvideo:         Keep the video file after post-processing
 299     daterange:         A utils.DateRange object, download only if the upload_date is in the range.
 300     skip_download:     Skip the actual download of the video file
 301     cachedir:          Location of the cache files in the filesystem.
 302                        False to disable filesystem cache.
 303     noplaylist:        Download single video instead of a playlist if in doubt.
 304     age_limit:         An integer representing the user's age in years.
 305                        Unsuitable videos for the given age are skipped.
 306     min_views:         An integer representing the minimum view count the video
 307                        must have in order to not be skipped.
 308                        Videos without view count information are always
 309                        downloaded. None for no limit.
 310     max_views:         An integer representing the maximum view count.
 311                        Videos that are more popular than that are not
 312                        downloaded.
 313                        Videos without view count information are always
 314                        downloaded. None for no limit.
 315     download_archive:  A set, or the name of a file where all downloads are recorded.
 316                        Videos already present in the file are not downloaded again.
 317     break_on_existing: Stop the download process after attempting to download a
 318                        file that is in the archive.
 319     break_per_url:     Whether break_on_reject and break_on_existing
 320                        should act on each input URL as opposed to for the entire queue
 321     cookiefile:        File name or text stream from where cookies should be read and dumped to
 322     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 323                        name/path from where cookies are loaded, the name of the keyring,
 324                        and the container name, e.g. ('chrome', ) or
 325                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 326     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 327                        support RFC 5746 secure renegotiation
 328     nocheckcertificate:  Do not verify SSL certificates
 329     client_certificate:  Path to client certificate file in PEM format. May include the private key
 330     client_certificate_key:  Path to private key file for client certificate
 331     client_certificate_password:  Password for client certificate private key, if encrypted.
 332                         If not provided and the key is encrypted, yt-dlp will ask interactively
 333     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 334                        (Only supported by some extractors)
 335     enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
 336     http_headers:      A dictionary of custom headers to be used for all requests
 337     proxy:             URL of the proxy server to use
 338     geo_verification_proxy:  URL of the proxy to use for IP address verification
 339                        on geo-restricted sites.
 340     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 341     bidi_workaround:   Work around buggy terminals without bidirectional text
 342                        support, using fribidi
 343     debug_printtraffic:Print out sent and received HTTP traffic
 344     default_search:    Prepend this string if an input url is not valid.
 345                        'auto' for elaborate guessing
 346     encoding:          Use this encoding instead of the system-specified.
 347     extract_flat:      Whether to resolve and process url_results further
 348                        * False:     Always process. Default for API
 349                        * True:      Never process
 350                        * 'in_playlist': Do not process inside playlist/multi_video
 351                        * 'discard': Always process, but don't return the result
 352                                     from inside playlist/multi_video
 353                        * 'discard_in_playlist': Same as "discard", but only for
 354                                     playlists (not multi_video). Default for CLI
 355     wait_for_video:    If given, wait for scheduled streams to become available.
 356                        The value should be a tuple containing the range
 357                        (min_secs, max_secs) to wait between retries
 358     postprocessors:    A list of dictionaries, each with an entry
 359                        * key:  The name of the postprocessor. See
 360                                yt_dlp/postprocessor/__init__.py for a list.
 361                        * when: When to run the postprocessor. Allowed values are
 362                                the entries of utils.POSTPROCESS_WHEN
 363                                Assumed to be 'post_process' if not given
 364     progress_hooks:    A list of functions that get called on download
 365                        progress, with a dictionary with the entries
 366                        * status: One of "downloading", "error", or "finished".
 367                                  Check this first and ignore unknown values.
 368                        * info_dict: The extracted info_dict
 369
 370                        If status is one of "downloading" or "finished", the
 371                        following properties may also be present:
 372                        * filename: The final filename (always present)
 373                        * tmpfilename: The filename we're currently writing to
 374                        * downloaded_bytes: Bytes on disk
 375                        * total_bytes: Size of the whole file, None if unknown
 376                        * total_bytes_estimate: Guess of the eventual file size,
 377                                                None if unavailable.
 378                        * elapsed: The number of seconds since download started.
 379                        * eta: The estimated time in seconds, None if unknown
 380                        * speed: The download speed in bytes/second, None if
 381                                 unknown
 382                        * fragment_index: The counter of the currently
 383                                          downloaded video fragment.
 384                        * fragment_count: The number of fragments (= individual
 385                                          files that will be merged)
 386
 387                        Progress hooks are guaranteed to be called at least once
 388                        (with status "finished") if the download is successful.
 389     postprocessor_hooks:  A list of functions that get called on postprocessing
 390                        progress, with a dictionary with the entries
 391                        * status: One of "started", "processing", or "finished".
 392                                  Check this first and ignore unknown values.
 393                        * postprocessor: Name of the postprocessor
 394                        * info_dict: The extracted info_dict
 395
 396                        Postprocessor hooks are guaranteed to be called at least twice
 397                        (with status "started" and "finished") if the processing is successful.
 398     merge_output_format: "/" separated list of extensions to use when merging formats.
 399     final_ext:         Expected final extension; used to detect when the file was
 400                        already downloaded and converted
 401     fixup:             Automatically correct known faults of the file.
 402                        One of:
 403                        - "never": do nothing
 404                        - "warn": only emit a warning
 405                        - "detect_or_warn": check whether we can do anything
 406                                            about it, warn otherwise (default)
 407     source_address:    Client-side IP address to bind to.
 408     impersonate:       Client to impersonate for requests.
 409                        An ImpersonateTarget (from yt_dlp.networking.impersonate)
 410     sleep_interval_requests: Number of seconds to sleep between requests
 411                        during extraction
 412     sleep_interval:    Number of seconds to sleep before each download when
 413                        used alone or a lower bound of a range for randomized
 414                        sleep before each download (minimum possible number
 415                        of seconds to sleep) when used along with
 416                        max_sleep_interval.
 417     max_sleep_interval:Upper bound of a range for randomized sleep before each
 418                        download (maximum possible number of seconds to sleep).
 419                        Must only be used along with sleep_interval.
 420                        Actual sleep time will be a random float from range
 421                        [sleep_interval; max_sleep_interval].
 422     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 423     listformats:       Print an overview of available video formats and exit.
 424     list_thumbnails:   Print a table of all thumbnails and exit.
 425     match_filter:      A function that gets called for every video with the signature
 426                        (info_dict, *, incomplete: bool) -> Optional[str]
 427                        For backward compatibility with youtube-dl, the signature
 428                        (info_dict) -> Optional[str] is also allowed.
 429                        - If it returns a message, the video is ignored.
 430                        - If it returns None, the video is downloaded.
 431                        - If it returns utils.NO_DEFAULT, the user is interactively
 432                          asked whether to download the video.
 433                        - Raise utils.DownloadCancelled(msg) to abort remaining
 434                          downloads when a video is rejected.
 435                        match_filter_func in utils/_utils.py is one example for this.
 436     color:             A Dictionary with output stream names as keys
 437                        and their respective color policy as values.
 438                        Can also just be a single color policy,
 439                        in which case it applies to all outputs.
 440                        Valid stream names are 'stdout' and 'stderr'.
 441                        Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
 442     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 443                        HTTP header
 444     geo_bypass_country:
 445                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 446                        explicit geographic restriction bypassing via faking
 447                        X-Forwarded-For HTTP header
 448     geo_bypass_ip_block:
 449                        IP range in CIDR notation that will be used similarly to
 450                        geo_bypass_country
 451     external_downloader: A dictionary of protocol keys and the executable of the
 452                        external downloader to use for it. The allowed protocols
 453                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 454                        Set the value to 'native' to use the native downloader
 455     compat_opts:       Compatibility options. See "Differences in default behavior".
 456                        The following options do not work when used through the API:
 457                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 458                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 459                        Refer to __init__.py for their implementation
 460     progress_template: Dictionary of templates for progress outputs.
 461                        Allowed keys are 'download', 'postprocess',
 462                        'download-title' (console title) and 'postprocess-title'.
 463                        The template is mapped on a dictionary with keys 'progress' and 'info'
 464     retry_sleep_functions: Dictionary of functions that takes the number of attempts
 465                        as argument and returns the time to sleep in seconds.
 466                        Allowed keys are 'http', 'fragment', 'file_access'
 467     download_ranges:   A callback function that gets called for every video with
 468                        the signature (info_dict, ydl) -> Iterable[Section].
 469                        Only the returned sections will be downloaded.
 470                        Each Section is a dict with the following keys:
 471                        * start_time: Start time of the section in seconds
 472                        * end_time: End time of the section in seconds
 473                        * title: Section title (Optional)
 474                        * index: Section number (Optional)
 475     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 476     noprogress:        Do not print the progress bar
 477     live_from_start:   Whether to download livestream videos from the start
 478
 479     The following parameters are not used by YoutubeDL itself, they are used by
 480     the downloader (see yt_dlp/downloader/common.py):
 481     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 482     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 483     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 484     external_downloader_args, concurrent_fragment_downloads, progress_delta.
 485
 486     The following options are used by the post processors:
 487     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 488                        to the binary or its containing directory.
 489     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 490                        and a list of additional command-line arguments for the
 491                        postprocessor/executable. The dict can also have "PP+EXE" keys
 492                        which are used when the given exe is used by the given PP.
 493                        Use 'default' as the name for arguments to be passed to all PP
 494                        For compatibility with youtube-dl, a single list of args
 495                        can also be used
 496
 497     The following options are used by the extractors:
 498     extractor_retries: Number of times to retry for known errors (default: 3)
 499     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 500     hls_split_discontinuity: Split HLS playlists to different formats at
 501                        discontinuities such as ad breaks (default: False)
 502     extractor_args:    A dictionary of arguments to be passed to the extractors.
 503                        See "EXTRACTOR ARGUMENTS" for details.
 504                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 505     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 506
 507     The following options are deprecated and may be removed in the future:
 508
 509     break_on_reject:   Stop the download process when encountering a video that
 510                        has been filtered out.
 511                        - `raise DownloadCancelled(msg)` in match_filter instead
 512     force_generic_extractor: Force downloader to use the generic extractor
 513                        - Use allowed_extractors = ['generic', 'default']
 514     playliststart:     - Use playlist_items
 515                        Playlist item to start at.
 516     playlistend:       - Use playlist_items
 517                        Playlist item to end at.
 518     playlistreverse:   - Use playlist_items
 519                        Download playlist items in reverse order.
 520     forceurl:          - Use forceprint
 521                        Force printing final URL.
 522     forcetitle:        - Use forceprint
 523                        Force printing title.
 524     forceid:           - Use forceprint
 525                        Force printing ID.
 526     forcethumbnail:    - Use forceprint
 527                        Force printing thumbnail URL.
 528     forcedescription:  - Use forceprint
 529                        Force printing description.
 530     forcefilename:     - Use forceprint
 531                        Force printing final filename.
 532     forceduration:     - Use forceprint
 533                        Force printing duration.
 534     allsubtitles:      - Use subtitleslangs = ['all']
 535                        Downloads all the subtitles of the video
 536                        (requires writesubtitles or writeautomaticsub)
 537     include_ads:       - Doesn't work
 538                        Download ads as well
 539     call_home:         - Not implemented
 540                        Boolean, true iff we are allowed to contact the
 541                        yt-dlp servers for debugging.
 542     post_hooks:        - Register a custom postprocessor
 543                        A list of functions that get called as the final step
 544                        for each video file, after all postprocessors have been
 545                        called. The filename will be passed as the only argument.
 546     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 547                        Use the native HLS downloader instead of ffmpeg/avconv
 548                        if True, otherwise use ffmpeg/avconv if False, otherwise
 549                        use downloader suggested by extractor if None.
 550     prefer_ffmpeg:     - avconv support is deprecated
 551                        If False, use avconv instead of ffmpeg if both are available,
 552                        otherwise prefer ffmpeg.
 553     youtube_include_dash_manifest: - Use extractor_args
 554                        If True (default), DASH manifests and related
 555                        data will be downloaded and processed by extractor.
 556                        You can reduce network I/O by disabling it if you don't
 557                        care about DASH. (only for youtube)
 558     youtube_include_hls_manifest: - Use extractor_args
 559                        If True (default), HLS manifests and related
 560                        data will be downloaded and processed by extractor.
 561                        You can reduce network I/O by disabling it if you don't
 562                        care about HLS. (only for youtube)
 563     no_color:          Same as `color='no_color'`
 564     no_overwrites:     Same as `overwrites=False`
 565     """
 566
    # Names of fields whose values are numbers
    # NOTE(review): presumably consulted when formatting output templates - confirm at the use site
    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    # Names of the fields that belong to a single format entry
    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    # Maps each deprecated single-valued field name to the plural field name that replaces it
    _deprecated_multivalue_fields = {
        'album_artist': 'album_artists',
        'artist': 'artists',
        'composer': 'composers',
        'creator': 'creators',
        'genre': 'genres',
    }
    # Sets of file extensions per media kind, built from MEDIA_EXTENSIONS;
    # 'video' additionally includes '3gp'
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
 600
    def __init__(self, params=None, auto_init=True):
        """Create the downloader object with the given options.

        @param params       Dictionary of options; an empty dict is used when None.
                            The dict is stored as-is and is modified in place
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        # Registered extractors (classes or instances) and the instances among them, keyed by ie_key
        self._ies = {}
        self._ies_instances = {}
        # One postprocessor chain per stage listed in POSTPROCESS_WHEN
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        # Messages already written with only_once=True
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        # Regular output goes to stderr instead of stdout when 'logtostderr' is set
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            # Screen messages are sent to stderr in quiet mode
            screen=sys.stderr if self.params.get('quiet') else stdout,
            # No console stream on 'nt'; otherwise the first of stderr/stdout
            # that supports terminal sequences (None if neither does)
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        # Best effort: a failure here is only reported as a debug message
        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        # The 'no_color' param takes precedence over 'color'
        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        # Environment hints: TERM=dumb disables colors, a non-empty NO_COLOR selects 'no_color'
        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            # Resolve the color policy of a stream to True, False or 'no_color'
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        # Color policy for every output stream except 'console'
        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            # Indent continuation lines of the message
            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            # Start "bidiv" (or "fribidi" if that fails with OSError) writing to a pty
            # whose master end is kept as self._output_channel
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround; other errors propagate
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        # Normalize compat_opts to a set and merge user headers over std_headers
        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        # A 'Cookie' header is handed to _load_cookies and then removed from the headers
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            # Warn if the deprecated param is set; returns whether it was set
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            # Use the deprecated value only when the new param is not given
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Emit the warnings collected in the params
        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        # Fail early if the requested impersonation target cannot be used
        if impersonate_target := self.params.get('impersonate'):
            if not self._impersonate_target_available(impersonate_target):
                raise YoutubeDLError(
                    f'Impersonate target "{impersonate_target}" is not available. '
                    f'Use --list-impersonate-targets to see available targets. '
                    f'You may be missing dependencies required to support this target.')

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            # Drop an explicit None so that the key is simply absent
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        # Any listing option implies simulation unless 'simulate' is set explicitly
        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        # None and '-' are kept as-is, a callable is used directly, anything else is compiled
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Register the hooks given in the params through the matching add_* method
        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        # Instantiate the configured postprocessors; each definition is copied
        # so that popping 'when' and 'key' does not modify the params
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                # Anything that is not path-like is returned unchanged
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                # A missing archive file yields an empty archive
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))
 811
 812     def warn_if_short_id(self, argv):
 813         # short YouTube ID starting with dash?
 814         idxs = [
 815             i for i, a in enumerate(argv)
 816             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 817         if idxs:
 818             correct_argv = (
 819                 ['yt-dlp']
 820                 + [a for i, a in enumerate(argv) if i not in idxs]
 821                 + ['--'] + [argv[i] for i in idxs]
 822             )
 823             self.report_warning(
 824                 'Long argument string detected. '
 825                 'Use -- to separate parameters and URLs, like this:\n%s' %
 826                 shell_quote(correct_argv))
 827
 828     def add_info_extractor(self, ie):
 829         """Add an InfoExtractor object to the end of the list."""
 830         ie_key = ie.ie_key()
 831         self._ies[ie_key] = ie
 832         if not isinstance(ie, type):
 833             self._ies_instances[ie_key] = ie
 834             ie.set_downloader(self)
 835
 836     def get_info_extractor(self, ie_key):
 837         """
 838         Get an instance of an IE with name ie_key, it will try to get one from
 839         the _ies list, if there's no instance it will create a new one and add
 840         it to the extractor list.
 841         """
 842         ie = self._ies_instances.get(ie_key)
 843         if ie is None:
 844             ie = get_info_extractor(ie_key)()
 845             self.add_info_extractor(ie)
 846         return ie
 847
 848     def add_default_info_extractors(self):
 849         """
 850         Add the InfoExtractors returned by gen_extractors to the end of the list
 851         """
 852         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 853         all_ies['end'] = UnsupportedURLIE()
 854         try:
 855             ie_names = orderedSet_from_options(
 856                 self.params.get('allowed_extractors', ['default']), {
 857                     'all': list(all_ies),
 858                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 859                 }, use_regex=True)
 860         except re.error as e:
 861             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 862         for name in ie_names:
 863             self.add_info_extractor(all_ies[name])
 864         self.write_debug(f'Loaded {len(ie_names)} extractors')
 865
 866     def add_post_processor(self, pp, when='post_process'):
 867         """Add a PostProcessor object to the end of the chain."""
 868         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 869         self._pps[when].append(pp)
 870         pp.set_downloader(self)
 871
 872     def add_post_hook(self, ph):
 873         """Add the post hook"""
 874         self._post_hooks.append(ph)
 875
 876     def add_progress_hook(self, ph):
 877         """Add the download progress hook"""
 878         self._progress_hooks.append(ph)
 879
 880     def add_postprocessor_hook(self, ph):
 881         """Add the postprocessing progress hook"""
 882         self._postprocessor_hooks.append(ph)
 883         for pps in self._pps.values():
 884             for pp in pps:
 885                 pp.add_progress_hook(ph)
 886
 887     def _bidi_workaround(self, message):
 888         if not hasattr(self, '_output_channel'):
 889             return message
 890
 891         assert hasattr(self, '_output_process')
 892         assert isinstance(message, str)
 893         line_count = message.count('\n') + 1
 894         self._output_process.stdin.write((message + '\n').encode())
 895         self._output_process.stdin.flush()
 896         res = ''.join(self._output_channel.readline().decode()
 897                       for _ in range(line_count))
 898         return res[:-len('\n')]
 899
 900     def _write_string(self, message, out=None, only_once=False):
 901         if only_once:
 902             if message in self._printed_messages:
 903                 return
 904             self._printed_messages.add(message)
 905         write_string(message, out=out, encoding=self.params.get('encoding'))
 906
 907     def to_stdout(self, message, skip_eol=False, quiet=None):
 908         """Print message to stdout"""
 909         if quiet is not None:
 910             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 911                                      'Use "YoutubeDL.to_screen" instead')
 912         if skip_eol is not False:
 913             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 914                                      'Use "YoutubeDL.to_screen" instead')
 915         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 916
 917     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 918         """Print message to screen if not in quiet mode"""
 919         if self.params.get('logger'):
 920             self.params['logger'].debug(message)
 921             return
 922         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 923             return
 924         self._write_string(
 925             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 926             self._out_files.screen, only_once=only_once)
 927
 928     def to_stderr(self, message, only_once=False):
 929         """Print message to stderr"""
 930         assert isinstance(message, str)
 931         if self.params.get('logger'):
 932             self.params['logger'].error(message)
 933         else:
 934             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 935
 936     def _send_console_code(self, code):
 937         if compat_os_name == 'nt' or not self._out_files.console:
 938             return
 939         self._write_string(code, self._out_files.console)
 940
 941     def to_console_title(self, message):
 942         if not self.params.get('consoletitle', False):
 943             return
 944         message = remove_terminal_sequences(message)
 945         if compat_os_name == 'nt':
 946             if ctypes.windll.kernel32.GetConsoleWindow():
 947                 # c_wchar_p() might not be necessary if `message` is
 948                 # already of type unicode()
 949                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 950         else:
 951             self._send_console_code(f'\033]0;{message}\007')
 952
 953     def save_console_title(self):
 954         if not self.params.get('consoletitle') or self.params.get('simulate'):
 955             return
 956         self._send_console_code('\033[22;0t')  # Save the title on stack
 957
 958     def restore_console_title(self):
 959         if not self.params.get('consoletitle') or self.params.get('simulate'):
 960             return
 961         self._send_console_code('\033[23;0t')  # Restore the title from stack
 962
 963     def __enter__(self):
 964         self.save_console_title()
 965         return self
 966
 967     def save_cookies(self):
 968         if self.params.get('cookiefile') is not None:
 969             self.cookiejar.save()
 970
 971     def __exit__(self, *args):
 972         self.restore_console_title()
 973         self.close()
 974
 975     def close(self):
 976         self.save_cookies()
 977         if '_request_director' in self.__dict__:
 978             self._request_director.close()
 979             del self._request_director
 980
 981     def trouble(self, message=None, tb=None, is_error=True):
 982         """Determine action to take when a download problem appears.
 983
 984         Depending on if the downloader has been configured to ignore
 985         download errors or not, this method may throw an exception or
 986         not when errors are found, after printing the message.
 987
 988         @param tb          If given, is additional traceback information
 989         @param is_error    Whether to raise error according to ignorerrors
 990         """
 991         if message is not None:
 992             self.to_stderr(message)
 993         if self.params.get('verbose'):
 994             if tb is None:
 995                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 996                     tb = ''
 997                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 998                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 999                     tb += encode_compat_str(traceback.format_exc())
1000                 else:
1001                     tb_data = traceback.format_list(traceback.extract_stack())
1002                     tb = ''.join(tb_data)
1003             if tb:
1004                 self.to_stderr(tb)
1005         if not is_error:
1006             return
1007         if not self.params.get('ignoreerrors'):
1008             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
1009                 exc_info = sys.exc_info()[1].exc_info
1010             else:
1011                 exc_info = sys.exc_info()
1012             raise DownloadError(message, exc_info)
1013         self._download_retcode = 1
1014
    # Named text styles; the values are passed as the style argument of
    # the _format_* helpers (e.g. self.Styles.WARNING in report_warning)
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
1026
1027     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
1028         text = str(text)
1029         if test_encoding:
1030             original_text = text
1031             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1032             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
1033             text = text.encode(encoding, 'ignore').decode(encoding)
1034             if fallback is not None and text != original_text:
1035                 text = fallback
1036         return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
1037
1038     def _format_out(self, *args, **kwargs):
1039         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1040
1041     def _format_screen(self, *args, **kwargs):
1042         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
1043
1044     def _format_err(self, *args, **kwargs):
1045         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
1046
1047     def report_warning(self, message, only_once=False):
1048         '''
1049         Print the message to stderr, it will be prefixed with 'WARNING:'
1050         If stderr is a tty file the 'WARNING:' will be colored
1051         '''
1052         if self.params.get('logger') is not None:
1053             self.params['logger'].warning(message)
1054         else:
1055             if self.params.get('no_warnings'):
1056                 return
1057             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
1058
1059     def deprecation_warning(self, message, *, stacklevel=0):
1060         deprecation_warning(
1061             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1062
1063     def deprecated_feature(self, message):
1064         if self.params.get('logger') is not None:
1065             self.params['logger'].warning(f'Deprecated Feature: {message}')
1066         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
1067
1068     def report_error(self, message, *args, **kwargs):
1069         '''
1070         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1071         in red if stderr is a tty file.
1072         '''
1073         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1074
1075     def write_debug(self, message, only_once=False):
1076         '''Log debug message or Print message to stderr'''
1077         if not self.params.get('verbose', False):
1078             return
1079         message = f'[debug] {message}'
1080         if self.params.get('logger'):
1081             self.params['logger'].debug(message)
1082         else:
1083             self.to_stderr(message, only_once)
1084
1085     def report_file_already_downloaded(self, file_name):
1086         """Report file has already been fully downloaded."""
1087         try:
1088             self.to_screen('[download] %s has already been downloaded' % file_name)
1089         except UnicodeEncodeError:
1090             self.to_screen('[download] The file has already been downloaded')
1091
1092     def report_file_delete(self, file_name):
1093         """Report that existing file will be deleted."""
1094         try:
1095             self.to_screen('Deleting existing file %s' % file_name)
1096         except UnicodeEncodeError:
1097             self.to_screen('Deleting existing file')
1098
1099     def raise_no_formats(self, info, forced=False, *, msg=None):
1100         has_drm = info.get('_has_drm')
1101         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1102         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1103         if forced or not ignored:
1104             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1105                                  expected=has_drm or ignored or expected)
1106         else:
1107             self.report_warning(msg)
1108
1109     def parse_outtmpl(self):
1110         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1111         self._parse_outtmpl()
1112         return self.params['outtmpl']
1113
1114     def _parse_outtmpl(self):
1115         sanitize = IDENTITY
1116         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1117             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1118
1119         outtmpl = self.params.setdefault('outtmpl', {})
1120         if not isinstance(outtmpl, dict):
1121             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1122         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1123
1124     def get_output_path(self, dir_type='', filename=None):
1125         paths = self.params.get('paths', {})
1126         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1127         path = os.path.join(
1128             expand_path(paths.get('home', '').strip()),
1129             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1130             filename or '')
1131         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1132
1133     @staticmethod
1134     def _outtmpl_expandpath(outtmpl):
1135         # expand_path translates '%%' into '%' and '$$' into '$'
1136         # correspondingly that is not what we want since we need to keep
1137         # '%%' intact for template dict substitution step. Working around
1138         # with boundary-alike separator hack.
1139         sep = ''.join(random.choices(string.ascii_letters, k=32))
1140         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1141
1142         # outtmpl should be expand_path'ed before template dict substitution
1143         # because meta fields may contain env variables we don't want to
1144         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1145         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1146         return expand_path(outtmpl).replace(sep, '')
1147
1148     @staticmethod
1149     def escape_outtmpl(outtmpl):
1150         ''' Escape any remaining strings like %s, %abc% etc. '''
1151         return re.sub(
1152             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1153             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1154             outtmpl)
1155
1156     @classmethod
1157     def validate_outtmpl(cls, outtmpl):
1158         ''' @return None or Exception object '''
1159         outtmpl = re.sub(
1160             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1161             lambda mobj: f'{mobj.group(0)[:-1]}s',
1162             cls._outtmpl_expandpath(outtmpl))
1163         try:
1164             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1165             return None
1166         except ValueError as err:
1167             return err
1168
1169     @staticmethod
1170     def _copy_infodict(info_dict):
1171         info_dict = dict(info_dict)
1172         info_dict.pop('__postprocessors', None)
1173         info_dict.pop('__pending_error', None)
1174         return info_dict
1175
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param outtmpl     The output template string to rewrite
        @param info_dict   The info dict to read field values from.
                           'epoch' is set on the passed dict if missing; all other
                           derived fields are written to an internal shallow copy only
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @returns           A tuple (rewritten template, dict); every "%(key)fmt" in the
                           rewritten template refers to a generated key of the dict
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a shallow copy (minus internal keys) is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): generated template key -> final value
        TMPL_DICT = {}
        # Matches the outer "%(key)format" placeholders; on top of STR_FORMAT_TYPES
        # the extra conversion types l, j, h, q, B, U, D and S are accepted
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        # Arithmetic operators usable inside a key; operands are handled as floats
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
            '*': float.__mul__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text inside "%( ... )":
        #   [-]fields[maths][>strf_format][,alternate][&replacement][|default]
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _from_user_input(field):
            """Convert one path component: ':' -> Ellipsis, 'a:b[:c]' -> slice, digits -> int, else unchanged"""
            if field == ':':
                return ...
            elif ':' in field:
                return slice(*map(int_or_none, field.split(':')))
            elif int_or_none(field) is not None:
                return int(field)
            return field

        def _traverse_infodict(fields):
            """Resolve a dotted field path (optionally containing "{a,b}" groups) against info_dict"""
            # Split on dots, but keep every "{...}" group as a single component
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            # Drop an empty first and/or last component left over from the split
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    fields[i] = _from_user_input(f)
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                # "{a,b.c}" becomes a dict mapping each listed field to its own parsed path
                fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, traverse_string=True)

        def get_value(mdict):
            """Compute the value for one INTERNAL_FORMAT_RE match (given as its groupdict)"""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string alternately as an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    # A leading "-" on the operand is applied as a sign multiplier
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a numeric literal, so treat the operand as another field path
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        # Placeholder used when a field has no value and the key gives no default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            """Default sanitizer: stringify the value and pass it through sanitize_filename"""
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        # A callable `sanitize` is used as the sanitizer itself;
        # afterwards `sanitize` only records whether sanitization is enabled
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            """json.dumps fallback: sets and LazyList become lists, anything else its repr()"""
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            # Formatter for the "&replacement" text: any all-digit field name resolves to
            # the single positional argument; every other field name is rejected
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            """re.sub callback: evaluate one placeholder, store its value in TMPL_DICT and return the rewritten placeholder"""
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            # Try the key, then each ",alternate" in turn, until one of them yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            # The same format spec with its conversion type replaced by "s"
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = shell_quote(value, shell=True), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Apply the width/precision on the encoded bytes, then decode ignoring invalid sequences
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            # Generated key: the original key (with "%" followed by NUL) + NUL + the original format spec
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1398
1399     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1400         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1401         return self.escape_outtmpl(outtmpl) % info_dict
1402
1403     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1404         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1405         if outtmpl is None:
1406             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1407         try:
1408             outtmpl = self._outtmpl_expandpath(outtmpl)
1409             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1410             if not filename:
1411                 return None
1412
1413             if tmpl_type in ('', 'temp'):
1414                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1415                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1416                     filename = replace_extension(filename, ext, final_ext)
1417             elif tmpl_type:
1418                 force_ext = OUTTMPL_TYPES[tmpl_type]
1419                 if force_ext:
1420                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1421
1422             # https://github.com/blackjack4494/youtube-dlc/issues/85
1423             trim_file_name = self.params.get('trim_file_name', False)
1424             if trim_file_name:
1425                 no_ext, *ext = filename.rsplit('.', 2)
1426                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1427
1428             return filename
1429         except ValueError as err:
1430             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1431             return None
1432
1433     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1434         """Generate the output filename"""
1435         if outtmpl:
1436             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1437             dir_type = None
1438         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1439         if not filename and dir_type not in ('', 'temp'):
1440             return ''
1441
1442         if warn:
1443             if not self.params.get('paths'):
1444                 pass
1445             elif filename == '-':
1446                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1447             elif os.path.isabs(filename):
1448                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1449         if filename == '-' or not filename:
1450             return filename
1451
1452         return self.get_output_path(dir_type, filename)
1453
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded

        Otherwise returns a string with the reason for skipping the entry.
        @param incomplete  Whether info_dict may be incomplete; passed on to the
                           'match_filter' param. Must be set for non-video results
        @param silent      If set, the reason is not printed to screen
        Raises the break exception matching the reason (ExistingVideoReached,
        RejectedVideoReached or the type of the DownloadCancelled raised by the
        filter) if the corresponding break option is set in params.
        """
        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        # Name used in the skip messages: title, else id, else the literal 'entry'
        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            """Return the reason for rejecting the entry, or None if no filter rejects it"""
            if _type in ('playlist', 'multi_video'):
                return
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                # URL results are only filtered when the extractor reports a single video
                return

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is None:
                return None

            cancelled = None
            try:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
            except DownloadCancelled as err:
                # Only a cancellation carrying NO_DEFAULT as its message is turned
                # into the interactive prompt below; any other is propagated
                if err.msg is not NO_DEFAULT:
                    raise
                ret, cancelled = err.msg, err

            if ret is NO_DEFAULT:
                # Ask the user; the question is repeated until the reply is 'y', 'n' or empty
                while True:
                    filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                    reply = input(self._format_screen(
                        f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                    if reply in {'y', ''}:
                        return None
                    elif reply == 'n':
                        if cancelled:
                            # Re-raise the same kind of cancellation with a readable message
                            raise type(cancelled)(f'Skipping {video_title}')
                        return f'Skipping {video_title}'
            return ret

        # Decide the skip reason together with the break option and exception it maps to
        if self.in_download_archive(info_dict):
            reason = ''.join((
                format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
                format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
                'has already been recorded in the archive'))
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            try:
                reason = check_filter()
            except DownloadCancelled as e:
                reason, break_opt, break_err = e.msg, 'match_filter', type(e)
            else:
                break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1544
1545     @staticmethod
1546     def add_extra_info(info_dict, extra_info):
1547         '''Set the keys from extra_info in info dict if they are missing'''
1548         for key, value in extra_info.items():
1549             info_dict.setdefault(key, value)
1550
1551     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1552                      process=True, force_generic_extractor=False):
1553         """
1554         Extract and return the information dictionary of the URL
1555
1556         Arguments:
1557         @param url          URL to extract
1558
1559         Keyword arguments:
1560         @param download     Whether to download videos
1561         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1562                             Must be True for download to work
1563         @param ie_key       Use only the extractor with this key
1564
1565         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1566         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1567         """
1568
1569         if extra_info is None:
1570             extra_info = {}
1571
1572         if not ie_key and force_generic_extractor:
1573             ie_key = 'Generic'
1574
1575         if ie_key:
1576             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1577         else:
1578             ies = self._ies
1579
1580         for key, ie in ies.items():
1581             if not ie.suitable(url):
1582                 continue
1583
1584             if not ie.working():
1585                 self.report_warning('The program functionality for this site has been marked as broken, '
1586                                     'and will probably not work.')
1587
1588             temp_id = ie.get_temp_id(url)
1589             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1590                 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1591                                'has already been recorded in the archive')
1592                 if self.params.get('break_on_existing', False):
1593                     raise ExistingVideoReached()
1594                 break
1595             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1596         else:
1597             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1598             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1599                               tb=False if extractors_restricted else None)
1600
1601     def _handle_extraction_exceptions(func):
1602         @functools.wraps(func)
1603         def wrapper(self, *args, **kwargs):
1604             while True:
1605                 try:
1606                     return func(self, *args, **kwargs)
1607                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1608                     raise
1609                 except ReExtractInfo as e:
1610                     if e.expected:
1611                         self.to_screen(f'{e}; Re-extracting data')
1612                     else:
1613                         self.to_stderr('\r')
1614                         self.report_warning(f'{e}; Re-extracting data')
1615                     continue
1616                 except GeoRestrictedError as e:
1617                     msg = e.msg
1618                     if e.countries:
1619                         msg += '\nThis video is available in %s.' % ', '.join(
1620                             map(ISO3166Utils.short2full, e.countries))
1621                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1622                     self.report_error(msg)
1623                 except ExtractorError as e:  # An error we somewhat expected
1624                     self.report_error(str(e), e.format_traceback())
1625                 except Exception as e:
1626                     if self.params.get('ignoreerrors'):
1627                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1628                     else:
1629                         raise
1630                 break
1631         return wrapper
1632
1633     def _wait_for_video(self, ie_result={}):
1634         if (not self.params.get('wait_for_video')
1635                 or ie_result.get('_type', 'video') != 'video'
1636                 or ie_result.get('formats') or ie_result.get('url')):
1637             return
1638
1639         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1640         last_msg = ''
1641
1642         def progress(msg):
1643             nonlocal last_msg
1644             full_msg = f'{msg}\n'
1645             if not self.params.get('noprogress'):
1646                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1647             elif last_msg:
1648                 return
1649             self.to_screen(full_msg, skip_eol=True)
1650             last_msg = msg
1651
1652         min_wait, max_wait = self.params.get('wait_for_video')
1653         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1654         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1655             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1656             self.report_warning('Release time of video is not known')
1657         elif ie_result and (diff or 0) <= 0:
1658             self.report_warning('Video should already be available according to extracted info')
1659         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1660         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1661
1662         wait_till = time.time() + diff
1663         try:
1664             while True:
1665                 diff = wait_till - time.time()
1666                 if diff <= 0:
1667                     progress('')
1668                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1669                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1670                 time.sleep(1)
1671         except KeyboardInterrupt:
1672             progress('')
1673             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1674         except BaseException as e:
1675             if not isinstance(e, ReExtractInfo):
1676                 self.to_screen('')
1677             raise
1678
1679     def _load_cookies(self, data, *, autoscope=True):
1680         """Loads cookies from a `Cookie` header
1681
1682         This tries to work around the security vulnerability of passing cookies to every domain.
1683         See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1684
1685         @param data         The Cookie header as string to load the cookies from
1686         @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
1687                             If `True`, save cookies for later to be stored in the jar with a limited scope
1688                             If a URL, save cookies in the jar with the domain of the URL
1689         """
1690         for cookie in LenientSimpleCookie(data).values():
1691             if autoscope and any(cookie.values()):
1692                 raise ValueError('Invalid syntax in Cookie Header')
1693
1694             domain = cookie.get('domain') or ''
1695             expiry = cookie.get('expires')
1696             if expiry == '':  # 0 is valid
1697                 expiry = None
1698             prepared_cookie = http.cookiejar.Cookie(
1699                 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1700                 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1701                 cookie.get('secure') or False, expiry, False, None, None, {})
1702
1703             if domain:
1704                 self.cookiejar.set_cookie(prepared_cookie)
1705             elif autoscope is True:
1706                 self.deprecated_feature(
1707                     'Passing cookies as a header is a potential security risk; '
1708                     'they will be scoped to the domain of the downloaded urls. '
1709                     'Please consider loading cookies from a file or browser instead.')
1710                 self.__header_cookies.append(prepared_cookie)
1711             elif autoscope:
1712                 self.report_warning(
1713                     'The extractor result contains an unscoped cookie as an HTTP header. '
1714                     f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1715                     only_once=True)
1716                 self._apply_header_cookies(autoscope, [prepared_cookie])
1717             else:
1718                 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1719                                   tb=False, is_error=False)
1720
1721     def _apply_header_cookies(self, url, cookies=None):
1722         """Applies stray header cookies to the provided url
1723
1724         This loads header cookies and scopes them to the domain provided in `url`.
1725         While this is not ideal, it helps reduce the risk of them being sent
1726         to an unintended destination while mostly maintaining compatibility.
1727         """
1728         parsed = urllib.parse.urlparse(url)
1729         if not parsed.hostname:
1730             return
1731
1732         for cookie in map(copy.copy, cookies or self.__header_cookies):
1733             cookie.domain = f'.{parsed.hostname}'
1734             self.cookiejar.set_cookie(cookie)
1735
1736     @_handle_extraction_exceptions
1737     def __extract_info(self, url, ie, download, extra_info, process):
1738         self._apply_header_cookies(url)
1739
1740         try:
1741             ie_result = ie.extract(url)
1742         except UserNotLive as e:
1743             if process:
1744                 if self.params.get('wait_for_video'):
1745                     self.report_warning(e)
1746                 self._wait_for_video()
1747             raise
1748         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1749             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1750             return
1751         if isinstance(ie_result, list):
1752             # Backwards compatibility: old IE result format
1753             ie_result = {
1754                 '_type': 'compat_list',
1755                 'entries': ie_result,
1756             }
1757         if extra_info.get('original_url'):
1758             ie_result.setdefault('original_url', extra_info['original_url'])
1759         self.add_default_extra_info(ie_result, ie, url)
1760         if process:
1761             self._wait_for_video(ie_result)
1762             return self.process_ie_result(ie_result, download, extra_info)
1763         else:
1764             return ie_result
1765
1766     def add_default_extra_info(self, ie_result, ie, url):
1767         if url is not None:
1768             self.add_extra_info(ie_result, {
1769                 'webpage_url': url,
1770                 'original_url': url,
1771             })
1772         webpage_url = ie_result.get('webpage_url')
1773         if webpage_url:
1774             self.add_extra_info(ie_result, {
1775                 'webpage_url_basename': url_basename(webpage_url),
1776                 'webpage_url_domain': get_domain(webpage_url),
1777             })
1778         if ie is not None:
1779             self.add_extra_info(ie_result, {
1780                 'extractor': ie.IE_NAME,
1781                 'extractor_key': ie.ie_key(),
1782             })
1783
1784     def process_ie_result(self, ie_result, download=True, extra_info=None):
1785         """
1786         Take the result of the ie(may be modified) and resolve all unresolved
1787         references (URLs, playlist items).
1788
1789         It will also download the videos if 'download'.
1790         Returns the resolved ie_result.
1791         """
1792         if extra_info is None:
1793             extra_info = {}
1794         result_type = ie_result.get('_type', 'video')
1795
1796         if result_type in ('url', 'url_transparent'):
1797             ie_result['url'] = sanitize_url(
1798                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1799             if ie_result.get('original_url') and not extra_info.get('original_url'):
1800                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1801
1802             extract_flat = self.params.get('extract_flat', False)
1803             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1804                     or extract_flat is True):
1805                 info_copy = ie_result.copy()
1806                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1807                 if ie and not ie_result.get('id'):
1808                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1809                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1810                 self.add_extra_info(info_copy, extra_info)
1811                 info_copy, _ = self.pre_process(info_copy)
1812                 self._fill_common_fields(info_copy, False)
1813                 self.__forced_printings(info_copy)
1814                 self._raise_pending_errors(info_copy)
1815                 if self.params.get('force_write_download_archive', False):
1816                     self.record_download_archive(info_copy)
1817                 return ie_result
1818
1819         if result_type == 'video':
1820             self.add_extra_info(ie_result, extra_info)
1821             ie_result = self.process_video_result(ie_result, download=download)
1822             self._raise_pending_errors(ie_result)
1823             additional_urls = (ie_result or {}).get('additional_urls')
1824             if additional_urls:
1825                 # TODO: Improve MetadataParserPP to allow setting a list
1826                 if isinstance(additional_urls, str):
1827                     additional_urls = [additional_urls]
1828                 self.to_screen(
1829                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1830                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1831                 ie_result['additional_entries'] = [
1832                     self.extract_info(
1833                         url, download, extra_info=extra_info,
1834                         force_generic_extractor=self.params.get('force_generic_extractor'))
1835                     for url in additional_urls
1836                 ]
1837             return ie_result
1838         elif result_type == 'url':
1839             # We have to add extra_info to the results because it may be
1840             # contained in a playlist
1841             return self.extract_info(
1842                 ie_result['url'], download,
1843                 ie_key=ie_result.get('ie_key'),
1844                 extra_info=extra_info)
1845         elif result_type == 'url_transparent':
1846             # Use the information from the embedding page
1847             info = self.extract_info(
1848                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1849                 extra_info=extra_info, download=False, process=False)
1850
1851             # extract_info may return None when ignoreerrors is enabled and
1852             # extraction failed with an error, don't crash and return early
1853             # in this case
1854             if not info:
1855                 return info
1856
1857             exempted_fields = {'_type', 'url', 'ie_key'}
1858             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1859                 # For video clips, the id etc of the clip extractor should be used
1860                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1861
1862             new_result = info.copy()
1863             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1864
1865             # Extracted info may not be a video result (i.e.
1866             # info.get('_type', 'video') != video) but rather an url or
1867             # url_transparent. In such cases outer metadata (from ie_result)
1868             # should be propagated to inner one (info). For this to happen
1869             # _type of info should be overridden with url_transparent. This
1870             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1871             if new_result.get('_type') == 'url':
1872                 new_result['_type'] = 'url_transparent'
1873
1874             return self.process_ie_result(
1875                 new_result, download=download, extra_info=extra_info)
1876         elif result_type in ('playlist', 'multi_video'):
1877             # Protect from infinite recursion due to recursively nested playlists
1878             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1879             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1880             if webpage_url and webpage_url in self._playlist_urls:
1881                 self.to_screen(
1882                     '[download] Skipping already downloaded playlist: %s'
1883                     % ie_result.get('title') or ie_result.get('id'))
1884                 return
1885
1886             self._playlist_level += 1
1887             self._playlist_urls.add(webpage_url)
1888             self._fill_common_fields(ie_result, False)
1889             self._sanitize_thumbnails(ie_result)
1890             try:
1891                 return self.__process_playlist(ie_result, download)
1892             finally:
1893                 self._playlist_level -= 1
1894                 if not self._playlist_level:
1895                     self._playlist_urls.clear()
1896         elif result_type == 'compat_list':
1897             self.report_warning(
1898                 'Extractor %s returned a compat_list result. '
1899                 'It needs to be updated.' % ie_result.get('extractor'))
1900
1901             def _fixup(r):
1902                 self.add_extra_info(r, {
1903                     'extractor': ie_result['extractor'],
1904                     'webpage_url': ie_result['webpage_url'],
1905                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1906                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1907                     'extractor_key': ie_result['extractor_key'],
1908                 })
1909                 return r
1910             ie_result['entries'] = [
1911                 self.process_ie_result(_fixup(r), download, extra_info)
1912                 for r in ie_result['entries']
1913             ]
1914             return ie_result
1915         else:
1916             raise Exception('Invalid result type: %s' % result_type)
1917
1918     def _ensure_dir_exists(self, path):
1919         return make_dir(path, self.report_error)
1920
1921     @staticmethod
1922     def _playlist_infodict(ie_result, strict=False, **kwargs):
1923         info = {
1924             'playlist_count': ie_result.get('playlist_count'),
1925             'playlist': ie_result.get('title') or ie_result.get('id'),
1926             'playlist_id': ie_result.get('id'),
1927             'playlist_title': ie_result.get('title'),
1928             'playlist_uploader': ie_result.get('uploader'),
1929             'playlist_uploader_id': ie_result.get('uploader_id'),
1930             **kwargs,
1931         }
1932         if strict:
1933             return info
1934         if ie_result.get('webpage_url'):
1935             info.update({
1936                 'webpage_url': ie_result['webpage_url'],
1937                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1938                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1939             })
1940         return {
1941             **info,
1942             'playlist_index': 0,
1943             '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1944             'extractor': ie_result['extractor'],
1945             'extractor_key': ie_result['extractor_key'],
1946         }
1947
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        `ie_result` must be a 'playlist' or 'multi_video' result. Its requested
        entries are resolved (lazily when the 'lazy_playlist' param is set),
        playlist-level files are optionally written, and every entry is passed
        to process_ie_result through __process_iterable_entry.

        `ie_result` is updated in place ('entries', 'requested_entries',
        'playlist_count'). Returns the value of run_all_pps('playlist', ...),
        or None when _match_entry gives a non-None answer for the playlist or
        when one of the playlist file writers returns None.
        """
        assert ie_result['_type'] in ('playlist', 'multi_video')

        # strict=True: only the playlist_* fields, without extractor/webpage info
        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        # A non-None answer from _match_entry makes us skip the whole playlist
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        # Items are unpacked as (playlist_index, entry) pairs further below
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # Entries are consumed one by one in the loop below, so their
            # number is not known at this point
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # NB: `entries` and `resolved_entries` are the same list object, so the
            # in-place reverse/shuffle below reorders both and index `i` stays aligned
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            # Split the pairs into (indices, entries); fall back to two empty lists
            # when there is nothing to unzip
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        # Lookups hit ie_result first and fall back to the derived playlist fields
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from either writer aborts processing of this playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        # extract_flat == 'discard' never keeps the processed entries;
        # 'discard_in_playlist' keeps them only when the result is not a 'playlist'
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        # A missing or falsy 'skip_playlist_after_errors' means no limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                # Grow resolved_entries in step with the iteration so that
                # resolved_entries[i] is valid below
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            # View of the entry with the playlist fields as fallback; used only for matching
            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                # NO_DEFAULT marks the entry so that it is dropped from the final lists
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        # (only if the first write returned exactly True; a None result aborts here too)
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
2064
2065     @_handle_extraction_exceptions
2066     def __process_iterable_entry(self, entry, download, extra_info):
2067         return self.process_ie_result(
2068             entry, download=download, extra_info=extra_info)
2069
2070     def _build_format_filter(self, filter_spec):
2071         " Returns a function to filter the formats according to the filter_spec "
2072
2073         OPERATORS = {
2074             '<': operator.lt,
2075             '<=': operator.le,
2076             '>': operator.gt,
2077             '>=': operator.ge,
2078             '=': operator.eq,
2079             '!=': operator.ne,
2080         }
2081         operator_rex = re.compile(r'''(?x)\s*
2082             (?P<key>[\w.-]+)\s*
2083             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2084             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2085             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2086         m = operator_rex.fullmatch(filter_spec)
2087         if m:
2088             try:
2089                 comparison_value = int(m.group('value'))
2090             except ValueError:
2091                 comparison_value = parse_filesize(m.group('value'))
2092                 if comparison_value is None:
2093                     comparison_value = parse_filesize(m.group('value') + 'B')
2094                 if comparison_value is None:
2095                     raise ValueError(
2096                         'Invalid value %r in format specification %r' % (
2097                             m.group('value'), filter_spec))
2098             op = OPERATORS[m.group('op')]
2099
2100         if not m:
2101             STR_OPERATORS = {
2102                 '=': operator.eq,
2103                 '^=': lambda attr, value: attr.startswith(value),
2104                 '$=': lambda attr, value: attr.endswith(value),
2105                 '*=': lambda attr, value: value in attr,
2106                 '~=': lambda attr, value: value.search(attr) is not None
2107             }
2108             str_operator_rex = re.compile(r'''(?x)\s*
2109                 (?P<key>[a-zA-Z0-9._-]+)\s*
2110                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2111                 (?P<quote>["'])?
2112                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2113                 (?(quote)(?P=quote))\s*
2114                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2115             m = str_operator_rex.fullmatch(filter_spec)
2116             if m:
2117                 if m.group('op') == '~=':
2118                     comparison_value = re.compile(m.group('value'))
2119                 else:
2120                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2121                 str_op = STR_OPERATORS[m.group('op')]
2122                 if m.group('negation'):
2123                     op = lambda attr, value: not str_op(attr, value)
2124                 else:
2125                     op = str_op
2126
2127         if not m:
2128             raise SyntaxError('Invalid filter specification %r' % filter_spec)
2129
2130         def _filter(f):
2131             actual_value = f.get(m.group('key'))
2132             if actual_value is None:
2133                 return m.group('none_inclusive')
2134             return op(actual_value, comparison_value)
2135         return _filter
2136
2137     def _check_formats(self, formats):
2138         for f in formats:
2139             working = f.get('__working')
2140             if working is not None:
2141                 if working:
2142                     yield f
2143                 continue
2144             self.to_screen('[info] Testing format %s' % f['format_id'])
2145             path = self.get_output_path('temp')
2146             if not self._ensure_dir_exists(f'{path}/'):
2147                 continue
2148             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2149             temp_file.close()
2150             try:
2151                 success, _ = self.dl(temp_file.name, f, test=True)
2152             except (DownloadError, OSError, ValueError) + network_exceptions:
2153                 success = False
2154             finally:
2155                 if os.path.exists(temp_file.name):
2156                     try:
2157                         os.remove(temp_file.name)
2158                     except OSError:
2159                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2160             f['__working'] = success
2161             if success:
2162                 yield f
2163             else:
2164                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2165
2166     def _select_formats(self, formats, selector):
2167         return list(selector({
2168             'formats': formats,
2169             'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2170             'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
2171                                    or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
2172         }))
2173
2174     def _default_format_spec(self, info_dict, download=True):
2175         download = download and not self.params.get('simulate')
2176         prefer_best = download and (
2177             self.params['outtmpl']['default'] == '-'
2178             or info_dict.get('is_live') and not self.params.get('live_from_start'))
2179
2180         def can_merge():
2181             merger = FFmpegMergerPP(self)
2182             return merger.available and merger.can_merge()
2183
2184         if not prefer_best and download and not can_merge():
2185             prefer_best = True
2186             formats = self._get_formats(info_dict)
2187             evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
2188             if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2189                 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2190                                     'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2191
2192         compat = (self.params.get('allow_multiple_audio_streams')
2193                   or 'format-spec' in self.params['compat_opts'])
2194
2195         return ('best/bestvideo+bestaudio' if prefer_best
2196                 else 'bestvideo+bestaudio/best' if compat
2197                 else 'bestvideo*+bestaudio/best')
2198
2199     def build_format_selector(self, format_spec):
2200         def syntax_error(note, start):
2201             message = (
2202                 'Invalid format specification: '
2203                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2204             return SyntaxError(message)
2205
2206         PICKFIRST = 'PICKFIRST'
2207         MERGE = 'MERGE'
2208         SINGLE = 'SINGLE'
2209         GROUP = 'GROUP'
2210         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2211
2212         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2213                                   'video': self.params.get('allow_multiple_video_streams', False)}
2214
2215         def _parse_filter(tokens):
2216             filter_parts = []
2217             for type, string_, start, _, _ in tokens:
2218                 if type == tokenize.OP and string_ == ']':
2219                     return ''.join(filter_parts)
2220                 else:
2221                     filter_parts.append(string_)
2222
2223         def _remove_unused_ops(tokens):
2224             # Remove operators that we don't use and join them with the surrounding strings.
2225             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2226             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2227             last_string, last_start, last_end, last_line = None, None, None, None
2228             for type, string_, start, end, line in tokens:
2229                 if type == tokenize.OP and string_ == '[':
2230                     if last_string:
2231                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2232                         last_string = None
2233                     yield type, string_, start, end, line
2234                     # everything inside brackets will be handled by _parse_filter
2235                     for type, string_, start, end, line in tokens:
2236                         yield type, string_, start, end, line
2237                         if type == tokenize.OP and string_ == ']':
2238                             break
2239                 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2240                     if last_string:
2241                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2242                         last_string = None
2243                     yield type, string_, start, end, line
2244                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2245                     if not last_string:
2246                         last_string = string_
2247                         last_start = start
2248                         last_end = end
2249                     else:
2250                         last_string += string_
2251             if last_string:
2252                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2253
2254         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2255             selectors = []
2256             current_selector = None
2257             for type, string_, start, _, _ in tokens:
2258                 # ENCODING is only defined in Python 3.x
2259                 if type == getattr(tokenize, 'ENCODING', None):
2260                     continue
2261                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2262                     current_selector = FormatSelector(SINGLE, string_, [])
2263                 elif type == tokenize.OP:
2264                     if string_ == ')':
2265                         if not inside_group:
2266                             # ')' will be handled by the parentheses group
2267                             tokens.restore_last_token()
2268                         break
2269                     elif inside_merge and string_ in ['/', ',']:
2270                         tokens.restore_last_token()
2271                         break
2272                     elif inside_choice and string_ == ',':
2273                         tokens.restore_last_token()
2274                         break
2275                     elif string_ == ',':
2276                         if not current_selector:
2277                             raise syntax_error('"," must follow a format selector', start)
2278                         selectors.append(current_selector)
2279                         current_selector = None
2280                     elif string_ == '/':
2281                         if not current_selector:
2282                             raise syntax_error('"/" must follow a format selector', start)
2283                         first_choice = current_selector
2284                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2285                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2286                     elif string_ == '[':
2287                         if not current_selector:
2288                             current_selector = FormatSelector(SINGLE, 'best', [])
2289                         format_filter = _parse_filter(tokens)
2290                         current_selector.filters.append(format_filter)
2291                     elif string_ == '(':
2292                         if current_selector:
2293                             raise syntax_error('Unexpected "("', start)
2294                         group = _parse_format_selection(tokens, inside_group=True)
2295                         current_selector = FormatSelector(GROUP, group, [])
2296                     elif string_ == '+':
2297                         if not current_selector:
2298                             raise syntax_error('Unexpected "+"', start)
2299                         selector_1 = current_selector
2300                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2301                         if not selector_2:
2302                             raise syntax_error('Expected a selector', start)
2303                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2304                     else:
2305                         raise syntax_error(f'Operator not recognized: "{string_}"', start)
2306                 elif type == tokenize.ENDMARKER:
2307                     break
2308             if current_selector:
2309                 selectors.append(current_selector)
2310             return selectors
2311
2312         def _merge(formats_pair):
2313             format_1, format_2 = formats_pair
2314
2315             formats_info = []
2316             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2317             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2318
2319             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2320                 get_no_more = {'video': False, 'audio': False}
2321                 for (i, fmt_info) in enumerate(formats_info):
2322                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2323                         formats_info.pop(i)
2324                         continue
2325                     for aud_vid in ['audio', 'video']:
2326                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2327                             if get_no_more[aud_vid]:
2328                                 formats_info.pop(i)
2329                                 break
2330                             get_no_more[aud_vid] = True
2331
2332             if len(formats_info) == 1:
2333                 return formats_info[0]
2334
2335             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2336             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2337
2338             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2339             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2340
2341             output_ext = get_compatible_ext(
2342                 vcodecs=[f.get('vcodec') for f in video_fmts],
2343                 acodecs=[f.get('acodec') for f in audio_fmts],
2344                 vexts=[f['ext'] for f in video_fmts],
2345                 aexts=[f['ext'] for f in audio_fmts],
2346                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2347                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2348
2349             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2350
2351             new_dict = {
2352                 'requested_formats': formats_info,
2353                 'format': '+'.join(filtered('format')),
2354                 'format_id': '+'.join(filtered('format_id')),
2355                 'ext': output_ext,
2356                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2357                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2358                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2359                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2360                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2361             }
2362
2363             if the_only_video:
2364                 new_dict.update({
2365                     'width': the_only_video.get('width'),
2366                     'height': the_only_video.get('height'),
2367                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2368                     'fps': the_only_video.get('fps'),
2369                     'dynamic_range': the_only_video.get('dynamic_range'),
2370                     'vcodec': the_only_video.get('vcodec'),
2371                     'vbr': the_only_video.get('vbr'),
2372                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2373                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2374                 })
2375
2376             if the_only_audio:
2377                 new_dict.update({
2378                     'acodec': the_only_audio.get('acodec'),
2379                     'abr': the_only_audio.get('abr'),
2380                     'asr': the_only_audio.get('asr'),
2381                     'audio_channels': the_only_audio.get('audio_channels')
2382                 })
2383
2384             return new_dict
2385
2386         def _check_formats(formats):
2387             if self.params.get('check_formats') == 'selected':
2388                 yield from self._check_formats(formats)
2389                 return
2390             elif (self.params.get('check_formats') is not None
2391                     or self.params.get('allow_unplayable_formats')):
2392                 yield from formats
2393                 return
2394
2395             for f in formats:
2396                 if f.get('has_drm') or f.get('__needs_testing'):
2397                     yield from self._check_formats([f])
2398                 else:
2399                     yield f
2400
2401         def _build_selector_function(selector):
2402             if isinstance(selector, list):  # ,
2403                 fs = [_build_selector_function(s) for s in selector]
2404
2405                 def selector_function(ctx):
2406                     for f in fs:
2407                         yield from f(ctx)
2408                 return selector_function
2409
2410             elif selector.type == GROUP:  # ()
2411                 selector_function = _build_selector_function(selector.selector)
2412
2413             elif selector.type == PICKFIRST:  # /
2414                 fs = [_build_selector_function(s) for s in selector.selector]
2415
2416                 def selector_function(ctx):
2417                     for f in fs:
2418                         picked_formats = list(f(ctx))
2419                         if picked_formats:
2420                             return picked_formats
2421                     return []
2422
2423             elif selector.type == MERGE:  # +
2424                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2425
2426                 def selector_function(ctx):
2427                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2428                         yield _merge(pair)
2429
2430             elif selector.type == SINGLE:  # atom
2431                 format_spec = selector.selector or 'best'
2432
2433                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2434                 if format_spec == 'all':
2435                     def selector_function(ctx):
2436                         yield from _check_formats(ctx['formats'][::-1])
2437                 elif format_spec == 'mergeall':
2438                     def selector_function(ctx):
2439                         formats = list(_check_formats(
2440                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2441                         if not formats:
2442                             return
2443                         merged_format = formats[-1]
2444                         for f in formats[-2::-1]:
2445                             merged_format = _merge((merged_format, f))
2446                         yield merged_format
2447
2448                 else:
2449                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2450                     mobj = re.match(
2451                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2452                         format_spec)
2453                     if mobj is not None:
2454                         format_idx = int_or_none(mobj.group('n'), default=1)
2455                         format_reverse = mobj.group('bw')[0] == 'b'
2456                         format_type = (mobj.group('type') or [None])[0]
2457                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2458                         format_modified = mobj.group('mod') is not None
2459
2460                         format_fallback = not format_type and not format_modified  # for b, w
2461                         _filter_f = (
2462                             (lambda f: f.get('%scodec' % format_type) != 'none')
2463                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2464                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2465                             if format_type  # bv, ba, wv, wa
2466                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2467                             if not format_modified  # b, w
2468                             else lambda f: True)  # b*, w*
2469                         filter_f = lambda f: _filter_f(f) and (
2470                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2471                     else:
2472                         if format_spec in self._format_selection_exts['audio']:
2473                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2474                         elif format_spec in self._format_selection_exts['video']:
2475                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2476                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2477                         elif format_spec in self._format_selection_exts['storyboards']:
2478                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2479                         else:
2480                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2481
2482                     def selector_function(ctx):
2483                         formats = list(ctx['formats'])
2484                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2485                         if not matches:
2486                             if format_fallback and ctx['incomplete_formats']:
2487                                 # for extractors with incomplete formats (audio only (soundcloud)
2488                                 # or video only (imgur)) best/worst will fallback to
2489                                 # best/worst {video,audio}-only format
2490                                 matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
2491                             elif seperate_fallback and not ctx['has_merged_format']:
2492                                 # for compatibility with youtube-dl when there is no pre-merged format
2493                                 matches = list(filter(seperate_fallback, formats))
2494                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2495                         try:
2496                             yield matches[format_idx - 1]
2497                         except LazyList.IndexError:
2498                             return
2499
2500             filters = [self._build_format_filter(f) for f in selector.filters]
2501
2502             def final_selector(ctx):
2503                 ctx_copy = dict(ctx)
2504                 for _filter in filters:
2505                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2506                 return selector_function(ctx_copy)
2507             return final_selector
2508
2509         # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2510         #       Prefix numbers with random letters to avoid it being classified as a number
2511         #       See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2512         # TODO: Implement parser not reliant on tokenize.tokenize
2513         prefix = ''.join(random.choices(string.ascii_letters, k=32))
2514         stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2515         try:
2516             tokens = list(_remove_unused_ops(
2517                 token._replace(string=token.string.replace(prefix, ''))
2518                 for token in tokenize.tokenize(stream.readline)))
2519         except tokenize.TokenError:
2520             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2521
2522         class TokenIterator:
2523             def __init__(self, tokens):
2524                 self.tokens = tokens
2525                 self.counter = 0
2526
2527             def __iter__(self):
2528                 return self
2529
2530             def __next__(self):
2531                 if self.counter >= len(self.tokens):
2532                     raise StopIteration()
2533                 value = self.tokens[self.counter]
2534                 self.counter += 1
2535                 return value
2536
2537             next = __next__
2538
2539             def restore_last_token(self):
2540                 self.counter -= 1
2541
2542         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2543         return _build_selector_function(parsed_selector)
2544
2545     def _calc_headers(self, info_dict, load_cookies=False):
2546         res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2547         clean_headers(res)
2548
2549         if load_cookies:  # For --load-info-json
2550             self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
2551             self._load_cookies(info_dict.get('cookies'), autoscope=False)
2552         # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2553         # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2554         res.pop('Cookie', None)
2555         cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2556         if cookies:
2557             encoder = LenientSimpleCookie()
2558             values = []
2559             for cookie in cookies:
2560                 _, value = encoder.value_encode(cookie.value)
2561                 values.append(f'{cookie.name}={value}')
2562                 if cookie.domain:
2563                     values.append(f'Domain={cookie.domain}')
2564                 if cookie.path:
2565                     values.append(f'Path={cookie.path}')
2566                 if cookie.secure:
2567                     values.append('Secure')
2568                 if cookie.expires:
2569                     values.append(f'Expires={cookie.expires}')
2570                 if cookie.version:
2571                     values.append(f'Version={cookie.version}')
2572             info_dict['cookies'] = '; '.join(values)
2573
2574         if 'X-Forwarded-For' not in res:
2575             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2576             if x_forwarded_for_ip:
2577                 res['X-Forwarded-For'] = x_forwarded_for_ip
2578
2579         return res
2580
2581     def _calc_cookies(self, url):
2582         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2583         return self.cookiejar.get_cookie_header(url)
2584
2585     def _sort_thumbnails(self, thumbnails):
2586         thumbnails.sort(key=lambda t: (
2587             t.get('preference') if t.get('preference') is not None else -1,
2588             t.get('width') if t.get('width') is not None else -1,
2589             t.get('height') if t.get('height') is not None else -1,
2590             t.get('id') if t.get('id') is not None else '',
2591             t.get('url')))
2592
2593     def _sanitize_thumbnails(self, info_dict):
2594         thumbnails = info_dict.get('thumbnails')
2595         if thumbnails is None:
2596             thumbnail = info_dict.get('thumbnail')
2597             if thumbnail:
2598                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2599         if not thumbnails:
2600             return
2601
2602         def check_thumbnails(thumbnails):
2603             for t in thumbnails:
2604                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2605                 try:
2606                     self.urlopen(HEADRequest(t['url']))
2607                 except network_exceptions as err:
2608                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2609                     continue
2610                 yield t
2611
2612         self._sort_thumbnails(thumbnails)
2613         for i, t in enumerate(thumbnails):
2614             if t.get('id') is None:
2615                 t['id'] = '%d' % i
2616             if t.get('width') and t.get('height'):
2617                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2618             t['url'] = sanitize_url(t['url'])
2619
2620         if self.params.get('check_formats') is True:
2621             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2622         else:
2623             info_dict['thumbnails'] = thumbnails
2624
2625     def _fill_common_fields(self, info_dict, final=True):
2626         # TODO: move sanitization here
2627         if final:
2628             title = info_dict['fulltitle'] = info_dict.get('title')
2629             if not title:
2630                 if title == '':
2631                     self.write_debug('Extractor gave empty title. Creating a generic title')
2632                 else:
2633                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2634                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2635
2636         if info_dict.get('duration') is not None:
2637             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2638
2639         for ts_key, date_key in (
2640                 ('timestamp', 'upload_date'),
2641                 ('release_timestamp', 'release_date'),
2642                 ('modified_timestamp', 'modified_date'),
2643         ):
2644             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2645                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2646                 # see http://bugs.python.org/issue1646728)
2647                 with contextlib.suppress(ValueError, OverflowError, OSError):
2648                     upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
2649                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2650
2651         if not info_dict.get('release_year'):
2652             info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2653
2654         live_keys = ('is_live', 'was_live')
2655         live_status = info_dict.get('live_status')
2656         if live_status is None:
2657             for key in live_keys:
2658                 if info_dict.get(key) is False:
2659                     continue
2660                 if info_dict.get(key):
2661                     live_status = key
2662                 break
2663             if all(info_dict.get(key) is False for key in live_keys):
2664                 live_status = 'not_live'
2665         if live_status:
2666             info_dict['live_status'] = live_status
2667             for key in live_keys:
2668                 if info_dict.get(key) is None:
2669                     info_dict[key] = (live_status == key)
2670         if live_status == 'post_live':
2671             info_dict['was_live'] = True
2672
2673         # Auto generate title fields corresponding to the *_number fields when missing
2674         # in order to always have clean titles. This is very common for TV series.
2675         for field in ('chapter', 'season', 'episode'):
2676             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2677                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2678
2679         for old_key, new_key in self._deprecated_multivalue_fields.items():
2680             if new_key in info_dict and old_key in info_dict:
2681                 if '_version' not in info_dict:  # HACK: Do not warn when using --load-info-json
2682                     self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
2683             elif old_value := info_dict.get(old_key):
2684                 info_dict[new_key] = old_value.split(', ')
2685             elif new_value := info_dict.get(new_key):
2686                 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2687
2688     def _raise_pending_errors(self, info):
2689         err = info.pop('__pending_error', None)
2690         if err:
2691             self.report_error(err, tb=False)
2692
2693     def sort_formats(self, info_dict):
2694         formats = self._get_formats(info_dict)
2695         formats.sort(key=FormatSorter(
2696             self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2697
    def process_video_result(self, info_dict, download=True):
        """
        Sanitize a single-video info dict, select its formats and optionally download them

        The info dict is modified in place: ids and numeric fields are coerced,
        chapters, thumbnails, subtitles and formats are normalized, the formats
        are sorted and given unique ids, and the format selector is applied.
        When `download` is true, `self.process_info` is called for every
        combination of selected format and requested time range.

        @param info_dict    Info dict whose '_type' is 'video' (or unset)
        @param download     Whether to actually process/download the selected formats
        @returns            The info dict, updated with the fields of the last
                            selected format (returned earlier, without format
                            selection, when the entry is filtered out or only
                            listing was requested)
        @raises ExtractorError          If 'id' is missing or empty, or if no
                                        requested format is available and
                                        `ignore_no_formats_error` is not set
        @raises MaxDownloadsReached     Re-raised after the 'after_video'
                                        postprocessors ran, if it was hit
                                        while processing a format
        """
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-None, non-str value to str (with a warning)
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value of a known numeric field with int_or_none (with a warning)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        # A section's duration is derived from its boundaries
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        # A non-positive duration is removed; the warning is only shown if the removed value was non-zero
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        # Add a leading chapter starting at 0 if the first chapter starts later
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        # Sentinel neighbour for the first and last chapter: its 'end_time' (0) is the fallback start
        # of the first chapter and its 'start_time' (the duration) the fallback end of the last one
        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer the explicit 'thumbnail'; otherwise use the last entry of the sanitized list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        # Formats with a truthy 'has_drm' other than 'maybe' are dropped unless unplayable formats are allowed
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        # Only a live video without `live_from_start` is downloaded from the current time;
        # in that case the current local time is appended to the title
        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'; a bytes URL is converted with str()
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        # Fill in the fields of each format that can be derived from its other fields
        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
                if format.get('acodec') is None:
                    format['acodec'] = format['ext']
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            # For fragmented formats, "tbr" is often max bitrate and not average
            if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
            # The ChainMap lets format-level fields shadow the video-level ones during the lookup
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

        # Safeguard against old/insecure infojson when using --load-info-json
        if info_dict.get('http_headers'):
            info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
            info_dict['http_headers'].pop('Cookie', None)

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        # With `check_formats` exactly True, the formats are wrapped in a lazy list
        # that passes them through `self._check_formats`
        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # A non-None result from _match_entry ends processing of this video here
        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        # A format selector of '-' means the user is prompted for the selector
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        # Loops only in interactive mode, until a valid selector that matches some format is entered
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = self._select_formats(formats, format_selector)
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        # Without a `download_ranges` param, a single empty range (= the whole video) is used
        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            # Every selected format is processed once per requested range
            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    # Remembered so that the cleanup below still runs; re-raised at the end
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            # The archive is written only if at least one download requested it and none reported False
            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
3017
3018     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
3019         """Select the requested subtitles and their format"""
3020         available_subs, normal_sub_langs = {}, []
3021         if normal_subtitles and self.params.get('writesubtitles'):
3022             available_subs.update(normal_subtitles)
3023             normal_sub_langs = tuple(normal_subtitles.keys())
3024         if automatic_captions and self.params.get('writeautomaticsub'):
3025             for lang, cap_info in automatic_captions.items():
3026                 if lang not in available_subs:
3027                     available_subs[lang] = cap_info
3028
3029         if not available_subs or (
3030                 not self.params.get('writesubtitles')
3031                 and not self.params.get('writeautomaticsub')):
3032             return None
3033
3034         all_sub_langs = tuple(available_subs.keys())
3035         if self.params.get('allsubtitles', False):
3036             requested_langs = all_sub_langs
3037         elif self.params.get('subtitleslangs', False):
3038             try:
3039                 requested_langs = orderedSet_from_options(
3040                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
3041             except re.error as e:
3042                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
3043         else:
3044             requested_langs = LazyList(itertools.chain(
3045                 ['en'] if 'en' in normal_sub_langs else [],
3046                 filter(lambda f: f.startswith('en'), normal_sub_langs),
3047                 ['en'] if 'en' in all_sub_langs else [],
3048                 filter(lambda f: f.startswith('en'), all_sub_langs),
3049                 normal_sub_langs, all_sub_langs,
3050             ))[:1]
3051         if requested_langs:
3052             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3053
3054         formats_query = self.params.get('subtitlesformat', 'best')
3055         formats_preference = formats_query.split('/') if formats_query else []
3056         subs = {}
3057         for lang in requested_langs:
3058             formats = available_subs.get(lang)
3059             if formats is None:
3060                 self.report_warning(f'{lang} subtitles not available for {video_id}')
3061                 continue
3062             for ext in formats_preference:
3063                 if ext == 'best':
3064                     f = formats[-1]
3065                     break
3066                 matches = list(filter(lambda f: f['ext'] == ext, formats))
3067                 if matches:
3068                     f = matches[-1]
3069                     break
3070             else:
3071                 f = formats[-1]
3072                 self.report_warning(
3073                     'No subtitle format found matching "%s" for language %s, '
3074                     'using %s' % (formats_query, lang, f['ext']))
3075             subs[lang] = f
3076         return subs
3077
3078     def _forceprint(self, key, info_dict):
3079         if info_dict is None:
3080             return
3081         info_copy = info_dict.copy()
3082         info_copy.setdefault('filename', self.prepare_filename(info_dict))
3083         if info_dict.get('requested_formats') is not None:
3084             # For RTMP URLs, also include the playpath
3085             info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3086         elif info_dict.get('url'):
3087             info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3088         info_copy['formats_table'] = self.render_formats_table(info_dict)
3089         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3090         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3091         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3092
3093         def format_tmpl(tmpl):
3094             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3095             if not mobj:
3096                 return tmpl
3097
3098             fmt = '%({})s'
3099             if tmpl.startswith('{'):
3100                 tmpl, fmt = f'.{tmpl}', '%({})j'
3101             if tmpl.endswith('='):
3102                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3103             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3104
3105         for tmpl in self.params['forceprint'].get(key, []):
3106             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3107
3108         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3109             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3110             tmpl = format_tmpl(tmpl)
3111             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3112             if self._ensure_dir_exists(filename):
3113                 with open(filename, 'a', encoding='utf-8', newline='') as f:
3114                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3115
3116         return info_copy
3117
3118     def __forced_printings(self, info_dict, filename=None, incomplete=True):
3119         if (self.params.get('forcejson')
3120                 or self.params['forceprint'].get('video')
3121                 or self.params['print_to_file'].get('video')):
3122             self.post_extract(info_dict)
3123         if filename:
3124             info_dict['filename'] = filename
3125         info_copy = self._forceprint('video', info_dict)
3126
3127         def print_field(field, actual_field=None, optional=False):
3128             if actual_field is None:
3129                 actual_field = field
3130             if self.params.get(f'force{field}') and (
3131                     info_copy.get(field) is not None or (not optional and not incomplete)):
3132                 self.to_stdout(info_copy[actual_field])
3133
3134         print_field('title')
3135         print_field('id')
3136         print_field('url', 'urls')
3137         print_field('thumbnail', optional=True)
3138         print_field('description', optional=True)
3139         print_field('filename')
3140         if self.params.get('forceduration') and info_copy.get('duration') is not None:
3141             self.to_stdout(formatSeconds(info_copy['duration']))
3142         print_field('format')
3143
3144         if self.params.get('forcejson'):
3145             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3146
3147     def dl(self, name, info, subtitle=False, test=False):
3148         if not info.get('url'):
3149             self.raise_no_formats(info, True)
3150
3151         if test:
3152             verbose = self.params.get('verbose')
3153             params = {
3154                 'test': True,
3155                 'quiet': self.params.get('quiet') or not verbose,
3156                 'verbose': verbose,
3157                 'noprogress': not verbose,
3158                 'nopart': True,
3159                 'skip_unavailable_fragments': False,
3160                 'keep_fragments': False,
3161                 'overwrites': True,
3162                 '_no_ytdl_file': True,
3163             }
3164         else:
3165             params = self.params
3166         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3167         if not test:
3168             for ph in self._progress_hooks:
3169                 fd.add_progress_hook(ph)
3170             urls = '", "'.join(
3171                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3172                 for f in info.get('requested_formats', []) or [info])
3173             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3174
3175         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
3176         # But it may contain objects that are not deep-copyable
3177         new_info = self._copy_infodict(info)
3178         if new_info.get('http_headers') is None:
3179             new_info['http_headers'] = self._calc_headers(new_info)
3180         return fd.download(name, new_info, subtitle)
3181
3182     def existing_file(self, filepaths, *, default_overwrite=True):
3183         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3184         if existing_files and not self.params.get('overwrites', default_overwrite):
3185             return existing_files[0]
3186
3187         for file in existing_files:
3188             self.report_file_delete(file)
3189             os.remove(file)
3190         return None
3191
3192     def process_info(self, info_dict):
3193         """Process a single resolved IE result. (Modifies it in-place)"""
3194
3195         assert info_dict.get('_type', 'video') == 'video'
3196         original_infodict = info_dict
3197
3198         if 'format' not in info_dict and 'ext' in info_dict:
3199             info_dict['format'] = info_dict['ext']
3200
3201         if self._match_entry(info_dict) is not None:
3202             info_dict['__write_download_archive'] = 'ignore'
3203             return
3204
3205         # Does nothing under normal operation - for backward compatibility of process_info
3206         self.post_extract(info_dict)
3207
3208         def replace_info_dict(new_info):
3209             nonlocal info_dict
3210             if new_info == info_dict:
3211                 return
3212             info_dict.clear()
3213             info_dict.update(new_info)
3214
3215         new_info, _ = self.pre_process(info_dict, 'video')
3216         replace_info_dict(new_info)
3217         self._num_downloads += 1
3218
3219         # info_dict['_filename'] needs to be set for backward compatibility
3220         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3221         temp_filename = self.prepare_filename(info_dict, 'temp')
3222         files_to_move = {}
3223
3224         # Forced printings
3225         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3226
3227         def check_max_downloads():
3228             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3229                 raise MaxDownloadsReached()
3230
3231         if self.params.get('simulate'):
3232             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3233             check_max_downloads()
3234             return
3235
3236         if full_filename is None:
3237             return
3238         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3239             return
3240         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3241             return
3242
3243         if self._write_description('video', info_dict,
3244                                    self.prepare_filename(info_dict, 'description')) is None:
3245             return
3246
3247         sub_files = self._write_subtitles(info_dict, temp_filename)
3248         if sub_files is None:
3249             return
3250         files_to_move.update(dict(sub_files))
3251
3252         thumb_files = self._write_thumbnails(
3253             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3254         if thumb_files is None:
3255             return
3256         files_to_move.update(dict(thumb_files))
3257
3258         infofn = self.prepare_filename(info_dict, 'infojson')
3259         _infojson_written = self._write_info_json('video', info_dict, infofn)
3260         if _infojson_written:
3261             info_dict['infojson_filename'] = infofn
3262             # For backward compatibility, even though it was a private field
3263             info_dict['__infojson_filename'] = infofn
3264         elif _infojson_written is None:
3265             return
3266
3267         # Note: Annotations are deprecated
3268         annofn = None
3269         if self.params.get('writeannotations', False):
3270             annofn = self.prepare_filename(info_dict, 'annotation')
3271         if annofn:
3272             if not self._ensure_dir_exists(encodeFilename(annofn)):
3273                 return
3274             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3275                 self.to_screen('[info] Video annotations are already present')
3276             elif not info_dict.get('annotations'):
3277                 self.report_warning('There are no annotations to write.')
3278             else:
3279                 try:
3280                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3281                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3282                         annofile.write(info_dict['annotations'])
3283                 except (KeyError, TypeError):
3284                     self.report_warning('There are no annotations to write.')
3285                 except OSError:
3286                     self.report_error('Cannot write annotations file: ' + annofn)
3287                     return
3288
3289         # Write internet shortcut files
3290         def _write_link_file(link_type):
3291             url = try_get(info_dict['webpage_url'], iri_to_uri)
3292             if not url:
3293                 self.report_warning(
3294                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3295                 return True
3296             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3297             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3298                 return False
3299             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3300                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3301                 return True
3302             try:
3303                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3304                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3305                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3306                     template_vars = {'url': url}
3307                     if link_type == 'desktop':
3308                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3309                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3310             except OSError:
3311                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3312                 return False
3313             return True
3314
3315         write_links = {
3316             'url': self.params.get('writeurllink'),
3317             'webloc': self.params.get('writewebloclink'),
3318             'desktop': self.params.get('writedesktoplink'),
3319         }
3320         if self.params.get('writelink'):
3321             link_type = ('webloc' if sys.platform == 'darwin'
3322                          else 'desktop' if sys.platform.startswith('linux')
3323                          else 'url')
3324             write_links[link_type] = True
3325
3326         if any(should_write and not _write_link_file(link_type)
3327                for link_type, should_write in write_links.items()):
3328             return
3329
3330         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3331         replace_info_dict(new_info)
3332
3333         if self.params.get('skip_download'):
3334             info_dict['filepath'] = temp_filename
3335             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3336             info_dict['__files_to_move'] = files_to_move
3337             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3338             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3339         else:
3340             # Download
3341             info_dict.setdefault('__postprocessors', [])
3342             try:
3343
3344                 def existing_video_file(*filepaths):
3345                     ext = info_dict.get('ext')
3346                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3347                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3348                                               default_overwrite=False)
3349                     if file:
3350                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3351                     return file
3352
3353                 fd, success = None, True
3354                 if info_dict.get('protocol') or info_dict.get('url'):
3355                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3356                     if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3357                             info_dict.get('section_start') or info_dict.get('section_end')):
3358                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3359                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3360                         self.report_error(f'{msg}. Aborting')
3361                         return
3362
3363                 if info_dict.get('requested_formats') is not None:
3364                     old_ext = info_dict['ext']
3365                     if self.params.get('merge_output_format') is None:
3366                         if (info_dict['ext'] == 'webm'
3367                                 and info_dict.get('thumbnails')
3368                                 # check with type instead of pp_key, __name__, or isinstance
3369                                 # since we dont want any custom PPs to trigger this
3370                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3371                             info_dict['ext'] = 'mkv'
3372                             self.report_warning(
3373                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3374                     new_ext = info_dict['ext']
3375
3376                     def correct_ext(filename, ext=new_ext):
3377                         if filename == '-':
3378                             return filename
3379                         filename_real_ext = os.path.splitext(filename)[1][1:]
3380                         filename_wo_ext = (
3381                             os.path.splitext(filename)[0]
3382                             if filename_real_ext in (old_ext, new_ext)
3383                             else filename)
3384                         return f'{filename_wo_ext}.{ext}'
3385
3386                     # Ensure filename always has a correct extension for successful merge
3387                     full_filename = correct_ext(full_filename)
3388                     temp_filename = correct_ext(temp_filename)
3389                     dl_filename = existing_video_file(full_filename, temp_filename)
3390
3391                     info_dict['__real_download'] = False
3392                     # NOTE: Copy so that original format dicts are not modified
3393                     info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3394
3395                     merger = FFmpegMergerPP(self)
3396                     downloaded = []
3397                     if dl_filename is not None:
3398                         self.report_file_already_downloaded(dl_filename)
3399                     elif fd:
3400                         for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3401                             f['filepath'] = fname = prepend_extension(
3402                                 correct_ext(temp_filename, info_dict['ext']),
3403                                 'f%s' % f['format_id'], info_dict['ext'])
3404                             downloaded.append(fname)
3405                         info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3406                         success, real_download = self.dl(temp_filename, info_dict)
3407                         info_dict['__real_download'] = real_download
3408                     else:
3409                         if self.params.get('allow_unplayable_formats'):
3410                             self.report_warning(
3411                                 'You have requested merging of multiple formats '
3412                                 'while also allowing unplayable formats to be downloaded. '
3413                                 'The formats won\'t be merged to prevent data corruption.')
3414                         elif not merger.available:
3415                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3416                             if not self.params.get('ignoreerrors'):
3417                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3418                                 return
3419                             self.report_warning(f'{msg}. The formats won\'t be merged')
3420
3421                         if temp_filename == '-':
3422                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3423                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3424                                       else 'but ffmpeg is not installed')
3425                             self.report_warning(
3426                                 f'You have requested downloading multiple formats to stdout {reason}. '
3427                                 'The formats will be streamed one after the other')
3428                             fname = temp_filename
3429                         for f in info_dict['requested_formats']:
3430                             new_info = dict(info_dict)
3431                             del new_info['requested_formats']
3432                             new_info.update(f)
3433                             if temp_filename != '-':
3434                                 fname = prepend_extension(
3435                                     correct_ext(temp_filename, new_info['ext']),
3436                                     'f%s' % f['format_id'], new_info['ext'])
3437                                 if not self._ensure_dir_exists(fname):
3438                                     return
3439                                 f['filepath'] = fname
3440                                 downloaded.append(fname)
3441                             partial_success, real_download = self.dl(fname, new_info)
3442                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3443                             success = success and partial_success
3444
3445                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3446                         info_dict['__postprocessors'].append(merger)
3447                         info_dict['__files_to_merge'] = downloaded
3448                         # Even if there were no downloads, it is being merged only now
3449                         info_dict['__real_download'] = True
3450                     else:
3451                         for file in downloaded:
3452                             files_to_move[file] = None
3453                 else:
3454                     # Just a single file
3455                     dl_filename = existing_video_file(full_filename, temp_filename)
3456                     if dl_filename is None or dl_filename == temp_filename:
3457                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3458                         # So we should try to resume the download
3459                         success, real_download = self.dl(temp_filename, info_dict)
3460                         info_dict['__real_download'] = real_download
3461                     else:
3462                         self.report_file_already_downloaded(dl_filename)
3463
3464                 dl_filename = dl_filename or temp_filename
3465                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3466
3467             except network_exceptions as err:
3468                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3469                 return
3470             except OSError as err:
3471                 raise UnavailableVideoError(err)
3472             except (ContentTooShortError, ) as err:
3473                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3474                 return
3475
3476             self._raise_pending_errors(info_dict)
3477             if success and full_filename != '-':
3478
3479                 def fixup():
3480                     do_fixup = True
3481                     fixup_policy = self.params.get('fixup')
3482                     vid = info_dict['id']
3483
3484                     if fixup_policy in ('ignore', 'never'):
3485                         return
3486                     elif fixup_policy == 'warn':
3487                         do_fixup = 'warn'
3488                     elif fixup_policy != 'force':
3489                         assert fixup_policy in ('detect_or_warn', None)
3490                         if not info_dict.get('__real_download'):
3491                             do_fixup = False
3492
3493                     def ffmpeg_fixup(cndn, msg, cls):
3494                         if not (do_fixup and cndn):
3495                             return
3496                         elif do_fixup == 'warn':
3497                             self.report_warning(f'{vid}: {msg}')
3498                             return
3499                         pp = cls(self)
3500                         if pp.available:
3501                             info_dict['__postprocessors'].append(pp)
3502                         else:
3503                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3504
3505                     stretched_ratio = info_dict.get('stretched_ratio')
3506                     ffmpeg_fixup(stretched_ratio not in (1, None),
3507                                  f'Non-uniform pixel ratio {stretched_ratio}',
3508                                  FFmpegFixupStretchedPP)
3509
3510                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3511                     downloader = downloader.FD_NAME if downloader else None
3512
3513                     ext = info_dict.get('ext')
3514                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3515                         isinstance(pp, FFmpegVideoConvertorPP)
3516                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3517                     ) for pp in self._pps['post_process'])
3518
3519                     if not postprocessed_by_ffmpeg:
3520                         ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3521                                      and info_dict.get('container') == 'm4a_dash',
3522                                      'writing DASH m4a. Only some players support this container',
3523                                      FFmpegFixupM4aPP)
3524                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3525                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3526                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3527                                      FFmpegFixupM3u8PP)
3528                         ffmpeg_fixup(downloader == 'dashsegments'
3529                                      and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
3530                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3531
3532                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3533                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3534
3535                 fixup()
3536                 try:
3537                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3538                 except PostProcessingError as err:
3539                     self.report_error('Postprocessing: %s' % str(err))
3540                     return
3541                 try:
3542                     for ph in self._post_hooks:
3543                         ph(info_dict['filepath'])
3544                 except Exception as err:
3545                     self.report_error('post hooks: %s' % str(err))
3546                     return
3547                 info_dict['__write_download_archive'] = True
3548
3549         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3550         if self.params.get('force_write_download_archive'):
3551             info_dict['__write_download_archive'] = True
3552         check_max_downloads()
3553
3554     def __download_wrapper(self, func):
3555         @functools.wraps(func)
3556         def wrapper(*args, **kwargs):
3557             try:
3558                 res = func(*args, **kwargs)
3559             except UnavailableVideoError as e:
3560                 self.report_error(e)
3561             except DownloadCancelled as e:
3562                 self.to_screen(f'[info] {e}')
3563                 if not self.params.get('break_per_url'):
3564                     raise
3565                 self._num_downloads = 0
3566             else:
3567                 if self.params.get('dump_single_json', False):
3568                     self.post_extract(res)
3569                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3570         return wrapper
3571
3572     def download(self, url_list):
3573         """Download a given list of URLs."""
3574         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3575         outtmpl = self.params['outtmpl']['default']
3576         if (len(url_list) > 1
3577                 and outtmpl != '-'
3578                 and '%' not in outtmpl
3579                 and self.params.get('max_downloads') != 1):
3580             raise SameFileError(outtmpl)
3581
3582         for url in url_list:
3583             self.__download_wrapper(self.extract_info)(
3584                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3585
3586         return self._download_retcode
3587
3588     def download_with_info_file(self, info_filename):
3589         with contextlib.closing(fileinput.FileInput(
3590                 [info_filename], mode='r',
3591                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3592             # FileInput doesn't have a read method, we can't call json.load
3593             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3594                      for info in variadic(json.loads('\n'.join(f)))]
3595         for info in infos:
3596             try:
3597                 self.__download_wrapper(self.process_ie_result)(info, download=True)
3598             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3599                 if not isinstance(e, EntryNotInPlaylist):
3600                     self.to_stderr('\r')
3601                 webpage_url = info.get('webpage_url')
3602                 if webpage_url is None:
3603                     raise
3604                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3605                 self.download([webpage_url])
3606             except ExtractorError as e:
3607                 self.report_error(e)
3608         return self._download_retcode
3609
3610     @staticmethod
3611     def sanitize_info(info_dict, remove_private_keys=False):
3612         ''' Sanitize the infodict for converting to json '''
3613         if info_dict is None:
3614             return info_dict
3615         info_dict.setdefault('epoch', int(time.time()))
3616         info_dict.setdefault('_type', 'video')
3617         info_dict.setdefault('_version', {
3618             'version': __version__,
3619             'current_git_head': current_git_head(),
3620             'release_git_head': RELEASE_GIT_HEAD,
3621             'repository': ORIGIN,
3622         })
3623
3624         if remove_private_keys:
3625             reject = lambda k, v: v is None or k.startswith('__') or k in {
3626                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3627                 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3628                 'playlist_autonumber',
3629             }
3630         else:
3631             reject = lambda k, v: False
3632
3633         def filter_fn(obj):
3634             if isinstance(obj, dict):
3635                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3636             elif isinstance(obj, (list, tuple, set, LazyList)):
3637                 return list(map(filter_fn, obj))
3638             elif obj is None or isinstance(obj, (str, int, float, bool)):
3639                 return obj
3640             else:
3641                 return repr(obj)
3642
3643         return filter_fn(info_dict)
3644
3645     @staticmethod
3646     def filter_requested_info(info_dict, actually_filter=True):
3647         ''' Alias of sanitize_info for backward compatibility '''
3648         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3649
3650     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3651         for filename in set(filter(None, files_to_delete)):
3652             if msg:
3653                 self.to_screen(msg % filename)
3654             try:
3655                 os.remove(filename)
3656             except OSError:
3657                 self.report_warning(f'Unable to delete file {filename}')
3658             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3659                 del info['__files_to_move'][filename]
3660
3661     @staticmethod
3662     def post_extract(info_dict):
3663         def actual_post_extract(info_dict):
3664             if info_dict.get('_type') in ('playlist', 'multi_video'):
3665                 for video_dict in info_dict.get('entries', {}):
3666                     actual_post_extract(video_dict or {})
3667                 return
3668
3669             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3670             info_dict.update(post_extractor())
3671
3672         actual_post_extract(info_dict or {})
3673
3674     def run_pp(self, pp, infodict):
3675         files_to_delete = []
3676         if '__files_to_move' not in infodict:
3677             infodict['__files_to_move'] = {}
3678         try:
3679             files_to_delete, infodict = pp.run(infodict)
3680         except PostProcessingError as e:
3681             # Must be True and not 'only_download'
3682             if self.params.get('ignoreerrors') is True:
3683                 self.report_error(e)
3684                 return infodict
3685             raise
3686
3687         if not files_to_delete:
3688             return infodict
3689         if self.params.get('keepvideo', False):
3690             for f in files_to_delete:
3691                 infodict['__files_to_move'].setdefault(f, '')
3692         else:
3693             self._delete_downloaded_files(
3694                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3695         return infodict
3696
3697     def run_all_pps(self, key, info, *, additional_pps=None):
3698         if key != 'video':
3699             self._forceprint(key, info)
3700         for pp in (additional_pps or []) + self._pps[key]:
3701             info = self.run_pp(pp, info)
3702         return info
3703
3704     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3705         info = dict(ie_info)
3706         info['__files_to_move'] = files_to_move or {}
3707         try:
3708             info = self.run_all_pps(key, info)
3709         except PostProcessingError as err:
3710             msg = f'Preprocessing: {err}'
3711             info.setdefault('__pending_error', msg)
3712             self.report_error(msg, is_error=False)
3713         return info, info.pop('__files_to_move', None)
3714
3715     def post_process(self, filename, info, files_to_move=None):
3716         """Run all the postprocessors on the given file."""
3717         info['filepath'] = filename
3718         info['__files_to_move'] = files_to_move or {}
3719         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3720         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3721         del info['__files_to_move']
3722         return self.run_all_pps('after_move', info)
3723
3724     def _make_archive_id(self, info_dict):
3725         video_id = info_dict.get('id')
3726         if not video_id:
3727             return
3728         # Future-proof against any change in case
3729         # and backwards compatibility with prior versions
3730         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3731         if extractor is None:
3732             url = str_or_none(info_dict.get('url'))
3733             if not url:
3734                 return
3735             # Try to find matching extractor for the URL and take its ie_key
3736             for ie_key, ie in self._ies.items():
3737                 if ie.suitable(url):
3738                     extractor = ie_key
3739                     break
3740             else:
3741                 return
3742         return make_archive_id(extractor, video_id)
3743
3744     def in_download_archive(self, info_dict):
3745         if not self.archive:
3746             return False
3747
3748         vid_ids = [self._make_archive_id(info_dict)]
3749         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3750         return any(id_ in self.archive for id_ in vid_ids)
3751
3752     def record_download_archive(self, info_dict):
3753         fn = self.params.get('download_archive')
3754         if fn is None:
3755             return
3756         vid_id = self._make_archive_id(info_dict)
3757         assert vid_id
3758
3759         self.write_debug(f'Adding to archive: {vid_id}')
3760         if is_path_like(fn):
3761             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3762                 archive_file.write(vid_id + '\n')
3763         self.archive.add(vid_id)
3764
3765     @staticmethod
3766     def format_resolution(format, default='unknown'):
3767         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3768             return 'audio only'
3769         if format.get('resolution') is not None:
3770             return format['resolution']
3771         if format.get('width') and format.get('height'):
3772             return '%dx%d' % (format['width'], format['height'])
3773         elif format.get('height'):
3774             return '%sp' % format['height']
3775         elif format.get('width'):
3776             return '%dx?' % format['width']
3777         return default
3778
3779     def _list_format_headers(self, *headers):
3780         if self.params.get('listformats_table', True) is not False:
3781             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3782         return headers
3783
3784     def _format_note(self, fdict):
3785         res = ''
3786         if fdict.get('ext') in ['f4f', 'f4m']:
3787             res += '(unsupported)'
3788         if fdict.get('language'):
3789             if res:
3790                 res += ' '
3791             res += '[%s]' % fdict['language']
3792         if fdict.get('format_note') is not None:
3793             if res:
3794                 res += ' '
3795             res += fdict['format_note']
3796         if fdict.get('tbr') is not None:
3797             if res:
3798                 res += ', '
3799             res += '%4dk' % fdict['tbr']
3800         if fdict.get('container') is not None:
3801             if res:
3802                 res += ', '
3803             res += '%s container' % fdict['container']
3804         if (fdict.get('vcodec') is not None
3805                 and fdict.get('vcodec') != 'none'):
3806             if res:
3807                 res += ', '
3808             res += fdict['vcodec']
3809             if fdict.get('vbr') is not None:
3810                 res += '@'
3811         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3812             res += 'video@'
3813         if fdict.get('vbr') is not None:
3814             res += '%4dk' % fdict['vbr']
3815         if fdict.get('fps') is not None:
3816             if res:
3817                 res += ', '
3818             res += '%sfps' % fdict['fps']
3819         if fdict.get('acodec') is not None:
3820             if res:
3821                 res += ', '
3822             if fdict['acodec'] == 'none':
3823                 res += 'video only'
3824             else:
3825                 res += '%-5s' % fdict['acodec']
3826         elif fdict.get('abr') is not None:
3827             if res:
3828                 res += ', '
3829             res += 'audio'
3830         if fdict.get('abr') is not None:
3831             res += '@%3dk' % fdict['abr']
3832         if fdict.get('asr') is not None:
3833             res += ' (%5dHz)' % fdict['asr']
3834         if fdict.get('filesize') is not None:
3835             if res:
3836                 res += ', '
3837             res += format_bytes(fdict['filesize'])
3838         elif fdict.get('filesize_approx') is not None:
3839             if res:
3840                 res += ', '
3841             res += '~' + format_bytes(fdict['filesize_approx'])
3842         return res
3843
3844     def _get_formats(self, info_dict):
3845         if info_dict.get('formats') is None:
3846             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3847                 return [info_dict]
3848             return []
3849         return info_dict['formats']
3850
3851     def render_formats_table(self, info_dict):
3852         formats = self._get_formats(info_dict)
3853         if not formats:
3854             return
3855         if not self.params.get('listformats_table', True) is not False:
3856             table = [
3857                 [
3858                     format_field(f, 'format_id'),
3859                     format_field(f, 'ext'),
3860                     self.format_resolution(f),
3861                     self._format_note(f)
3862                 ] for f in formats if (f.get('preference') or 0) >= -1000]
3863             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3864
3865         def simplified_codec(f, field):
3866             assert field in ('acodec', 'vcodec')
3867             codec = f.get(field)
3868             if not codec:
3869                 return 'unknown'
3870             elif codec != 'none':
3871                 return '.'.join(codec.split('.')[:4])
3872
3873             if field == 'vcodec' and f.get('acodec') == 'none':
3874                 return 'images'
3875             elif field == 'acodec' and f.get('vcodec') == 'none':
3876                 return ''
3877             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3878                                     self.Styles.SUPPRESS)
3879
3880         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3881         table = [
3882             [
3883                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3884                 format_field(f, 'ext'),
3885                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3886                 format_field(f, 'fps', '\t%d', func=round),
3887                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3888                 format_field(f, 'audio_channels', '\t%s'),
3889                 delim, (
3890                     format_field(f, 'filesize', ' \t%s', func=format_bytes)
3891                     or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3892                     or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
3893                                     self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
3894                 format_field(f, 'tbr', '\t%dk', func=round),
3895                 shorten_protocol_name(f.get('protocol', '')),
3896                 delim,
3897                 simplified_codec(f, 'vcodec'),
3898                 format_field(f, 'vbr', '\t%dk', func=round),
3899                 simplified_codec(f, 'acodec'),
3900                 format_field(f, 'abr', '\t%dk', func=round),
3901                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3902                 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3903                     self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3904                     (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3905                      else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3906                     format_field(f, 'format_note'),
3907                     format_field(f, 'container', ignore=(None, f.get('ext'))),
3908                     delim=', '), delim=' '),
3909             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3910         header_line = self._list_format_headers(
3911             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3912             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3913
3914         return render_table(
3915             header_line, table, hide_empty=True,
3916             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3917
3918     def render_thumbnails_table(self, info_dict):
3919         thumbnails = list(info_dict.get('thumbnails') or [])
3920         if not thumbnails:
3921             return None
3922         return render_table(
3923             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3924             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3925
3926     def render_subtitles_table(self, video_id, subtitles):
3927         def _row(lang, formats):
3928             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3929             if len(set(names)) == 1:
3930                 names = [] if names[0] == 'unknown' else names[:1]
3931             return [lang, ', '.join(names), ', '.join(exts)]
3932
3933         if not subtitles:
3934             return None
3935         return render_table(
3936             self._list_format_headers('Language', 'Name', 'Formats'),
3937             [_row(lang, formats) for lang, formats in subtitles.items()],
3938             hide_empty=True)
3939
3940     def __list_table(self, video_id, name, func, *args):
3941         table = func(*args)
3942         if not table:
3943             self.to_screen(f'{video_id} has no {name}')
3944             return
3945         self.to_screen(f'[info] Available {name} for {video_id}:')
3946         self.to_stdout(table)
3947
3948     def list_formats(self, info_dict):
3949         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3950
3951     def list_thumbnails(self, info_dict):
3952         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3953
3954     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3955         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3956
    def print_debug_header(self):
        """Write the verbose-mode debug header.

        Does nothing unless the 'verbose' param is set. Each line is written
        with a '[debug] ' prefix, through the 'logger' param's debug() method
        when a logger is configured and through the write-string helpers
        otherwise. The header covers encodings, version/variant, params (API use
        only), lazy-extractor status, compat options, git HEAD, system
        identifier, external program versions, optional libraries, proxies,
        request handlers, plugins and plugin directories.
        """
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides
        )

        def get_encoding(stream):
            # Describe the stream's encoding, annotated with terminal limitations
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        # One entry per configured output stream, except unset ones and the console
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        # Pick the sink for all following lines; the encodings line goes out first
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        # Version line; for non-CLI use it also tells which API class is in use
        write_debug(join_nonempty(
            f'{REPOSITORY.rpartition("/")[2]} version',
            _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the features that are enabled and append them to the ffmpeg version
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Programs with a falsy version are omitted from the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        write_debug(f'Proxy map: {self.proxies}')
        write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            # A plugin registered under a name other than its class name is shown as "Class as name"
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                # Extractor overrides are listed together with the class they override
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        # The `False and` guard means this branch never executes
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
4073
4074     @functools.cached_property
4075     def proxies(self):
4076         """Global proxy configuration"""
4077         opts_proxy = self.params.get('proxy')
4078         if opts_proxy is not None:
4079             if opts_proxy == '':
4080                 opts_proxy = '__noproxy__'
4081             proxies = {'all': opts_proxy}
4082         else:
4083             proxies = urllib.request.getproxies()
4084             # compat. Set HTTPS_PROXY to __noproxy__ to revert
4085             if 'http' in proxies and 'https' not in proxies:
4086                 proxies['https'] = proxies['http']
4087
4088         return proxies
4089
4090     @functools.cached_property
4091     def cookiejar(self):
4092         """Global cookiejar instance"""
4093         return load_cookies(
4094             self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4095
4096     @property
4097     def _opener(self):
4098         """
4099         Get a urllib OpenerDirector from the Urllib handler (deprecated).
4100         """
4101         self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4102         handler = self._request_director.handlers['Urllib']
4103         return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4104
4105     def _get_available_impersonate_targets(self):
4106         # todo(future): make available as public API
4107         return [
4108             (target, rh.RH_NAME)
4109             for rh in self._request_director.handlers.values()
4110             if isinstance(rh, ImpersonateRequestHandler)
4111             for target in rh.supported_targets
4112         ]
4113
4114     def _impersonate_target_available(self, target):
4115         # todo(future): make available as public API
4116         return any(
4117             rh.is_supported_target(target)
4118             for rh in self._request_director.handlers.values()
4119             if isinstance(rh, ImpersonateRequestHandler))
4120
    def urlopen(self, req):
        """ Start an HTTP download

        @param req  A URL string, a networking Request, or (deprecated) a
                    urllib.request.Request, which is converted first

        The request is normalized (basic auth taken from the URL, URL
        sanitized, proxies and headers cleaned) and sent through the request
        director, whose response is returned.

        Some failures are re-raised as RequestError with a more helpful
        message: selected NoSupportingHandlers causes (file:// URLs, HTTPS
        proxies, WebSocket URLs, impersonation) and two known SSLError cases.
        Anything else propagates unchanged.
        """
        if isinstance(req, str):
            req = Request(req)
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

        try:
            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            # Translate the first recognizable per-handler error into a user-facing hint
            for ue in e.unsupported_errors:
                # FIXME: This depends on the order of errors.
                if not (ue.handler and ue.msg):
                    continue
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                    raise RequestError(
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
                # NOTE(review): `handlers` is indexed with 'Urllib' in _opener; confirm that the
                # lowercase keys tested below ('requests', 'curl_cffi', 'websockets') match the
                # director's key format, otherwise these membership tests are always true
                if (
                    'unsupported proxy type: "https"' in ue.msg.lower()
                    and 'requests' not in self._request_director.handlers
                    and 'curl_cffi' not in self._request_director.handlers
                ):
                    raise RequestError(
                        'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')

                elif (
                    re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                    and 'websockets' not in self._request_director.handlers
                ):
                    raise RequestError(
                        'This request requires WebSocket support. '
                        'Ensure one of the following dependencies are installed: websockets',
                        cause=ue) from ue

                elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
                    raise RequestError(
                        f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
                        f' See --list-impersonate-targets for available targets.'
                        f' This request requires browser impersonation, however you may be missing dependencies'
                        f' required to support this target.')
            # No specific hint applies: re-raise the original NoSupportingHandlers
            raise
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                raise RequestError(
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
            raise
4184
4185     def build_request_director(self, handlers, preferences=None):
4186         logger = _YDLLogger(self)
4187         headers = self.params['http_headers'].copy()
4188         proxies = self.proxies.copy()
4189         clean_headers(headers)
4190         clean_proxies(proxies, headers)
4191
4192         director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4193         for handler in handlers:
4194             director.add_handler(handler(
4195                 logger=logger,
4196                 headers=headers,
4197                 cookiejar=self.cookiejar,
4198                 proxies=proxies,
4199                 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4200                 verify=not self.params.get('nocheckcertificate'),
4201                 **traverse_obj(self.params, {
4202                     'verbose': 'debug_printtraffic',
4203                     'source_address': 'source_address',
4204                     'timeout': 'socket_timeout',
4205                     'legacy_ssl_support': 'legacyserverconnect',
4206                     'enable_file_urls': 'enable_file_urls',
4207                     'impersonate': 'impersonate',
4208                     'client_cert': {
4209                         'client_certificate': 'client_certificate',
4210                         'client_certificate_key': 'client_certificate_key',
4211                         'client_certificate_password': 'client_certificate_password',
4212                     },
4213                 }),
4214             ))
4215         director.preferences.update(preferences or [])
4216         if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4217             director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4218         return director
4219
4220     @functools.cached_property
4221     def _request_director(self):
4222         return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
4223
4224     def encode(self, s):
4225         if isinstance(s, bytes):
4226             return s  # Already encoded
4227
4228         try:
4229             return s.encode(self.get_encoding())
4230         except UnicodeEncodeError as err:
4231             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4232             raise
4233
4234     def get_encoding(self):
4235         encoding = self.params.get('encoding')
4236         if encoding is None:
4237             encoding = preferredencoding()
4238         return encoding
4239
4240     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4241         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
4242         if overwrite is None:
4243             overwrite = self.params.get('overwrites', True)
4244         if not self.params.get('writeinfojson'):
4245             return False
4246         elif not infofn:
4247             self.write_debug(f'Skipping writing {label} infojson')
4248             return False
4249         elif not self._ensure_dir_exists(infofn):
4250             return None
4251         elif not overwrite and os.path.exists(infofn):
4252             self.to_screen(f'[info] {label.title()} metadata is already present')
4253             return 'exists'
4254
4255         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4256         try:
4257             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4258             return True
4259         except OSError:
4260             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4261             return None
4262
4263     def _write_description(self, label, ie_result, descfn):
4264         ''' Write description and returns True = written, False = skip, None = error '''
4265         if not self.params.get('writedescription'):
4266             return False
4267         elif not descfn:
4268             self.write_debug(f'Skipping writing {label} description')
4269             return False
4270         elif not self._ensure_dir_exists(descfn):
4271             return None
4272         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4273             self.to_screen(f'[info] {label.title()} description is already present')
4274         elif ie_result.get('description') is None:
4275             self.to_screen(f'[info] There\'s no {label} description to write')
4276             return False
4277         else:
4278             try:
4279                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4280                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4281                     descfile.write(ie_result['description'])
4282             except OSError:
4283                 self.report_error(f'Cannot write {label} description file {descfn}')
4284                 return None
4285         return True
4286
4287     def _write_subtitles(self, info_dict, filename):
4288         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4289         ret = []
4290         subtitles = info_dict.get('requested_subtitles')
4291         if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4292             # subtitles download errors are already managed as troubles in relevant IE
4293             # that way it will silently go on when used with unsupporting IE
4294             return ret
4295         elif not subtitles:
4296             self.to_screen('[info] There are no subtitles for the requested languages')
4297             return ret
4298         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4299         if not sub_filename_base:
4300             self.to_screen('[info] Skipping writing video subtitles')
4301             return ret
4302
4303         for sub_lang, sub_info in subtitles.items():
4304             sub_format = sub_info['ext']
4305             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4306             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4307             existing_sub = self.existing_file((sub_filename_final, sub_filename))
4308             if existing_sub:
4309                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4310                 sub_info['filepath'] = existing_sub
4311                 ret.append((existing_sub, sub_filename_final))
4312                 continue
4313
4314             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4315             if sub_info.get('data') is not None:
4316                 try:
4317                     # Use newline='' to prevent conversion of newline characters
4318                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
4319                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4320                         subfile.write(sub_info['data'])
4321                     sub_info['filepath'] = sub_filename
4322                     ret.append((sub_filename, sub_filename_final))
4323                     continue
4324                 except OSError:
4325                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
4326                     return None
4327
4328             try:
4329                 sub_copy = sub_info.copy()
4330                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4331                 self.dl(sub_filename, sub_copy, subtitle=True)
4332                 sub_info['filepath'] = sub_filename
4333                 ret.append((sub_filename, sub_filename_final))
4334             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4335                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4336                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
4337                     if not self.params.get('ignoreerrors'):
4338                         self.report_error(msg)
4339                     raise DownloadError(msg)
4340                 self.report_warning(msg)
4341         return ret
4342
4343     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4344         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
4345         write_all = self.params.get('write_all_thumbnails', False)
4346         thumbnails, ret = [], []
4347         if write_all or self.params.get('writethumbnail', False):
4348             thumbnails = info_dict.get('thumbnails') or []
4349             if not thumbnails:
4350                 self.to_screen(f'[info] There are no {label} thumbnails to download')
4351                 return ret
4352         multiple = write_all and len(thumbnails) > 1
4353
4354         if thumb_filename_base is None:
4355             thumb_filename_base = filename
4356         if thumbnails and not thumb_filename_base:
4357             self.write_debug(f'Skipping writing {label} thumbnail')
4358             return ret
4359
4360         if thumbnails and not self._ensure_dir_exists(filename):
4361             return None
4362
4363         for idx, t in list(enumerate(thumbnails))[::-1]:
4364             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4365             thumb_display_id = f'{label} thumbnail {t["id"]}'
4366             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4367             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4368
4369             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4370             if existing_thumb:
4371                 self.to_screen('[info] %s is already present' % (
4372                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4373                 t['filepath'] = existing_thumb
4374                 ret.append((existing_thumb, thumb_filename_final))
4375             else:
4376                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4377                 try:
4378                     uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4379                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4380                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4381                         shutil.copyfileobj(uf, thumbf)
4382                     ret.append((thumb_filename, thumb_filename_final))
4383                     t['filepath'] = thumb_filename
4384                 except network_exceptions as err:
4385                     if isinstance(err, HTTPError) and err.status == 404:
4386                         self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4387                     else:
4388                         self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4389                     thumbnails.pop(idx)
4390             if ret and not write_all:
4391                 break
4392         return ret