src/gpodder/download.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2018 The gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20
  21 #
  22 #  download.py -- Download queue management
  23 #  Thomas Perl <thp@perli.net>   2007-09-15
  24 #
  25 #  Based on libwget.py (2005-10-29)
  26 #
  27
  28 import glob
  29 import logging
  30 import mimetypes
  31 import os
  32 import os.path
  33 import shutil
  34 import threading
  35 import time
  36 import urllib.error
  37 from abc import ABC, abstractmethod
  38
  39 import requests
  40 from requests.adapters import HTTPAdapter
  41 from requests.exceptions import ConnectionError, HTTPError, RequestException
  42 from requests.packages.urllib3.exceptions import MaxRetryError
  43 from requests.packages.urllib3.util.retry import Retry
  44
  45 import gpodder
  46 from gpodder import registry, util
  47
  48 logger = logging.getLogger(__name__)
  49
  50 _ = gpodder.gettext
  51
  52 REDIRECT_RETRIES = 3
  53
  54
  55 class CustomDownload(ABC):
  56     """ abstract class for custom downloads. DownloadTask call retrieve_resume() on it """
  57
  58     @property
  59     @abstractmethod
  60     def partial_filename(self):
  61         """
  62         Full path to the temporary file actually being downloaded (downloaders
  63         may not support setting a tempname).
  64         """
  65         ...
  66
  67     @partial_filename.setter
  68     @abstractmethod
  69     def partial_filename(self, val):
  70         ...
  71
  72     @abstractmethod
  73     def retrieve_resume(self, tempname, reporthook):
  74         """
  75         :param str tempname: temporary filename for the download
  76         :param func(number, number, number) reporthook: callback for download progress (count, blockSize, totalSize)
  77         :return dict(str, str), str: (headers, real_url)
  78         """
  79         return {}, None
  80
  81
  82 class CustomDownloader(ABC):
  83     """
  84     abstract class for custom downloaders.
  85
  86     DownloadTask calls custom_downloader to get a CustomDownload
  87     """
  88
  89     @abstractmethod
  90     def custom_downloader(self, config, episode):
  91         """
  92         if this custom downloader has a custom download method (e.g. youtube-dl),
  93         return a CustomDownload. Else return None
  94         :param config: gpodder config (e.g. to get preferred video format)
  95         :param model.PodcastEpisode episode: episode to download
  96         :return CustomDownload: object used to download the episode
  97         """
  98         return None
  99
 100
 101 class ContentRange(object):
 102     # Based on:
 103     # http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
 104     #
 105     # Copyright (c) 2007 Ian Bicking and Contributors
 106     #
 107     # Permission is hereby granted, free of charge, to any person obtaining
 108     # a copy of this software and associated documentation files (the
 109     # "Software"), to deal in the Software without restriction, including
 110     # without limitation the rights to use, copy, modify, merge, publish,
 111     # distribute, sublicense, and/or sell copies of the Software, and to
 112     # permit persons to whom the Software is furnished to do so, subject to
 113     # the following conditions:
 114     #
 115     # The above copyright notice and this permission notice shall be
 116     # included in all copies or substantial portions of the Software.
 117     #
 118     # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 119     # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 120     # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 121     # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 122     # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 123     # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 124     # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 125     """
 126     Represents the Content-Range header
 127
 128     This header is ``start-stop/length``, where stop and length can be
 129     ``*`` (represented as None in the attributes).
 130     """
 131
 132     def __init__(self, start, stop, length):
 133         assert start >= 0, "Bad start: %r" % start
 134         assert stop is None or (stop >= 0 and stop >= start), (
 135             "Bad stop: %r" % stop)
 136         self.start = start
 137         self.stop = stop
 138         self.length = length
 139
 140     def __repr__(self):
 141         return '<%s %s>' % (
 142             self.__class__.__name__,
 143             self)
 144
 145     def __str__(self):
 146         if self.stop is None:
 147             stop = '*'
 148         else:
 149             stop = self.stop + 1
 150         if self.length is None:
 151             length = '*'
 152         else:
 153             length = self.length
 154         return 'bytes %s-%s/%s' % (self.start, stop, length)
 155
 156     def __iter__(self):
 157         """
 158         Mostly so you can unpack this, like:
 159
 160             start, stop, length = res.content_range
 161         """
 162         return iter([self.start, self.stop, self.length])
 163
 164     @classmethod
 165     def parse(cls, value):
 166         """
 167         Parse the header.  May return None if it cannot parse.
 168         """
 169         if value is None:
 170             return None
 171         value = value.strip()
 172         if not value.startswith('bytes '):
 173             # Unparseable
 174             return None
 175         value = value[len('bytes '):].strip()
 176         if '/' not in value:
 177             # Invalid, no length given
 178             return None
 179         range, length = value.split('/', 1)
 180         if '-' not in range:
 181             # Invalid, no range
 182             return None
 183         start, end = range.split('-', 1)
 184         try:
 185             start = int(start)
 186             if end == '*':
 187                 end = None
 188             else:
 189                 end = int(end)
 190             if length == '*':
 191                 length = None
 192             else:
 193                 length = int(length)
 194         except ValueError:
 195             # Parse problem
 196             return None
 197         if end is None:
 198             return cls(start, None, length)
 199         else:
 200             return cls(start, end - 1, length)
 201
 202
 203 class DownloadCancelledException(Exception): pass
 204
 205
 206 class DownloadNoURLException(Exception): pass
 207
 208
 209 class gPodderDownloadHTTPError(Exception):
 210     def __init__(self, url, error_code, error_message):
 211         self.url = url
 212         self.error_code = error_code
 213         self.error_message = error_message
 214
 215
 216 class DownloadURLOpener:
 217
 218     # Sometimes URLs are not escaped correctly - try to fix them
 219     # (see RFC2396; Section 2.4.3. Excluded US-ASCII Characters)
 220     # FYI: The omission of "%" in the list is to avoid double escaping!
 221     ESCAPE_CHARS = dict((ord(c), '%%%x' % ord(c)) for c in ' <>#"{}|\\^[]`')
 222
 223     def __init__(self, channel, max_retries=3):
 224         super().__init__()
 225         self.channel = channel
 226         self.max_retries = max_retries
 227
 228     def init_session(self):
 229         """ init a session with our own retry codes + retry count """
 230         # I add a few retries for redirects but it means that I will allow max_retries + REDIRECT_RETRIES
 231         # if encountering max_retries connect and REDIRECT_RETRIES read for instance
 232         retry_strategy = Retry(
 233             total=self.max_retries + REDIRECT_RETRIES,
 234             connect=self.max_retries,
 235             read=self.max_retries,
 236             redirect=max(REDIRECT_RETRIES, self.max_retries),
 237             status=self.max_retries,
 238             status_forcelist=Retry.RETRY_AFTER_STATUS_CODES.union((408, 418, 504, 598, 599,)))
 239         adapter = HTTPAdapter(max_retries=retry_strategy)
 240         http = requests.Session()
 241         http.mount("https://", adapter)
 242         http.mount("http://", adapter)
 243         return http
 244
 245 # The following is based on Python's urllib.py "URLopener.retrieve"
 246 # Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
 247
 248     def retrieve_resume(self, url, filename, reporthook=None, data=None, disable_auth=False):
 249         """Download files from an URL; return (headers, real_url)
 250
 251         Resumes a download if the local filename exists and
 252         the server supports download resuming.
 253         """
 254
 255         current_size = 0
 256         tfp = None
 257         headers = {
 258             'User-agent': gpodder.user_agent
 259         }
 260
 261         if (self.channel.auth_username or self.channel.auth_password) and not disable_auth:
 262             logger.debug('Authenticating as "%s"', self.channel.auth_username)
 263             auth = (self.channel.auth_username, self.channel.auth_password)
 264         else:
 265             auth = None
 266
 267         if os.path.exists(filename):
 268             try:
 269                 current_size = os.path.getsize(filename)
 270                 tfp = open(filename, 'ab')
 271                 # If the file exists, then only download the remainder
 272                 if current_size > 0:
 273                     headers['Range'] = 'bytes=%s-' % (current_size)
 274             except:
 275                 logger.warning('Cannot resume download: %s', filename, exc_info=True)
 276                 tfp = None
 277                 current_size = 0
 278
 279         if tfp is None:
 280             tfp = open(filename, 'wb')
 281
 282         # Fix a problem with bad URLs that are not encoded correctly (bug 549)
 283         url = url.translate(self.ESCAPE_CHARS)
 284
 285         session = self.init_session()
 286         with session.get(url,
 287                          headers=headers,
 288                          stream=True,
 289                          auth=auth,
 290                          timeout=gpodder.SOCKET_TIMEOUT) as resp:
 291             try:
 292                 resp.raise_for_status()
 293             except HTTPError as e:
 294                 if auth is not None:
 295                     # Try again without authentication (bug 1296)
 296                     return self.retrieve_resume(url, filename, reporthook, data, True)
 297                 else:
 298                     raise gPodderDownloadHTTPError(url, resp.status_code, str(e))
 299
 300             headers = resp.headers
 301
 302             if current_size > 0:
 303                 # We told the server to resume - see if she agrees
 304                 # See RFC2616 (206 Partial Content + Section 14.16)
 305                 # XXX check status code here, too...
 306                 range = ContentRange.parse(headers.get('content-range', ''))
 307                 if range is None or range.start != current_size:
 308                     # Ok, that did not work. Reset the download
 309                     # TODO: seek and truncate if content-range differs from request
 310                     tfp.close()
 311                     tfp = open(filename, 'wb')
 312                     current_size = 0
 313                     logger.warning('Cannot resume: Invalid Content-Range (RFC2616).')
 314
 315             result = headers, resp.url
 316             bs = 1024 * 8
 317             size = -1
 318             read = current_size
 319             blocknum = current_size // bs
 320             if reporthook:
 321                 if "content-length" in headers:
 322                     size = int(headers['content-length']) + current_size
 323                 reporthook(blocknum, bs, size)
 324             for block in resp.iter_content(bs):
 325                 read += len(block)
 326                 tfp.write(block)
 327                 blocknum += 1
 328                 if reporthook:
 329                     reporthook(blocknum, bs, size)
 330             tfp.close()
 331             del tfp
 332
 333         # raise exception if actual size does not match content-length header
 334         if size >= 0 and read < size:
 335             raise urllib.error.ContentTooShortError("retrieval incomplete: got only %i out "
 336                                        "of %i bytes" % (read, size), result)
 337
 338         return result
 339
 340 # end code based on urllib.py
 341
 342
 343 class DefaultDownload(CustomDownload):
 344     def __init__(self, config, episode, url):
 345         self._config = config
 346         self.__episode = episode
 347         self._url = url
 348         self.__partial_filename = None
 349
 350     @property
 351     def partial_filename(self):
 352         return self.__partial_filename
 353
 354     @partial_filename.setter
 355     def partial_filename(self, val):
 356         self.__partial_filename = val
 357
 358     def retrieve_resume(self, tempname, reporthook):
 359         url = self._url
 360         logger.info("Downloading %s", url)
 361         max_retries = max(0, self._config.auto.retries)
 362         downloader = DownloadURLOpener(self.__episode.channel, max_retries=max_retries)
 363         self.partial_filename = tempname
 364
 365         # Retry the download on incomplete download (other retries are done by the Retry strategy)
 366         for retry in range(max_retries + 1):
 367             if retry > 0:
 368                 logger.info('Retrying download of %s (%d)', url, retry)
 369                 time.sleep(1)
 370
 371             try:
 372                 headers, real_url = downloader.retrieve_resume(url,
 373                     tempname, reporthook=reporthook)
 374                 # If we arrive here, the download was successful
 375                 break
 376             except urllib.error.ContentTooShortError as ctse:
 377                 if retry < max_retries:
 378                     logger.info('Content too short: %s - will retry.',
 379                             url)
 380                     continue
 381                 raise
 382         return (headers, real_url)
 383
 384
 385 class DefaultDownloader(CustomDownloader):
 386     @staticmethod
 387     def custom_downloader(config, episode):
 388         url = episode.url
 389         # Resolve URL and start downloading the episode
 390         res = registry.download_url.resolve(config, None, episode, False)
 391         if res:
 392             url = res
 393         if url == episode.url:
 394             # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
 395             url = url.strip()
 396             url = util.iri_to_url(url)
 397         return DefaultDownload(config, episode, url)
 398
 399
 400 class DownloadQueueWorker(object):
 401     def __init__(self, queue, exit_callback, continue_check_callback):
 402         self.queue = queue
 403         self.exit_callback = exit_callback
 404         self.continue_check_callback = continue_check_callback
 405
 406     def __repr__(self):
 407         return threading.current_thread().getName()
 408
 409     def run(self):
 410         logger.info('Starting new thread: %s', self)
 411         while True:
 412             if not self.continue_check_callback(self):
 413                 return
 414
 415             task = self.queue.get_next()
 416             if not task:
 417                 logger.info('No more tasks for %s to carry out.', self)
 418                 break
 419             logger.info('%s is processing: %s', self, task)
 420             task.run()
 421             task.recycle()
 422
 423         self.exit_callback(self)
 424
 425
 426 class ForceDownloadWorker(object):
 427     def __init__(self, task):
 428         self.task = task
 429
 430     def __repr__(self):
 431         return threading.current_thread().getName()
 432
 433     def run(self):
 434         logger.info('Starting new thread: %s', self)
 435         logger.info('%s is processing: %s', self, self.task)
 436         self.task.run()
 437         self.task.recycle()
 438
 439
 440 class DownloadQueueManager(object):
 441     def __init__(self, config, queue):
 442         self._config = config
 443         self.tasks = queue
 444
 445         self.worker_threads_access = threading.RLock()
 446         self.worker_threads = []
 447
 448     def __exit_callback(self, worker_thread):
 449         with self.worker_threads_access:
 450             self.worker_threads.remove(worker_thread)
 451
 452     def __continue_check_callback(self, worker_thread):
 453         with self.worker_threads_access:
 454             if len(self.worker_threads) > self._config.max_downloads and \
 455                     self._config.max_downloads_enabled:
 456                 self.worker_threads.remove(worker_thread)
 457                 return False
 458             else:
 459                 return True
 460
 461     def __spawn_threads(self):
 462         """Spawn new worker threads if necessary
 463         """
 464         with self.worker_threads_access:
 465             work_count = self.tasks.available_work_count()
 466             if self._config.max_downloads_enabled:
 467                 # always allow at least 1 download
 468                 spawn_limit = max(int(self._config.max_downloads), 1)
 469             else:
 470                 spawn_limit = self._config.limit.downloads.concurrent_max
 471             running = len(self.worker_threads)
 472             logger.info('%r tasks to do, can start at most %r threads, %r threads currently running', work_count, spawn_limit, running)
 473             for i in range(0, min(work_count, spawn_limit - running)):
 474                 # We have to create a new thread here, there's work to do
 475                 logger.info('Starting new worker thread.')
 476
 477                 worker = DownloadQueueWorker(self.tasks, self.__exit_callback,
 478                         self.__continue_check_callback)
 479                 self.worker_threads.append(worker)
 480                 util.run_in_background(worker.run)
 481
 482     def update_max_downloads(self):
 483         self.__spawn_threads()
 484
 485     def force_start_task(self, task):
 486         with task:
 487             if task.status in (task.QUEUED, task.PAUSED, task.CANCELLED, task.FAILED):
 488                 task.status = task.DOWNLOADING
 489                 worker = ForceDownloadWorker(task)
 490                 util.run_in_background(worker.run)
 491
 492     def queue_task(self, task):
 493         """Marks a task as queued
 494         """
 495         self.tasks.queue_task(task)
 496         self.__spawn_threads()
 497
 498     def has_workers(self):
 499         return len(self.worker_threads) > 0
 500
 501
 502 class DownloadTask(object):
 503     """An object representing the download task of an episode
 504
 505     You can create a new download task like this:
 506
 507         task = DownloadTask(episode, gpodder.config.Config(CONFIGFILE))
 508         task.status = DownloadTask.QUEUED
 509         task.run()
 510
 511     While the download is in progress, you can access its properties:
 512
 513         task.total_size       # in bytes
 514         task.progress         # from 0.0 to 1.0
 515         task.speed            # in bytes per second
 516         str(task)             # name of the episode
 517         task.status           # current status
 518         task.status_changed   # True if the status has been changed (see below)
 519         task.url              # URL of the episode being downloaded
 520         task.podcast_url      # URL of the podcast this download belongs to
 521         task.episode          # Episode object of this task
 522
 523     You can cancel a running download task by setting its status:
 524
 525         with task:
 526             task.status = DownloadTask.CANCELLING
 527
 528     The task will then abort as soon as possible (due to the nature
 529     of downloading data, this can take a while when the Internet is
 530     busy).
 531
    The "status_changed" attribute gets set to True every time the
 533     "status" attribute changes its value. After you get the value of
 534     the "status_changed" attribute, it is always reset to False:
 535
 536         if task.status_changed:
 537             new_status = task.status
 538             # .. update the UI accordingly ..
 539
 540     Obviously, this also means that you must have at most *one*
 541     place in your UI code where you check for status changes and
 542     broadcast the status updates from there.
 543
 544     While the download is taking place and after the .run() method
 545     has finished, you can get the final status to check if the download
 546     was successful:
 547
 548         if task.status == DownloadTask.DONE:
 549             # .. everything ok ..
 550         elif task.status == DownloadTask.FAILED:
 551             # .. an error happened, and the
 552             #    error_message attribute is set ..
            print(task.error_message)
 554         elif task.status == DownloadTask.PAUSED:
 555             # .. user paused the download ..
 556         elif task.status == DownloadTask.CANCELLED:
 557             # .. user cancelled the download ..
 558
 559     The difference between cancelling and pausing a DownloadTask is
 560     that the temporary file gets deleted when cancelling, but does
 561     not get deleted when pausing.
 562
 563     Be sure to call .removed_from_list() on this task when removing
 564     it from the UI, so that it can carry out any pending clean-up
 565     actions (e.g. removing the temporary file when the task has not
 566     finished successfully; i.e. task.status != DownloadTask.DONE).
 567
    The UI can call the method "notify_as_finished()" to determine if
    this episode still has to be shown as "finished" download
 570     in a notification window. This will return True only the first time
 571     it is called when the status is DONE. After returning True once,
 572     it will always return False afterwards.
 573
 574     The same thing works for failed downloads ("notify_as_failed()").
 575     """
 576     # Possible states this download task can be in
 577     STATUS_MESSAGE = (_('Queued'), _('Queued'), _('Downloading'),
 578             _('Finished'), _('Failed'), _('Cancelling'), _('Cancelled'), _('Pausing'), _('Paused'))
 579     (NEW, QUEUED, DOWNLOADING, DONE, FAILED, CANCELLING, CANCELLED, PAUSING, PAUSED) = list(range(9))
 580
 581     # Wheter this task represents a file download or a device sync operation
 582     ACTIVITY_DOWNLOAD, ACTIVITY_SYNCHRONIZE = list(range(2))
 583
 584     # Minimum time between progress updates (in seconds)
 585     MIN_TIME_BETWEEN_UPDATES = 1.
 586
 587     def __str__(self):
 588         return self.__episode.title
 589
 590     def __enter__(self):
 591         return self.__lock.acquire()
 592
 593     def __exit__(self, type, value, traceback):
 594         self.__lock.release()
 595
 596     def __get_status(self):
 597         return self.__status
 598
 599     def __set_status(self, status):
 600         if status != self.__status:
 601             self.__status_changed = True
 602             self.__status = status
 603
 604     status = property(fget=__get_status, fset=__set_status)
 605
 606     def __get_status_changed(self):
 607         if self.__status_changed:
 608             self.__status_changed = False
 609             return True
 610         else:
 611             return False
 612
 613     status_changed = property(fget=__get_status_changed)
 614
 615     def __get_activity(self):
 616         return self.__activity
 617
 618     def __set_activity(self, activity):
 619         self.__activity = activity
 620
 621     activity = property(fget=__get_activity, fset=__set_activity)
 622
 623     def __get_url(self):
 624         return self.__episode.url
 625
 626     url = property(fget=__get_url)
 627
 628     def __get_podcast_url(self):
 629         return self.__episode.channel.url
 630
 631     podcast_url = property(fget=__get_podcast_url)
 632
 633     def __get_episode(self):
 634         return self.__episode
 635
 636     episode = property(fget=__get_episode)
 637
 638     def __get_downloader(self):
 639         return self.__downloader
 640
 641     def __set_downloader(self, downloader):
 642         # modifying the downloader will only have effect before the download is started
 643         self.__downloader = downloader
 644
 645     downloader = property(fget=__get_downloader, fset=__set_downloader)
 646
 647     def can_queue(self):
 648         return self.status in (self.CANCELLED, self.PAUSED, self.FAILED)
 649
 650     def unpause(self):
 651         with self:
 652             # Resume a downloading task that was transitioning to paused
 653             if self.status == self.PAUSING:
 654                 self.status = self.DOWNLOADING
 655
 656     def can_pause(self):
 657         return self.status in (self.DOWNLOADING, self.QUEUED)
 658
 659     def pause(self):
 660         with self:
 661             # Pause a queued download
 662             if self.status == self.QUEUED:
 663                 self.status = self.PAUSED
 664             # Request pause of a running download
 665             elif self.status == self.DOWNLOADING:
 666                 self.status = self.PAUSING
 667                 # download rate limited tasks sleep and take longer to transition from the PAUSING state to the PAUSED state
 668
 669     def can_cancel(self):
 670         return self.status in (self.DOWNLOADING, self.QUEUED, self.PAUSED, self.FAILED)
 671
 672     def cancel(self):
 673         with self:
 674             # Cancelling directly is allowed if the task isn't currently downloading
 675             if self.status in (self.QUEUED, self.PAUSED, self.FAILED):
 676                 self.status = self.CANCELLING
 677                 # Call run, so the partial file gets deleted, and task recycled
 678                 self.run()
 679             # Otherwise request cancellation
 680             elif self.status == self.DOWNLOADING:
 681                 self.status = self.CANCELLING
 682
 683     def can_remove(self):
 684         return self.status in (self.CANCELLED, self.FAILED, self.DONE)
 685
 686     def delete_partial_files(self):
 687         temporary_files = [self.tempname]
 688         # youtube-dl creates .partial.* files for adaptive formats
 689         temporary_files += glob.glob('%s.*' % self.tempname)
 690
 691         for tempfile in temporary_files:
 692             util.delete_file(tempfile)
 693
 694     def removed_from_list(self):
 695         if self.status != self.DONE:
 696             self.delete_partial_files()
 697
 698     def __init__(self, episode, config, downloader=None):
 699         assert episode.download_task is None
 700         self.__lock = threading.RLock()
 701         self.__status = DownloadTask.NEW
 702         self.__activity = DownloadTask.ACTIVITY_DOWNLOAD
 703         self.__status_changed = True
 704         self.__episode = episode
 705         self._config = config
 706         # specify a custom downloader to be used for this download
 707         self.__downloader = downloader
 708
 709         # Create the target filename and save it in the database
 710         self.filename = self.__episode.local_filename(create=True)
 711         self.tempname = self.filename + '.partial'
 712
 713         self.total_size = self.__episode.file_size
 714         self.speed = 0.0
 715         self.progress = 0.0
 716         self.error_message = None
 717         self.custom_downloader = None
 718
 719         # Have we already shown this task in a notification?
 720         self._notification_shown = False
 721
 722         # Variables for speed limit and speed calculation
 723         self.__start_time = 0
 724         self.__start_blocks = 0
 725         self.__limit_rate_value = self._config.limit.bandwidth.kbps
 726         self.__limit_rate = self._config.limit.bandwidth.enabled
 727
 728         # Progress update functions
 729         self._progress_updated = None
 730         self._last_progress_updated = 0.
 731
 732         # If the tempname already exists, set progress accordingly
 733         if os.path.exists(self.tempname):
 734             try:
 735                 already_downloaded = os.path.getsize(self.tempname)
 736                 if self.total_size > 0:
 737                     self.progress = max(0.0, min(1.0, already_downloaded / self.total_size))
 738             except OSError as os_error:
 739                 logger.error('Cannot get size for %s', os_error)
 740         else:
 741             # "touch self.tempname", so we also get partial
 742             # files for resuming when the file is queued
 743             open(self.tempname, 'w').close()
 744
 745         # Store a reference to this task in the episode
 746         episode.download_task = self
 747
 748     def reuse(self):
 749         if not os.path.exists(self.tempname):
 750             # partial file was deleted when cancelled, recreate it
 751             open(self.tempname, 'w').close()
 752
 753     def notify_as_finished(self):
 754         if self.status == DownloadTask.DONE:
 755             if self._notification_shown:
 756                 return False
 757             else:
 758                 self._notification_shown = True
 759                 return True
 760
 761         return False
 762
 763     def notify_as_failed(self):
 764         if self.status == DownloadTask.FAILED:
 765             if self._notification_shown:
 766                 return False
 767             else:
 768                 self._notification_shown = True
 769                 return True
 770
 771         return False
 772
 773     def add_progress_callback(self, callback):
 774         self._progress_updated = callback
 775
 776     def status_updated(self, count, blockSize, totalSize):
 777         # We see a different "total size" while downloading,
 778         # so correct the total size variable in the thread
 779         if totalSize != self.total_size and totalSize > 0:
 780             self.total_size = float(totalSize)
 781             if self.__episode.file_size != self.total_size:
 782                 logger.debug('Updating file size of %s to %s',
 783                         self.filename, self.total_size)
 784                 self.__episode.file_size = self.total_size
 785                 self.__episode.save()
 786
 787         if self.total_size > 0:
 788             self.progress = max(0.0, min(1.0, count * blockSize / self.total_size))
 789             if self._progress_updated is not None:
 790                 diff = time.time() - self._last_progress_updated
 791                 if diff > self.MIN_TIME_BETWEEN_UPDATES or self.progress == 1.:
 792                     self._progress_updated(self.progress)
 793                     self._last_progress_updated = time.time()
 794
 795         self.calculate_speed(count, blockSize)
 796
 797         if self.status == DownloadTask.CANCELLING:
 798             raise DownloadCancelledException()
 799
 800         if self.status == DownloadTask.PAUSING:
 801             raise DownloadCancelledException()
 802
 803     def calculate_speed(self, count, blockSize):
 804         if count % 5 == 0:
 805             now = time.time()
 806             if self.__start_time > 0:
 807                 # Has rate limiting been enabled or disabled?
 808                 if self.__limit_rate != self._config.limit.bandwidth.enabled:
 809                     # If it has been enabled then reset base time and block count
 810                     if self._config.limit.bandwidth.enabled:
 811                         self.__start_time = now
 812                         self.__start_blocks = count
 813                     self.__limit_rate = self._config.limit.bandwidth.enabled
 814
 815                 # Has the rate been changed and are we currently limiting?
 816                 if self.__limit_rate_value != self._config.limit.bandwidth.kbps and self.__limit_rate:
 817                     self.__start_time = now
 818                     self.__start_blocks = count
 819                     self.__limit_rate_value = self._config.limit.bandwidth.kbps
 820
 821                 passed = now - self.__start_time
 822                 if passed > 0:
 823                     speed = ((count - self.__start_blocks) * blockSize) / passed
 824                 else:
 825                     speed = 0
 826             else:
 827                 self.__start_time = now
 828                 self.__start_blocks = count
 829                 passed = now - self.__start_time
 830                 speed = count * blockSize
 831
 832             self.speed = float(speed)
 833
 834             if self._config.limit.bandwidth.enabled and speed > self._config.limit.bandwidth.kbps:
 835                 # calculate the time that should have passed to reach
 836                 # the desired download rate and wait if necessary
 837                 should_have_passed = (count - self.__start_blocks) * blockSize / (self._config.limit.bandwidth.kbps * 1024.0)
 838                 if should_have_passed > passed:
 839                     # sleep a maximum of 10 seconds to not cause time-outs
 840                     delay = min(10.0, float(should_have_passed - passed))
 841                     time.sleep(delay)
 842
 843     def recycle(self):
 844         if self.status not in (self.FAILED, self.PAUSED):
 845             self.episode.download_task = None
 846
 847     def set_episode_download_task(self):
 848         if not self.episode.download_task:
 849             self.episode.download_task = self
 850
 851     def run(self):
 852         # Speed calculation (re-)starts here
 853         self.__start_time = 0
 854         self.__start_blocks = 0
 855
 856         # If the download has already been cancelled/paused, skip it
 857         with self:
 858             if self.status == DownloadTask.CANCELLING:
 859                 self.status = DownloadTask.CANCELLED
 860                 self.__episode._download_error = None
 861                 self.delete_partial_files()
 862                 self.progress = 0.0
 863                 self.speed = 0.0
 864                 self.recycle()
 865                 return False
 866
 867             if self.status == DownloadTask.PAUSING:
 868                 self.status = DownloadTask.PAUSED
 869                 return False
 870
 871             # We only start this download if its status is downloading
 872             if self.status != DownloadTask.DOWNLOADING:
 873                 return False
 874
 875             # We are downloading this file right now
 876             self._notification_shown = False
 877
 878             # Restore a reference to this task in the episode
 879             # when running a recycled task following a pause or failed
 880             # see #649
 881             self.set_episode_download_task()
 882
 883         url = self.__episode.url
 884         result = DownloadTask.DOWNLOADING
 885         try:
 886             if url == '':
 887                 raise DownloadNoURLException()
 888
 889             if self.downloader:
 890                 downloader = self.downloader.custom_downloader(self._config, self.episode)
 891             else:
 892                 downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
 893
 894             if downloader:
 895                 logger.info('Downloading %s with %s', url, downloader)
 896             else:
 897                 downloader = DefaultDownloader.custom_downloader(self._config, self.episode)
 898
 899             self.custom_downloader = downloader
 900             headers, real_url = downloader.retrieve_resume(self.tempname, self.status_updated)
 901
 902             new_mimetype = headers.get('content-type', self.__episode.mime_type)
 903             old_mimetype = self.__episode.mime_type
 904             _basename, ext = os.path.splitext(self.filename)
 905             if new_mimetype != old_mimetype or util.wrong_extension(ext):
 906                 logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
 907                 old_extension = self.__episode.extension()
 908                 self.__episode.mime_type = new_mimetype
 909                 # don't call local_filename because we'll get the old download name
 910                 new_extension = self.__episode.extension(may_call_local_filename=False)
 911
 912                 # If the desired filename extension changed due to the new
 913                 # mimetype, we force an update of the local filename to fix the
 914                 # extension.
 915                 if old_extension != new_extension or util.wrong_extension(ext):
 916                     self.filename = self.__episode.local_filename(create=True, force_update=True)
 917
 918             # In some cases, the redirect of a URL causes the real filename to
 919             # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
 920             if real_url != url and not util.is_known_redirecter(real_url):
 921                 realname, realext = util.filename_from_url(real_url)
 922
 923                 # Only update from redirect if the redirected-to filename has
 924                 # a proper extension (this is needed for e.g. YouTube)
 925                 if not util.wrong_extension(realext):
 926                     real_filename = ''.join((realname, realext))
 927                     self.filename = self.__episode.local_filename(create=True,
 928                             force_update=True, template=real_filename)
 929                     logger.info('Download was redirected (%s). New filename: %s',
 930                             real_url, os.path.basename(self.filename))
 931
 932             # Look at the Content-disposition header; use if if available
 933             disposition_filename = util.get_header_param(headers, 'filename', 'content-disposition')
 934
 935             # Some servers do send the content-disposition header, but provide
 936             # an empty filename, resulting in an empty string here (bug 1440)
 937             if disposition_filename is not None and disposition_filename != '':
 938                 # The server specifies a download filename - try to use it
 939                 # filename_from_url to remove query string; see #591
 940                 fn, ext = util.filename_from_url(disposition_filename)
 941                 logger.debug("converting disposition filename '%s' to local filename '%s%s'", disposition_filename, fn, ext)
 942                 disposition_filename = fn + ext
 943                 self.filename = self.__episode.local_filename(create=True,
 944                         force_update=True, template=disposition_filename)
 945                 new_mimetype, encoding = mimetypes.guess_type(self.filename)
 946                 if new_mimetype is not None:
 947                     logger.info('Using content-disposition mimetype: %s',
 948                             new_mimetype)
 949                     self.__episode.mime_type = new_mimetype
 950
 951             # Re-evaluate filename and tempname to take care of podcast renames
 952             # while downloads are running (which will change both file names)
 953             self.filename = self.__episode.local_filename(create=False)
 954             self.tempname = os.path.join(os.path.dirname(self.filename),
 955                     os.path.basename(self.tempname))
 956             shutil.move(self.tempname, self.filename)
 957
 958             # Model- and database-related updates after a download has finished
 959             self.__episode.on_downloaded(self.filename)
 960         except DownloadCancelledException:
 961             logger.info('Download has been cancelled/paused: %s', self)
 962             if self.status == DownloadTask.CANCELLING:
 963                 self.__episode._download_error = None
 964                 self.delete_partial_files()
 965                 self.progress = 0.0
 966                 self.speed = 0.0
 967             result = DownloadTask.CANCELLED
 968         except DownloadNoURLException:
 969             result = DownloadTask.FAILED
 970             self.error_message = _('Episode has no URL to download')
 971         except urllib.error.ContentTooShortError as ctse:
 972             result = DownloadTask.FAILED
 973             self.error_message = _('Missing content from server')
 974         except ConnectionError as ce:
 975             # special case request exception
 976             result = DownloadTask.FAILED
 977             logger.error('Download failed: %s', str(ce), exc_info=True)
 978             d = {'host': ce.args[0].pool.host, 'port': ce.args[0].pool.port}
 979             self.error_message = _("Couldn't connect to server %(host)s:%(port)s" % d)
 980         except RequestException as re:
 981             # extract MaxRetryError to shorten the exception message
 982             if isinstance(re.args[0], MaxRetryError):
 983                 re = re.args[0]
 984             logger.error('%s while downloading "%s"', str(re),
 985                     self.__episode.title, exc_info=True)
 986             result = DownloadTask.FAILED
 987             d = {'error': str(re)}
 988             self.error_message = _('Request Error: %(error)s') % d
 989         except IOError as ioe:
 990             logger.error('%s while downloading "%s": %s', ioe.strerror,
 991                     self.__episode.title, ioe.filename, exc_info=True)
 992             result = DownloadTask.FAILED
 993             d = {'error': ioe.strerror, 'filename': ioe.filename}
 994             self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
 995         except gPodderDownloadHTTPError as gdhe:
 996             logger.error('HTTP %s while downloading "%s": %s',
 997                     gdhe.error_code, self.__episode.title, gdhe.error_message,
 998                     exc_info=True)
 999             result = DownloadTask.FAILED
1000             d = {'code': gdhe.error_code, 'message': gdhe.error_message}
1001             self.error_message = _('HTTP Error %(code)s: %(message)s') % d
1002         except Exception as e:
1003             result = DownloadTask.FAILED
1004             logger.error('Download failed: %s', str(e), exc_info=True)
1005             self.error_message = _('Error: %s') % (str(e),)
1006
1007         with self:
1008             if result == DownloadTask.DOWNLOADING:
1009                 # Everything went well - we're done (even if the task was cancelled/paused,
1010                 # since it's finished we might as well mark it done)
1011                 self.status = DownloadTask.DONE
1012                 if self.total_size <= 0:
1013                     self.total_size = util.calculate_size(self.filename)
1014                     logger.info('Total size updated to %d', self.total_size)
1015                 self.progress = 1.0
1016                 gpodder.user_extensions.on_episode_downloaded(self.__episode)
1017                 return True
1018
1019             self.speed = 0.0
1020
1021             if result == DownloadTask.FAILED:
1022                 self.status = DownloadTask.FAILED
1023                 self.__episode._download_error = self.error_message
1024
1025             # cancelled/paused -- update state to mark it as safe to manipulate this task again
1026             elif self.status == DownloadTask.PAUSING:
1027                 self.status = DownloadTask.PAUSED
1028             elif self.status == DownloadTask.CANCELLING:
1029                 self.status = DownloadTask.CANCELLED
1030
1031         # We finished, but not successfully (at least not really)
1032         return False