limit_rate -> limit.bandwidth.enabled
[gpodder.git] / src / gpodder / download.py
blobd1af50cb8c63b185d55103eb3b3ec69a707e4349
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2018 The gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # download.py -- Download queue management
23 # Thomas Perl <thp@perli.net> 2007-09-15
25 # Based on libwget.py (2005-10-29)
28 import glob
29 import logging
30 import mimetypes
31 import os
32 import os.path
33 import shutil
34 import threading
35 import time
36 import urllib.error
37 from abc import ABC, abstractmethod
39 import requests
40 from requests.adapters import HTTPAdapter
41 from requests.exceptions import ConnectionError, HTTPError, RequestException
42 from requests.packages.urllib3.exceptions import MaxRetryError
43 from requests.packages.urllib3.util.retry import Retry
45 import gpodder
46 from gpodder import registry, util
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# Shorthand for gpodder's gettext function; wraps the user-visible strings below
_ = gpodder.gettext
# Retry allowance for redirects; DownloadURLOpener.init_session() adds it to
# the configured retry count when building the overall retry budget
REDIRECT_RETRIES = 3
class CustomDownload(ABC):
    """Abstract base for a single custom download.

    DownloadTask calls retrieve_resume() on objects implementing this class.
    """

    @property
    @abstractmethod
    def partial_filename(self):
        """
        Full path of the temporary file that is actually being written
        (a downloader may not support choosing its own tempname).
        """
        ...

    @partial_filename.setter
    @abstractmethod
    def partial_filename(self, val):
        ...

    @abstractmethod
    def retrieve_resume(self, tempname, reporthook):
        """
        Download (or resume downloading) into tempname.

        :param str tempname: temporary filename for the download
        :param func(number, number, number) reporthook: progress callback,
            called with (count, blockSize, totalSize)
        :return dict(str, str), str: (headers, real_url)
        """
        no_headers = {}
        no_url = None
        return no_headers, no_url
class CustomDownloader(ABC):
    """
    Abstract base for custom downloaders.

    DownloadTask calls custom_downloader() to obtain a CustomDownload.
    """

    @abstractmethod
    def custom_downloader(self, config, episode):
        """
        Return a CustomDownload when this downloader has its own download
        method for the episode (e.g. youtube-dl); return None otherwise.

        :param config: gpodder config (e.g. to get preferred video format)
        :param model.PodcastEpisode episode: episode to download
        :return CustomDownload: object used to download the episode
        """
        # The base implementation never provides a custom download
        return None
class ContentRange(object):
    # Based on:
    # http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
    #
    # Copyright (c) 2007 Ian Bicking and Contributors
    #
    # Permission is hereby granted, free of charge, to any person obtaining
    # a copy of this software and associated documentation files (the
    # "Software"), to deal in the Software without restriction, including
    # without limitation the rights to use, copy, modify, merge, publish,
    # distribute, sublicense, and/or sell copies of the Software, and to
    # permit persons to whom the Software is furnished to do so, subject to
    # the following conditions:
    #
    # The above copyright notice and this permission notice shall be
    # included in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    """
    Represents the Content-Range header

    This header is ``start-stop/length``, where stop and length can be
    ``*`` (represented as None in the attributes).

    Note that ``stop`` is stored as the header's last byte position minus
    one: parse() subtracts one and __str__() adds it back.
    """

    def __init__(self, start, stop, length):
        assert start >= 0, "Bad start: %r" % start
        # Because stop holds "last byte - 1", a range covering a single byte
        # has stop == start - 1 and must be accepted.
        assert stop is None or stop + 1 >= start, (
            "Bad stop: %r" % stop)
        self.start = start
        self.stop = stop
        self.length = length

    def __repr__(self):
        return '<%s %s>' % (
            self.__class__.__name__,
            self)

    def __str__(self):
        """Format the range back into header form (``bytes a-b/len``)."""
        if self.stop is None:
            stop = '*'
        else:
            stop = self.stop + 1
        if self.length is None:
            length = '*'
        else:
            length = self.length
        return 'bytes %s-%s/%s' % (self.start, stop, length)

    def __iter__(self):
        """
        Mostly so you can unpack this, like:

            start, stop, length = res.content_range
        """
        return iter([self.start, self.stop, self.length])

    @classmethod
    def parse(cls, value):
        """
        Parse the header. May return None if it cannot parse.

        :param value: raw header value (or None)
        :return: a ContentRange, or None for a missing, malformed or
            reversed (last byte before first byte) range
        """
        if value is None:
            return None
        value = value.strip()
        if not value.startswith('bytes '):
            # Unparseable
            return None
        value = value[len('bytes '):].strip()
        if '/' not in value:
            # Invalid, no length given
            return None
        byte_range, length = value.split('/', 1)
        if '-' not in byte_range:
            # Invalid, no range
            return None
        start, end = byte_range.split('-', 1)
        try:
            start = int(start)
            end = None if end == '*' else int(end)
            length = None if length == '*' else int(length)
        except ValueError:
            # Parse problem
            return None
        if start < 0 or (end is not None and end < start):
            # Invalid range: report it as unparseable instead of tripping
            # the constructor's assertions
            return None
        if end is None:
            return cls(start, None, length)
        return cls(start, end - 1, length)
class DownloadCancelledException(Exception):
    """Raised from the progress callback to abort a download that is being cancelled or paused."""
class DownloadNoURLException(Exception):
    """Raised when the episode to download has an empty URL."""
class gPodderDownloadHTTPError(Exception):
    """HTTP error response received while downloading.

    :param url: URL that was requested
    :param error_code: HTTP status code of the response
    :param error_message: human-readable description of the error
    """

    def __init__(self, url, error_code, error_message):
        # Hand the values to Exception so that args is populated; this keeps
        # copy/pickle working (they rebuild the object from args)
        super().__init__(url, error_code, error_message)
        self.url = url
        self.error_code = error_code
        self.error_message = error_message

    def __str__(self):
        return 'HTTP Error %s: %s (%s)' % (self.error_code, self.error_message, self.url)
216 class DownloadURLOpener:
218 # Sometimes URLs are not escaped correctly - try to fix them
219 # (see RFC2396; Section 2.4.3. Excluded US-ASCII Characters)
220 # FYI: The omission of "%" in the list is to avoid double escaping!
221 ESCAPE_CHARS = dict((ord(c), '%%%x' % ord(c)) for c in ' <>#"{}|\\^[]`')
223 def __init__(self, channel, max_retries=3):
224 super().__init__()
225 self.channel = channel
226 self.max_retries = max_retries
228 def init_session(self):
229 """ init a session with our own retry codes + retry count """
230 # I add a few retries for redirects but it means that I will allow max_retries + REDIRECT_RETRIES
231 # if encountering max_retries connect and REDIRECT_RETRIES read for instance
232 retry_strategy = Retry(
233 total=self.max_retries + REDIRECT_RETRIES,
234 connect=self.max_retries,
235 read=self.max_retries,
236 redirect=max(REDIRECT_RETRIES, self.max_retries),
237 status=self.max_retries,
238 status_forcelist=Retry.RETRY_AFTER_STATUS_CODES.union((408, 418, 504, 598, 599,)))
239 adapter = HTTPAdapter(max_retries=retry_strategy)
240 http = requests.Session()
241 http.mount("https://", adapter)
242 http.mount("http://", adapter)
243 return http
245 # The following is based on Python's urllib.py "URLopener.retrieve"
246 # Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
248 def retrieve_resume(self, url, filename, reporthook=None, data=None, disable_auth=False):
249 """Download files from an URL; return (headers, real_url)
251 Resumes a download if the local filename exists and
252 the server supports download resuming.
255 current_size = 0
256 tfp = None
257 headers = {
258 'User-agent': gpodder.user_agent
261 if (self.channel.auth_username or self.channel.auth_password) and not disable_auth:
262 logger.debug('Authenticating as "%s"', self.channel.auth_username)
263 auth = (self.channel.auth_username, self.channel.auth_password)
264 else:
265 auth = None
267 if os.path.exists(filename):
268 try:
269 current_size = os.path.getsize(filename)
270 tfp = open(filename, 'ab')
271 # If the file exists, then only download the remainder
272 if current_size > 0:
273 headers['Range'] = 'bytes=%s-' % (current_size)
274 except:
275 logger.warning('Cannot resume download: %s', filename, exc_info=True)
276 tfp = None
277 current_size = 0
279 if tfp is None:
280 tfp = open(filename, 'wb')
282 # Fix a problem with bad URLs that are not encoded correctly (bug 549)
283 url = url.translate(self.ESCAPE_CHARS)
285 session = self.init_session()
286 with session.get(url,
287 headers=headers,
288 stream=True,
289 auth=auth,
290 timeout=gpodder.SOCKET_TIMEOUT) as resp:
291 try:
292 resp.raise_for_status()
293 except HTTPError as e:
294 if auth is not None:
295 # Try again without authentication (bug 1296)
296 return self.retrieve_resume(url, filename, reporthook, data, True)
297 else:
298 raise gPodderDownloadHTTPError(url, resp.status_code, str(e))
300 headers = resp.headers
302 if current_size > 0:
303 # We told the server to resume - see if she agrees
304 # See RFC2616 (206 Partial Content + Section 14.16)
305 # XXX check status code here, too...
306 range = ContentRange.parse(headers.get('content-range', ''))
307 if range is None or range.start != current_size:
308 # Ok, that did not work. Reset the download
309 # TODO: seek and truncate if content-range differs from request
310 tfp.close()
311 tfp = open(filename, 'wb')
312 current_size = 0
313 logger.warning('Cannot resume: Invalid Content-Range (RFC2616).')
315 result = headers, resp.url
316 bs = 1024 * 8
317 size = -1
318 read = current_size
319 blocknum = current_size // bs
320 if reporthook:
321 if "content-length" in headers:
322 size = int(headers['content-length']) + current_size
323 reporthook(blocknum, bs, size)
324 for block in resp.iter_content(bs):
325 read += len(block)
326 tfp.write(block)
327 blocknum += 1
328 if reporthook:
329 reporthook(blocknum, bs, size)
330 tfp.close()
331 del tfp
333 # raise exception if actual size does not match content-length header
334 if size >= 0 and read < size:
335 raise urllib.error.ContentTooShortError("retrieval incomplete: got only %i out "
336 "of %i bytes" % (read, size), result)
338 return result
340 # end code based on urllib.py
class DefaultDownload(CustomDownload):
    """CustomDownload implementation that fetches the URL with DownloadURLOpener."""

    def __init__(self, config, episode, url):
        self._config = config
        self.__episode = episode
        self._url = url
        self.__partial_filename = None

    @property
    def partial_filename(self):
        return self.__partial_filename

    @partial_filename.setter
    def partial_filename(self, val):
        self.__partial_filename = val

    def retrieve_resume(self, tempname, reporthook):
        """
        Download self._url into tempname, trying again after incomplete transfers.

        :param str tempname: temporary filename for the download
        :param reporthook: progress callback handed to DownloadURLOpener
        :return: (headers, real_url)
        """
        target = self._url
        logger.info("Downloading %s", target)
        retry_budget = max(0, self._config.auto.retries)
        opener = DownloadURLOpener(self.__episode.channel, max_retries=retry_budget)
        self.partial_filename = tempname

        # Retry the download on incomplete download (other retries are done by the Retry strategy)
        attempt = 0
        while True:
            if attempt > 0:
                logger.info('Retrying download of %s (%d)', target, attempt)
                time.sleep(1)

            try:
                headers, real_url = opener.retrieve_resume(target,
                    tempname, reporthook=reporthook)
            except urllib.error.ContentTooShortError:
                if attempt >= retry_budget:
                    # Out of attempts: let the caller see the error
                    raise
                logger.info('Content too short: %s - will retry.',
                        target)
                attempt += 1
            else:
                # If we arrive here, the download was successful
                return (headers, real_url)
class DefaultDownloader(CustomDownloader):
    """Downloader used when no custom downloader takes the episode."""

    @staticmethod
    def custom_downloader(config, episode):
        """Build the DefaultDownload for episode, resolving its URL first."""
        # Resolve URL and start downloading the episode
        resolved = registry.download_url.resolve(config, None, episode, False)
        url = resolved if resolved else episode.url
        if url == episode.url:
            # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
            url = util.iri_to_url(url.strip())
        return DefaultDownload(config, episode, url)
class DownloadQueueWorker(object):
    """Worker that keeps pulling tasks from a queue and running them.

    :param queue: object whose get_next() returns the next task, or a falsy
        value when there is nothing left to do
    :param exit_callback: called with this worker once the queue is drained
    :param continue_check_callback: called with this worker before each task;
        a falsy result makes the worker stop immediately
    """

    def __init__(self, queue, exit_callback, continue_check_callback):
        self.queue = queue
        self.exit_callback = exit_callback
        self.continue_check_callback = continue_check_callback

    def __repr__(self):
        # Thread.getName() is deprecated since Python 3.10; use the attribute
        return threading.current_thread().name

    def run(self):
        """Process tasks until the queue is empty or the worker is told to stop."""
        logger.info('Starting new thread: %s', self)
        while True:
            if not self.continue_check_callback(self):
                # Stopped by the callback: exit_callback is not invoked here
                return

            task = self.queue.get_next()
            if not task:
                logger.info('No more tasks for %s to carry out.', self)
                break
            logger.info('%s is processing: %s', self, task)
            task.run()
            task.recycle()

        self.exit_callback(self)
class ForceDownloadWorker(object):
    """Worker that runs exactly one given task and then recycles it."""

    def __init__(self, task):
        self.task = task

    def __repr__(self):
        # Thread.getName() is deprecated since Python 3.10; use the attribute
        return threading.current_thread().name

    def run(self):
        """Run the task once, then recycle it."""
        logger.info('Starting new thread: %s', self)
        logger.info('%s is processing: %s', self, self.task)
        self.task.run()
        self.task.recycle()
class DownloadQueueManager(object):
    """Starts worker threads for queued download tasks, within the configured limits."""

    def __init__(self, config, queue):
        self._config = config
        self.tasks = queue

        # worker_threads is only modified while holding worker_threads_access
        self.worker_threads_access = threading.RLock()
        self.worker_threads = []

    def __exit_callback(self, worker_thread):
        """Forget a worker that ran out of tasks."""
        with self.worker_threads_access:
            self.worker_threads.remove(worker_thread)

    def __continue_check_callback(self, worker_thread):
        """Return False (and drop the worker) when too many workers are running."""
        with self.worker_threads_access:
            too_many = (len(self.worker_threads) > self._config.max_downloads
                        and self._config.max_downloads_enabled)
            if not too_many:
                return True
            self.worker_threads.remove(worker_thread)
            return False

    def __spawn_threads(self):
        """Spawn new worker threads if necessary
        """
        with self.worker_threads_access:
            work_count = self.tasks.available_work_count()
            if self._config.max_downloads_enabled:
                # always allow at least 1 download
                spawn_limit = max(int(self._config.max_downloads), 1)
            else:
                spawn_limit = self._config.limit.downloads.concurrent_max
            running = len(self.worker_threads)
            logger.info('%r tasks to do, can start at most %r threads, %r threads currently running', work_count, spawn_limit, running)
            to_start = min(work_count, spawn_limit - running)
            for _unused in range(to_start):
                # We have to create a new thread here, there's work to do
                logger.info('Starting new worker thread.')

                new_worker = DownloadQueueWorker(self.tasks, self.__exit_callback,
                        self.__continue_check_callback)
                self.worker_threads.append(new_worker)
                util.run_in_background(new_worker.run)

    def update_max_downloads(self):
        """Re-check the limits and start additional workers if they allow it."""
        self.__spawn_threads()

    def force_start_task(self, task):
        """Start a task that is not running right away on a dedicated worker."""
        startable = (task.QUEUED, task.PAUSED, task.CANCELLED, task.FAILED)
        with task:
            if task.status not in startable:
                return
            task.status = task.DOWNLOADING
            util.run_in_background(ForceDownloadWorker(task).run)

    def queue_task(self, task):
        """Marks a task as queued
        """
        self.tasks.queue_task(task)
        self.__spawn_threads()

    def has_workers(self):
        """Return True while at least one worker is registered."""
        return bool(self.worker_threads)
class DownloadTask(object):
    """An object representing the download task of an episode

    You can create a new download task like this:

        task = DownloadTask(episode, gpodder.config.Config(CONFIGFILE))
        task.status = DownloadTask.QUEUED
        task.run()

    While the download is in progress, you can access its properties:

        task.total_size       # in bytes
        task.progress         # from 0.0 to 1.0
        task.speed            # in bytes per second
        str(task)             # name of the episode
        task.status           # current status
        task.status_changed   # True if the status has been changed (see below)
        task.url              # URL of the episode being downloaded
        task.podcast_url      # URL of the podcast this download belongs to
        task.episode          # Episode object of this task

    You can cancel a running download task by setting its status:

        with task:
            task.status = DownloadTask.CANCELLING

    The task will then abort as soon as possible (due to the nature
    of downloading data, this can take a while when the Internet is
    busy).

    The "status_changed" attribute gets set to True every time the
    "status" attribute changes its value. After you get the value of
    the "status_changed" attribute, it is always reset to False:

        if task.status_changed:
            new_status = task.status
            # .. update the UI accordingly ..

    Obviously, this also means that you must have at most *one*
    place in your UI code where you check for status changes and
    broadcast the status updates from there.

    While the download is taking place and after the .run() method
    has finished, you can get the final status to check if the download
    was successful:

        if task.status == DownloadTask.DONE:
            # .. everything ok ..
        elif task.status == DownloadTask.FAILED:
            # .. an error happened, and the
            #    error_message attribute is set ..
            print(task.error_message)
        elif task.status == DownloadTask.PAUSED:
            # .. user paused the download ..
        elif task.status == DownloadTask.CANCELLED:
            # .. user cancelled the download ..

    The difference between cancelling and pausing a DownloadTask is
    that the temporary file gets deleted when cancelling, but does
    not get deleted when pausing.

    Be sure to call .removed_from_list() on this task when removing
    it from the UI, so that it can carry out any pending clean-up
    actions (e.g. removing the temporary file when the task has not
    finished successfully; i.e. task.status != DownloadTask.DONE).

    The UI can call the method "notify_as_finished()" to determine if
    this episode still has to be shown as "finished" download
    in a notification window. This will return True only the first time
    it is called when the status is DONE. After returning True once,
    it will always return False afterwards.

    The same thing works for failed downloads ("notify_as_failed()").
    """
    # Possible states this download task can be in
    STATUS_MESSAGE = (_('Queued'), _('Queued'), _('Downloading'),
            _('Finished'), _('Failed'), _('Cancelling'), _('Cancelled'), _('Pausing'), _('Paused'))
    (NEW, QUEUED, DOWNLOADING, DONE, FAILED, CANCELLING, CANCELLED, PAUSING, PAUSED) = list(range(9))

    # Whether this task represents a file download or a device sync operation
    ACTIVITY_DOWNLOAD, ACTIVITY_SYNCHRONIZE = list(range(2))

    # Minimum time between progress updates (in seconds)
    MIN_TIME_BETWEEN_UPDATES = 1.

    def __str__(self):
        return self.__episode.title

    def __enter__(self):
        # "with task:" holds the task's re-entrant lock
        return self.__lock.acquire()

    def __exit__(self, type, value, traceback):
        self.__lock.release()

    @property
    def status(self):
        """Current state; one of the status constants of this class."""
        return self.__status

    @status.setter
    def status(self, status):
        if status != self.__status:
            self.__status_changed = True
            self.__status = status

    @property
    def status_changed(self):
        """True if the status changed since the last read; reading resets the flag."""
        if self.__status_changed:
            self.__status_changed = False
            return True
        else:
            return False

    @property
    def activity(self):
        """ACTIVITY_DOWNLOAD or ACTIVITY_SYNCHRONIZE."""
        return self.__activity

    @activity.setter
    def activity(self, activity):
        self.__activity = activity

    @property
    def url(self):
        """URL of the episode being downloaded."""
        return self.__episode.url

    @property
    def podcast_url(self):
        """URL of the podcast this download belongs to."""
        return self.__episode.channel.url

    @property
    def episode(self):
        """Episode object of this task."""
        return self.__episode

    @property
    def downloader(self):
        """Custom downloader to use for this task, or None."""
        return self.__downloader

    @downloader.setter
    def downloader(self, downloader):
        # modifying the downloader will only have effect before the download is started
        self.__downloader = downloader

    def can_queue(self):
        return self.status in (self.CANCELLED, self.PAUSED, self.FAILED)

    def unpause(self):
        with self:
            # Resume a downloading task that was transitioning to paused
            if self.status == self.PAUSING:
                self.status = self.DOWNLOADING

    def can_pause(self):
        return self.status in (self.DOWNLOADING, self.QUEUED)

    def pause(self):
        with self:
            # Pause a queued download
            if self.status == self.QUEUED:
                self.status = self.PAUSED
            # Request pause of a running download
            elif self.status == self.DOWNLOADING:
                self.status = self.PAUSING
                # download rate limited tasks sleep and take longer to
                # transition from the PAUSING state to the PAUSED state

    def can_cancel(self):
        return self.status in (self.DOWNLOADING, self.QUEUED, self.PAUSED, self.FAILED)

    def cancel(self):
        with self:
            # Cancelling directly is allowed if the task isn't currently downloading
            if self.status in (self.QUEUED, self.PAUSED, self.FAILED):
                self.status = self.CANCELLING
                # Call run, so the partial file gets deleted, and task recycled
                self.run()
            # Otherwise request cancellation
            elif self.status == self.DOWNLOADING:
                self.status = self.CANCELLING

    def can_remove(self):
        return self.status in (self.CANCELLED, self.FAILED, self.DONE)

    def delete_partial_files(self):
        """Delete the temporary file and every file named '<tempname>.*'."""
        temporary_files = [self.tempname]
        # youtube-dl creates .partial.* files for adaptive formats
        temporary_files += glob.glob('%s.*' % self.tempname)

        for partial_file in temporary_files:
            util.delete_file(partial_file)

    def removed_from_list(self):
        if self.status != self.DONE:
            self.delete_partial_files()

    def __init__(self, episode, config, downloader=None):
        """
        :param episode: episode to download; must not have a download task yet
        :param config: gpodder config object
        :param downloader: optional custom downloader to use for this task
        """
        assert episode.download_task is None
        self.__lock = threading.RLock()
        self.__status = DownloadTask.NEW
        self.__activity = DownloadTask.ACTIVITY_DOWNLOAD
        self.__status_changed = True
        self.__episode = episode
        self._config = config
        # specify a custom downloader to be used for this download
        self.__downloader = downloader

        # Create the target filename and save it in the database
        self.filename = self.__episode.local_filename(create=True)
        self.tempname = self.filename + '.partial'

        self.total_size = self.__episode.file_size
        self.speed = 0.0
        self.progress = 0.0
        self.error_message = None
        self.custom_downloader = None

        # Have we already shown this task in a notification?
        self._notification_shown = False

        # Variables for speed limit and speed calculation
        self.__start_time = 0
        self.__start_blocks = 0
        self.__limit_rate_value = self._config.limit.bandwidth.kbps
        self.__limit_rate = self._config.limit.bandwidth.enabled

        # Progress update functions
        self._progress_updated = None
        self._last_progress_updated = 0.

        # If the tempname already exists, set progress accordingly
        if os.path.exists(self.tempname):
            try:
                already_downloaded = os.path.getsize(self.tempname)
                if self.total_size > 0:
                    self.progress = max(0.0, min(1.0, already_downloaded / self.total_size))
            except OSError as os_error:
                logger.error('Cannot get size for %s', os_error)
        else:
            # "touch self.tempname", so we also get partial
            # files for resuming when the file is queued
            with open(self.tempname, 'w'):
                pass

        # Store a reference to this task in the episode
        episode.download_task = self

    def reuse(self):
        if not os.path.exists(self.tempname):
            # partial file was deleted when cancelled, recreate it
            with open(self.tempname, 'w'):
                pass

    def notify_as_finished(self):
        if self.status == DownloadTask.DONE:
            if self._notification_shown:
                return False
            else:
                self._notification_shown = True
                return True

        return False

    def notify_as_failed(self):
        if self.status == DownloadTask.FAILED:
            if self._notification_shown:
                return False
            else:
                self._notification_shown = True
                return True

        return False

    def add_progress_callback(self, callback):
        self._progress_updated = callback

    def status_updated(self, count, blockSize, totalSize):
        """Progress callback handed to the downloader.

        :param count: number of blocks transferred so far
        :param blockSize: size of one block in bytes
        :param totalSize: expected total size in bytes (ignored unless > 0)
        :raises DownloadCancelledException: when the task is being cancelled
            or paused, to make the downloader stop
        """
        # We see a different "total size" while downloading,
        # so correct the total size variable in the thread
        if totalSize != self.total_size and totalSize > 0:
            self.total_size = float(totalSize)
            if self.__episode.file_size != self.total_size:
                logger.debug('Updating file size of %s to %s',
                        self.filename, self.total_size)
                self.__episode.file_size = self.total_size
                self.__episode.save()

        if self.total_size > 0:
            self.progress = max(0.0, min(1.0, count * blockSize / self.total_size))
            if self._progress_updated is not None:
                diff = time.time() - self._last_progress_updated
                if diff > self.MIN_TIME_BETWEEN_UPDATES or self.progress == 1.:
                    self._progress_updated(self.progress)
                    self._last_progress_updated = time.time()

        self.calculate_speed(count, blockSize)

        if self.status in (DownloadTask.CANCELLING, DownloadTask.PAUSING):
            raise DownloadCancelledException()

    def calculate_speed(self, count, blockSize):
        """Update self.speed (bytes per second) and throttle if rate limiting is on.

        Only every fifth block is evaluated. When the bandwidth limit is
        enabled and exceeded, this sleeps (at most 10 seconds per call) to
        bring the average rate down to the limit.
        """
        if count % 5 != 0:
            return

        now = time.time()
        bandwidth = self._config.limit.bandwidth

        if self.__start_time > 0:
            # Has rate limiting been enabled or disabled?
            if self.__limit_rate != bandwidth.enabled:
                # If it has been enabled then reset base time and block count
                if bandwidth.enabled:
                    self.__start_time = now
                    self.__start_blocks = count
                self.__limit_rate = bandwidth.enabled

            # Has the rate been changed and are we currently limiting?
            if self.__limit_rate_value != bandwidth.kbps and self.__limit_rate:
                self.__start_time = now
                self.__start_blocks = count
                self.__limit_rate_value = bandwidth.kbps

            passed = now - self.__start_time
            if passed > 0:
                speed = ((count - self.__start_blocks) * blockSize) / passed
            else:
                speed = 0
        else:
            # First measurement: nothing has been timed yet, so there is no
            # meaningful speed (count may already be large when resuming)
            self.__start_time = now
            self.__start_blocks = count
            passed = 0
            speed = 0

        self.speed = float(speed)

        # The limit is configured in kB/s while speed is in bytes/s.
        # A limit of zero (or less) cannot be enforced and is ignored.
        limit = bandwidth.kbps * 1024.0
        if bandwidth.enabled and limit > 0 and speed > limit:
            # calculate the time that should have passed to reach
            # the desired download rate and wait if necessary
            should_have_passed = (count - self.__start_blocks) * blockSize / limit
            if should_have_passed > passed:
                # sleep a maximum of 10 seconds to not cause time-outs
                delay = min(10.0, float(should_have_passed - passed))
                time.sleep(delay)

    def recycle(self):
        if self.status not in (self.FAILED, self.PAUSED):
            self.episode.download_task = None

    def set_episode_download_task(self):
        if not self.episode.download_task:
            self.episode.download_task = self

    def run(self):
        """Carry out the download.

        Does nothing unless the status is DOWNLOADING (a task in CANCELLING or
        PAUSING state is moved to CANCELLED / PAUSED instead). Errors are not
        raised; they set the status to FAILED and fill in error_message.

        :return: True when the episode was downloaded, False otherwise
        """
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled/paused, skip it
        with self:
            if self.status == DownloadTask.CANCELLING:
                self.status = DownloadTask.CANCELLED
                self.__episode._download_error = None
                self.delete_partial_files()
                self.progress = 0.0
                self.speed = 0.0
                self.recycle()
                return False

            if self.status == DownloadTask.PAUSING:
                self.status = DownloadTask.PAUSED
                return False

            # We only start this download if its status is downloading
            if self.status != DownloadTask.DOWNLOADING:
                return False

            # We are downloading this file right now
            self._notification_shown = False

            # Restore a reference to this task in the episode
            # when running a recycled task following a pause or failed
            # see #649
            self.set_episode_download_task()

        url = self.__episode.url
        result = DownloadTask.DOWNLOADING
        try:
            if url == '':
                raise DownloadNoURLException()

            if self.downloader:
                downloader = self.downloader.custom_downloader(self._config, self.episode)
            else:
                downloader = registry.custom_downloader.resolve(self._config, None, self.episode)

            if downloader:
                logger.info('Downloading %s with %s', url, downloader)
            else:
                downloader = DefaultDownloader.custom_downloader(self._config, self.episode)

            self.custom_downloader = downloader
            headers, real_url = downloader.retrieve_resume(self.tempname, self.status_updated)

            new_mimetype = headers.get('content-type', self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                # don't call local_filename because we'll get the old download name
                new_extension = self.__episode.extension(may_call_local_filename=False)

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True,
                            force_update=True, template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s',
                            real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use it if available
            disposition_filename = util.get_header_param(headers, 'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                # filename_from_url to remove query string; see #591
                fn, ext = util.filename_from_url(disposition_filename)
                logger.debug("converting disposition filename '%s' to local filename '%s%s'", disposition_filename, fn, ext)
                disposition_filename = fn + ext
                self.filename = self.__episode.local_filename(create=True,
                        force_update=True, template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                            new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                    os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLING:
                self.__episode._download_error = None
                self.delete_partial_files()
                self.progress = 0.0
                self.speed = 0.0
            result = DownloadTask.CANCELLED
        except DownloadNoURLException:
            result = DownloadTask.FAILED
            self.error_message = _('Episode has no URL to download')
        except urllib.error.ContentTooShortError:
            result = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except ConnectionError as ce:
            # special case request exception
            result = DownloadTask.FAILED
            logger.error('Download failed: %s', str(ce), exc_info=True)
            # The host/port are only available when the wrapped error carries
            # a connection pool; never let the handler itself blow up
            pool = getattr(ce.args[0], 'pool', None) if ce.args else None
            if pool is not None:
                d = {'host': pool.host, 'port': pool.port}
                # Translate first, format afterwards - otherwise the message
                # can never be found in the translation catalog
                self.error_message = _("Couldn't connect to server %(host)s:%(port)s") % d
            else:
                d = {'error': str(ce)}
                self.error_message = _('Request Error: %(error)s') % d
        except RequestException as req_error:
            # extract MaxRetryError to shorten the exception message
            if req_error.args and isinstance(req_error.args[0], MaxRetryError):
                req_error = req_error.args[0]
            logger.error('%s while downloading "%s"', str(req_error),
                    self.__episode.title, exc_info=True)
            result = DownloadTask.FAILED
            d = {'error': str(req_error)}
            self.error_message = _('Request Error: %(error)s') % d
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s', ioe.strerror,
                    self.__episode.title, ioe.filename, exc_info=True)
            result = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                    gdhe.error_code, self.__episode.title, gdhe.error_message,
                    exc_info=True)
            result = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            result = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e),)

        with self:
            if result == DownloadTask.DOWNLOADING:
                # Everything went well - we're done (even if the task was cancelled/paused,
                # since it's finished we might as well mark it done)
                self.status = DownloadTask.DONE
                if self.total_size <= 0:
                    self.total_size = util.calculate_size(self.filename)
                    logger.info('Total size updated to %d', self.total_size)
                self.progress = 1.0
                gpodder.user_extensions.on_episode_downloaded(self.__episode)
                return True

            self.speed = 0.0

            if result == DownloadTask.FAILED:
                self.status = DownloadTask.FAILED
                self.__episode._download_error = self.error_message

            # cancelled/paused -- update state to mark it as safe to manipulate this task again
            elif self.status == DownloadTask.PAUSING:
                self.status = DownloadTask.PAUSED
            elif self.status == DownloadTask.CANCELLING:
                self.status = DownloadTask.CANCELLED

        # We finished, but not successfully (at least not really)
        return False