1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # download.py -- Download client using DownloadStatusManager
23 # Thomas Perl <thp@perli.net> 2007-09-15
25 # Based on libwget.py (2005-10-29)
28 from __future__
import with_statement
30 from gpodder
.liblogger
import log
31 from gpodder
.libgpodder
import gl
32 from gpodder
.dbsqlite
import db
33 from gpodder
import util
34 from gpodder
import resolver
45 from xml
.sax
import saxutils
48 class ContentRange(object):
50 # http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
52 # Copyright (c) 2007 Ian Bicking and Contributors
54 # Permission is hereby granted, free of charge, to any person obtaining
55 # a copy of this software and associated documentation files (the
56 # "Software"), to deal in the Software without restriction, including
57 # without limitation the rights to use, copy, modify, merge, publish,
58 # distribute, sublicense, and/or sell copies of the Software, and to
59 # permit persons to whom the Software is furnished to do so, subject to
60 # the following conditions:
62 # The above copyright notice and this permission notice shall be
63 # included in all copies or substantial portions of the Software.
65 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
66 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
67 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
68 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
69 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
70 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
71 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
73 Represents the Content-Range header
75 This header is ``start-stop/length``, where stop and length can be
76 ``*`` (represented as None in the attributes).
79 def __init__(self
, start
, stop
, length
):
80 assert start
>= 0, "Bad start: %r" % start
81 assert stop
is None or (stop
>= 0 and stop
>= start
), (
82 "Bad stop: %r" % stop
)
89 self
.__class
__.__name
__,
97 if self
.length
is None:
101 return 'bytes %s-%s/%s' % (self
.start
, stop
, length
)
105 Mostly so you can unpack this, like:
107 start, stop, length = res.content_range
109 return iter([self
.start
, self
.stop
, self
.length
])
112 def parse(cls
, value
):
114 Parse the header. May return None if it cannot parse.
118 value
= value
.strip()
119 if not value
.startswith('bytes '):
122 value
= value
[len('bytes '):].strip()
124 # Invalid, no length given
126 range, length
= value
.split('/', 1)
130 start
, end
= range.split('-', 1)
145 return cls(start
, None, length
)
147 return cls(start
, end
-1, length
)
class DownloadCancelledException(Exception):
    """Signals that a running download should be aborted.

    DownloadTask.status_updated() raises this from the progress hook
    when the task has been cancelled or paused.
    """
152 class gPodderDownloadHTTPError(Exception):
153 def __init__(self
, url
, error_code
, error_message
):
155 self
.error_code
= error_code
156 self
.error_message
= error_message
158 class DownloadURLOpener(urllib
.FancyURLopener
):
159 version
= gpodder
.user_agent
161 def __init__( self
, channel
):
162 if gl
.config
.proxy_use_environment
:
166 if gl
.config
.http_proxy
:
167 proxies
['http'] = gl
.config
.http_proxy
168 if gl
.config
.ftp_proxy
:
169 proxies
['ftp'] = gl
.config
.ftp_proxy
171 self
.channel
= channel
172 urllib
.FancyURLopener
.__init
__( self
, proxies
)
174 def http_error_default(self
, url
, fp
, errcode
, errmsg
, headers
):
176 FancyURLopener by default does not raise an exception when
177 there is some unknown HTTP error code. We want to override
178 this and provide a function to log the error and raise an
179 exception, so we don't download the HTTP error page here.
181 # The following two lines are copied from urllib.URLopener's
182 # implementation of http_error_default
185 raise gPodderDownloadHTTPError(url
, errcode
, errmsg
)
187 # The following is based on Python's urllib.py "URLopener.retrieve"
188 # Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
    """Treat a "206 Partial Content" reply as a normal response.

    Builds the return value the same way urllib's URLopener.open_http
    does at its end, after the line "if errcode == 200:", instead of
    letting the status code fall through to the error handling.

    Returns the urllib.addinfourl object wrapping fp and headers.
    """
    full_url = 'http:' + url
    response = urllib.addinfourl(fp, headers, full_url)
    return response
195 def retrieve_resume(self
, url
, filename
, reporthook
=None, data
=None):
196 """retrieve_resume(url) returns (filename, headers) for a local object
197 or (tempfilename, headers) for a remote object.
199 The filename argument is REQUIRED (no tempfile creation code here!)
201 Additionally resumes a download if the local filename exists"""
205 if os
.path
.exists(filename
):
207 current_size
= os
.path
.getsize(filename
)
208 tfp
= open(filename
, 'ab')
209 #If the file exists, then only download the remainder
210 self
.addheader('Range', 'bytes=%s-' % (current_size
))
212 log('Cannot open file for resuming: %s', filename
, sender
=self
, traceback
=True)
217 tfp
= open(filename
, 'wb')
219 url
= urllib
.unwrap(urllib
.toBytes(url
))
220 fp
= self
.open(url
, data
)
224 # We told the server to resume - see if she agrees
225 # See RFC2616 (206 Partial Content + Section 14.16)
226 # XXX check status code here, too...
227 range = ContentRange
.parse(headers
.get('content-range', ''))
228 if range is None or range.start
!= current_size
:
229 # Ok, that did not work. Reset the download
230 # TODO: seek and truncate if content-range differs from request
232 tfp
= open(filename
, 'wb')
234 log('Cannot resume. Missing or wrong Content-Range header (RFC2616)', sender
=self
)
237 # gPodder TODO: we can get the real url via fp.geturl() here
238 # (if anybody wants to fix filenames in the future)
240 result
= filename
, headers
244 blocknum
= int(current_size
/bs
)
246 if "content-length" in headers
:
247 size
= int(headers
["Content-Length"]) + current_size
248 reporthook(blocknum
, bs
, size
)
257 reporthook(blocknum
, bs
, size
)
263 # raise exception if actual size does not match content-length header
264 if size
>= 0 and read
< size
:
265 raise urllib
.ContentTooShortError("retrieval incomplete: got only %i out "
266 "of %i bytes" % (read
, size
), result
)
270 # end code based on urllib.py
def prompt_user_passwd(self, host, realm):
    """Answer an authentication request with the channel's credentials.

    Returns a (username, password) tuple taken from self.channel, or
    (None, None) when the channel has neither a username nor a
    password set.
    """
    channel = self.channel
    if not (channel.username or channel.password):
        return (None, None)

    log('Authenticating as "%s" to "%s" for realm "%s".',
        channel.username, host, realm, sender=self)
    return (channel.username, channel.password)
280 class DownloadQueueWorker(threading
.Thread
):
281 def __init__(self
, queue
, exit_callback
):
282 threading
.Thread
.__init
__(self
)
284 self
.exit_callback
= exit_callback
285 self
.cancelled
= False
def stop_accepting_tasks(self):
    """Tell this worker not to pick up any further tasks.

    Sets the "cancelled" flag that the worker loop checks before
    taking the next task, so the worker quits once its current task
    has been finished. Calling this again on an already cancelled
    worker does nothing.
    """
    if self.cancelled:
        # Already flagged; do not log the message a second time
        return

    self.cancelled = True
    log('%s stopped accepting tasks.', self.getName(), sender=self)
297 log('Running new thread: %s', self
.getName(), sender
=self
)
298 while not self
.cancelled
:
300 task
= self
.queue
.pop()
301 log('%s is processing: %s', self
.getName(), task
, sender
=self
)
303 except IndexError, e
:
304 log('No more tasks for %s to carry out.', self
.getName(), sender
=self
)
306 self
.exit_callback(self
)
309 class DownloadQueueManager(object):
310 def __init__(self
, download_status_manager
):
311 self
.download_status_manager
= download_status_manager
312 self
.tasks
= collections
.deque()
314 self
.worker_threads_access
= threading
.RLock()
315 self
.worker_threads
= []
317 def __exit_callback(self
, worker_thread
):
318 with self
.worker_threads_access
:
319 self
.worker_threads
.remove(worker_thread
)
321 def spawn_and_retire_threads(self
, request_new_thread
=False):
322 with self
.worker_threads_access
:
323 if len(self
.worker_threads
) > gl
.config
.max_downloads
and \
324 gl
.config
.max_downloads_enabled
:
325 # Tell the excessive amount of oldest worker threads to quit, but keep at least one
326 count
= min(len(self
.worker_threads
)-1, len(self
.worker_threads
)-gl
.config
.max_downloads
)
327 for worker
in self
.worker_threads
[:count
]:
328 worker
.stop_accepting_tasks()
330 if request_new_thread
and (len(self
.worker_threads
) == 0 or \
331 len(self
.worker_threads
) < gl
.config
.max_downloads
or \
332 not gl
.config
.max_downloads_enabled
):
333 # We have to create a new thread here, there's work to do
334 log('I am going to spawn a new worker thread.', sender
=self
)
335 worker
= DownloadQueueWorker(self
.tasks
, self
.__exit
_callback
)
336 self
.worker_threads
.append(worker
)
def add_resumed_task(self, task):
    """Register the task with the status manager only.

    The task is neither put on the queue nor is a worker thread
    requested, so the download itself is not started here.
    """
    status_manager = self.download_status_manager
    status_manager.register_task(task)
343 def add_task(self
, task
):
344 if task
.status
== DownloadTask
.INIT
:
345 # This task is fresh, so add it to our status manager
346 self
.download_status_manager
.register_task(task
)
348 # This task is old so update episode from db
349 task
.episode
.reload_from_db()
350 task
.status
= DownloadTask
.QUEUED
351 self
.tasks
.appendleft(task
)
352 self
.spawn_and_retire_threads(request_new_thread
=True)
355 class DownloadTask(object):
356 """An object representing the download task of an episode
358 You can create a new download task like this:
360 task = DownloadTask(episode)
361 task.status = DownloadTask.QUEUED
364 While the download is in progress, you can access its properties:
366 task.total_size # in bytes
367 task.progress # from 0.0 to 1.0
368 task.speed # in bytes per second
369 str(task) # name of the episode
370 task.status # current status
371 task.status_changed # True if the status has been changed
373 You can cancel a running download task by setting its status:
375 task.status = DownloadTask.CANCELLED
377 The task will then abort as soon as possible (due to the nature
378 of downloading data, this can take a while when the Internet is
381 The "status_changed" attribute gets set to True everytime the
382 "status" attribute changes its value. After you get the value of
383 the "status_changed" attribute, it is always reset to False:
385 if task.status_changed:
386 new_status = task.status
387 # .. update the UI accordingly ..
389 Obviously, this also means that you must have at most *one*
390 place in your UI code where you check for status changes and
391 broadcast the status updates from there.
393 While the download is taking place and after the .run() method
394 has finished, you can get the final status to check if the download
397 if task.status == DownloadTask.DONE:
398 # .. everything ok ..
399 elif task.status == DownloadTask.FAILED:
400 # .. an error happened, and the
401 # error_message attribute is set ..
402 print task.error_message
403 elif task.status == DownloadTask.PAUSED:
404 # .. user paused the download ..
405 elif task.status == DownloadTask.CANCELLED:
406 # .. user cancelled the download ..
408 The difference between cancelling and pausing a DownloadTask is
409 that the temporary file gets deleted when cancelling, but does
410 not get deleted when pausing.
412 Be sure to call .removed_from_list() on this task when removing
413 it from the UI, so that it can carry out any pending clean-up
414 actions (e.g. removing the temporary file when the task has not
415 finished successfully; i.e. task.status != DownloadTask.DONE).
417 # Possible states this download task can be in
418 STATUS_MESSAGE
= (_('Added'), _('Queued'), _('Downloading'),
419 _('Finished'), _('Failed'), _('Cancelled'), _('Paused'))
420 (INIT
, QUEUED
, DOWNLOADING
, DONE
, FAILED
, CANCELLED
, PAUSED
) = range(7)
423 return self
.__episode
.title
425 def __get_status(self
):
428 def __set_status(self
, status
):
429 if status
!= self
.__status
:
430 self
.__status
_changed
= True
431 self
.__status
= status
433 status
= property(fget
=__get_status
, fset
=__set_status
)
435 def __get_status_changed(self
):
436 if self
.__status
_changed
:
437 self
.__status
_changed
= False
442 status_changed
= property(fget
=__get_status_changed
)
445 return self
.__episode
.url
447 url
= property(fget
=__get_url
)
449 def __get_episode(self
):
450 return self
.__episode
452 episode
= property(fget
=__get_episode
)
def removed_from_list(self):
    """Clean up after this task has been removed from the UI list.

    The temporary (partial) file is deleted unless the task's status
    is DONE.
    """
    unfinished = self.status != self.DONE
    if unfinished:
        util.delete_file(self.tempname)
458 def __init__(self
, episode
):
459 self
.__status
= DownloadTask
.INIT
460 self
.__status
_changed
= True
461 self
.__episode
= episode
463 # Create the target filename and save it in the database
464 self
.filename
= self
.__episode
.local_filename(create
=True)
465 self
.tempname
= self
.filename
+ '.partial'
468 self
.total_size
= self
.__episode
.length
471 self
.error_message
= None
473 # Variables for speed limit and speed calculation
474 self
.__start
_time
= 0
475 self
.__start
_blocks
= 0
476 self
.__limit
_rate
_value
= gl
.config
.limit_rate_value
477 self
.__limit
_rate
= gl
.config
.limit_rate
479 # If the tempname already exists, set progress accordingly
480 if os
.path
.exists(self
.tempname
):
482 already_downloaded
= os
.path
.getsize(self
.tempname
)
483 if self
.total_size
> 0:
484 self
.progress
= max(0.0, min(1.0, float(already_downloaded
)/self
.total_size
))
485 except OSError, os_error
:
486 log('Error while getting size for existing file: %s', os_error
, sender
=self
)
488 # "touch self.tempname", so we also get partial
489 # files for resuming when the file is queued
490 open(self
.tempname
, 'w').close()
def status_updated(self, count, blockSize, totalSize):
    """Progress hook called while the episode is being downloaded.

    count     -- number of blocks reported so far
    blockSize -- size of one block in bytes
    totalSize -- total size reported for the download

    Updates total_size and progress, hands the numbers on to
    calculate_speed() and raises DownloadCancelledException when the
    task has been cancelled or paused in the meantime.
    """
    # The size seen while downloading can differ from the one we
    # started with, so take over any positive value reported here
    if totalSize != self.total_size and totalSize > 0:
        self.total_size = float(totalSize)

    if self.total_size > 0:
        fraction = float(count*blockSize)/self.total_size
        # Clamp to the range 0.0 .. 1.0
        self.progress = max(0.0, min(1.0, fraction))

    self.calculate_speed(count, blockSize)

    # Cancelling and pausing both abort the running transfer
    if self.status == DownloadTask.CANCELLED or \
            self.status == DownloadTask.PAUSED:
        raise DownloadCancelledException()
509 def calculate_speed(self
, count
, blockSize
):
512 if self
.__start
_time
> 0:
513 # Has rate limiting been enabled or disabled?
514 if self
.__limit
_rate
!= gl
.config
.limit_rate
:
515 # If it has been enabled then reset base time and block count
516 if gl
.config
.limit_rate
:
517 self
.__start
_time
= now
518 self
.__start
_blocks
= count
519 self
.__limit
_rate
= gl
.config
.limit_rate
521 # Has the rate been changed and are we currently limiting?
522 if self
.__limit
_rate
_value
!= gl
.config
.limit_rate_value
and self
.__limit
_rate
:
523 self
.__start
_time
= now
524 self
.__start
_blocks
= count
525 self
.__limit
_rate
_value
= gl
.config
.limit_rate_value
527 passed
= now
- self
.__start
_time
529 speed
= ((count
-self
.__start
_blocks
)*blockSize
)/passed
533 self
.__start
_time
= now
534 self
.__start
_blocks
= count
535 passed
= now
- self
.__start
_time
536 speed
= count
*blockSize
538 self
.speed
= float(speed
)
540 if gl
.config
.limit_rate
and speed
> gl
.config
.limit_rate_value
:
541 # calculate the time that should have passed to reach
542 # the desired download rate and wait if necessary
543 should_have_passed
= float((count
-self
.__start
_blocks
)*blockSize
)/(gl
.config
.limit_rate_value
*1024.0)
544 if should_have_passed
> passed
:
545 # sleep a maximum of 10 seconds to not cause time-outs
546 delay
= min(10.0, float(should_have_passed
-passed
))
550 # Speed calculation (re-)starts here
551 self
.__start
_time
= 0
552 self
.__start
_blocks
= 0
554 # If the download has already been cancelled, skip it
555 if self
.status
== DownloadTask
.CANCELLED
:
556 util
.delete_file(self
.tempname
)
559 # We only start this download if its status is "queued"
560 if self
.status
!= DownloadTask
.QUEUED
:
563 # We are downloading this file right now
564 self
.status
= DownloadTask
.DOWNLOADING
567 # Resolve URL and start downloading the episode
568 url
= resolver
.get_real_download_url(self
.__episode
.url
)
569 downloader
= DownloadURLOpener(self
.__episode
.channel
)
570 (unused
, headers
) = downloader
.retrieve_resume(url
,
571 self
.tempname
, reporthook
=self
.status_updated
)
573 new_mimetype
= headers
.get('content-type', self
.__episode
.mimetype
)
574 old_mimetype
= self
.__episode
.mimetype
575 if new_mimetype
!= old_mimetype
:
576 log('Correcting mime type: %s => %s', old_mimetype
, new_mimetype
, sender
=self
)
577 old_extension
= self
.__episode
.extension()
578 self
.__episode
.mimetype
= new_mimetype
579 new_extension
= self
.__episode
.extension()
581 # If the desired filename extension changed due to the new mimetype,
582 # we force an update of the local filename to fix the extension
583 if old_extension
!= new_extension
:
584 self
.filename
= self
.__episode
.local_filename(create
=True, force_update
=True)
586 shutil
.move(self
.tempname
, self
.filename
)
588 # Get the _real_ filesize once we actually have the file
589 self
.__episode
.length
= os
.path
.getsize(self
.filename
)
590 self
.__episode
.channel
.addDownloadedItem(self
.__episode
)
592 # If a user command has been defined, execute the command setting some environment variables
593 if len(gl
.config
.cmd_download_complete
) > 0:
594 os
.environ
["GPODDER_EPISODE_URL"]=self
.__episode
.url
or ''
595 os
.environ
["GPODDER_EPISODE_TITLE"]=self
.__episode
.title
or ''
596 os
.environ
["GPODDER_EPISODE_FILENAME"]=self
.filename
or ''
597 os
.environ
["GPODDER_EPISODE_PUBDATE"]=str(int(self
.__episode
.pubDate
))
598 os
.environ
["GPODDER_EPISODE_LINK"]=self
.__episode
.link
or ''
599 os
.environ
["GPODDER_EPISODE_DESC"]=self
.__episode
.description
or ''
600 util
.run_external_command(gl
.config
.cmd_download_complete
)
601 except DownloadCancelledException
:
602 log('Download has been cancelled/paused: %s', self
, sender
=self
)
603 if self
.status
== DownloadTask
.CANCELLED
:
604 util
.delete_file(self
.tempname
)
608 log( 'Error "%s" while downloading "%s": %s', ioe
.strerror
, self
.__episode
.title
, ioe
.filename
, sender
=self
)
609 self
.status
= DownloadTask
.FAILED
610 self
.error_message
= _('I/O Error: %s: %s') % (ioe
.strerror
, ioe
.filename
)
611 except gPodderDownloadHTTPError
, gdhe
:
612 log( 'HTTP error %s while downloading "%s": %s', gdhe
.error_code
, self
.__episode
.title
, gdhe
.error_message
, sender
=self
)
613 self
.status
= DownloadTask
.FAILED
614 self
.error_message
= _('HTTP Error %s: %s') % (gdhe
.error_code
, gdhe
.error_message
)
616 self
.status
= DownloadTask
.FAILED
617 self
.error_message
= _('Error: %s') % (e
.message
,)
619 if self
.status
== DownloadTask
.DOWNLOADING
:
620 # Everything went well - we're done
621 self
.status
= DownloadTask
.DONE
627 # We finished, but not successfully (at least not really)