zeroinstall/injector/scheduler.py

   1 """
   2 Manage pools of connections so that we can limit the number of requests per site and reuse
   3 connections.
   4 @since: 1.6
   5 """
   6
   7 # Copyright (C) 2011, Thomas Leonard
   8 # See the README file for details, or visit http://0install.net.
   9
  10 import urlparse
  11 from collections import defaultdict
  12 import threading, gobject
  13
  14 from zeroinstall.support import tasks
  15 from zeroinstall.injector import download
  16
  17 default_port = {
  18         'http': 80,
  19         'https': 443,
  20 }
  21
  22 class DownloadStep:
  23         url = None
  24         status = None
  25         redirect = None
  26
  27 class DownloadScheduler:
  28         """Assigns (and re-assigns on redirect) Downloads to Sites, allowing per-site limits and connection pooling.
  29         @since: 1.6"""
  30         def __init__(self):
  31                 self._sites = defaultdict(lambda: Site())       # (scheme://host:port) -> Site
  32
  33         @tasks.async
  34         def download(self, dl):
  35                 # (changed if we get redirected)
  36                 current_url = dl.url
  37
  38                 redirections_remaining = 10
  39
  40                 # Assign the Download to a Site based on its scheme, host and port. If the result is a redirect,
  41                 # reassign it to the appropriate new site. Note that proxy handling happens later; we want to group
  42                 # and limit by the target site, not treat everything as going to a single site (the proxy).
  43                 while True:
  44                         location_parts = urlparse.urlparse(current_url)
  45
  46                         site_key = (location_parts.scheme,
  47                                     location_parts.hostname,
  48                                     location_parts.port or default_port.get(location_parts.scheme, None))
  49
  50                         step = DownloadStep()
  51                         step.dl = dl
  52                         step.url = current_url
  53                         blocker = self._sites[site_key].download(step)
  54                         yield blocker
  55                         tasks.check(blocker)
  56
  57                         if not step.redirect:
  58                                 break
  59
  60                         current_url = step.redirect
  61
  62                         if redirections_remaining == 0:
  63                                 raise download.DownloadError("Too many redirections {url} -> {current}".format(
  64                                                 url = dl.url,
  65                                                 current = current_url))
  66                         redirections_remaining -= 1
  67                         # (else go around the loop again)
  68
  69 class Site:
  70         """Represents a service accepting download requests. All requests with the same scheme, host and port are
  71         handled by the same Site object, allowing it to do connection pooling and queuing, although the current
  72         implementation doesn't do either."""
  73         @tasks.async
  74         def download(self, step):
  75                 from ._download_child import download_in_thread
  76
  77                 thread_blocker = tasks.Blocker("wait for thread " + step.url)
  78                 def notify_done(status, ex = None, redirect = None):
  79                         step.status = status
  80                         step.redirect = redirect
  81                         def wake_up_main():
  82                                 thread_blocker.trigger(ex)
  83                                 return False
  84                         gobject.idle_add(wake_up_main)
  85                 child = threading.Thread(target = lambda: download_in_thread(step.url, step.dl.tempfile, step.dl.modification_time, notify_done))
  86                 child.daemon = True
  87                 child.start()
  88
  89                 # Wait for child to complete download.
  90                 yield thread_blocker, step.dl._aborted
  91
  92                 if step.dl._aborted.happened:
  93                         # Don't wait for child to finish (might be stuck doing IO)
  94                         raise download.DownloadAborted()
  95
  96                 # Download is complete...
  97                 child.join()
  98
  99                 tasks.check(thread_blocker)
 100
 101                 if step.status == download.RESULT_REDIRECT:
 102                         assert step.redirect
 103                         return                          # DownloadScheduler will handle it
 104
 105                 assert not step.redirect, step.redirect
 106
 107                 step.dl._finish(step.status)