Handle HTTP redirects manually
[zeroinstall/solver.git] / zeroinstall / injector / download.py
blobf14f93f3e47560e775c1c7e37a1681e9a2394943
1 """
2 Handles URL downloads.
4 This is the low-level interface for downloading interfaces, implementations, icons, etc.
6 @see: L{fetch} higher-level API for downloads that uses this module
7 """
9 # Copyright (C) 2009, Thomas Leonard
10 # See the README file for details, or visit http://0install.net.
12 import tempfile, os, sys, threading, gobject
14 from zeroinstall import SafeException
15 from zeroinstall.support import tasks
16 from logging import info, debug
17 from zeroinstall import _
# Values taken by Download.status over the lifetime of a download.
# "starting" meant the download was waiting for the UI to start it; it is no longer used.
download_starting = "starting"
# The transfer is in progress.
download_fetching = "fetching"
# The resource was downloaded and cached OK.
download_complete = "complete"
# The transfer did not succeed (or was aborted).
download_failed = "failed"

# Outcome codes passed as the status argument of the notify_done callback.
RESULT_OK, RESULT_FAILED, RESULT_NOT_MODIFIED, RESULT_REDIRECT = range(4)
class DownloadError(SafeException):
    """Raised when a download could not be completed."""
class DownloadAborted(DownloadError):
    """Raised when a download was stopped by a call to L{Download.abort}."""
    def __init__(self, message = None):
        """@param message: text to report; a default message is used if it is omitted or empty"""
        if not message:
            message = _("Download aborted at user's request")
        SafeException.__init__(self, message)
class Download(object):
    """A download of a single resource to a temporary file.
    @ivar url: the URL of the resource being fetched
    @type url: str
    @ivar tempfile: the file storing the downloaded data (None once the download has ended or been aborted)
    @type tempfile: file | None
    @ivar status: the status of the download
    @type status: (download_fetching | download_failed | download_complete)
    @ivar expected_size: the expected final size of the file
    @type expected_size: int | None
    @ivar downloaded: triggered when the download ends (on success or failure)
    @type downloaded: L{tasks.Blocker}
    @ivar hint: hint passed by and for caller
    @type hint: object
    @ivar aborted_by_user: whether anyone has called L{abort}
    @type aborted_by_user: bool
    @ivar modification_time: HTTP date string given at construction, or None
    @type modification_time: str | None
    @ivar unmodified: whether the resource was not modified since the modification_time given at construction
    @type unmodified: bool
    """
    __slots__ = ['url', 'tempfile', 'status', 'expected_size', 'downloaded',
                 'hint', '_final_total_size', 'aborted_by_user',
                 'modification_time', 'unmodified']

    def __init__(self, url, hint = None, modification_time = None, expected_size = None):
        """Create a new download object.
        @param url: the resource to download
        @param hint: object with which this download is associated (an optional hint for the GUI)
        @param modification_time: string with HTTP date that indicates last modification time.
          The resource will not be downloaded if it was not modified since that date.
        @param expected_size: the expected final size in bytes, or None if unknown.
          If given, a download of a different size is reported as a failure.
        @postcondition: L{status} == L{download_fetching}."""
        self.url = url
        self.hint = hint
        self.aborted_by_user = False
        self.modification_time = modification_time
        self.unmodified = False

        self.tempfile = None        # Stream for result
        self.downloaded = None

        self.expected_size = expected_size  # Final size (excluding skipped bytes)
        self._final_total_size = None       # Set when download is finished

        self.status = download_fetching
        self.tempfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')

        # The generator is driven by the task; its "finished" blocker doubles as our
        # public completion signal.
        task = tasks.Task(self._do_download(), "download " + self.url)
        self.downloaded = task.finished

    def _do_download(self):
        """Generator that fetches L{url} in a worker thread, following redirects.
        Will trigger L{downloaded} when done (on success or failure).
        @raise DownloadAborted: if L{abort} was called while the fetch was in progress
        @raise DownloadError: if more than 10 redirections are needed"""
        from ._download_child import download_in_thread

        # (changed if we get redirected)
        current_url = self.url

        redirections_remaining = 10

        while True:
            result = []
            thread_blocker = tasks.Blocker("wait for thread " + current_url)
            def notify_done(status, ex = None, redirect = None):
                # Completion callback handed to download_in_thread (presumably invoked
                # from the worker thread): record the outcome, then wake the waiting
                # task from an idle handler rather than triggering the blocker directly.
                result.append((status, redirect))
                def wake_up_main():
                    thread_blocker.trigger(ex)
                    return False    # one-shot idle handler
                gobject.idle_add(wake_up_main)
            child = threading.Thread(target = lambda: download_in_thread(current_url, self.tempfile, self.modification_time, notify_done))
            child.daemon = True
            child.start()

            # Wait for child to complete download.
            yield thread_blocker

            # Download is complete...
            child.join()

            # abort() has already closed and dropped the temporary file, set the status
            # to failed and triggered L{downloaded}. Stop here, before the state
            # assertions below (which can never hold after an abort) and before
            # following a redirect with no output file.
            if self.aborted_by_user:
                assert self.downloaded.happened
                raise DownloadAborted()

            (status, redirect), = result

            if status != RESULT_REDIRECT:
                assert not redirect, redirect
                break

            assert redirect
            current_url = redirect

            if redirections_remaining == 0:
                # Leave the object in a finished, failed state (as for any other
                # failure) instead of looking like it is still fetching.
                self._final_total_size = self.get_bytes_downloaded_so_far()
                self.tempfile = None
                self.status = download_failed
                raise DownloadError("Too many redirections {url} -> {current}".format(
                        url = self.url,
                        current = current_url))
            redirections_remaining -= 1
            # (else go around the loop again)

        assert self.status is download_fetching
        assert self.tempfile is not None

        if status == RESULT_NOT_MODIFIED:
            debug("%s not modified", self.url)
            self.tempfile = None
            self.unmodified = True
            self.status = download_complete
            self._final_total_size = 0
            self.downloaded.trigger()
            return

        # Record the size while the status is still "fetching" (so the file is measured),
        # then drop our reference to the file.
        self._final_total_size = self.get_bytes_downloaded_so_far()

        self.tempfile = None

        try:
            # Re-raises any error that was passed to notify_done.
            tasks.check(thread_blocker)

            assert status == RESULT_OK

            # Check that the download has the correct size, if we know what it should be.
            if self.expected_size is not None:
                if self._final_total_size != self.expected_size:
                    raise SafeException(_('Downloaded archive has incorrect size.\n'
                            'URL: %(url)s\n'
                            'Expected: %(expected_size)d bytes\n'
                            'Received: %(size)d bytes') % {'url': self.url, 'expected_size': self.expected_size, 'size': self._final_total_size})
        except:
            # Deliberately broad: whatever went wrong, the download must be marked as
            # failed and L{downloaded} triggered so that waiting tasks are woken.
            self.status = download_failed
            _unused, ex, tb = sys.exc_info()
            self.downloaded.trigger(exception = (ex, tb))
        else:
            self.status = download_complete
            self.downloaded.trigger()

    def abort(self):
        """Signal the current download to stop.
        The status becomes L{download_failed}. If the download still has its temporary
        file, the file is closed and L{downloaded} is triggered with L{DownloadAborted}.
        @postcondition: L{aborted_by_user}"""
        self.status = download_failed

        if self.tempfile is not None:
            info(_("Aborting download of %s"), self.url)
            # TODO: we currently just close the output file; the thread will end when it tries to
            # write to it. We should try harder to stop the thread immediately (e.g. by closing its
            # socket when known), although we can never cover all cases (e.g. a stuck DNS lookup).
            # In any case, we don't wait for the child to exit before notifying tasks that are waiting
            # on us.
            self.aborted_by_user = True
            self.tempfile.close()
            self.tempfile = None
            self.downloaded.trigger((DownloadAborted(), None))

    def get_current_fraction(self):
        """Returns the current fraction of this download that has been fetched (from 0 to 1),
        or None if the total size isn't known.
        Returns 1 once the download has ended, and also when the expected size is zero
        (there is nothing to fetch, and the ratio would otherwise be a division by zero).
        @return: fraction downloaded
        @rtype: int | float | None"""
        if self.tempfile is None:
            return 1
        if self.expected_size is None:
            return None     # Unknown
        if self.expected_size == 0:
            return 1        # Nothing to fetch
        current_size = self.get_bytes_downloaded_so_far()
        return float(current_size) / self.expected_size

    def get_bytes_downloaded_so_far(self):
        """Get the download progress. Will be zero if the download has not yet started.
        While fetching, this is the current size of the temporary file; afterwards it is
        the size recorded when the download ended (or zero if none was recorded).
        @rtype: int"""
        if self.status is download_fetching:
            return os.fstat(self.tempfile.fileno()).st_size
        else:
            return self._final_total_size or 0

    def __str__(self):
        """Return a short human-readable description naming the URL."""
        return _("<Download from %s>") % self.url