5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
22 # Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
25 # fetch.py - a file download utility
27 # A simple program similar to wget(1), but handles local file copy, ignores
28 # directories, and verifies file hashes.
39 from urllib import splittype
40 from urllib2 import urlopen
41 from urllib2 import Request
# printIOError(e, txt): report an I/O failure on standard output.  Prints the
# "I/O Error: " banner with the caller-supplied context 'txt', then the error's
# message followed by its code in parentheses.
# NOTE(review): the statements that bind 'message' and 'code' are not present
# in this listing; presumably both are unpacked from 'e' -- confirm against the
# complete file.
45 def printIOError(e, txt):
46 """ Function to decode and print IOError type exception """
47 print "I/O Error: " + txt + ": "
50 print str(message) + " (" + str(code) + ")"
# validate_signature(path, signature): check a detached PGP signature by
# running gpg2 --verify.
#   path      - the file whose signature is being checked
#   signature - the detached signature file
# GnuPG is pointed at <repository top>/tools/.gnupg, where the repository top
# is whatever "git rev-parse --show-toplevel" prints.
# NOTE(review): the waits, the exception handler header and the return
# statements are not present in this listing, so the exact return values
# cannot be confirmed from here; the caller tests the result as a boolean.
55 def validate_signature(path, signature):
56 """Given paths to a file and a detached PGP signature, verify that
57 the signature is valid for the file. Current configuration allows for
58 unrecognized keys to be downloaded as necessary."""
60 # Find the root of the repo so that we can point GnuPG at the right
61 # configuration and keyring.
62 proc = subprocess.Popen(["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE)
# NOTE(review): returncode is only populated once the child has been waited
# on; the wait is presumably on the line missing from this listing -- confirm.
64 if proc.returncode != 0:
66 out, err = proc.communicate()
67 gpgdir = os.path.join(out.strip(), "tools", ".gnupg")
69 # Skip the permissions warning: none of the information here is private,
70 # so not having to worry about getting git keeping the directory
71 # unreadable is just simplest.
# gpg2 reads its stdin from /dev/null, and its stderr is folded into stdout so
# that the single read further down returns everything it printed.
73 proc = subprocess.Popen(["gpg2", "--verify",
74 "--no-permission-warning", "--homedir", gpgdir, signature,
75 path], stdin=open("/dev/null"),
76 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
78 # If the executable simply couldn't be found, just skip the
80 if e.errno == errno.ENOENT:
85 if proc.returncode != 0:
86 # Only print GnuPG's output when there was a problem.
87 print proc.stdout.read()
# validate(file, hash): compute the digest of a file-like object.
#   file - file-like object supplying the data to hash
#   hash - expected hash, written as "<algorithm>:<hex digest>"
# Returns the computed hash as an "<algorithm>:<hex digest>" string rather than
# a boolean; callers compare that string with the expected value themselves.
# NOTE(review): 'file' and 'hash' shadow builtins; they are kept because they
# are the function's parameter names.
# NOTE(review): the statements that feed the data into the digest, and the
# body of the sha1 branch, are not present in this listing.
92 def validate(file, hash):
93 """Given a file-like object and a hash string, verify that the hash
94 matches the file contents."""
97 algorithm, hashvalue = hash.split(':')
101 # force migration away from sha1
102 if algorithm == "sha1":
106 m = hashlib.new(algorithm)
121 return "%s:%s" % (algorithm, m.hexdigest())
# validate_container(filename, hash): hash the file at 'filename' by opening it
# and handing the file object to validate().  Returns whatever validate()
# returns.
# The "Can't open file" message is printed through printIOError(); the
# exception handler around it is not present in this listing.
# NOTE(review): the file is opened in text mode ('r'); confirm that is intended
# for binary archives on platforms where text and binary modes differ.
# NOTE(review): no close of the opened file is visible in this listing.
124 def validate_container(filename, hash):
125 """Given a file path and a hash string, verify that the hash matches the
129 file = open(filename, 'r')
131 printIOError(e, "Can't open file " + filename)
133 return validate(file, hash)
# validate_payload(filename, hash): hash the decompressed content of an
# archive.  The decompressor is chosen by a case-insensitive match on the file
# name suffix: ".bz2" uses bz2.BZ2File, while ".gz" and ".tgz" both use
# gzip.GzipFile.  The resulting file object is handed to validate() and its
# result is returned.
# NOTE(review): what happens for any other suffix, and the exception handler
# around the "Can't open archive" message, are not present in this listing.
136 def validate_payload(filename, hash):
137 """Given a file path and a hash string, verify that the hash matches the
138 payload (uncompressed content) of the file."""
140 expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
141 expr_gz = re.compile('.+\.gz$', re.IGNORECASE)
142 expr_tgz = re.compile('.+\.tgz$', re.IGNORECASE)
145 if expr_bz.match(filename):
146 file = bz2.BZ2File(filename, 'r')
147 elif expr_gz.match(filename):
148 file = gzip.GzipFile(filename, 'r')
149 elif expr_tgz.match(filename):
150 file = gzip.GzipFile(filename, 'r')
154 printIOError(e, "Can't open archive " + filename)
156 return validate(file, hash)
# download(url, filename=None, user_agent_arg=None, quiet=None): fetch 'url'
# into a local file.
#   url            - the location to fetch
#   filename       - destination name; when not given it is taken from the last
#                    '/'-separated component of the final URL
#   user_agent_arg - when not None, sent as the User-Agent request header
#   quiet          - see the docstring
# 'quiet' is the fourth positional parameter, so callers that leave out
# 'user_agent_arg' need to pass it by keyword.
# NOTE(review): the request construction, the copy loop, the exception handler
# headers and the return statements are not present in this listing.
159 def download(url, filename=None, user_agent_arg=None, quiet=None):
160 """Download the content at the given URL to the given filename
161 (defaulting to the basename of the URL if not given). If 'quiet' is
162 True, throw away any error messages. Returns the name of the file to
163 which the content was downloaded."""
169 if user_agent_arg is not None:
170 req.add_header("User-Agent", user_agent_arg)
174 printIOError(e, "Can't open url " + url)
177 # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action
178 if src.getcode() and (3 <= int(src.getcode() / 100) <= 5):
180 print "Error code: " + str(src.getcode())
184 filename = src.geturl().split('/')[-1]
187 dst = open(filename, 'wb')
190 printIOError(e, "Can't open file " + filename + " for writing")
203 # return the name of the file that we downloaded the data to.
207 def download_paths(search, filename, url):
208 """Returns a list of URLs where the file 'filename' might be found,
209 using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.
211 If 'filename' is None, then the list will simply contain 'url'."""
215 if filename is not None:
216 tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
218 search += tmp.split(' ')
220 file = os.path.basename(filename)
222 urls = [base + '/' + file for base in search]
224 # filename should always be first
226 urls.remove(filename)
227 urls.insert(0, filename)
229 # command line url is a fallback, so it's last
230 if url is not None and url not in urls:
236 def download_from_paths(search_list, file_arg, url, link_arg, quiet=False):
237 """Attempts to download a file from a number of possible locations.
238 Generates a list of paths where the file ends up on the local
239 filesystem. This is a generator because while a download might be
240 successful, the signature or hash may not validate, and the caller may
241 want to try again from the next location. The 'link_arg' argument is a
242 boolean which, when True, specifies that if the source is not a remote
243 URL and not already found where it should be, to make a symlink to the
244 source rather than copying it."""
246 for url in download_paths(search_list, file_arg, url):
248 print "Source %s..." % url,
250 scheme, path = splittype(url)
253 if scheme in [None, 'file']:
254 if os.path.exists(path) is False:
256 print "not found, skipping file copy"
258 elif name and name != path:
259 if link_arg is False:
261 print "\n copying..."
262 shutil.copy2(path, name)
265 print "\n linking..."
266 os.symlink(path, name)
267 elif scheme in ['http', 'https', 'ftp']:
269 print "\n downloading...",
270 name = download(url, file_arg, quiet)
# Print the command-line synopsis.  The program name shown is the last
# '/'-separated component of sys.argv[0].
# NOTE(review): the enclosing definition's header is not present in this
# listing.  The synopsis also omits -k|--keep and the short -u form, both of
# which the option parser accepts.
280 print "Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] [-l|--link] " \
281 "[-h|--hash (hash)] [-s|--search (search-dir)] [-S|--sigurl (signature-url)] " \
282 "--url (url)" % (sys.argv[0].split('/')[-1])
290 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
292 user_agent_arg = None
302 opts, args = getopt.getopt(sys.argv[1:], "a:f:h:lks:u:",
303 ["file=", "link", "keep", "hash=", "search=", "url=",
304 "sigurl=", "user-agent="])
305 except getopt.GetoptError, err:
309 for opt, arg in opts:
310 if opt in ["-a", "--user-agent"]:
312 elif opt in ["-f", "--file"]:
314 elif opt in ["-l", "--link"]:
316 elif opt in ["-k", "--keep"]:
318 elif opt in ["-h", "--hash"]:
320 elif opt in ["-s", "--search"]:
321 search_list.append(arg)
322 elif opt in ["-S", "--sigurl"]:
324 elif opt in ["-u", "--url"]:
327 assert False, "unknown option"
332 for name in download_from_paths(search_list, file_arg, url_arg, link_arg):
333 print "\n validating signature...",
337 print "skipping (no signature URL)"
339 # Put the signature file in the same directory as the
340 # file we're downloading.
341 sig_file = os.path.join(
342 os.path.dirname(file_arg),
343 os.path.basename(sig_arg))
344 # Validate with the first signature we find.
345 for sig_file in download_from_paths(search_list, sig_file,
346 sig_arg, link_arg, True):
348 if validate_signature(name, sig_file):
357 print "failed (couldn't fetch signature)"
359 print " validating hash...",
360 realhash = validate_container(name, hash_arg)
363 print "skipping (no hash)"
364 print "hash is: %s" % realhash
365 elif realhash == hash_arg:
368 payloadhash = validate_payload(name, hash_arg)
369 if payloadhash == hash_arg:
372 # If the signature validated, then we assume
373 # that the expected hash is just a typo, but we
376 print "invalid hash!"
378 print "corruption detected"
380 print " expected: %s" % hash_arg
381 print " actual: %s" % realhash
382 print " payload: %s" % payloadhash
384 # An invalid hash shouldn't cause us to remove
385 # the target file if the signature was valid.
398 if __name__ == "__main__":