tools/userland-fetch

   1 #!/usr/bin/python2.7
   2 #
   3 # CDDL HEADER START
   4 #
   5 # The contents of this file are subject to the terms of the
   6 # Common Development and Distribution License (the "License").
   7 # You may not use this file except in compliance with the License.
   8 #
   9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10 # or http://www.opensolaris.org/os/licensing.
  11 # See the License for the specific language governing permissions
  12 # and limitations under the License.
  13 #
  14 # When distributing Covered Code, include this CDDL HEADER in each
  15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16 # If applicable, add the following below this CDDL HEADER, with the
  17 # fields enclosed by brackets "[]" replaced with your own identifying
  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  23 #
  24 #
  25 # fetch.py - a file download utility
  26 #
  27 #  A simple program similar to wget(1), but handles local file copy, ignores
  28 #  directories, and verifies file hashes.
  29 #
  30
  31 import os
  32 import sys
  33 import shutil
  34 from urllib import splittype
  35 from urllib2 import urlopen
  36 from urllib2 import Request
  37 import hashlib
  38
  39 def printIOError(e, txt):
  40         """ Function to decode and print IOError type exception """
  41         print "I/O Error: " + txt + ": "
  42         try:
  43                 (code, message) = e
  44                 print str(message) + " (" + str(code) + ")"
  45         except:
  46                 print str(e)
  47
  48 def validate(file, hash):
  49         algorithm, hashvalue = hash.split(':')
  50
  51         # force migration away from sha1
  52         if algorithm == "sha1":
  53                 algorithm = "sha256"
  54         try:
  55                 m = hashlib.new(algorithm)
  56         except ValueError:
  57                 return False
  58
  59         while True:
  60                 try:
  61                         block = file.read()
  62                 except IOError, err:
  63                         print str(err),
  64                         break
  65
  66                 m.update(block)
  67                 if block == '':
  68                         break
  69
  70         return "%s:%s" % (algorithm, m.hexdigest())
  71
  72 def validate_container(filename, hash):
  73         try:
  74                 file = open(filename, 'r')
  75         except IOError as e:
  76                 printIOError(e, "Can't open file " + filename)
  77                 return False
  78         return validate(file, hash)
  79
  80
  81 def validate_payload(filename, hash):
  82         import re
  83         import gzip
  84         import bz2
  85
  86         expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
  87         expr_gz = re.compile('.+\.gz$', re.IGNORECASE)
  88         expr_tgz = re.compile('.+\.tgz$', re.IGNORECASE)
  89
  90         try:
  91                 if expr_bz.match(filename):
  92                         file = bz2.BZ2File(filename, 'r')
  93                 elif expr_gz.match(filename):
  94                         file = gzip.GzipFile(filename, 'r')
  95                 elif expr_tgz.match(filename):
  96                         file = gzip.GzipFile(filename, 'r')
  97                 else:
  98                         return False
  99         except IOError as e:
 100                 printIOError(e, "Can't open archive " + filename)
 101                 return False
 102         return validate(file, hash)
 103
 104
 105 def download(url, filename = None, user_agent_arg = None):
 106         src = None
 107
 108         try:
 109                 req = Request(url)
 110                 if user_agent_arg != None:
 111                         req.add_header("User-Agent", user_agent_arg)
 112                 src = urlopen(req)
 113         except IOError as e:
 114                 printIOError(e, "Can't open url " + url)
 115                 return None
 116
 117         # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessfull action
 118         if src.getcode() and (3 <= int(src.getcode()/100) <= 5):
 119                 print "Error code: " + str(src.getcode())
 120                 return None
 121
 122         if filename == None:
 123                 filename = src.geturl().split('/')[-1]
 124
 125         try:
 126                 dst = open(filename, 'wb');
 127         except IOError as e:
 128                 printIOError(e, "Can't open file " + filename + " for writing")
 129                 src.close()
 130                 return None
 131
 132         while True:
 133                 block = src.read()
 134                 if block == '':
 135                         break;
 136                 dst.write(block)
 137
 138         src.close()
 139         dst.close()
 140
 141         # return the name of the file that we downloaded the data to.
 142         return filename
 143
 144 def download_paths(search, filename, url):
 145         urls = list()
 146
 147         if filename != None:
 148                 tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
 149                 if tmp:
 150                         search += tmp.split(' ')
 151
 152                 file = os.path.basename(filename)
 153
 154                 urls = [ base + '/' + file for base in search ]
 155
 156                 # filename should always be first
 157                 if filename in urls:
 158                         urls.remove(filename)
 159                 urls.insert(0, filename)
 160
 161         # command line url is a fallback, so it's last
 162         if url != None and url not in urls:
 163                 urls.append(url)
 164
 165         return urls
 166
 167 def usage():
 168         print "Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] " \
 169                 "[-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] " \
 170                 "-u|--url (url)" % (sys.argv[0].split('/')[-1])
 171         sys.exit(1)
 172
 173 def main():
 174         import getopt
 175
 176         # FLUSH STDOUT
 177         sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
 178
 179         user_agent_arg = None
 180         file_arg = None
 181         link_arg = False
 182         hash_arg = None
 183         url_arg = None
 184         search_list = list()
 185
 186         try:
 187                 opts, args = getopt.getopt(sys.argv[1:], "a:f:h:ls:u:",
 188                         ["file=", "link", "hash=", "search=", "url=",
 189                         "user-agent="])
 190         except getopt.GetoptError, err:
 191                 print str(err)
 192                 usage()
 193
 194         for opt, arg in opts:
 195                 if opt in [ "-a", "--user-agent" ]:
 196                         user_agent_arg = arg
 197                 elif opt in [ "-f", "--file" ]:
 198                         file_arg = arg
 199                 elif opt in [ "-l", "--link" ]:
 200                         link_arg = True
 201                 elif opt in [ "-h", "--hash" ]:
 202                         hash_arg = arg
 203                 elif opt in [ "-s", "--search" ]:
 204                         search_list.append(arg)
 205                 elif opt in [ "-u", "--url" ]:
 206                         url_arg = arg
 207                 else:
 208                         assert False, "unknown option"
 209
 210         if url_arg == None:
 211                 usage()
 212
 213         for url in download_paths(search_list, file_arg, url_arg):
 214                 print "Source %s..." % url,
 215
 216                 scheme, path = splittype(url)
 217                 name = file_arg
 218
 219                 if scheme in [ None, 'file' ]:
 220                         if os.path.exists(path) == False:
 221                                 print "not found, skipping file copy"
 222                                 continue
 223                         elif name != path:
 224                                 if link_arg == False:
 225                                         print "\n    copying..."
 226                                         shutil.copy2(path, name)
 227                                 else:
 228                                         print "\n    linking..."
 229                                         os.symlink(path, name)
 230                         else:
 231                                 pass
 232                 elif scheme in [ 'http', 'https', 'ftp' ]:
 233                         print "\n    downloading...",
 234                         name = download(url, file_arg, user_agent_arg)
 235                         if name == None:
 236                                 print "failed"
 237                                 continue
 238
 239                 print "\n    validating...",
 240                 if hash_arg == None:
 241                         print "skipping (no hash)"
 242                         sys.exit(0)
 243
 244                 realhash = validate_container(name, hash_arg)
 245                 if realhash == hash_arg:
 246                         print "ok"
 247                         sys.exit(0)
 248                 else:
 249                         payloadhash = validate_payload(name, hash_arg)
 250                         if payloadhash == hash_arg:
 251                                 print "ok"
 252                                 sys.exit(0)
 253                         print "corruption detected"
 254                         print "    expected: %s" % hash_arg
 255                         print "    actual:   %s" % realhash
 256                         print "    payload:  %s" % payloadhash
 257
 258                 try:
 259                         os.remove(name)
 260                 except OSError:
 261                         pass
 262
 263         sys.exit(1)
 264
 265 if __name__ == "__main__":
 266         main()