3 #tooltool is a lookaside cache implemented in Python
4 #Copyright (C) 2011 John H. Ford <john@johnford.info>
6 #This program is free software; you can redistribute it and/or
7 #modify it under the terms of the GNU General Public License
8 #as published by the Free Software Foundation version 2
10 #This program is distributed in the hope that it will be useful,
11 #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 #GNU General Public License for more details.
15 #You should have received a copy of the GNU General Public License
16 #along with this program; if not, write to the Free Software
17 #Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# A manifest file specifies files in a directory that are stored
# elsewhere.  The manifest should only list files in the directory in
# which it resides, and it should be called 'manifest.manifest'
# Module-level logger shared by every function in this file; handlers are
# attached near the bottom of the file at script start-up.
log = logging.getLogger(__name__)
class FileRecordJSONEncoderException(Exception):
    """Raised by FileRecordJSONEncoder when asked to encode something that
    is not a FileRecord (or a list of FileRecords)."""
    pass
class InvalidManifest(Exception):
    """Raised when a manifest file is absent or cannot be parsed."""
    pass
class ExceptionWithFilename(Exception):
    """Base class for exceptions that concern one particular file on disk."""

    def __init__(self, filename):
        # Keep the offending filename around so callers can report it.
        super(ExceptionWithFilename, self).__init__()
        self.filename = filename
class DigestMismatchException(ExceptionWithFilename):
    """The named file's content digest did not match the expected digest."""
    pass
class MissingFileException(ExceptionWithFilename):
    """Raised when a file named by a FileRecord is not present on disk."""
    pass
class FileRecord(object):
    """One manifest entry: a filename plus the size and digest (computed
    with 'algorithm') that the file is expected to have on disk."""

    def __init__(self, filename, size, digest, algorithm):
        object.__init__(self)
        self.filename = filename
        self.size = size
        self.digest = digest
        self.algorithm = algorithm
        log.debug("creating %s 0x%x" % (self.__class__.__name__, id(self)))

    def __eq__(self, other):
        if self is other:
            return True
        # Two records are equal when all four recorded fields agree.
        return (self.filename == other.filename and
                self.size == other.size and
                self.digest == other.digest and
                self.algorithm == other.algorithm)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "%s.%s(filename='%s', size='%s', digest='%s', algorithm='%s')" % (
            __name__, self.__class__.__name__, self.filename, self.size,
            self.digest, self.algorithm)

    def present(self):
        # Doesn't check validity
        return os.path.exists(self.filename)

    def validate_size(self):
        """Return True if the on-disk size matches the recorded size.

        Raises MissingFileException when the file is absent."""
        if self.present():
            return self.size == os.path.getsize(self.filename)
        log.debug("trying to validate size on a missing file, %s", self.filename)
        raise MissingFileException(filename=self.filename)

    def validate_digest(self):
        """Return True if the on-disk digest matches the recorded digest.

        Raises MissingFileException when the file is absent."""
        if self.present():
            with open(self.filename, 'rb') as f:
                return self.digest == digest_file(f, self.algorithm)
        # BUG FIX: the filename argument used to be swallowed inside the
        # format-string literal, so it was never interpolated.
        log.debug("trying to validate digest on a missing file, %s", self.filename)
        raise MissingFileException(filename=self.filename)

    def validate(self):
        """True only when both the size and the digest validate."""
        if self.validate_size():
            if self.validate_digest():
                return True
        return False

    def describe(self):
        """Return a one-line human-readable status for this record's file."""
        if self.present() and self.validate():
            return "'%s' is present and valid" % self.filename
        elif self.present():
            return "'%s' is present and invalid" % self.filename
        else:
            return "'%s' is absent" % self.filename
def create_file_record(filename, algorithm):
    """Hash 'filename' with 'algorithm' and return the resulting FileRecord.

    The record stores only the basename; the size and digest are taken from
    the file as it exists right now."""
    # Use a context manager so the handle is closed even if hashing fails
    # (the original opened the file and relied on a manual close).
    with open(filename, 'rb') as fo:
        stored_filename = os.path.split(filename)[1]
        fr = FileRecord(stored_filename, os.path.getsize(filename),
                        digest_file(fo, algorithm), algorithm)
    return fr
class FileRecordJSONEncoder(json.JSONEncoder):
    """Serialize FileRecord objects (or lists of them) as plain JSON dicts."""

    def encode_file_record(self, obj):
        """Map one FileRecord to a JSON-serializable dict.

        Raises FileRecordJSONEncoderException for anything else."""
        if not issubclass(type(obj), FileRecord):
            err = "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, not %s" % obj.__class__.__name__
            raise FileRecordJSONEncoderException(err)
        return {'filename': obj.filename,
                'size': obj.size,
                'algorithm': obj.algorithm,
                'digest': obj.digest}

    def default(self, f):
        # json.JSONEncoder hook invoked for objects json can't serialize
        # natively; supports a bare FileRecord or a list of them.
        if issubclass(type(f), list):
            return [self.encode_file_record(i) for i in f]
        return self.encode_file_record(f)
class FileRecordJSONDecoder(json.JSONDecoder):
    """I help the json module materialize a FileRecord from
    a JSON file. I understand FileRecords and lists of
    FileRecords. I ignore things that I don't expect for now"""
    # TODO: make this more explicit in what it's looking for
    # and error out on unexpected things

    def process_file_records(self, obj):
        """Recursively convert decoded JSON into FileRecords where possible.

        Lists are filtered down to the FileRecords they contain; a dict with
        exactly the four expected keys becomes a FileRecord; anything else is
        returned unchanged."""
        if isinstance(obj, list):
            record_list = []
            for i in obj:
                record = self.process_file_records(i)
                if issubclass(type(record), FileRecord):
                    record_list.append(record)
            return record_list
        required_keys = ('filename', 'size', 'algorithm', 'digest')
        # NOTE: dict.has_key() was replaced with 'in' (has_key is deprecated
        # in Python 2 and gone in Python 3); the check is otherwise identical.
        if isinstance(obj, dict) and \
           len(obj.keys()) == 4 and \
           all(k in obj for k in required_keys):
            rv = FileRecord(obj['filename'], obj['size'],
                            obj['digest'], obj['algorithm'])
            log.debug("materialized %s" % rv)
            return rv
        return obj

    def decode(self, s):
        decoded = json.JSONDecoder.decode(self, s)
        rv = self.process_file_records(decoded)
        return rv
class Manifest(object):
    """An ordered collection of FileRecords plus (de)serialization helpers."""

    # serialization formats understood by load/loads/dump/dumps
    valid_formats = ('json',)

    def __init__(self, file_records=None):
        # BUG FIX: the default used to be a shared mutable list ([]), so every
        # Manifest() created without arguments aliased the same list.
        self.file_records = file_records if file_records is not None else []

    def __eq__(self, other):
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug('Manifests differ in number of files')
            return False
        #TODO: Lists in a different order should be equal
        for mine, theirs in zip(self.file_records, other.file_records):
            if mine != theirs:
                log.debug('FileRecords differ, %s vs %s' % (mine, theirs))
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __deepcopy__(self, memo):
        # This is required for a deep copy
        return Manifest(self.file_records[:])

    def __copy__(self):
        # shallow copy: shares the underlying record list
        return Manifest(self.file_records)

    def copy(self):
        return Manifest(self.file_records[:])

    def present(self):
        return all(i.present() for i in self.file_records)

    def validate_sizes(self):
        return all(i.validate_size() for i in self.file_records)

    def validate_digests(self):
        return all(i.validate_digest() for i in self.file_records)

    def validate(self):
        return all(i.validate() for i in self.file_records)

    def sort(self):
        # smallest files first
        self.file_records.sort(key=lambda x: x.size)

    def load(self, data_file, fmt='json'):
        """Extend file_records with records parsed from an open file object.

        Raises InvalidManifest when the data cannot be parsed."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(
                    json.load(data_file, cls=FileRecordJSONDecoder))
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def loads(self, data_string, fmt='json'):
        """Extend file_records with records parsed from a string.

        Raises InvalidManifest when the data cannot be parsed."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(
                    json.loads(data_string, cls=FileRecordJSONDecoder))
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def dump(self, output_file, fmt='json'):
        """Serialize the manifest to an open file object."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            rv = json.dump(self.file_records, output_file, indent=0,
                           cls=FileRecordJSONEncoder)
            # trailing newline; write() replaces the py2-only
            # "print >> output_file, ''" with an equivalent portable call
            output_file.write('\n')
            return rv

    def dumps(self, fmt='json'):
        """Serialize the manifest and return it as a string."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)
def digest_file(f, a):
    """I take a file like object 'f' and return a hex-string containing
    of the result of the algorithm 'a' applied to 'f'."""
    # The hash creation/update/return were lost in the mangled text; this
    # restores the chunked-read hashing loop the docstring describes.
    h = hashlib.new(a)
    chunk_size = 1024 * 10
    data = f.read(chunk_size)
    while data:
        h.update(data)
        data = f.read(chunk_size)
    if hasattr(f, 'name'):
        log.debug('hashed %s with %s to be %s', f.name, a, h.hexdigest())
    else:
        log.debug('hashed a file with %s to be %s', a, h.hexdigest())
    return h.hexdigest()
263 # TODO: write tests for this function
def open_manifest(manifest_file):
    """I know how to take a filename and load it into a Manifest object"""
    # Guard clause: a missing file cannot be a manifest.
    if not os.path.exists(manifest_file):
        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)
    manifest = Manifest()
    with open(manifest_file) as f:
        manifest.load(f)
        log.debug("loaded manifest from file '%s'" % manifest_file)
    return manifest
276 # TODO: write tests for this function
def list_manifest(manifest_file):
    """I know how print all the files in a location"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    for f in manifest.file_records:
        # P = present on disk, V = present and valid
        print("%s\t%s\t%s" % ("P" if f.present() else "-",
                              "V" if f.present() and f.validate() else "-",
                              f.filename))
    return True
def validate_manifest(manifest_file):
    """I validate that all files in a manifest are present and valid but
    don't fetch or delete them if they aren't"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    invalid_files = []
    absent_files = []
    for f in manifest.file_records:
        if not f.present():
            absent_files.append(f)
        elif not f.validate():
            invalid_files.append(f)
    # valid only when nothing is absent and nothing is invalid
    if len(invalid_files + absent_files) == 0:
        return True
    return False
311 # TODO: write tests for this function
def add_files(manifest_file, algorithm, filenames):
    """Hash each file in 'filenames' with 'algorithm' and record it in
    manifest_file (which is rewritten).

    Returns True if all files were successfully added, False if not,
    and doesn't catch library Exceptions.  If any files are already
    tracked in the manifest, the return will be False because they
    weren't added this time."""
    all_files_added = True
    # Create a old_manifest object to add to
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    new_manifest = Manifest()  # use a different manifest for the output
    for filename in filenames:
        log.debug("adding %s" % filename)
        new_fr = create_file_record(filename, algorithm)
        log.debug("appending a new file record to manifest file")
        add = True
        for fr in old_manifest.file_records:
            log.debug("manifest file has '%s'" % "', ".join(
                [x.filename for x in old_manifest.file_records]))
            if new_fr == fr and new_fr.validate():
                # TODO: Decide if this case should really cause a False return
                log.info("file already in old_manifest file and matches")
                add = False
            elif new_fr == fr and not new_fr.validate():
                log.error("file already in old_manifest file but is invalid")
                add = False
            if filename == fr.filename:
                log.error("manifest already contains file named %s" % filename)
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    # NOTE(review): binary mode works on py2; py3 would need 'w' here.
    with open(manifest_file, 'wb') as output:
        new_manifest.dump(output, fmt='json')
    return all_files_added
353 # TODO: write tests for this function
def fetch_file(base_url, file_record, overwrite=False, grabchunk=1024*4):
    """Download the file described by 'file_record' from 'base_url'.

    A file which is requested to be fetched that exists locally will be hashed.
    If the hash matches the requested file's hash, nothing will be done and the
    function will return. If the function is told to overwrite and there is a
    digest mismatch, the existing file will be overwritten.
    Returns True on success, False otherwise."""
    if file_record.present():
        if file_record.validate():
            log.info("existing '%s' is valid, not fetching" % file_record.filename)
            return True
        if overwrite:
            log.info("overwriting '%s' as requested" % file_record.filename)
        else:
            # All of the following is for a useful error message
            with open(file_record.filename, 'rb') as f:
                d = digest_file(f, file_record.algorithm)
            log.error("digest mismatch between manifest(%s...) and local file(%s...)" %
                      (file_record.digest[:8], d[:8]))
            log.debug("full digests: manifest (%s) local file (%s)" %
                      (file_record.digest, d))
            return False

    # Generate the URL for the file on the server side
    url = "%s/%s/%s" % (base_url, file_record.algorithm, file_record.digest)
    log.debug("fetching from '%s'" % url)

    # TODO: This should be abstracted to make generic retrieval protocol handling easy
    # Well, the file doesn't exist locally.  Lets fetch it.
    try:
        f = urllib2.urlopen(url)
        log.debug("opened %s for reading" % url)
        with open(file_record.filename, 'wb') as out:
            # TODO: print statistics as file transfers happen both for info and to stop
            size = 0
            while True:
                indata = f.read(grabchunk)
                out.write(indata)
                size += len(indata)
                if not indata:
                    break
        if size != file_record.size:
            log.error("transfer from %s to %s failed due to a difference of %d bytes" %
                      (url, file_record.filename, file_record.size - size))
            return False
        log.info("fetched %s" % file_record.filename)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        log.error("failed to fetch '%s': %s" % (file_record.filename, e),
                  exc_info=True)
        return False
    except IOError:
        log.error("failed to write to '%s'" % file_record.filename,
                  exc_info=True)
        return False
    return True
412 # TODO: write tests for this function
def fetch_files(manifest_file, base_url, overwrite, filenames=None):
    """Fetch every file listed in manifest_file (or only those named in
    'filenames' when it is non-empty) from base_url, then validate them.

    Returns True when everything fetched and validated, False otherwise."""
    # BUG FIX: default changed from a shared mutable list ([]) to None;
    # an empty/None filenames still means "fetch everything".
    filenames = filenames or []
    # Lets load the manifest file
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    # We want to track files that fail to be fetched as well as
    # files that are fetched
    failed_files = []
    fetched_files = []
    # Lets go through the manifest and fetch the files that we want
    for f in manifest.file_records:
        if f.filename in filenames or len(filenames) == 0:
            log.debug("fetching %s" % f.filename)
            if fetch_file(base_url, f, overwrite):
                fetched_files.append(f)
            else:
                failed_files.append(f.filename)
        else:
            log.debug("skipping %s" % f.filename)
    # Even if we get the file, lets ensure that it matches what the
    # manifest specified
    for localfile in fetched_files:
        if not localfile.validate():
            log.error("'%s'" % localfile.describe())
            failed_files.append(localfile.filename)
    # If we failed to fetch or validate a file, we need to fail
    if len(failed_files) > 0:
        log.error("The following files failed: '%s'" % "', ".join(failed_files))
        return False
    return True
449 # TODO: write tests for this function
def process_command(options, args):
    """ I know how to take a list of program arguments and
    start doing the right thing with them"""
    cmd = args[0]
    cmd_args = args[1:]
    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
    log.debug("using options: %s" % options)
    if cmd == 'list':
        return list_manifest(options['manifest'])
    if cmd == 'validate':
        return validate_manifest(options['manifest'])
    if cmd == 'add':
        return add_files(options['manifest'], options['algorithm'], cmd_args)
    if cmd == 'fetch':
        # options.get() returns None both when the key is missing and when
        # it is explicitly None, so this replaces the old has_key() check.
        if options.get('base_url') is None:
            log.critical('fetch command requires url option')
            return False
        return fetch_files(options['manifest'], options['base_url'],
                           options['overwrite'], cmd_args)
    log.critical('command "%s" is not implemented' % cmd)
    return False
473 # http://hostname/algorithm/hash
474 # example: http://people.mozilla.org/sha1/1234567890abcedf
475 # This will make it possible to have the server allow clients to
476 # use different algorithms than what was uploaded to the server
478 # TODO: Implement the following features:
479 # -optimization: do small files first, justification is that they are faster
480 # and cause a faster failure if they are invalid
482 # -local renames i.e. call the file one thing on the server and
483 # something different locally
484 # -deal with the cases:
485 # -local data matches file requested with different filename
486 # -two different files with same name, different hash
487 # -?only ever locally to digest as filename, symlink to real name
488 # -?maybe deal with files as a dir of the filename with all files in that dir as the versions of that file
489 # - e.g. ./python-2.6.7.dmg/0123456789abcdef and ./python-2.6.7.dmg/abcdef0123456789
def main():
    """Console entry point: configure logging, parse command-line options
    and config files, then dispatch to process_command()."""
    # Set up logging, for now just to the console
    ch = logging.StreamHandler()
    cf = logging.Formatter("%(levelname)s - %(message)s")
    ch.setFormatter(cf)

    # Set up option parsing
    parser = optparse.OptionParser()
    # I wish there was a way to say "only allow args to be
    # sequential and at the end of the argv.
    # OH! i could step through sys.argv and check for things starting without -/-- before things starting with them
    parser.add_option('-q', '--quiet', default=False,
                      dest='quiet', action='store_true')
    parser.add_option('-v', '--verbose', default=False,
                      dest='verbose', action='store_true')
    parser.add_option('-m', '--manifest', default='manifest.tt',
                      dest='manifest', action='store',
                      help='specify the manifest file to be operated on')
    parser.add_option('-d', '--algorithm', default='sha512',
                      dest='algorithm', action='store',
                      help='openssl hashing algorithm to use')
    parser.add_option('-o', '--overwrite', default=False,
                      dest='overwrite', action='store_true',
                      help='if fetching, remote copy will overwrite a local copy that is different. ')
    parser.add_option('--url', dest='base_url', action='store',
                      help='base url for fetching files')
    parser.add_option('--ignore-config-files', action='store_true', default=False,
                      dest='ignore_cfg_files')
    (options_obj, args) = parser.parse_args()
    # Dictionaries are easier to work with
    options = vars(options_obj)

    # Use some of the option parser to figure out application
    # log level
    if options.get('verbose'):
        ch.setLevel(logging.DEBUG)
    elif options.get('quiet'):
        ch.setLevel(logging.ERROR)
    else:
        ch.setLevel(logging.INFO)
    log.addHandler(ch)

    cfg_file = ConfigParser.SafeConfigParser()
    if not options.get("ignore_cfg_files"):
        read_files = cfg_file.read(['/etc/tooltool', os.path.expanduser('~/.tooltool'),
                                    os.path.join(os.getcwd(), '.tooltool')])
        log.debug("read in the config files '%s'" % '", '.join(read_files))
    else:
        log.debug("skipping config files")

    # Command-line options take precedence over config-file values.
    for option in ('base_url', 'algorithm'):
        if not options.get(option):
            try:
                options[option] = cfg_file.get('general', option)
                log.debug("read '%s' as '%s' from cfg_file" % (option, options[option]))
            except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
                log.debug("%s in config file" % e, exc_info=True)

    if 'manifest' not in options:
        parser.error("no manifest file specified")
    if len(args) < 1:
        parser.error('You must specify a command')
    exit(0 if process_command(options, args) else 1)

if __name__ == "__main__":
    main()
else:
    # When imported as a library, avoid "no handlers" warnings from logging.
    log.addHandler(logging.NullHandler())
#log.addHandler(logging.StreamHandler())