# Bumping manifests a=b2g-bump
# [gecko.git] / build / unix / build-clang / tooltool.py
# blob ccb77aaea2872b60d45563596eda7cae610e3f9d
1 #!/usr/bin/env python
3 #tooltool is a lookaside cache implemented in Python
4 #Copyright (C) 2011 John H. Ford <john@johnford.info>
6 #This program is free software; you can redistribute it and/or
7 #modify it under the terms of the GNU General Public License
8 #as published by the Free Software Foundation version 2
10 #This program is distributed in the hope that it will be useful,
11 #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 #GNU General Public License for more details.
15 #You should have received a copy of the GNU General Public License
16 #along with this program; if not, write to the Free Software
17 #Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# A manifest file specifies files in that directory that are stored
# elsewhere. This file should only contain files in the directory
# in which the manifest file resides and it should be called 'manifest.manifest'
__version__ = '1'  # tool version string; not referenced elsewhere in this file

import json
import os
import optparse
import logging
import hashlib
import urllib2       # NOTE(review): Python 2 only; renamed urllib.request in Python 3
import ConfigParser  # NOTE(review): Python 2 only; renamed configparser in Python 3

# Module-level logger; handlers are attached in main() (console) or at
# import time (NullHandler) at the bottom of the file.
log = logging.getLogger(__name__)
class FileRecordJSONEncoderException(Exception):
    """Raised when the JSON encoder is handed something that is not a
    FileRecord (or list of FileRecords)."""
    pass
class InvalidManifest(Exception):
    """Raised when a manifest file is absent or cannot be parsed."""
    pass
class ExceptionWithFilename(Exception):
    """Base class for exceptions that carry the offending filename."""

    def __init__(self, filename):
        super(ExceptionWithFilename, self).__init__()
        # Keep the filename so callers can report which file failed.
        self.filename = filename
class DigestMismatchException(ExceptionWithFilename):
    """Raised when a local file's digest disagrees with the manifest."""
    pass
class MissingFileException(ExceptionWithFilename):
    """Raised when validation is attempted on a file that does not exist."""
    pass
class FileRecord(object):
    """One file tracked by a manifest: its basename, size in bytes, and the
    digest of its contents under a named hashlib algorithm."""

    def __init__(self, filename, size, digest, algorithm):
        object.__init__(self)
        self.filename = filename
        self.size = size
        self.digest = digest
        self.algorithm = algorithm
        log.debug("creating %s 0x%x" % (self.__class__.__name__, id(self)))

    def __eq__(self, other):
        if self is other:
            return True
        return (self.filename == other.filename and
                self.size == other.size and
                self.digest == other.digest and
                self.algorithm == other.algorithm)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "%s.%s(filename='%s', size='%s', digest='%s', algorithm='%s')" % (__name__,
                self.__class__.__name__,
                self.filename, self.size, self.digest, self.algorithm)

    def present(self):
        # Doesn't check validity, only that a file with this name exists.
        return os.path.exists(self.filename)

    def validate_size(self):
        """Return True if the on-disk size matches; raise MissingFileException
        if the file is absent."""
        if self.present():
            return self.size == os.path.getsize(self.filename)
        else:
            log.debug("trying to validate size on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)

    def validate_digest(self):
        """Return True if the on-disk digest matches; raise MissingFileException
        if the file is absent."""
        if self.present():
            with open(self.filename, 'rb') as f:
                return self.digest == digest_file(f, self.algorithm)
        else:
            # BUG FIX: the filename was accidentally inside the format string
            # ("...%s', self.filename") and was never interpolated; pass it as
            # a lazy logging argument instead.
            log.debug("trying to validate digest on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)

    def validate(self):
        """Return True only when both size and digest match the manifest."""
        if self.validate_size():
            if self.validate_digest():
                return True
        return False

    def describe(self):
        """Return a one-line human-readable status for this record."""
        if self.present() and self.validate():
            return "'%s' is present and valid" % self.filename
        elif self.present():
            return "'%s' is present and invalid" % self.filename
        else:
            return "'%s' is absent" % self.filename
def create_file_record(filename, algorithm):
    """Build a FileRecord for *filename*, hashing its contents with
    *algorithm*.  Only the basename is stored, matching the manifest
    convention of tracking files in the manifest's own directory."""
    # 'with' guarantees the handle is closed even if digest_file raises;
    # the original leaked the open file on error.
    with open(filename, 'rb') as fo:
        fr = FileRecord(os.path.basename(filename),
                        os.path.getsize(filename),
                        digest_file(fo, algorithm),
                        algorithm)
    return fr
class FileRecordJSONEncoder(json.JSONEncoder):
    """Teach the json module to serialize FileRecord objects (and lists of
    them) as plain dicts."""

    def encode_file_record(self, obj):
        # Accept only FileRecord (or subclasses); anything else is a caller bug.
        if issubclass(type(obj), FileRecord):
            return {'filename': obj.filename,
                    'size': obj.size,
                    'algorithm': obj.algorithm,
                    'digest': obj.digest}
        err = "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, not %s" % obj.__class__.__name__
        log.warn(err)
        raise FileRecordJSONEncoderException(err)

    def default(self, f):
        # Lists are encoded element-by-element; everything else is encoded
        # as a single record.
        if issubclass(type(f), list):
            return [self.encode_file_record(item) for item in f]
        return self.encode_file_record(f)
class FileRecordJSONDecoder(json.JSONDecoder):
    """I help the json module materialize a FileRecord from
    a JSON file. I understand FileRecords and lists of
    FileRecords. I ignore things that I don't expect for now"""
    # TODO: make this more explicit in what it's looking for
    # and error out on unexpected things

    def process_file_records(self, obj):
        """Recursively convert decoded JSON into FileRecords where the shape
        matches; anything unrecognized is returned unchanged."""
        if isinstance(obj, list):
            record_list = []
            for i in obj:
                record = self.process_file_records(i)
                if issubclass(type(record), FileRecord):
                    record_list.append(record)
            return record_list
        # dict.has_key() was removed in Python 3; membership tests with "in"
        # behave identically on Python 2.6+.
        required_keys = ('filename', 'size', 'algorithm', 'digest')
        if isinstance(obj, dict) and \
           len(obj.keys()) == 4 and \
           all(k in obj for k in required_keys):
            rv = FileRecord(obj['filename'], obj['size'], obj['digest'], obj['algorithm'])
            log.debug("materialized %s" % rv)
            return rv
        return obj

    def decode(self, s):
        decoded = json.JSONDecoder.decode(self, s)
        return self.process_file_records(decoded)
class Manifest(object):
    """An ordered collection of FileRecords with JSON (de)serialization."""

    # Serialization formats this class knows how to read and write.
    valid_formats = ('json',)

    def __init__(self, file_records=None):
        # BUG FIX: the old default of file_records=[] was a single list
        # shared by every Manifest built with the default argument, so
        # records loaded into one manifest leaked into all the others.
        # Use None as the sentinel and create a fresh list per instance.
        self.file_records = [] if file_records is None else file_records

    def __eq__(self, other):
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug('Manifests differ in number of files')
            return False
        #TODO: Lists in a different order should be equal
        for i in range(len(self.file_records)):
            if self.file_records[i] != other.file_records[i]:
                log.debug('FileRecords differ, %s vs %s' % (self.file_records[i],
                                                            other.file_records[i]))
                return False
        return True

    def __deepcopy__(self, memo):
        # This is required for a deep copy
        return Manifest(self.file_records[:])

    def __copy__(self):
        return Manifest(self.file_records)

    def copy(self):
        # Shallow copy with an independent record list.
        return Manifest(self.file_records[:])

    def present(self):
        return all(i.present() for i in self.file_records)

    def validate_sizes(self):
        return all(i.validate_size() for i in self.file_records)

    def validate_digests(self):
        return all(i.validate_digest() for i in self.file_records)

    def validate(self):
        return all(i.validate() for i in self.file_records)

    def sort(self):
        #TODO: WRITE TESTS
        self.file_records.sort(key=lambda x: x.size)

    def load(self, data_file, fmt='json'):
        """Extend this manifest with records read from an open file.
        Raises InvalidManifest on unparseable input."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(json.load(data_file, cls=FileRecordJSONDecoder))
                self.sort()
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def loads(self, data_string, fmt='json'):
        """Extend this manifest with records read from a string.
        Raises InvalidManifest on unparseable input."""
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(json.loads(data_string, cls=FileRecordJSONDecoder))
                self.sort()
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def dump(self, output_file, fmt='json'):
        """Serialize this manifest to an open file, followed by a newline."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            rv = json.dump(self.file_records, output_file, indent=0, cls=FileRecordJSONEncoder)
            # Equivalent to the old Python-2-only "print >> output_file, ''",
            # but valid in both Python 2 and Python 3.
            output_file.write('\n')
            return rv

    def dumps(self, fmt='json'):
        """Serialize this manifest to a string."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)
def digest_file(f, a):
    """I take a file like object 'f' and return a hex-string containing
    of the result of the algorithm 'a' applied to 'f'."""
    hasher = hashlib.new(a)
    chunk_size = 1024 * 10
    # Stream the file in fixed-size chunks so arbitrarily large files can
    # be hashed without loading them into memory.
    chunk = f.read(chunk_size)
    while chunk:
        hasher.update(chunk)
        chunk = f.read(chunk_size)
    if hasattr(f, 'name'):
        log.debug('hashed %s with %s to be %s', f.name, a, hasher.hexdigest())
    else:
        log.debug('hashed a file with %s to be %s', a, hasher.hexdigest())
    return hasher.hexdigest()
263 # TODO: write tests for this function
def open_manifest(manifest_file):
    """I know how to take a filename and load it into a Manifest object"""
    # Guard clause: a missing file is an InvalidManifest, not an IOError.
    if not os.path.exists(manifest_file):
        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)
    manifest = Manifest()
    with open(manifest_file) as f:
        manifest.load(f)
    log.debug("loaded manifest from file '%s'" % manifest_file)
    return manifest
276 # TODO: write tests for this function
def list_manifest(manifest_file):
    """Print a present/valid status line for every file in the manifest.
    Returns False if the manifest could not be loaded, True otherwise."""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    for f in manifest.file_records:
        # Parenthesized single argument: this line is a valid print
        # statement on Python 2 and a valid function call on Python 3,
        # with identical output.
        print("%s\t%s\t%s" % ("P" if f.present() else "-",
                              "V" if f.present() and f.validate() else "-",
                              f.filename))
    return True
def validate_manifest(manifest_file):
    """I validate that all files in a manifest are present and valid but
    don't fetch or delete them if they aren't"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    invalid_files = []
    absent_files = []
    for f in manifest.file_records:
        if not f.present():
            absent_files.append(f)
        elif not f.validate():
            invalid_files.append(f)
    # Valid only when nothing was absent or invalid; replaces the verbose
    # "if len(a + b) == 0: return True else: return False" form.
    return not (invalid_files or absent_files)
311 # TODO: write tests for this function
def add_files(manifest_file, algorithm, filenames):
    """Add each of *filenames* to the manifest, hashed with *algorithm*.

    Returns True if all files were successfully added, False if not,
    and doesn't catch library Exceptions.  If any files are already
    tracked in the manifest, the return will be False because they
    weren't added.
    """
    all_files_added = True
    # Load the existing manifest to check against, or start a fresh one.
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    new_manifest = Manifest()  # use a different manifest for the output
    # Log the existing contents once, rather than once per (file, record)
    # pair as the original inner-loop placement did.
    log.debug("manifest file has '%s'" % "', ".join([x.filename for x in old_manifest.file_records]))
    for filename in filenames:
        log.debug("adding %s" % filename)
        new_fr = create_file_record(filename, algorithm)
        log.debug("appending a new file record to manifest file")
        add = True
        for fr in old_manifest.file_records:
            if new_fr == fr and new_fr.validate():
                # TODO: Decide if this case should really cause a False return
                log.info("file already in old_manifest file and matches")
                add = False
            elif new_fr == fr and not new_fr.validate():
                log.error("file already in old_manifest file but is invalid")
                add = False
            if filename == fr.filename:
                log.error("manifest already contains file named %s" % filename)
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    with open(manifest_file, 'wb') as output:
        new_manifest.dump(output, fmt='json')
    return all_files_added
353 # TODO: write tests for this function
def fetch_file(base_url, file_record, overwrite=False, grabchunk=1024*4):
    """Fetch one FileRecord's payload from the server.

    A file which is requested to be fetched that exists locally will be
    hashed.  If the hash matches the requested file's hash, nothing will be
    done and the function returns True.  If the function is told to
    overwrite and there is a digest mismatch, the existing file will be
    overwritten; otherwise a mismatch is an error and False is returned.
    """
    if file_record.present():
        if file_record.validate():
            log.info("existing '%s' is valid, not fetching" % file_record.filename)
            return True
        if overwrite:
            log.info("overwriting '%s' as requested" % file_record.filename)
        else:
            # All of the following is for a useful error message
            with open(file_record.filename, 'rb') as f:
                d = digest_file(f, file_record.algorithm)
            log.error("digest mismatch between manifest(%s...) and local file(%s...)" % \
                    (file_record.digest[:8], d[:8]))
            log.debug("full digests: manifest (%s) local file (%s)" % (file_record.digest, d))
            # Let's bail!
            return False

    # Generate the URL for the file on the server side
    url = "%s/%s/%s" % (base_url, file_record.algorithm, file_record.digest)
    log.debug("fetching from '%s'" % url)

    # TODO: This should be abstracted to make generic retreival protocol handling easy
    # Well, the file doesn't exist locally. Lets fetch it.
    try:
        f = urllib2.urlopen(url)
        log.debug("opened %s for reading" % url)
        try:
            with open(file_record.filename, 'wb') as out:
                size = 0
                # TODO: print statistics as file transfers happen both for info and to stop
                # buildbot timeouts
                indata = f.read(grabchunk)
                while indata:
                    out.write(indata)
                    size += len(indata)
                    indata = f.read(grabchunk)
                if size != file_record.size:
                    log.error("transfer from %s to %s failed due to a difference of %d bytes" % (url,
                                file_record.filename, file_record.size - size))
                    return False
        finally:
            # BUG FIX: the HTTP response was never closed, leaking the
            # connection; close it whether the transfer succeeds or not.
            f.close()
        log.info("fetched %s" % file_record.filename)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        log.error("failed to fetch '%s': %s" % (file_record.filename, e),
                  exc_info=True)
        return False
    except IOError:
        log.error("failed to write to '%s'" % file_record.filename,
                  exc_info=True)
        return False
    return True
412 # TODO: write tests for this function
def fetch_files(manifest_file, base_url, overwrite, filenames=None):
    """Fetch every file in the manifest (or only *filenames*, when given)
    from *base_url*.  Returns True only when all requested files were
    fetched and validated."""
    # BUG FIX: a mutable default ([]) is shared across calls; use None as
    # the sentinel.  An empty filter still means "fetch everything".
    if filenames is None:
        filenames = []
    # Lets load the manifest file
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    # We want to track files that fail to be fetched as well as
    # files that are fetched
    failed_files = []
    fetched_files = []
    # Lets go through the manifest and fetch the files that we want
    for f in manifest.file_records:
        if f.filename in filenames or len(filenames) == 0:
            log.debug("fetching %s" % f.filename)
            if fetch_file(base_url, f, overwrite):
                fetched_files.append(f)
            else:
                failed_files.append(f.filename)
        else:
            log.debug("skipping %s" % f.filename)

    # Even if we get the file, lets ensure that it matches what the
    # manifest specified
    for localfile in fetched_files:
        if not localfile.validate():
            log.error("'%s'" % localfile.describe())
            # BUG FIX: validation failures were logged but never failed the
            # run, contradicting the comment below; record them so a corrupt
            # download is reported as a failure.
            failed_files.append(localfile.filename)

    # If we failed to fetch or validate a file, we need to fail
    if len(failed_files) > 0:
        log.error("The following files failed: '%s'" % "', ".join(failed_files))
        return False
    return True
449 # TODO: write tests for this function
def process_command(options, args):
    """ I know how to take a list of program arguments and
    start doing the right thing with them"""
    cmd = args[0]
    cmd_args = args[1:]
    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
    log.debug("using options: %s" % options)
    # Single consistent if/elif dispatch chain (the 'list' branch was a
    # stray standalone 'if' before).
    if cmd == 'list':
        return list_manifest(options['manifest'])
    elif cmd == 'validate':
        return validate_manifest(options['manifest'])
    elif cmd == 'add':
        return add_files(options['manifest'], options['algorithm'], cmd_args)
    elif cmd == 'fetch':
        # dict.has_key() was removed in Python 3; .get() covers both the
        # "key missing" and "value is None" cases in one test.
        if options.get('base_url') is None:
            log.critical('fetch command requires url option')
            return False
        return fetch_files(options['manifest'], options['base_url'], options['overwrite'], cmd_args)
    else:
        log.critical('command "%s" is not implemented' % cmd)
        return False
472 # fetching api:
473 # http://hostname/algorithm/hash
474 # example: http://people.mozilla.org/sha1/1234567890abcedf
475 # This will make it possible to have the server allow clients to
476 # use different algorithms than what was uploaded to the server
478 # TODO: Implement the following features:
479 # -optimization: do small files first, justification is that they are faster
480 # and cause a faster failure if they are invalid
481 # -store permissions
482 # -local renames i.e. call the file one thing on the server and
483 # something different locally
484 # -deal with the cases:
485 # -local data matches file requested with different filename
486 # -two different files with same name, different hash
487 # -?only ever locally to digest as filename, symlink to real name
488 # -?maybe deal with files as a dir of the filename with all files in that dir as the versions of that file
489 # - e.g. ./python-2.6.7.dmg/0123456789abcdef and ./python-2.6.7.dmg/abcdef0123456789
def main():
    """Entry point: configure logging, parse options and config files, then
    dispatch to process_command and exit with its status."""
    # Set up logging, for now just to the console
    ch = logging.StreamHandler()
    cf = logging.Formatter("%(levelname)s - %(message)s")
    ch.setFormatter(cf)

    # Set up option parsing
    parser = optparse.OptionParser()
    # I wish there was a way to say "only allow args to be
    # sequential and at the end of the argv.
    # OH! i could step through sys.argv and check for things starting without -/-- before things starting with them
    parser.add_option('-q', '--quiet', default=False,
                      dest='quiet', action='store_true')
    parser.add_option('-v', '--verbose', default=False,
                      dest='verbose', action='store_true')
    parser.add_option('-m', '--manifest', default='manifest.tt',
                      dest='manifest', action='store',
                      help='specify the manifest file to be operated on')
    parser.add_option('-d', '--algorithm', default='sha512',
                      dest='algorithm', action='store',
                      help='openssl hashing algorithm to use')
    parser.add_option('-o', '--overwrite', default=False,
                      dest='overwrite', action='store_true',
                      help='if fetching, remote copy will overwrite a local copy that is different. ')
    parser.add_option('--url', dest='base_url', action='store',
                      help='base url for fetching files')
    parser.add_option('--ignore-config-files', action='store_true', default=False,
                      dest='ignore_cfg_files')
    (options_obj, args) = parser.parse_args()
    # Dictionaries are easier to work with
    options = vars(options_obj)

    # Use some of the option parser to figure out application
    # log level
    if options.get('verbose'):
        ch.setLevel(logging.DEBUG)
    elif options.get('quiet'):
        ch.setLevel(logging.ERROR)
    else:
        ch.setLevel(logging.INFO)
    log.addHandler(ch)

    cfg_file = ConfigParser.SafeConfigParser()
    if not options.get("ignore_cfg_files"):
        read_files = cfg_file.read(['/etc/tooltool', os.path.expanduser('~/.tooltool'),
                                    os.path.join(os.getcwd(), '.tooltool')])
        log.debug("read in the config files '%s'" % '", '.join(read_files))
    else:
        log.debug("skipping config files")

    # Command line takes precedence; fall back to the config file for
    # options that were not given.
    for option in ('base_url', 'algorithm'):
        if not options.get(option):
            try:
                options[option] = cfg_file.get('general', option)
                log.debug("read '%s' as '%s' from cfg_file" % (option, options[option]))
            except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
                log.debug("%s in config file" % e, exc_info=True)

    # dict.has_key() was removed in Python 3; "in" works everywhere.
    # (optparse always supplies 'manifest' via its default, so this is a
    # defensive check.)
    if 'manifest' not in options:
        parser.error("no manifest file specified")

    if len(args) < 1:
        parser.error('You must specify a command')
    exit(0 if process_command(options, args) else 1)
if __name__ == "__main__":
    main()
else:
    # When imported as a library, attach a no-op handler so callers without
    # logging configured don't get "no handlers could be found" warnings.
    # NOTE(review): logging.NullHandler requires Python 2.7+ — confirm the
    # minimum supported interpreter.
    log.addHandler(logging.NullHandler())
    #log.addHandler(logging.StreamHandler())