Log using the "0install" logger rather than "root"
[zeroinstall/solver.git] / zeroinstall / zerostore / manifest.py
bloba0737cc8b3349e49672b550a91876ff2d34c31b3
2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
19 of the manifest.
21 A top-level ".manifest" file is ignored.
22 """
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
28 import os, stat, base64
29 from zeroinstall import SafeException, _, logger
30 from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair, format_algorithm_digest_pair
32 import hashlib
33 sha1_new = hashlib.sha1
class Algorithm:
    """Abstract base class for algorithms.

    An algorithm knows how to generate a manifest from a directory tree.
    Every method here is a placeholder that raises NotImplementedError;
    concrete algorithms override all of them.

    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @raise NotImplementedError: always, in this base class"""
        # NotImplementedError derives from Exception, so callers that caught
        # the old bare Exception continue to work.
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""

    rating = 10

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.

        Line formats produced:
         - "D mtime path" for each directory other than the root itself
         - "X digest mtime size leaf" for files with any executable bit set
         - "F digest mtime size leaf" for other regular files
         - "S digest size leaf" for symlinks (digest of the UTF-8 encoded target)
        A top-level '.manifest' entry is skipped.

        @param root: directory to scan
        @raise BadDigest: if a path contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def hash_file(path):
            # Feed the file to the hash in chunks so that a very large file
            # is never held in memory all at once; the digest is the same as
            # hashing the whole contents in one call.
            digest = sha1_new()
            with open(path, 'rb') as stream:
                while True:
                    chunk = stream.read(65536)
                    if not chunk:
                        break
                    digest.update(chunk)
            return digest.hexdigest()

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub:
                raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest':
                return

            # 'sub' is always Unix style; convert it for the local filesystem.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                subdir = sub
                if not subdir.endswith('/'):
                    subdir += '/'
                for x in items:
                    for y in recurse(subdir + x):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                d = hash_file(full)
                if m & 0o111:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full).encode('utf-8')
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)

        for x in recurse('/'):
            yield x

    def new_digest(self):
        """Return a fresh SHA-1 hash object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID: 'sha1=' followed by the hex value of 'digest'."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key into the module-level 'algorithms' table
    @return: the registered algorithm object
    @raise BadDigest: if the name is unknown."""
    try:
        found = algorithms[name]
    except KeyError:
        raise BadDigest(_("Unknown algorithm '%s'") % name)
    else:
        return found
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to describe
    @param alg: name of the algorithm to look up (default 'sha1')
    @return: whatever the chosen algorithm's generate_manifest returns for 'root'"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated.
    @return: the digest object, updated with the manifest's UTF-8 bytes
    @raise SafeException: if 'dir' already contains a .manifest entry"""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)

    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: a ready-made digest implies 'sha1'.
        digest = digest_or_alg
        alg = get_algorithm('sha1')

    # Every manifest line is newline-terminated, including the last one.
    text = ''.join(line + '\n' for line in alg.generate_manifest(dir))
    manifest = text.encode('utf-8')
    digest.update(manifest)

    # The directory must be writable to create the file; it is made
    # read-only again as soon as the file has been opened.
    os.chmod(dir, 0o755)
    with open(mfile, 'wb') as stream:
        os.chmod(dir, 0o555)
        stream.write(manifest)
    os.chmod(mfile, 0o444)
    return digest
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg_name, value = parse_algorithm_digest_pair(id)
    algorithm = get_algorithm(alg_name)
    return (algorithm, value)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & umask'.
    If the digest does not match, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    with open(src, 'rb') as source_file:
        # O_EXCL makes the open fail if dest already exists.
        out_fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
        try:
            hasher = alg.new_digest()
            for block in iter(lambda: source_file.read(256), b''):
                hasher.update(block)
                # os.write may write only part of the buffer; keep going
                # until the whole block has been written.
                pending = block
                while pending:
                    count = os.write(out_fd, pending)
                    assert count >= 0
                    pending = pending[count:]
        finally:
            os.close(out_fd)

    actual = hasher.hexdigest()
    if actual != required_digest:
        os.unlink(dest)
        raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
                "Expected: %(required_digest)s\n"
                "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest (by default, root's basename) must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @param root: directory to check
    @param required_digest: expected ID, or None to use the basename of 'root'
    @raise BadDigest: if verification fails; the exception's 'detail' attribute
    describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest of the manifest regenerated from the directory's current contents.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line.encode('utf-8'))
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest of the manifest recorded on disk, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        import io
        # The manifest is UTF-8 (the regenerated lines above are digested as
        # UTF-8), so decode it explicitly instead of relying on the locale's
        # default text encoding.
        with io.open(manifest_file, 'rt', encoding = 'utf-8') as stream:
            diff = difflib.unified_diff(stream.readlines(), lines,
                            'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
253 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
254 # - A regular file suddenly turns into a symlink?
255 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @raise SafeException: for the old 'sha1' algorithm, if files listed in the manifest are
    missing from source, or if the finished copy does not give the required digest
    @raise BadDigest: if manifest_data does not hash to required_digest"""
    import tempfile

    alg = splitID(required_digest)[0]

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    # First make sure the supplied manifest really is the one named by required_digest.
    hasher = alg.new_digest()
    hasher.update(manifest_data)
    manifest_digest = alg.getID(hasher)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                  "Manifest digest: %(actual_digest)s\n"
                  "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        logger.info(_("Target directory '%s' already exists"), target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data.decode('utf-8'))

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        # _copy_files removes each entry it handles from 'wanted'.
        _copy_files(alg, wanted, source, tmpdir)

        if wanted:
            missing = '\n- '.join(wanted)
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        missing)

        # Make directories read-only (files are already RO)
        for parent, subdirs, _files in os.walk(tmpdir):
            for name in subdirs:
                subdir_path = os.path.join(parent, name)
                os.chmod(subdir_path, os.stat(subdir_path).st_mode & 0o555)

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                          "Unless the target was modified during the copy, this is a BUG\n"
                          "in 0store and should be reported.\n"
                          "Expected: %(required_digest)s\n"
                          "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        try:
            os.chmod(tmpdir, 0o755)		# need write permission to rename on MacOS X
            os.rename(tmpdir, target_impl)
            os.chmod(target_impl, 0o555)
            # Renamed successfully: nothing left for the cleanup below to remove.
            tmpdir = None
        except OSError:
            if not os.path.isdir(target_impl):
                raise
            # else someone else installed it already - return success
    finally:
        if tmpdir is not None:
            logger.info(_("Deleting tmpdir '%s'") % tmpdir)
            from zeroinstall.support import ro_rmtree
            ro_rmtree(tmpdir)
328 def _parse_manifest(manifest_data):
329 """Parse a manifest file.
330 @param manifest_data: the contents of the manifest file
331 @type manifest_data: str
332 @return: a mapping from paths to information about that path
333 @rtype: {str: tuple}"""
334 wanted = {}
335 dir = ''
336 for line in manifest_data.split('\n'):
337 if not line: break
338 if line[0] == 'D':
339 data = line.split(' ', 1)
340 if len(data) != 2: raise BadDigest(_("Bad line '%s'") % line)
341 path = data[-1]
342 if not path.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line)
343 path = path[1:]
344 dir = path
345 elif line[0] == 'S':
346 data = line.split(' ', 3)
347 path = os.path.join(dir, data[-1])
348 if len(data) != 4: raise BadDigest(_("Bad line '%s'") % line)
349 else:
350 data = line.split(' ', 4)
351 path = os.path.join(dir, data[-1])
352 if len(data) != 5: raise BadDigest(_("Bad line '%s'") % line)
353 if path in wanted:
354 raise BadDigest(_('Duplicate entry "%s"') % line)
355 wanted[path] = data[:-1]
356 return wanted
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @param alg: algorithm used to scan 'source' and to digest the copied data
    @param wanted: mapping from relative path to manifest details, as returned
    by L{_parse_manifest}; entries are removed as they are processed
    @param source: directory to copy from
    @param target: existing directory to copy into
    @raise BadDigest: if an item has a different type from the one in wanted
    @raise SafeException: if a size or symlink target does not match, or the type is unknown"""
    current_dir = ''
    for line in alg.generate_manifest(source):
        if line[0] == 'D':
            kind, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif line[0] == 'S':
            kind, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert line[0] in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)

        try:
            required_details = wanted.pop(path)
        except KeyError:
            # Logger.warn() is a deprecated alias that newer Python versions
            # no longer provide; warning() is the supported spelling.
            logger.warning(_("Skipping file not in manifest: '%s'"), path)
            continue

        if required_details[0] != kind:
            raise BadDigest(_("Item '%s' has wrong type!") % path)

        if kind == 'D':
            os.mkdir(os.path.join(target, path))
        elif kind in ('X', 'F'):
            required_type, required_digest, required_mtime, required_size = required_details
            # Sizes are compared as the strings found in the manifest lines.
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                              "%(required_size)s according to manifest)") %
                              {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            if kind == 'X':
                mode = 0o555
            else:
                mode = 0o444
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            # The mtime is part of the manifest line, so force it to the recorded value.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif kind == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                              "%(required_size)s according to manifest)") %
                              {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            # Re-read the link and check its digest before recreating it.
            symlink_target = os.readlink(os.path.join(source, path))
            symlink_digest = alg.new_digest()
            symlink_digest.update(symlink_target.encode('utf-8'))
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                              "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
            dest_path = os.path.join(target, path)
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': kind, 'path': path})
class HashLibAlgorithm(Algorithm):
    """An algorithm whose digests come from a constructor in the hashlib module.

    Directories are listed as "D path" (no mtime). Within each directory the
    files and symlinks are listed first, then each subdirectory is visited.
    """
    new_digest = None		# Constructor for digest objects

    def __init__(self, name, rating, hash_name = None):
        """@param name: algorithm name, used as the prefix of IDs
        @param rating: how much we like this algorithm (higher is better)
        @param hash_name: name of the hashlib constructor to use (defaults to 'name')"""
        self.name = name
        self.new_digest = getattr(hashlib, hash_name or name)
        self.rating = rating

    def _hash_file(self, path):
        """Return the hex digest of the contents of the file at 'path'."""
        # Feed the file to the hash in chunks so that a very large file is
        # never held in memory all at once; the digest is the same as hashing
        # the whole contents in one call.
        digest = self.new_digest()
        with open(path, 'rb') as stream:
            while True:
                chunk = stream.read(65536)
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        Regular files named '.manifest' are left out at every level.
        @raise BadDigest: if a path contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub:
                raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m):
                raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest':
                        continue

                    d = self._hash_file(path)
                    if m & 0o111:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path).encode('utf-8')
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are emitted after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)

            if not sub.endswith('/'):
                sub += '/'
            for x in dirs:
                # Note: "sub" is always Unix style. Don't use os.path.join here.
                for y in recurse(sub + x):
                    yield y
            return

        for x in recurse('/'):
            yield x

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        'sha1new' and 'sha256' use the hex digest; any other name uses
        unpadded base32."""
        if self.name in ('sha1new', 'sha256'):
            digest_str = digest.hexdigest()
        else:
            # Base32-encode newer algorithms to make the digest shorter.
            # We can't use base64 as Windows is case insensitive.
            # There's no need for padding (and = characters in paths cause problems for some software).
            digest_str = base64.b32encode(digest.digest()).rstrip(b'=').decode('ascii')
        return format_algorithm_digest_pair(self.name, digest_str)
# Algorithms known to get_algorithm(), keyed by name.
# The number passed to HashLibAlgorithm is its rating (higher is better).
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1new', 50, 'sha1'),
    'sha256': HashLibAlgorithm('sha256', 80),
    'sha256new': HashLibAlgorithm('sha256new', 90, 'sha256'),
}
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Each directory visited (including root) and every file in it is set to
    mode 555 or 444; symlinks are left alone.
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    for folder, _subdirs, filenames in os.walk(root):
        # '.' stands for the directory being visited.
        for entry in ['.'] + filenames:
            full = os.path.join(folder, entry)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                continue

            perms = stat.S_IMODE(raw_mode)
            if perms & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(perms)})
            os.chmod(full, 0o555 if perms & 0o111 else 0o444)