More Python 3 support
[zeroinstall/solver.git] / zeroinstall / zerostore / manifest.py
blob9f2d8f3a20942a37a24eed01f4864965be28fac6
2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
19 of the manifest.
21 A top-level ".manifest" file is ignored.
22 """
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
28 import os, stat
29 from zeroinstall import SafeException, _
30 from zeroinstall.zerostore import BadDigest
# Prefer hashlib; fall back to the legacy 'sha' module when hashlib cannot be
# imported. In the fallback case 'hashlib' is bound to None so that the rest of
# the module can test for it before registering hashlib-only algorithms.
try:
    import hashlib
    sha1_new = hashlib.sha1
except ImportError:
    # Only a missing module should trigger the fallback; any other error
    # is a real problem and must not be hidden.
    import sha
    sha1_new = sha.new
    hashlib = None
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    Subclasses must override every method below; the versions here only raise.
    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """
    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory to describe
        @raise NotImplementedError: always, in this base class"""
        # NotImplementedError is a subclass of Exception, so callers that
        # caught the old generic Exception continue to work.
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: an object previously returned by L{new_digest}
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm.

    Lines produced by generate_manifest (names are visited in sorted order and
    directories are descended into as soon as they are met):
     - "D mtime path" for every directory except the root
     - "X sha1 mtime size leaf" for files with any execute bit set
     - "F sha1 mtime size leaf" for other regular files
     - "S sha1 length leaf" for symlinks
    A top-level '.manifest' entry is skipped."""

    rating = 10

    def generate_manifest(self, root):
        """Yield the manifest lines (without trailing newlines) for the tree at 'root'.
        @param root: path of the directory to describe
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            # 'sub' is always Unix style; convert it for the local platform.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                subdir = sub
                if not subdir.endswith('/'):
                    subdir += '/'
                for x in items:
                    for y in recurse(subdir + x):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Hash in chunks so that large files need not fit in memory.
                hasher = sha1_new()
                with open(full, 'rb') as stream:
                    while True:
                        chunk = stream.read(65536)
                        if not chunk: break
                        hasher.update(chunk)
                d = hasher.hexdigest()
                if m & 0o111:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full)
                # Hash objects only accept bytes, but readlink returns text
                # when given a text path on Python 3; hash the UTF-8 form.
                if not isinstance(target, bytes):
                    target = target.encode('utf-8')
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh, empty SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID ('sha1=' followed by the hex digest) for 'digest'."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Return the L{Algorithm} registered under 'name' in the module's 'algorithms' table.
    @param name: the algorithm name to look up
    @raise BadDigest: if the name is unknown."""
    try:
        found = algorithms[name]
    except KeyError:
        raise BadDigest(_("Unknown algorithm '%s'") % name)
    return found
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to describe
    @param alg: name of the algorithm to look up (default 'sha1')
    @return: whatever the selected algorithm's generate_manifest returns"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Create 'dir/.manifest' describing the tree under 'dir' and return the digest object
    after it has been updated with the manifest's bytes.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated (the 'sha1' algorithm is then used to build the manifest).
    @raise SafeException: if 'dir' already contains a .manifest entry"""
    manifest_path = os.path.join(dir, '.manifest')
    if os.path.islink(manifest_path) or os.path.exists(manifest_path):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)

    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: the caller supplied the digest object.
        digest = digest_or_alg
        alg = get_algorithm('sha1')

    # Every manifest line is newline-terminated; the text is hashed and
    # stored as UTF-8.
    text = ''.join(line + '\n' for line in alg.generate_manifest(dir))
    manifest = text.encode('utf-8')
    digest.update(manifest)

    # The directory is writable only for as long as it takes to create the
    # file; it is locked again before the contents are written.
    os.chmod(dir, 0o755)
    with open(manifest_path, 'wb') as stream:
        os.chmod(dir, 0o555)
        stream.write(manifest)
    os.chmod(manifest_path, 0o444)
    return digest
def splitID(id):
    """Split an ID of the form 'alg=value' at its first '='.
    @param id: the full ID string
    @return: a tuple (alg, value), where 'alg' is an instance of Algorithm
    and 'value' is a string.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg_name, separator, value = id.partition('=')
    if not separator:
        # No '=' anywhere in the ID.
        raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
    return (get_algorithm(alg_name), value)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy the file 'src' to the new file 'dest', hashing the data as it is copied.
    dest must not exist. New file is created with a mode of 'mode & umask'.
    If the hex digest of the data differs from required_digest, dest is removed again.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    with open(src, 'rb') as source_file:
        # O_EXCL makes the open fail if 'dest' already exists.
        out_fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
        try:
            hasher = alg.new_digest()
            for block in iter(lambda: source_file.read(256), b''):
                hasher.update(block)
                # os.write may write only part of the buffer; keep going
                # until the whole block has been written.
                remaining = block
                while remaining:
                    count = os.write(out_fd, remaining)
                    assert count >= 0
                    remaining = remaining[count:]
        finally:
            os.close(out_fd)

    computed = hasher.hexdigest()
    if computed != required_digest:
        os.unlink(dest)
        raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
                "Expected: %(required_digest)s\n"
                "Actual:   %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': computed})
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - required_digest must be in the form "alg=value" (when it is not given,
       the last component of root's path is used instead)
     - The calculated digest of the contents must match it.
     - root must contain a .manifest file whose digest also matches.
    @param root: the directory to check
    @param required_digest: the expected ID, or None to use the name of 'root'
    @raise BadDigest: if verification fails. The exception's 'detail' attribute
    holds an explanation of which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        # Digest objects only accept bytes. Text lines are hashed as UTF-8,
        # which is also how add_manifest_file encodes the stored manifest.
        digest.update(line if isinstance(line, bytes) else line.encode('utf-8'))
        lines.append(line)
    actual_digest = alg.getID(digest)

    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        # Binary mode: the digest must cover the exact bytes on disk, with
        # no newline translation or decoding.
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             "   Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        with open(manifest_file, 'rb') as stream:
            recorded = stream.readlines()
        if str is not bytes:
            # Python 3: 'lines' holds text, so the recorded lines must be
            # text too or difflib cannot compare and join them.
            recorded = [entry.decode('utf-8', 'replace') for entry in recorded]
        diff = difflib.unified_diff(recorded, lines,
                        'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
260 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
261 # - A regular file suddenly turns into a symlink?
262 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @param source: the directory to copy from
    @param target: the directory in which to create the 'required_digest' subdirectory
    @param manifest_data: the manifest contents, as bytes or as text
    @param required_digest: the expected ID, in the form 'alg=value'
    @raise BadDigest: if manifest_data does not have the required digest
    @raise SafeException: if the algorithm is 'sha1', if files are missing from
    source, or if the finished copy does not have the required digest"""
    import tempfile
    from logging import info

    alg, digest_value = splitID(required_digest)

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    # Digest objects only accept bytes, so hash the UTF-8 form of a text manifest.
    if isinstance(manifest_data, bytes):
        manifest_bytes = manifest_data
    else:
        manifest_bytes = manifest_data.encode('utf-8')

    digest = alg.new_digest()
    digest.update(manifest_bytes)
    manifest_digest = alg.getID(digest)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info(_("Target directory '%s' already exists"), target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    # The parser splits on '\n', so on Python 3 it needs text. Decoding is
    # left until after the digest check so that only verified data is decoded.
    # (On Python 2, str is bytes and can be parsed as it is.)
    if isinstance(manifest_data, bytes) and str is not bytes:
        manifest_text = manifest_data.decode('utf-8')
    else:
        manifest_text = manifest_data
    wanted = _parse_manifest(manifest_text)

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        _copy_files(alg, wanted, source, tmpdir)

        if wanted:
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        '\n- '.join(wanted.keys()))

        # Make directories read-only (files are already RO)
        for root, dirs, files in os.walk(tmpdir):
            for d in dirs:
                path = os.path.join(root, d)
                mode = os.stat(path).st_mode
                os.chmod(path, mode & 0o555)

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                        "Unless the target was modified during the copy, this is a BUG\n"
                        "in 0store and should be reported.\n"
                        "Expected: %(required_digest)s\n"
                        "Actual:   %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        try:
            os.chmod(tmpdir, 0o755)     # need write permission to rename on MacOS X
            os.rename(tmpdir, target_impl)
            os.chmod(target_impl, 0o555)
            # Renamed successfully: nothing is left for the cleanup below.
            tmpdir = None
        except OSError:
            if not os.path.isdir(target_impl):
                raise
            # else someone else installed it already - return success
    finally:
        if tmpdir is not None:
            info(_("Deleting tmpdir '%s'") % tmpdir)
            from zeroinstall.support import ro_rmtree
            ro_rmtree(tmpdir)
def _parse_manifest(manifest_data):
    """Parse a manifest file.
    Parsing stops at the first empty line. 'D' lines set the directory that the
    following file and symlink entries are relative to.
    @param manifest_data: the contents of the manifest file
    @type manifest_data: str
    @return: a mapping from paths to information about that path (the
    space-separated fields of the entry's line, without the trailing name)
    @rtype: {str: tuple}
    @raise BadDigest: if a line has the wrong number of fields, a directory
    is not absolute, or a path appears twice"""
    entries = {}
    current_dir = ''
    for line in manifest_data.split('\n'):
        if not line:
            break
        kind = line[0]
        if kind == 'D':
            fields = line.split(' ', 1)
            if len(fields) != 2:
                raise BadDigest(_("Bad line '%s'") % line)
            path = fields[-1]
            if not path.startswith('/'):
                raise BadDigest(_("Not absolute: '%s'") % line)
            path = path[1:]
            current_dir = path
        else:
            # Symlink lines have four fields; everything else has five.
            # The name comes last and may itself contain spaces.
            expected = 4 if kind == 'S' else 5
            fields = line.split(' ', expected - 1)
            if len(fields) != expected:
                raise BadDigest(_("Bad line '%s'") % line)
            path = os.path.join(current_dir, fields[-1])
        if path in entries:
            raise BadDigest(_('Duplicate entry "%s"') % line)
        entries[path] = fields[:-1]
    return entries
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @param alg: the algorithm used to scan 'source' and to create digests
    @param wanted: mapping from relative path to the required details, as
    returned by L{_parse_manifest}. Entries are removed as they are processed.
    @param source: the directory to copy from
    @param target: the existing directory to copy into
    @raise BadDigest: if an item has a different type from the one required
    @raise SafeException: if a size or a symlink target is wrong, or the type is unknown"""
    from logging import warning
    current_dir = ''
    for line in alg.generate_manifest(source):
        if line[0] == 'D':
            kind, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif line[0] == 'S':
            kind, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert line[0] in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)
        try:
            required_details = wanted.pop(path)
        except KeyError:
            warning(_("Skipping file not in manifest: '%s'"), path)
            continue
        if required_details[0] != kind:
            raise BadDigest(_("Item '%s' has wrong type!") % path)
        if kind == 'D':
            os.mkdir(os.path.join(target, path))
        elif kind in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            # Both sizes are still the strings taken from the manifest lines.
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            if kind == 'X':
                mode = 0o555
            else:
                mode = 0o444
            # The digest is checked again on the data actually copied.
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            # The mtime is part of the manifest line, so the copy is given the required one.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif kind == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            symlink_target = os.readlink(os.path.join(source, path))
            # Digest objects only accept bytes, but readlink returns text
            # when given a text path on Python 3; hash the UTF-8 form.
            if isinstance(symlink_target, bytes):
                target_bytes = symlink_target
            else:
                target_bytes = symlink_target.encode('utf-8')
            symlink_digest = alg.new_digest()
            symlink_digest.update(target_bytes)
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                            "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
            dest_path = os.path.join(target, path)
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': kind, 'path': path})
class HashLibAlgorithm(Algorithm):
    """An algorithm whose digests are created by a hash constructor.

    Lines produced by generate_manifest, for each directory in turn:
     - "D path" for the directory itself (not for the root)
     - then, for its entries in sorted order:
       "X digest mtime size leaf" for files with any execute bit set,
       "F digest mtime size leaf" for other regular files,
       "S digest length leaf" for symlinks
     - then the same for each of its subdirectories.
    Regular files named '.manifest' are skipped.
    @ivar name: the prefix used in IDs ('sha1new' when created with 'sha1')
    @ivar rating: how much we like this algorithm (higher is better)"""
    new_digest = None       # Constructor for digest objects

    def __init__(self, name, rating):
        """@param name: 'sha1', or the name of a constructor in the hashlib module
        @param rating: stored as the 'rating' attribute"""
        if name == 'sha1':
            # 'sha1' is available even without hashlib, and is published
            # under the ID prefix 'sha1new'.
            self.new_digest = sha1_new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name
        self.rating = rating

    def generate_manifest(self, root):
        """Yield the manifest lines (without trailing newlines) for the tree at 'root'.
        @param root: path of the directory to describe
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            # Subdirectories are collected here and visited only after
            # all of this directory's own entries have been listed.
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest': continue

                    # Hash in chunks so that large files need not fit in memory.
                    hasher = new_digest()
                    with open(path, 'rb') as stream:
                        while True:
                            chunk = stream.read(65536)
                            if not chunk: break
                            hasher.update(chunk)
                    d = hasher.hexdigest()
                    if m & 0o111:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path)
                    # Hash objects only accept bytes, but readlink returns text
                    # when given a text path on Python 3; hash the UTF-8 form.
                    if not isinstance(target, bytes):
                        target = target.encode('utf-8')
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)

            if not sub.endswith('/'):
                sub += '/'
            for x in dirs:
                # Note: "sub" is always Unix style. Don't use os.path.join here.
                for y in recurse(sub + x): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID (this algorithm's name, '=', then the hex digest) for 'digest'."""
        return self.name + '=' + digest.hexdigest()
# Registry of the known algorithms, keyed by the name used as the prefix of an
# ID ("name=value"). get_algorithm() looks names up in this table.
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1', 50),
}

# sha256 needs hashlib, which is None when only the legacy 'sha' module
# could be imported.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256', 80)
def fixup_permissions(root):
    """Normalise the permissions of 'root' and of everything found by walking it:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Symlinks are left alone.
    @param root: the top of the directory tree to process
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    for main, dirs, files in os.walk(root):
        # '.' stands for the directory being visited, so each directory is
        # processed along with the files it contains.
        for entry in ['.'] + files:
            full = os.path.join(main, entry)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                continue

            mode = stat.S_IMODE(raw_mode)
            # Anything beyond the nine rwx bits is refused.
            if mode & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})
            os.chmod(full, 0o555 if mode & 0o111 else 0o444)