Replaced x.startswith('/') with os.path.isabs(x)
[zeroinstall/zeroinstall-afb.git] / zeroinstall / zerostore / manifest.py
blobab637f5baaa1d961fcc6d1b1506a0e3815b4c3d7
"""Processing of implementation manifests.

A manifest is a string representing a directory tree, with the property
that two trees will generate identical manifest strings if and only if:

 - They have exactly the same set of files, directories and symlinks.
 - For each pair of corresponding directories in the two sets:
   - The mtimes are the same (OldSHA1 only).
 - For each pair of corresponding files in the two sets:
   - The size, executable flag and mtime are the same.
   - The contents have matching secure hash values.
 - For each pair of corresponding symlinks in the two sets:
   - The mtime and size are the same.
   - The targets have matching secure hash values.

The manifest is typically processed with a secure hash itself. So, the idea is that
any significant change to the contents of the tree will change the secure hash value
of the manifest.

A top-level ".manifest" file is ignored.
"""
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
27 from __future__ import generators
28 import os, stat
29 from zeroinstall import SafeException, _
30 from zeroinstall.zerostore import BadDigest
# Pick the SHA-1 constructor. 'hashlib' is preferred; when it cannot be
# imported we fall back to the legacy 'sha' module and record the absence
# by setting hashlib to None (checked later before registering sha256).
try:
    import hashlib
    sha1_new = hashlib.sha1
except ImportError:
    # Only a missing module should trigger the fallback; anything else
    # (e.g. KeyboardInterrupt) must propagate.
    import sha
    sha1_new = sha.new
    hashlib = None
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    Subclasses must override every method below; the versions here only raise.
    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """
    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""

    rating = 10

    def generate_manifest(self, root):
        """Yield each manifest line for the directory tree rooted at 'root'.
        Entries are visited in sorted order, descending into each directory as
        soon as it is met. Lines have the form:
         - "D mtime path" for a directory (not emitted for the root itself)
         - "X digest mtime size leaf" for a file with any execute bit set
         - "F digest mtime size leaf" for any other regular file
         - "S digest size leaf" for a symlink (digest and size of its target string)
        A top-level '.manifest' entry is skipped.
        @param root: directory to scan
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            # 'sub' is a manifest path ('/'-separated); convert it for the local OS.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                for x in items:
                    # Build the child's manifest path with '/' explicitly:
                    # os.path.join would insert os.sep, which is wrong for
                    # manifest paths on platforms where os.sep != '/'.
                    if sub == '/':
                        child = sub + x
                    else:
                        child = sub + '/' + x
                    for y in recurse(child):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Binary mode so the digest covers the exact bytes on every
                # platform; the 'with' block closes the file promptly.
                with open(full, 'rb') as stream:
                    d = sha1_new(stream.read()).hexdigest()
                if m & 0o111:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full)
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID for 'digest': its hex value prefixed with 'sha1='."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Return the L{Algorithm} registered under 'name' in the 'algorithms' table.
    @param name: algorithm name to look up
    @raise BadDigest: if the name is unknown."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest(_("Unknown algorithm '%s'") % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    Looks up the algorithm called 'alg' and returns its manifest iterator for 'root'."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated (the 'sha1' algorithm is then used to build the manifest).
    @return: the digest object, updated with the manifest text
    @raise SafeException: if 'dir' already contains a .manifest"""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        digest = digest_or_alg
        alg = get_algorithm('sha1')
    # Every manifest line is newline-terminated, including the last one.
    manifest = ''.join([line + '\n' for line in alg.generate_manifest(dir)])
    digest.update(manifest)

    # Make the directory writable just long enough to create the file.
    os.chmod(dir, 0o755)
    with open(mfile, 'wb') as stream:
        os.chmod(dir, 0o555)
        stream.write(manifest)
    os.chmod(mfile, 0o444)
    return digest
def splitID(id):
    """Split an ID of the form 'alg=value' at its first '='.
    @param id: the ID string
    @return: a tuple (alg, value), where 'alg' is an instance of Algorithm
    and 'value' is a string.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg_name, separator, value = id.partition('=')
    if not separator:
        raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
    return (get_algorithm(alg_name), value)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & ~umask'.
    If the digest does not match, dest is deleted again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # Read and write raw bytes so the data is copied and digested unmodified
    # on every platform (O_BINARY only exists, and only matters, on Windows).
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    src_obj = open(src, 'rb')
    try:
        # Opened inside the try so that src_obj is closed even if this fails
        # (e.g. because dest already exists).
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(256)
                if not data: break
                digest.update(data)
                # os.write may write only part of the buffer; loop until done.
                while data:
                    written = os.write(dest_fd, data)
                    assert written >= 0
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        src_obj.close()
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
            "Expected: %(required_digest)s\n"
            "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - required_digest must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @param root: the directory to check
    @param required_digest: the expected ID; defaults to the last component of 'root'
    @raise BadDigest: if verification fails (error.detail describes the mismatch)."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest of the manifest generated from the actual directory contents.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest of the recorded .manifest file, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        # The recorded manifest is the trusted one, so show how the
        # directory differs from it.
        import difflib
        with open(manifest_file, 'rb') as stream:
            recorded_lines = stream.readlines()
        diff = difflib.unified_diff(recorded_lines, lines,
                        'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
# XXX: Be more careful about the source tree changing under us. In particular, what happens if:
# - A regular file suddenly turns into a symlink?
# - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @raise SafeException: for the 'sha1' algorithm, if files are missing from source, or if
    the finished copy gives the wrong digest
    @raise BadDigest: if manifest_data does not hash to required_digest"""
    import tempfile
    from logging import info

    alg = splitID(required_digest)[0]

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    # First make sure the supplied manifest is the one named by required_digest.
    hasher = alg.new_digest()
    hasher.update(manifest_data)
    manifest_digest = alg.getID(hasher)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info(_("Target directory '%s' already exists"), target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data)

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        _copy_files(alg, wanted, source, tmpdir)

        # Anything still listed was in the manifest but not found in source.
        if wanted:
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        '\n- '.join(wanted))

        # Make directories read-only (files are already RO)
        for parent, subdirs, files in os.walk(tmpdir):
            for name in subdirs:
                subdir = os.path.join(parent, name)
                os.chmod(subdir, os.stat(subdir).st_mode & 0o555)

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                        "Unless the target was modified during the copy, this is a BUG\n"
                        "in 0store and should be reported.\n"
                        "Expected: %(required_digest)s\n"
                        "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        os.rename(tmpdir, target_impl)
        # TODO: catch already-exists, delete tmpdir and return success
    except:
        # Deliberately catches everything: the temporary copy must be removed
        # whatever went wrong, and the error is re-raised afterwards.
        info(_("Deleting tmpdir '%s'") % tmpdir)
        from zeroinstall.support import ro_rmtree
        ro_rmtree(tmpdir)
        raise
def _parse_manifest(manifest_data):
    """Parse a manifest file.
    Parsing stops at the first empty line. 'D' lines set the current directory;
    other lines name an entry inside the current directory.
    @param manifest_data: the contents of the manifest file
    @type manifest_data: str
    @return: a mapping from paths to information about that path (the fields of
    the line, without the trailing name)
    @rtype: {str: tuple}
    @raise BadDigest: on a malformed line, a non-absolute directory or a duplicate path"""
    entries = {}
    current_dir = ''
    for line in manifest_data.split('\n'):
        if not line: break
        kind = line[0]
        if kind == 'D':
            fields = line.split(' ', 1)
            if len(fields) != 2: raise BadDigest(_("Bad line '%s'") % line)
            path = fields[-1]
            if not path.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line)
            # Stored without the leading '/', i.e. relative to the tree root.
            path = path[1:]
            current_dir = path
        else:
            # Symlink lines have four fields, every other kind has five.
            if kind == 'S':
                expected = 4
            else:
                expected = 5
            fields = line.split(' ', expected - 1)
            path = os.path.join(current_dir, fields[-1])
            if len(fields) != expected: raise BadDigest(_("Bad line '%s'") % line)
        if path in entries:
            raise BadDigest(_('Duplicate entry "%s"') % line)
        entries[path] = fields[:-1]
    return entries
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @raise BadDigest: if an item's type differs from the one in wanted
    @raise SafeException: if a size or symlink target is wrong, or the type is unknown"""
    from logging import warn
    current_dir = ''
    for line in alg.generate_manifest(source):
        # Work out the item's type and its path relative to the tree root.
        marker = line[0]
        if marker == 'D':
            entry_type, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif marker == 'S':
            entry_type, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert marker in 'XF'
            entry_type, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)

        # Removing the entry from wanted marks it as found.
        if path not in wanted:
            warn(_("Skipping file not in manifest: '%s'"), path)
            continue
        required_details = wanted.pop(path)

        if required_details[0] != entry_type:
            raise BadDigest(_("Item '%s' has wrong type!") % path)

        dest_path = os.path.join(target, path)
        if entry_type == 'D':
            os.mkdir(dest_path)
        elif entry_type in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            if entry_type == 'X':
                mode = 0o555
            else:
                mode = 0o444
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            # The copy gets the mtime recorded in the manifest.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif entry_type == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            # Re-read the link and check its target against the wanted digest.
            symlink_target = os.readlink(os.path.join(source, path))
            symlink_digest = alg.new_digest()
            symlink_digest.update(symlink_target)
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                            "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': entry_type, 'path': path})
class HashLibAlgorithm(Algorithm):
    """An algorithm whose digests come from a hash constructor: sha1_new for
    'sha1' (published under the ID prefix 'sha1new'), otherwise the hashlib
    attribute with the given name.
    @ivar name: the prefix used in IDs produced by L{getID}
    @ivar rating: the rating passed to the constructor"""
    new_digest = None       # Constructor for digest objects

    def __init__(self, name, rating):
        """@param name: 'sha1', or the name of a hashlib constructor
        @param rating: stored as this algorithm's rating"""
        if name == 'sha1':
            self.new_digest = sha1_new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name
        self.rating = rating

    def generate_manifest(self, root):
        """Yield each manifest line for the directory tree rooted at 'root'.
        Within each directory the files and symlinks are listed first (sorted by
        name), then each subdirectory in turn. Lines have the form:
         - "D path" for a directory (not emitted for the root itself)
         - "X digest mtime size leaf" for a file with any execute bit set
         - "F digest mtime size leaf" for any other regular file
         - "S digest size leaf" for a symlink (digest and size of its target string)
        Regular files named '.manifest' are skipped.
        @param root: directory to scan
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            # 'sub' is a manifest path ('/'-separated); convert it for the local OS.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(full, leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest': continue

                    # Binary mode so the digest covers the exact bytes on every
                    # platform; the 'with' block closes the file promptly.
                    with open(path, 'rb') as stream:
                        d = new_digest(stream.read()).hexdigest()
                    if m & 0o111:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path)
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are listed after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)
            for x in dirs:
                # Build the child's manifest path with '/' explicitly:
                # os.path.join would insert os.sep, which is wrong for
                # manifest paths on platforms where os.sep != '/'.
                if sub == '/':
                    child = sub + x
                else:
                    child = sub + '/' + x
                for y in recurse(child): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID for 'digest': its hex value prefixed with 'name='."""
        return self.name + '=' + digest.hexdigest()
# Registry of known algorithms, keyed by the name used in IDs ('name=value').
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1', 50),
}

# sha256 is looked up on hashlib, so register it only when hashlib was imported.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256', 80)
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Each directory is handled as the '.' entry of its own listing; symlinks are left alone.
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""

    for parent, subdirs, filenames in os.walk(root):
        for entry in ['.'] + filenames:
            full = os.path.join(parent, entry)

            raw_mode = os.lstat(full).st_mode
            if not stat.S_ISLNK(raw_mode):
                mode = stat.S_IMODE(raw_mode)
                # Anything outside the rwx bits (setuid, setgid, sticky) is refused.
                if mode & ~0o777:
                    raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})
                if mode & 0o111:
                    new_mode = 0o555
                else:
                    new_mode = 0o444
                os.chmod(full, new_mode)