Fixed typo in log format string
[zeroinstall/zeroinstall-rsl.git] / zeroinstall / zerostore / manifest.py
blob4343b19dd9ad3843af6863e1b8a7522302ab03eb
2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
19 of the manifest.
21 A top-level ".manifest" file is ignored.
22 """
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
27 from __future__ import generators
28 import os, stat
29 from zeroinstall import SafeException, _
30 from zeroinstall.zerostore import BadDigest
32 try:
33 import hashlib
34 sha1_new = hashlib.sha1
35 except:
36 import sha
37 sha1_new = sha.new
38 hashlib = None
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.

    Every method here is a placeholder that raises NotImplementedError
    (a subclass of Exception, so handlers catching Exception still work);
    concrete algorithms must override all three.
    """
    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory tree to describe
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: a digest object previously returned by new_digest
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""
    def generate_manifest(self, root):
        """Yield the manifest lines for the directory tree rooted at 'root'.
        Directories produce "D mtime path" lines, regular files "X"/"F" lines
        (executable or not) with digest, mtime, size and leaf name, and symlinks
        "S" lines with the digest and length of their target.
        A top-level '.manifest' entry is skipped.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a filename contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        # A file is reported as executable ('X') if any execute bit is set.
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def file_digest(path):
            # Binary mode so the hash covers the exact bytes on disk; close the
            # handle explicitly rather than relying on garbage collection.
            stream = open(path, 'rb')
            try:
                return sha1_new(stream.read()).hexdigest()
            finally:
                stream.close()

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                # The root itself gets no line; only its descendants do.
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                for x in items:
                    for y in recurse(os.path.join(sub, x)):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                d = file_digest(full)
                if m & exec_bits:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full)
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID for 'digest', in the form 'sha1=<hex digest>'."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key into the module-level 'algorithms' registry
    @return: the registered algorithm instance
    @raise BadDigest: if the name is unknown."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest(_("Unknown algorithm '%s'") % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to describe
    @param alg: name of the algorithm to look up (default 'sha1')
    @return: the iterator produced by that algorithm's generate_manifest"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated (the 'sha1' algorithm is then used to build the manifest).
    @return: the digest object, updated with the manifest text
    @raise SafeException: if 'dir' already contains a .manifest entry"""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)

    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        digest = digest_or_alg
        alg = get_algorithm('sha1')

    # Build the text in one join instead of repeated string concatenation.
    manifest = ''.join([line + '\n' for line in alg.generate_manifest(dir)])
    digest.update(manifest)

    read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    read_only = read_bits                       # 444
    read_exec = read_bits | exec_bits           # 555
    owner_writable = read_exec | stat.S_IWUSR   # 755

    # The directory only needs to be writable while the file is created.
    # Restore 555 even if creating the file fails.
    os.chmod(dir, owner_writable)
    try:
        stream = open(mfile, 'w')
    finally:
        os.chmod(dir, read_exec)

    # Always release the handle, even if the write fails.
    try:
        stream.write(manifest)
    finally:
        stream.close()
    os.chmod(mfile, read_only)
    return digest
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string.
    Only the first '=' separates the two parts.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    pieces = id.split('=', 1)
    if len(pieces) == 2:
        alg_name, value = pieces
        return (get_algorithm(alg_name), value)
    raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & ~umask'.
    If the digest is wrong, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    chunk_size = 64 * 1024
    # O_BINARY only exists on platforms that distinguish text from binary.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)

    # Binary mode: the bytes must be hashed and copied exactly as stored.
    src_obj = open(src, 'rb')
    try:
        # Opened inside the try so src_obj is closed if dest can't be created.
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(chunk_size)
                if not data: break
                digest.update(data)
                # os.write may write only part of the buffer; loop until done.
                while data:
                    written = os.write(dest_fd, data)
                    assert written >= 0
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        src_obj.close()

    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
            "Expected: %(required_digest)s\n"
            "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
    - required_digest (by default, the basename of root) must be a digest
      in the form "alg=value".
    - The calculated digest of the contents must match it.
    - root must contain a regular .manifest file whose digest also matches
      (a missing .manifest is reported as a verification failure).
    @param root: directory to check
    @param required_digest: expected ID, or None to use root's basename
    @raise BadDigest: if verification fails; the exception's 'detail'
    attribute holds a longer explanation."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        # Read once and close explicitly; the same lines feed both the
        # digest and the diff shown on failure.
        stream = open(manifest_file)
        try:
            recorded_lines = stream.readlines()
        finally:
            stream.close()
        digest = alg.new_digest()
        digest.update(''.join(recorded_lines))
        manifest_digest = alg.getID(digest)
    else:
        recorded_lines = None
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    # Every placeholder must be named: the arguments are supplied as a dict.
    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        diff = difflib.unified_diff(recorded_lines, lines,
                        'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
250 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
251 # - A regular file suddenly turns into a symlink?
252 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @raise SafeException: if the algorithm is the old 'sha1', files are missing
    from source, or the finished copy gives the wrong digest
    @raise BadDigest: if manifest_data does not hash to required_digest"""
    import tempfile
    from logging import info

    alg, digest_value = splitID(required_digest)

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    manifest_hash = alg.new_digest()
    manifest_hash.update(manifest_data)
    manifest_digest = alg.getID(manifest_hash)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info(_("Target directory '%s' already exists"), target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data)

    # Mask that strips every write bit (555).
    no_write = (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH |
            stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        _copy_files(alg, wanted, source, tmpdir)

        # _copy_files removes each entry it handles; leftovers were never found.
        if wanted:
            missing = '\n- '.join(wanted.keys())
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        missing)

        # Make directories read-only (files are already RO)
        for parent, subdirs, filenames in os.walk(tmpdir):
            for subdir in subdirs:
                subdir_path = os.path.join(parent, subdir)
                current_mode = os.stat(subdir_path).st_mode
                os.chmod(subdir_path, current_mode & no_write)

        # Check that the copy is correct
        copied_digest = add_manifest_file(tmpdir, alg)
        actual_digest = alg.getID(copied_digest)
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                        "Unless the target was modified during the copy, this is a BUG\n"
                        "in 0store and should be reported.\n"
                        "Expected: %(required_digest)s\n"
                        "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        os.rename(tmpdir, target_impl)
        # TODO: catch already-exists, delete tmpdir and return success
    except:
        # Any failure: remove the partial copy, then re-raise unchanged.
        info(_("Deleting tmpdir '%s'") % tmpdir)
        from zeroinstall.support import ro_rmtree
        ro_rmtree(tmpdir)
        raise
319 def _parse_manifest(manifest_data):
320 """Parse a manifest file.
321 @param manifest_data: the contents of the manifest file
322 @type manifest_data: str
323 @return: a mapping from paths to information about that path
324 @rtype: {str: tuple}"""
325 wanted = {}
326 dir = ''
327 for line in manifest_data.split('\n'):
328 if not line: break
329 if line[0] == 'D':
330 data = line.split(' ', 1)
331 if len(data) != 2: raise BadDigest(_("Bad line '%s'") % line)
332 path = data[-1]
333 if not path.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line)
334 path = path[1:]
335 dir = path
336 elif line[0] == 'S':
337 data = line.split(' ', 3)
338 path = os.path.join(dir, data[-1])
339 if len(data) != 4: raise BadDigest(_("Bad line '%s'") % line)
340 else:
341 data = line.split(' ', 4)
342 path = os.path.join(dir, data[-1])
343 if len(data) != 5: raise BadDigest(_("Bad line '%s'") % line)
344 if path in wanted:
345 raise BadDigest(_('Duplicate entry "%s"') % line)
346 wanted[path] = data[:-1]
347 return wanted
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @raise BadDigest: if an item's type differs from the manifest's
    @raise SafeException: if a size or symlink target doesn't match"""
    from logging import warn

    read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH   # 444
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH   # 111

    current_dir = ''
    for line in alg.generate_manifest(source):
        marker = line[0]
        if marker == 'D':
            kind, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif marker == 'S':
            kind, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert marker in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)

        if path not in wanted:
            warn(_("Skipping file not in manifest: '%s'"), path)
            continue
        required_details = wanted.pop(path)

        if required_details[0] != kind:
            raise BadDigest(_("Item '%s' has wrong type!") % path)

        if kind == 'D':
            os.mkdir(os.path.join(target, path))
        elif kind in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            # Copies are read-only; executables also keep their execute bits.
            mode = read_bits
            if kind == 'X':
                mode = read_bits | exec_bits
            copy_with_verify(os.path.join(source, path),
                    dest_path,
                    mode,
                    alg,
                    required_digest)
            # Stamp the copy with the mtime recorded in the manifest.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif kind == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            symlink_target = os.readlink(os.path.join(source, path))
            symlink_digest = alg.new_digest()
            symlink_digest.update(symlink_target)
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                            "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
            dest_path = os.path.join(target, path)
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': kind, 'path': path})
class HashLibAlgorithm(Algorithm):
    """Manifest algorithm whose digest constructor is chosen by name.
    Unlike L{OldSHA1}, directory lines carry no mtime, and within each
    directory the files and symlinks are listed before any subdirectory."""
    new_digest = None       # Constructor for digest objects

    def __init__(self, name):
        """@param name: 'sha1' selects the module's sha1_new constructor and is
        published under the name 'sha1new'; any other name is looked up as an
        attribute of hashlib and used unchanged."""
        if name == 'sha1':
            self.new_digest = sha1_new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name

    def generate_manifest(self, root):
        """Yield the manifest lines for the directory tree rooted at 'root'.
        Regular files named '.manifest' are skipped in every directory.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a filename contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        # A file is reported as executable ('X') if any execute bit is set.
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def file_digest(new_digest, path):
            # Binary mode so the hash covers the exact bytes on disk; close the
            # handle explicitly rather than relying on garbage collection.
            stream = open(path, 'rb')
            try:
                return new_digest(stream.read()).hexdigest()
            finally:
                stream.close()

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            # The root itself gets no line; only its descendants do.
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest': continue

                    d = file_digest(new_digest, path)
                    if m & exec_bits:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path)
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are emitted after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)
            for x in dirs:
                for y in recurse(os.path.join(sub, x)): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID for 'digest', in the form '<name>=<hex digest>'."""
        return self.name + '=' + digest.hexdigest()
# Registry of the supported manifest algorithms, keyed by the name used on
# the left of an 'alg=value' digest ID.
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1'),
}

# hashlib is None when importing it failed, in which case sha256 is unavailable.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256')
def fixup_permissions(root):
    """Set permissions recursively for children of root:
    - If any X bit is set, they all must be.
    - World readable, non-writable.
    Symlinks are left alone. Each directory visited (including root) is
    handled through its own '.' entry.
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH        # 111
    all_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH        # 444
    plain_bits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO      # 777

    for main, dirs, files in os.walk(root):
        for entry in ['.'] + files:
            full = os.path.join(main, entry)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                continue

            mode = stat.S_IMODE(raw_mode)
            # Anything outside the rwx bits (setuid, setgid, sticky) is refused.
            if mode & ~plain_bits:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})

            new_mode = all_read
            if mode & any_exec:
                new_mode = all_read | any_exec
            os.chmod(full, new_mode)