Update year to 2009 in various places
[zeroinstall/zeroinstall-rsl.git] / zeroinstall / zerostore / manifest.py
blobdb141e6443f8a2830788f003a6bac074080c70c2
1 """Processing of implementation manifests.
3 A manifest is a string representing a directory tree, with the property
4 that two trees will generate identical manifest strings if and only if:
6 - They have exactly the same set of files, directories and symlinks.
7 - For each pair of corresponding directories in the two sets:
8 - The mtimes are the same (OldSHA1 only).
9 - For each pair of corresponding files in the two sets:
10 - The size, executable flag and mtime are the same.
11 - The contents have matching secure hash values.
12 - For each pair of corresponding symlinks in the two sets:
13 - The mtime and size are the same.
14 - The targets have matching secure hash values.
16 The manifest is typically processed with a secure hash itself. So, the idea is that
17 any significant change to the contents of the tree will change the secure hash value
18 of the manifest.
20 A top-level ".manifest" file is ignored.
21 """
23 # Copyright (C) 2009, Thomas Leonard
24 # See the README file for details, or visit http://0install.net.
26 from __future__ import generators
27 import os, stat
28 from zeroinstall import SafeException
29 from zeroinstall.zerostore import BadDigest
# hashlib is preferred where available; older interpreters only ship the
# legacy 'sha' module. 'hashlib' is set to None in that case so that later
# code can tell that only SHA-1 is available.
try:
    import hashlib
    sha1_new = hashlib.sha1
except ImportError:
    # Only a missing module should trigger the fallback. Anything else
    # (e.g. KeyboardInterrupt) must propagate rather than be swallowed.
    import sha
    sha1_new = sha.new
    hashlib = None
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    Subclasses must override every method; the versions here only raise
    NotImplementedError (a subclass of Exception, so existing handlers that
    catch Exception keep working).
    """
    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory tree to describe
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: a digest object previously returned by L{new_digest}
        @raise NotImplementedError: always (abstract method)"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""
    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        Line formats produced:
         - "D mtime path" for each directory except the root itself
         - "X digest mtime size leaf" for a regular file with any execute bit set
         - "F digest mtime size leaf" for any other regular file
         - "S digest size leaf" for a symlink (digest is of the link target)
        Entries of each directory are visited in sorted order and
        sub-directories are descended into as they are met.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a path contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        # Any of the user/group/other execute bits (octal 0111).
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            # The top-level manifest file is not part of the tree's contents.
            if sub == '/.manifest': return

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                for x in items:
                    for y in recurse(os.path.join(sub, x)):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Hash the raw bytes and always release the handle, rather
                # than relying on garbage collection to close it.
                stream = open(full, 'rb')
                try:
                    d = sha1_new(stream.read()).hexdigest()
                finally:
                    stream.close()
                if m & exec_bits:
                    tag = 'X'
                else:
                    tag = 'F'
                yield "%s %s %s %s %s" % (tag, d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                d = sha1_new(os.readlink(full)).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                yield "S %s %s %s" % (d, info.st_size, leaf)
            else:
                raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh, empty SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID for 'digest': 'sha1=' followed by its hex value."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key into the module-level 'algorithms' table
    @return: the registered algorithm instance
    @raise BadDigest: if the name is unknown."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest("Unknown algorithm '%s'" % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: path of the directory tree to describe
    @param alg: name of the algorithm to look up (default 'sha1')
    @return: whatever the selected algorithm's generate_manifest returns"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated.
    @return: the digest object, updated with the full manifest text
    @raise SafeException: if 'dir' already contains a .manifest entry"""
    dir_writable = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
            stat.S_IROTH | stat.S_IXOTH)                # octal 0755
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH  # octal 0444
    dir_readonly = read_only | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH  # octal 0555

    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException("Directory '%s' already contains a .manifest file!" % dir)
    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: a ready-made digest object was passed.
        digest = digest_or_alg
        alg = get_algorithm('sha1')
    # Build the text in one pass instead of repeated string concatenation.
    manifest = ''.join([line + '\n' for line in alg.generate_manifest(dir)])
    digest.update(manifest)

    # The directory only needs to be writable while the file is created;
    # drop it back to read-only even if creating the file fails.
    os.chmod(dir, dir_writable)
    try:
        stream = open(mfile, 'w')
    finally:
        os.chmod(dir, dir_readonly)
    try:
        stream.write(manifest)
    finally:
        stream.close()
    os.chmod(mfile, read_only)
    return digest
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string.
    Only the first '=' separates the two parts; any later '=' stays in 'value'.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    if '=' not in id:
        raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
    alg_name, value = id.split('=', 1)
    return (get_algorithm(alg_name), value)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & ~umask'.
    If the digest does not match, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # Read raw bytes: the digest and the copy must cover exactly what is on disk.
    src_obj = open(src, 'rb')
    try:
        # O_EXCL makes the call fail if dest already exists. Opening inside
        # the outer try ensures src_obj is closed if this raises.
        dest_fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(256)
                if not data: break
                digest.update(data)
                # os.write may write only part of the buffer; loop until done.
                while data:
                    written = os.write(dest_fd, data)
                    assert written >= 0
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        src_obj.close()
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(("Copy failed: file '%s' has wrong digest (may have been tampered with)\n"
             "Expected: %s\n"
             "Actual: %s") % (src, required_digest, actual))
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @param root: the directory to check
    @param required_digest: the expected ID; if None, the final component of
    'root' is used
    @raise BadDigest: if verification fails. The exception's 'detail'
    attribute describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    manifest_file = os.path.join(root, '.manifest')
    recorded_lines = None
    if os.path.isfile(manifest_file):
        # Read the recorded manifest once, closing the file promptly; the
        # same lines feed both the digest and the diff below.
        stream = open(manifest_file)
        try:
            recorded_lines = stream.readlines()
        finally:
            stream.close()
        digest = alg.new_digest()
        digest.update(''.join(recorded_lines))
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest("Cached item does NOT verify.")

    error.detail = " Expected digest: " + required_digest + "\n" + \
               " Actual digest: " + actual_digest + "\n" + \
               ".manifest digest: " + (manifest_digest or 'No .manifest file') + "\n\n"

    if manifest_digest is None:
        error.detail += "No .manifest, so no further details available."
    elif manifest_digest == actual_digest:
        error.detail += "The .manifest file matches the actual contents. Very strange!"
    elif manifest_digest == required_digest:
        import difflib
        diff = difflib.unified_diff(recorded_lines, lines,
                        'Recorded', 'Actual')
        error.detail += "The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n" + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += "The directory contents are correct, but the .manifest file is wrong!"
    else:
        error.detail += "The .manifest file matches neither of the other digests. Odd."
    raise error
246 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
247 # - A regular file suddenly turns into a symlink?
248 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @param source: directory to copy from
    @param target: directory in which the new 'required_digest' subdirectory is created
    @param manifest_data: text of the manifest describing source
    @param required_digest: ID in the form 'alg=value'
    @raise BadDigest: if manifest_data does not hash to required_digest
    @raise SafeException: if the algorithm is the old 'sha1', if files are
    missing from source, or if the finished copy has the wrong digest"""
    import tempfile, shutil
    from logging import info

    alg = splitID(required_digest)[0]

    if isinstance(alg, OldSHA1):
        raise SafeException("Sorry, the 'sha1' algorithm does not support copying.")

    digest = alg.new_digest()
    digest.update(manifest_data)
    manifest_digest = alg.getID(digest)

    if manifest_digest != required_digest:
        raise BadDigest("Manifest has been tampered with!\n"
                "Manifest digest: " + manifest_digest + "\n"
                "Directory name : " + required_digest)

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info("Target directory '%s' already exists", target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data)

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)

    try:
        _copy_files(alg, wanted, source, tmpdir)

        # _copy_files removes every entry it copies; anything left was not found.
        if wanted:
            raise SafeException('Copy failed; files missing from source:\n- ' +
                        '\n- '.join(wanted.keys()))

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(("Copy failed; double-check of target gave the wrong digest.\n"
                         "Unless the target was modified during the copy, this is a BUG\n"
                         "in 0store and should be reported.\n"
                         "Expected: %s\n"
                         "Actual: %s") % (required_digest, actual_digest))
        os.rename(tmpdir, target_impl)
        # TODO: catch already-exists, delete tmpdir and return success
    except:
        # Deliberately catches everything (including KeyboardInterrupt): the
        # partial copy must be removed, and the exception is re-raised below.
        info("Deleting tmpdir '%s'" % tmpdir)
        # add_manifest_file leaves tmpdir read-only (mode 555); without write
        # permission its entries cannot be unlinked and rmtree would fail,
        # hiding the original error.
        os.chmod(tmpdir, stat.S_IRWXU)
        shutil.rmtree(tmpdir)
        raise
307 def _parse_manifest(manifest_data):
308 """Parse a manifest file.
309 @param manifest_data: the contents of the manifest file
310 @type manifest_data: str
311 @return: a mapping from paths to information about that path
312 @rtype: {str: tuple}"""
313 wanted = {}
314 dir = ''
315 for line in manifest_data.split('\n'):
316 if not line: break
317 if line[0] == 'D':
318 data = line.split(' ', 1)
319 if len(data) != 2: raise BadDigest("Bad line '%s'" % line)
320 path = data[-1]
321 if not path.startswith('/'): raise BadDigest("Not absolute: '%s'" % line)
322 path = path[1:]
323 dir = path
324 elif line[0] == 'S':
325 data = line.split(' ', 3)
326 path = os.path.join(dir, data[-1])
327 if len(data) != 4: raise BadDigest("Bad line '%s'" % line)
328 else:
329 data = line.split(' ', 4)
330 path = os.path.join(dir, data[-1])
331 if len(data) != 5: raise BadDigest("Bad line '%s'" % line)
332 if path in wanted:
333 raise BadDigest('Duplicate entry "%s"' % line)
334 wanted[path] = data[:-1]
335 return wanted
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @param alg: algorithm used to scan 'source' and to digest the copied data
    @param wanted: mapping from path to required details, as returned by
    _parse_manifest; entries are removed as they are processed
    @param source: directory to copy from
    @param target: existing directory to copy into
    @raise BadDigest: if an item has a different type from the one required
    @raise SafeException: on a size mismatch, a wrong symlink target or an
    unknown manifest type"""
    # logging.warn is a deprecated alias; warning is the supported name.
    from logging import warning

    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH                  # octal 0444
    read_exec = read_only | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH      # octal 0555

    current_dir = ''
    for line in alg.generate_manifest(source):
        if line[0] == 'D':
            kind, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif line[0] == 'S':
            kind, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert line[0] in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)
        try:
            required_details = wanted.pop(path)
        except KeyError:
            warning("Skipping file not in manifest: '%s'", path)
            continue
        if required_details[0] != kind:
            raise BadDigest("Item '%s' has wrong type!" % path)
        if kind == 'D':
            os.mkdir(os.path.join(target, path))
        elif kind in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            if required_size != actual_size:
                raise SafeException("File '%s' has wrong size (%s bytes, but should be "
                            "%s according to manifest)" %
                            (path, actual_size, required_size))
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            if kind == 'X':
                mode = read_exec
            else:
                mode = read_only
            # The contents are digested while being copied, so a file that
            # changed after the scan is still caught.
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            # The copy is stamped with the mtime recorded in the manifest.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif kind == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException("Symlink '%s' has wrong size (%s bytes, but should be "
                            "%s according to manifest)" %
                            (path, actual_size, required_size))
            # Re-read the link and digest what was read, instead of trusting
            # the digest from the scan.
            symlink_target = os.readlink(os.path.join(source, path))
            symlink_digest = alg.new_digest()
            symlink_digest.update(symlink_target)
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException("Symlink '%s' has wrong target (digest should be "
                            "%s according to manifest)" % (path, required_digest))
            dest_path = os.path.join(target, path)
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException("Unknown manifest type %s for '%s'" % (kind, path))
class HashLibAlgorithm(Algorithm):
    """An algorithm whose digest constructor comes from hashlib (or from
    sha1_new for 'sha1'). Its manifests list each directory's files and
    symlinks before descending into its sub-directories, and directory lines
    carry no mtime."""
    new_digest = None       # Constructor for digest objects

    def __init__(self, name):
        """@param name: 'sha1' selects sha1_new and the ID prefix 'sha1new';
        any other value is looked up as an attribute of hashlib and is also
        used as the ID prefix."""
        if name == 'sha1':
            self.new_digest = sha1_new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        Line formats produced:
         - "D path" for each directory except the root itself
         - "X digest mtime size leaf" for a regular file with any execute bit set
         - "F digest mtime size leaf" for any other regular file
         - "S digest size leaf" for a symlink (digest is of the link target)
        Regular files named '.manifest' are skipped in every directory.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a file, symlink or directory name contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        # Any of the user/group/other execute bits (octal 0111).
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception('Not a directory: "%s"' % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                # The check on 'sub' above only covers directory paths, so
                # file and symlink names must be checked here as well.
                if '\n' in leaf:
                    raise BadDigest("Newline in filename '%s'" % os.path.join(sub, leaf))

                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest': continue

                    # Hash the raw bytes and always release the handle, rather
                    # than relying on garbage collection to close it.
                    stream = open(path, 'rb')
                    try:
                        d = new_digest(stream.read()).hexdigest()
                    finally:
                        stream.close()
                    if m & exec_bits:
                        tag = 'X'
                    else:
                        tag = 'F'
                    yield "%s %s %s %s %s" % (tag, d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    d = new_digest(os.readlink(path)).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    yield "S %s %s %s" % (d, info.st_size, leaf)
                elif stat.S_ISDIR(m):
                    # Sub-directories are emitted after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                            path)
            for x in dirs:
                for y in recurse(os.path.join(sub, x)): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID for 'digest': this algorithm's name, '=', then the hex value."""
        return self.name + '=' + digest.hexdigest()
# Registry of the available algorithms, keyed by the name used as the
# prefix of an ID ('name=value'). Consulted by get_algorithm().
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1'),
}

# SHA-256 needs hashlib, which is None when only the legacy 'sha' module
# could be imported.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256')
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Every directory reached by the walk (including root itself) and every
    non-symlink file entry is changed; symlinks are left alone.
    @param root: top of the tree to fix
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH               # octal 0111
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH              # octal 0444
    read_exec = read_only | any_exec                                    # octal 0555
    permission_bits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO        # octal 0777

    for main, dirs, files in os.walk(root):
        # '.' stands for the directory being visited, so directories are
        # fixed as the walk reaches them.
        for entry in ['.'] + files:
            full = os.path.join(main, entry)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                continue

            mode = stat.S_IMODE(raw_mode)
            if mode & ~permission_bits:
                raise Exception("Unsafe mode: extracted file '%s' had special bits set in mode '%s'" % (full, oct(mode)))

            new_mode = read_only
            if mode & any_exec:
                new_mode = read_exec
            os.chmod(full, new_mode)