Better check for Debian-style distribution
[zeroinstall.git] / zeroinstall / zerostore / manifest.py
blob7302706bcfd536c8dae70c47c4a655e8ef055d53
1 """Processing of implementation manifests.
3 A manifest is a string representing a directory tree, with the property
4 that two trees will generate identical manifest strings if and only if:
6 - They have exactly the same set of files, directories and symlinks.
7 - For each pair of corresponding directories in the two sets:
8 - The mtimes are the same (OldSHA1 only).
9 - For each pair of corresponding files in the two sets:
10 - The size, executable flag and mtime are the same.
11 - The contents have matching secure hash values.
12 - For each pair of corresponding symlinks in the two sets:
13 - The mtime and size are the same.
14 - The targets have matching secure hash values.
16 The manifest is typically processed with a secure hash itself. So, the idea is that
17 any significant change to the contents of the tree will change the secure hash value
18 of the manifest.
20 A top-level ".manifest" file is ignored.
21 """
23 # Copyright (C) 2006, Thomas Leonard
24 # See the README file for details, or visit http://0install.net.
26 from __future__ import generators
27 import os, stat
28 import sha
29 from zeroinstall import SafeException
30 from zeroinstall.zerostore import BadDigest
32 try:
33 import hashlib
34 except:
35 hashlib = None
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    Every method here raises NotImplementedError; subclasses provide the
    real implementations.
    """
    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory tree to describe
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: an object previously returned by L{new_digest}
        @raise NotImplementedError: always, in this base class"""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm.

    Manifest lines produced by this algorithm:
     - "D <mtime> <path>" for every directory except the root
     - "X <sha1> <mtime> <size> <leaf>" for files with any execute bit set
     - "F <sha1> <mtime> <size> <leaf>" for other regular files
     - "S <sha1> <size> <leaf>" for symlinks (the hash is of the link target)
    A top-level ".manifest" entry is skipped.
    """
    def generate_manifest(self, root):
        """Yield the manifest lines (without trailing newlines) for the tree at 'root'.
        Directory entries are visited in sorted order, descending into each
        subdirectory as it is met.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def digest_file(path):
            # Hash in binary mode and in chunks: text mode can alter the bytes
            # on some platforms, and the handle is always closed explicitly.
            digest = sha.new()
            stream = open(path, 'rb')
            try:
                while True:
                    chunk = stream.read(65536)
                    if not chunk:
                        break
                    digest.update(chunk)
            finally:
                stream.close()
            return digest.hexdigest()

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                for x in items:
                    for y in recurse(os.path.join(sub, x)):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                d = digest_file(full)
                if m & any_exec:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                d = sha.new(os.readlink(full)).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                yield "S %s %s %s" % (d, info.st_size, leaf)
            else:
                raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh SHA-1 digest object."""
        return sha.new()

    def getID(self, digest):
        """Return the full ID for 'digest': 'sha1=' followed by its hex digest."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key in the module-level 'algorithms' table (e.g. 'sha1')
    @return: the algorithm object registered under that name
    @raise BadDigest: if the name is unknown."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest("Unknown algorithm '%s'" % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: path of the directory tree to describe
    @param alg: name of the algorithm to look up
    @return: whatever the selected algorithm's generate_manifest returns for 'root'
    @raise BadDigest: if 'alg' is not a known algorithm name"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated.
    @return: the digest object, updated with the full manifest text
    @raise SafeException: if 'dir' already contains a .manifest"""
    mode_rwx_rx = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
    mode_rx = (stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP |
               stat.S_IROTH | stat.S_IXOTH)
    mode_r = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH

    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException("Directory '%s' already contains a .manifest file!" % dir)

    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: the caller supplied a digest object,
        # so the manifest is generated with the old 'sha1' algorithm.
        digest = digest_or_alg
        alg = get_algorithm('sha1')

    # Build the text in one join rather than by repeated concatenation.
    manifest = ''.join([line + '\n' for line in alg.generate_manifest(dir)])
    digest.update(manifest)

    # The directory must be writable while the file is created; it is made
    # read-only again as soon as the file exists.
    os.chmod(dir, mode_rwx_rx)
    stream = open(mfile, 'w')
    try:
        os.chmod(dir, mode_rx)
        stream.write(manifest)
    finally:
        # Close the handle even if the chmod or the write fails.
        stream.close()
    os.chmod(mfile, mode_r)
    return digest
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string.
    Only the first '=' separates the two parts.
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    pieces = id.split('=', 1)
    try:
        alg_name, value = pieces
    except ValueError:
        # No '=' at all, so there was only one piece to unpack.
        raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
    return (get_algorithm(alg_name), value)
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & umask'.
    If the digest is wrong, dest is deleted again before the error is raised.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # Both ends are opened in binary mode so the bytes that are hashed are
    # exactly the bytes that are written. O_BINARY only exists on some platforms.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    src_obj = open(src, 'rb')
    try:
        # Opened inside the try block so that src_obj is closed even when
        # creating dest fails (e.g. because it already exists).
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(65536)
                if not data: break
                digest.update(data)
                # os.write may write only part of the buffer; keep going
                # until the whole chunk is out.
                while data:
                    written = os.write(dest_fd, data)
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        src_obj.close()
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(("Copy failed: file '%s' has wrong digest (may have been tampered with)\n"
             "Expected: %s\n"
             "Actual: %s") % (src, required_digest, actual))
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest (by default, the name of 'root') must be in the
       form "alg=value".
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @param root: the directory to check
    @param required_digest: the expected ID; if None, the last component of 'root' is used
    @raise BadDigest: if verification fails. The exception's 'detail' attribute
    describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    manifest_file = os.path.join(root, '.manifest')

    def read_manifest(as_lines):
        # Read the recorded manifest, closing the file explicitly.
        stream = open(manifest_file)
        try:
            if as_lines:
                return stream.readlines()
            return stream.read()
        finally:
            stream.close()

    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        digest.update(read_manifest(False))
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest("Cached item does NOT verify.")

    error.detail = " Expected digest: " + required_digest + "\n" + \
               " Actual digest: " + actual_digest + "\n" + \
               ".manifest digest: " + (manifest_digest or 'No .manifest file') + "\n\n"

    if manifest_digest is None:
        error.detail += "No .manifest, so no further details available."
    elif manifest_digest == actual_digest:
        error.detail += "The .manifest file matches the actual contents. Very strange!"
    elif manifest_digest == required_digest:
        import difflib
        diff = difflib.unified_diff(read_manifest(True), lines,
                        'Recorded', 'Actual')
        error.detail += "The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n" + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += "The directory contents are correct, but the .manifest file is wrong!"
    else:
        error.detail += "The .manifest file matches neither of the other digests. Odd."
    raise error
244 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
245 # - A regular file suddenly turns into a symlink?
246 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @param source: directory to copy from
    @param target: directory in which the new 'required_digest' directory is created
    @param manifest_data: text of the manifest describing 'source'
    @param required_digest: ID in the form 'alg=value'
    @raise BadDigest: if manifest_data does not have the required digest
    @raise SafeException: if the algorithm is the old 'sha1' one, if files are
    missing from source, or if the finished copy has the wrong digest"""
    import tempfile, shutil
    from logging import info

    alg, digest_value = splitID(required_digest)

    if isinstance(alg, OldSHA1):
        raise SafeException("Sorry, the 'sha1' algorithm does not support copying.")

    digest = alg.new_digest()
    digest.update(manifest_data)
    manifest_digest = alg.getID(digest)

    if manifest_digest != required_digest:
        raise BadDigest("Manifest has been tampered with!\n"
                "Manifest digest: " + manifest_digest + "\n"
                "Directory name : " + required_digest)

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info("Target directory '%s' already exists", target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data)

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)

    try:
        _copy_files(alg, wanted, source, tmpdir)

        # _copy_files removes each entry it finds, so anything left is missing.
        if wanted:
            raise SafeException('Copy failed; files missing from source:\n- ' +
                        '\n- '.join(wanted.keys()))

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(("Copy failed; double-check of target gave the wrong digest.\n"
                         "Unless the target was modified during the copy, this is a BUG\n"
                         "in 0store and should be reported.\n"
                         "Expected: %s\n"
                         "Actual: %s") % (required_digest, actual_digest))
        os.rename(tmpdir, target_impl)
        # TODO: catch already-exists, delete tmpdir and return success
    except:
        # Deliberately catches everything: clean up, then re-raise unchanged.
        info("Deleting tmpdir '%s'", tmpdir)
        # add_manifest_file() leaves tmpdir with mode 555. Without write
        # permission on it, rmtree cannot remove its entries and would raise
        # its own error, hiding the one we are about to re-raise.
        os.chmod(tmpdir, stat.S_IRWXU)
        shutil.rmtree(tmpdir)
        raise
305 def _parse_manifest(manifest_data):
306 """Parse a manifest file.
307 @param manifest_data: the contents of the manifest file
308 @type manifest_data: str
309 @return: a mapping from paths to information about that path
310 @rtype: {str: tuple}"""
311 wanted = {}
312 dir = ''
313 for line in manifest_data.split('\n'):
314 if not line: break
315 if line[0] == 'D':
316 data = line.split(' ', 1)
317 if len(data) != 2: raise BadDigest("Bad line '%s'" % line)
318 path = data[-1]
319 if not path.startswith('/'): raise BadDigest("Not absolute: '%s'" % line)
320 path = path[1:]
321 dir = path
322 elif line[0] == 'S':
323 data = line.split(' ', 3)
324 path = os.path.join(dir, data[-1])
325 if len(data) != 4: raise BadDigest("Bad line '%s'" % line)
326 else:
327 data = line.split(' ', 4)
328 path = os.path.join(dir, data[-1])
329 if len(data) != 5: raise BadDigest("Bad line '%s'" % line)
330 if path in wanted:
331 raise BadDigest('Duplicate entry "%s"' % line)
332 wanted[path] = data[:-1]
333 return wanted
335 def _copy_files(alg, wanted, source, target):
336 """Scan for files under 'source'. For each one:
337 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
338 then copy it into 'target'.
339 If it's not in wanted, warn and skip it.
340 On exit, wanted contains only files that were not found."""
341 from logging import warn
342 dir = ''
343 for line in alg.generate_manifest(source):
344 if line[0] == 'D':
345 type, name = line.split(' ', 1)
346 assert name.startswith('/')
347 dir = name[1:]
348 path = dir
349 elif line[0] == 'S':
350 type, actual_digest, actual_size, name = line.split(' ', 3)
351 path = os.path.join(dir, name)
352 else:
353 assert line[0] in 'XF'
354 type, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
355 path = os.path.join(dir, name)
356 try:
357 required_details = wanted.pop(path)
358 except KeyError:
359 warn("Skipping file not in manifest: '%s'", path)
360 continue
361 if required_details[0] != type:
362 raise BadDigest("Item '%s' has wrong type!" % path)
363 if type == 'D':
364 os.mkdir(os.path.join(target, path))
365 elif type in 'XF':
366 required_type, required_digest, required_mtime, required_size = required_details
367 if required_size != actual_size:
368 raise SafeException("File '%s' has wrong size (%s bytes, but should be "
369 "%s according to manifest)" %
370 (path, actual_size, required_size))
371 required_mtime = int(required_mtime)
372 dest_path = os.path.join(target, path)
373 if type == 'X':
374 mode = 0555
375 else:
376 mode = 0444
377 copy_with_verify(os.path.join(source, path),
378 dest_path,
379 mode,
380 alg,
381 required_digest)
382 os.utime(dest_path, (required_mtime, required_mtime))
383 elif type == 'S':
384 required_type, required_digest, required_size = required_details
385 if required_size != actual_size:
386 raise SafeException("Symlink '%s' has wrong size (%s bytes, but should be "
387 "%s according to manifest)" %
388 (path, actual_size, required_size))
389 symlink_target = os.readlink(os.path.join(source, path))
390 symlink_digest = alg.new_digest()
391 symlink_digest.update(symlink_target)
392 if symlink_digest.hexdigest() != required_digest:
393 raise SafeException("Symlink '%s' has wrong target (digest should be "
394 "%s according to manifest)" % (path, required_digest))
395 dest_path = os.path.join(target, path)
396 os.symlink(symlink_target, dest_path)
397 else:
398 raise SafeException("Unknown manifest type %s for '%s'" % (type, path))
class HashLibAlgorithm(Algorithm):
    """Manifest algorithm using a named hash function.

    Manifest lines produced by this algorithm:
     - "D <path>" for every directory except the root (no mtime)
     - "X <hash> <mtime> <size> <leaf>" for files with any execute bit set
     - "F <hash> <mtime> <size> <leaf>" for other regular files
     - "S <hash> <size> <leaf>" for symlinks (the hash is of the link target)
    Within each directory the files and symlinks are listed first (in sorted
    order) and the subdirectories are descended into afterwards. Regular files
    named ".manifest" are skipped in every directory.
    """
    new_digest = None        # Constructor for digest objects

    def __init__(self, name):
        """@param name: 'sha1' (uses the sha module and is published under the
        ID prefix 'sha1new'), or the name of a constructor in hashlib, which
        is then also used as the ID prefix."""
        if name == 'sha1':
            self.new_digest = sha.new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name

    def generate_manifest(self, root):
        """Yield the manifest lines (without trailing newlines) for the tree at 'root'.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a directory path contains a newline
        @raise SafeException: if an object is not a file, directory or symlink"""
        any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        new_digest = self.new_digest

        def digest_file(path):
            # Hash in binary mode and in chunks: text mode can alter the bytes
            # on some platforms, and the handle is always closed explicitly.
            digest = new_digest()
            stream = open(path, 'rb')
            try:
                while True:
                    chunk = stream.read(65536)
                    if not chunk:
                        break
                    digest.update(chunk)
            finally:
                stream.close()
            return digest.hexdigest()

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception('Not a directory: "%s"' % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    if leaf == '.manifest': continue

                    d = digest_file(path)
                    if m & any_exec:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    d = new_digest(os.readlink(path)).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    yield "S %s %s %s" % (d, info.st_size, leaf)
                elif stat.S_ISDIR(m):
                    # Descend only after all of this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                            path)
            for x in dirs:
                for y in recurse(os.path.join(sub, x)): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID for 'digest': this algorithm's name, '=', and the hex digest."""
        return self.name + '=' + digest.hexdigest()
# Known algorithms, keyed by the name that precedes '=' in a digest ID
# (see get_algorithm and splitID).
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1'),
}

# sha256 is looked up in hashlib, so it is only offered when that module
# could be imported.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256')
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Each directory visited (including root itself) and each non-directory
    entry in it is set to mode 555 or 444; symlinks are left untouched.
    @param root: top of the tree to process
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    ordinary_bits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for dirpath, subdirs, filenames in os.walk(root):
        # '.' stands for the directory currently being walked.
        entries = ['.']
        entries.extend(filenames)
        for entry in entries:
            target = os.path.join(dirpath, entry)

            st_mode = os.lstat(target).st_mode
            if not stat.S_ISLNK(st_mode):
                perms = stat.S_IMODE(st_mode)
                if perms & ~ordinary_bits:
                    raise Exception("Unsafe mode: extracted file '%s' had special bits set in mode '%s'" % (target, oct(perms)))
                new_mode = read_bits
                if perms & exec_bits:
                    new_mode = read_bits | exec_bits
                os.chmod(target, new_mode)