2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
21 A top-level ".manifest" file is ignored.
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
27 from __future__
import generators
29 from zeroinstall
import SafeException
, _
30 from zeroinstall
.zerostore
import BadDigest
34 sha1_new
= hashlib
.sha1
41 """Abstract base class for algorithms.
42 An algorithm knows how to generate a manifest from a directory tree.
44 def generate_manifest(self
, root
):
45 """Returns an iterator that yields each line of the manifest for the directory
46 tree rooted at 'root'."""
47 raise Exception('Abstract')
50 """Create a new digest. Call update() on the returned object to digest the data.
51 Call getID() to turn it into a full ID string."""
52 raise Exception('Abstract')
54 def getID(self
, digest
):
55 """Convert a digest (from new_digest) to a full ID."""
56 raise Exception('Abstract')
58 class OldSHA1(Algorithm
):
59 """@deprecated: Injector versions before 0.20 only supported this algorithm."""
60 def generate_manifest(self
, root
):
62 # To ensure that a line-by-line comparison of the manifests
63 # is possible, we require that filenames don't contain newlines.
64 # Otherwise, you can name a file so that the part after the \n
65 # would be interpreted as another line in the manifest.
66 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
67 assert sub
.startswith('/')
69 if sub
== '/.manifest': return
71 full
= os
.path
.join(root
, sub
[1:])
77 yield "D %s %s" % (int(info
.st_mtime
), sub
)
78 items
= os
.listdir(full
)
81 for y
in recurse(os
.path
.join(sub
, x
)):
86 leaf
= os
.path
.basename(sub
[1:])
88 d
= sha1_new(file(full
).read()).hexdigest()
90 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
92 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
94 target
= os
.readlink(full
)
95 d
= sha1_new(target
).hexdigest()
96 # Note: Can't use utime on symlinks, so skip mtime
97 # Note: eCryptfs may report length as zero, so count ourselves instead
98 yield "S %s %s %s" % (d
, len(target
), leaf
)
100 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
102 for x
in recurse('/'): yield x
104 def new_digest(self
):
107 def getID(self
, digest
):
108 return 'sha1=' + digest
.hexdigest()
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: the algorithm's name, i.e. the part of an ID before the '='
    @return: the entry registered under 'name' in the module's algorithms table
    @raise BadDigest: if the name is unknown."""
    try:
        return algorithms[name]
    except KeyError:
        # Report an unknown name as BadDigest, as documented, rather than
        # letting the KeyError from the lookup escape to the caller.
        raise BadDigest(_("Unknown algorithm '%s'") % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    Resolves the algorithm called 'alg' and returns whatever its
    generate_manifest method produces for the tree rooted at 'root'."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
122 def add_manifest_file(dir, digest_or_alg
):
123 """Writes a .manifest file into 'dir', and returns the digest.
124 You should call fixup_permissions before this to ensure that the permissions are correct.
125 On exit, dir itself has mode 555. Subdirectories are not changed.
126 @param dir: root of the implementation
127 @param digest_or_alg: should be an instance of Algorithm. Passing a digest
128 here is deprecated."""
129 mfile
= os
.path
.join(dir, '.manifest')
130 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
131 raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
133 if isinstance(digest_or_alg
, Algorithm
):
135 digest
= alg
.new_digest()
137 digest
= digest_or_alg
138 alg
= get_algorithm('sha1')
139 for line
in alg
.generate_manifest(dir):
140 manifest
+= line
+ '\n'
141 digest
.update(manifest
)
144 stream
= file(mfile
, 'w')
146 stream
.write(manifest
)
148 os
.chmod(mfile
, 0444)
152 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
153 where 'alg' is an instance of Algorithm and 'value' is a string.
154 @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
155 parts
= id.split('=', 1)
157 raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
158 return (get_algorithm(parts
[0]), parts
[1])
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & umask'.
    If the digest does not match, the newly created dest is removed again
    before the error is raised.
    @param src: source filename
    @param dest: target filename
    @param mode: target mode
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # Binary mode: the bytes that are hashed are exactly the bytes written.
    src_obj = open(src, 'rb')
    try:
        # O_EXCL makes the call fail if dest already exists, so an existing
        # file is never overwritten.
        dest_fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(256)
                if not data:
                    break
                digest.update(data)
                # os.write may accept only part of the buffer; keep going
                # until the whole chunk has been written.
                while data:
                    written = os.write(dest_fd, data)
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        src_obj.close()

    actual = digest.hexdigest()
    if actual == required_digest:
        return

    # Don't leave an unverified copy behind.
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
                      "Expected: %(required_digest)s\n"
                      "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest (by default, root's base name) must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - If there is a .manifest file, then its digest must also match.
    @param root: the directory to check
    @param required_digest: the expected ID; if None, the base name of root is used
    @raise BadDigest: if verification fails. The exception's 'detail' attribute
    describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest the freshly generated manifest, keeping the lines so that they
    # can be diffed against the recorded .manifest if verification fails.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        stream = open(manifest_file)
        try:
            digest.update(stream.read())
        finally:
            stream.close()
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    # Every placeholder must be named: the right-hand side is a mapping, and
    # a bare %s here raises TypeError instead of producing the report.
    error.detail = _(" Expected: %(required_digest)s\n"
                     " Actual: %(actual_digest)s\n"
                     ".manifest digest: %(manifest_digest)s\n\n") \
                     % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        stream = open(manifest_file)
        try:
            recorded = stream.readlines()
        finally:
            stream.close()
        diff = difflib.unified_diff(recorded, lines,
                                    'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                          "The contents of the directory have changed:\n") + \
                          ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
250 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
251 # - A regular file suddenly turns into a symlink?
252 # - We find a device file (users can hard-link them if on the same device)
253 def copy_tree_with_verify(source
, target
, manifest_data
, required_digest
):
254 """Copy directory source to be a subdirectory of target if it matches the required_digest.
255 manifest_data is normally source/.manifest. source and manifest_data are not trusted
256 (will typically be under the control of another user).
257 The copy is first done to a temporary directory in target, then renamed to the final name
258 only if correct. Therefore, an invalid 'target/required_digest' will never exist.
259 A successful return means that target/required_digest now exists (whether we created it or not)."""
261 from logging
import info
263 alg
, digest_value
= splitID(required_digest
)
265 if isinstance(alg
, OldSHA1
):
266 raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))
268 digest
= alg
.new_digest()
269 digest
.update(manifest_data
)
270 manifest_digest
= alg
.getID(digest
)
272 if manifest_digest
!= required_digest
:
273 raise BadDigest(_("Manifest has been tampered with!\n"
274 "Manifest digest: %(actual_digest)s\n"
275 "Directory name : %(required_digest)s")
276 % {'actual_digest': manifest_digest
, 'required_digest': required_digest
})
278 target_impl
= os
.path
.join(target
, required_digest
)
279 if os
.path
.isdir(target_impl
):
280 info(_("Target directory '%s' already exists"), target_impl
)
283 # We've checked that the source's manifest matches required_digest, so it
284 # is what we want. Make a list of all the files we need to copy...
286 wanted
= _parse_manifest(manifest_data
)
288 tmpdir
= tempfile
.mkdtemp(prefix
= 'tmp-copy-', dir = target
)
290 _copy_files(alg
, wanted
, source
, tmpdir
)
293 raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
294 '\n- '.join(wanted
.keys()))
296 # Make directories read-only (files are already RO)
297 for root
, dirs
, files
in os
.walk(tmpdir
):
299 path
= os
.path
.join(root
, d
)
300 mode
= os
.stat(path
).st_mode
301 os
.chmod(path
, mode
& 0555)
303 # Check that the copy is correct
304 actual_digest
= alg
.getID(add_manifest_file(tmpdir
, alg
))
305 if actual_digest
!= required_digest
:
306 raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
307 "Unless the target was modified during the copy, this is a BUG\n"
308 "in 0store and should be reported.\n"
309 "Expected: %(required_digest)s\n"
310 "Actual: %(actual_digest)s") % {'required_digest': required_digest
, 'actual_digest': actual_digest
})
311 os
.rename(tmpdir
, target_impl
)
312 # TODO: catch already-exists, delete tmpdir and return success
314 info(_("Deleting tmpdir '%s'") % tmpdir
)
315 from zeroinstall
.support
import ro_rmtree
319 def _parse_manifest(manifest_data
):
320 """Parse a manifest file.
321 @param manifest_data: the contents of the manifest file
322 @type manifest_data: str
323 @return: a mapping from paths to information about that path
324 @rtype: {str: tuple}"""
327 for line
in manifest_data
.split('\n'):
330 data
= line
.split(' ', 1)
331 if len(data
) != 2: raise BadDigest(_("Bad line '%s'") % line
)
333 if not path
.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line
)
337 data
= line
.split(' ', 3)
338 path
= os
.path
.join(dir, data
[-1])
339 if len(data
) != 4: raise BadDigest(_("Bad line '%s'") % line
)
341 data
= line
.split(' ', 4)
342 path
= os
.path
.join(dir, data
[-1])
343 if len(data
) != 5: raise BadDigest(_("Bad line '%s'") % line
)
345 raise BadDigest(_('Duplicate entry "%s"') % line
)
346 wanted
[path
] = data
[:-1]
349 def _copy_files(alg
, wanted
, source
, target
):
350 """Scan for files under 'source'. For each one:
351 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
352 then copy it into 'target'.
353 If it's not in wanted, warn and skip it.
354 On exit, wanted contains only files that were not found."""
355 from logging
import warn
357 for line
in alg
.generate_manifest(source
):
359 type, name
= line
.split(' ', 1)
360 assert name
.startswith('/')
364 type, actual_digest
, actual_size
, name
= line
.split(' ', 3)
365 path
= os
.path
.join(dir, name
)
367 assert line
[0] in 'XF'
368 type, actual_digest
, actual_mtime
, actual_size
, name
= line
.split(' ', 4)
369 path
= os
.path
.join(dir, name
)
371 required_details
= wanted
.pop(path
)
373 warn(_("Skipping file not in manifest: '%s'"), path
)
375 if required_details
[0] != type:
376 raise BadDigest(_("Item '%s' has wrong type!") % path
)
378 os
.mkdir(os
.path
.join(target
, path
))
380 required_type
, required_digest
, required_mtime
, required_size
= required_details
381 if required_size
!= actual_size
:
382 raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
383 "%(required_size)s according to manifest)") %
384 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
385 required_mtime
= int(required_mtime
)
386 dest_path
= os
.path
.join(target
, path
)
391 copy_with_verify(os
.path
.join(source
, path
),
396 os
.utime(dest_path
, (required_mtime
, required_mtime
))
398 required_type
, required_digest
, required_size
= required_details
399 if required_size
!= actual_size
:
400 raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
401 "%(required_size)s according to manifest)") %
402 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
403 symlink_target
= os
.readlink(os
.path
.join(source
, path
))
404 symlink_digest
= alg
.new_digest()
405 symlink_digest
.update(symlink_target
)
406 if symlink_digest
.hexdigest() != required_digest
:
407 raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
408 "%(digest)s according to manifest)") % {'path': path
, 'digest': required_digest
})
409 dest_path
= os
.path
.join(target
, path
)
410 os
.symlink(symlink_target
, dest_path
)
412 raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': type, 'path': path
})
414 class HashLibAlgorithm(Algorithm
):
415 new_digest
= None # Constructor for digest objects
417 def __init__(self
, name
):
419 self
.new_digest
= sha1_new
420 self
.name
= 'sha1new'
422 self
.new_digest
= getattr(hashlib
, name
)
425 def generate_manifest(self
, root
):
427 # To ensure that a line-by-line comparison of the manifests
428 # is possible, we require that filenames don't contain newlines.
429 # Otherwise, you can name a file so that the part after the \n
430 # would be interpreted as another line in the manifest.
431 if '\n' in sub
: raise BadDigest(_("Newline in filename '%s'") % sub
)
432 assert sub
.startswith('/')
434 full
= os
.path
.join(root
, sub
[1:])
435 info
= os
.lstat(full
)
436 new_digest
= self
.new_digest
439 if not stat
.S_ISDIR(m
): raise Exception(_('Not a directory: "%s"') % full
)
442 items
= os
.listdir(full
)
446 path
= os
.path
.join(root
, sub
[1:], leaf
)
447 info
= os
.lstat(path
)
451 if leaf
== '.manifest': continue
453 d
= new_digest(file(path
).read()).hexdigest()
455 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
457 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
458 elif stat
.S_ISLNK(m
):
459 target
= os
.readlink(path
)
460 d
= new_digest(target
).hexdigest()
461 # Note: Can't use utime on symlinks, so skip mtime
462 # Note: eCryptfs may report length as zero, so count ourselves instead
463 yield "S %s %s %s" % (d
, len(target
), leaf
)
464 elif stat
.S_ISDIR(m
):
467 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
470 for y
in recurse(os
.path
.join(sub
, x
)): yield y
473 for x
in recurse('/'): yield x
475 def getID(self
, digest
):
476 return self
.name
+ '=' + digest
.hexdigest()
480 'sha1new': HashLibAlgorithm('sha1'),
483 if hashlib
is not None:
484 algorithms
['sha256'] = HashLibAlgorithm('sha256')
486 def fixup_permissions(root
):
487 """Set permissions recursively for children of root:
488 - If any X bit is set, they all must be.
489 - World readable, non-writable.
490 @raise Exception: if there are unsafe special bits set (setuid, etc)."""
492 for main
, dirs
, files
in os
.walk(root
):
493 for x
in ['.'] + files
:
494 full
= os
.path
.join(main
, x
)
496 raw_mode
= os
.lstat(full
).st_mode
497 if stat
.S_ISLNK(raw_mode
): continue
499 mode
= stat
.S_IMODE(raw_mode
)
501 raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full
, 'mode': oct(mode
)})