2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
21 A top-level ".manifest" file is ignored.
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
29 from zeroinstall
import SafeException
, _
30 from zeroinstall
.zerostore
import BadDigest
34 sha1_new
= hashlib
.sha1
41 """Abstract base class for algorithms.
42 An algorithm knows how to generate a manifest from a directory tree.
43 @ivar rating: how much we like this algorithm (higher is better)
def generate_manifest(self, root):
    """Produce the manifest for the directory tree rooted at 'root'.
    This base implementation is a placeholder and only raises.
    @param root: path of the directory tree to describe
    @return: an iterator that yields each line of the manifest
    @raise Exception: always, with the message 'Abstract'"""
    abstract_error = Exception('Abstract')
    raise abstract_error
52 """Create a new digest. Call update() on the returned object to digest the data.
53 Call getID() to turn it into a full ID string."""
54 raise Exception('Abstract')
def getID(self, digest):
    """Turn a digest (as returned by new_digest) into a full ID string.
    This base implementation is a placeholder and only raises.
    @param digest: the digest object to convert
    @raise Exception: always, with the message 'Abstract'"""
    abstract_error = Exception('Abstract')
    raise abstract_error
60 class OldSHA1(Algorithm
):
61 """@deprecated: Injector versions before 0.20 only supported this algorithm."""
65 def generate_manifest(self
, root
):
67 # To ensure that a line-by-line comparison of the manifests
68 # is possible, we require that filenames don't contain newlines.
69 # Otherwise, you can name a file so that the part after the \n
70 # would be interpreted as another line in the manifest.
71 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
72 assert sub
.startswith('/')
74 if sub
== '/.manifest': return
76 full
= os
.path
.join(root
, sub
[1:].replace('/', os
.sep
))
82 yield "D %s %s" % (int(info
.st_mtime
), sub
)
83 items
= os
.listdir(full
)
86 if not subdir
.endswith('/'):
89 for y
in recurse(subdir
+ x
):
94 leaf
= os
.path
.basename(sub
[1:])
96 d
= sha1_new(file(full
).read()).hexdigest()
98 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
100 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
101 elif stat
.S_ISLNK(m
):
102 target
= os
.readlink(full
)
103 d
= sha1_new(target
).hexdigest()
104 # Note: Can't use utime on symlinks, so skip mtime
105 # Note: eCryptfs may report length as zero, so count ourselves instead
106 yield "S %s %s %s" % (d
, len(target
), leaf
)
108 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
110 for x
in recurse('/'): yield x
112 def new_digest(self
):
def getID(self, digest):
    """Build the full ID string for a digest.
    @param digest: digest object providing hexdigest()
    @return: the text 'sha1=' followed by the digest's hex value"""
    hex_value = digest.hexdigest()
    return 'sha1=' + hex_value
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key under which the algorithm is stored in 'algorithms'
    @return: the entry stored in 'algorithms' for that name
    @raise BadDigest: if the name is unknown."""
    try:
        return algorithms[name]
    except KeyError:
        # Report an unknown name as BadDigest, as documented, rather than
        # letting the raw lookup error escape to the caller.
        raise BadDigest(_("Unknown algorithm '%s'") % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to describe
    @param alg: name of the algorithm, resolved with get_algorithm
    @return: whatever the selected algorithm's generate_manifest(root) returns"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
130 def add_manifest_file(dir, digest_or_alg
):
131 """Writes a .manifest file into 'dir', and returns the digest.
132 You should call fixup_permissions before this to ensure that the permissions are correct.
133 On exit, dir itself has mode 555. Subdirectories are not changed.
134 @param dir: root of the implementation
135 @param digest_or_alg: should be an instance of Algorithm. Passing a digest
136 here is deprecated."""
137 mfile
= os
.path
.join(dir, '.manifest')
138 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
139 raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
141 if isinstance(digest_or_alg
, Algorithm
):
143 digest
= alg
.new_digest()
145 digest
= digest_or_alg
146 alg
= get_algorithm('sha1')
147 for line
in alg
.generate_manifest(dir):
148 manifest
+= line
+ '\n'
149 digest
.update(manifest
)
152 stream
= file(mfile
, 'wb')
154 stream
.write(manifest
)
156 os
.chmod(mfile
, 0o444)
160 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
161 where 'alg' is an instance of Algorithm and 'value' is a string.
162 @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
163 parts
= id.split('=', 1)
165 raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
166 return (get_algorithm(parts
[0]), parts
[1])
168 def copy_with_verify(src
, dest
, mode
, alg
, required_digest
):
169 """Copy path src to dest, checking that the contents give the right digest.
170 dest must not exist. New file is created with a mode of 'mode & umask'.
171 @param src: source filename
173 @param dest: target filename
175 @param mode: target mode
177 @param alg: algorithm to generate digest
178 @type alg: L{Algorithm}
179 @param required_digest: expected digest value
180 @type required_digest: str
181 @raise BadDigest: the contents of the file don't match required_digest"""
183 dest_fd
= os
.open(dest
, os
.O_WRONLY | os
.O_CREAT | os
.O_EXCL
, mode
)
185 digest
= alg
.new_digest()
187 data
= src_obj
.read(256)
191 written
= os
.write(dest_fd
, data
)
193 data
= data
[written
:]
197 actual
= digest
.hexdigest()
198 if actual
== required_digest
: return
200 raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
201 "Expected: %(required_digest)s\n"
202 "Actual: %(actual_digest)s") % {'src': src
, 'required_digest': required_digest
, 'actual_digest': actual
})
204 def verify(root
, required_digest
= None):
205 """Ensure that directory 'dir' generates the given digest.
206 For a non-error return:
207 - Dir's name must be a digest (in the form "alg=value")
208 - The calculated digest of the contents must match this name.
209 - If there is a .manifest file, then its digest must also match.
210 @raise BadDigest: if verification fails."""
211 if required_digest
is None:
212 required_digest
= os
.path
.basename(root
)
213 alg
= splitID(required_digest
)[0]
215 digest
= alg
.new_digest()
217 for line
in alg
.generate_manifest(root
):
221 actual_digest
= alg
.getID(digest
)
223 manifest_file
= os
.path
.join(root
, '.manifest')
224 if os
.path
.isfile(manifest_file
):
225 digest
= alg
.new_digest()
226 digest
.update(file(manifest_file
, 'rb').read())
227 manifest_digest
= alg
.getID(digest
)
229 manifest_digest
= None
231 if required_digest
== actual_digest
== manifest_digest
:
234 error
= BadDigest(_("Cached item does NOT verify."))
236 error
.detail
= _(" Expected: %(required_digest)s\n"
237 " Actual: %(actual_digest)s\n"
238 ".manifest digest: %(manifest_digest)s\n\n") \
239 % {'required_digest': required_digest
, 'actual_digest': actual_digest
, 'manifest_digest': manifest_digest
or _('No .manifest file')}
241 if manifest_digest
is None:
242 error
.detail
+= _("No .manifest, so no further details available.")
243 elif manifest_digest
== actual_digest
:
244 error
.detail
+= _("The .manifest file matches the actual contents. Very strange!")
245 elif manifest_digest
== required_digest
:
247 diff
= difflib
.unified_diff(file(manifest_file
, 'rb').readlines(), lines
,
248 'Recorded', 'Actual')
249 error
.detail
+= _("The .manifest file matches the directory name.\n" \
250 "The contents of the directory have changed:\n") + \
252 elif required_digest
== actual_digest
:
253 error
.detail
+= _("The directory contents are correct, but the .manifest file is wrong!")
255 error
.detail
+= _("The .manifest file matches neither of the other digests. Odd.")
258 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
259 # - A regular file suddenly turns into a symlink?
260 # - We find a device file (users can hard-link them if on the same device)
261 def copy_tree_with_verify(source
, target
, manifest_data
, required_digest
):
262 """Copy directory source to be a subdirectory of target if it matches the required_digest.
263 manifest_data is normally source/.manifest. source and manifest_data are not trusted
264 (will typically be under the control of another user).
265 The copy is first done to a temporary directory in target, then renamed to the final name
266 only if correct. Therefore, an invalid 'target/required_digest' will never exist.
267 A successful return means that target/required_digest now exists (whether we created it or not)."""
269 from logging
import info
271 alg
, digest_value
= splitID(required_digest
)
273 if isinstance(alg
, OldSHA1
):
274 raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))
276 digest
= alg
.new_digest()
277 digest
.update(manifest_data
)
278 manifest_digest
= alg
.getID(digest
)
280 if manifest_digest
!= required_digest
:
281 raise BadDigest(_("Manifest has been tampered with!\n"
282 "Manifest digest: %(actual_digest)s\n"
283 "Directory name : %(required_digest)s")
284 % {'actual_digest': manifest_digest
, 'required_digest': required_digest
})
286 target_impl
= os
.path
.join(target
, required_digest
)
287 if os
.path
.isdir(target_impl
):
288 info(_("Target directory '%s' already exists"), target_impl
)
291 # We've checked that the source's manifest matches required_digest, so it
292 # is what we want. Make a list of all the files we need to copy...
294 wanted
= _parse_manifest(manifest_data
)
296 tmpdir
= tempfile
.mkdtemp(prefix
= 'tmp-copy-', dir = target
)
298 _copy_files(alg
, wanted
, source
, tmpdir
)
301 raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
302 '\n- '.join(wanted
.keys()))
304 # Make directories read-only (files are already RO)
305 for root
, dirs
, files
in os
.walk(tmpdir
):
307 path
= os
.path
.join(root
, d
)
308 mode
= os
.stat(path
).st_mode
309 os
.chmod(path
, mode
& 0o555)
311 # Check that the copy is correct
312 actual_digest
= alg
.getID(add_manifest_file(tmpdir
, alg
))
313 if actual_digest
!= required_digest
:
314 raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
315 "Unless the target was modified during the copy, this is a BUG\n"
316 "in 0store and should be reported.\n"
317 "Expected: %(required_digest)s\n"
318 "Actual: %(actual_digest)s") % {'required_digest': required_digest
, 'actual_digest': actual_digest
})
320 os
.rename(tmpdir
, target_impl
)
323 if not os
.path
.isdir(target_impl
):
325 # else someone else installed it already - return success
327 if tmpdir
is not None:
328 info(_("Deleting tmpdir '%s'") % tmpdir
)
329 from zeroinstall
.support
import ro_rmtree
332 def _parse_manifest(manifest_data
):
333 """Parse a manifest file.
334 @param manifest_data: the contents of the manifest file
335 @type manifest_data: str
336 @return: a mapping from paths to information about that path
337 @rtype: {str: tuple}"""
340 for line
in manifest_data
.split('\n'):
343 data
= line
.split(' ', 1)
344 if len(data
) != 2: raise BadDigest(_("Bad line '%s'") % line
)
346 if not path
.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line
)
350 data
= line
.split(' ', 3)
351 path
= os
.path
.join(dir, data
[-1])
352 if len(data
) != 4: raise BadDigest(_("Bad line '%s'") % line
)
354 data
= line
.split(' ', 4)
355 path
= os
.path
.join(dir, data
[-1])
356 if len(data
) != 5: raise BadDigest(_("Bad line '%s'") % line
)
358 raise BadDigest(_('Duplicate entry "%s"') % line
)
359 wanted
[path
] = data
[:-1]
362 def _copy_files(alg
, wanted
, source
, target
):
363 """Scan for files under 'source'. For each one:
364 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
365 then copy it into 'target'.
366 If it's not in wanted, warn and skip it.
367 On exit, wanted contains only files that were not found."""
368 from logging
import warn
370 for line
in alg
.generate_manifest(source
):
372 type, name
= line
.split(' ', 1)
373 assert name
.startswith('/')
377 type, actual_digest
, actual_size
, name
= line
.split(' ', 3)
378 path
= os
.path
.join(dir, name
)
380 assert line
[0] in 'XF'
381 type, actual_digest
, actual_mtime
, actual_size
, name
= line
.split(' ', 4)
382 path
= os
.path
.join(dir, name
)
384 required_details
= wanted
.pop(path
)
386 warn(_("Skipping file not in manifest: '%s'"), path
)
388 if required_details
[0] != type:
389 raise BadDigest(_("Item '%s' has wrong type!") % path
)
391 os
.mkdir(os
.path
.join(target
, path
))
393 required_type
, required_digest
, required_mtime
, required_size
= required_details
394 if required_size
!= actual_size
:
395 raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
396 "%(required_size)s according to manifest)") %
397 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
398 required_mtime
= int(required_mtime
)
399 dest_path
= os
.path
.join(target
, path
)
404 copy_with_verify(os
.path
.join(source
, path
),
409 os
.utime(dest_path
, (required_mtime
, required_mtime
))
411 required_type
, required_digest
, required_size
= required_details
412 if required_size
!= actual_size
:
413 raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
414 "%(required_size)s according to manifest)") %
415 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
416 symlink_target
= os
.readlink(os
.path
.join(source
, path
))
417 symlink_digest
= alg
.new_digest()
418 symlink_digest
.update(symlink_target
)
419 if symlink_digest
.hexdigest() != required_digest
:
420 raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
421 "%(digest)s according to manifest)") % {'path': path
, 'digest': required_digest
})
422 dest_path
= os
.path
.join(target
, path
)
423 os
.symlink(symlink_target
, dest_path
)
425 raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': type, 'path': path
})
427 class HashLibAlgorithm(Algorithm
):
428 new_digest
= None # Constructor for digest objects
430 def __init__(self
, name
, rating
):
432 self
.new_digest
= sha1_new
433 self
.name
= 'sha1new'
435 self
.new_digest
= getattr(hashlib
, name
)
439 def generate_manifest(self
, root
):
441 # To ensure that a line-by-line comparison of the manifests
442 # is possible, we require that filenames don't contain newlines.
443 # Otherwise, you can name a file so that the part after the \n
444 # would be interpreted as another line in the manifest.
445 if '\n' in sub
: raise BadDigest(_("Newline in filename '%s'") % sub
)
446 assert sub
.startswith('/')
448 full
= os
.path
.join(root
, sub
[1:])
449 info
= os
.lstat(full
)
450 new_digest
= self
.new_digest
453 if not stat
.S_ISDIR(m
): raise Exception(_('Not a directory: "%s"') % full
)
456 items
= os
.listdir(full
)
460 path
= os
.path
.join(root
, sub
[1:], leaf
)
461 info
= os
.lstat(path
)
465 if leaf
== '.manifest': continue
467 d
= new_digest(file(path
).read()).hexdigest()
469 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
471 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
472 elif stat
.S_ISLNK(m
):
473 target
= os
.readlink(path
)
474 d
= new_digest(target
).hexdigest()
475 # Note: Can't use utime on symlinks, so skip mtime
476 # Note: eCryptfs may report length as zero, so count ourselves instead
477 yield "S %s %s %s" % (d
, len(target
), leaf
)
478 elif stat
.S_ISDIR(m
):
481 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
484 if not sub
.endswith('/'):
487 # Note: "sub" is always Unix style. Don't use os.path.join here.
488 for y
in recurse(sub
+ x
): yield y
491 for x
in recurse('/'): yield x
def getID(self, digest):
    """Build the full ID string for a digest.
    @param digest: digest object providing hexdigest()
    @return: this algorithm's name, then '=', then the digest's hex value"""
    hex_value = digest.hexdigest()
    prefix = self.name + '='
    return prefix + hex_value
498 'sha1new': HashLibAlgorithm('sha1', 50),
501 if hashlib
is not None:
502 algorithms
['sha256'] = HashLibAlgorithm('sha256', 80)
def fixup_permissions(root):
    """Set permissions recursively for children of root:
    - If any X bit is set, they all must be.
    - World readable, non-writable.
    Each directory visited by os.walk is handled through its '.' entry,
    followed by the files it contains. Symlinks are left untouched.
    @param root: top of the directory tree to fix
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    for main, dirs, files in os.walk(root):
        for x in ['.'] + files:
            full = os.path.join(main, x)

            # lstat, so that a symlink is inspected itself, not its target
            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                continue

            mode = stat.S_IMODE(raw_mode)
            # Anything outside the plain rwx bits (setuid, setgid, sticky) is refused
            if mode & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})

            if mode & 0o111:
                # Some execute bit was set: read + execute for everyone
                os.chmod(full, 0o555)
            else:
                # No execute bit: read-only for everyone
                os.chmod(full, 0o444)