2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
21 A top-level ".manifest" file is ignored.
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
28 import os
, stat
, base64
29 from zeroinstall
import SafeException
, _
, logger
30 from zeroinstall
.zerostore
import BadDigest
, parse_algorithm_digest_pair
, format_algorithm_digest_pair
33 sha1_new
= hashlib
.sha1
36 """Abstract base class for algorithms.
37 An algorithm knows how to generate a manifest from a directory tree.
38 @ivar rating: how much we like this algorithm (higher is better)
def generate_manifest(self, root):
    """Produce the manifest for the directory tree rooted at 'root'.
    Concrete algorithms return an iterator that yields the manifest one line
    at a time; this base implementation only signals that it must be overridden.
    @param root: top of the directory tree to describe
    @raise Exception: always, because this method is abstract"""
    raise Exception("Abstract")
47 """Create a new digest. Call update() on the returned object to digest the data.
48 Call getID() to turn it into a full ID string."""
49 raise Exception('Abstract')
def getID(self, digest):
    """Turn a digest object (as created by new_digest) into a full ID string.
    @param digest: the digest to convert
    @raise Exception: always, because this method is abstract"""
    raise Exception("Abstract")
55 class OldSHA1(Algorithm
):
56 """@deprecated: Injector versions before 0.20 only supported this algorithm."""
60 def generate_manifest(self
, root
):
62 # To ensure that a line-by-line comparison of the manifests
63 # is possible, we require that filenames don't contain newlines.
64 # Otherwise, you can name a file so that the part after the \n
65 # would be interpreted as another line in the manifest.
66 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
67 assert sub
.startswith('/')
69 if sub
== '/.manifest': return
71 full
= os
.path
.join(root
, sub
[1:].replace('/', os
.sep
))
77 yield "D %s %s" % (int(info
.st_mtime
), sub
)
78 items
= os
.listdir(full
)
81 if not subdir
.endswith('/'):
84 for y
in recurse(subdir
+ x
):
89 leaf
= os
.path
.basename(sub
[1:])
91 with
open(full
, 'rb') as stream
:
92 d
= sha1_new(stream
.read()).hexdigest() # XXX could be very large!
94 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
96 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
98 target
= os
.readlink(full
).encode('utf-8')
99 d
= sha1_new(target
).hexdigest()
100 # Note: Can't use utime on symlinks, so skip mtime
101 # Note: eCryptfs may report length as zero, so count ourselves instead
102 yield "S %s %s %s" % (d
, len(target
), leaf
)
104 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
106 for x
in recurse('/'): yield x
108 def new_digest(self
):
def getID(self, digest):
    """Convert a digest object into a full ID of the form 'sha1=<hex digest>'.
    @param digest: object providing hexdigest()
    @return: the ID string"""
    hex_value = digest.hexdigest()
    return 'sha1=' + hex_value
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key into the module-level 'algorithms' table
    @return: the entry stored under that name
    @raise BadDigest: if the name is unknown."""
    try:
        return algorithms[name]
    except KeyError:
        # Report a missing entry as BadDigest, as the docstring promises,
        # rather than letting the raw KeyError escape to the caller.
        raise BadDigest(_("Unknown algorithm '%s'") % name)
def generate_manifest(root, alg = 'sha1'):
    """Generate the manifest for 'root' using the algorithm named 'alg'.
    @param root: top of the directory tree to describe
    @param alg: name of the algorithm, as accepted by L{get_algorithm}
    @deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
126 def add_manifest_file(dir, digest_or_alg
):
127 """Writes a .manifest file into 'dir', and returns the digest.
128 You should call fixup_permissions before this to ensure that the permissions are correct.
129 On exit, dir itself has mode 555. Subdirectories are not changed.
130 @param dir: root of the implementation
131 @param digest_or_alg: should be an instance of Algorithm. Passing a digest
132 here is deprecated."""
133 mfile
= os
.path
.join(dir, '.manifest')
134 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
135 raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
137 if isinstance(digest_or_alg
, Algorithm
):
139 digest
= alg
.new_digest()
141 digest
= digest_or_alg
142 alg
= get_algorithm('sha1')
143 for line
in alg
.generate_manifest(dir):
144 manifest
+= line
+ '\n'
145 manifest
= manifest
.encode('utf-8')
146 digest
.update(manifest
)
149 with
open(mfile
, 'wb') as stream
:
151 stream
.write(manifest
)
152 os
.chmod(mfile
, 0o444)
156 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
157 where 'alg' is an instance of Algorithm and 'value' is a string.
158 @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
159 alg
, digest
= parse_algorithm_digest_pair(id)
160 return (get_algorithm(alg
), digest
)
162 def copy_with_verify(src
, dest
, mode
, alg
, required_digest
):
163 """Copy path src to dest, checking that the contents give the right digest.
164 dest must not exist. New file is created with a mode of 'mode & umask'.
165 @param src: source filename
167 @param dest: target filename
169 @param mode: target mode
171 @param alg: algorithm to generate digest
172 @type alg: L{Algorithm}
173 @param required_digest: expected digest value
174 @type required_digest: str
175 @raise BadDigest: the contents of the file don't match required_digest"""
176 with
open(src
, 'rb') as src_obj
:
177 dest_fd
= os
.open(dest
, os
.O_WRONLY | os
.O_CREAT | os
.O_EXCL
, mode
)
179 digest
= alg
.new_digest()
181 data
= src_obj
.read(256)
185 written
= os
.write(dest_fd
, data
)
187 data
= data
[written
:]
190 actual
= digest
.hexdigest()
191 if actual
== required_digest
: return
193 raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
194 "Expected: %(required_digest)s\n"
195 "Actual: %(actual_digest)s") % {'src': src
, 'required_digest': required_digest
, 'actual_digest': actual
})
197 def verify(root
, required_digest
= None):
198 """Ensure that directory 'dir' generates the given digest.
199 For a non-error return:
200 - Dir's name must be a digest (in the form "alg=value")
201 - The calculated digest of the contents must match this name.
202 - If there is a .manifest file, then its digest must also match.
203 @raise BadDigest: if verification fails."""
204 if required_digest
is None:
205 required_digest
= os
.path
.basename(root
)
206 alg
= splitID(required_digest
)[0]
208 digest
= alg
.new_digest()
210 for line
in alg
.generate_manifest(root
):
212 digest
.update(line
.encode('utf-8'))
214 actual_digest
= alg
.getID(digest
)
216 manifest_file
= os
.path
.join(root
, '.manifest')
217 if os
.path
.isfile(manifest_file
):
218 digest
= alg
.new_digest()
219 with
open(manifest_file
, 'rb') as stream
:
220 digest
.update(stream
.read())
221 manifest_digest
= alg
.getID(digest
)
223 manifest_digest
= None
225 if required_digest
== actual_digest
== manifest_digest
:
228 error
= BadDigest(_("Cached item does NOT verify."))
230 error
.detail
= _(" Expected: %(required_digest)s\n"
231 " Actual: %(actual_digest)s\n"
232 ".manifest digest: %(manifest_digest)s\n\n") \
233 % {'required_digest': required_digest
, 'actual_digest': actual_digest
, 'manifest_digest': manifest_digest
or _('No .manifest file')}
235 if manifest_digest
is None:
236 error
.detail
+= _("No .manifest, so no further details available.")
237 elif manifest_digest
== actual_digest
:
238 error
.detail
+= _("The .manifest file matches the actual contents. Very strange!")
239 elif manifest_digest
== required_digest
:
241 with
open(manifest_file
, 'rt') as stream
:
242 diff
= difflib
.unified_diff(stream
.readlines(), lines
,
243 'Recorded', 'Actual')
244 error
.detail
+= _("The .manifest file matches the directory name.\n" \
245 "The contents of the directory have changed:\n") + \
247 elif required_digest
== actual_digest
:
248 error
.detail
+= _("The directory contents are correct, but the .manifest file is wrong!")
250 error
.detail
+= _("The .manifest file matches neither of the other digests. Odd.")
253 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
# - A regular file suddenly turns into a symlink?
255 # - We find a device file (users can hard-link them if on the same device)
256 def copy_tree_with_verify(source
, target
, manifest_data
, required_digest
):
257 """Copy directory source to be a subdirectory of target if it matches the required_digest.
258 manifest_data is normally source/.manifest. source and manifest_data are not trusted
259 (will typically be under the control of another user).
260 The copy is first done to a temporary directory in target, then renamed to the final name
261 only if correct. Therefore, an invalid 'target/required_digest' will never exist.
A successful return means that target/required_digest now exists (whether we created it or not)."""
265 alg
, digest_value
= splitID(required_digest
)
267 if isinstance(alg
, OldSHA1
):
268 raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))
270 digest
= alg
.new_digest()
271 digest
.update(manifest_data
)
272 manifest_digest
= alg
.getID(digest
)
274 if manifest_digest
!= required_digest
:
275 raise BadDigest(_("Manifest has been tampered with!\n"
276 "Manifest digest: %(actual_digest)s\n"
277 "Directory name : %(required_digest)s")
278 % {'actual_digest': manifest_digest
, 'required_digest': required_digest
})
280 target_impl
= os
.path
.join(target
, required_digest
)
281 if os
.path
.isdir(target_impl
):
282 logger
.info(_("Target directory '%s' already exists"), target_impl
)
285 # We've checked that the source's manifest matches required_digest, so it
286 # is what we want. Make a list of all the files we need to copy...
288 wanted
= _parse_manifest(manifest_data
.decode('utf-8'))
290 tmpdir
= tempfile
.mkdtemp(prefix
= 'tmp-copy-', dir = target
)
292 _copy_files(alg
, wanted
, source
, tmpdir
)
295 raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
296 '\n- '.join(wanted
.keys()))
298 # Make directories read-only (files are already RO)
299 for root
, dirs
, files
in os
.walk(tmpdir
):
301 path
= os
.path
.join(root
, d
)
302 mode
= os
.stat(path
).st_mode
303 os
.chmod(path
, mode
& 0o555)
305 # Check that the copy is correct
306 actual_digest
= alg
.getID(add_manifest_file(tmpdir
, alg
))
307 if actual_digest
!= required_digest
:
308 raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
309 "Unless the target was modified during the copy, this is a BUG\n"
310 "in 0store and should be reported.\n"
311 "Expected: %(required_digest)s\n"
312 "Actual: %(actual_digest)s") % {'required_digest': required_digest
, 'actual_digest': actual_digest
})
314 os
.chmod(tmpdir
, 0o755) # need write permission to rename on MacOS X
315 os
.rename(tmpdir
, target_impl
)
316 os
.chmod(target_impl
, 0o555)
319 if not os
.path
.isdir(target_impl
):
321 # else someone else installed it already - return success
323 if tmpdir
is not None:
324 logger
.info(_("Deleting tmpdir '%s'") % tmpdir
)
325 from zeroinstall
.support
import ro_rmtree
328 def _parse_manifest(manifest_data
):
329 """Parse a manifest file.
330 @param manifest_data: the contents of the manifest file
331 @type manifest_data: str
332 @return: a mapping from paths to information about that path
333 @rtype: {str: tuple}"""
336 for line
in manifest_data
.split('\n'):
339 data
= line
.split(' ', 1)
340 if len(data
) != 2: raise BadDigest(_("Bad line '%s'") % line
)
342 if not path
.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line
)
346 data
= line
.split(' ', 3)
347 path
= os
.path
.join(dir, data
[-1])
348 if len(data
) != 4: raise BadDigest(_("Bad line '%s'") % line
)
350 data
= line
.split(' ', 4)
351 path
= os
.path
.join(dir, data
[-1])
352 if len(data
) != 5: raise BadDigest(_("Bad line '%s'") % line
)
354 raise BadDigest(_('Duplicate entry "%s"') % line
)
355 wanted
[path
] = data
[:-1]
358 def _copy_files(alg
, wanted
, source
, target
):
359 """Scan for files under 'source'. For each one:
360 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
361 then copy it into 'target'.
362 If it's not in wanted, warn and skip it.
363 On exit, wanted contains only files that were not found."""
365 for line
in alg
.generate_manifest(source
):
367 type, name
= line
.split(' ', 1)
368 assert name
.startswith('/')
372 type, actual_digest
, actual_size
, name
= line
.split(' ', 3)
373 path
= os
.path
.join(dir, name
)
375 assert line
[0] in 'XF'
376 type, actual_digest
, actual_mtime
, actual_size
, name
= line
.split(' ', 4)
377 path
= os
.path
.join(dir, name
)
379 required_details
= wanted
.pop(path
)
381 logger
.warn(_("Skipping file not in manifest: '%s'"), path
)
383 if required_details
[0] != type:
384 raise BadDigest(_("Item '%s' has wrong type!") % path
)
386 os
.mkdir(os
.path
.join(target
, path
))
388 required_type
, required_digest
, required_mtime
, required_size
= required_details
389 if required_size
!= actual_size
:
390 raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
391 "%(required_size)s according to manifest)") %
392 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
393 required_mtime
= int(required_mtime
)
394 dest_path
= os
.path
.join(target
, path
)
399 copy_with_verify(os
.path
.join(source
, path
),
404 os
.utime(dest_path
, (required_mtime
, required_mtime
))
406 required_type
, required_digest
, required_size
= required_details
407 if required_size
!= actual_size
:
408 raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
409 "%(required_size)s according to manifest)") %
410 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
411 symlink_target
= os
.readlink(os
.path
.join(source
, path
))
412 symlink_digest
= alg
.new_digest()
413 symlink_digest
.update(symlink_target
.encode('utf-8'))
414 if symlink_digest
.hexdigest() != required_digest
:
415 raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
416 "%(digest)s according to manifest)") % {'path': path
, 'digest': required_digest
})
417 dest_path
= os
.path
.join(target
, path
)
418 os
.symlink(symlink_target
, dest_path
)
420 raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': type, 'path': path
})
422 class HashLibAlgorithm(Algorithm
):
423 new_digest
= None # Constructor for digest objects
425 def __init__(self
, name
, rating
, hash_name
= None):
427 self
.new_digest
= getattr(hashlib
, hash_name
or name
)
430 def generate_manifest(self
, root
):
432 # To ensure that a line-by-line comparison of the manifests
433 # is possible, we require that filenames don't contain newlines.
434 # Otherwise, you can name a file so that the part after the \n
435 # would be interpreted as another line in the manifest.
436 if '\n' in sub
: raise BadDigest(_("Newline in filename '%s'") % sub
)
437 assert sub
.startswith('/')
439 full
= os
.path
.join(root
, sub
[1:])
440 info
= os
.lstat(full
)
441 new_digest
= self
.new_digest
444 if not stat
.S_ISDIR(m
): raise Exception(_('Not a directory: "%s"') % full
)
447 items
= os
.listdir(full
)
451 path
= os
.path
.join(root
, sub
[1:], leaf
)
452 info
= os
.lstat(path
)
456 if leaf
== '.manifest': continue
458 with
open(path
, 'rb') as stream
:
459 d
= new_digest(stream
.read()).hexdigest()
461 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
463 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
464 elif stat
.S_ISLNK(m
):
465 target
= os
.readlink(path
).encode('utf-8')
466 d
= new_digest(target
).hexdigest()
467 # Note: Can't use utime on symlinks, so skip mtime
468 # Note: eCryptfs may report length as zero, so count ourselves instead
469 yield "S %s %s %s" % (d
, len(target
), leaf
)
470 elif stat
.S_ISDIR(m
):
473 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
476 if not sub
.endswith('/'):
479 # Note: "sub" is always Unix style. Don't use os.path.join here.
480 for y
in recurse(sub
+ x
): yield y
483 for x
in recurse('/'): yield x
485 def getID(self
, digest
):
486 if self
.name
in ('sha1new', 'sha256'):
487 digest_str
= digest
.hexdigest()
489 # Base32-encode newer algorithms to make the digest shorter.
490 # We can't use base64 as Windows is case insensitive.
491 # There's no need for padding (and = characters in paths cause problems for some software).
492 digest_str
= base64
.b32encode(digest
.digest()).rstrip(b
'=').decode('ascii')
493 return format_algorithm_digest_pair(self
.name
, digest_str
)
497 'sha1new': HashLibAlgorithm('sha1new', 50, 'sha1'),
498 'sha256': HashLibAlgorithm('sha256', 80),
499 'sha256new': HashLibAlgorithm('sha256new', 90, 'sha256'),
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    @param root: directory whose contents (and root itself) are adjusted
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""
    for main, dirs, files in os.walk(root):
        # '.' puts each visited directory through the same checks as its files.
        for x in ['.'] + files:
            full = os.path.join(main, x)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode):
                # os.chmod would act on the link's target, so leave symlinks alone.
                continue

            mode = stat.S_IMODE(raw_mode)
            # Anything outside the nine rwx bits (setuid, setgid, sticky) is rejected.
            if mode & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})
            if mode & 0o111:
                # Executable by someone: make it r-x for everyone.
                os.chmod(full, 0o555)
            else:
                # Not executable: make it read-only for everyone.
                os.chmod(full, 0o444)