2 """Processing of implementation manifests.
4 A manifest is a string representing a directory tree, with the property
5 that two trees will generate identical manifest strings if and only if:
7 - They have exactly the same set of files, directories and symlinks.
8 - For each pair of corresponding directories in the two sets:
9 - The mtimes are the same (OldSHA1 only).
10 - For each pair of corresponding files in the two sets:
11 - The size, executable flag and mtime are the same.
12 - The contents have matching secure hash values.
13 - For each pair of corresponding symlinks in the two sets:
14 - The mtime and size are the same.
15 - The targets have matching secure hash values.
17 The manifest is typically processed with a secure hash itself. So, the idea is that
18 any significant change to the contents of the tree will change the secure hash value
21 A top-level ".manifest" file is ignored.
24 # Copyright (C) 2009, Thomas Leonard
25 # See the README file for details, or visit http://0install.net.
29 from zeroinstall
import SafeException
, _
30 from zeroinstall
.zerostore
import BadDigest
34 sha1_new
= hashlib
.sha1
41 """Abstract base class for algorithms.
42 An algorithm knows how to generate a manifest from a directory tree.
43 @ivar rating: how much we like this algorithm (higher is better)
def generate_manifest(self, root):
    """Return an iterator that yields each line of the manifest for the
    directory tree rooted at 'root'.

    This is the abstract version of the method: it never produces any lines
    and always raises.
    @param root: path of the directory at the top of the tree
    @raise NotImplementedError: always. It is a subclass of Exception, so
    callers that catch Exception continue to work."""
    # NotImplementedError is the conventional way to flag an abstract method
    # that was not overridden; the message text is kept as before.
    raise NotImplementedError('Abstract')
52 """Create a new digest. Call update() on the returned object to digest the data.
53 Call getID() to turn it into a full ID string."""
54 raise Exception('Abstract')
def getID(self, digest):
    """Convert a digest (from new_digest) to a full ID.

    This is the abstract version of the method and always raises.
    @param digest: the digest object to convert
    @raise NotImplementedError: always. It is a subclass of Exception, so
    callers that catch Exception continue to work."""
    # NotImplementedError is the conventional way to flag an abstract method
    # that was not overridden; the message text is kept as before.
    raise NotImplementedError('Abstract')
60 class OldSHA1(Algorithm
):
61 """@deprecated: Injector versions before 0.20 only supported this algorithm."""
65 def generate_manifest(self
, root
):
67 # To ensure that a line-by-line comparison of the manifests
68 # is possible, we require that filenames don't contain newlines.
69 # Otherwise, you can name a file so that the part after the \n
70 # would be interpreted as another line in the manifest.
71 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
72 assert sub
.startswith('/')
74 if sub
== '/.manifest': return
76 full
= os
.path
.join(root
, sub
[1:].replace('/', os
.sep
))
82 yield "D %s %s" % (int(info
.st_mtime
), sub
)
83 items
= os
.listdir(full
)
86 if not subdir
.endswith('/'):
89 for y
in recurse(subdir
+ x
):
94 leaf
= os
.path
.basename(sub
[1:])
96 with
open(full
, 'rb') as stream
:
97 d
= sha1_new(stream
.read()).hexdigest() # XXX could be very large!
99 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
101 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
102 elif stat
.S_ISLNK(m
):
103 target
= os
.readlink(full
)
104 d
= sha1_new(target
).hexdigest()
105 # Note: Can't use utime on symlinks, so skip mtime
106 # Note: eCryptfs may report length as zero, so count ourselves instead
107 yield "S %s %s %s" % (d
, len(target
), leaf
)
109 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
111 for x
in recurse('/'): yield x
113 def new_digest(self
):
def getID(self, digest):
    """Convert a digest object into a full ID string.

    @param digest: object providing a hexdigest() method
    @return: the text 'sha1=' followed by the hexadecimal digest"""
    hex_value = digest.hexdigest()
    return 'sha1=' + hex_value
119 def get_algorithm(name
):
120 """Look-up an L{Algorithm} by name.
121 @raise BadDigest: if the name is unknown."""
123 return algorithms
[name
]
125 raise BadDigest(_("Unknown algorithm '%s'") % name
)
def generate_manifest(root, alg='sha1'):
    """Look up the algorithm called 'alg' and return the manifest it generates for 'root'.
    @deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to generate the manifest for
    @param alg: name of the algorithm, passed to L{get_algorithm}"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
131 def add_manifest_file(dir, digest_or_alg
):
132 """Writes a .manifest file into 'dir', and returns the digest.
133 You should call fixup_permissions before this to ensure that the permissions are correct.
134 On exit, dir itself has mode 555. Subdirectories are not changed.
135 @param dir: root of the implementation
136 @param digest_or_alg: should be an instance of Algorithm. Passing a digest
137 here is deprecated."""
138 mfile
= os
.path
.join(dir, '.manifest')
139 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
140 raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
142 if isinstance(digest_or_alg
, Algorithm
):
144 digest
= alg
.new_digest()
146 digest
= digest_or_alg
147 alg
= get_algorithm('sha1')
148 for line
in alg
.generate_manifest(dir):
149 manifest
+= line
+ '\n'
150 manifest
= manifest
.encode('utf-8')
151 digest
.update(manifest
)
154 with
open(mfile
, 'wb') as stream
:
156 stream
.write(manifest
)
157 os
.chmod(mfile
, 0o444)
161 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
162 where 'alg' is an instance of Algorithm and 'value' is a string.
163 @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
164 parts
= id.split('=', 1)
166 raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
167 return (get_algorithm(parts
[0]), parts
[1])
169 def copy_with_verify(src
, dest
, mode
, alg
, required_digest
):
170 """Copy path src to dest, checking that the contents give the right digest.
171 dest must not exist. New file is created with a mode of 'mode & umask'.
172 @param src: source filename
174 @param dest: target filename
176 @param mode: target mode
178 @param alg: algorithm to generate digest
179 @type alg: L{Algorithm}
180 @param required_digest: expected digest value
181 @type required_digest: str
182 @raise BadDigest: the contents of the file don't match required_digest"""
183 with
open(src
, 'rb') as src_obj
:
184 dest_fd
= os
.open(dest
, os
.O_WRONLY | os
.O_CREAT | os
.O_EXCL
, mode
)
186 digest
= alg
.new_digest()
188 data
= src_obj
.read(256)
192 written
= os
.write(dest_fd
, data
)
194 data
= data
[written
:]
197 actual
= digest
.hexdigest()
198 if actual
== required_digest
: return
200 raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
201 "Expected: %(required_digest)s\n"
202 "Actual: %(actual_digest)s") % {'src': src
, 'required_digest': required_digest
, 'actual_digest': actual
})
204 def verify(root
, required_digest
= None):
205 """Ensure that directory 'dir' generates the given digest.
206 For a non-error return:
207 - Dir's name must be a digest (in the form "alg=value")
208 - The calculated digest of the contents must match this name.
209 - If there is a .manifest file, then its digest must also match.
210 @raise BadDigest: if verification fails."""
211 if required_digest
is None:
212 required_digest
= os
.path
.basename(root
)
213 alg
= splitID(required_digest
)[0]
215 digest
= alg
.new_digest()
217 for line
in alg
.generate_manifest(root
):
221 actual_digest
= alg
.getID(digest
)
223 manifest_file
= os
.path
.join(root
, '.manifest')
224 if os
.path
.isfile(manifest_file
):
225 digest
= alg
.new_digest()
226 with
open(manifest_file
, 'rt') as stream
:
227 digest
.update(stream
.read())
228 manifest_digest
= alg
.getID(digest
)
230 manifest_digest
= None
232 if required_digest
== actual_digest
== manifest_digest
:
235 error
= BadDigest(_("Cached item does NOT verify."))
237 error
.detail
= _(" Expected: %(required_digest)s\n"
238 " Actual: %(actual_digest)s\n"
239 ".manifest digest: %(manifest_digest)s\n\n") \
240 % {'required_digest': required_digest
, 'actual_digest': actual_digest
, 'manifest_digest': manifest_digest
or _('No .manifest file')}
242 if manifest_digest
is None:
243 error
.detail
+= _("No .manifest, so no further details available.")
244 elif manifest_digest
== actual_digest
:
245 error
.detail
+= _("The .manifest file matches the actual contents. Very strange!")
246 elif manifest_digest
== required_digest
:
248 with
open(manifest_file
, 'rb') as stream
:
249 diff
= difflib
.unified_diff(stream
.readlines(), lines
,
250 'Recorded', 'Actual')
251 error
.detail
+= _("The .manifest file matches the directory name.\n" \
252 "The contents of the directory have changed:\n") + \
254 elif required_digest
== actual_digest
:
255 error
.detail
+= _("The directory contents are correct, but the .manifest file is wrong!")
257 error
.detail
+= _("The .manifest file matches neither of the other digests. Odd.")
260 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
261 # - A regular file suddenly turns into a symlink?
262 # - We find a device file (users can hard-link them if on the same device)
263 def copy_tree_with_verify(source
, target
, manifest_data
, required_digest
):
264 """Copy directory source to be a subdirectory of target if it matches the required_digest.
265 manifest_data is normally source/.manifest. source and manifest_data are not trusted
266 (will typically be under the control of another user).
267 The copy is first done to a temporary directory in target, then renamed to the final name
268 only if correct. Therefore, an invalid 'target/required_digest' will never exist.
269 A successful return means that target/required_digest now exists (whether we created it or not)."""
271 from logging
import info
273 alg
, digest_value
= splitID(required_digest
)
275 if isinstance(alg
, OldSHA1
):
276 raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))
278 digest
= alg
.new_digest()
279 digest
.update(manifest_data
)
280 manifest_digest
= alg
.getID(digest
)
282 if manifest_digest
!= required_digest
:
283 raise BadDigest(_("Manifest has been tampered with!\n"
284 "Manifest digest: %(actual_digest)s\n"
285 "Directory name : %(required_digest)s")
286 % {'actual_digest': manifest_digest
, 'required_digest': required_digest
})
288 target_impl
= os
.path
.join(target
, required_digest
)
289 if os
.path
.isdir(target_impl
):
290 info(_("Target directory '%s' already exists"), target_impl
)
293 # We've checked that the source's manifest matches required_digest, so it
294 # is what we want. Make a list of all the files we need to copy...
296 wanted
= _parse_manifest(manifest_data
)
298 tmpdir
= tempfile
.mkdtemp(prefix
= 'tmp-copy-', dir = target
)
300 _copy_files(alg
, wanted
, source
, tmpdir
)
303 raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
304 '\n- '.join(wanted
.keys()))
306 # Make directories read-only (files are already RO)
307 for root
, dirs
, files
in os
.walk(tmpdir
):
309 path
= os
.path
.join(root
, d
)
310 mode
= os
.stat(path
).st_mode
311 os
.chmod(path
, mode
& 0o555)
313 # Check that the copy is correct
314 actual_digest
= alg
.getID(add_manifest_file(tmpdir
, alg
))
315 if actual_digest
!= required_digest
:
316 raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
317 "Unless the target was modified during the copy, this is a BUG\n"
318 "in 0store and should be reported.\n"
319 "Expected: %(required_digest)s\n"
320 "Actual: %(actual_digest)s") % {'required_digest': required_digest
, 'actual_digest': actual_digest
})
322 os
.chmod(tmpdir
, 0o755) # need write permission to rename on MacOS X
323 os
.rename(tmpdir
, target_impl
)
324 os
.chmod(target_impl
, 0o555)
327 if not os
.path
.isdir(target_impl
):
329 # else someone else installed it already - return success
331 if tmpdir
is not None:
332 info(_("Deleting tmpdir '%s'") % tmpdir
)
333 from zeroinstall
.support
import ro_rmtree
336 def _parse_manifest(manifest_data
):
337 """Parse a manifest file.
338 @param manifest_data: the contents of the manifest file
339 @type manifest_data: str
340 @return: a mapping from paths to information about that path
341 @rtype: {str: tuple}"""
344 for line
in manifest_data
.split('\n'):
347 data
= line
.split(' ', 1)
348 if len(data
) != 2: raise BadDigest(_("Bad line '%s'") % line
)
350 if not path
.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line
)
354 data
= line
.split(' ', 3)
355 path
= os
.path
.join(dir, data
[-1])
356 if len(data
) != 4: raise BadDigest(_("Bad line '%s'") % line
)
358 data
= line
.split(' ', 4)
359 path
= os
.path
.join(dir, data
[-1])
360 if len(data
) != 5: raise BadDigest(_("Bad line '%s'") % line
)
362 raise BadDigest(_('Duplicate entry "%s"') % line
)
363 wanted
[path
] = data
[:-1]
366 def _copy_files(alg
, wanted
, source
, target
):
367 """Scan for files under 'source'. For each one:
368 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
369 then copy it into 'target'.
370 If it's not in wanted, warn and skip it.
371 On exit, wanted contains only files that were not found."""
372 from logging
import warn
374 for line
in alg
.generate_manifest(source
):
376 type, name
= line
.split(' ', 1)
377 assert name
.startswith('/')
381 type, actual_digest
, actual_size
, name
= line
.split(' ', 3)
382 path
= os
.path
.join(dir, name
)
384 assert line
[0] in 'XF'
385 type, actual_digest
, actual_mtime
, actual_size
, name
= line
.split(' ', 4)
386 path
= os
.path
.join(dir, name
)
388 required_details
= wanted
.pop(path
)
390 warn(_("Skipping file not in manifest: '%s'"), path
)
392 if required_details
[0] != type:
393 raise BadDigest(_("Item '%s' has wrong type!") % path
)
395 os
.mkdir(os
.path
.join(target
, path
))
397 required_type
, required_digest
, required_mtime
, required_size
= required_details
398 if required_size
!= actual_size
:
399 raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
400 "%(required_size)s according to manifest)") %
401 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
402 required_mtime
= int(required_mtime
)
403 dest_path
= os
.path
.join(target
, path
)
408 copy_with_verify(os
.path
.join(source
, path
),
413 os
.utime(dest_path
, (required_mtime
, required_mtime
))
415 required_type
, required_digest
, required_size
= required_details
416 if required_size
!= actual_size
:
417 raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
418 "%(required_size)s according to manifest)") %
419 {'path': path
, 'actual_size': actual_size
, 'required_size': required_size
})
420 symlink_target
= os
.readlink(os
.path
.join(source
, path
))
421 symlink_digest
= alg
.new_digest()
422 symlink_digest
.update(symlink_target
)
423 if symlink_digest
.hexdigest() != required_digest
:
424 raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
425 "%(digest)s according to manifest)") % {'path': path
, 'digest': required_digest
})
426 dest_path
= os
.path
.join(target
, path
)
427 os
.symlink(symlink_target
, dest_path
)
429 raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': type, 'path': path
})
431 class HashLibAlgorithm(Algorithm
):
432 new_digest
= None # Constructor for digest objects
434 def __init__(self
, name
, rating
):
436 self
.new_digest
= sha1_new
437 self
.name
= 'sha1new'
439 self
.new_digest
= getattr(hashlib
, name
)
443 def generate_manifest(self
, root
):
445 # To ensure that a line-by-line comparison of the manifests
446 # is possible, we require that filenames don't contain newlines.
447 # Otherwise, you can name a file so that the part after the \n
448 # would be interpreted as another line in the manifest.
449 if '\n' in sub
: raise BadDigest(_("Newline in filename '%s'") % sub
)
450 assert sub
.startswith('/')
452 full
= os
.path
.join(root
, sub
[1:])
453 info
= os
.lstat(full
)
454 new_digest
= self
.new_digest
457 if not stat
.S_ISDIR(m
): raise Exception(_('Not a directory: "%s"') % full
)
460 items
= os
.listdir(full
)
464 path
= os
.path
.join(root
, sub
[1:], leaf
)
465 info
= os
.lstat(path
)
469 if leaf
== '.manifest': continue
471 with
open(path
, 'rb') as stream
:
472 d
= new_digest(stream
.read()).hexdigest()
474 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
476 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
477 elif stat
.S_ISLNK(m
):
478 target
= os
.readlink(path
)
479 d
= new_digest(target
).hexdigest()
480 # Note: Can't use utime on symlinks, so skip mtime
481 # Note: eCryptfs may report length as zero, so count ourselves instead
482 yield "S %s %s %s" % (d
, len(target
), leaf
)
483 elif stat
.S_ISDIR(m
):
486 raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
489 if not sub
.endswith('/'):
492 # Note: "sub" is always Unix style. Don't use os.path.join here.
493 for y
in recurse(sub
+ x
): yield y
496 for x
in recurse('/'): yield x
def getID(self, digest):
    """Convert a digest object into a full ID string.

    @param digest: object providing a hexdigest() method
    @return: self.name, then '=', then the hexadecimal digest"""
    prefix = self.name + '='
    return prefix + digest.hexdigest()
503 'sha1new': HashLibAlgorithm('sha1', 50),
506 if hashlib
is not None:
507 algorithms
['sha256'] = HashLibAlgorithm('sha256', 80)
509 def fixup_permissions(root
):
510 """Set permissions recursively for children of root:
511 - If any X bit is set, they all must be.
512 - World readable, non-writable.
513 @raise Exception: if there are unsafe special bits set (setuid, etc)."""
515 for main
, dirs
, files
in os
.walk(root
):
516 for x
in ['.'] + files
:
517 full
= os
.path
.join(main
, x
)
519 raw_mode
= os
.lstat(full
).st_mode
520 if stat
.S_ISLNK(raw_mode
): continue
522 mode
= stat
.S_IMODE(raw_mode
)
524 raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full
, 'mode': oct(mode
)})
526 os
.chmod(full
, 0o555)
528 os
.chmod(full
, 0o444)