1 """Processing of implementation manifests.
3 A manifest is a string representing a directory tree, with the property
4 that two trees will generate identical manifest strings if and only if:
6 - They have exactly the same set of files, directories and symlinks.
7 - For each pair of corresponding directories in the two sets:
8 - The mtimes are the same (OldSHA1 only).
9 - For each pair of corresponding files in the two sets:
10 - The size, executable flag and mtime are the same.
11 - The contents have matching secure hash values.
12 - For each pair of corresponding symlinks in the two sets:
13 - The mtime and size are the same.
14 - The targets have matching secure hash values.
16 The manifest is typically processed with a secure hash itself. So, the idea is that
17 any significant change to the contents of the tree will change the secure hash value
20 A top-level ".manifest" file is ignored.
23 # Copyright (C) 2006, Thomas Leonard
24 # See the README file for details, or visit http://0install.net.
26 from __future__
import generators
29 from zeroinstall
import SafeException
30 from zeroinstall
.zerostore
import BadDigest
class Algorithm:
    """Abstract base class for algorithms.

    An algorithm knows how to generate a manifest from a directory tree.
    Every method here only raises; concrete algorithms override them all."""

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory tree to describe
        @raise NotImplementedError: always, in this abstract base class"""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always, in this abstract base class"""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: a digest object obtained from L{new_digest}
        @raise NotImplementedError: always, in this abstract base class"""
        raise NotImplementedError('Abstract')
55 class OldSHA1(Algorithm
):
56 """@deprecated: Injector versions before 0.20 only supported this algorithm."""
57 def generate_manifest(self
, root
):
59 # To ensure that a line-by-line comparison of the manifests
60 # is possible, we require that filenames don't contain newlines.
61 # Otherwise, you can name a file so that the part after the \n
62 # would be interpreted as another line in the manifest.
63 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
64 assert sub
.startswith('/')
66 if sub
== '/.manifest': return
68 full
= os
.path
.join(root
, sub
[1:])
74 yield "D %s %s" % (int(info
.st_mtime
), sub
)
75 items
= os
.listdir(full
)
78 for y
in recurse(os
.path
.join(sub
, x
)):
83 leaf
= os
.path
.basename(sub
[1:])
85 d
= sha
.new(file(full
).read()).hexdigest()
87 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
89 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
) ,info
.st_size
, leaf
)
91 d
= sha
.new(os
.readlink(full
)).hexdigest()
92 # Note: Can't use utime on symlinks, so skip mtime
93 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
95 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
97 for x
in recurse('/'): yield x
def getID(self, digest):
    """Build the full ID string for a finished digest.
    @param digest: digest object providing hexdigest()
    @return: the hex digest prefixed with 'sha1='"""
    hex_value = digest.hexdigest()
    return 'sha1=' + hex_value
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @param name: key into the module-level 'algorithms' table (e.g. 'sha1new')
    @return: the object registered under that name
    @raise BadDigest: if the name is unknown."""
    try:
        return algorithms[name]
    except KeyError:
        # Report unknown names as BadDigest, as documented, instead of
        # leaking the KeyError from the table lookup.
        raise BadDigest("Unknown algorithm '%s'" % name)
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to generate the manifest for
    @param alg: name of the algorithm, as accepted by L{get_algorithm}
    @return: whatever the selected algorithm's generate_manifest(root) returns"""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
117 def add_manifest_file(dir, digest_or_alg
):
118 """Writes a .manifest file into 'dir', and returns the digest.
119 You should call fixup_permissions before this to ensure that the permissions are correct.
120 On exit, dir itself has mode 555. Subdirectories are not changed.
121 @param dir: root of the implementation
122 @param digest_or_alg: should be an instance of Algorithm. Passing a digest
123 here is deprecated."""
124 mfile
= os
.path
.join(dir, '.manifest')
125 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
126 raise SafeException("Directory '%s' already contains a .manifest file!" % dir)
128 if isinstance(digest_or_alg
, Algorithm
):
130 digest
= alg
.new_digest()
132 digest
= digest_or_alg
133 alg
= get_algorithm('sha1')
134 for line
in alg
.generate_manifest(dir):
135 manifest
+= line
+ '\n'
136 digest
.update(manifest
)
139 stream
= file(mfile
, 'w')
141 stream
.write(manifest
)
143 os
.chmod(mfile
, 0444)
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string.
    @param id: the digest ID to split at its first '='
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    parts = id.split('=', 1)
    if len(parts) != 2:
        # No '=' separator, so there is no algorithm name to look up.
        raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
    return (get_algorithm(parts[0]), parts[1])
155 def copy_with_verify(src
, dest
, mode
, alg
, required_digest
):
156 """Copy path src to dest, checking that the contents give the right digest.
157 dest must not exist. New file is created with a mode of 'mode & umask'.
158 @param src: source filename
160 @param dest: target filename
162 @param mode: target mode
164 @param alg: algorithm to generate digest
165 @type alg: L{Algorithm}
166 @param required_digest: expected digest value
167 @type required_digest: str
168 @raise BadDigest: the contents of the file don't match required_digest"""
170 dest_fd
= os
.open(dest
, os
.O_WRONLY | os
.O_CREAT | os
.O_EXCL
, mode
)
172 digest
= alg
.new_digest()
174 data
= src_obj
.read(256)
178 written
= os
.write(dest_fd
, data
)
180 data
= data
[written
:]
184 actual
= digest
.hexdigest()
185 if actual
== required_digest
: return
187 raise BadDigest(("Copy failed: file '%s' has wrong digest (may have been tampered with)\n"
189 "Actual: %s") % (src
, required_digest
, actual
))
191 def verify(root
, required_digest
= None):
192 """Ensure that directory 'dir' generates the given digest.
193 For a non-error return:
194 - Dir's name must be a digest (in the form "alg=value")
195 - The calculated digest of the contents must match this name.
196 - If there is a .manifest file, then its digest must also match.
197 @raise BadDigest: if verification fails."""
198 if required_digest
is None:
199 required_digest
= os
.path
.basename(root
)
200 alg
= splitID(required_digest
)[0]
202 digest
= alg
.new_digest()
204 for line
in alg
.generate_manifest(root
):
208 actual_digest
= alg
.getID(digest
)
210 manifest_file
= os
.path
.join(root
, '.manifest')
211 if os
.path
.isfile(manifest_file
):
212 digest
= alg
.new_digest()
213 digest
.update(file(manifest_file
).read())
214 manifest_digest
= alg
.getID(digest
)
216 manifest_digest
= None
218 if required_digest
== actual_digest
== manifest_digest
:
221 error
= BadDigest("Cached item does NOT verify.")
223 error
.detail
= " Expected digest: " + required_digest
+ "\n" + \
224 " Actual digest: " + actual_digest
+ "\n" + \
225 ".manifest digest: " + (manifest_digest
or 'No .manifest file') + "\n\n"
227 if manifest_digest
is None:
228 error
.detail
+= "No .manifest, so no further details available."
229 elif manifest_digest
== actual_digest
:
230 error
.detail
+= "The .manifest file matches the actual contents. Very strange!"
231 elif manifest_digest
== required_digest
:
233 diff
= difflib
.unified_diff(file(manifest_file
).readlines(), lines
,
234 'Recorded', 'Actual')
235 error
.detail
+= "The .manifest file matches the directory name.\n" \
236 "The contents of the directory have changed:\n" + \
238 elif required_digest
== actual_digest
:
239 error
.detail
+= "The directory contents are correct, but the .manifest file is wrong!"
241 error
.detail
+= "The .manifest file matches neither of the other digests. Odd."
244 # XXX: Be more careful about the source tree changing under us. In particular, what happens if:
245 # - A regular file suddenly turns into a symlink?
246 # - We find a device file (users can hard-link them if on the same device)
247 def copy_tree_with_verify(source
, target
, manifest_data
, required_digest
):
248 """Copy directory source to be a subdirectory of target if it matches the required_digest.
249 manifest_data is normally source/.manifest. source and manifest_data are not trusted
250 (will typically be under the control of another user).
251 The copy is first done to a temporary directory in target, then renamed to the final name
252 only if correct. Therefore, an invalid 'target/required_digest' will never exist.
253 A successful return means that target/required_digest now exists (whether we created it or not)."""
254 import tempfile
, shutil
255 from logging
import info
257 alg
, digest_value
= splitID(required_digest
)
259 if isinstance(alg
, OldSHA1
):
260 raise SafeException("Sorry, the 'sha1' algorithm does not support copying.")
262 digest
= alg
.new_digest()
263 digest
.update(manifest_data
)
264 manifest_digest
= alg
.getID(digest
)
266 if manifest_digest
!= required_digest
:
267 raise BadDigest("Manifest has been tampered with!\n"
268 "Manifest digest: " + manifest_digest
+ "\n"
269 "Directory name : " + required_digest
)
271 target_impl
= os
.path
.join(target
, required_digest
)
272 if os
.path
.isdir(target_impl
):
273 info("Target directory '%s' already exists", target_impl
)
276 # We've checked that the source's manifest matches required_digest, so it
277 # is what we want. Make a list of all the files we need to copy...
279 wanted
= _parse_manifest(manifest_data
)
281 tmpdir
= tempfile
.mkdtemp(prefix
= 'tmp-copy-', dir = target
)
284 _copy_files(alg
, wanted
, source
, tmpdir
)
287 raise SafeException('Copy failed; files missing from source:\n- ' +
288 '\n- '.join(wanted
.keys()))
290 # Check that the copy is correct
291 actual_digest
= alg
.getID(add_manifest_file(tmpdir
, alg
))
292 if actual_digest
!= required_digest
:
293 raise SafeException(("Copy failed; double-check of target gave the wrong digest.\n"
294 "Unless the target was modified during the copy, this is a BUG\n"
295 "in 0store and should be reported.\n"
297 "Actual: %s") % (required_digest
, actual_digest
))
298 os
.rename(tmpdir
, target_impl
)
299 # TODO: catch already-exists, delete tmpdir and return success
301 info("Deleting tmpdir '%s'" % tmpdir
)
302 shutil
.rmtree(tmpdir
)
305 def _parse_manifest(manifest_data
):
306 """Parse a manifest file.
307 @param manifest_data: the contents of the manifest file
308 @type manifest_data: str
309 @return: a mapping from paths to information about that path
310 @rtype: {str: tuple}"""
313 for line
in manifest_data
.split('\n'):
316 data
= line
.split(' ', 1)
317 if len(data
) != 2: raise BadDigest("Bad line '%s'" % line
)
319 if not path
.startswith('/'): raise BadDigest("Not absolute: '%s'" % line
)
323 data
= line
.split(' ', 3)
324 path
= os
.path
.join(dir, data
[-1])
325 if len(data
) != 4: raise BadDigest("Bad line '%s'" % line
)
327 data
= line
.split(' ', 4)
328 path
= os
.path
.join(dir, data
[-1])
329 if len(data
) != 5: raise BadDigest("Bad line '%s'" % line
)
331 raise BadDigest('Duplicate entry "%s"' % line
)
332 wanted
[path
] = data
[:-1]
335 def _copy_files(alg
, wanted
, source
, target
):
336 """Scan for files under 'source'. For each one:
337 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
338 then copy it into 'target'.
339 If it's not in wanted, warn and skip it.
340 On exit, wanted contains only files that were not found."""
341 from logging
import warn
343 for line
in alg
.generate_manifest(source
):
345 type, name
= line
.split(' ', 1)
346 assert name
.startswith('/')
350 type, actual_digest
, actual_size
, name
= line
.split(' ', 3)
351 path
= os
.path
.join(dir, name
)
353 assert line
[0] in 'XF'
354 type, actual_digest
, actual_mtime
, actual_size
, name
= line
.split(' ', 4)
355 path
= os
.path
.join(dir, name
)
357 required_details
= wanted
.pop(path
)
359 warn("Skipping file not in manifest: '%s'", path
)
361 if required_details
[0] != type:
362 raise BadDigest("Item '%s' has wrong type!" % path
)
364 os
.mkdir(os
.path
.join(target
, path
))
366 required_type
, required_digest
, required_mtime
, required_size
= required_details
367 if required_size
!= actual_size
:
368 raise SafeException("File '%s' has wrong size (%s bytes, but should be "
369 "%s according to manifest)" %
370 (path
, actual_size
, required_size
))
371 required_mtime
= int(required_mtime
)
372 dest_path
= os
.path
.join(target
, path
)
377 copy_with_verify(os
.path
.join(source
, path
),
382 os
.utime(dest_path
, (required_mtime
, required_mtime
))
384 required_type
, required_digest
, required_size
= required_details
385 if required_size
!= actual_size
:
386 raise SafeException("Symlink '%s' has wrong size (%s bytes, but should be "
387 "%s according to manifest)" %
388 (path
, actual_size
, required_size
))
389 symlink_target
= os
.readlink(os
.path
.join(source
, path
))
390 symlink_digest
= alg
.new_digest()
391 symlink_digest
.update(symlink_target
)
392 if symlink_digest
.hexdigest() != required_digest
:
393 raise SafeException("Symlink '%s' has wrong target (digest should be "
394 "%s according to manifest)" % (path
, required_digest
))
395 dest_path
= os
.path
.join(target
, path
)
396 os
.symlink(symlink_target
, dest_path
)
398 raise SafeException("Unknown manifest type %s for '%s'" % (type, path
))
400 class HashLibAlgorithm(Algorithm
):
401 new_digest
= None # Constructor for digest objects
403 def __init__(self
, name
):
405 self
.new_digest
= sha
.new
406 self
.name
= 'sha1new'
408 self
.new_digest
= getattr(hashlib
, name
)
411 def generate_manifest(self
, root
):
413 # To ensure that a line-by-line comparison of the manifests
414 # is possible, we require that filenames don't contain newlines.
415 # Otherwise, you can name a file so that the part after the \n
416 # would be interpreted as another line in the manifest.
417 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
418 assert sub
.startswith('/')
420 full
= os
.path
.join(root
, sub
[1:])
421 info
= os
.lstat(full
)
422 new_digest
= self
.new_digest
425 if not stat
.S_ISDIR(m
): raise Exception('Not a directory: "%s"' % full
)
428 items
= os
.listdir(full
)
432 path
= os
.path
.join(root
, sub
[1:], leaf
)
433 info
= os
.lstat(path
)
437 if leaf
== '.manifest': continue
439 d
= new_digest(file(path
).read()).hexdigest()
441 yield "X %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
443 yield "F %s %s %s %s" % (d
, int(info
.st_mtime
), info
.st_size
, leaf
)
444 elif stat
.S_ISLNK(m
):
445 d
= new_digest(os
.readlink(path
)).hexdigest()
446 # Note: Can't use utime on symlinks, so skip mtime
447 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
448 elif stat
.S_ISDIR(m
):
451 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
454 for y
in recurse(os
.path
.join(sub
, x
)): yield y
457 for x
in recurse('/'): yield x
def getID(self, digest):
    """Build the full ID string for a finished digest.
    @param digest: digest object providing hexdigest()
    @return: self.name, an '=' separator, then the hex digest"""
    prefix = self.name + '='
    return prefix + digest.hexdigest()
464 'sha1new': HashLibAlgorithm('sha1'),
# Register the 'sha256' algorithm only when hashlib is not None.
# NOTE(review): presumably hashlib is set to None when the module cannot be
# imported; that import is not visible here, so confirm before relying on it.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256')
470 def fixup_permissions(root
):
471 """Set permissions recursively for children of root:
472 - If any X bit is set, they all must be.
473 - World readable, non-writable.
474 @raise Exception: if there are unsafe special bits set (setuid, etc)."""
476 for main
, dirs
, files
in os
.walk(root
):
477 for x
in ['.'] + files
:
478 full
= os
.path
.join(main
, x
)
480 raw_mode
= os
.lstat(full
).st_mode
481 if stat
.S_ISLNK(raw_mode
): continue
483 mode
= stat
.S_IMODE(raw_mode
)
485 raise Exception("Unsafe mode: extracted file '%s' had special bits set in mode '%s'" % (full
, oct(mode
)))