From a18211c1fa9ef83d94e1611434f9e6a681d2d991 Mon Sep 17 00:00:00 2001 From: Thomas Leonard Date: Sun, 6 Aug 2006 15:10:26 +0000 Subject: [PATCH] Added '0store copy' command. This copies a possibly-untrusted directory structure into your cache (by default), checking that it matches the requested digest. This is useful if you notice that a user you don't fully trust already has a program you were about to download - you can safely get a copy from them. A little more work may be needed to make this completely robust. When ready, the injector can be made to check for suitable directories to copy automatically before starting a download. This may also be useful in the long-planned setuid script to copy downloads by users into the shared system directory (since it will only copy correctly named directories). git-svn-id: file:///home/talex/Backups/sf.net/Subversion/zero-install/trunk/0launch@1016 9f8c893c-44ee-0310-b757-c8ca8341c71e --- 0store.1 | 14 +++ tests/teststore.py | 85 ++++++++++++++++++- zeroinstall/zerostore/cli.py | 43 ++++++++-- zeroinstall/zerostore/manifest.py | 174 +++++++++++++++++++++++++++++++++++++- 4 files changed, 306 insertions(+), 10 deletions(-) diff --git a/0store.1 b/0store.1 index 5adc61c..b105e1e 100644 --- a/0store.1 +++ b/0store.1 @@ -10,6 +10,9 @@ .B 0store add \fBDIGEST\fP \fBARCHIVE\fP [ \fBEXTRACT\fP ] +.B 0store copy +\fBDIRECTORY\fP [ \fBDIRECTORY\fP ] + .B 0store find \fBDIGEST\fP @@ -46,6 +49,17 @@ To add a subdirectory of an archive to the store: The actual digest is calculated and compared to the given one. If they don't match, the operation is rejected. +.SH COPY +.PP +To copy an implementation (a directory with a name in the form +"algorithm=value"), use the copy function. This is similar to performing +a normal recursive directory copy followed by a +.B 0store verify +to check that the name matches the contents. 
E.g.: + +.B 0store copy ~someuser/.cache/0install.net/implementations/sha256=XXX +/var/cache/0install.net/implementations/ + .SH FIND .PP To find the path of a stored item: diff --git a/tests/teststore.py b/tests/teststore.py index d604013..2c4888a 100755 --- a/tests/teststore.py +++ b/tests/teststore.py @@ -6,7 +6,8 @@ from logging import getLogger, DEBUG, INFO sys.path.insert(0, '..') -from zeroinstall.zerostore import Store, manifest, BadDigest +from zeroinstall.zerostore import Store, manifest, BadDigest, cli +from zeroinstall import SafeException class TestStore(unittest.TestCase): def setUp(self): @@ -62,6 +63,88 @@ class TestStore(unittest.TestCase): os.unlink(mfile) except BadDigest, ex: raise Exception(alg_name + ": " + str(ex) + "\n" + ex.detail) + + def populate_sample(self, target): + """Create a set of files, links and directories in target for testing.""" + path = os.path.join(target, 'MyFile') + f = file(path, 'w') + f.write('Hello') + f.close() + os.utime(path, (1, 2)) + + subdir = os.path.join(target, 'My Dir') + os.mkdir(subdir) + + subfile = os.path.join(subdir, '!a file!') + f = file(subfile, 'w') + f.write('Some data.') + f.close() + os.utime(subfile, (1, 2)) + + subfile += '.exe' + f = file(subfile, 'w') + f.write('Some code.') + f.close() + os.chmod(subfile, 0500) + os.utime(subfile, (1, 2)) + + os.symlink('/the/symlink/target', + os.path.join(target, 'a symlink')) + + def testCopy(self): + sha1 = manifest.get_algorithm('sha1') + sha1new = manifest.get_algorithm('sha1new') + source = os.path.join(self.tmp, 'badname') + os.mkdir(source) + + self.populate_sample(source) + + lines = list(sha1new.generate_manifest(source)) + self.assertEquals(['F f7ff9e8b7bb2e09b70935a5d785e0cc5d9d0abf0 2 5 MyFile', + 'S 570b0ce957ab43e774c82fca0ea3873fc452278b 19 a symlink', + 'D /My Dir', + 'F 0236ef92e1e37c57f0eb161e7e2f8b6a8face705 2 10 !a file!', + 'X b4ab02f2c791596a980fd35f51f5d92ee0b4705c 2 10 !a file!.exe'], + lines) + digest = 
sha1.getID(manifest.add_manifest_file(source, sha1)) + + copy = tempfile.mktemp() + os.mkdir(copy) + try: + # Source must be in the form alg=value + try: + cli.do_copy([source, copy]) + assert 0 + except BadDigest, ex: + assert 'badname' in str(ex) + source, badname = os.path.join(self.tmp, digest), source + os.rename(badname, source) + + # Can't copy sha1 implementations (unsafe) + try: + cli.do_copy([source, copy]) + except SafeException, ex: + assert 'sha1' in str(ex) + + # Already have a .manifest + try: + manifest.add_manifest_file(source, sha1new) + assert 0 + except SafeException, ex: + assert '.manifest' in str(ex) + + os.unlink(os.path.join(source, '.manifest')) + + # Switch to sha1new + digest = sha1new.getID(manifest.add_manifest_file(source, sha1new)) + source, badname = os.path.join(self.tmp, digest), source + os.rename(badname, source) + + cli.do_copy([source, copy]) + + self.assertEquals('Hello', file(os.path.join(copy, digest, 'MyFile')).read()) + finally: + shutil.rmtree(copy) suite = unittest.makeSuite(TestStore) if __name__ == '__main__': diff --git a/zeroinstall/zerostore/cli.py b/zeroinstall/zerostore/cli.py index efcd97e..6068cf2 100644 --- a/zeroinstall/zerostore/cli.py +++ b/zeroinstall/zerostore/cli.py @@ -1,9 +1,8 @@ # Copyright (C) 2006, Thomas Leonard # See the README file for details, or visit http://0install.net. 
-import sys, os, sha, tempfile, shutil -from logging import warn -from zeroinstall.zerostore.manifest import generate_manifest, verify, get_algorithm +import sys, os +from zeroinstall.zerostore.manifest import generate_manifest, verify, get_algorithm, copy_tree_with_verify from zeroinstall import zerostore, SafeException stores = None @@ -70,13 +69,19 @@ def do_add(args): raise UsageError("No such file or directory '%s'" % args[1]) def do_verify(args): - """verify (DIGEST | DIRECTORY)""" - if len(args) != 1: raise UsageError("Missing DIGEST or DIRECTORY") - root = get_stored(args[0]) + """verify (DIGEST | (DIRECTORY [DIGEST])""" + if len(args) == 2: + required_digest = args[1] + root = args[0] + elif len(args) == 1: + root = get_stored(args[0]) + required_digest = None # Get from name + else: + raise UsageError("Missing DIGEST or DIRECTORY") print "Verifying", root try: - verify(root) + verify(root, required_digest) print "OK" except zerostore.BadDigest, ex: print str(ex) @@ -109,4 +114,26 @@ def get_stored(dir_or_digest): print >>sys.stderr, ex sys.exit(1) -commands = [do_add, do_find, do_list, do_manifest, do_verify] +def do_copy(args): + """copy SOURCE [ TARGET ]""" + if len(args) == 2: + source, target = args + elif len(args) == 1: + source = args[0] + target = stores.stores[0].dir + else: + raise UsageError("Wrong number of arguments.") + + if not os.path.isdir(source): + raise UsageError("Source directory '%s' not found" % source) + if not os.path.isdir(target): + raise UsageError("Target directory '%s' not found" % target) + manifest_path = os.path.join(source, '.manifest') + if not os.path.isfile(manifest_path): + raise UsageError("Source manifest '%s' not found" % manifest_path) + required_digest = os.path.basename(source) + manifest_data = file(manifest_path).read() + + copy_tree_with_verify(source, target, manifest_data, required_digest) + +commands = [do_add, do_copy, do_find, do_list, do_manifest, do_verify] diff --git 
a/zeroinstall/zerostore/manifest.py b/zeroinstall/zerostore/manifest.py index e9637af..c939b22 100644 --- a/zeroinstall/zerostore/manifest.py +++ b/zeroinstall/zerostore/manifest.py @@ -112,7 +112,7 @@ def add_manifest_file(dir, digest_or_alg): here is deprecated.""" mfile = os.path.join(dir, '.manifest') if os.path.islink(mfile) or os.path.exists(mfile): - raise Exception('Archive contains a .manifest file!') + raise SafeException("Directory '%s' already contains a .manifest file!" % dir) manifest = '' if isinstance(digest_or_alg, Algorithm): alg = digest_or_alg @@ -137,6 +137,27 @@ def splitID(id): raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id) return (get_algorithm(parts[0]), parts[1]) +def copy_with_verify(src, dest, mode, alg, required_digest): + """Copy path src to dest, checking that the contents give the right digest. + dest must not exist. New file is created with a mode of 'mode & umask'.""" + src_obj = file(src) + dest_fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode) + digest = alg.new_digest() + while True: + data = src_obj.read(256) + if not data: break + digest.update(data) + while data: + written = os.write(dest_fd, data) + assert written >= 0 + data = data[written:] + actual = digest.hexdigest() + if actual == required_digest: return + os.unlink(dest) + raise BadDigest(("Copy failed: file '%s' has wrong digest (may have been tampered with)\n" + "Excepted: %s\n" + "Actual: %s") % (src, required_digest, actual)) + def verify(root, required_digest = None): """Ensure that directory 'dir' generates the given digest. Raises BadDigest if not. For a non-error return: @@ -189,6 +210,157 @@ def verify(root, required_digest = None): error.detail += "The .manifest file matches neither of the other digests. Odd." raise error +# XXX: Be more careful about the source tree changing under us. In particular, what happens if: +# - A regular file suddenly turns into a symlink? 
+# - We find a device file (users can hard-link them if on the same device) +def copy_tree_with_verify(source, target, manifest_data, required_digest): + """Copy directory source to be a subdirectory of target if it matches the required_digest. + manifest_data is normally source/.manifest. source and manifest_data are not trusted + (will typically be under the control of another user). + The copy is first done to a temporary directory in target, then renamed to the final name + only if correct. Therefore, an invalid 'target/required_digest' will never exist. + A successful return means that target/required_digest now exists (whether we created it or not).""" + import tempfile, shutil + from logging import info + + alg, digest_value = splitID(required_digest) + + if isinstance(alg, OldSHA1): + raise SafeException("Sorry, the 'sha1' algorithm does not support copying.") + + digest = alg.new_digest() + digest.update(manifest_data) + manifest_digest = alg.getID(digest) + + if manifest_digest != required_digest: + raise zerostore.BadDigest("Manifest has been tampered with!\n" + "Manifest digest: " + manifest_digest + "\n" + "Directory name : " + required_digest) + + target_impl = os.path.join(target, required_digest) + if os.path.isdir(target_impl): + info("Target directory '%s' already exists", target_impl) + return + + # We've checked that the source's manifest matches required_digest, so it + # is what we want. Make a list of all the files we need to copy... 
+ + wanted = _parse_manifest(manifest_data) + + tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target) + + try: + _copy_files(alg, wanted, source, tmpdir) + + if wanted: + raise SafeException('Copy failed; files missing from source:\n- ' + + '\n- '.join(wanted.keys())) + + # Check that the copy is correct + actual_digest = alg.getID(add_manifest_file(tmpdir, alg)) + if actual_digest != required_digest: + raise SafeException(("Copy failed; double-check of target gave the wrong digest.\n" + "Unless the target was modified during the copy, this is a BUG\n" + "in 0store and should be reported.\n" + "Expected: %s\n" + "Actual: %s") % (required_digest, actual_digest)) + os.rename(tmpdir, target_impl) + # TODO: catch already-exists, delete tmpdir and return success + except: + info("Deleting tmpdir '%s'" % tmpdir) + shutil.rmtree(tmpdir) + raise + +def _parse_manifest(manifest_data): + wanted = {} # Path -> (manifest line tuple) + dir = '' + for line in manifest_data.split('\n'): + if not line: break + if line[0] == 'D': + data = line.split(' ', 1) + if len(data) != 2: raise zerostore.BadDigest("Bad line '%s'" % line) + path = data[-1] + if not path.startswith('/'): raise zerostore.BadDigest("Not absolute: '%s'" % line) + path = path[1:] + dir = path + elif line[0] == 'S': + data = line.split(' ', 3) + path = os.path.join(dir, data[-1]) + if len(data) != 4: raise zerostore.BadDigest("Bad line '%s'" % line) + else: + data = line.split(' ', 4) + path = os.path.join(dir, data[-1]) + if len(data) != 5: raise zerostore.BadDigest("Bad line '%s'" % line) + if path in wanted: + raise zerostore.BadDigest('Duplicate entry "%s"' % line) + wanted[path] = data[:-1] + return wanted + +def _copy_files(alg, wanted, source, target): + """Scan for files under 'source'. For each one: + If it is in wanted and has the right details (or they can be fixed; e.g. mtime), + then copy it into 'target'. + If it's not in wanted, warn and skip it. 
+ On exit, wanted contains only files that were not found.""" + from logging import warn + dir = '' + for line in alg.generate_manifest(source): + if line[0] == 'D': + type, name = line.split(' ', 1) + assert name.startswith('/') + dir = name[1:] + path = dir + elif line[0] == 'S': + type, actual_digest, actual_size, name = line.split(' ', 3) + path = os.path.join(dir, name) + else: + assert line[0] in 'XF' + type, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4) + path = os.path.join(dir, name) + try: + required_details = wanted.pop(path) + except KeyError: + warn("Skipping file not in manifest: '%s'", path) + continue + if required_details[0] != type: + raise zerostore.BadDigest("Item '%s' has wrong type!" % path) + if type == 'D': + os.mkdir(os.path.join(target, path)) + elif type in 'XF': + required_type, required_digest, required_mtime, required_size = required_details + if required_size != actual_size: + raise SafeException("File '%s' has wrong size (%s bytes, but should be " + "%s according to manifest)" % + (path, actual_size, required_size)) + required_mtime = int(required_mtime) + dest_path = os.path.join(target, path) + if type == 'X': + mode = 0555 + else: + mode = 0444 + copy_with_verify(os.path.join(source, path), + dest_path, + mode, + alg, + required_digest) + os.utime(dest_path, (required_mtime, required_mtime)) + elif type == 'S': + required_type, required_digest, required_size = required_details + if required_size != actual_size: + raise SafeException("Symlink '%s' has wrong size (%s bytes, but should be " + "%s according to manifest)" % + (path, actual_size, required_size)) + symlink_target = os.readlink(os.path.join(source, path)) + symlink_digest = alg.new_digest() + symlink_digest.update(symlink_target) + if symlink_digest.hexdigest() != required_digest: + raise SafeException("Symlink '%s' has wrong target (digest should be " + "%s according to manifest)" % (path, required_digest)) + dest_path = os.path.join(target, path) + 
os.symlink(symlink_target, dest_path) + else: + raise SafeException("Unknown manifest type %s for '%s'" % (type, path)) + class HashLibAlgorithm(Algorithm): new_digest = None # Constructor for digest objects -- 2.11.4.GIT