Depend on binutils ('ar' is needed to extract from debs).
[zeroinstall.git] / zeroinstall / zerostore / manifest.py
blobe9637afe4dfba6a1b9daf3ffdd334016d766b85c
1 # Copyright (C) 2006, Thomas Leonard
2 # See the README file for details, or visit http://0install.net.
4 from __future__ import generators
5 import os, stat
6 from sets import Set
7 import sha
8 from zeroinstall import SafeException
9 from zeroinstall.zerostore import BadDigest
11 try:
12 import hashlib
13 except:
14 hashlib = None
16 """A manifest is a string representing a directory tree, with the property
17 that two trees will generate identical manifest strings if and only if:
19 - They have extactly the same set of files, directories and symlinks.
20 - For each pair of corresponding directories in the two sets:
21 - The mtimes are the same.
22 - For each pair of corresponding files in the two sets:
23 - The size, executable flag and mtime are the same.
24 - The contents have matching SHA1 sums.
25 - For each pair of corresponding symlinks in the two sets:
26 - The mtime and size are the same.
27 - The targets have matching SHA1 sums.
29 The manifest is typically processed with SHA1 itself. So, the idea is that
30 any significant change to the contents of the tree will change the SHA1 sum
31 of the manifest.
33 A top-level ".manifest" file is ignored.
34 """
36 class Algorithm:
37 def generate_manifest(root):
38 """Returns an iterator that yields each line of the manifest for the directory
39 tree rooted at 'root'."""
40 raise Exception('Abstract')
42 def new_digest(self):
43 """Create a new digest. Call update() on the returned object to digest the data.
44 Call getID() to turn it into a full ID string."""
45 raise Exception('Abstract')
47 def getID(self, digest):
48 """Convert a digest (from new_digest) to a full ID."""
49 raise Exception('Abstract')
51 class OldSHA1(Algorithm):
52 def generate_manifest(self, root):
53 def recurse(sub):
54 # To ensure that a line-by-line comparison of the manifests
55 # is possible, we require that filenames don't contain newlines.
56 # Otherwise, you can name a file so that the part after the \n
57 # would be interpreted as another line in the manifest.
58 if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
59 assert sub.startswith('/')
61 if sub == '/.manifest': return
63 full = os.path.join(root, sub[1:])
64 info = os.lstat(full)
66 m = info.st_mode
67 if stat.S_ISDIR(m):
68 if sub != '/':
69 yield "D %s %s" % (info.st_mtime, sub)
70 items = os.listdir(full)
71 items.sort()
72 for x in items:
73 for y in recurse(os.path.join(sub, x)):
74 yield y
75 return
77 assert sub[1:]
78 leaf = os.path.basename(sub[1:])
79 if stat.S_ISREG(m):
80 d = sha.new(file(full).read()).hexdigest()
81 if m & 0111:
82 yield "X %s %s %s %s" % (d, info.st_mtime,info.st_size, leaf)
83 else:
84 yield "F %s %s %s %s" % (d, info.st_mtime,info.st_size, leaf)
85 elif stat.S_ISLNK(m):
86 d = sha.new(os.readlink(full)).hexdigest()
87 # Note: Can't use utime on symlinks, so skip mtime
88 yield "S %s %s %s" % (d, info.st_size, leaf)
89 else:
90 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
91 full)
92 for x in recurse('/'): yield x
94 def new_digest(self):
95 return sha.new()
97 def getID(self, digest):
98 return 'sha1=' + digest.hexdigest()
100 def get_algorithm(name):
101 try:
102 return algorithms[name]
103 except KeyError:
104 raise BadDigest("Unknown algorithm '%s'" % name)
106 def generate_manifest(root, alg = 'sha1'):
107 return get_algorithm(alg).generate_manifest(root)
109 def add_manifest_file(dir, digest_or_alg):
110 """Writes a .manifest file into 'dir', and returns the digest.
111 Second argument should be an instance of Algorithm. Passing a digest
112 here is deprecated."""
113 mfile = os.path.join(dir, '.manifest')
114 if os.path.islink(mfile) or os.path.exists(mfile):
115 raise Exception('Archive contains a .manifest file!')
116 manifest = ''
117 if isinstance(digest_or_alg, Algorithm):
118 alg = digest_or_alg
119 digest = alg.new_digest()
120 else:
121 digest = digest_or_alg
122 alg = get_algorithm('sha1')
123 for line in alg.generate_manifest(dir):
124 manifest += line + '\n'
125 digest.update(manifest)
126 stream = file(mfile, 'w')
127 stream.write(manifest)
128 stream.close()
129 return digest
131 def splitID(id):
132 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
133 where 'alg' is an instance of Algorithm and 'value' is a string. If the
134 algorithm isn't known or the ID has the wrong format, raise KeyError."""
135 parts = id.split('=', 1)
136 if len(parts) != 2:
137 raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
138 return (get_algorithm(parts[0]), parts[1])
140 def verify(root, required_digest = None):
141 """Ensure that directory 'dir' generates the given digest.
142 Raises BadDigest if not. For a non-error return:
143 - Dir's name must be a digest (in the form "alg=value")
144 - The calculated digest of the contents must match this name.
145 - If there is a .manifest file, then its digest must also match."""
146 if required_digest is None:
147 required_digest = os.path.basename(root)
148 alg = splitID(required_digest)[0]
150 digest = alg.new_digest()
151 lines = []
152 for line in alg.generate_manifest(root):
153 line += '\n'
154 digest.update(line)
155 lines.append(line)
156 actual_digest = alg.getID(digest)
158 manifest_file = os.path.join(root, '.manifest')
159 if os.path.isfile(manifest_file):
160 digest = alg.new_digest()
161 digest.update(file(manifest_file).read())
162 manifest_digest = alg.getID(digest)
163 else:
164 manifest_digest = None
166 if required_digest == actual_digest == manifest_digest:
167 return
169 error = BadDigest("Cached item does NOT verify.")
171 error.detail = " Expected digest: " + required_digest + "\n" + \
172 " Actual digest: " + actual_digest + "\n" + \
173 ".manifest digest: " + (manifest_digest or 'No .manifest file') + "\n\n"
175 if manifest_digest is None:
176 error.detail += "No .manifest, so no further details available."
177 elif manifest_digest == actual_digest:
178 error.detail += "The .manifest file matches the actual contents. Very strange!"
179 elif manifest_digest == required_digest:
180 import difflib
181 diff = difflib.unified_diff(file(manifest_file).readlines(), lines,
182 'Recorded', 'Actual')
183 error.detail += "The .manifest file matches the directory name.\n" \
184 "The contents of the directory have changed:\n" + \
185 ''.join(diff)
186 elif required_digest == actual_digest:
187 error.detail += "The directory contents are correct, but the .manifest file is wrong!"
188 else:
189 error.detail += "The .manifest file matches neither of the other digests. Odd."
190 raise error
192 class HashLibAlgorithm(Algorithm):
193 new_digest = None # Constructor for digest objects
195 def __init__(self, name):
196 if name == 'sha1':
197 import sha
198 self.new_digest = sha.new
199 self.name = 'sha1new'
200 else:
201 self.new_digest = getattr(hashlib, name)
202 self.name = name
204 def generate_manifest(self, root):
205 def recurse(sub):
206 # To ensure that a line-by-line comparison of the manifests
207 # is possible, we require that filenames don't contain newlines.
208 # Otherwise, you can name a file so that the part after the \n
209 # would be interpreted as another line in the manifest.
210 if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
211 assert sub.startswith('/')
213 full = os.path.join(root, sub[1:])
214 info = os.lstat(full)
215 new_digest = self.new_digest
217 m = info.st_mode
218 if not stat.S_ISDIR(m): raise Exception('Not a directory: "%s"' % full)
219 if sub != '/':
220 yield "D %s" % sub
221 items = os.listdir(full)
222 items.sort()
223 dirs = []
224 for leaf in items:
225 path = os.path.join(root, sub[1:], leaf)
226 info = os.lstat(path)
227 m = info.st_mode
229 if stat.S_ISREG(m):
230 if leaf == '.manifest': continue
232 d = new_digest(file(path).read()).hexdigest()
233 if m & 0111:
234 yield "X %s %s %s %s" % (d, info.st_mtime,info.st_size, leaf)
235 else:
236 yield "F %s %s %s %s" % (d, info.st_mtime,info.st_size, leaf)
237 elif stat.S_ISLNK(m):
238 d = new_digest(os.readlink(path)).hexdigest()
239 # Note: Can't use utime on symlinks, so skip mtime
240 yield "S %s %s %s" % (d, info.st_size, leaf)
241 elif stat.S_ISDIR(m):
242 dirs.append(leaf)
243 else:
244 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
245 path)
246 for x in dirs:
247 for y in recurse(os.path.join(sub, x)): yield y
248 return
250 for x in recurse('/'): yield x
252 def getID(self, digest):
253 return self.name + '=' + digest.hexdigest()
255 algorithms = {
256 'sha1': OldSHA1(),
257 'sha1new': HashLibAlgorithm('sha1'),
260 if hashlib is not None:
261 algorithms['sha256'] = HashLibAlgorithm('sha256')