Support directory-symlinks with new-style manifests.
[zeroinstall.git] / zeroinstall / zerostore / manifest.py
blob00f0cf8a67dfcb256cfc7980c44de070150a264d
1 # Copyright (C) 2006, Thomas Leonard
2 # See the README file for details, or visit http://0install.net.
4 from __future__ import generators
5 import os, stat
6 from sets import Set
7 import sha
8 from zeroinstall import SafeException
10 try:
11 import hashlib
12 except:
13 hashlib = None
15 """A manifest is a string representing a directory tree, with the property
16 that two trees will generate identical manifest strings if and only if:
18 - They have exactly the same set of files, directories and symlinks.
19 - For each pair of corresponding directories in the two sets:
20 - The mtimes are the same.
21 - For each pair of corresponding files in the two sets:
22 - The size, executable flag and mtime are the same.
23 - The contents have matching SHA1 sums.
24 - For each pair of corresponding symlinks in the two sets:
25 - The size is the same (the mtime of a symlink is not recorded).
26 - The targets have matching SHA1 sums.
28 The manifest is typically processed with SHA1 itself. So, the idea is that
29 any significant change to the contents of the tree will change the SHA1 sum
30 of the manifest.
32 A top-level ".manifest" file is ignored.
33 """
class Algorithm:
    """Abstract interface shared by the manifest algorithms.

    Every method here raises NotImplementedError('Abstract'); concrete
    algorithms are expected to override them.
    """

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'."""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string."""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID."""
        raise NotImplementedError('Abstract')
class OldSHA1(Algorithm):
    """The 'sha1' manifest algorithm.

    Walks the tree depth-first, visiting the entries of each directory in
    sorted order, and emits one manifest line per directory, file and
    symlink. IDs produced by getID() have the form 'sha1=<hex digest>'.
    """

    def generate_manifest(self, root):
        """Yield each manifest line (without a trailing newline) for the tree at 'root'.

        Line formats:
          D <mtime> <path>                  a directory (the root is not listed)
          X <sha1> <mtime> <size> <leaf>    a regular file with an execute bit set
          F <sha1> <mtime> <size> <leaf>    any other regular file
          S <sha1> <size> <leaf>            a symlink; the hash is of its target

        A top-level '.manifest' entry is skipped.
        Raises SafeException if a name contains a newline, or if an entry is
        not a directory, regular file or symlink.
        """
        # A file counts as executable if any of the user/group/other
        # execute bits is set.
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            # This is an explicit raise rather than an assert so that the
            # check is still performed when Python runs with -O.
            if '\n' in sub:
                raise SafeException("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest':
                return

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            m = info.st_mode
            # st_mtime can be a float, depending on the interpreter; use
            # whole seconds so the manifest text does not depend on that.
            mtime = int(info.st_mtime)

            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (mtime, sub)
                items = os.listdir(full)
                items.sort()
                for x in items:
                    for y in recurse(os.path.join(sub, x)):
                        yield y
                return

            # Only the root has an empty relative path, and it must be a directory.
            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Hash the raw bytes, and don't leave the file open.
                stream = open(full, 'rb')
                try:
                    d = sha.new(stream.read()).hexdigest()
                finally:
                    stream.close()
                if m & exec_bits:
                    yield "X %s %s %s %s" % (d, mtime, info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, mtime, info.st_size, leaf)
            elif stat.S_ISLNK(m):
                d = sha.new(os.readlink(full)).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                yield "S %s %s %s" % (d, info.st_size, leaf)
            else:
                raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                        full)

        for x in recurse('/'):
            yield x

    def new_digest(self):
        """Return a new, empty SHA1 digest object."""
        return sha.new()

    def getID(self, digest):
        """Return the full ID ('sha1=' followed by the hex digest) for 'digest'."""
        return 'sha1=' + digest.hexdigest()
def get_algorithm(name):
    """Look up the Algorithm instance registered under 'name'.

    Raises BadDigest if no algorithm with that name is registered.
    """
    from zeroinstall.zerostore import BadDigest
    if name not in algorithms:
        raise BadDigest("Unknown algorithm '%s'" % name)
    return algorithms[name]
def generate_manifest(root, alg = 'sha1'):
    """Return an iterator over the manifest lines for the tree at 'root',
    generated by the algorithm named 'alg'."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest, alg = 'sha1'):
    """Writes a .manifest file into 'dir', and updates digest.

    The manifest is generated with the algorithm named 'alg', fed to
    'digest' with update(), and then written to '<dir>/.manifest'.

    Returns the 'digest' object that was passed in.
    Raises Exception if 'dir' already contains a '.manifest' entry (this
    includes a dangling symlink with that name).
    """
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise Exception('Archive contains a .manifest file!')

    # Join once rather than growing the string a line at a time.
    lines = [line + '\n' for line in get_algorithm(alg).generate_manifest(dir)]
    manifest = ''.join(lines)
    digest.update(manifest)

    stream = open(mfile, 'w')
    try:
        stream.write(manifest)
    finally:
        # Close the file even if the write fails.
        stream.close()
    return digest
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value),
    where 'alg' is an instance of Algorithm and 'value' is a string. If the
    algorithm isn't known or the ID has the wrong format, raise BadDigest."""
    # Imported here, as the other functions in this module do.
    from zeroinstall.zerostore import BadDigest
    parts = id.split('=', 1)
    if len(parts) != 2:
        raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
    return (get_algorithm(parts[0]), parts[1])
def verify(root):
    """Ensure that directory 'root' generates the digest given by its name.
    Raises BadDigest if not. For a non-error return:
    - The directory's name must be a digest (in the form "alg=value")
    - The calculated digest of the contents must match this name.
    - If there is a .manifest file, then its digest must also match.

    On failure, the raised BadDigest has a 'detail' attribute describing
    which of the three digests disagree.
    Note that when there is no .manifest file the comparison below cannot
    succeed, so BadDigest is raised in that case too."""
    from zeroinstall.zerostore import BadDigest

    required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest the manifest generated from the directory as it is now,
    # keeping the lines in case a diff is needed for the error report.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest the recorded manifest, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        stream = open(manifest_file)
        try:
            digest.update(stream.read())
        finally:
            stream.close()
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest("Cached item does NOT verify.")

    error.detail = " Expected digest: " + required_digest + "\n" + \
            " Actual digest: " + actual_digest + "\n" + \
            ".manifest digest: " + (manifest_digest or 'No .manifest file') + "\n\n"

    if manifest_digest is None:
        error.detail += "No .manifest, so no further details available."
    elif manifest_digest == actual_digest:
        error.detail += "The .manifest file matches the actual contents. Very strange!"
    elif manifest_digest == required_digest:
        import difflib
        stream = open(manifest_file)
        try:
            recorded = stream.readlines()
        finally:
            stream.close()
        diff = difflib.unified_diff(recorded, lines,
                        'Recorded', 'Actual')
        error.detail += "The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n" + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += "The directory contents are correct, but the .manifest file is wrong!"
    else:
        error.detail += "The .manifest file matches neither of the other digests. Odd."
    raise error
class HashLibAlgorithm(Algorithm):
    """Manifest algorithm whose digest constructor is chosen by name.

    Within each directory, the lines for its files and symlinks are emitted
    first (in sorted order), followed by each subdirectory in turn.
    Directory lines carry no mtime. IDs produced by getID() have the form
    '<name>=<hex digest>'.
    """
    new_digest = None        # Constructor for digest objects

    def __init__(self, name):
        """Select the digest constructor for 'name'.

        'sha1' uses the 'sha' module and is published under the name
        'sha1new'; any other name is looked up as an attribute of hashlib
        and keeps its own name.
        """
        if name == 'sha1':
            import sha
            self.new_digest = sha.new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name

    def generate_manifest(self, root):
        """Yield each manifest line (without a trailing newline) for the tree at 'root'.

        Line formats:
          D <path>                          a directory (the root is not listed)
          X <hash> <mtime> <size> <leaf>    a regular file with an execute bit set
          F <hash> <mtime> <size> <leaf>    any other regular file
          S <hash> <size> <leaf>            a symlink; the hash is of its target

        A top-level '.manifest' entry is skipped.
        Raises SafeException if a name contains a newline, or if an entry is
        not a directory, regular file or symlink. Raises Exception if 'root'
        itself is not a directory.
        """
        # A file counts as executable if any of the user/group/other
        # execute bits is set.
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            # This is an explicit raise rather than an assert so that the
            # check is still performed when Python runs with -O.
            if '\n' in sub:
                raise SafeException("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest':
                return

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m):
                raise Exception('Not a directory: "%s"' % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(full, leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    # Hash the raw bytes, and don't leave the file open.
                    stream = open(path, 'rb')
                    try:
                        d = new_digest(stream.read()).hexdigest()
                    finally:
                        stream.close()
                    # st_mtime can be a float, depending on the interpreter;
                    # use whole seconds so the manifest text does not
                    # depend on that.
                    mtime = int(info.st_mtime)
                    if m & exec_bits:
                        yield "X %s %s %s %s" % (d, mtime, info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, mtime, info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    d = new_digest(os.readlink(path)).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    yield "S %s %s %s" % (d, info.st_size, leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are listed after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
                            path)
            for x in dirs:
                for y in recurse(os.path.join(sub, x)):
                    yield y
            return

        for x in recurse('/'):
            yield x

    def getID(self, digest):
        """Return the full ID (this algorithm's name, '=', and the hex digest)."""
        return self.name + '=' + digest.hexdigest()
# Registry of the available manifest algorithms, keyed by the name used
# as the prefix of an ID.
algorithms = {}
algorithms['sha1'] = OldSHA1()
algorithms['sha1new'] = HashLibAlgorithm('sha1')

# sha256 can only be offered when the hashlib module was importable.
if hashlib is not None:
    algorithms['sha256'] = HashLibAlgorithm('sha256')