1 # Copyright (C) 2006, Thomas Leonard
2 # See the README file for details, or visit http://0install.net.
4 from __future__
import generators
8 from zeroinstall
import SafeException
15 """A manifest is a string representing a directory tree, with the property
16 that two trees will generate identical manifest strings if and only if:
- They have exactly the same set of files, directories and symlinks.
19 - For each pair of corresponding directories in the two sets:
20 - The mtimes are the same.
21 - For each pair of corresponding files in the two sets:
22 - The size, executable flag and mtime are the same.
23 - The contents have matching SHA1 sums.
24 - For each pair of corresponding symlinks in the two sets:
25 - The mtime and size are the same.
26 - The targets have matching SHA1 sums.
28 The manifest is typically processed with SHA1 itself. So, the idea is that
any significant change to the contents of the tree will change the SHA1 sum
of the manifest.
32 A top-level ".manifest" file is ignored.
def generate_manifest(root):
    """Abstract hook: produce the manifest for the tree rooted at 'root'.

    Concrete algorithms are expected to return an iterator that yields the
    manifest one line at a time.  This placeholder never does so; it always
    raises Exception('Abstract').
    """
    # NOTE(review): getID and the generate_manifest overrides in this file
    # take 'self' as their first parameter; this signature does not --
    # confirm whether that is intended.
    raise Exception('Abstract')
42 """Create a new digest. Call update() on the returned object to digest the data.
43 Call getID() to turn it into a full ID string."""
44 raise Exception('Abstract')
def getID(self, digest):
    """Turn a digest (as obtained from new_digest) into a full ID.

    Abstract placeholder: always raises Exception('Abstract').
    """
    raise Exception('Abstract')
50 class OldSHA1(Algorithm
):
51 def generate_manifest(self
, root
):
53 # To ensure that a line-by-line comparison of the manifests
54 # is possible, we require that filenames don't contain newlines.
55 # Otherwise, you can name a file so that the part after the \n
56 # would be interpreted as another line in the manifest.
57 assert '\n' not in sub
58 assert sub
.startswith('/')
60 if sub
== '/.manifest': return
62 full
= os
.path
.join(root
, sub
[1:])
68 yield "D %s %s" % (info
.st_mtime
, sub
)
69 items
= os
.listdir(full
)
72 for y
in recurse(os
.path
.join(sub
, x
)):
77 leaf
= os
.path
.basename(sub
[1:])
79 d
= sha
.new(file(full
).read()).hexdigest()
81 yield "X %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
83 yield "F %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
85 d
= sha
.new(os
.readlink(full
)).hexdigest()
86 # Note: Can't use utime on symlinks, so skip mtime
87 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
89 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
91 for x
in recurse('/'): yield x
def getID(self, digest):
    """Return the full ID for 'digest': its hex digest prefixed by 'sha1='."""
    hex_value = digest.hexdigest()
    return 'sha1=' + hex_value
def get_algorithm(name):
    """Return the entry registered under 'name' in the 'algorithms' table.

    Raises BadDigest if nothing is registered under that name.
    """
    from zeroinstall.zerostore import BadDigest
    try:
        return algorithms[name]
    except KeyError:
        # Report an unknown name as BadDigest instead of leaking the raw
        # KeyError from the table lookup.
        raise BadDigest("Unknown algorithm '%s'" % name)
def generate_manifest(root, alg='sha1'):
    """Return the manifest line iterator for 'root', as produced by the
    algorithm that get_algorithm() returns for the name 'alg'."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
def add_manifest_file(dir, digest, alg='sha1'):
    """Write a .manifest file into 'dir' and feed its text to 'digest'.

    dir    -- directory whose manifest is generated and stored
    digest -- object with an update() method; it receives the manifest text
    alg    -- name of the manifest algorithm, looked up with get_algorithm()

    Returns 'digest' after it has been updated (callers that ignore the
    result are unaffected).
    Raises Exception if 'dir' already contains a '.manifest' entry.
    """
    mfile = os.path.join(dir, '.manifest')
    # islink() is checked as well because exists() is false for a
    # dangling symlink.
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise Exception('Archive contains a .manifest file!')

    # Every manifest line is newline-terminated.  Joining once gives the
    # accumulator a defined starting value and avoids growing a string
    # inside the loop.
    lines = get_algorithm(alg).generate_manifest(dir)
    manifest = ''.join([line + '\n' for line in lines])

    digest.update(manifest)

    stream = open(mfile, 'w')
    try:
        stream.write(manifest)
    finally:
        # Release the handle even when the write fails.
        stream.close()
    return digest
124 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
125 where 'alg' is an instance of Algorithm and 'value' is a string. If the
126 algorithm isn't known or the ID has the wrong format, raise KeyError."""
127 parts
= id.split('=', 1)
129 raise BadDigest("Digest '%s' is not in the form 'algorithm=value'")
130 return (get_algorithm(parts
[0]), parts
[1])
133 """Ensure that directory 'dir' generates the given digest.
134 Raises BadDigest if not. For a non-error return:
135 - Dir's name must be a digest (in the form "alg=value")
136 - The calculated digest of the contents must match this name.
137 - If there is a .manifest file, then its digest must also match."""
138 from zeroinstall
.zerostore
import BadDigest
140 required_digest
= os
.path
.basename(root
)
141 alg
= splitID(required_digest
)[0]
143 digest
= alg
.new_digest()
145 for line
in alg
.generate_manifest(root
):
149 actual_digest
= alg
.getID(digest
)
151 manifest_file
= os
.path
.join(root
, '.manifest')
152 if os
.path
.isfile(manifest_file
):
153 digest
= alg
.new_digest()
154 digest
.update(file(manifest_file
).read())
155 manifest_digest
= alg
.getID(digest
)
157 manifest_digest
= None
159 if required_digest
== actual_digest
== manifest_digest
:
162 error
= BadDigest("Cached item does NOT verify.")
164 error
.detail
= " Expected digest: " + required_digest
+ "\n" + \
165 " Actual digest: " + actual_digest
+ "\n" + \
166 ".manifest digest: " + (manifest_digest
or 'No .manifest file') + "\n\n"
168 if manifest_digest
is None:
169 error
.detail
+= "No .manifest, so no further details available."
170 elif manifest_digest
== actual_digest
:
171 error
.detail
+= "The .manifest file matches the actual contents. Very strange!"
172 elif manifest_digest
== required_digest
:
174 diff
= difflib
.unified_diff(file(manifest_file
).readlines(), lines
,
175 'Recorded', 'Actual')
176 error
.detail
+= "The .manifest file matches the directory name.\n" \
177 "The contents of the directory have changed:\n" + \
179 elif required_digest
== actual_digest
:
180 error
.detail
+= "The directory contents are correct, but the .manifest file is wrong!"
182 error
.detail
+= "The .manifest file matches neither of the other digests. Odd."
185 class HashLibAlgorithm(Algorithm
):
186 new_digest
= None # Constructor for digest objects
188 def __init__(self
, name
):
191 self
.new_digest
= sha
.new
192 self
.name
= 'sha1new'
194 self
.new_digest
= getattr(hashlib
, name
)
197 def generate_manifest(self
, root
):
199 # To ensure that a line-by-line comparison of the manifests
200 # is possible, we require that filenames don't contain newlines.
201 # Otherwise, you can name a file so that the part after the \n
202 # would be interpreted as another line in the manifest.
203 assert '\n' not in sub
204 assert sub
.startswith('/')
206 if sub
== '/.manifest': return
208 full
= os
.path
.join(root
, sub
[1:])
209 info
= os
.lstat(full
)
210 new_digest
= self
.new_digest
213 if not stat
.S_ISDIR(m
): raise Exception('Not a directory: "%s"' % full
)
216 items
= os
.listdir(full
)
220 path
= os
.path
.join(root
, sub
[1:], leaf
)
221 info
= os
.lstat(path
)
225 d
= new_digest(file(path
).read()).hexdigest()
227 yield "X %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
229 yield "F %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
230 elif stat
.S_ISLNK(m
):
231 d
= new_digest(os
.readlink(path
)).hexdigest()
232 # Note: Can't use utime on symlinks, so skip mtime
233 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
234 elif stat
.S_ISDIR(m
):
237 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
240 for y
in recurse(os
.path
.join(sub
, x
)): yield y
243 for x
in recurse('/'): yield x
def getID(self, digest):
    """Return the full ID for 'digest': self.name, '=', then the hex digest."""
    prefix = self.name + '='
    return prefix + digest.hexdigest()
250 'sha1new': HashLibAlgorithm('sha1'),
253 if hashlib
is not None:
254 algorithms
['sha256'] = HashLibAlgorithm('sha256')