1 # Copyright (C) 2006, Thomas Leonard
2 # See the README file for details, or visit http://0install.net.
4 from __future__
import generators
8 from zeroinstall
import SafeException
9 from zeroinstall
.zerostore
import BadDigest
16 """A manifest is a string representing a directory tree, with the property
17 that two trees will generate identical manifest strings if and only if:
19 - They have extactly the same set of files, directories and symlinks.
20 - For each pair of corresponding directories in the two sets:
21 - The mtimes are the same.
22 - For each pair of corresponding files in the two sets:
23 - The size, executable flag and mtime are the same.
24 - The contents have matching SHA1 sums.
25 - For each pair of corresponding symlinks in the two sets:
26 - The mtime and size are the same.
27 - The targets have matching SHA1 sums.
29 The manifest is typically processed with SHA1 itself. So, the idea is that
30 any significant change to the contents of the tree will change the SHA1 sum
33 A top-level ".manifest" file is ignored.
37 def generate_manifest(root
):
38 """Returns an iterator that yields each line of the manifest for the directory
39 tree rooted at 'root'."""
40 raise Exception('Abstract')
43 """Create a new digest. Call update() on the returned object to digest the data.
44 Call getID() to turn it into a full ID string."""
45 raise Exception('Abstract')
47 def getID(self
, digest
):
48 """Convert a digest (from new_digest) to a full ID."""
49 raise Exception('Abstract')
51 class OldSHA1(Algorithm
):
52 def generate_manifest(self
, root
):
54 # To ensure that a line-by-line comparison of the manifests
55 # is possible, we require that filenames don't contain newlines.
56 # Otherwise, you can name a file so that the part after the \n
57 # would be interpreted as another line in the manifest.
58 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
59 assert sub
.startswith('/')
61 if sub
== '/.manifest': return
63 full
= os
.path
.join(root
, sub
[1:])
69 yield "D %s %s" % (info
.st_mtime
, sub
)
70 items
= os
.listdir(full
)
73 for y
in recurse(os
.path
.join(sub
, x
)):
78 leaf
= os
.path
.basename(sub
[1:])
80 d
= sha
.new(file(full
).read()).hexdigest()
82 yield "X %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
84 yield "F %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
86 d
= sha
.new(os
.readlink(full
)).hexdigest()
87 # Note: Can't use utime on symlinks, so skip mtime
88 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
90 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
92 for x
in recurse('/'): yield x
97 def getID(self
, digest
):
98 return 'sha1=' + digest
.hexdigest()
100 def get_algorithm(name
):
102 return algorithms
[name
]
104 raise BadDigest("Unknown algorithm '%s'" % name
)
106 def generate_manifest(root
, alg
= 'sha1'):
107 return get_algorithm(alg
).generate_manifest(root
)
109 def add_manifest_file(dir, digest_or_alg
):
110 """Writes a .manifest file into 'dir', and returns the digest.
111 Second argument should be an instance of Algorithm. Passing a digest
112 here is deprecated."""
113 mfile
= os
.path
.join(dir, '.manifest')
114 if os
.path
.islink(mfile
) or os
.path
.exists(mfile
):
115 raise Exception('Archive contains a .manifest file!')
117 if isinstance(digest_or_alg
, Algorithm
):
119 digest
= alg
.new_digest()
121 digest
= digest_or_alg
122 alg
= get_algorithm('sha1')
123 for line
in alg
.generate_manifest(dir):
124 manifest
+= line
+ '\n'
125 digest
.update(manifest
)
126 stream
= file(mfile
, 'w')
127 stream
.write(manifest
)
132 """Take an ID in the form 'alg=value' and return a tuple (alg, value),
133 where 'alg' is an instance of Algorithm and 'value' is a string. If the
134 algorithm isn't known or the ID has the wrong format, raise KeyError."""
135 parts
= id.split('=', 1)
137 raise BadDigest("Digest '%s' is not in the form 'algorithm=value'" % id)
138 return (get_algorithm(parts
[0]), parts
[1])
140 def verify(root
, required_digest
= None):
141 """Ensure that directory 'dir' generates the given digest.
142 Raises BadDigest if not. For a non-error return:
143 - Dir's name must be a digest (in the form "alg=value")
144 - The calculated digest of the contents must match this name.
145 - If there is a .manifest file, then its digest must also match."""
146 if required_digest
is None:
147 required_digest
= os
.path
.basename(root
)
148 alg
= splitID(required_digest
)[0]
150 digest
= alg
.new_digest()
152 for line
in alg
.generate_manifest(root
):
156 actual_digest
= alg
.getID(digest
)
158 manifest_file
= os
.path
.join(root
, '.manifest')
159 if os
.path
.isfile(manifest_file
):
160 digest
= alg
.new_digest()
161 digest
.update(file(manifest_file
).read())
162 manifest_digest
= alg
.getID(digest
)
164 manifest_digest
= None
166 if required_digest
== actual_digest
== manifest_digest
:
169 error
= BadDigest("Cached item does NOT verify.")
171 error
.detail
= " Expected digest: " + required_digest
+ "\n" + \
172 " Actual digest: " + actual_digest
+ "\n" + \
173 ".manifest digest: " + (manifest_digest
or 'No .manifest file') + "\n\n"
175 if manifest_digest
is None:
176 error
.detail
+= "No .manifest, so no further details available."
177 elif manifest_digest
== actual_digest
:
178 error
.detail
+= "The .manifest file matches the actual contents. Very strange!"
179 elif manifest_digest
== required_digest
:
181 diff
= difflib
.unified_diff(file(manifest_file
).readlines(), lines
,
182 'Recorded', 'Actual')
183 error
.detail
+= "The .manifest file matches the directory name.\n" \
184 "The contents of the directory have changed:\n" + \
186 elif required_digest
== actual_digest
:
187 error
.detail
+= "The directory contents are correct, but the .manifest file is wrong!"
189 error
.detail
+= "The .manifest file matches neither of the other digests. Odd."
192 class HashLibAlgorithm(Algorithm
):
193 new_digest
= None # Constructor for digest objects
195 def __init__(self
, name
):
198 self
.new_digest
= sha
.new
199 self
.name
= 'sha1new'
201 self
.new_digest
= getattr(hashlib
, name
)
204 def generate_manifest(self
, root
):
206 # To ensure that a line-by-line comparison of the manifests
207 # is possible, we require that filenames don't contain newlines.
208 # Otherwise, you can name a file so that the part after the \n
209 # would be interpreted as another line in the manifest.
210 if '\n' in sub
: raise BadDigest("Newline in filename '%s'" % sub
)
211 assert sub
.startswith('/')
213 full
= os
.path
.join(root
, sub
[1:])
214 info
= os
.lstat(full
)
215 new_digest
= self
.new_digest
218 if not stat
.S_ISDIR(m
): raise Exception('Not a directory: "%s"' % full
)
221 items
= os
.listdir(full
)
225 path
= os
.path
.join(root
, sub
[1:], leaf
)
226 info
= os
.lstat(path
)
230 if leaf
== '.manifest': continue
232 d
= new_digest(file(path
).read()).hexdigest()
234 yield "X %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
236 yield "F %s %s %s %s" % (d
, info
.st_mtime
,info
.st_size
, leaf
)
237 elif stat
.S_ISLNK(m
):
238 d
= new_digest(os
.readlink(path
)).hexdigest()
239 # Note: Can't use utime on symlinks, so skip mtime
240 yield "S %s %s %s" % (d
, info
.st_size
, leaf
)
241 elif stat
.S_ISDIR(m
):
244 raise SafeException("Unknown object '%s' (not a file, directory or symlink)" %
247 for y
in recurse(os
.path
.join(sub
, x
)): yield y
250 for x
in recurse('/'): yield x
252 def getID(self
, digest
):
253 return self
.name
+ '=' + digest
.hexdigest()
257 'sha1new': HashLibAlgorithm('sha1'),
260 if hashlib
is not None:
261 algorithms
['sha256'] = HashLibAlgorithm('sha256')