Turn tree comparer into a class.
[asure.git] / asure.py
blob4d952e6c84ad0ea39c7b2c9e58d230de91b27c86
1 #! /usr/bin/env python
3 # Directory integrity scanner.
5 from stat import *
6 import os
7 import sys
8 from os.path import join
10 from cPickle import dump, load
11 import gzip
def walk(top):
    """Generate the scan stream for the tree rooted at `top`.

    The root directory itself is reported under the name '.'.  Its
    lstat result is handed to walker(), which compares each
    subdirectory's st_dev against it before descending.
    """
    root_stat = os.lstat(top)
    for entry in walker(top, '.', root_stat):
        yield entry
def walker(path, name, topstat):
    """Directory tree generator.

    At one point, this started as a copy of os.walk from Python's
    library.  Even the arguments are different now.

    Arguments:
      path    -- filesystem path of the directory to list.
      name    -- name to report for this directory in the stream.
      topstat -- lstat result of the scan root; subdirectories whose
                 st_dev differs from it are not descended into and
                 are not reported at all.

    Yields, in order:
      ('d', name)     on entering the directory,
      the complete stream of each subdirectory (sorted by name),
      ('-', entry)    for each non-directory entry (sorted by name),
      ('u',)          on leaving the directory.

    If the directory cannot be listed, a warning is written to stderr
    and nothing at all is yielded for it.
    """
    try:
        names = os.listdir(path)
    except OSError:
        sys.stderr.write("Warning, can't read dir: %s\n" % path)
        return

    # The verification algorithm requires the names to be sorted.
    names.sort()

    # Scan files are skipped when found directly in '.', so a scan
    # never reports its own data files.
    scan_files = ("0sure.dat.gz", "0sure.bak.gz", "0sure.0.gz")

    # Stat each name found, and sort it into directories (which need
    # their stat later for the device check) and everything else.
    dirs, nondirs = [], []
    for onename in names:
        if path == '.' and onename in scan_files:
            continue
        fullname = join(path, onename)
        try:
            st = os.lstat(fullname)
        except OSError:
            # The entry vanished (or became inaccessible) between the
            # listdir and the lstat.  Warn and skip it rather than
            # aborting the whole scan, like an unreadable directory.
            sys.stderr.write("Warning, can't stat: %s\n" % fullname)
            continue
        if S_ISDIR(st.st_mode):
            dirs.append((onename, st))
        else:
            nondirs.append(onename)

    # Indicate "entering" the directory.
    yield 'd', name

    # Then recursively walk into all of the subdirectories that live
    # on the same device as the root of the scan.
    for (onename, st) in dirs:
        if st.st_dev == topstat.st_dev:
            for x in walker(join(path, onename), onename, topstat):
                yield x

    # Then yield each entry that is not a subdirectory.
    for onename in nondirs:
        yield '-', onename

    # Last, yield the leaving.
    yield ('u',)
class comparer:
    """Class for comparing two directory iterations.  Keeps track of
    state, and allows child classes to define handlers for the various
    types of differences found.

    Both iterations are streams of tuples as produced by walker():
    ('d', name) enters a directory, ('-', name) is a non-directory
    entry, and ('u',) leaves the current directory.  Within one
    directory the subdirectories come first, then the non-directories,
    each group sorted by name; the merge below relies on that order.

    'left' is treated as the old state and 'right' as the new state:
    entries only on the left are reported as deleted, entries only on
    the right as added.

    Uses the next() builtin, so Python 2.6 or newer is required.
    """

    def __init__(self, left, right):
        self.__left = left
        self.__right = right

    # Default handlers for the 6 possible changes (or not changes)
    # that can happen in a directory.  'path' is always the directory
    # containing the entry.  The adds and deletes take an additional
    # argument that will be set to true if this added or removed
    # entity is contained in an entirely new (or entirely removed)
    # directory.  Some handlers may want to avoid printing verbose
    # messages for the contents of added or deleted directories, and
    # can use this value.
    def handle_same_dir(self, path, a, b):
        pass

    def handle_delete_dir(self, path, a, recursing):
        pass

    def handle_add_dir(self, path, a, recursing):
        pass

    def handle_same_nondir(self, path, a, b):
        pass

    def handle_delete_nondir(self, path, a, recursing):
        pass

    def handle_add_nondir(self, path, a, recursing):
        pass

    def run(self):
        """Compare the two streams from their root directories down.

        Raises ValueError if either stream does not begin with a 'd'
        entry.  (The original raised string exceptions, which are not
        valid exceptions in Python 2.6 and later.)
        """
        a = next(self.__left)
        if a[0] != 'd':
            raise ValueError("Scan doesn't start with a directory")
        b = next(self.__right)
        if b[0] != 'd':
            raise ValueError("Tree walk doesn't start with a directory")
        self.__run(b[1], 1)

    def __run(self, path, depth):
        """Iterate both pairs of directories equally

        Processes the contents of a single directory, recursively
        calling itself to handle child directories.  Returns with both
        iterators advanced past the 'u' node that ends the dir.

        'path' is the directory being compared; 'depth' is its nesting
        level and is only carried along as a debugging aid."""
        a = next(self.__left)
        b = next(self.__right)

        while True:
            if a[0] == 'u' and b[0] == 'u':
                # Both are leaving the directory.
                return

            elif a[0] == 'd' and b[0] == 'd':
                # Both looking at a directory entry.
                if a[1] == b[1]:
                    # If the name is the same, walk the tree.
                    self.handle_same_dir(path, a, b)
                    self.__run(os.path.join(path, a[1]), depth + 1)
                    a = next(self.__left)
                    b = next(self.__right)
                elif a[1] < b[1]:
                    # A directory has been deleted.
                    self.handle_delete_dir(path, a, False)
                    self.delete_whole_dir(self.__left,
                                          os.path.join(path, a[1]))
                    a = next(self.__left)
                else:
                    # A directory has been added.
                    self.handle_add_dir(path, b, False)
                    self.add_whole_dir(self.__right,
                                       os.path.join(path, b[1]))
                    b = next(self.__right)

            elif a[0] == '-' and b[0] == '-':
                # Both are looking at a non-dir.
                if a[1] == b[1]:
                    # Same name as well.
                    self.handle_same_nondir(path, a, b)
                    a = next(self.__left)
                    b = next(self.__right)
                elif a[1] < b[1]:
                    # Deleted non-dir.
                    self.handle_delete_nondir(path, a, False)
                    a = next(self.__left)
                else:
                    # Added non-dir.
                    self.handle_add_nondir(path, b, False)
                    b = next(self.__right)

            elif a[0] == '-' and b[0] == 'u':
                # Right side is finished: the left non-dir was deleted.
                self.handle_delete_nondir(path, a, False)
                a = next(self.__left)

            elif a[0] == 'u' and b[0] == '-':
                # Left side is finished: the right non-dir was added.
                self.handle_add_nondir(path, b, False)
                b = next(self.__right)

            elif a[0] == 'd' and b[0] in ('-', 'u'):
                # Right side has no more directories (it is at its
                # non-dirs, or already leaving), so the left directory
                # was deleted.
                self.handle_delete_dir(path, a, False)
                self.delete_whole_dir(self.__left,
                                      os.path.join(path, a[1]))
                a = next(self.__left)

            elif a[0] in ('-', 'u') and b[0] == 'd':
                # Left side has no more directories, so the right
                # directory was added.
                self.handle_add_dir(path, b, False)
                self.add_whole_dir(self.__right,
                                   os.path.join(path, b[1]))
                b = next(self.__right)

            else:
                # Only reachable when a stream contains an entry type
                # other than 'd', '-' or 'u'.
                print("Unhandled case!!!")
                sys.exit(2)

    def add_whole_dir(self, iter, path):
        """Consume entries until this directory has been added.

        'iter' must be positioned just after the 'd' entry of the added
        directory, and 'path' is that directory's own path.  Every
        entry up to and including the matching 'u' is consumed and
        reported through the add handlers with recursing set to True.
        """
        while True:
            a = next(iter)
            if a[0] == 'u':
                return
            elif a[0] == 'd':
                self.handle_add_dir(path, a, True)
                self.add_whole_dir(iter, os.path.join(path, a[1]))
            else:
                self.handle_add_nondir(path, a, True)

    def delete_whole_dir(self, iter, path):
        """Consume entries until this directory has been deleted.

        'iter' must be positioned just after the 'd' entry of the
        removed directory, and 'path' is that directory's own path.
        Every entry up to and including the matching 'u' is consumed
        and reported through the delete handlers with recursing set to
        True.
        """
        while True:
            a = next(iter)
            if a[0] == 'u':
                return
            elif a[0] == 'd':
                self.handle_delete_dir(path, a, True)
                self.delete_whole_dir(iter, os.path.join(path, a[1]))
            else:
                self.handle_delete_nondir(path, a, True)
class check_comparer(comparer):
    """Comparer for comparing either two trees, or a tree and a
    filesystem.  'right' should be the newer tree.

    Prints one line per top-level difference: '-' for entries only in
    the older tree, '+' for entries only in the newer one.  Entries
    inside a directory that was itself added or deleted (recursing is
    true) are not printed."""

    def handle_same_dir(self, path, a, b):
        """Unchanged directories are not reported."""

    def handle_delete_dir(self, path, a, recursing):
        if recursing:
            return
        print("- dir %s" % (os.path.join(path, a[1])))

    def handle_add_dir(self, path, a, recursing):
        if recursing:
            return
        print("+ dir %s" % (os.path.join(path, a[1])))

    def handle_same_nondir(self, path, a, b):
        """Unchanged non-directories are not reported."""

    def handle_delete_nondir(self, path, a, recursing):
        if recursing:
            return
        print("- %s" % (os.path.join(path, a[1])))

    def handle_add_nondir(self, path, a, recursing):
        if recursing:
            return
        print("+ %s" % (os.path.join(path, a[1])))
# Header record stored as the first pickle of every scan file;
# reader() refuses files whose header differs from it.
version = 'Asure scan version 1.0'

def reader(path):
    """Iterate over a previously written dump.

    Opens the gzip file at `path`, checks that its first pickled
    record equals `version`, then yields each following record until
    the end of the file.  The file is closed when the generator is
    exhausted, closed, or fails.

    Raises ValueError (on the first iteration) if the header does not
    match.  (The original raised a string exception, which is not a
    valid exception in Python 2.6 and later.)

    NOTE: the records are unpickled, and unpickling can execute
    arbitrary code.  Only read scan files from a trusted source.
    """
    fd = gzip.open(path, 'rb')
    try:
        vers = load(fd)
        if version != vers:
            raise ValueError("incompatible version of asure file")
        try:
            while True:
                yield load(fd)
        except EOFError:
            # Running out of data is the normal end of the dump.
            return
    finally:
        fd.close()
def writer(path, tmppath, iter):
    """Write the given item (probably assembled iterator)

    Pickles `version` followed by every item of `iter` into a gzip
    file at `tmppath`, closes it, and then renames it to `path`, so
    `path` is only ever replaced by a completely written file.

    If producing or writing an item fails, the temporary file is
    closed, the exception propagates, and `path` is left untouched.
    """
    fd = gzip.open(tmppath, 'wb')
    try:
        dump(version, fd, -1)
        for item in iter:
            dump(item, fd, -1)
    finally:
        # The original said `fd.close` without the call parentheses,
        # so the gzip stream was never explicitly closed (and hence
        # not reliably flushed) before the rename below.
        fd.close()
    os.rename(tmppath, path)
def fresh_scan():
    """Scan the current directory and store the result.

    The scan is written to the temporary file '0sure.0.gz' and then
    moved over '0sure.dat.gz' by writer()."""
    tree = walk('.')
    writer('0sure.dat.gz', '0sure.0.gz', tree)
def check_scan():
    """Perform a scan of the filesystem, and compare it with the scan
    file.  Reports differences.

    The stored scan in '0sure.dat.gz' is the older (left) side and a
    live walk of '.' is the newer (right) side."""
    stored = reader('0sure.dat.gz')
    live = walk('.')
    checker = check_comparer(stored, live)
    checker.run()
def main(argv):
    """Run the command named by the single entry of `argv`.

    `argv` is the argument list without the program name.  Known
    commands:
      scan   -- write a fresh scan of the current directory.
      update -- not implemented yet; only prints "Update".
      check  -- compare the stored scan with the current directory.
      show   -- print every record of the stored scan.

    A wrong number of arguments or an unknown command prints the usage
    message and exits through usage().
    """
    if len(argv) != 1:
        usage()
    command = argv[0]
    if command == 'scan':
        fresh_scan()
    elif command == 'update':
        print("Update")
    elif command == 'check':
        check_scan()
    elif command == 'show':
        for i in reader('0sure.dat.gz'):
            print(i)
    else:
        # Previously an unrecognised command was silently ignored.
        usage()
def usage():
    """Print the command summary and exit with status 1."""
    # 'show' is accepted by main() but was missing from this summary.
    print("Usage: asure {scan|update|check|show}")
    sys.exit(1)
if __name__ == '__main__':
    # Script entry point: pass the command-line arguments, without the
    # program name, to main().
    main(sys.argv[1:])