Workaround for Python 2.5 time.
[asure.git] / assurance / main.py
blob 011bd9cfb4623cf6592c3a74ef7cae3781c5d45a
1 #! /usr/bin/env python
3 # Directory integrity scanner.
5 import math
6 from stat import *
7 import os
8 import sys
9 from os.path import join
11 from cPickle import dump, load
12 import gzip
14 import hashing
15 import version
def walk(top):
    """Yield the scan entries for the tree rooted at 'top'.

    The root directory itself is reported under the name '.'; every
    entry produced by walker() is passed through unchanged.
    """
    root_stat = os.lstat(top)
    for entry in walker(top, '.', root_stat):
        yield entry
def walker(path, name, dirstat):
    """Directory tree generator.

    At one point, this started as a copy of os.walk from Python's
    library.  Even the arguments are different now.

    'path' is the directory to read, 'name' the name it is reported
    under, and 'dirstat' its lstat result.  Yields, in this order:

      ('d', name, info)   on entering the directory,
      ...                 the complete output for every subdirectory,
      ('-', name, info)   for every entry that is not a directory,
      ('u',)              on leaving the directory.

    Names are visited in sorted order.  A subdirectory that lives on a
    different device than 'dirstat' is not descended into and produces
    no entries at all.  A directory that cannot be listed, or an entry
    that cannot be lstat'ed, is reported on stderr and left out of the
    output.
    """
    try:
        names = os.listdir(path)
    except OSError:
        sys.stderr.write("Warning, can't read dir: %s\n" % path)
        return

    # The verification algorithm requires the names to be sorted.
    names.sort()

    # The scan files themselves live in the top directory and must not
    # show up in the scan.
    scan_files = ("0sure.dat.gz", "0sure.bak.gz", "0sure.0.gz")

    # Stat each name found, and put the result in one of two lists.
    dirs, nondirs = [], []
    for onename in names:
        if path == '.' and onename in scan_files:
            continue
        fullname = join(path, onename)
        try:
            st = os.lstat(fullname)
        except OSError:
            # The entry can disappear (or become unreachable) between
            # the listdir above and this lstat; warn and carry on the
            # same way an unreadable directory is handled.
            sys.stderr.write("Warning, can't stat: %s\n" % fullname)
            continue
        if S_ISDIR(st.st_mode):
            dirs.append((onename, st))
        else:
            nondirs.append((onename, st))

    # Indicate "entering" the directory.
    yield 'd', name, convert_stat(dirstat)

    # Then recursively walk into all of the subdirectories, staying on
    # the device this directory is on.
    for (onename, st) in dirs:
        if st.st_dev == dirstat.st_dev:
            for x in walker(join(path, onename), onename, st):
                yield x

    # Then yield each entry that is not a subdirectory.
    for (onename, st) in nondirs:
        yield '-', onename, convert_stat(st)

    # Last, yield the leaving.
    yield ('u',)
# Convert the passed stat info into an association of the information
# itself.  Does not do anything that requires reading the file (such
# as readlink or md5).
def convert_stat(st):
    """Return a dict describing the file whose stat result is 'st'.

    The dict always has a 'kind' key, one of 'dir', 'file', 'lnk',
    'sock', 'fifo', 'blk' or 'chr'.  Symlinks carry nothing else.  All
    other kinds carry 'uid', 'gid' and 'perm'; regular files add
    'mtime', 'ctime' and 'ino'; block and character devices add
    'devmaj' and 'devmin'.

    Raises ValueError if the mode matches none of these kinds.
    """
    mode = st.st_mode

    # Each S_IS* predicate has to be *called* with the mode; testing
    # the bare function object is always true.
    if S_ISDIR(mode):
        kind = 'dir'
    elif S_ISREG(mode):
        kind = 'file'
    elif S_ISLNK(mode):
        return {'kind': 'lnk'}
    elif S_ISSOCK(mode):
        kind = 'sock'
    elif S_ISFIFO(mode):
        kind = 'fifo'
    elif S_ISBLK(mode):
        kind = 'blk'
    elif S_ISCHR(mode):
        kind = 'chr'
    else:
        raise ValueError("Unknown file kind")

    info = {'kind': kind,
            'uid': st.st_uid,
            'gid': st.st_gid,
            'perm': S_IMODE(mode)}

    if kind == 'file':
        info['mtime'] = st.st_mtime
        info['ctime'] = st.st_ctime
        info['ino'] = st.st_ino
    elif kind in ('blk', 'chr'):
        info['devmaj'] = os.major(st.st_rdev)
        info['devmin'] = os.minor(st.st_rdev)

    return info
def empty_tree():
    """Generate the entries of a tree that contains nothing.

    The stream is just the root directory being entered and left; the
    root carries an empty attribute dict because there is nothing
    meaningful to record for it.
    """
    for entry in (('d', '.', {}), ('u',)):
        yield entry
def empty_generator():
    """Return a generator that finishes without producing anything."""
    # The unreachable yield is what makes this function a generator.
    if False:
        yield ()
# Bit flags handed to comparer.handle_leave: the directory being left
# was added, was deleted, or (both bits set) exists on both sides.
mode_add = 1
mode_delete = 2
mode_both = 3
class comparer:
    """Class for comparing two directory iterations.  Keeps track of
    state, and allows child classes to define handlers for the various
    types of differences found.

    Both iterations are streams of ('d', name, info), ('-', name, info)
    and ('u',) entries in the order walker() produces them: inside one
    directory all subdirectories come first, then the non-directories,
    each group sorted by name.  An entry found only in 'left' is
    reported as deleted, one found only in 'right' as added.

    The streams are advanced with their .next() method.  Every handler
    returns an iterable; run() yields whatever the handlers produce.
    """

    def __init__(self, left, right):
        self.__left = left
        self.__right = right

    # Default handlers for the 6 possible changes (or not changes)
    # that can happen in a directory.  The adds and deletes take an
    # additional argument that will be set to true if this added or
    # removed entity is contained in an entirely new (or entirely
    # removed) directory.  Some handlers may want to avoid printing
    # verbose messages for the contents of added or deleted
    # directories, and can use this value.
    def handle_same_dir(self, path, a, b):
        return empty_generator()

    def handle_delete_dir(self, path, a, recursing):
        return empty_generator()

    def handle_add_dir(self, path, a, recursing):
        return empty_generator()

    def handle_same_nondir(self, path, a, b):
        return empty_generator()

    def handle_delete_nondir(self, path, a, recursing):
        return empty_generator()

    def handle_add_nondir(self, path, a, recursing):
        return empty_generator()

    def handle_leave(self, path, mode):
        """Handle the leaving of a directory.  Instead of 'recursing',
        the mode is defined as 'mode_add' (1) for add, 'mode_delete'
        (2) for delete, or these two or'd together 'mode_both' (3) for
        both"""
        return empty_generator()

    def run(self):
        """Compare the two streams, yielding the handlers' output.

        Raises ValueError if either stream does not begin with a
        directory ('d') entry.
        """
        a = self.__left.next()
        if a[0] != 'd':
            raise ValueError("Scan doesn't start with a directory")
        b = self.__right.next()
        if b[0] != 'd':
            raise ValueError("Tree walk doesn't start with a directory")
        for x in self.handle_same_dir(".", a, b):
            yield x
        for x in self.__run(b[1], 1):
            yield x

    def __run(self, path, depth):
        """Iterate both pairs of directories equally

        Processes the contents of a single directory, recursively
        calling itself to handle child directories.  Returns with both
        iterators advanced past the 'u' node that ends the dir.

        'depth' is the nesting level; it is only passed along to the
        recursive calls.
        """
        a = self.__left.next()
        b = self.__right.next()

        while True:
            if a[0] == 'u' and b[0] == 'u':
                # Both are leaving the directory.
                for x in self.handle_leave(path, mode_both):
                    yield x
                return

            elif a[0] == 'd' and b[0] == 'd':
                # Both looking at a directory entry.

                if a[1] == b[1]:
                    # if the name is the same, walk the tree.
                    for x in self.handle_same_dir(path, a, b):
                        yield x
                    for x in self.__run(os.path.join(path, a[1]), depth + 1):
                        yield x
                    a = self.__left.next()
                    b = self.__right.next()
                    continue

                elif a[1] < b[1]:
                    # A directory has been deleted.
                    for x in self.handle_delete_dir(path, a, False):
                        yield x
                    for x in self.delete_whole_dir(self.__left,
                            os.path.join(path, a[1])):
                        yield x
                    a = self.__left.next()
                    continue

                else:
                    # A directory has been added.
                    for x in self.handle_add_dir(path, b, False):
                        yield x

                    for x in self.add_whole_dir(self.__right,
                            os.path.join(path, b[1])):
                        yield x
                    b = self.__right.next()
                    continue

            elif a[0] == '-' and b[0] == '-':
                # Both are looking at a non-dir.

                if a[1] == b[1]:
                    # Same name as well.
                    for x in self.handle_same_nondir(path, a, b):
                        yield x
                    a = self.__left.next()
                    b = self.__right.next()
                    continue

                elif a[1] < b[1]:
                    # Deleted non-dir.
                    for x in self.handle_delete_nondir(path, a, False):
                        yield x
                    a = self.__left.next()
                    continue

                else:
                    # Added non-dir.
                    for x in self.handle_add_nondir(path, b, False):
                        yield x
                    b = self.__right.next()
                    continue

            elif a[0] == '-' and b[0] == 'u':
                # Right side has run out of entries: left-over non-dir
                # on the left has been deleted.
                for x in self.handle_delete_nondir(path, a, False):
                    yield x
                a = self.__left.next()
                continue

            elif a[0] == 'u' and b[0] == '-':
                # Left side has run out of entries: left-over non-dir
                # on the right has been added.
                for x in self.handle_add_nondir(path, b, False):
                    yield x
                b = self.__right.next()
                continue

            elif a[0] == 'd' and (b[0] == '-' or b[0] == 'u'):
                # Directories come before non-dirs, so the right side
                # is already past its directories: this one is deleted.
                for x in self.handle_delete_dir(path, a, False):
                    yield x
                for x in self.delete_whole_dir(self.__left,
                        os.path.join(path, a[1])):
                    yield x
                a = self.__left.next()
                continue

            elif (a[0] == '-' or a[0] == 'u') and b[0] == 'd':
                # Mirror image of the case above: an added directory.
                for x in self.handle_add_dir(path, b, False):
                    yield x
                for x in self.add_whole_dir(self.__right,
                        os.path.join(path, b[1])):
                    yield x
                b = self.__right.next()
                continue

            else:
                # Only reachable when an entry carries a tag other
                # than 'd', '-' or 'u'.
                print("Unhandled case: '%s' and '%s'" % (a[0], b[0]))
                sys.exit(2)

    def add_whole_dir(self, iter, path):
        "Consume entries until this directory has been added"
        # Everything up to the matching 'u' belongs to the added
        # directory; nested entries are reported with recursing=True.
        while True:
            a = iter.next()
            if a[0] == 'u':
                for x in self.handle_leave(path, mode_add):
                    yield x
                return
            elif a[0] == 'd':
                for x in self.handle_add_dir(path, a, True):
                    yield x
                for x in self.add_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_add_nondir(path, a, True):
                    yield x

    def delete_whole_dir(self, iter, path):
        "Consume entries until this directory has been deleted"
        # Everything up to the matching 'u' belongs to the deleted
        # directory; nested entries are reported with recursing=True.
        while True:
            a = iter.next()
            if a[0] == 'u':
                for x in self.handle_leave(path, mode_delete):
                    yield x
                return
            elif a[0] == 'd':
                for x in self.handle_delete_dir(path, a, True):
                    yield x
                for x in self.delete_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_delete_nondir(path, a, True):
                    yield x
# For each kind of entry, the attributes that have to agree for two
# entries to count as unchanged, in the order they are reported.
__must_match = {
    'dir': ['uid', 'gid', 'perm'],
    'file': ['uid', 'gid', 'mtime', 'perm', 'md5'],
    'lnk': ['targ'],
    'sock': ['uid', 'gid', 'perm'],
    'fifo': ['uid', 'gid', 'perm'],
    'blk': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
    'chr': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
}

def compare_entries(path, a, b):
    """Yield report lines describing how entry 'b' differs from 'a'.

    'a' and 'b' are attribute dicts with at least a 'kind' key; 'path'
    is only used in the report text.  Nothing is yielded when every
    attribute required for the kind matches.  An attribute that is
    absent from either side counts as a mismatch.  If the kinds differ,
    the old entry is reported as removed and the new one as added.
    """
    if a['kind'] != b['kind']:
        yield "- %-20s %s" % (a['kind'], path)
        yield "+ %-20s %s" % (b['kind'], path)
        return

    misses = []
    for item in __must_match[a['kind']]:
        if not (item in a and item in b):
            misses.append(item)
        elif a[item] != b[item]:
            # Python 2.5 stat is returning subseconds, which tar
            # doesn't backup.  We can check this later, but for now,
            # just ignore the subsecond portions
            if (item == 'mtime' and
                    math.floor(a[item]) == math.floor(b[item])):
                pass
            else:
                misses.append(item)

    if misses:
        yield " [%-18s] %s" % (",".join(misses), path)
        if 'targ' in misses:
            if 'targ' in a:
                yield " old targ: %s" % a['targ']
            if 'targ' in b:
                yield " new targ: %s" % b['targ']
    return
class check_comparer(comparer):
    """Comparer for comparing either two trees, or a tree and a
    filesystem.  'right' should be the newer tree.
    Yields strings giving the tree differences.

    Entries present on both sides are reported through
    compare_entries().  An added or deleted entry gets one '+' or '-'
    line, except when it lies inside a directory that was itself added
    or deleted ('recursing' is true); then only the directory is
    reported.
    """

    def handle_same_dir(self, path, a, b):
        return compare_entries(os.path.join(path, a[1]), a[2], b[2])

    def handle_delete_dir(self, path, a, recursing):
        if not recursing:
            yield "- %-20s %s" % ('dir', os.path.join(path, a[1]))

    def handle_add_dir(self, path, a, recursing):
        if not recursing:
            yield "+ %-20s %s" % ('dir', os.path.join(path, a[1]))

    def handle_same_nondir(self, path, a, b):
        return compare_entries(os.path.join(path, a[1]), a[2], b[2])

    def handle_delete_nondir(self, path, a, recursing):
        if not recursing:
            yield "- %-20s %s" % (a[2]['kind'], os.path.join(path, a[1]))

    def handle_add_nondir(self, path, a, recursing):
        if not recursing:
            yield "+ %-20s %s" % (a[2]['kind'], os.path.join(path, a[1]))
def update_link(assoc, path, name):
    """Record the target of a symlink entry in its attribute dict.

    'assoc' is modified in place: for an entry of kind 'lnk' the key
    'targ' is set to the link target of path/name.  Entries of any
    other kind are left untouched.
    """
    if assoc['kind'] != 'lnk':
        return
    link_path = os.path.join(path, name)
    assoc['targ'] = os.readlink(link_path)
def same_inode(a, b):
    """Tell whether two entries look like one and the same, unmodified
    inode: equal kind, inode number and ctime."""
    # The checks run in this order on purpose: 'ino' and 'ctime' are
    # only looked up once the kinds are known to be equal.
    if a['kind'] != b['kind']:
        return False
    if a['ino'] != b['ino']:
        return False
    return a['ctime'] == b['ctime']
class update_comparer(comparer):
    """Yields a tree equivalent to the right tree, which should be
    coming from a live filesystem.  Fills in symlink destinations and
    file md5sums (if possible).

    The left tree serves as a cache: a regular file whose kind, inode
    number and ctime are unchanged reuses the md5 recorded on the left
    instead of being read again.  A file that cannot be hashed gets the
    md5 value '[error]'.  Entries that exist only on the left are
    dropped.
    """

    def handle_same_dir(self, path, a, b):
        yield b

    def handle_add_dir(self, path, a, recursing):
        yield a

    def handle_same_nondir(self, path, a, b):
        update_link(b[2], path, b[1])
        if b[2]['kind'] == 'file':
            if same_inode(a[2], b[2]):
                b[2]['md5'] = a[2]['md5']
            else:
                b[2]['md5'] = self._hash_or_error(path, b[1])
        yield b

    def handle_add_nondir(self, path, a, recursing):
        update_link(a[2], path, a[1])
        if a[2]['kind'] == 'file':
            a[2]['md5'] = self._hash_or_error(path, a[1])
        yield a

    def handle_leave(self, path, mode):
        # A directory that exists on the right (added, or present on
        # both sides) must be closed in the output; one that was only
        # on the left was never opened there.
        if (mode & mode_add) != 0:
            yield 'u',

    def _hash_or_error(self, path, name):
        """Return the hash of path/name, or '[error]' if the file
        cannot be read."""
        try:
            return hashing.hashof(os.path.join(path, name))
        except (OSError, IOError):
            # NOTE(review): hashing.hashof is not visible here.  On
            # Python 2 a failing open() raises IOError, which is not an
            # OSError, so both are caught -- confirm against hashof.
            return '[error]'
# Version header that writer_new() stores as the first pickle of a scan
# file.  reader() looks this string up in 'readers' to pick the decoder.
file_version = 'Asure scan version 1.1'
def read1_0(fd):
    """Yield the entries of a version 1.0 scan file.

    In this format every entry is a pickle of its own; 'fd' must be
    positioned just after the version header.  The end of the file ends
    the iteration.
    """
    try:
        entry = load(fd)
        while True:
            yield entry
            entry = load(fd)
    except EOFError:
        pass
def read1_1(fd):
    """Yield the entries of a version 1.1 scan file.

    In this format each pickle holds a whole batch (a sequence) of
    entries; the batches are flattened back into single entries.  'fd'
    must be positioned just after the version header.  The end of the
    file ends the iteration.
    """
    try:
        while True:
            batch = load(fd)
            for entry in batch:
                yield entry
    except EOFError:
        pass
# Decoder for every scan file format that can still be read, keyed by
# the version header stored at the start of the file.
readers = {}
readers['Asure scan version 1.0'] = read1_0
readers['Asure scan version 1.1'] = read1_1
def reader(path):
    """Iterate over a previously written dump.

    Opens the gzip'ed scan file at 'path', reads its version header and
    yields every entry using the matching decoder from 'readers'.  The
    file is closed when the iteration finishes, fails, or the generator
    is closed or discarded early.

    Raises ValueError if the version header is not a known one.

    NOTE: the file is decoded with pickle, which can execute arbitrary
    code while loading.  Only read scan files from a trusted source.
    """
    fd = gzip.open(path, 'rb')
    try:
        vers = load(fd)
        if vers not in readers:
            raise ValueError("Unsupported version of asure file")
        for item in readers[vers](fd):
            yield item
    finally:
        fd.close()
# Pickle protocol passed to every dump() call of the writers.  A
# negative value makes pickle use the highest protocol it supports.
use_protocol = -1
def writer_new(path, iter):
    """Write the entries of 'iter' to a gzip'ed scan file at 'path'.

    The file starts with the 'file_version' header.  The entries follow
    as pickled lists of up to 100 entries each, which is the layout
    read1_1() reads back.  The file is closed even if 'iter' or the
    pickling fails part way through.
    """
    fd = gzip.open(path, 'wb')
    try:
        dump(file_version, fd, use_protocol)
        items = []
        for item in iter:
            items.append(item)
            if len(items) >= 100:
                dump(items, fd, use_protocol)
                items = []
        # Flush the last, partially filled batch.
        if items:
            dump(items, fd, use_protocol)
    finally:
        fd.close()
def writer_old(path, iter):
    """Write the entries of 'iter' to 'path' in the version 1.0 layout.

    The file starts with the 1.0 version header, followed by one pickle
    per entry, which is what read1_0() reads back.  The file is closed
    even if 'iter' or the pickling fails part way through.
    """
    fd = gzip.open(path, 'wb')
    try:
        dump('Asure scan version 1.0', fd, use_protocol)
        for item in iter:
            dump(item, fd, use_protocol)
    finally:
        fd.close()
def writer(path, iter):
    """Write the entries of 'iter' to the scan file 'path'.

    This is the writer the commands use; it stores the file with
    writer_new().
    """
    writer_new(path=path, iter=iter)
def rename_cycle():
    """Cycle through the names.

    The current scan '0sure.dat.gz' becomes the backup '0sure.bak.gz',
    then the freshly written '0sure.0.gz' becomes the current scan.
    All names are relative to the current directory.  A missing current
    scan (the very first run) is fine; any other failure to move it
    aside is raised, so that the new scan never replaces the current
    one without a backup having been made.
    """
    import errno

    try:
        os.rename('0sure.dat.gz', '0sure.bak.gz')
    except OSError:
        # sys.exc_info() instead of a bound exception name keeps this
        # working on every Python version the rest of the file runs on.
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise
    os.rename('0sure.0.gz', '0sure.dat.gz')
def fresh_scan():
    """Scan the current directory from scratch and store the result.

    Comparing the live tree against an empty tree makes every entry an
    added one, so every file gets hashed.  The result is written to
    '0sure.0.gz' and then rotated into place.
    """
    scan = update_comparer(empty_tree(), walk('.')).run()
    writer('0sure.0.gz', scan)
    rename_cycle()
def check_scan():
    """Scan the filesystem afresh, compare the result with the stored
    scan file and print every difference found."""
    recorded = reader('0sure.dat.gz')
    live = update_comparer(empty_tree(), walk('.')).run()
    for line in check_comparer(recorded, live).run():
        print(line)
def update():
    """Scan the filesystem, using the stored scan as a cache of md5
    hashes for files whose inode shows no change, then store the new
    scan and rotate it into place."""
    previous = reader('0sure.dat.gz')
    refreshed = update_comparer(previous, walk('.')).run()
    writer('0sure.0.gz', refreshed)
    rename_cycle()
def signoff():
    """Print the differences between the backup scan and the current
    scan file."""
    older = reader('0sure.bak.gz')
    newer = reader('0sure.dat.gz')
    for line in check_comparer(older, newer).run():
        print(line)
def show():
    """Print every entry of the scan file, indented by directory
    depth."""
    depth = 0
    for entry in reader('0sure.dat.gz'):
        tag = entry[0]
        # A leave marker is printed at the level of its directory.
        if tag == 'u':
            depth -= 1
        print("%s%s" % (" " * depth, entry))
        if tag == 'd':
            depth += 1
def nothing():
    """Read the scan file from start to end and discard every entry."""
    for _entry in reader('0sure.dat.gz'):
        pass
def copy():
    """Rewrite the latest scan with the current writer; can be used to
    update a scan file to a newer storage format.  The rewritten file
    replaces '0sure.dat.gz' directly, without touching the backup."""
    entries = reader('0sure.dat.gz')
    writer('0sure.0.gz', entries)
    os.rename('0sure.0.gz', '0sure.dat.gz')
# Command line verbs and the functions that carry them out.  main()
# looks the single argument up here; usage() lists the keys.
commands = {
    'scan': fresh_scan,
    'update': update,
    'check': check_scan,
    'signoff': signoff,
    'show': show,
    'copy': copy,
    'nothing': nothing }
def main(argv):
    """Run the command named by the single entry of 'argv'.

    'argv' is the argument list without the program name.  Any other
    number of arguments, or a name that is not in 'commands', prints
    the usage text through usage(), which exits.
    """
    if len(argv) != 1:
        usage()
    command = argv[0]
    if command in commands:
        commands[command]()
    else:
        usage()
def usage():
    """Print the program version and the list of commands, then exit
    with status 1."""
    banner = "Asure, version %s" % version.version
    print(banner)
    choices = '|'.join(commands.keys())
    print("Usage: asure {%s}" % choices)
    sys.exit(1)
if __name__ == '__main__':
    # Executed as a script: hand over everything after the program name.
    main(sys.argv[1:])