Correctly parse timezones
[trackgit.git] / blobtracker.py
blob8a5d3eac5e558cb12386e4215aaa75ac4bb492c7
1 #!/usr/bin/python
3 import sys
4 import os.path
5 import email.utils as emu
6 from sqlalchemy.sql import and_
7 from sqlalchemy.orm import join
9 import db
10 import dbcache
11 from git import git
class BlobTracker(object):
    """Walk git history and record commits, blobs and file names via ``db``.

    All git access goes through the project's ``git`` helper and all
    persistence through the ``db`` / ``dbcache`` modules.
    """

    @staticmethod
    def _parse_patch_ids(lines):
        """Build a ``{commit_sha1: patch_id}`` dict from patch-id output.

        Each line is expected to hold two whitespace-separated fields,
        the patch id first and the commit sha1 second.
        """
        patch_ids = {}
        for line in lines:
            patch_id, commit_sha1 = line.split()
            patch_ids[commit_sha1] = patch_id
        return patch_ids

    @staticmethod
    def _parse_commit_dates(output):
        """Parse ``'<committer date>\\t<author date>'`` into timestamps.

        Both dates are RFC 2822 strings; each is converted to seconds
        since the epoch with its timezone offset applied.

        Returns a ``(cdate, adate)`` tuple.
        """
        cdate, adate = [emu.mktime_tz(emu.parsedate_tz(s))
                        for s in output.split('\t')]
        return cdate, adate

    def scan_commit_tree(self, commit, autocommit=True):
        """Register every entry of ``commit``'s tree and refresh stale blobs.

        ``commit`` must provide ``sha1`` and ``cdate``.  ``autocommit`` is
        accepted for interface compatibility but is not acted upon here.
        """
        blobs = set()
        for line in git('ls-tree', '-r', commit.sha1, ret_pipe=True):
            assert line.endswith('\n')
            # Entry layout: "<mode> <type> <sha1>\t<path>".
            rest, name = line[:-1].split('\t', 1)
            _mode, _objtype, sha1 = rest.split(' ')
            # Called for its side effect only; presumably registers the
            # blob the same way file_cache.get does below -- TODO confirm.
            dbcache.blob_cache.get(sha1)
            blobs.add(sha1)
            # magically creates it
            dbcache.file_cache.get(os.path.basename(name))
        dbcache.blob_cache.flush()
        dbcache.file_cache.flush()
        # Blobs of this tree whose joined commit is older than `commit`
        # get `commit` recorded as their newest commit.
        stale_subset = (db.query(db.Blob)
                        .select_from(join(db.Blob, db.Commit))
                        .filter(db.Blob.sha1.in_(blobs))
                        .filter(db.Commit.cdate < commit.cdate)
                        .all())
        for b in stale_subset:
            b.newest_commit = commit

    def scan_commit(self, sha1, autocommit=True, patch_ids=None):
        """Add the commit ``sha1`` to the session and scan its tree.

        ``patch_ids`` may be a precomputed ``{commit_sha1: patch_id}``
        dict.  When it is missing or empty, the patch id is computed for
        this single commit with ``git show | git patch-id``.
        ``autocommit`` is forwarded to ``scan_commit_tree``.
        """
        if not patch_ids:
            pipe = git('show', sha1, ret_pipe=True)
            patch_ids = self._parse_patch_ids(
                git('patch-id', input_pipe=pipe, ret_pipe=True))
        # %cD is the committer date and %aD the author date, so the
        # committer date comes first in the output.
        output = git('log', '-1', '--pretty=format:%cD\t%aD', sha1)[0]
        cdate, adate = self._parse_commit_dates(output)
        commit = db.Commit(sha1, cdate, adate, patch_ids.get(sha1, None))
        db.session.add(commit)
        self.scan_commit_tree(commit, autocommit=autocommit)

    def scan_history(self, refs):
        """Scan all non-merge commits reachable from ``refs``.

        Commits reachable from the stored ``db.Boundary`` rows are
        excluded.  Afterwards the boundaries are replaced by the resolved
        ``refs`` and the session is committed.
        """
        refdata = git('rev-parse', *refs)[0]
        refs = refdata.split()
        boundaries = db.session.query(db.Boundary).all()
        args = refs + ['--not'] + [b.sha1 for b in boundaries]
        print('log -p | patch-id ...')
        pipe = git('log', '-p', '--no-merges', *args, ret_pipe=True)
        patch_ids = self._parse_patch_ids(
            git('patch-id', input_pipe=pipe, ret_pipe=True))
        print('reading trees ...')
        rev_list = git('rev-list', '--no-merges', *args, ret_pipe=True)
        for count, line in enumerate(rev_list):
            sys.stdout.write('\r%6d' % count)
            sys.stdout.flush()
            # Hand over the bulk patch ids so scan_commit does not have
            # to run `git show | git patch-id` again for every commit.
            self.scan_commit(line.strip(), autocommit=False,
                             patch_ids=patch_ids)
        print('\nstoring boundaries ...')
        for b in db.session.query(db.Boundary).all():
            db.session.delete(b)
        for r in set(refs):
            db.session.add(db.Boundary(r))
        db.session.commit()
# Script entry point: every command-line argument is treated as a ref.
if __name__ == '__main__':
    cli_refs = sys.argv[1:]
    tracker = BlobTracker()
    tracker.scan_history(cli_refs)