Revert "... and stop calling the blobtracker"
[trackgit.git] / blobtracker.py
blob5f6c7897f525efcc22e7e960687e7e937798e6d4
1 #!/usr/bin/python
3 import sys
4 import os.path
5 import email.utils as emu
6 from sqlalchemy.sql import and_
7 from sqlalchemy.orm import join
9 import db
10 import dbcache
11 from git import git
class BlobTracker(object):
    """Record git commits and scan boundaries in the tracking database."""

    def scan_commit(self, sha1, autocommit=True, patch_ids=None):
        """Register the commit *sha1* with the database session.

        The output of ``git show <sha1>`` is piped through ``git patch-id``
        and every reported (patch id, commit sha1) pair is stored in
        *patch_ids*.  If a ``db.Commit`` with this sha1 is already stored it
        is flagged as upstream; otherwise a new ``db.Commit`` is added to the
        session.

        Parameters:
            sha1: commit id to scan.
            autocommit: accepted for interface compatibility; this method
                does not consult it and never commits the session itself.
            patch_ids: optional dict mapping commit sha1 -> patch id.  It is
                updated in place, so a caller may pass one dict to several
                calls to collect the ids.  A fresh dict is used when None.
        """
        # Test for None explicitly: a caller-supplied *empty* dict must still
        # be the one that gets filled, not silently replaced.
        if patch_ids is None:
            patch_ids = {}

        show_pipe = git('show', sha1, ret_pipe=True)
        for line in git('patch-id', input_pipe=show_pipe, ret_pipe=True):
            patch_id, commit_sha1 = line.split()
            patch_ids[commit_sha1] = patch_id

        # One tab-separated record: author date, committer date, author.
        output = git('log', '-1', '--pretty=format:%aD\t%cD\t%an <%ae>', sha1)[0]
        adate_s, cdate_s, author = output.split('\t', 2)
        # The author string is not guaranteed to be UTF-8; fall back to
        # latin1, which can decode any byte sequence.
        try:
            author = author.decode('utf8')
        except UnicodeDecodeError:
            author = author.decode('latin1')
        adate = emu.mktime_tz(emu.parsedate_tz(adate_s))
        cdate = emu.mktime_tz(emu.parsedate_tz(cdate_s))

        # Query through db.session like the rest of this class does.
        commit = db.session.query(db.Commit).filter(db.Commit.sha1 == sha1).first()
        if commit:
            # upstream version is the same as our first shot at application
            commit.upstream = True
        else:
            commit = db.Commit(sha1, cdate, adate, author, patch_ids.get(sha1))
            db.session.add(commit)

    def scan_history(self, refs):
        """Scan all not-yet-seen, non-merge commits reachable from *refs*.

        *refs* is a list of revision names.  They are resolved with
        ``git rev-parse``; every commit listed by ``git rev-list --no-merges``
        for the resolved refs, excluding the stored ``db.Boundary`` commits,
        is passed to ``scan_commit``.  Afterwards the stored boundaries are
        replaced by the resolved refs and the session is committed.

        Progress is written to stdout.  When no ref resolves, nothing is
        scanned and the stored boundaries are left untouched.
        """
        refs = git('rev-parse', *refs)[0].split()
        if not refs:
            # Nothing to scan.  Continuing would delete every stored boundary
            # without recording a replacement.
            return

        boundaries = db.session.query(db.Boundary).all()
        args = refs + ['--not'] + [b.sha1 for b in boundaries]

        print('reading trees ...')
        # NOTE(review): ret_pipe is passed via ** as in the original call,
        # presumably for interpreters that reject keywords after *args;
        # confirm before simplifying.
        rev_list = git('rev-list', '--no-merges', *args, **{'ret_pipe': True})
        for count, line in enumerate(rev_list):
            sys.stdout.write('\r%6d' % count)
            sys.stdout.flush()
            self.scan_commit(line.strip(), autocommit=False)

        print('\nstoring boundaries ...')
        for stale in db.session.query(db.Boundary).all():
            db.session.delete(stale)
        for ref_sha1 in set(refs):
            db.session.add(db.Boundary(ref_sha1))
        db.session.commit()
if __name__ == '__main__':
    # Script entry point: every command-line argument is treated as a ref
    # whose history should be scanned.
    ref_args = sys.argv[1:]
    tracker = BlobTracker()
    tracker.scan_history(ref_args)