Drop the autogenerated commit if its diff is empty
[trackgit.git] / blobtracker.py
blob590ebdbfb1d8398d729891d4a856f2f3be34e13e
1 #!/usr/bin/python
3 import sys
4 import os.path
5 import email.utils as emu
6 from sqlalchemy.sql import and_
7 from sqlalchemy.orm import join
9 import db
10 import dbcache
11 from git import git
class BlobTracker(object):
    """Scan git history and record commits, blobs and file names in the db.

    All git access goes through the ``git`` helper and all persistence
    through ``db`` / ``dbcache``; this class only orchestrates the calls.
    """

    @staticmethod
    def _collect_patch_ids(diff_pipe, patch_ids):
        """Feed ``diff_pipe`` to ``git patch-id`` and store the results.

        ``git patch-id`` prints one "<patch-id> <commit-sha1>" pair per
        line; each pair is stored as ``patch_ids[commit_sha1] = patch_id``.
        Returns the same ``patch_ids`` dict for convenience.
        """
        for line in git('patch-id', input_pipe=diff_pipe, ret_pipe=True):
            patch_id, commit_sha1 = line.split()
            patch_ids[commit_sha1] = patch_id
        return patch_ids

    def scan_commit_tree(self, commit, autocommit=True):
        """Register every entry of ``commit``'s tree with the db caches.

        Runs ``git ls-tree -r`` on ``commit.sha1``; for each listed object
        the cached blob is marked as contained in ``commit`` and the base
        name of its path is looked up in the file cache.  Both caches are
        flushed once the whole tree has been read.

        ``autocommit`` is accepted for interface compatibility but is not
        used by this method.
        """
        for line in git('ls-tree', '-r', commit.sha1, ret_pipe=True):
            assert line.endswith('\n')
            # Line layout: "<mode> <type> <sha1>\t<path>".
            meta, path = line[:-1].split('\t', 1)
            # Only the object id is needed; mode and type are discarded.
            # NOTE(review): submodule entries (type "commit") are treated
            # like blobs here -- confirm that this is intended.
            _mode, _objtype, sha1 = meta.split(' ')
            blob = dbcache.blob_cache.get(sha1)
            blob.update_contained_in(commit)
            # The lookup alone is enough: the cache magically creates
            # entries it does not know yet.
            dbcache.file_cache.get(os.path.basename(path))
        dbcache.blob_cache.flush()
        dbcache.file_cache.flush()

    def scan_commit(self, sha1, autocommit=True, patch_ids=None):
        """Create the ``db.Commit`` row for ``sha1`` and scan its tree.

        ``patch_ids`` may be a precomputed mapping of commit sha1 to patch
        id.  When it is missing or empty, the patch id of this single
        commit is computed with ``git show | git patch-id``.  A commit that
        has no entry in the mapping is stored with a patch id of ``None``.

        Author and committer dates are stored as the timestamps returned
        by ``email.utils.mktime_tz``; the author string is decoded as
        UTF-8 with a latin-1 fallback.  ``autocommit`` is only forwarded
        to ``scan_commit_tree``.
        """
        if not patch_ids:
            patch_ids = self._collect_patch_ids(
                git('show', sha1, ret_pipe=True), {})
        output = git('log', '-1', '--pretty=format:%aD\t%cD\t%an <%ae>',
                     sha1)[0]
        adate_s, cdate_s, author = output.split('\t', 2)
        try:
            author = author.decode('utf8')
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character, so this cannot fail.
            author = author.decode('latin1')
        adate = emu.mktime_tz(emu.parsedate_tz(adate_s))
        cdate = emu.mktime_tz(emu.parsedate_tz(cdate_s))
        commit = db.Commit(sha1, cdate, adate, author,
                           patch_ids.get(sha1, None))
        db.session.add(commit)
        self.scan_commit_tree(commit, autocommit=autocommit)

    def scan_history(self, refs):
        """Scan all non-merge commits reachable from ``refs``.

        Commits reachable from the boundaries stored in the db are
        excluded.  Once every commit is scanned, the stored boundaries are
        replaced by the resolved ``refs`` and the session is committed.
        """
        refdata = git('rev-parse', *refs)[0]
        refs = refdata.split()
        boundaries = db.session.query(db.Boundary).all()
        args = refs + ['--not'] + [b.sha1 for b in boundaries]

        # print() with a single string argument emits the same output on
        # Python 2 and also parses on Python 3.
        print('log -p | patch-id ...')
        patch_ids = self._collect_patch_ids(
            git('log', '-p', '--no-merges', *args, ret_pipe=True), {})

        print('reading trees ...')
        rev_list = git('rev-list', '--no-merges', *args, ret_pipe=True)
        for count, line in enumerate(rev_list):
            sys.stdout.write('\r%6d' % count)
            sys.stdout.flush()
            # Hand over the bulk patch-id table so that scan_commit does
            # not spawn "git show | git patch-id" again for every commit.
            self.scan_commit(line.strip(), autocommit=False,
                             patch_ids=patch_ids)

        print('\nstoring boundaries ...')
        for stale in db.session.query(db.Boundary).all():
            db.session.delete(stale)
        for ref in set(refs):
            db.session.add(db.Boundary(ref))
        db.session.commit()
if __name__ == '__main__':
    # Script entry point: every command-line argument is a ref to scan.
    BlobTracker().scan_history(sys.argv[1:])