blobtracker: also build map of filenames seen
[trackgit.git] / blobtracker.py
blobcccd19b30fd748e2644326f6df03117ebf00294f
1 import sys
2 import os.path
4 import db
5 import dbcache
6 from git import git
def scan_commit(commit):
    """Record every tree entry of ``commit`` in the blob and filename caches.

    Runs ``git ls-tree -r`` on ``commit.sha1``.  For each listed entry the
    object is fetched from the module-level ``blob_cache`` and
    ``update_contained_in(commit)`` is called on it.  The basename of the
    entry's path is then looked up in the module-level ``file_cache``.

    Both caches are module globals that must already be set (``scan_history``
    assigns them before it calls this function).
    """
    listing = git('ls-tree', '-r', commit.sha1, ret_pipe=True)
    for entry in listing:
        assert entry.endswith('\n')
        # Drop the newline, then separate the metadata from the path at the
        # first tab only, so a tab inside the path stays part of the path.
        meta, path = entry[:-1].split('\t', 1)
        _mode, _kind, blob_sha1 = meta.split(' ')
        blob_cache.get(blob_sha1).update_contained_in(commit)
        # The return value is deliberately discarded: the lookup is made for
        # its side effect (per the original author's note, the cache creates
        # a missing entry on access).
        file_cache.get(os.path.basename(path))
def scan_history(excludes=None):
    """Scan commits reachable from all refs and store them in the database.

    Steps, in order:

    1. Create a database session and set the module globals ``blob_cache``
       and ``file_cache`` that ``scan_commit`` relies on.
    2. Build the revision arguments: every ref's object name, then ``--not``
       followed by the sha1 of every stored ``db.Boundary`` and by
       ``excludes``.
    3. Map commit sha1 -> patch id by piping ``git log -p --no-merges`` into
       ``git patch-id``.
    4. Walk ``git rev-list --no-merges``; for each commit not already known,
       add a ``db.Commit`` to the session and scan its tree.
    5. Replace the stored boundaries with the current refs and commit.

    Parameters:
        excludes: optional sequence of extra arguments appended after
            ``--not`` on the ``git log`` / ``git rev-list`` command lines.
            ``None`` (the default) means no extra arguments.
    """
    global blob_cache, file_cache
    # A fresh list per call: avoids a shared mutable default and lets the
    # caller pass any sequence, not only a list.
    excludes = [] if excludes is None else list(excludes)

    session = db.Session()
    blob_cache = dbcache.Cache(db.Blob, db.Blob.sha1, session)
    file_cache = dbcache.Cache(db.Filename, db.Filename.name, session)

    refdata = git('for-each-ref', '--format=%(objectname)')[0]
    refs = refdata.split()
    boundaries = session.query(db.Boundary).all()
    args = refs + ['--not'] + [b.sha1 for b in boundaries] + excludes

    # The single-argument parenthesised form prints the same text whether
    # ``print`` is a statement or a function.
    print('reading existing commits ...')
    commit_cache = set()
    for row in session.query(db.Commit.sha1):
        # NOTE(review): assumes db.Session is a SQLAlchemy session, where a
        # single-column query yields one-element rows rather than bare
        # strings.  Store the string itself so the membership test below can
        # match the sha1 read from rev-list.
        commit_cache.add(row[0])

    print('log -p | patch-id ...')
    patch_ids = {}
    pipe = git('log', '-p', '--no-merges', *args, ret_pipe=True)
    for line in git('patch-id', input_pipe=pipe, ret_pipe=True):
        patch_id, commit_sha1 = line.split()
        patch_ids[commit_sha1] = patch_id

    print('reading trees ...')
    count = 0
    for line in git('rev-list', '--no-merges', *args, ret_pipe=True):
        # '\r' rewrites the same terminal line to act as a progress counter.
        sys.stdout.write('\r%6d' % count)
        sys.stdout.flush()
        sha1 = line.strip()
        if sha1 not in commit_cache:
            commit_cache.add(sha1)
            output = git('log', '-1', '--pretty=format:%ct %at', sha1)[0]
            # %ct is the committer timestamp and %at the author timestamp,
            # so unpack in that order.
            cdate, adate = [int(s) for s in output.split()]
            commit = db.Commit(sha1, cdate, adate, patch_ids.get(sha1, None))
            session.add(commit)
            # scan_commit takes only the commit; it reaches the session
            # through the module-level caches set above.
            scan_commit(commit)
        count = count + 1

    print('\nstoring boundaries ...')
    for b in session.query(db.Boundary).all():
        session.delete(b)
    # set() collapses refs that point at the same object.
    for r in set(refs):
        session.add(db.Boundary(r))
    session.commit()
if __name__ == '__main__':
    # Script entry point: every command-line argument after the program name
    # is handed to scan_history as its ``excludes`` list.
    cli_excludes = sys.argv[1:]
    scan_history(cli_excludes)