5 import email
.utils
as emu
6 from sqlalchemy
.sql
import and_
7 from sqlalchemy
.orm
import join
class BlobTracker(object):
    """Feed git history into the tracking database.

    For every scanned commit a ``db.Commit`` row is added to ``db.session``
    and each entry of the commit's tree is looked up in
    ``dbcache.blob_cache`` / ``dbcache.file_cache``.

    NOTE(review): nothing in this class calls ``db.session.commit()``; the
    ``autocommit`` arguments are accepted and forwarded only.  Confirm where
    the transaction is supposed to be committed.
    """

    def scan_commit_tree(self, commit, autocommit=True):
        """Register the tree entries of ``commit`` and refresh stale blobs.

        Runs ``git ls-tree -r`` on ``commit.sha1``, looks every entry up in
        the blob and file caches, flushes both caches, and then points
        ``newest_commit`` of every listed blob whose joined commit has an
        older ``cdate`` at ``commit``.

        :param commit: object exposing ``sha1`` and ``cdate`` (the caller in
            this class passes a ``db.Commit``).
        :param autocommit: accepted for interface compatibility; not acted
            upon here (see the class-level review note).
        """
        # The original filtered on an undefined name `blobs`; the sha1s of
        # the tree entries are collected here so the query below has
        # something to filter on.
        blob_sha1s = []
        for line in git('ls-tree', '-r', commit.sha1, ret_pipe=True):
            assert line.endswith('\n')
            # ls-tree prints "<mode> <type> <sha1>\t<path>".
            rest, name = line[:-1].split('\t', 1)
            # Named so that the builtin `type` is not shadowed.
            _mode, _objtype, sha1 = rest.split(' ')
            blob_sha1s.append(sha1)
            # Return values are not needed; the lookups are kept for their
            # effect on the caches (the file-cache lookup creates the entry
            # on demand, per the original comment).
            dbcache.blob_cache.get(sha1)
            dbcache.file_cache.get(os.path.basename(name))  # magically creates it
        dbcache.blob_cache.flush()
        dbcache.file_cache.flush()

        if not blob_sha1s:
            # Empty tree: nothing to refresh, and an empty IN () clause is
            # best avoided.
            return

        # Uses db.session.query like the rest of this class.
        stale_subset = (db.session.query(db.Blob)
                        .select_from(join(db.Blob, db.Commit))
                        .filter(db.Blob.sha1.in_(blob_sha1s))
                        .filter(db.Commit.cdate < commit.cdate))
        for b in stale_subset:
            b.newest_commit = commit

    def scan_commit(self, sha1, autocommit=True, patch_ids=None):
        """Add a ``db.Commit`` for ``sha1`` to the session and scan its tree.

        :param sha1: commit to scan, as passed to ``git show`` / ``git log``.
        :param autocommit: forwarded to :meth:`scan_commit_tree`.
        :param patch_ids: optional dict mapping commit sha1 -> patch id.  If
            it has no entry for ``sha1`` the patch id is computed with
            ``git show | git patch-id`` and stored in the dict.
        """
        # The default is None, so subscripting it directly would raise
        # TypeError; start from an empty table instead.
        if patch_ids is None:
            patch_ids = {}
        # Skip the two subprocesses when the caller already knows the id.
        if sha1 not in patch_ids:
            pipe = git('show', sha1, ret_pipe=True)
            for line in git('patch-id', input_pipe=pipe, ret_pipe=True):
                patch_id, commit_sha1 = line.split()
                patch_ids[commit_sha1] = patch_id

        output = git('log', '-1', '--pretty=format:%cD\t%aD', sha1)[0]
        # The format prints the committer date first and the author date
        # second; the original unpacked them in the opposite order, which
        # swapped the two timestamps.
        cdate, adate = [emu.mktime_tz(emu.parsedate_tz(s))
                        for s in output.split('\t')]
        commit = db.Commit(sha1, cdate, adate, patch_ids.get(sha1, None))
        db.session.add(commit)
        self.scan_commit_tree(commit, autocommit=autocommit)

    def scan_history(self, refs):
        """Scan all non-merge commits reachable from ``refs``.

        Commits reachable from the stored ``db.Boundary`` rows are excluded
        via ``--not``.  Afterwards the resolved ``refs`` are stored as the
        new boundaries.

        :param refs: revision names understood by ``git rev-parse``.
        """
        refdata = git('rev-parse', *refs)[0]
        refs = refdata.split()
        boundaries = db.session.query(db.Boundary).all()
        args = refs + ['--not'] + [b.sha1 for b in boundaries]

        # Single-argument print calls emit the same text on Python 2 and 3.
        print('log -p | patch-id ...')
        # One bulk patch-id run for the whole range.
        patch_ids = {}
        pipe = git('log', '-p', '--no-merges', *args, ret_pipe=True)
        for line in git('patch-id', input_pipe=pipe, ret_pipe=True):
            patch_id, commit_sha1 = line.split()
            patch_ids[commit_sha1] = patch_id

        print('reading trees ...')
        revs = git('rev-list', '--no-merges', *args, ret_pipe=True)
        for count, line in enumerate(revs, 1):
            sys.stdout.write('\r%6d' % count)
            sha1 = line.strip()
            # Hand over the bulk table so scan_commit does not have to
            # recompute an id that is already known.
            self.scan_commit(sha1, autocommit=False, patch_ids=patch_ids)

        print('\nstoring boundaries ...')
        # NOTE(review): the body of this loop was missing; deleting the old
        # rows before storing the new ones is the presumed intent -- confirm.
        for b in db.session.query(db.Boundary).all():
            db.session.delete(b)
        for r in refs:
            db.session.add(db.Boundary(r))
if __name__ == '__main__':
    # Script entry point: treat every command-line argument as a ref to scan.
    # The tracker instance was never created in the original (`bt` was
    # undefined); BlobTracker defines no __init__, so it takes no arguments.
    bt = BlobTracker()
    bt.scan_history(sys.argv[1:])