# get_blob(session, sha1): look up the blob record for a given sha1.
# A hit in the shared _blob_cache mapping (defined outside this view) is
# returned immediately; otherwise db.Blob is queried through `session` for the
# first row whose sha1 column equals the argument.
# NOTE(review): in the lines visible here the query result `b` is neither
# returned nor stored -- the tail of this function appears to be missing from
# this view; confirm against the full file before relying on the return value.
8 def get_blob(session
, sha1
):
9 if sha1
in _blob_cache
:
10 return _blob_cache
[sha1
]
# Cache miss: fall back to the database; .first() yields a single result.
11 b
= session
.query(db
.Blob
).filter(db
.Blob
.sha1
==sha1
).first()
# scan_commit(session, commit): process every entry in the tree of `commit`.
# Reads the output of `git ls-tree -r <commit.sha1>` line by line, resolves
# each entry's sha1 through get_blob(), records the result in _blob_cache and
# calls update_contained_in(commit) on it.
# NOTE(review): statements between the get_blob() call and the cache store are
# not visible in this view; `mode` and `type` are unused in the visible lines.
15 def scan_commit(session
, commit
):
# ret_pipe=True: the git() helper's result is iterated line by line here.
16 for line
in git('ls-tree', '-r', commit
.sha1
, ret_pipe
=True):
# Each line must be newline-terminated; the newline is stripped below.
17 assert line
.endswith('\n')
# Split at the first tab only, so the path keeps any tabs it contains.
18 rest
, name
= line
[:-1].split('\t', 1)
# The part before the tab is three space-separated fields.
# NOTE(review): the local name `type` shadows the builtin inside this function.
19 mode
, type, sha1
= rest
.split(' ')
20 blob
= get_blob(session
, sha1
)
# Store under the sha1 so later get_blob() calls for the same object hit the cache.
24 _blob_cache
[sha1
] = blob
# update_contained_in is defined elsewhere; it is called once per tree entry.
25 blob
.update_contained_in(commit
)
# scan_history(excludes=[]): scan the commits reachable from all refs.
# Visible steps, in order:
#   1. list every ref's object name with `git for-each-ref`;
#   2. build the revision arguments: all refs, then `--not` followed by the
#      sha1 of every stored db.Boundary and the caller-supplied `excludes`;
#   3. add the results of a db.Commit.sha1 query to commit_cache;
#   4. pipe `git log -p --no-merges` into `git patch-id` and fill patch_ids;
#   5. for each `git rev-list --no-merges` line, build a db.Commit for sha1s
#      not in commit_cache and hand it to scan_commit();
#   6. iterate the stored boundaries and add db.Boundary objects.
# commit_cache and patch_ids are not defined in the visible lines.
# NOTE(review): `excludes` has a mutable default, but the visible code only
# concatenates it into a new list, so the shared default is not mutated here.
27 def scan_history(excludes
=[]):
28 session
= db
.Session()
# Without ret_pipe the git() result is indexed with [0]; presumably element 0
# is the command's stdout text -- confirm against the helper.
29 refdata
= git('for-each-ref', '--format=%(objectname)')[0]
# split() with no argument breaks on any whitespace, giving one sha1 per ref.
30 refs
= refdata
.split()
31 boundaries
= session
.query(db
.Boundary
).all()
# Everything after '--not' is passed as a revision to exclude.
32 args
= refs
+ ['--not'] + [b
.sha1
for b
in boundaries
] + excludes
34 print 'reading existing commits ...'
# NOTE(review): this iterates a single-column query. If `session` is a
# SQLAlchemy session each item is a 1-tuple row rather than a string, which
# would make the later `sha1 not in commit_cache` test always true -- confirm.
36 for sha1
in session
.query(db
.Commit
.sha1
):
37 commit_cache
.add(sha1
)
38 print 'log -p | patch-id ...'
# The log output pipe is fed directly into `git patch-id` as its input.
39 pipe
= git('log', '-p', '--no-merges', *args
, ret_pipe
=True)
40 for line
in git('patch-id', input_pipe
=pipe
, ret_pipe
=True):
# Each output line is unpacked as exactly two fields: patch id, then commit sha1.
41 patch_id
, commit_sha1
= line
.split()
42 patch_ids
[commit_sha1
] = patch_id
43 print 'reading trees ...'
45 for line
in git('rev-list', '--no-merges', *args
, ret_pipe
=True):
# '\r' rewrites the same terminal line, giving an in-place progress counter.
# NOTE(review): `count` is not assigned anywhere in the visible lines.
46 sys
.stdout
.write('\r%6d' % count
)
# NOTE(review): `sha1` is not derived from `line` in the visible lines; the
# statement that does so appears to be missing from this view.
49 if sha1
not in commit_cache
:
50 commit_cache
.add(sha1
)
51 output
= git('log', '-1', '--pretty=format:%ct %at', sha1
)[0]
# NOTE(review): the format prints %ct (committer time) first and %at (author
# time) second, but the values are unpacked as `adate, cdate` -- the names
# look swapped; confirm against db.Commit's parameter order.
52 adate
, cdate
= [int(s
) for s
in output
.split()]
# patch_ids.get(...) is None when no patch id was recorded for this sha1.
53 commit
= db
.Commit(sha1
, cdate
, adate
, patch_ids
.get(sha1
, None))
55 scan_commit(session
, commit
)
57 print '\nstoring boundaries ...'
# NOTE(review): the body of this loop is not visible in this view.
58 for b
in session
.query(db
.Boundary
).all():
# NOTE(review): `r` is not bound in the visible lines; presumably it iterates
# `refs` so each scanned ref becomes a boundary for the next run -- confirm.
61 session
.add(db
.Boundary(r
))
# Script entry point: every command-line argument after the program name is
# passed to scan_history() as its `excludes` list.
64 if __name__
== '__main__':
65 scan_history(sys
.argv
[1:])