def scan_commit(commit):
    """Register the blobs and file names found in the tree of *commit*.

    Runs ``git ls-tree -r`` on ``commit.sha1``.  For every listed entry, the
    blob fetched from the module-level ``blob_cache`` is told through
    ``update_contained_in`` that it is part of *commit*, and the entry's base
    name is looked up in the module-level ``file_cache``.
    """
    entries = git('ls-tree', '-r', commit.sha1, ret_pipe=True)
    for entry in entries:
        assert entry.endswith('\n')
        # Each entry reads "<mode> <type> <sha1>\t<path>"; only the object
        # name and the path are used below.
        meta, path = entry[:-1].split('\t', 1)
        _mode, _kind, blob_sha1 = meta.split(' ')
        blob_cache.get(blob_sha1).update_contained_in(commit)
        # The result is not needed: the lookup itself magically creates
        # the file name entry.
        file_cache.get(os.path.basename(path))
def scan_history(excludes=None):
    """Scan the history reachable from all refs and record it via ``db``.

    The revision range is every ref tip, minus the stored ``db.Boundary``
    commits, minus *excludes*.  For that range the function collects patch
    ids (``git log -p | git patch-id``), builds a ``db.Commit`` for every
    non-merge commit not yet in ``commit_cache`` and passes it to
    ``scan_commit``, and finally stores the current ref tips as boundaries.

    Rebinds the module-level ``blob_cache`` and ``file_cache``.

    Args:
        excludes: optional sequence of extra revision arguments placed after
            ``--not``.  ``None`` (the default) means no extra exclusions.
    """
    global blob_cache, file_cache

    # A fresh list per call avoids a shared mutable default and lets callers
    # pass any sequence, not only a list.
    excludes = [] if excludes is None else list(excludes)

    session = db.Session()
    blob_cache = dbcache.Cache(db.Blob, db.Blob.sha1, session)
    file_cache = dbcache.Cache(db.Filename, db.Filename.name, session)

    refdata = git('for-each-ref', '--format=%(objectname)')[0]
    refs = refdata.split()
    boundaries = session.query(db.Boundary).all()
    args = refs + ['--not'] + [b.sha1 for b in boundaries] + excludes

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and also parses on Python 3.
    print('reading existing commits ...')
    # NOTE(review): if this query yields row tuples rather than plain
    # strings, the membership test further down can never match -- confirm.
    for sha1 in session.query(db.Commit.sha1):
        commit_cache.add(sha1)

    print('log -p | patch-id ...')
    pipe = git('log', '-p', '--no-merges', *args, ret_pipe=True)
    for line in git('patch-id', input_pipe=pipe, ret_pipe=True):
        patch_id, commit_sha1 = line.split()
        patch_ids[commit_sha1] = patch_id

    print('reading trees ...')
    revisions = git('rev-list', '--no-merges', *args, ret_pipe=True)
    # NOTE(review): the statements that initialised/advanced ``count`` and
    # derived ``sha1`` from ``line`` were not visible in the reviewed text;
    # enumerate() and strip() below are reconstructions -- confirm.
    for count, line in enumerate(revisions):
        sys.stdout.write('\r%6d' % count)
        sha1 = line.strip()
        if sha1 not in commit_cache:
            commit_cache.add(sha1)
            output = git('log', '-1', '--pretty=format:%ct %at', sha1)[0]
            # %ct is the committer timestamp and %at the author timestamp,
            # so the committer date comes first in the output.
            cdate, adate = [int(s) for s in output.split()]
            commit = db.Commit(sha1, cdate, adate, patch_ids.get(sha1))
            # NOTE(review): any statement persisting ``commit`` in the
            # session was not visible in the reviewed text -- confirm.
            # scan_commit() accepts only the commit; it reaches the caches
            # through module globals, so the session is not passed.
            scan_commit(commit)

    print('\nstoring boundaries ...')
    # NOTE(review): the body of this loop and the loop that binds ``r`` were
    # not visible in the reviewed text.  Presumably the old boundaries are
    # deleted and every current ref tip is stored as a new one, followed by
    # a commit of the session -- confirm before relying on this.
    for b in session.query(db.Boundary).all():
        session.delete(b)
    for r in refs:
        session.add(db.Boundary(r))
if __name__ == '__main__':
    # Everything after the script name is forwarded as the revisions to
    # exclude from the scan.
    scan_history(excludes=sys.argv[1:])