5 import email
.utils
as emu
6 from sqlalchemy
.sql
import and_
7 from sqlalchemy
.orm
import join
class BlobTracker(object):
    """Record commits, and the blobs / file names in their trees, via ``db``.

    Every method shells out through the module-level ``git`` helper and
    stores results through the ``db`` and ``dbcache`` modules; none of these
    are defined in this part of the file.
    """

    @staticmethod
    def _timestamp(date_s):
        """Convert an RFC 2822 date string (git ``%aD`` / ``%cD``) to a UTC timestamp."""
        return emu.mktime_tz(emu.parsedate_tz(date_s))

    def scan_commit_tree(self, commit, autocommit=True):
        """Register every entry of *commit*'s tree with the blob and file caches.

        Runs ``git ls-tree -r`` on ``commit.sha1``.  For each entry, the blob
        fetched from ``dbcache.blob_cache`` by SHA-1 gets
        ``update_contained_in(commit)`` called on it, and the entry's base
        name is requested from ``dbcache.file_cache``.  Both caches are
        flushed at the end.

        :param commit: object exposing ``sha1``; also handed to
            ``update_contained_in``.
        :param autocommit: kept for interface compatibility; nothing in this
            method reads it.
        """
        for entry in git('ls-tree', '-r', commit.sha1, ret_pipe=True):
            assert entry.endswith('\n')
            # Each entry is "<mode> <type> <sha1>\t<path>"; only the object
            # id and the path are needed, but the three-way unpack still
            # rejects a malformed entry with ValueError.
            meta, path = entry[:-1].split('\t', 1)
            _mode, _obj_type, blob_sha1 = meta.split(' ')
            dbcache.blob_cache.get(blob_sha1).update_contained_in(commit)
            # The lookup is made for its side effect only.
            dbcache.file_cache.get(os.path.basename(path))  # magically creates it
        # NOTE(review): indentation was lost in the reviewed text; flushing
        # once after the loop (rather than per entry) is assumed -- confirm.
        dbcache.blob_cache.flush()
        dbcache.file_cache.flush()

    def scan_commit(self, sha1, autocommit=True, patch_ids=None):
        """Create a ``db.Commit`` for *sha1*, add it to the session, scan its tree.

        :param sha1: commit id passed to ``git show`` / ``git log``.
        :param autocommit: forwarded unchanged to :meth:`scan_commit_tree`.
        :param patch_ids: optional dict mapping commit SHA-1 to patch-id.
            When omitted, the patch-id of this one commit is computed with
            ``git show | git patch-id``.
        """
        # NOTE(review): the original guard around this computation is not
        # present in the reviewed text.  Skipping it when a map is supplied is
        # assumed from the ``patch_ids=None`` default -- confirm.
        if patch_ids is None:
            patch_ids = {}
            shown = git('show', sha1, ret_pipe=True)
            for row in git('patch-id', input_pipe=shown, ret_pipe=True):
                patch_id, commit_sha1 = row.split()
                patch_ids[commit_sha1] = patch_id

        output = git('log', '-1', '--pretty=format:%aD\t%cD\t%an <%ae>', sha1)[0]
        adate_s, cdate_s, author = output.split('\t', 2)
        # Author names are not guaranteed to be UTF-8; fall back to latin1.
        try:
            author = author.decode('utf8')
        except UnicodeDecodeError:
            author = author.decode('latin1')

        adate = self._timestamp(adate_s)
        cdate = self._timestamp(cdate_s)
        # db.Commit receives the committer date before the author date.
        # A commit missing from the map is stored with a patch-id of None.
        commit = db.Commit(sha1, cdate, adate, author, patch_ids.get(sha1))
        db.session.add(commit)
        self.scan_commit_tree(commit, autocommit=autocommit)

    def scan_history(self, refs):
        """Scan all non-merge commits reachable from *refs* but not from stored boundaries.

        *refs* are resolved with ``git rev-parse``.  Patch-ids for the whole
        range are computed in one ``git log -p | git patch-id`` pass and
        shared with :meth:`scan_commit`, so no per-commit ``git show`` is
        needed.  Finally the resolved refs are added as ``db.Boundary`` rows.

        :param refs: sequence of revision names accepted by ``git rev-parse``.
        """
        refs = git('rev-parse', *refs)[0].split()
        boundaries = db.session.query(db.Boundary).all()
        args = refs + ['--not'] + [b.sha1 for b in boundaries]

        print('log -p | patch-id ...')
        patch_ids = {}
        pipe = git('log', '-p', '--no-merges', *args, ret_pipe=True)
        for line in git('patch-id', input_pipe=pipe, ret_pipe=True):
            patch_id, commit_sha1 = line.split()
            patch_ids[commit_sha1] = patch_id

        print('reading trees ...')
        # NOTE(review): how ``count`` and ``sha1`` are bound is not present in
        # the reviewed text; a zero-based progress counter and the stripped
        # rev-list line are assumed -- confirm.
        for count, line in enumerate(
                git('rev-list', '--no-merges', *args, ret_pipe=True)):
            sys.stdout.write('\r%6d' % count)
            sha1 = line.strip()
            # Hand over the batch map so scan_commit does not re-run
            # ``git show | git patch-id`` for every single commit.
            self.scan_commit(sha1, autocommit=False, patch_ids=patch_ids)

        print('\nstoring boundaries ...')
        # NOTE(review): the body of this loop and the binding of ``r`` are not
        # present in the reviewed text.  Replacing the stored boundaries with
        # the freshly resolved refs is assumed -- confirm.
        for stale in db.session.query(db.Boundary).all():
            db.session.delete(stale)
        for r in refs:
            db.session.add(db.Boundary(r))
        # NOTE(review): no db.session.commit() is visible anywhere in this
        # class; verify where the session is committed.
if __name__ == '__main__':
    # Script entry point: every command-line argument is treated as a
    # revision name and handed to BlobTracker.scan_history.
    # NOTE(review): the statement binding ``bt`` is not present in the
    # reviewed text; a no-argument constructor is assumed because the class
    # shows no __init__ -- confirm.
    bt = BlobTracker()
    bt.scan_history(sys.argv[1:])