# This code is part of the LWN git data miner.
#
# Copyright 2007-8 LWN.net
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
14 import database
, csv
, ConfigFile
, reports
15 import getopt
, datetime
16 import os
, re
, sys
, rfc822
, string
17 from patterns
import *
19 Today
= datetime
.date
.today()
31 CFName
= 'gitdm.config'
# -a            Andrew Morton's signoffs shadow Linus's
# -c cfile      Specify a configuration file
# -d            Output individual developer stats
# -D            Output date statistics
# -h hfile      HTML output to hfile
# -l count      Maximum length for output lists
# -o file       File for text output
# -r pattern    Restrict to files matching pattern
# -s            Ignore author SOB lines
# -u            Map unknown employers to '(Unknown)'
# -x file.csv   Export raw statistics as CSV
# -z            Dump out the hacker database at completion
49 global MapUnknown
, DevReports
50 global DateStats
, AuthorSOBs
, FileFilter
, AkpmOverLt
, DumpDB
51 global CFName
, CSVFile
53 opts
, rest
= getopt
.getopt (sys
.argv
[1:], 'adc:Dh:l:o:r:sux:z')
64 reports
.SetHTMLOutput (open (opt
[1], 'w'))
66 reports
.SetMaxList (int (opt
[1]))
68 reports
.SetOutput (open (opt
[1], 'w'))
70 print 'Filter on "%s"' % (opt
[1])
71 FileFilter
= re
.compile (opt
[1])
77 CSVFile
= open (opt
[1], 'w')
78 print "open output file " + opt
[1] + "\n"
84 def LookupStoreHacker (name
, email
):
85 email
= database
.RemapEmail (email
)
86 h
= database
.LookupEmail (email
)
89 elist
= database
.LookupEmployer (email
, MapUnknown
)
90 h
= database
.LookupName (name
)
92 h
.addemail (email
, elist
)
94 return database
.StoreHacker(name
, elist
, email
)
102 def AddDateLines(date
, lines
):
104 print 'Skip big patch (%d)' % lines
107 DateMap
[date
] += lines
109 DateMap
[date
] = lines
111 def PrintDateStats():
112 dates
= DateMap
.keys ()
115 datef
= open ('datelc', 'w')
117 total
+= DateMap
[date
]
118 datef
.write ('%d/%02d/%02d %6d %7d\n' % (date
.year
, date
.month
, date
.day
,
119 DateMap
[date
], total
))
123 # Let's slowly try to move some smarts into this class.
126 def __init__ (self
, commit
):
128 self
.merge
= self
.added
= self
.removed
= 0
129 self
.author
= LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
130 self
.email
= 'unknown@hacker.net'
def addreviewer(self, reviewer):
    """Record *reviewer* as a reviewer of this patch.

    Appends to ``self.reviews``; the list must already exist on the
    instance.  The changeset parser in this file passes the value
    returned by ``LookupStoreHacker`` for each ``Reviewed-by:`` tag.
    """
    self.reviews.append(reviewer)
def addtester(self, tester):
    """Record *tester* as a tester of this patch.

    Appends to ``self.testers``; the list must already exist on the
    instance.  The changeset parser in this file passes the value
    returned by ``LookupStoreHacker`` for each ``Tested-by:`` tag.
    """
    self.testers.append(tester)
def addreporter(self, reporter):
    """Record *reporter* as a reporter credited on this patch.

    Appends to ``self.reports``; the list must already exist on the
    instance.  The changeset parser in this file passes the value
    returned by ``LookupStoreHacker`` for each ``Reported-by:`` tag.
    """
    self.reports.append(reporter)
145 # The core hack for grabbing the information about a changeset.
148 global NextLine
, TotalAdded
, TotalRemoved
, TotalChanged
151 m
= Pcommit
.match (NextLine
)
154 NextLine
= sys
.stdin
.readline ()
158 p
= patch(m
.group (1))
159 NextLine
= sys
.stdin
.readline ()
160 ignore
= (FileFilter
is not None)
164 # If this line starts a new commit, drop out.
166 m
= Pcommit
.match (Line
)
169 NextLine
= sys
.stdin
.readline ()
171 # Maybe it's an author line?
173 m
= Pauthor
.match (Line
)
175 p
.email
= database
.RemapEmail (m
.group (2))
176 p
.author
= LookupStoreHacker(m
.group (1), p
.email
)
179 # Could be a signed-off-by:
181 m
= Psob
.search (Line
)
183 email
= database
.RemapEmail (m
.group (2))
184 sobber
= LookupStoreHacker(m
.group (1), email
)
185 if sobber
!= p
.author
or AuthorSOBs
:
186 p
.sobs
.append ((email
, LookupStoreHacker(m
.group (1), m
.group (2))))
189 # Various other tags of interest.
191 m
= Preview
.search (Line
) # Reviewed-by:
193 email
= database
.RemapEmail (m
.group (2))
194 p
.addreviewer (LookupStoreHacker(m
.group (1), email
))
196 m
= Ptest
.search (Line
) # Tested-by:
198 email
= database
.RemapEmail (m
.group (2))
199 p
.addtester (LookupStoreHacker (m
.group (1), email
))
200 p
.author
.testcredit (patch
)
202 m
= Prep
.search (Line
) # Reported-by:
204 email
= database
.RemapEmail (m
.group (2))
205 p
.addreporter (LookupStoreHacker (m
.group (1), email
))
206 p
.author
.reportcredit (patch
)
208 m
= Preptest
.search (Line
) # Reported-and-tested-by:
210 email
= database
.RemapEmail (m
.group (2))
211 h
= LookupStoreHacker (m
.group (1), email
)
214 p
.author
.reportcredit (patch
)
215 p
.author
.testcredit (patch
)
218 # If this one is a merge, make note of the fact.
220 m
= Pmerge
.match (Line
)
225 # See if it's the date.
227 m
= Pdate
.match (Line
)
229 dt
= rfc822
.parsedate(m
.group (2))
230 p
.date
= datetime
.date (dt
[0], dt
[1], dt
[2])
232 sys
.stderr
.write ('Funky date: %s\n' % p
.date
)
236 # If we have a file filter, check for file lines.
239 ignore
= ApplyFileFilter (Line
, ignore
)
241 # OK, maybe it's part of the diff itself.
244 if Padd
.match (Line
):
247 if Prem
.match (Line
):
250 # Record some global information - but only if this patch had
251 # stuff which wasn't ignored. This work should be done
254 if ((p
.added
+ p
.removed
) > 0 or not FileFilter
) and not p
.merge
:
255 TotalAdded
+= p
.added
256 TotalRemoved
+= p
.removed
257 TotalChanged
+= max (p
.added
, p
.removed
)
258 AddDateLines (p
.date
, max (p
.added
, p
.removed
))
259 empl
= p
.author
.emailemployer (p
.email
, p
.date
)
263 for sobemail
, sobber
in p
.sobs
:
264 empl
= sobber
.emailemployer (sobemail
, p
.date
)
269 def ApplyFileFilter (line
, ignore
):
271 # If this is the first file line (--- a/), set ignore one way
274 m
= Pfilea
.match (line
)
277 if FileFilter
.search (file):
281 # For the second line, we can turn ignore off, but not on
283 m
= Pfileb
.match (line
)
286 if FileFilter
.search (file):
291 # If this patch is signed off by both Andrew Morton and Linus Torvalds,
292 # remove the (redundant) Linus signoff.
295 if Linus
in p
.sobs
and Akpm
in p
.sobs
:
296 p
.sobs
.remove (Linus
)
300 # Here starts the real program.
305 # Read the config files.
307 ConfigFile
.ConfigFile (CFName
)
310 # Let's pre-seed the database with a couple of hackers
311 # we want to remember.
313 Linus
= ('torvalds@linux-foundation.org',
314 LookupStoreHacker ('Linus Torvalds', 'torvalds@linux-foundation.org'))
315 Akpm
= ('akpm@linux-foundation.org',
316 LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))
318 NextLine
= sys
.stdin
.readline ()
319 TotalChanged
= TotalAdded
= TotalRemoved
= 0
324 print >> sys
.stderr
, 'Grabbing changesets...\r',
326 printcount
= CSCount
= 0
328 if (printcount
% 50) == 0:
329 print >> sys
.stderr
, 'Grabbing changesets...%d\r' % printcount
,
334 # if p.added > 100000 or p.removed > 100000:
335 # print 'Skipping massive add', p.commit
337 if FileFilter
and p
.added
== 0 and p
.removed
== 0:
340 p
.author
.addpatch (p
)
341 for sobemail
, sob
in p
.sobs
:
343 for hacker
in p
.reviews
:
345 for hacker
in p
.testers
:
347 for hacker
in p
.reports
:
350 csv
.AccumulatePatch (p
)
351 print >> sys
.stderr
, 'Grabbing changesets...done'
358 hlist
= database
.AllHackers ()
359 elist
= database
.AllEmployers ()
360 reports
.Write ('Processed %d csets from %d developers\n' % (CSCount
,
362 reports
.Write ('%d employers found\n' % len (elist
))
363 reports
.Write ('A total of %d lines added, %d removed (delta %d)\n' %
364 (TotalAdded
, TotalRemoved
, TotalAdded
- TotalRemoved
))
365 if TotalChanged
== 0:
366 TotalChanged
= 1 # HACK to avoid div by zero
371 csv
.OutputCSV (CSVFile
)
372 if CSVFile
is not None:
376 reports
.DevReports (hlist
, TotalChanged
, CSCount
, TotalRemoved
)
377 reports
.EmplReports (elist
, TotalChanged
, CSCount
)