Add a hack to find maintainerless files in the kernel
[git-dm.git] / reports.py
blob39c237bdae0012f64bdb922407b68013eb87965f
2 # A new home for the reporting code.
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-16 Eklektix, Inc.
7 # Copyright 2007-16 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
13 import sys
# Destination for the plain-text report; SetOutput() rebinds it.
Outfile = sys.stdout
# Destination for the HTML rendition; while it is None no HTML is written.
HTMLfile = None
# Limit on how many entries a single report walks; SetMaxList() rebinds it.
ListCount = 999999
def SetOutput(file):
    """Redirect the plain-text report to ``file`` (any object with write())."""
    global Outfile
    Outfile = file
def SetHTMLOutput(file):
    """Send the HTML rendition of every report to ``file``."""
    global HTMLfile
    HTMLfile = file
def SetMaxList(max):
    """Set the limit on how many entries each report walks."""
    global ListCount
    ListCount = max
def Write(stuff):
    """Write ``stuff`` verbatim to the current plain-text output."""
    destination = Outfile
    destination.write(stuff)
# Opening markup for an HTML report table; the single %s slot receives
# the report title.
THead = (
    '<p>\n'
    '<table cellspacing=3 class="OddEven">\n'
    '<tr><th colspan=3>%s</th></tr>\n'
)
def BeginReport(title):
    """Start a report: print its title and, if HTML is enabled, open a table."""
    Outfile.write('\n%s\n' % title)
    if not HTMLfile:
        return
    HTMLfile.write(THead % title)
# HTML row templates.  TRow takes (label, count, percentage); TRowStr
# takes (label, count, free-form text).
TRow = (' <tr><td>%s</td>'
        '<td align="right">%d</td>'
        '<td align="right">%.1f%%</td></tr>\n')
TRowStr = (' <tr><td>%s</td>'
           '<td align="right">%d</td>'
           '<td>%s</td></tr>\n')
def ReportLine(text, count, pct):
    """Emit one report row: a label, an integer count and a percentage.

    Rows with a zero count are dropped.  The row always goes to the text
    output, and also to the HTML output when one has been set.
    """
    if count == 0:
        return
    row = (text, count, pct)
    Outfile.write ('%-25s %4d (%.1f%%)\n' % row)
    if HTMLfile:
        HTMLfile.write(TRow % row)
def ReportLineStr(text, count, extra):
    """Emit one report row: a label, an integer count and free-form text.

    Rows with a zero count are dropped.  The row always goes to the text
    output, and also to the HTML output when one has been set.
    """
    if count == 0:
        return
    row = (text, count, extra)
    Outfile.write ('%-25s %4d %s\n' % row)
    if HTMLfile:
        HTMLfile.write(TRowStr % row)
def EndReport():
    """Finish a report by closing the HTML table, if HTML output is enabled."""
    if not HTMLfile:
        return
    HTMLfile.write('</table>\n\n')
70 # Comparison and report generation functions.
def ComparePCount(h):
    """Sort key: the number of entries in ``h.patches``."""
    patches = h.patches
    return len(patches)
def ReportByPCount(hlist, cscount):
    """Report developers ranked by number of changesets.

    hlist is sorted in place, most patches first.  Every developer with
    at least one patch gets a line giving the patch count and its share
    of cscount, the changeset total supplied by the caller.  The walk
    stops once ListCount entries have been handled.
    """
    hlist.sort(key = ComparePCount, reverse = True)
    BeginReport('Developers with the most changesets')
    for seen, h in enumerate(hlist, 1):
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        if seen >= ListCount:
            break
    EndReport()
def CompareLChanged(h):
    """Sort key: the value of ``h.changed``."""
    lines_changed = h.changed
    return lines_changed
def ReportByLChanged(hlist, totalchanged):
    """Report developers ranked by number of changed lines.

    hlist is sorted in place, largest ``changed`` first.  Every developer
    with a positive ``changed`` value gets a line giving that value and
    its share of totalchanged, which the caller supplies.  The walk stops
    once ListCount entries have been handled.
    """
    hlist.sort(key = CompareLChanged, reverse = True)
    BeginReport('Developers with the most changed lines')
    for seen, h in enumerate(hlist, 1):
        if h.changed > 0:
            ReportLine(h.name, h.changed, (h.changed*100.0)/totalchanged)
        if seen >= ListCount:
            break
    EndReport()
def CompareLRemoved(h):
    """Sort key: lines removed minus lines added for ``h``."""
    net_removed = h.removed - h.added
    return net_removed
def ReportByLRemoved(hlist, totalremoved):
    """Report developers ranked by net lines removed.

    hlist is sorted in place, largest (removed - added) first.  Only
    developers who removed more lines than they added get a line; it
    gives the net removal and its share of totalremoved, which the
    caller supplies.  The walk stops once ListCount entries have been
    handled.
    """
    hlist.sort(key = CompareLRemoved, reverse = True)
    BeginReport('Developers with the most lines removed')
    for seen, h in enumerate(hlist, 1):
        delta = h.added - h.removed
        if delta < 0:
            ReportLine(h.name, -delta, (-delta*100.0)/totalremoved)
        if seen >= ListCount:
            break
    EndReport()
def CompareEPCount(e):
    """Sort key: the value of ``e.count``."""
    changesets = e.count
    return changesets
def ReportByPCEmpl(elist, cscount):
    """Report employers ranked by changeset count.

    elist is sorted in place, largest ``count`` first; each employer
    with a non-zero count gets a line showing its share of cscount.
    """
    elist.sort(key = CompareEPCount, reverse = True)
    BeginReport('Top changeset contributors by employer')
    for seen, employer in enumerate(elist, 1):
        if employer.count != 0:
            share = (employer.count*100.0)/cscount
            ReportLine(employer.name, employer.count, share)
        if seen >= ListCount:
            break
    EndReport()
def CompareELChanged(e):
    """Sort key: the value of ``e.changed``."""
    lines_changed = e.changed
    return lines_changed
def ReportByELChanged(elist, totalchanged):
    """Report employers ranked by changed lines.

    elist is sorted in place, largest ``changed`` first; each employer
    with a non-zero value gets a line showing its share of totalchanged.
    """
    elist.sort(key = CompareELChanged, reverse = True)
    BeginReport('Top lines changed by employer')
    for seen, employer in enumerate(elist, 1):
        if employer.changed != 0:
            share = (employer.changed*100.0)/totalchanged
            ReportLine(employer.name, employer.changed, share)
        if seen >= ListCount:
            break
    EndReport()
def CompareSOBs(h):
    """Sort key: the number of entries in ``h.signoffs``."""
    signoffs = h.signoffs
    return len(signoffs)
def ReportBySOBs(hlist):
    """Report developers ranked by number of signoffs.

    hlist is sorted in place; percentages are relative to the number of
    signoffs summed over the whole list.
    """
    hlist.sort(key = CompareSOBs, reverse = True)
    totalsobs = sum(len(h.signoffs) for h in hlist)
    BeginReport('Developers with the most signoffs (total %d)' % totalsobs)
    for seen, h in enumerate(hlist, 1):
        scount = len(h.signoffs)
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalsobs)
        if seen >= ListCount:
            break
    EndReport()
177 # Reviewer reporting.
def CompareRevs(h):
    """Sort key: the number of entries in ``h.reviews``."""
    reviews = h.reviews
    return len(reviews)
def ReportByRevs(hlist):
    """Report developers ranked by number of reviews.

    hlist is sorted in place; percentages are relative to the number of
    reviews summed over the whole list.
    """
    hlist.sort(key = CompareRevs, reverse = True)
    totalrevs = sum(len(h.reviews) for h in hlist)
    BeginReport('Developers with the most reviews (total %d)' % totalrevs)
    for seen, h in enumerate(hlist, 1):
        rcount = len(h.reviews)
        if rcount > 0:
            ReportLine(h.name, rcount, (rcount*100.0)/totalrevs)
        if seen >= ListCount:
            break
    EndReport()
199 # Tester reporting.
def CompareTests(h):
    """Sort key: the number of entries in ``h.tested``."""
    tested = h.tested
    return len(tested)
def ReportByTests(hlist):
    """Report developers ranked by number of test credits.

    hlist is sorted in place; percentages are relative to the number of
    ``tested`` entries summed over the whole list.
    """
    hlist.sort(key = CompareTests, reverse = True)
    totaltests = sum(len(h.tested) for h in hlist)
    BeginReport('Developers with the most test credits (total %d)' % totaltests)
    for seen, h in enumerate(hlist, 1):
        tcount = len(h.tested)
        if tcount > 0:
            ReportLine(h.name, tcount, (tcount*100.0)/totaltests)
        if seen >= ListCount:
            break
    EndReport()
def CompareTestCred(h):
    """Sort key: the value of ``h.testcred``."""
    credits_given = h.testcred
    return credits_given
def ReportByTestCreds(hlist):
    """Report developers ranked by tested-by credits given.

    hlist is sorted in place; percentages are relative to ``testcred``
    summed over the whole list.
    """
    hlist.sort(key = CompareTestCred, reverse = True)
    totaltests = sum(h.testcred for h in hlist)
    BeginReport('Developers who gave the most tested-by credits (total %d)' % totaltests)
    for seen, h in enumerate(hlist, 1):
        if h.testcred > 0:
            ReportLine(h.name, h.testcred, (h.testcred*100.0)/totaltests)
        if seen >= ListCount:
            break
    EndReport()
241 # Reporter reporting.
def CompareReports(h):
    """Sort key: the number of entries in ``h.reports``."""
    reports = h.reports
    return len(reports)
def ReportByReports(hlist):
    """Report developers ranked by number of report credits.

    hlist is sorted in place; percentages are relative to the number of
    ``reports`` entries summed over the whole list.
    """
    hlist.sort(key = CompareReports, reverse = True)
    totalreps = sum(len(h.reports) for h in hlist)
    BeginReport('Developers with the most report credits (total %d)' % totalreps)
    for seen, h in enumerate(hlist, 1):
        rcount = len(h.reports)
        if rcount > 0:
            ReportLine(h.name, rcount, (rcount*100.0)/totalreps)
        if seen >= ListCount:
            break
    EndReport()
def CompareRepCred(h):
    """Sort key: the value of ``h.repcred``."""
    credits_given = h.repcred
    return credits_given
def ReportByRepCreds(hlist):
    """Report developers ranked by report credits given.

    hlist is sorted in place; percentages are relative to ``repcred``
    summed over the whole list.
    """
    hlist.sort(key = CompareRepCred, reverse = True)
    totalreps = sum(h.repcred for h in hlist)
    BeginReport('Developers who gave the most report credits (total %d)' % totalreps)
    for seen, h in enumerate(hlist, 1):
        if h.repcred > 0:
            ReportLine(h.name, h.repcred, (h.repcred*100.0)/totalreps)
        if seen >= ListCount:
            break
    EndReport()
281 # Versions.
def CompareVersionCounts(h):
    """Sort key: how many versions ``h`` appears in, or -1 when it has none."""
    return len(h.versions) if h.versions else -1
def MissedVersions(hv, allv):
    """Return the entries of ``allv`` absent from ``hv``, space-separated.

    The result lists them in the reverse of their order in ``allv``.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append(version)
    return ' '.join(reversed(absent))
def ReportVersions(hlist):
    """Report developers ranked by the number of versions they appear in.

    hlist is sorted in place with CompareVersionCounts, so the first
    entry has the most versions; its version list is the reference that
    every developer's "missed versions" column is computed against.

    An empty hlist yields a header-only report instead of raising
    IndexError, and a falsy ``versions`` value is treated as an empty
    list, matching what CompareVersionCounts already tolerates.
    """
    hlist.sort(key = CompareVersionCounts, reverse = True)
    BeginReport('Developers represented in the most kernel versions')
    allversions = (hlist[0].versions or []) if hlist else []
    for seen, h in enumerate(hlist, 1):
        versions = h.versions or []
        # ReportLineStr drops rows whose count is zero.
        ReportLineStr(h.name, len(versions), MissedVersions(versions, allversions))
        if seen >= ListCount:
            break
    EndReport()
def CompareESOBs(e):
    """Sort key: the value of ``e.sobs``."""
    signoffs = e.sobs
    return signoffs
def ReportByESOBs(elist):
    """Report employers ranked by signoff count.

    elist is sorted in place; percentages are relative to ``sobs``
    summed over the whole list.
    """
    elist.sort(key = CompareESOBs, reverse = True)
    totalsobs = sum(e.sobs for e in elist)
    BeginReport('Employers with the most signoffs (total %d)' % totalsobs)
    for seen, employer in enumerate(elist, 1):
        if employer.sobs > 0:
            ReportLine(employer.name, employer.sobs, (employer.sobs*100.0)/totalsobs)
        if seen >= ListCount:
            break
    EndReport()
def CompareHackers(e):
    """Sort key: the number of entries in ``e.hackers``."""
    hackers = e.hackers
    return len(hackers)
def ReportByEHackers(elist):
    """Report employers ranked by number of hackers.

    elist is sorted in place; percentages are relative to the number of
    ``hackers`` entries summed over the whole list.
    """
    elist.sort(key = CompareHackers, reverse = True)
    totalhackers = sum(len(e.hackers) for e in elist)
    BeginReport('Employers with the most hackers (total %d)' % totalhackers)
    for seen, employer in enumerate(elist, 1):
        headcount = len(employer.hackers)
        if headcount > 0:
            ReportLine(employer.name, headcount, (headcount*100.0)/totalhackers)
        if seen >= ListCount:
            break
    EndReport()
def DevReports(hlist, totalchanged, cscount, totalremoved):
    """Run every per-developer report over ``hlist``, in a fixed order."""
    # These three need a caller-supplied total for their percentages.
    ReportByPCount(hlist, cscount)
    ReportByLChanged(hlist, totalchanged)
    ReportByLRemoved(hlist, totalremoved)
    # The rest take only the developer list.
    for report in (ReportBySOBs, ReportByRevs, ReportByTests,
                   ReportByTestCreds, ReportByReports, ReportByRepCreds):
        report(hlist)
def EmplReports(elist, totalchanged, cscount):
    """Run every per-employer report over ``elist``, in a fixed order."""
    # These two need a caller-supplied total for their percentages.
    ReportByPCEmpl(elist, cscount)
    ReportByELChanged(elist, totalchanged)
    # The rest take only the employer list.
    for report in (ReportByESOBs, ReportByEHackers):
        report(elist)
362 # Who are the unknown hackers?
def IsUnknown(h):
    """Tell whether ``h`` has no known employer.

    True when the employer name found at ``h.employer[0][0][1]`` equals
    the developer's first email address or the literal '(Unknown)'.
    """
    employer_name = h.employer[0][0][1].name
    if h.email[0] == employer_name:
        return True
    return employer_name == '(Unknown)'
def ReportUnknowns(hlist, cscount):
    """Report developers of unknown affiliation, ranked by changesets.

    Unlike the other reports this one leaves ``hlist`` itself unsorted;
    it works on a filtered copy.
    """
    #
    # Keep only the developers IsUnknown() flags; that test is meant to
    # work whether or not mapping to (Unknown) is happening.
    #
    unknowns = sorted(filter(IsUnknown, hlist), key = ComparePCount,
                      reverse = True)
    BeginReport('Developers with unknown affiliation')
    for seen, h in enumerate(unknowns, 1):
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        if seen >= ListCount:
            break
    EndReport()
def ReportByFileType(hacker_list):
    """Print added/removed line counts broken down by file type.

    Three sections are produced: per-developer totals for each file
    type, per-file-type totals with each contributing developer, and
    per-file-type totals alone.  Each patch is expected to carry a
    ``filetypes`` mapping of file type -> (added, removed).

    The detail lines go through print() rather than Outfile; only the
    section titles use the normal report machinery.
    """
    # Positions of the counters inside the [added, removed, ...] lists
    # built below.
    ADDED, REMOVED = 0, 1

    # file type -> [added, removed, [[developer, added, removed], ...]]
    total = {}

    BeginReport('Developer contributions by type')
    for h in hacker_list:
        # file type -> [added, removed] for this developer only
        by_hacker = {}
        for patch in h.patches:
            for filetype, (added, removed) in patch.filetypes.items():
                # Get a summary by hacker
                mine = by_hacker.setdefault(filetype, [0, 0])
                mine[ADDED] += added
                mine[REMOVED] += removed

                # Update the totals
                overall = total.setdefault(filetype, [0, 0, []])
                overall[ADDED] += added
                overall[REMOVED] += removed

        # Print a summary by hacker
        # FIXME why isn't this using Outfile?
        print(h.name)
        for filetype, counters in by_hacker.items():
            print('\t', filetype, counters)
            total[filetype][2].append([h.name, counters[ADDED], counters[REMOVED]])
    # Close the HTML table BeginReport opened, as every other report does.
    EndReport()

    # Print the global summary
    BeginReport('Contributions by type and developers')
    for filetype, (added, removed, hackers) in total.items():
        print(filetype, added, removed)
        for name, h_added, h_removed in hackers:
            print('\t%s: [%d, %d]' % (name, h_added, h_removed))
    EndReport()

    # Print the very global summary
    BeginReport('General contributions by type')
    for filetype, (added, removed, hackers) in total.items():
        print(filetype, added, removed)
    EndReport()
433 # The file access report is a special beast.
def FileAccessReport(name, accesses, total):
    """Write a per-file access report to the file called ``name``.

    accesses maps a file path to its access count; total is the figure
    the percentages are taken against.  One line is written per path, in
    sorted path order: count, percentage of total, path.  When total is
    zero the percentage is reported as 0.0 rather than dividing by zero.

    The output file is created or truncated, and is closed even if a
    write fails.
    """
    with open(name, 'w') as outf:
        for path in sorted(accesses):
            hits = accesses[path]
            pct = (100.0*hits)/total if total else 0.0
            outf.write('%6d %6.1f%% %s\n' % (hits, pct, path))