treeplot: a hack to count signed/unsigned pulls/commits
[git-dm.git] / reports.py
blobd7a96bc10ad38a72194a3986576434fcd232a9be
2 # A new home for the reporting code.
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-16 Eklektix, Inc.
7 # Copyright 2007-16 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
13 import sys
# Destination for the plain-text reports; replaced via SetOutput().
Outfile = sys.stdout
# Optional second destination for HTML tables; None disables HTML output.
HTMLfile = None
# Upper bound on entries per report; replaced via SetMaxList().
ListCount = 999999
def SetOutput(file):
    """Make `file` the destination for all plain-text report output.

    `file` only needs a write() method; it is stored in the module-level
    Outfile and used by every subsequent report.
    """
    global Outfile
    Outfile = file
def SetHTMLOutput(file):
    """Make `file` the destination for the HTML version of the reports.

    The value is stored in the module-level HTMLfile; a false value
    (such as None) turns HTML output off.
    """
    global HTMLfile
    HTMLfile = file
def SetMaxList(max):
    """Set the module-level ListCount, the per-report entry limit, to `max`."""
    global ListCount
    ListCount = max
def Write(stuff):
    """Write the string `stuff` verbatim to the current text output file."""
    Outfile.write(stuff)
# Opening HTML for one report table; %s receives the report title.
THead = '''<p>
<table cellspacing=3 class="OddEven">
<tr><th colspan=3>%s</th></tr>
'''

def BeginReport(title):
    """Start a new report headed by `title`.

    The title goes to the text output on its own line, preceded by a
    blank line.  When an HTML output file is set, the table header is
    written there too; EndReport() closes that table.
    """
    Outfile.write('\n%s\n' % title)
    if not HTMLfile:
        return
    HTMLfile.write(THead % title)
# HTML row templates: (label, integer count, percentage) and
# (label, integer count, free-form string) respectively.
TRow = ' <tr><td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>\n'
TRowStr = ' <tr><td>%s</td><td align="right">%d</td><td>%s</td></tr>\n'

def ReportLine(text, count, pct):
    """Emit one report row: a label, a count and a percentage.

    Rows whose count is zero are dropped.  The row is written to the
    text output and, when an HTML output file is set, to that as well.
    """
    if count != 0:
        Outfile.write ('%-25s %4d (%.1f%%)\n' % (text, count, pct))
        # NOTE(review): `text` goes into the HTML unescaped -- confirm that
        # callers never pass names containing '<' or '&'.
        if HTMLfile:
            HTMLfile.write(TRow % (text, count, pct))
def ReportLineStr(text, count, extra):
    """Emit one report row: a label, a count and a trailing string.

    Rows whose count is zero are dropped.  The row is written to the
    text output and, when an HTML output file is set, to that as well.
    """
    if count != 0:
        Outfile.write ('%-25s %4d %s\n' % (text, count, extra))
        # NOTE(review): `text` and `extra` go into the HTML unescaped --
        # confirm that callers never pass values containing '<' or '&'.
        if HTMLfile:
            HTMLfile.write(TRowStr % (text, count, extra))
def EndReport():
    """Close the HTML table opened by BeginReport(); a no-op without HTML output."""
    if not HTMLfile:
        return
    HTMLfile.write('</table>\n\n')
70 # Comparison and report generation functions.
def ComparePCount(h1, h2):
    """cmp-style comparator: order by number of patches, largest first.

    Returns a negative number when h1 has more patches than h2, zero on
    a tie, and a positive number otherwise.
    """
    second = len(h2.patches)
    first = len(h1.patches)
    return second - first
def ReportByPCount(hlist, cscount):
    """Report developers ranked by number of changesets.

    hlist   -- list of developer objects (reads .name and .patches);
               sorted in place, most patches first.
    cscount -- total changeset count, used as the percentage divisor.

    Developers without patches produce no line.  At most ListCount
    entries of the sorted list are examined.
    """
    # key/reverse instead of a cmp function: a positional comparator is
    # rejected by Python 3.  A reversed sort is still stable, so ties keep
    # the order they had before, exactly as with the comparator.
    hlist.sort(key=lambda h: len(h.patches), reverse=True)
    count = 0
    BeginReport('Developers with the most changesets')
    for h in hlist:
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareLChanged(h1, h2):
    """cmp-style comparator: order by the .changed attribute, largest first."""
    second = h2.changed
    first = h1.changed
    return second - first
def ReportByLChanged(hlist, totalchanged):
    """Report developers ranked by number of changed lines.

    hlist        -- list of developer objects (reads .name and .changed);
                    sorted in place, most changed lines first.
    totalchanged -- total changed-line count, the percentage divisor.

    Developers with no changed lines produce no line.  At most ListCount
    entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: h.changed, reverse=True)
    count = 0
    BeginReport('Developers with the most changed lines')
    for h in hlist:
        if h.changed > 0:
            ReportLine(h.name, h.changed, (h.changed*100.0)/totalchanged)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareLRemoved(h1, h2):
    """cmp-style comparator: order by net lines removed (removed - added), largest first."""
    second_net = h2.removed - h2.added
    first_net = h1.removed - h1.added
    return second_net - first_net
def ReportByLRemoved(hlist, totalremoved):
    """Report developers ranked by net lines removed.

    hlist        -- list of developer objects (reads .name, .added and
                    .removed); sorted in place, largest net removal first.
    totalremoved -- total removed-line count, the percentage divisor.

    Only developers who removed more lines than they added produce a
    line.  At most ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: h.removed - h.added, reverse=True)
    count = 0
    BeginReport('Developers with the most lines removed')
    for h in hlist:
        delta = h.added - h.removed
        if delta < 0:
            ReportLine(h.name, -delta, (-delta*100.0)/totalremoved)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareEPCount(e1, e2):
    """cmp-style comparator: order by the .count attribute, largest first."""
    second = e2.count
    first = e1.count
    return second - first
def ReportByPCEmpl(elist, cscount):
    """Report employers ranked by number of changesets.

    elist   -- list of employer objects (reads .name and .count);
               sorted in place, highest count first.
    cscount -- total changeset count, the percentage divisor.

    Employers with a zero count produce no line.  At most ListCount
    entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    elist.sort(key=lambda e: e.count, reverse=True)
    count = 0
    BeginReport('Top changeset contributors by employer')
    for e in elist:
        if e.count != 0:
            ReportLine(e.name, e.count, (e.count*100.0)/cscount)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareELChanged(e1, e2):
    """cmp-style comparator: order by the .changed attribute, largest first."""
    second = e2.changed
    first = e1.changed
    return second - first
def ReportByELChanged(elist, totalchanged):
    """Report employers ranked by number of changed lines.

    elist        -- list of employer objects (reads .name and .changed);
                    sorted in place, most changed lines first.
    totalchanged -- total changed-line count, the percentage divisor.

    Employers with zero changed lines produce no line.  At most
    ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    elist.sort(key=lambda e: e.changed, reverse=True)
    count = 0
    BeginReport('Top lines changed by employer')
    for e in elist:
        if e.changed != 0:
            ReportLine(e.name, e.changed, (e.changed*100.0)/totalchanged)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareSOBs(h1, h2):
    """cmp-style comparator: order by number of signoffs, largest first."""
    second = len(h2.signoffs)
    first = len(h1.signoffs)
    return second - first
def ReportBySOBs(hlist):
    """Report developers ranked by number of signoffs.

    hlist -- list of developer objects (reads .name and .signoffs);
             sorted in place, most signoffs first.

    The total across all developers appears in the report title and is
    the percentage divisor.  Developers without signoffs produce no
    line.  At most ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: len(h.signoffs), reverse=True)
    totalsobs = sum(len(h.signoffs) for h in hlist)
    count = 0
    BeginReport('Developers with the most signoffs (total %d)' % totalsobs)
    for h in hlist:
        scount = len(h.signoffs)
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalsobs)
        count += 1
        if count >= ListCount:
            break
    EndReport()
177 # Reviewer reporting.
def CompareRevs(h1, h2):
    """cmp-style comparator: order by number of reviews, largest first."""
    second = len(h2.reviews)
    first = len(h1.reviews)
    return second - first
def ReportByRevs(hlist):
    """Report developers ranked by number of reviews.

    hlist -- list of developer objects (reads .name and .reviews);
             sorted in place, most reviews first.

    The total across all developers appears in the report title and is
    the percentage divisor.  Developers without reviews produce no line.
    At most ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: len(h.reviews), reverse=True)
    totalrevs = sum(len(h.reviews) for h in hlist)
    count = 0
    BeginReport('Developers with the most reviews (total %d)' % totalrevs)
    for h in hlist:
        scount = len(h.reviews)
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalrevs)
        count += 1
        if count >= ListCount:
            break
    EndReport()
199 # tester reporting.
def CompareTests(h1, h2):
    """cmp-style comparator: order by length of the .tested list, largest first."""
    second = len(h2.tested)
    first = len(h1.tested)
    return second - first
def ReportByTests(hlist):
    """Report developers ranked by number of test credits.

    hlist -- list of developer objects (reads .name and .tested);
             sorted in place, longest .tested list first.

    The total across all developers appears in the report title and is
    the percentage divisor.  Developers with an empty .tested list
    produce no line.  At most ListCount entries of the sorted list are
    examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: len(h.tested), reverse=True)
    totaltests = sum(len(h.tested) for h in hlist)
    count = 0
    BeginReport('Developers with the most test credits (total %d)' % totaltests)
    for h in hlist:
        scount = len(h.tested)
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totaltests)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareTestCred(h1, h2):
    """cmp-style comparator: order by the .testcred attribute, largest first."""
    second = h2.testcred
    first = h1.testcred
    return second - first
def ReportByTestCreds(hlist):
    """Report developers ranked by tested-by credits given.

    hlist -- list of developer objects (reads .name and .testcred);
             sorted in place, highest .testcred first.

    The sum of .testcred appears in the report title and is the
    percentage divisor.  Developers with no credit produce no line.
    At most ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: h.testcred, reverse=True)
    totaltests = sum(h.testcred for h in hlist)
    count = 0
    BeginReport('Developers who gave the most tested-by credits (total %d)' % totaltests)
    for h in hlist:
        if h.testcred > 0:
            ReportLine(h.name, h.testcred, (h.testcred*100.0)/totaltests)
        count += 1
        if count >= ListCount:
            break
    EndReport()
241 # Reporter reporting.
def CompareReports(h1, h2):
    """cmp-style comparator: order by length of the .reports list, largest first."""
    second = len(h2.reports)
    first = len(h1.reports)
    return second - first
def ReportByReports(hlist):
    """Report developers ranked by number of report credits.

    hlist -- list of developer objects (reads .name and .reports);
             sorted in place, longest .reports list first.

    The total across all developers appears in the report title and is
    the percentage divisor.  Developers with an empty .reports list
    produce no line.  At most ListCount entries of the sorted list are
    examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: len(h.reports), reverse=True)
    totalreps = sum(len(h.reports) for h in hlist)
    count = 0
    BeginReport('Developers with the most report credits (total %d)' % totalreps)
    for h in hlist:
        scount = len(h.reports)
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalreps)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareRepCred(h1, h2):
    """cmp-style comparator: order by the .repcred attribute, largest first."""
    second = h2.repcred
    first = h1.repcred
    return second - first
def ReportByRepCreds(hlist):
    """Report developers ranked by report credits given.

    hlist -- list of developer objects (reads .name and .repcred);
             sorted in place, highest .repcred first.

    The sum of .repcred appears in the report title and is the
    percentage divisor.  Developers with no credit produce no line.
    At most ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    hlist.sort(key=lambda h: h.repcred, reverse=True)
    totalreps = sum(h.repcred for h in hlist)
    count = 0
    BeginReport('Developers who gave the most report credits (total %d)' % totalreps)
    for h in hlist:
        if h.repcred > 0:
            ReportLine(h.name, h.repcred, (h.repcred*100.0)/totalreps)
        count += 1
        if count >= ListCount:
            break
    EndReport()
281 # Versions.
def CompareVersionCounts(h1, h2):
    """cmp-style comparator: order by number of versions, largest first.

    An object whose .versions is empty or None sorts after any object
    that has versions; two such objects compare equal.
    """
    first_has = bool(h1.versions)
    second_has = bool(h2.versions)
    if first_has and second_has:
        return len(h2.versions) - len(h1.versions)
    if second_has:
        return 1
    return -1 if first_has else 0
def MissedVersions(hv, allv):
    """Return the entries of `allv` that are absent from `hv`.

    The result is a single space-separated string listing the missing
    entries in the reverse of their order in `allv`; it is empty when
    nothing is missing.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append(version)
    return ' '.join(absent[::-1])
def ReportVersions(hlist):
    """Report developers ranked by the number of versions they appear in.

    hlist -- list of developer objects (reads .name and .versions);
             sorted in place, most versions first.

    The version list of the top-ranked developer is the reference set:
    each line shows the developer's version count followed by the
    reference versions that developer is missing.  At most ListCount
    entries are reported.  An empty hlist yields an empty report.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.  An empty or None
    # .versions counts as zero so that it sorts last.
    hlist.sort(key=lambda h: len(h.versions) if h.versions else 0,
               reverse=True)
    BeginReport('Developers represented in the most kernel versions')
    count = 0
    # Guard the lookup: indexing an empty list here used to raise
    # IndexError after the report header had already been written.
    allversions = hlist[0].versions if hlist else []
    for h in hlist:
        ReportLineStr(h.name, len(h.versions), MissedVersions(h.versions, allversions))
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareESOBs(e1, e2):
    """cmp-style comparator: order by the .sobs attribute, largest first."""
    second = e2.sobs
    first = e1.sobs
    return second - first
def ReportByESOBs(elist):
    """Report employers ranked by number of signoffs.

    elist -- list of employer objects (reads .name and .sobs);
             sorted in place, highest .sobs first.

    The sum of .sobs appears in the report title and is the percentage
    divisor.  Employers without signoffs produce no line.  At most
    ListCount entries of the sorted list are examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    elist.sort(key=lambda e: e.sobs, reverse=True)
    totalsobs = sum(e.sobs for e in elist)
    count = 0
    BeginReport('Employers with the most signoffs (total %d)' % totalsobs)
    for e in elist:
        if e.sobs > 0:
            ReportLine(e.name, e.sobs, (e.sobs*100.0)/totalsobs)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareHackers(e1, e2):
    """cmp-style comparator: order by length of the .hackers list, largest first."""
    second = len(e2.hackers)
    first = len(e1.hackers)
    return second - first
def ReportByEHackers(elist):
    """Report employers ranked by number of hackers.

    elist -- list of employer objects (reads .name and .hackers);
             sorted in place, longest .hackers list first.

    The total across all employers appears in the report title and is
    the percentage divisor.  Employers with an empty .hackers list
    produce no line.  At most ListCount entries of the sorted list are
    examined.
    """
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    elist.sort(key=lambda e: len(e.hackers), reverse=True)
    totalhackers = sum(len(e.hackers) for e in elist)
    count = 0
    BeginReport('Employers with the most hackers (total %d)' % totalhackers)
    for e in elist:
        nhackers = len(e.hackers)
        if nhackers > 0:
            ReportLine(e.name, nhackers, (nhackers*100.0)/totalhackers)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def DevReports(hlist, totalchanged, cscount, totalremoved):
    """Run every per-developer report over `hlist`, in a fixed order.

    totalchanged, cscount and totalremoved are handed to the lines-changed,
    changeset and lines-removed reports respectively.
    """
    ReportByPCount(hlist, cscount)
    ReportByLChanged(hlist, totalchanged)
    ReportByLRemoved(hlist, totalremoved)
    # The remaining reports take only the developer list.
    for report in (ReportBySOBs, ReportByRevs, ReportByTests,
                   ReportByTestCreds, ReportByReports, ReportByRepCreds):
        report(hlist)
def EmplReports(elist, totalchanged, cscount):
    """Run every per-employer report over `elist`, in a fixed order.

    cscount and totalchanged are handed to the changeset and lines-changed
    reports respectively.
    """
    ReportByPCEmpl(elist, cscount)
    ReportByELChanged(elist, totalchanged)
    # The remaining reports take only the employer list.
    for report in (ReportByESOBs, ReportByEHackers):
        report(elist)
366 # Who are the unknown hackers?
def IsUnknown(h):
    """Tell whether developer `h` has no known employer.

    True when the name of the employer found at h.employer[0][0][1]
    equals the developer's first email address or the literal
    '(Unknown)'.
    """
    employer_name = h.employer[0][0][1].name
    if h.email[0] == employer_name:
        return True
    return employer_name == '(Unknown)'
def ReportUnknowns(hlist, cscount):
    """Report developers of unknown affiliation, ranked by changesets.

    hlist   -- list of developer objects; left unmodified.
    cscount -- total changeset count, the percentage divisor.

    Developers without patches produce no line.  At most ListCount
    entries of the filtered, sorted list are examined.
    """
    # Trim the list to just the unknowns; try to work properly whether
    # mapping to (Unknown) is happening or not.
    ulist = [h for h in hlist if IsUnknown(h)]
    # key/reverse works on Python 2 and 3 (a positional cmp function does
    # not) and keeps ties in their previous order.
    ulist.sort(key=lambda h: len(h.patches), reverse=True)
    count = 0
    BeginReport('Developers with unknown affiliation')
    for h in ulist:
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        count += 1
        if count >= ListCount:
            break
    EndReport()
def ReportByFileType(hacker_list):
    """Print added/removed line counts broken down by file type.

    hacker_list -- list of developer objects (reads .name and .patches;
                   each patch supplies .filetypes, a mapping of file type
                   to an (added, removed) pair, plus the index constants
                   .ADDED and .REMOVED).

    Three sections are produced: per developer, per file type with the
    contributing developers, and per file type alone.  Section titles go
    through BeginReport(); the detail lines go to standard output.
    """
    # NOTE(review): details are written to sys.stdout, as the original
    # print statements did, not to the configurable Outfile -- confirm
    # whether that is intended.
    write = sys.stdout.write
    total = {}

    BeginReport('Developer contributions by type')
    for h in hacker_list:
        by_hacker = {}
        for patch in h.patches:
            for filetype, (added, removed) in patch.filetypes.items():
                # Get a summary by hacker
                if filetype in by_hacker:
                    by_hacker[filetype][patch.ADDED] += added
                    by_hacker[filetype][patch.REMOVED] += removed
                else:
                    by_hacker[filetype] = [added, removed]

                # Update the totals
                if filetype in total:
                    total[filetype][patch.ADDED] += added
                    total[filetype][patch.REMOVED] += removed
                else:
                    total[filetype] = [added, removed, []]

        # Print a summary by hacker
        write('%s\n' % (h.name,))
        for filetype, counters in by_hacker.items():
            write('\t%s %s\n' % (filetype, counters))
            # `patch` is the last patch seen in the loop above; it is
            # always bound here because by_hacker is only non-empty once
            # at least one patch has been processed.
            h_added = counters[patch.ADDED]
            h_removed = counters[patch.REMOVED]
            total[filetype][2].append([h.name, h_added, h_removed])
    # Close the table BeginReport opened in the HTML output, if any.
    EndReport()

    # Print the global summary
    BeginReport('Contributions by type and developers')
    for filetype, (added, removed, hackers) in total.items():
        write('%s %s %s\n' % (filetype, added, removed))
        for hname, h_added, h_removed in hackers:
            write('\t%s: [%d, %d]\n' % (hname, h_added, h_removed))
    EndReport()

    # Print the very global summary
    BeginReport('General contributions by type')
    for filetype, (added, removed, hackers) in total.items():
        write('%s %s %s\n' % (filetype, added, removed))
    EndReport()
436 # The file access report is a special beast.
def FileAccessReport(name, accesses, total):
    """Write the file access report to the file called `name`.

    name     -- path of the output file; it is created or truncated.
    accesses -- mapping of file name to access count.
    total    -- divisor used to turn each count into a percentage.

    One line is written per file, in sorted file-name order: the count,
    its percentage of `total`, and the file name.

    Raises ZeroDivisionError if `accesses` is non-empty and `total` is 0.
    """
    # `with` closes the file even if a write fails; sorted() replaces
    # keys()/sort(), which breaks on Python 3 where keys() is a view.
    with open(name, 'w') as outf:
        for fname in sorted(accesses):
            a = accesses[fname]
            outf.write('%6d %6.1f%% %s\n' % (a, (100.0*a)/total, fname))