reports.py: Add very basic rST output
[git-dm.git] / reports.py
blob782ebb8fb03229bee4e849c95f5742a7b92aeae8
2 # A new home for the reporting code.
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-16 Eklektix, Inc.
7 # Copyright 2007-16 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
13 import sys
# Report destinations and limits; each one is replaced at run time by the
# matching Set*() function.
Outfile = sys.stdout        # destination of the plain-text report
HTMLfile = rSTfile = None   # optional HTML / reStructuredText destinations
ListCount = 999999          # reports stop after this many rows
def SetOutput(file):
    """Make *file* the module-wide Outfile that the reports write to."""
    global Outfile
    Outfile = file
def SetHTMLOutput(file):
    """Make *file* the module-wide HTMLfile; reports then also emit HTML."""
    global HTMLfile
    HTMLfile = file
def SetrSTOutput(file):
    """Make *file* the module-wide rSTfile; reports then also emit rST."""
    global rSTfile
    rSTfile = file
def SetMaxList(max):
    """Set ListCount, the row count at which the reports stop."""
    global ListCount
    ListCount = max
def Write(stuff):
    """Write *stuff* to the current Outfile."""
    sink = Outfile
    sink.write(stuff)
# HTML written at the start of each report; %s receives the report title.
THead = (
    '<p>\n'
    '<table cellspacing=3 class="OddEven">\n'
    '<tr><th colspan=3>%s</th></tr>\n'
)
# reStructuredText written at the start of each report; %s receives the
# report title.  The option line and the table must be indented under the
# ".. table::" directive, a blank line must separate the option from the
# table, and the header cells must line up with the "=" column rule.  The
# one-space indent and single-space column gap match the rows written by
# ReportLine() and the closing rule written by EndReport().
_RST_RULE = ' ' + '=' * 36 + ' ' + '=' * 5 + '\n'
RHead = (
    '\n'
    '.. table:: %s\n'
    ' :widths: auto\n'
    '\n'
    + _RST_RULE
    + ' ' + 'Name'.ljust(36) + ' Count\n'
    + _RST_RULE
)
def BeginReport(title):
    """Start a report: write the title to Outfile and, for each of HTMLfile
    and rSTfile that is set, the matching table header."""
    Outfile.write('\n%s\n' % title)
    for sink, header in ((HTMLfile, THead), (rSTfile, RHead)):
        if sink:
            sink.write(header % title)
# HTML table rows: name, count, then either a percentage (TRow) or a
# free-form string (TRowStr).
TRow = (
    ' <tr><td>%s</td>'
    '<td align="right">%d</td>'
    '<td align="right">%.1f%%</td></tr>\n'
)
TRowStr = (
    ' <tr><td>%s</td>'
    '<td align="right">%d</td>'
    '<td>%s</td></tr>\n'
)
def ReportLine(text, count, pct):
    """Write one name/count/percentage row to every active output.

    Nothing at all is written when count is zero.
    """
    if count == 0:
        return
    fields = (text, count, pct)
    Outfile.write ('%-25s %4d (%.1f%%)\n' % fields)
    if HTMLfile:
        HTMLfile.write(TRow % fields)
    if rSTfile:
        # The rST row uses the name with surrounding whitespace removed.
        rSTfile.write(" %-36s %d (%.1f%%)\n" % (text.strip(), count, pct))
def ReportLineStr(text, count, extra):
    """Write one name/count/extra-string row to every active output.

    Nothing at all is written when count is zero.  The rST row is written
    with the same leading indent and stripped name as ReportLine() uses, so
    that it stays inside the table opened by BeginReport() and closed by
    EndReport(); *extra* lands in the last column.
    """
    if count == 0:
        return
    Outfile.write('%-25s %4d %s\n' % (text, count, extra))
    if HTMLfile:
        HTMLfile.write(TRowStr % (text, count, extra))
    if rSTfile:
        rSTfile.write(' %-36s %d %s\n' % (text.strip(), count, extra))
def EndReport():
    """Finish a report: close the HTML table and the rST table, for each of
    those outputs that is set."""
    closers = (
        (HTMLfile, '</table>\n\n'),
        (rSTfile, ' ==================================== =====\n\n'),
    )
    for sink, trailer in closers:
        if sink:
            sink.write(trailer)
92 # Comparison and report generation functions.
def ComparePCount(h):
    """Sort key: the number of entries in h.patches."""
    patches = h.patches
    return len(patches)
def ReportByPCount(hlist, cscount):
    """Report developers ranked by number of patches.

    hlist is sorted in place, largest patch count first.  Each developer
    with at least one patch gets a row showing the count and its share of
    cscount; the report stops once ListCount rows have been written.
    """
    hlist.sort(key=ComparePCount, reverse=True)
    count = 0
    BeginReport('Developers with the most changesets')
    for h in hlist:
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
            count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareLChanged(h):
    """Sort key: the value of h.changed."""
    lines_changed = h.changed
    return lines_changed
def ReportByLChanged(hlist, totalchanged):
    """Report developers ranked by h.changed.

    hlist is sorted in place, largest value first.  Each developer with a
    positive h.changed gets a row showing the value and its share of
    totalchanged; the report stops once ListCount rows have been written.
    """
    hlist.sort(key=CompareLChanged, reverse=True)
    count = 0
    BeginReport('Developers with the most changed lines')
    for h in hlist:
        if h.changed > 0:
            ReportLine(h.name, h.changed, (h.changed*100.0)/totalchanged)
            count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareLRemoved(h):
    """Sort key: h.removed minus h.added."""
    net_removed = h.removed - h.added
    return net_removed
def ReportByLRemoved(hlist, totalremoved):
    """Report developers ranked by net lines removed.

    hlist is sorted in place by (removed - added), largest first.  Each
    developer who removed more lines than they added gets a row showing the
    net removal and its share of totalremoved; the report stops once
    ListCount rows have been written.
    """
    hlist.sort(key=CompareLRemoved, reverse=True)
    count = 0
    BeginReport('Developers with the most lines removed')
    for h in hlist:
        # Negative delta means more lines were removed than added.
        delta = h.added - h.removed
        if delta < 0:
            ReportLine(h.name, -delta, (-delta*100.0)/totalremoved)
            count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareEPCount(e):
    """Sort key: the value of e.count."""
    changesets = e.count
    return changesets
def ReportByPCEmpl(elist, cscount):
    """Report employers ranked by e.count, each with its share of cscount.

    elist is sorted in place; at most ListCount rows are written.
    """
    elist.sort(key=CompareEPCount, reverse=True)
    BeginReport('Top changeset contributors by employer')
    shown = 0
    for empl in elist:
        if empl.count != 0:
            share = (empl.count*100.0)/cscount
            ReportLine(empl.name, empl.count, share)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def CompareELChanged(e):
    """Sort key: the value of e.changed."""
    lines_changed = e.changed
    return lines_changed
def ReportByELChanged(elist, totalchanged):
    """Report employers ranked by e.changed, each with its share of
    totalchanged.

    elist is sorted in place; at most ListCount rows are written.
    """
    elist.sort(key=CompareELChanged, reverse=True)
    BeginReport('Top lines changed by employer')
    shown = 0
    for empl in elist:
        if empl.changed != 0:
            share = (empl.changed*100.0)/totalchanged
            ReportLine(empl.name, empl.changed, share)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def CompareSOBs(h):
    """Sort key: the number of entries in h.signoffs."""
    signoffs = h.signoffs
    return len(signoffs)
def ReportBySOBs(hlist):
    """Report developers ranked by number of signoffs.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareSOBs, reverse=True)
    total = sum(len(dev.signoffs) for dev in hlist)
    BeginReport('Developers with the most signoffs (total %d)' % total)
    shown = 0
    for dev in hlist:
        nsobs = len(dev.signoffs)
        if nsobs > 0:
            ReportLine(dev.name, nsobs, (nsobs*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
199 # Reviewer reporting.
def CompareRevs(h):
    """Sort key: the number of entries in h.reviews."""
    reviews = h.reviews
    return len(reviews)
def ReportByRevs(hlist):
    """Report developers ranked by number of reviews.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareRevs, reverse=True)
    total = sum(len(dev.reviews) for dev in hlist)
    BeginReport('Developers with the most reviews (total %d)' % total)
    shown = 0
    for dev in hlist:
        nrevs = len(dev.reviews)
        if nrevs > 0:
            ReportLine(dev.name, nrevs, (nrevs*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
221 # tester reporting.
def CompareTests(h):
    """Sort key: the number of entries in h.tested."""
    tested = h.tested
    return len(tested)
def ReportByTests(hlist):
    """Report developers ranked by the number of entries in h.tested.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareTests, reverse=True)
    total = sum(len(dev.tested) for dev in hlist)
    BeginReport('Developers with the most test credits (total %d)' % total)
    shown = 0
    for dev in hlist:
        ntests = len(dev.tested)
        if ntests > 0:
            ReportLine(dev.name, ntests, (ntests*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def CompareTestCred(h):
    """Sort key: the value of h.testcred."""
    credits = h.testcred
    return credits
def ReportByTestCreds(hlist):
    """Report developers ranked by h.testcred.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareTestCred, reverse=True)
    total = sum(dev.testcred for dev in hlist)
    BeginReport('Developers who gave the most tested-by credits (total %d)' % total)
    shown = 0
    for dev in hlist:
        if dev.testcred > 0:
            ReportLine(dev.name, dev.testcred, (dev.testcred*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
263 # Reporter reporting.
def CompareReports(h):
    """Sort key: the number of entries in h.reports."""
    reports = h.reports
    return len(reports)
def ReportByReports(hlist):
    """Report developers ranked by the number of entries in h.reports.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareReports, reverse=True)
    total = sum(len(dev.reports) for dev in hlist)
    BeginReport('Developers with the most report credits (total %d)' % total)
    shown = 0
    for dev in hlist:
        nreps = len(dev.reports)
        if nreps > 0:
            ReportLine(dev.name, nreps, (nreps*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def CompareRepCred(h):
    """Sort key: the value of h.repcred."""
    credits = h.repcred
    return credits
def ReportByRepCreds(hlist):
    """Report developers ranked by h.repcred.

    hlist is sorted in place.  The total over all developers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    hlist.sort(key=CompareRepCred, reverse=True)
    total = sum(dev.repcred for dev in hlist)
    BeginReport('Developers who gave the most report credits (total %d)' % total)
    shown = 0
    for dev in hlist:
        if dev.repcred > 0:
            ReportLine(dev.name, dev.repcred, (dev.repcred*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
303 # Versions.
def CompareVersionCounts(h):
    """Sort key: the number of entries in h.versions, or -1 when h.versions
    is empty or otherwise false."""
    return len(h.versions) if h.versions else -1
def MissedVersions(hv, allv):
    """Return the entries of allv that are absent from hv, in reverse allv
    order, joined with single spaces."""
    absent = [version for version in allv if version not in hv]
    return ' '.join(reversed(absent))
def ReportVersions(hlist):
    """Report developers ranked by the number of versions they appear in.

    hlist is sorted in place with CompareVersionCounts, largest first.  The
    versions of the first developer after sorting serve as the reference
    list; each row shows a developer's version count followed by the
    reference versions that developer is missing.  At most ListCount
    developers are processed.

    An empty hlist yields an empty report instead of an IndexError, and a
    developer whose versions attribute is empty or None is treated as
    having no versions (ReportLineStr writes nothing for a zero count).
    """
    hlist.sort(key=CompareVersionCounts, reverse=True)
    BeginReport('Developers represented in the most kernel versions')
    count = 0
    allversions = (hlist[0].versions or []) if hlist else []
    for h in hlist:
        versions = h.versions or []
        ReportLineStr(h.name, len(versions), MissedVersions(versions, allversions))
        count += 1
        if count >= ListCount:
            break
    EndReport()
def CompareESOBs(e):
    """Sort key: the value of e.sobs."""
    signoffs = e.sobs
    return signoffs
def ReportByESOBs(elist):
    """Report employers ranked by e.sobs.

    elist is sorted in place.  The total over all employers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    elist.sort(key=CompareESOBs, reverse=True)
    total = sum(empl.sobs for empl in elist)
    BeginReport('Employers with the most signoffs (total %d)' % total)
    shown = 0
    for empl in elist:
        if empl.sobs > 0:
            ReportLine(empl.name, empl.sobs, (empl.sobs*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def CompareHackers(e):
    """Sort key: the number of entries in e.hackers."""
    hackers = e.hackers
    return len(hackers)
def ReportByEHackers(elist):
    """Report employers ranked by the number of entries in e.hackers.

    elist is sorted in place.  The total over all employers goes in the
    title and is the base of each row's percentage; at most ListCount rows
    are written.
    """
    elist.sort(key=CompareHackers, reverse=True)
    total = sum(len(empl.hackers) for empl in elist)
    BeginReport('Employers with the most hackers (total %d)' % total)
    shown = 0
    for empl in elist:
        headcount = len(empl.hackers)
        if headcount > 0:
            ReportLine(empl.name, headcount, (headcount*100.0)/total)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def DevReports(hlist, totalchanged, cscount, totalremoved):
    """Run every per-developer report over hlist, in a fixed order."""
    ReportByPCount(hlist, cscount)
    ReportByLChanged(hlist, totalchanged)
    ReportByLRemoved(hlist, totalremoved)
    # The remaining reports take only the developer list.
    remaining = (
        ReportBySOBs,
        ReportByRevs,
        ReportByTests,
        ReportByTestCreds,
        ReportByReports,
        ReportByRepCreds,
    )
    for report in remaining:
        report(hlist)
def EmplReports(elist, totalchanged, cscount):
    """Run every per-employer report over elist, in a fixed order."""
    ReportByPCEmpl(elist, cscount)
    ReportByELChanged(elist, totalchanged)
    # The remaining reports take only the employer list.
    for report in (ReportByESOBs, ReportByEHackers):
        report(elist)
384 # Who are the unknown hackers?
def IsUnknown(h):
    """Return True when the name of h.employer[0][0][1] equals h.email[0]
    or is the literal '(Unknown)'."""
    affiliation = h.employer[0][0][1].name
    email_is_employer = h.email[0] == affiliation
    return email_is_employer or affiliation == '(Unknown)'
def ReportUnknowns(hlist, cscount):
    """Report the developers for whom IsUnknown() is true, ranked by patch
    count, each with its share of cscount.

    hlist itself is left unsorted; at most ListCount rows are written.
    """
    # Trim the list to just the unknowns; try to work properly whether
    # mapping to (Unknown) is happening or not.
    unknowns = sorted(filter(IsUnknown, hlist), key=ComparePCount, reverse=True)
    BeginReport('Developers with unknown affiliation')
    shown = 0
    for dev in unknowns:
        npatches = len(dev.patches)
        if npatches > 0:
            ReportLine(dev.name, npatches, (npatches*100.0)/cscount)
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
def ReportByFileType(hacker_list):
    """Print added/removed line counts by file type, per developer and overall.

    For every patch of every developer in hacker_list, the (added, removed)
    pairs found in patch.filetypes are accumulated per developer and across
    all developers.  Three sections are produced: one per developer, one
    per file type listing each contributing developer, and one per file
    type with the overall counts.  The rows are written with print(), not
    through Outfile.

    dict.items() and the ``in`` operator are used here instead of
    iteritems() and has_key(), which do not exist on Python 3.
    """
    # NOTE(review): none of the BeginReport() calls below is paired with an
    # EndReport() -- confirm whether the HTML/rST outputs should be closed.

    # filetype -> [added, removed, [[developer name, added, removed], ...]]
    total = {}

    BeginReport('Developer contributions by type')
    for h in hacker_list:
        by_hacker = {}    # filetype -> [added, removed] for this developer
        for patch in h.patches:
            # Get a summary by hacker
            for filetype, (added, removed) in patch.filetypes.items():
                # patch.ADDED / patch.REMOVED index the [added, removed]
                # lists -- presumably 0 and 1; verify against the patch class.
                if filetype in by_hacker:
                    by_hacker[filetype][patch.ADDED] += added
                    by_hacker[filetype][patch.REMOVED] += removed
                else:
                    by_hacker[filetype] = [added, removed]

                # Update the totals
                if filetype in total:
                    total[filetype][patch.ADDED] += added
                    total[filetype][patch.REMOVED] += removed
                else:
                    total[filetype] = [added, removed, []]

        # Print a summary by hacker
        # FIXME why isn't this using Outfile?
        print(h.name)
        for filetype, counters in by_hacker.items():
            print('\t', filetype, counters)
            # by_hacker is non-empty only if the patch loop ran, so "patch"
            # is bound whenever this body executes.
            h_added = counters[patch.ADDED]
            h_removed = counters[patch.REMOVED]
            total[filetype][2].append([h.name, h_added, h_removed])

    # Print the global summary
    BeginReport('Contributions by type and developers')
    for filetype, (added, removed, hackers) in total.items():
        print(filetype, added, removed)
        for name, h_added, h_removed in hackers:
            print('\t%s: [%d, %d]' % (name, h_added, h_removed))

    # Print the very global summary
    BeginReport('General contributions by type')
    for filetype, (added, removed, hackers) in total.items():
        print(filetype, added, removed)
455 # The file access report is a special beast.
def FileAccessReport(name, accesses, total):
    """Write the file access report to the file called *name*.

    accesses maps a file name to its access count.  One line is written
    per file, in sorted file-name order, giving the count, its percentage
    of *total*, and the file name.  The output file is created or
    truncated, and is closed even if a write fails.
    """
    with open(name, 'w') as outf:
        for path in sorted(accesses):
            a = accesses[path]
            outf.write('%6d %6.1f%% %s\n' % (a, (100.0*a)/total, path))