Add an introductory comment.
[git-dm.git] / reports.py
blob084089eaab147d176116a5f5ba94a8e3da5104ba
2 # A new home for the reporting code.
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-13 Eklektix, Inc.
7 # Copyright 2007-13 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
13 import sys
# Optional HTML destination; the HTML markup is only written when this is set.
HTMLfile = None
# Upper bound on the number of list entries a report loop walks through.
ListCount = 999999
# Destination of the plain-text report; must provide write().
Outfile = sys.stdout
def SetOutput(file):
    """Make *file* the destination of the plain-text report.

    The object is stored in the module-level ``Outfile``; the reporting
    functions call its ``write()`` method.
    """
    global Outfile
    Outfile = file
def SetHTMLOutput(file):
    """Make *file* the destination of the HTML report.

    The object is stored in the module-level ``HTMLfile``.  A false value
    (such as None) switches the HTML output off, because every writer
    tests ``HTMLfile`` before using it.
    """
    global HTMLfile
    HTMLfile = file
def SetMaxList(max):
    """Set ``ListCount``, the number of entries after which reports stop.

    The report loops compare their running entry count against this value
    and break once it has been reached.
    """
    global ListCount
    ListCount = max
def Write(stuff):
    """Pass *stuff* unchanged to the write() method of the text output."""
    Outfile.write(stuff)
39 # HTML output support stuff.
# CSS class names given to HTML table rows, and the index of the one the
# next row will receive.  The row writers flip the index after each row.
HClasses = ['Even', 'Odd']
HTMLclass = 0

# Markup that opens a report table; the single %s receives the title.
THead = (
    '<p>\n'
    '<table cellspacing=3>\n'
    '<tr><th colspan=3>%s</th></tr>\n'
)
def BeginReport(title):
    """Start a new report headed by *title*.

    The title is written to the text output surrounded by newlines.  When
    an HTML output is set, the table header is written to it as well and
    the row-class index is reset so the first row uses HClasses[0].
    """
    global HTMLclass

    Outfile.write('\n%s\n' % title)
    if not HTMLfile:
        return
    HTMLfile.write(THead % title)
    HTMLclass = 0
# HTML row with a percentage column: (row class, label, count, percent).
TRow = (
    ' <tr class="%s">\n'
    '<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>\n'
)

# HTML row with a free-text last column: (row class, label, count, text).
TRowStr = (
    ' <tr class="%s">\n'
    '<td>%s</td><td align="right">%d</td><td>%s</td></tr>\n'
)
def ReportLine(text, count, pct):
    """Emit one report entry: a label, a count and a percentage.

    Nothing is written when *count* is zero.  Otherwise a formatted line
    goes to the text output and, when an HTML output is set, a table row
    goes there too, after which the row-class index is toggled.
    """
    global HTMLclass
    if count == 0:
        return
    Outfile.write('%-25s %4d (%.1f%%)\n' % (text, count, pct))
    if not HTMLfile:
        return
    # NOTE(review): text is inserted into the markup without HTML escaping.
    row_class = HClasses[HTMLclass]
    HTMLfile.write(TRow % (row_class, text, count, pct))
    HTMLclass = HTMLclass ^ 1
def ReportLineStr(text, count, extra):
    """Emit one report entry: a label, a count and a trailing string.

    Nothing is written when *count* is zero.  Otherwise a formatted line
    goes to the text output and, when an HTML output is set, a table row
    goes there too, after which the row-class index is toggled.
    """
    global HTMLclass
    if count == 0:
        return
    Outfile.write('%-25s %4d %s\n' % (text, count, extra))
    if not HTMLfile:
        return
    # NOTE(review): text and extra are inserted without HTML escaping.
    row_class = HClasses[HTMLclass]
    HTMLfile.write(TRowStr % (row_class, text, count, extra))
    HTMLclass = HTMLclass ^ 1
def EndReport():
    """Close the HTML table of the current report.

    Only the HTML output is affected; nothing is written to the text
    output, and nothing at all when no HTML output is set.
    """
    if not HTMLfile:
        return
    HTMLfile.write('</table>\n\n')
88 # Comparison and report generation functions.
def ComparePCount(h1, h2):
    """cmp-style comparator: more entries in ``patches`` sorts first.

    Returns len(h2.patches) minus len(h1.patches), so the result is
    negative when h1 has more patches than h2 and zero on a tie.
    """
    second, first = len(h2.patches), len(h1.patches)
    return second - first
def ReportByPCount(hlist, cscount):
    """Report developers ranked by the number of patches they have.

    hlist is sorted in place, largest len(h.patches) first.  Each entry
    with at least one patch is reported together with its share of
    *cscount* as a percentage.  The loop stops once ListCount entries have
    been visited.
    """
    # A key function replaces the positional comparator, which list.sort()
    # only accepts on Python 2.  The sort stays stable, so entries with
    # equal counts keep their incoming order exactly as before.
    hlist.sort(key=lambda h: len(h.patches), reverse=True)
    BeginReport('Developers with the most changesets')
    for seen, h in enumerate(hlist, 1):
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        if seen >= ListCount:
            break
    EndReport()
def CompareLChanged(h1, h2):
    """cmp-style comparator: larger max(added, removed) sorts first.

    Returns the figure for h2 minus the figure for h1.
    """
    second = max(h2.added, h2.removed)
    first = max(h1.added, h1.removed)
    return second - first
def ReportByLChanged(hlist, totalchanged):
    """Report developers ranked by the number of lines they changed.

    The figure used for an entry is max(h.added, h.removed).  hlist is
    sorted in place, largest figure first.  Entries whose added and removed
    counts sum to more than zero are reported with their share of
    *totalchanged* as a percentage.  The loop stops once ListCount entries
    have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: max(h.added, h.removed), reverse=True)
    BeginReport('Developers with the most changed lines')
    for seen, h in enumerate(hlist, 1):
        changed = max(h.added, h.removed)
        if (h.added + h.removed) > 0:
            ReportLine(h.name, changed, (changed*100.0)/totalchanged)
        if seen >= ListCount:
            break
    EndReport()
def CompareLRemoved(h1, h2):
    """cmp-style comparator: larger (removed - added) sorts first.

    Returns the net removal of h2 minus the net removal of h1.
    """
    second = h2.removed - h2.added
    first = h1.removed - h1.added
    return second - first
def ReportByLRemoved(hlist, totalremoved):
    """Report developers ranked by net lines removed.

    hlist is sorted in place, largest (h.removed - h.added) first.  Only
    entries that removed more than they added are reported.  The value
    shown is that net removal together with its share of *totalremoved* as
    a percentage.  The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: h.removed - h.added, reverse=True)
    BeginReport('Developers with the most lines removed')
    for seen, h in enumerate(hlist, 1):
        delta = h.added - h.removed
        if delta < 0:
            ReportLine(h.name, -delta, (-delta*100.0)/totalremoved)
        if seen >= ListCount:
            break
    EndReport()
def CompareEPCount(e1, e2):
    """cmp-style comparator: larger ``count`` sorts first.

    Returns e2.count minus e1.count.
    """
    second, first = e2.count, e1.count
    return second - first
def ReportByPCEmpl(elist, cscount):
    """Report employers ranked by their ``count`` attribute.

    elist is sorted in place, largest e.count first.  Each entry with a
    non-zero count is reported with its share of *cscount* as a
    percentage.  The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    elist.sort(key=lambda e: e.count, reverse=True)
    BeginReport('Top changeset contributors by employer')
    for seen, e in enumerate(elist, 1):
        if e.count != 0:
            ReportLine(e.name, e.count, (e.count*100.0)/cscount)
        if seen >= ListCount:
            break
    EndReport()
def CompareELChanged(e1, e2):
    """cmp-style comparator: larger ``changed`` sorts first.

    Returns e2.changed minus e1.changed.
    """
    second, first = e2.changed, e1.changed
    return second - first
def ReportByELChanged(elist, totalchanged):
    """Report employers ranked by their ``changed`` attribute.

    elist is sorted in place, largest e.changed first.  Each entry with a
    non-zero value is reported with its share of *totalchanged* as a
    percentage.  The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    elist.sort(key=lambda e: e.changed, reverse=True)
    BeginReport('Top lines changed by employer')
    for seen, e in enumerate(elist, 1):
        if e.changed != 0:
            ReportLine(e.name, e.changed, (e.changed*100.0)/totalchanged)
        if seen >= ListCount:
            break
    EndReport()
def CompareSOBs(h1, h2):
    """cmp-style comparator: more entries in ``signoffs`` sorts first.

    Returns len(h2.signoffs) minus len(h1.signoffs).
    """
    second, first = len(h2.signoffs), len(h1.signoffs)
    return second - first
def ReportBySOBs(hlist):
    """Report developers ranked by the length of their ``signoffs`` list.

    hlist is sorted in place, most signoffs first.  The grand total over
    all entries appears in the report title and is the base of each
    percentage.  Entries without signoffs are not reported.  The loop
    stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: len(h.signoffs), reverse=True)
    totalsobs = sum(len(h.signoffs) for h in hlist)
    BeginReport('Developers with the most signoffs (total %d)' % totalsobs)
    for seen, h in enumerate(hlist, 1):
        scount = len(h.signoffs)
        # scount > 0 implies totalsobs > 0, so the division is safe.
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalsobs)
        if seen >= ListCount:
            break
    EndReport()
197 # Reviewer reporting.
def CompareRevs(h1, h2):
    """cmp-style comparator: more entries in ``reviews`` sorts first.

    Returns len(h2.reviews) minus len(h1.reviews).
    """
    second, first = len(h2.reviews), len(h1.reviews)
    return second - first
def ReportByRevs(hlist):
    """Report developers ranked by the length of their ``reviews`` list.

    hlist is sorted in place, most reviews first.  The grand total over
    all entries appears in the report title and is the base of each
    percentage.  Entries without reviews are not reported.  The loop stops
    once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: len(h.reviews), reverse=True)
    totalrevs = sum(len(h.reviews) for h in hlist)
    BeginReport('Developers with the most reviews (total %d)' % totalrevs)
    for seen, h in enumerate(hlist, 1):
        scount = len(h.reviews)
        # scount > 0 implies totalrevs > 0, so the division is safe.
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalrevs)
        if seen >= ListCount:
            break
    EndReport()
# Tester reporting.
def CompareTests(h1, h2):
    """cmp-style comparator: more entries in ``tested`` sorts first.

    Returns len(h2.tested) minus len(h1.tested).
    """
    second, first = len(h2.tested), len(h1.tested)
    return second - first
def ReportByTests(hlist):
    """Report developers ranked by the length of their ``tested`` list.

    hlist is sorted in place, longest list first.  The grand total over
    all entries appears in the report title and is the base of each
    percentage.  Entries with an empty list are not reported.  The loop
    stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: len(h.tested), reverse=True)
    totaltests = sum(len(h.tested) for h in hlist)
    BeginReport('Developers with the most test credits (total %d)' % totaltests)
    for seen, h in enumerate(hlist, 1):
        scount = len(h.tested)
        # scount > 0 implies totaltests > 0, so the division is safe.
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totaltests)
        if seen >= ListCount:
            break
    EndReport()
def CompareTestCred(h1, h2):
    """cmp-style comparator: larger ``testcred`` sorts first.

    Returns h2.testcred minus h1.testcred.
    """
    second, first = h2.testcred, h1.testcred
    return second - first
def ReportByTestCreds(hlist):
    """Report developers ranked by their ``testcred`` value.

    hlist is sorted in place, largest h.testcred first.  The sum over all
    entries appears in the report title and is the base of each
    percentage.  Entries whose value is not positive are not reported.
    The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: h.testcred, reverse=True)
    totaltests = sum(h.testcred for h in hlist)
    BeginReport('Developers who gave the most tested-by credits (total %d)' % totaltests)
    for seen, h in enumerate(hlist, 1):
        if h.testcred > 0:
            ReportLine(h.name, h.testcred, (h.testcred*100.0)/totaltests)
        if seen >= ListCount:
            break
    EndReport()
261 # Reporter reporting.
def CompareReports(h1, h2):
    """cmp-style comparator: more entries in ``reports`` sorts first.

    Returns len(h2.reports) minus len(h1.reports).
    """
    second, first = len(h2.reports), len(h1.reports)
    return second - first
def ReportByReports(hlist):
    """Report developers ranked by the length of their ``reports`` list.

    hlist is sorted in place, longest list first.  The grand total over
    all entries appears in the report title and is the base of each
    percentage.  Entries with an empty list are not reported.  The loop
    stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: len(h.reports), reverse=True)
    totalreps = sum(len(h.reports) for h in hlist)
    BeginReport('Developers with the most report credits (total %d)' % totalreps)
    for seen, h in enumerate(hlist, 1):
        scount = len(h.reports)
        # scount > 0 implies totalreps > 0, so the division is safe.
        if scount > 0:
            ReportLine(h.name, scount, (scount*100.0)/totalreps)
        if seen >= ListCount:
            break
    EndReport()
def CompareRepCred(h1, h2):
    """cmp-style comparator: larger ``repcred`` sorts first.

    Returns h2.repcred minus h1.repcred.
    """
    second, first = h2.repcred, h1.repcred
    return second - first
def ReportByRepCreds(hlist):
    """Report developers ranked by their ``repcred`` value.

    hlist is sorted in place, largest h.repcred first.  The sum over all
    entries appears in the report title and is the base of each
    percentage.  Entries whose value is not positive are not reported.
    The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    hlist.sort(key=lambda h: h.repcred, reverse=True)
    totalreps = sum(h.repcred for h in hlist)
    BeginReport('Developers who gave the most report credits (total %d)' % totalreps)
    for seen, h in enumerate(hlist, 1):
        if h.repcred > 0:
            ReportLine(h.name, h.repcred, (h.repcred*100.0)/totalreps)
        if seen >= ListCount:
            break
    EndReport()
301 # Versions.
def CompareVersionCounts(h1, h2):
    """cmp-style comparator: more entries in ``versions`` sorts first.

    An entry whose ``versions`` is false (empty or None) sorts after any
    entry that has versions; two such entries compare equal.
    """
    v1, v2 = h1.versions, h2.versions
    if v1 and v2:
        return len(v2) - len(v1)
    if v2:
        return 1
    return -1 if v1 else 0
def MissedVersions(hv, allv):
    """Return the members of *allv* that are absent from *hv*.

    The result is one space-separated string with the members listed in
    the reverse of their order in *allv*.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append(version)
    return ' '.join(absent[::-1])
def ReportVersions(hlist):
    """Report developers ranked by the number of versions they appear in.

    hlist is sorted in place, longest ``versions`` first, with entries
    whose ``versions`` is false placed last.  The versions of the
    top-ranked entry serve as the reference set.  Each line shows the
    entry's version count followed by the reference versions it lacks.
    The loop stops once ListCount entries have been visited.

    An empty hlist yields an empty report instead of raising IndexError.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  A false ``versions`` counts as zero, matching the
    # ordering the old comparator gave such entries.
    hlist.sort(key=lambda h: len(h.versions) if h.versions else 0,
               reverse=True)
    BeginReport('Developers represented in the most kernel versions')
    # NOTE(review): the old comparator special-cased false ``versions``,
    # which suggests None is possible; it is treated here as "no versions".
    allversions = (hlist[0].versions or []) if hlist else []
    for seen, h in enumerate(hlist, 1):
        versions = h.versions or []
        ReportLineStr(h.name, len(versions),
                      MissedVersions(versions, allversions))
        if seen >= ListCount:
            break
    EndReport()
def CompareESOBs(e1, e2):
    """cmp-style comparator: larger ``sobs`` sorts first.

    Returns e2.sobs minus e1.sobs.
    """
    second, first = e2.sobs, e1.sobs
    return second - first
def ReportByESOBs(elist):
    """Report employers ranked by their ``sobs`` value.

    elist is sorted in place, largest e.sobs first.  The sum over all
    entries appears in the report title and is the base of each
    percentage.  Entries whose value is not positive are not reported.
    The loop stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    elist.sort(key=lambda e: e.sobs, reverse=True)
    totalsobs = sum(e.sobs for e in elist)
    BeginReport('Employers with the most signoffs (total %d)' % totalsobs)
    for seen, e in enumerate(elist, 1):
        if e.sobs > 0:
            ReportLine(e.name, e.sobs, (e.sobs*100.0)/totalsobs)
        if seen >= ListCount:
            break
    EndReport()
def CompareHackers(e1, e2):
    """cmp-style comparator: more entries in ``hackers`` sorts first.

    Returns len(e2.hackers) minus len(e1.hackers).
    """
    second, first = len(e2.hackers), len(e1.hackers)
    return second - first
def ReportByEHackers(elist):
    """Report employers ranked by the length of their ``hackers`` list.

    elist is sorted in place, longest list first.  The grand total over
    all entries appears in the report title and is the base of each
    percentage.  Entries with an empty list are not reported.  The loop
    stops once ListCount entries have been visited.
    """
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    elist.sort(key=lambda e: len(e.hackers), reverse=True)
    totalhackers = sum(len(e.hackers) for e in elist)
    BeginReport('Employers with the most hackers (total %d)' % totalhackers)
    for seen, e in enumerate(elist, 1):
        nhackers = len(e.hackers)
        # nhackers > 0 implies totalhackers > 0, so the division is safe.
        if nhackers > 0:
            ReportLine(e.name, nhackers, (nhackers*100.0)/totalhackers)
        if seen >= ListCount:
            break
    EndReport()
def DevReports(hlist, totalchanged, cscount, totalremoved):
    """Run every per-developer report on *hlist*, in a fixed order.

    The first three reports take a total to compute percentages against;
    the remaining ones derive their own totals from hlist.
    """
    ReportByPCount(hlist, cscount)
    ReportByLChanged(hlist, totalchanged)
    ReportByLRemoved(hlist, totalremoved)
    list_only_reports = (
        ReportBySOBs,
        ReportByRevs,
        ReportByTests,
        ReportByTestCreds,
        ReportByReports,
        ReportByRepCreds,
    )
    for report in list_only_reports:
        report(hlist)
def EmplReports(elist, totalchanged, cscount):
    """Run every per-employer report on *elist*, in a fixed order."""
    ReportByPCEmpl(elist, cscount)
    ReportByELChanged(elist, totalchanged)
    for report in (ReportByESOBs, ReportByEHackers):
        report(elist)
386 # Who are the unknown hackers?
def IsUnknown(h):
    """Tell whether hacker *h* has no known employer.

    The employer name is read from h.employer[0][0][1].name.  The hacker
    counts as unknown when that name equals the first address in h.email
    or is the literal '(Unknown)'.
    """
    empl = h.employer[0][0][1].name
    if h.email[0] == empl:
        return True
    return empl == '(Unknown)'
def ReportUnknowns(hlist, cscount):
    """Report the developers of unknown affiliation, ranked by patch count.

    hlist itself is left untouched; a filtered copy is sorted.  Each entry
    with at least one patch is reported with its share of *cscount* as a
    percentage.  The loop stops once ListCount entries have been visited.
    """
    #
    # Trim the list to just the unknowns; try to work properly whether
    # mapping to (Unknown) is happening or not.
    #
    ulist = [h for h in hlist if IsUnknown(h)]
    # Key-based sort: the positional comparator form of list.sort() exists
    # only on Python 2.  Stability keeps tied entries in their old order.
    ulist.sort(key=lambda h: len(h.patches), reverse=True)
    BeginReport('Developers with unknown affiliation')
    for seen, h in enumerate(ulist, 1):
        pcount = len(h.patches)
        if pcount > 0:
            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
        if seen >= ListCount:
            break
    EndReport()
def ReportByFileType(hacker_list):
    """Print added/removed line counts broken down by file type.

    Three sections are produced: per developer, per file type with the
    contributing developers, and per file type alone.  The section titles
    go through BeginReport(); the detail lines are written to sys.stdout.

    NOTE(review): the detail lines bypass Outfile, so SetOutput() does not
    redirect them.  That matches the previous behaviour; confirm whether
    it is intended.
    """
    # filetype -> [added, removed, [[developer name, added, removed], ...]]
    total = {}

    BeginReport('Developer contributions by type')
    for h in hacker_list:
        by_hacker = {}
        for patch in h.patches:
            # Get a summary by hacker
            for (filetype, (added, removed)) in patch.filetypes.items():
                if filetype in by_hacker:
                    by_hacker[filetype][patch.ADDED] += added
                    by_hacker[filetype][patch.REMOVED] += removed
                else:
                    by_hacker[filetype] = [added, removed]

                # Update the totals
                if filetype in total:
                    total[filetype][patch.ADDED] += added
                    total[filetype][patch.REMOVED] += removed
                else:
                    total[filetype] = [added, removed, []]

        # Print a summary by hacker
        sys.stdout.write('%s\n' % (h.name,))
        for filetype, counters in by_hacker.items():
            # No space after the tab: this is what the Python 2 print
            # statement produced for  print '\t', filetype, counters.
            sys.stdout.write('\t%s %s\n' % (filetype, counters))
            # by_hacker is only non-empty when the patch loop ran, so
            # ``patch`` is bound here.  Presumably ADDED/REMOVED are the
            # list indices 0 and 1 -- TODO confirm against the patch class.
            h_added = counters[patch.ADDED]
            h_removed = counters[patch.REMOVED]
            total[filetype][2].append([h.name, h_added, h_removed])

    # Print the global summary
    BeginReport('Contributions by type and developers')
    for filetype, (added, removed, hackers) in total.items():
        sys.stdout.write('%s %s %s\n' % (filetype, added, removed))
        for hacker_name, h_added, h_removed in hackers:
            sys.stdout.write('\t%s: [%d, %d]\n'
                             % (hacker_name, h_added, h_removed))

    # Print the very global summary
    BeginReport('General contributions by type')
    for filetype, (added, removed, hackers) in total.items():
        sys.stdout.write('%s %s %s\n' % (filetype, added, removed))
456 # The file access report is a special beast.
def FileAccessReport(name, accesses, total):
    """Write the file access report to the file called *name*.

    *accesses* maps a file name to its access count.  One line per file is
    written, in sorted file-name order, holding the count, the count as a
    percentage of *total*, and the file name.

    The output file is created or truncated.  It is closed even when a
    write fails.
    """
    with open(name, 'w') as outf:
        # sorted() works on any mapping in both Python 2 and 3, unlike
        # calling .sort() on the result of keys().
        for path in sorted(accesses):
            a = accesses[path]
            outf.write('%6d %6.1f%% %s\n' % (a, (100.0*a)/total, path))