5 # This code is part of the LWN git data miner.
7 # Copyright 2007-9 LWN.net
8 # Copyright 2007-9 Jonathan Corbet <corbet@lwn.net>
10 # This file may be distributed under the terms of the GNU General
11 # Public License, version 2.
14 import database
, csv
, ConfigFile
, reports
15 import getopt
, datetime
16 import os
, re
, sys
, rfc822
, string
17 from patterns
import *
# The date on which this run is taking place.
Today = datetime.date.today()

#
# Remember author names we have griped about, so that each one can be
# checked for before it is recorded again.
#
GripedAuthorNames = []

# Name of the configuration file; this is the value later handed to
# ConfigFile.ConfigFile().
CFName = 'gitdm.config'
43 # -a Andrew Morton's signoffs shadow Linus's
44 # -b dir Specify the base directory to fetch the configuration files
45 # -c cfile Specify a configuration file
46 # -d Output individual developer stats
47 # -D Output date statistics
48 # -h hfile HTML output to hfile
49 # -l count Maximum length for output lists
50 # -o file File for text output
51 # -r pattern Restrict to files matching pattern
52 # -s Ignore author SOB lines
53 # -u Map unknown employers to '(Unknown)'
54 # -x file.csv Export raw statistics as CSV
55 # -z Dump out the hacker database at completion
58 global MapUnknown
, DevReports
59 global DateStats
, AuthorSOBs
, FileFilter
, AkpmOverLt
, DumpDB
60 global CFName
, CSVFile
, DirName
62 opts
, rest
= getopt
.getopt (sys
.argv
[1:], 'ab:dc:Dh:l:o:r:sux:z')
75 reports
.SetHTMLOutput (open (opt
[1], 'w'))
77 reports
.SetMaxList (int (opt
[1]))
79 reports
.SetOutput (open (opt
[1], 'w'))
81 print 'Filter on "%s"' % (opt
[1])
82 FileFilter
= re
.compile (opt
[1])
88 CSVFile
= open (opt
[1], 'w')
89 print "open output file " + opt
[1] + "\n"
95 def LookupStoreHacker (name
, email
):
96 email
= database
.RemapEmail (email
)
97 h
= database
.LookupEmail (email
)
100 elist
= database
.LookupEmployer (email
, MapUnknown
)
101 h
= database
.LookupName (name
)
103 h
.addemail (email
, elist
)
105 return database
.StoreHacker(name
, elist
, email
)
113 def AddDateLines(date
, lines
):
115 print 'Skip big patch (%d)' % lines
118 DateMap
[date
] += lines
120 DateMap
[date
] = lines
122 def PrintDateStats():
123 dates
= DateMap
.keys ()
126 datef
= open ('datelc', 'w')
128 total
+= DateMap
[date
]
129 datef
.write ('%d/%02d/%02d %6d %7d\n' % (date
.year
, date
.month
, date
.day
,
130 DateMap
[date
], total
))
134 # Let's slowly try to move some smarts into this class.
137 def __init__ (self
, commit
):
139 self
.merge
= self
.added
= self
.removed
= 0
140 self
.author
= LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
141 self
.email
= 'unknown@hacker.net'
def addreviewer(self, reviewer):
    """Append reviewer to this object's reviews list."""
    self.reviews.append(reviewer)
def addtester(self, tester):
    """Append tester to this object's testers list."""
    self.testers.append(tester)
def addreporter(self, reporter):
    """Append reporter to this object's reports list."""
    self.reports.append(reporter)
156 # The core hack for grabbing the information about a changeset.
162 m
= Pcommit
.match (NextLine
)
165 NextLine
= sys
.stdin
.readline ()
169 p
= patch(m
.group (1))
170 NextLine
= sys
.stdin
.readline ()
171 ignore
= (FileFilter
is not None)
175 # If this line starts a new commit, drop out.
177 m
= Pcommit
.match (Line
)
180 NextLine
= sys
.stdin
.readline ()
182 # Maybe it's an author line?
184 m
= Pauthor
.match (Line
)
186 p
.email
= database
.RemapEmail (m
.group (2))
187 p
.author
= LookupStoreHacker(m
.group (1), p
.email
)
190 # Could be a signed-off-by:
192 m
= Psob
.match (Line
)
194 email
= database
.RemapEmail (m
.group (2))
195 sobber
= LookupStoreHacker(m
.group (1), email
)
196 if sobber
!= p
.author
or AuthorSOBs
:
197 p
.sobs
.append ((email
, LookupStoreHacker(m
.group (1), m
.group (2))))
200 # Various other tags of interest.
202 m
= Preview
.match (Line
) # Reviewed-by:
204 email
= database
.RemapEmail (m
.group (2))
205 p
.addreviewer (LookupStoreHacker(m
.group (1), email
))
207 m
= Ptest
.match (Line
) # Tested-by:
209 email
= database
.RemapEmail (m
.group (2))
210 p
.addtester (LookupStoreHacker (m
.group (1), email
))
211 p
.author
.testcredit (patch
)
213 m
= Prep
.match (Line
) # Reported-by:
215 email
= database
.RemapEmail (m
.group (2))
216 p
.addreporter (LookupStoreHacker (m
.group (1), email
))
217 p
.author
.reportcredit (patch
)
219 m
= Preptest
.match (Line
) # Reported-and-tested-by:
221 email
= database
.RemapEmail (m
.group (2))
222 h
= LookupStoreHacker (m
.group (1), email
)
225 p
.author
.reportcredit (patch
)
226 p
.author
.testcredit (patch
)
229 # If this one is a merge, make note of the fact.
231 m
= Pmerge
.match (Line
)
236 # See if it's the date.
238 m
= Pdate
.match (Line
)
240 dt
= rfc822
.parsedate(m
.group (2))
241 p
.date
= datetime
.date (dt
[0], dt
[1], dt
[2])
243 sys
.stderr
.write ('Funky date: %s\n' % p
.date
)
247 # If we have a file filter, check for file lines.
250 ignore
= ApplyFileFilter (Line
, ignore
)
252 # OK, maybe it's part of the diff itself.
255 if Padd
.match (Line
):
258 if Prem
.match (Line
):
261 if '@' in p
.author
.name
:
262 GripeAboutAuthorName (p
.author
.name
)
266 def GripeAboutAuthorName (name
):
267 if name
in GripedAuthorNames
:
269 GripedAuthorNames
.append (name
)
270 print '%s is an author name, probably not what you want' % (name
)
272 def ApplyFileFilter (line
, ignore
):
274 # If this is the first file line (--- a/), set ignore one way
277 m
= Pfilea
.match (line
)
280 if FileFilter
.search (file):
284 # For the second line, we can turn ignore off, but not on
286 m
= Pfileb
.match (line
)
289 if FileFilter
.search (file):
294 # If this patch is signed off by both Andrew Morton and Linus Torvalds,
295 # remove the (redundant) Linus signoff.
298 if Linus
in p
.sobs
and Akpm
in p
.sobs
:
299 p
.sobs
.remove (Linus
)
303 # Here starts the real program.
#
# Read the config files.
#
ConfigFile.ConfigFile(CFName, DirName)

#
# Pre-seed the database with a couple of hackers we want to remember.
# Each one is kept as an (email, hacker) pair, which is the form that is
# later tested for membership in a patch's sobs list.
#
Linus = ('torvalds@linux-foundation.org',
         LookupStoreHacker('Linus Torvalds', 'torvalds@linux-foundation.org'))
Akpm = ('akpm@linux-foundation.org',
        LookupStoreHacker('Andrew Morton', 'akpm@linux-foundation.org'))

# Fetch the first line of input into NextLine (the changeset code matches
# against NextLine) and start all of the running line totals at zero.
NextLine = sys.stdin.readline()
TotalChanged = TotalAdded = TotalRemoved = 0
327 print >> sys
.stderr
, 'Grabbing changesets...\r',
329 printcount
= CSCount
= 0
331 if (printcount
% 50) == 0:
332 print >> sys
.stderr
, 'Grabbing changesets...%d\r' % printcount
,
337 # if p.added > 100000 or p.removed > 100000:
338 # print 'Skipping massive add', p.commit
340 if FileFilter
and p
.added
== 0 and p
.removed
== 0:
344 # Record some global information - but only if this patch had
345 # stuff which wasn't ignored.
347 if ((p
.added
+ p
.removed
) > 0 or not FileFilter
) and not p
.merge
:
348 TotalAdded
+= p
.added
349 TotalRemoved
+= p
.removed
350 TotalChanged
+= max (p
.added
, p
.removed
)
351 AddDateLines (p
.date
, max (p
.added
, p
.removed
))
352 empl
= p
.author
.emailemployer (p
.email
, p
.date
)
356 for sobemail
, sobber
in p
.sobs
:
357 empl
= sobber
.emailemployer (sobemail
, p
.date
)
361 p
.author
.addpatch (p
)
362 for sobemail
, sob
in p
.sobs
:
364 for hacker
in p
.reviews
:
366 for hacker
in p
.testers
:
368 for hacker
in p
.reports
:
371 csv
.AccumulatePatch (p
)
372 print >> sys
.stderr
, 'Grabbing changesets...done '
379 hlist
= database
.AllHackers ()
380 elist
= database
.AllEmployers ()
383 if len (h
.patches
) > 0:
388 reports
.Write ('Processed %d csets from %d developers\n' % (CSCount
,
390 reports
.Write ('%d employers found\n' % (nempl
))
391 reports
.Write ('A total of %d lines added, %d removed (delta %d)\n' %
392 (TotalAdded
, TotalRemoved
, TotalAdded
- TotalRemoved
))
393 if TotalChanged
== 0:
394 TotalChanged
= 1 # HACK to avoid div by zero
399 csv
.OutputCSV (CSVFile
)
400 if CSVFile
is not None:
404 reports
.DevReports (hlist
, TotalChanged
, CSCount
, TotalRemoved
)
405 reports
.EmplReports (elist
, TotalChanged
, CSCount
)