A quick and dirty treeplot utility
[git-dm.git] / gitdm
blob178adf863324c55906669fa047c57e8c9e0bdc19
1 #!/usr/bin/python
5 # This code is part of the LWN git data miner.
7 # Copyright 2007-8 LWN.net
8 # Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
10 # This file may be distributed under the terms of the GNU General
11 # Public License, version 2.
14 import database, csv, ConfigFile, reports
15 import getopt, datetime
16 import os, re, sys, rfc822, string
17 from patterns import *
# Reference date, taken once at start-up; grabpatch() clamps any commit
# date later than this back to it.
Today = datetime.date.today()

#
# Control options.  These are the defaults; ParseOpts() overrides them
# from the command line.
#
MapUnknown = 0           # set by -u; handed to database.LookupEmployer()
DevReports = 1           # cleared by -d; gates reports.DevReports()
DateStats = 0            # set by -D; write date statistics and exit
AuthorSOBs = 1           # cleared by -s; count the author's own signoffs
FileFilter = None        # compiled regex from -r, or None for no filter
CSVFile = None           # open file from -x, or None
AkpmOverLt = 0           # set by -a; Andrew Morton's signoff shadows Linus's
DumpDB = 0               # set by -z; dump the hacker database at the end
CFName = 'gitdm.config'  # configuration file name, replaced by -c
33 # Options:
35 # -a Andrew Morton's signoffs shadow Linus's
36 # -c cfile Specify a configuration file
37 # -d Omit individual developer stats
38 # -D Output date statistics
39 # -h hfile HTML output to hfile
40 # -l count Maximum length for output lists
41 # -o file File for text output
42 # -r pattern Restrict to files matching pattern
43 # -s Ignore author SOB lines
44 # -u Map unknown employers to '(Unknown)'
45 # -x file.csv Export raw statistics as CSV
46 # -z Dump out the hacker database at completion
def ParseOpts():
    """Parse sys.argv[1:] and update the module-level control options.

    Options:

      -a           set AkpmOverLt (Andrew Morton's signoffs shadow Linus's)
      -c cfile     use cfile as the configuration file (CFName)
      -d           clear DevReports (no individual developer reports)
      -D           set DateStats
      -h hfile     open hfile for writing and pass it to reports.SetHTMLOutput
      -l count     pass int(count) to reports.SetMaxList
      -o file      open file for writing and pass it to reports.SetOutput
      -r pattern   compile pattern into FileFilter
      -s           clear AuthorSOBs
      -u           set MapUnknown
      -x file.csv  open file.csv for writing as CSVFile
      -z           set DumpDB

    Non-option arguments are ignored.  Errors from getopt, open(), int()
    and re.compile() are not caught here.
    """
    global MapUnknown, DevReports
    global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
    global CFName, CSVFile

    flags, _ = getopt.getopt(sys.argv[1:], 'adc:Dh:l:o:r:sux:z')
    for flag, value in flags:
        # Plain switches first ...
        if flag == '-a':
            AkpmOverLt = 1
        elif flag == '-d':
            DevReports = 0
        elif flag == '-D':
            DateStats = 1
        elif flag == '-s':
            AuthorSOBs = 0
        elif flag == '-u':
            MapUnknown = 1
        elif flag == '-z':
            DumpDB = 1
        # ... then the options which carry a value.
        elif flag == '-c':
            CFName = value
        elif flag == '-h':
            reports.SetHTMLOutput(open(value, 'w'))
        elif flag == '-l':
            reports.SetMaxList(int(value))
        elif flag == '-o':
            reports.SetOutput(open(value, 'w'))
        elif flag == '-r':
            print('Filter on "%s"' % (value))
            FileFilter = re.compile(value)
        elif flag == '-x':
            CSVFile = open(value, 'w')
            print("open output file " + value + "\n")
def LookupStoreHacker(name, email):
    """Return the database entry for a developer, creating it if needed.

    The address is first passed through database.RemapEmail().  Lookup is
    then tried by address and, failing that, by name; a match by name has
    the new address (with its employer list) added to it.  When neither
    lookup succeeds the result of database.StoreHacker() is returned.
    """
    addr = database.RemapEmail(email)
    known = database.LookupEmail(addr)
    if known:
        # This address has been seen before.
        return known

    employers = database.LookupEmployer(addr, MapUnknown)
    known = database.LookupName(name)
    if not known:
        # Entirely new developer.
        return database.StoreHacker(name, employers, addr)

    # Known developer using a new address.
    known.addemail(addr, employers)
    return known
97 # Date tracking.
# Maps a date to the number of changed lines accumulated for that date.
DateMap = {}

def AddDateLines(date, lines):
    """Add `lines` to the running count kept for `date` in DateMap.

    Patches of more than 1000000 lines are reported on standard output
    and left out of the count.
    """
    if lines > 1000000:
        print('Skip big patch (%d)' % lines)
        return
    DateMap[date] = DateMap.get(date, 0) + lines
def PrintDateStats():
    """Write the per-date line counts in DateMap to the file 'datelc'.

    The file is created (or truncated) in the current directory.  One
    line is written per date, in ascending date order, holding the date
    as year/month/day, the count recorded for that date and the running
    total up to and including it.  The keys of DateMap must provide
    .year, .month and .day attributes.
    """
    total = 0
    datef = open('datelc', 'w')
    # Close the file even if formatting or writing fails, so the output
    # is flushed and the descriptor is not leaked.
    try:
        for date in sorted(DateMap):
            total += DateMap[date]
            datef.write('%d/%02d/%02d %6d %7d\n' % (date.year, date.month,
                                                    date.day, DateMap[date],
                                                    total))
    finally:
        datef.close()
123 # Let's slowly try to move some smarts into this class.
class patch:
    """What is known about a single changeset.

    Attributes set by the constructor:
      commit   -- the commit identifier handed to the constructor
      merge    -- flag, initially 0
      added    -- count of added lines, initially 0
      removed  -- count of removed lines, initially 0
      author   -- developer entry; a placeholder 'Unknown hacker' at first
      email    -- author address; 'unknown@hacker.net' at first
      sobs, reviews, testers, reports -- lists, initially empty
    """

    def __init__(self, commit):
        self.commit = commit
        self.merge = 0
        self.added = 0
        self.removed = 0
        self.author = LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
        self.email = 'unknown@hacker.net'
        self.sobs = []
        self.reviews = []
        self.testers = []
        self.reports = []

    def addreviewer(self, reviewer):
        """Append reviewer to the reviews list."""
        self.reviews.append(reviewer)

    def addtester(self, tester):
        """Append tester to the testers list."""
        self.testers.append(tester)

    def addreporter(self, reporter):
        """Append reporter to the reports list."""
        self.reports.append(reporter)
145 # The core hack for grabbing the information about a changeset.
def grabpatch():
    """Read one changeset from standard input and return it as a patch.

    The global NextLine holds one line of lookahead: on entry it is the
    next unprocessed line, and on return it is the commit line of the
    following changeset (or an empty string at end of input).  Lines are
    skipped until one matches Pcommit; None is returned if the input ends
    before such a line is found.

    The lines of the changeset are then matched against the patterns for
    the author, the signoff and credit tags, the merge marker, the date
    and the diff itself, filling in the patch object as they are found.

    For a non-merge changeset which changed lines (or when no file filter
    is active) the global totals TotalAdded, TotalRemoved and TotalChanged
    are updated, the lines are recorded against the patch date, and the
    employers of the author and of the signers are credited.
    """
    global NextLine, TotalAdded, TotalRemoved, TotalChanged

    #
    # Skip forward to the start of the next commit.
    #
    while True:
        m = Pcommit.match(NextLine)
        if m:
            break
        NextLine = sys.stdin.readline()
        if not NextLine:
            return

    p = patch(m.group(1))
    NextLine = sys.stdin.readline()
    # With a file filter in place, diff lines are ignored until a file
    # line says otherwise.
    ignore = (FileFilter is not None)
    while NextLine:
        Line = NextLine
        #
        # If this line starts a new commit, drop out.  NextLine is left
        # alone so that the next call starts from this commit line.
        #
        m = Pcommit.match(Line)
        if m:
            break
        NextLine = sys.stdin.readline()
        #
        # Maybe it's an author line?
        #
        m = Pauthor.match(Line)
        if m:
            p.email = database.RemapEmail(m.group(2))
            p.author = LookupStoreHacker(m.group(1), p.email)
            continue
        #
        # Could be a signed-off-by:
        #
        m = Psob.search(Line)
        if m:
            email = database.RemapEmail(m.group(2))
            sobber = LookupStoreHacker(m.group(1), email)
            # The author's own signoff only counts when AuthorSOBs is set.
            if sobber != p.author or AuthorSOBs:
                p.sobs.append((email,
                               LookupStoreHacker(m.group(1), m.group(2))))
            continue
        #
        # Various other tags of interest.  The credit calls are handed
        # this changeset (p), not the patch class itself.
        #
        m = Preview.search(Line)  # Reviewed-by:
        if m:
            email = database.RemapEmail(m.group(2))
            p.addreviewer(LookupStoreHacker(m.group(1), email))
            continue
        m = Ptest.search(Line)  # Tested-by:
        if m:
            email = database.RemapEmail(m.group(2))
            p.addtester(LookupStoreHacker(m.group(1), email))
            p.author.testcredit(p)
            continue
        m = Prep.search(Line)  # Reported-by:
        if m:
            email = database.RemapEmail(m.group(2))
            p.addreporter(LookupStoreHacker(m.group(1), email))
            p.author.reportcredit(p)
            continue
        m = Preptest.search(Line)  # Reported-and-tested-by:
        if m:
            email = database.RemapEmail(m.group(2))
            h = LookupStoreHacker(m.group(1), email)
            p.addreporter(h)
            p.addtester(h)
            p.author.reportcredit(p)
            p.author.testcredit(p)
            continue
        #
        # If this one is a merge, make note of the fact.
        #
        m = Pmerge.match(Line)
        if m:
            p.merge = 1
            continue
        #
        # See if it's the date.  Dates in the future are reported on
        # stderr and replaced by today's date.
        #
        m = Pdate.match(Line)
        if m:
            dt = rfc822.parsedate(m.group(2))
            p.date = datetime.date(dt[0], dt[1], dt[2])
            if p.date > Today:
                sys.stderr.write('Funky date: %s\n' % p.date)
                p.date = Today
            continue
        #
        # If we have a file filter, check for file lines.
        #
        if FileFilter:
            ignore = ApplyFileFilter(Line, ignore)
        #
        # OK, maybe it's part of the diff itself.
        #
        if not ignore:
            if Padd.match(Line):
                p.added += 1
                continue
            if Prem.match(Line):
                p.removed += 1
    #
    # Record some global information - but only if this patch had
    # stuff which wasn't ignored.  This work should be done
    # elsewhere.
    #
    # NOTE(review): p.date is only assigned when a date line was matched
    # above; a non-merge changeset without one fails here with
    # AttributeError.
    # NOTE(review): the nesting of this section was reconstructed from a
    # copy of the file which had lost its indentation - confirm that the
    # signoff loop belongs inside this condition.
    #
    if ((p.added + p.removed) > 0 or not FileFilter) and not p.merge:
        TotalAdded += p.added
        TotalRemoved += p.removed
        TotalChanged += max(p.added, p.removed)
        AddDateLines(p.date, max(p.added, p.removed))
        empl = p.author.emailemployer(p.email, p.date)
        empl.AddCSet(p)
        if AkpmOverLt:
            TrimLTSOBs(p)
        for sobemail, sobber in p.sobs:
            empl = sobber.emailemployer(sobemail, p.date)
            empl.AddSOB()
    return p
def ApplyFileFilter(line, ignore):
    """Return the new value of the "ignore" flag after seeing `line`.

    A line matching Pfilea (the first file line, "--- a/") sets the flag
    outright: 0 if the file name matches FileFilter, 1 if it does not.
    A line matching Pfileb (the second file line) can only clear the
    flag.  Any other line leaves `ignore` as it was.
    """
    first = Pfilea.match(line)
    if first is not None:
        if FileFilter.search(first.group(1)) is None:
            return 1
        return 0

    # For the second line, we can turn ignore off, but not on.
    second = Pfileb.match(line)
    if second is not None and FileFilter.search(second.group(1)) is not None:
        return 0
    return ignore
291 # If this patch is signed off by both Andrew Morton and Linus Torvalds,
292 # remove the (redundant) Linus signoff.
def TrimLTSOBs(p):
    """Drop Linus's signoff from p.sobs when Andrew Morton's is there too.

    p.sobs is modified in place; one entry equal to the module-level
    Linus tuple is removed, and only if an entry equal to Akpm is also
    present.
    """
    signoffs = p.sobs
    if Linus not in signoffs:
        return
    if Akpm not in signoffs:
        return
    signoffs.remove(Linus)
300 # Here starts the real program.
ParseOpts()

#
# Read the config files.
#
ConfigFile.ConfigFile(CFName)

#
# Let's pre-seed the database with a couple of hackers
# we want to remember.  Each is kept as an (email, hacker) pair, the
# same shape as the entries of a patch's sobs list.
#
Linus = ('torvalds@linux-foundation.org',
         LookupStoreHacker('Linus Torvalds', 'torvalds@linux-foundation.org'))
Akpm = ('akpm@linux-foundation.org',
        LookupStoreHacker('Andrew Morton', 'akpm@linux-foundation.org'))

# State shared with grabpatch(): the lookahead line and the line totals.
NextLine = sys.stdin.readline()
TotalChanged = 0
TotalAdded = 0
TotalRemoved = 0

#
# Snarf changesets, showing progress on stderr every 50 calls.
#
sys.stderr.write('Grabbing changesets...\r')

printcount = 0
CSCount = 0
while True:
    if printcount % 50 == 0:
        sys.stderr.write('Grabbing changesets...%d\r' % printcount)
    printcount += 1
    p = grabpatch()
    if not p:
        break
#    if p.added > 100000 or p.removed > 100000:
#        print 'Skipping massive add', p.commit
#        continue
    # With a file filter, changesets which touched nothing of interest
    # are dropped entirely.
    if FileFilter and not (p.added or p.removed):
        continue
    # NOTE(review): nesting reconstructed from a copy of the file which
    # had lost its indentation - confirm that CSCount is only bumped for
    # non-merge changesets.
    if not p.merge:
        p.author.addpatch(p)
        for sobemail, sob in p.sobs:
            sob.addsob(p)
        for hacker in p.reviews:
            hacker.addreview(p)
        for hacker in p.testers:
            hacker.addtested(p)
        for hacker in p.reports:
            hacker.addreport(p)
        CSCount += 1
    csv.AccumulatePatch(p)
sys.stderr.write('Grabbing changesets...done\n')

if DumpDB:
    database.DumpDB()
#
# Say something
#
hlist = database.AllHackers()
elist = database.AllEmployers()
reports.Write('Processed %d csets from %d developers\n'
              % (CSCount, len(hlist)))
reports.Write('%d employers found\n' % len(elist))
reports.Write('A total of %d lines added, %d removed (delta %d)\n'
              % (TotalAdded, TotalRemoved, TotalAdded - TotalRemoved))
if TotalChanged == 0:
    TotalChanged = 1  # HACK to avoid div by zero
# Date statistics replace the rest of the output: the program stops here.
if DateStats:
    PrintDateStats()
    sys.exit(0)

csv.OutputCSV(CSVFile)
if CSVFile is not None:
    CSVFile.close()

if DevReports:
    reports.DevReports(hlist, TotalChanged, CSCount, TotalRemoved)
reports.EmplReports(elist, TotalChanged, CSCount)