Author of Year: Commits (%). Sort tags date/desc & list general info.
[gitstats.git] / statgit
blob740fb2b5e4bd2f18deadb5f2881c6cc54bf5de2a
1 #!/usr/bin/python
2 # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
3 # GPLv2
4 import commands
5 import datetime
6 import os
7 import re
8 import sys
def getoutput(cmd):
    """Echo the shell command *cmd* to stdout, run it and return its output.

    The command is executed through commands.getoutput().
    """
    print('>> %s' % cmd)
    return commands.getoutput(cmd)
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value.

    The sort runs over (value, key) pairs, so keys whose values compare
    equal are ordered by the keys themselves.
    """
    pairs = [(value, key) for (key, value) in dict.items()]
    pairs.sort()
    return [key for (value, key) in pairs]
class DataCollector:
    """Base class for repository data sources.

    Every getter here returns a placeholder value; subclasses override them
    with real data.
    """

    def __init__(self):
        """Create the collector; there is no state to set up."""

    def collect(self, dir):
        """Remember *dir* on the instance.

        This should be the main function to extract data from the repository.
        """
        self.dir = dir

    def getAuthorInfo(self, author):
        """Get a dictionary describing *author*; this stub returns None."""
        return None

    def getActivityByDayOfWeek(self):
        """Return the per-weekday activity mapping; empty in this stub."""
        return dict()

    def getActivityByHourOfDay(self):
        """Return the per-hour activity mapping; empty in this stub."""
        return dict()

    def getAuthors(self):
        """Get a list of authors; empty in this stub."""
        return list()

    def getFirstCommitDate(self):
        """Return the date of the first commit; this stub returns the current time."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the date of the last commit; this stub returns the current time."""
        return datetime.datetime.now()

    def getTags(self):
        """Return the list of tags; empty in this stub."""
        return list()

    def getTotalAuthors(self):
        """Return the number of authors; -1 in this stub."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits; -1 in this stub."""
        return -1

    def getTotalFiles(self):
        """Return the number of files; -1 in this stub."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code; -1 in this stub."""
        return -1
class GitDataCollector(DataCollector):
    """Collect repository statistics by running git commands.

    None of the git commands is given a repository path, so they operate on
    the current working directory.
    """

    def collect(self, dir):
        """Gather totals, tag information and per-commit statistics.

        dir -- repository path; it is only stored (by DataCollector.collect),
               the git commands themselves run in the current directory.

        Raises ValueError if one of the counting commands prints something
        that is not an integer.
        """
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git-log |git-shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git-rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git-ls-files |wc -l'))
        self.total_lines = int(getoutput('git-ls-files |xargs cat |wc -l'))

        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}

        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self._collectTags()

        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            # Empty output splits into a single '' entry; that is not a commit.
            if line:
                self._addCommit(line)

    def _listTagRefs(self):
        """Return (hash, tag name) pairs parsed from 'git-show-ref --tags'."""
        refs = []
        for line in getoutput('git-show-ref --tags').split('\n'):
            parts = line.split(' ')
            # A repository without tags yields one empty line; only
            # "<hash> <ref>" lines describe a tag.
            if len(parts) != 2:
                continue
            refs.append((parts[0], parts[1].replace('refs/tags/', '')))
        return refs

    def _collectTags(self):
        """Fill self.tags with name -> {'stamp', 'hash', 'date'} for every tag."""
        self.tags = {}
        for (rev, tag) in self._listTagRefs():
            output = getoutput('git-log "%s" --pretty=format:"%%at %%an" -n 1' % rev)
            if len(output) > 0:
                try:
                    stamp = int(output.split(' ')[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = {
                    'stamp': stamp,
                    'hash': rev,
                    'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
                }

    def _bump(self, counter, key):
        """Increment counter[key], starting from zero for an unseen key."""
        counter[key] = counter.get(key, 0) + 1

    def _addCommit(self, line):
        """Account for one "<timestamp> <author name>" line of rev-list output."""
        # linux-2.6 says "<unknown>" for one line O_o
        parts = line.split(' ')
        try:
            stamp = int(parts[0])
        except ValueError:
            stamp = 0
        author = ' '.join(parts[1:])
        date = datetime.datetime.fromtimestamp(float(stamp))

        # First and last commit stamp. The timestamps are not guaranteed to
        # arrive in sorted order, so track the extremes explicitly; a stamp
        # of 0 marks an unparsable line and must not become the first commit.
        if stamp > 0:
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp

        # activity
        self._bump(self.activity_by_hour_of_day, date.hour)
        self._bump(self.activity_by_day_of_week, date.weekday())

        # author stats
        info = self.authors.setdefault(author, {
            'commits': 0,
            'first_commit_stamp': 0,
            'last_commit_stamp': 0,
        })
        info['commits'] += 1
        if stamp > 0:
            if info['first_commit_stamp'] == 0 or stamp < info['first_commit_stamp']:
                info['first_commit_stamp'] = stamp
            if stamp > info['last_commit_stamp']:
                info['last_commit_stamp'] = stamp

        # author of the month/year
        yymm = date.strftime('%Y-%m')
        self._bump(self.author_of_month.setdefault(yymm, {}), author)
        self._bump(self.commits_by_month, yymm)

        yy = date.year
        self._bump(self.author_of_year.setdefault(yy, {}), author)
        self._bump(self.commits_by_year, yy)

    def getActivityByDayOfWeek(self):
        """Return the weekday -> commits mapping (keys from date.weekday())."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the hour -> commits mapping (keys from date.hour)."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share and first/last commit dates of *author*.

        The result has the keys 'commits', 'commits_frac' (percentage of all
        commits), 'date_first' and 'date_last' (both 'YYYY-MM-DD' strings).
        Raises KeyError for an author that was not collected.
        """
        a = self.authors[author]

        commits = a['commits']
        commits_frac = (100 * float(commits)) / self.getTotalCommits()
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        return {
            'commits': commits,
            'commits_frac': commits_frac,
            'date_first': date_first,
            'date_last': date_last,
        }

    def getAuthors(self):
        """Return the list of collected author names."""
        return list(self.authors)

    def getFirstCommitDate(self):
        """Return the earliest collected commit time as a datetime."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the latest collected commit time as a datetime."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the names of all tags reported by 'git-show-ref --tags'.

        The full name after 'refs/tags/' is kept, so tags containing '/'
        are not truncated, and a repository without tags yields [].
        """
        return [tag for (rev, tag) in self._listTagRefs()]

    def getTagDate(self, tag):
        """Return the 'YYYY-MM-DD' date of the commit that *tag* points to."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count computed by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count computed by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the file count computed by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the line count computed by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the 'YYYY-MM-DD' date of revision *rev*.

        Raises ValueError if git-log does not print a timestamp for *rev*.
        """
        stamp = int(getoutput('git-log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only records its inputs."""

    def __init__(self):
        """Create the report writer; there is no state to set up."""

    def create(self, data, path):
        """Store *data* and *path* on the instance."""
        self.data, self.path = data, path
class HTMLReportCreator(ReportCreator):
    """Write the collected statistics as static HTML pages plus .dat files.

    NOTE(review): author and tag names are written into the pages without
    HTML escaping.
    """

    def create(self, data, path):
        """Generate index, activity, authors and tags pages below *path*.

        data -- collector providing the get*() accessors and the
                author_of_month / author_of_year / commits_by_* / tags dicts
        path -- existing directory the files are written into
        """
        ReportCreator.create(self, data, path)
        self._writeIndex(data, path)
        self._writeActivity(data, path)
        self._writeAuthors(data, path)
        self._writeTags(data, path)

    def _percent(self, part, whole):
        """Return part as a float percentage of whole (0.0 when whole is 0)."""
        if not whole:
            return 0.0
        return (100.0 * part) / whole

    def _topAuthor(self, authordict):
        """Return (author, commits) for the entry with the most commits.

        Ties go to the greater author name, i.e. the first element of a
        descending sort over (commits, author) pairs.
        """
        (commits, author) = max([(count, name) for (name, count) in authordict.items()])
        return (author, commits)

    def _writeIndex(self, data, path):
        """Write index.html with the general repository information."""
        # %M is minutes; %m would repeat the month in the time part.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        f = open(path + "/index.html", 'w')
        try:
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('</body>\n</html>')
        finally:
            f.close()

    def _writeActivity(self, data, path):
        """Write activity.html, hour_of_day.dat and day_of_week.dat."""
        totalcommits = data.getTotalCommits()
        f = open(path + '/activity.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            hours = range(0, 24)
            f.write('<table><tr><th>Hour</th>')
            # The header uses the same 0..23 values as the data columns.
            for i in hours:
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            fp = open(path + '/hour_of_day.dat', 'w')
            try:
                for i in hours:
                    count = hour_of_day.get(i, 0)
                    f.write('<td>%d</td>' % count)
                    fp.write('%d %d\n' % (i, count))
            finally:
                fp.close()
            f.write('</tr>\n<tr><th>%</th>')
            for i in hours:
                f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(i, 0), totalcommits))
            f.write('</tr></table>')

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            fp = open(path + '/day_of_week.dat', 'w')
            try:
                for d in range(0, 7):
                    # A weekday without commits has no key; report it as 0.
                    count = day_of_week.get(d, 0)
                    fp.write('%d %d\n' % (d + 1, count))
                    f.write('<tr>')
                    f.write('<th>%d</th>' % (d + 1))
                    if d in day_of_week:
                        f.write('<td>%d (%.2f%%)</td>' % (count, self._percent(count, totalcommits)))
                    else:
                        f.write('<td>0</td>')
                    f.write('</tr>')
            finally:
                fp.close()
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def _writeAuthors(self, data, path):
        """Write authors.html: author list, author of month, author of year."""
        f = open(path + '/authors.html', 'w')
        try:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in data.getAuthors():
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            for yymm in sorted(data.author_of_month.keys(), reverse=True):
                (author, commits) = self._topAuthor(data.author_of_month[yymm])
                total = data.commits_by_month[yymm]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yymm, author, commits, self._percent(commits, total), total))

            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            for yy in sorted(data.author_of_year.keys(), reverse=True):
                (author, commits) = self._topAuthor(data.author_of_year[yy])
                total = data.commits_by_year[yy]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yy, author, commits, self._percent(commits, total), total))
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def _writeTags(self, data, path):
        """Write tags.html: tag totals and the tags sorted by date, newest first."""
        f = open(path + '/tags.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            total_tags = len(data.tags)
            # Float division, and no division at all for a tagless repository.
            if total_tags:
                average = float(data.getTotalCommits()) / total_tags
            else:
                average = 0.0

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % total_tags)
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % average)
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc
            dated = sorted([(info['date'], name) for (name, info) in data.tags.items()], reverse=True)
            for (date, tag) in dated:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, date))
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def printHeader(self, f):
        """Write the opening HTML, the head section and the <body> tag to *f*."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list linking the report pages to *f*."""
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
# Command-line entry point: statgit <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
# Make the output path absolute before the chdir() below; otherwise a
# relative path would be resolved inside the git repository.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector's git commands take no repository argument, so they have to
# run from inside the repository.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)