Activity: Month of Year (table).
[gitstats.git] / statgit
blobc21523395d9d13f962a0e01346bf9a9999ecbe36
1 #!/usr/bin/python
2 # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
3 # GPLv2
4 import commands
5 import datetime
6 import os
7 import re
8 import sys
9 import time
# Preamble written at the top of every generated gnuplot script:
# transparent PNG output at half the default canvas size.
GNUPLOT_COMMON = (
    'set terminal png transparent\n'
    'set size 0.5,0.5\n'
)
def getoutput(cmd):
    """Echo the shell command `cmd`, run it and return what it printed.

    The command line is printed with a '>> ' prefix before it is executed
    through commands.getoutput.
    """
    # Single parenthesised argument: prints exactly the same text as the
    # bare print statement.
    print('>> %s' % cmd)
    return commands.getoutput(cmd)
def getkeyssortedbyvalues(dict):
    """Return the keys of `dict` as a list ordered by ascending value.

    Keys whose values compare equal are ordered by the keys themselves,
    because the sort is performed on (value, key) pairs.
    """
    pairs = sorted([(value, key) for (key, value) in dict.items()])
    return [key for (value, key) in pairs]
21 # TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')
class DataCollector:
    """Base class for repository statistics collectors.

    Every getter below returns a neutral placeholder value; subclasses
    override them to report real data.
    """

    def __init__(self):
        # Creation time of the collector, exposed through getStampCreated().
        self.stamp_created = time.time()

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        """Remember the repository directory; subclasses do the real work."""
        self.dir = dir

    ##
    # : get a dictionary of author
    def getAuthorInfo(self, author):
        """Return details for `author`; the base class knows none (None)."""
        return None

    def getActivityByDayOfWeek(self):
        """Return commit counts per weekday; empty in the base class."""
        return {}

    def getActivityByHourOfDay(self):
        """Return commit counts per hour; empty in the base class."""
        return {}

    ##
    # Get a list of authors
    def getAuthors(self):
        """Return the author names; empty in the base class."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit's datetime; 'now' in the base class."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit's datetime; 'now' in the base class."""
        return datetime.datetime.now()

    def getStampCreated(self):
        """Return the time.time() value recorded when this object was built."""
        return self.stamp_created

    def getTags(self):
        """Return the tag names; empty in the base class."""
        return []

    def getTotalAuthors(self):
        """Return the number of authors; -1 means 'not collected'."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits; -1 means 'not collected'."""
        return -1

    def getTotalFiles(self):
        """Return the number of files; -1 means 'not collected'."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines; -1 means 'not collected'."""
        return -1
class GitDataCollector(DataCollector):
    """Collects statistics by shelling out to git through getoutput().

    None of the git commands is given a repository path, so they operate on
    the process' current working directory; the `dir` passed to collect() is
    only recorded by the base class.
    """

    def collect(self, dir):
        """Run all git queries and fill in the statistics attributes.

        dir -- repository path, stored as self.dir by the base class.

        Raises ValueError if one of the counting pipelines does not print
        an integer.
        """
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git-log |git-shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git-rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git-ls-files |wc -l'))
        self.total_lines = int(getoutput('git-ls-files |xargs cat |wc -l'))

        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}

        # author of the month
        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self._collectTags()
        self._collectCommits()

    def _collectTags(self):
        """Fill self.tags: tag name -> {'stamp', 'hash', 'date'}."""
        self.tags = {}
        for line in getoutput('git-show-ref --tags').split('\n'):
            if not line:
                continue
            (rev, ref) = line.split(' ')
            tag = ref.replace('refs/tags/', '')
            output = getoutput('git-log "%s" --pretty=format:"%%at %%an" -n 1' % rev)
            if not output:
                # Nothing printed for this ref: leave the tag out.
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': rev,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _collectCommits(self):
        """Walk the commit list once and update every per-commit counter."""
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            # ''.split('\n') yields [''], which would otherwise be counted
            # as a commit made at the epoch by an author named ''.
            if not line:
                continue

            # linux-2.6 says "<unknown>" for one line O_o
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            # Everything after the timestamp is the author name ('' if absent).
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp: the first line seen is taken as
            # the last commit, and every later line overwrites the first
            # commit, so the final line processed wins.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity: hour, weekday, month of year
            for (counter, key) in (
                    (self.activity_by_hour_of_day, date.hour),
                    (self.activity_by_day_of_week, date.weekday()),
                    (self.activity_by_month_of_year, date.month)):
                counter[key] = counter.get(key, 0) + 1

            # author stats (same first-seen/last-seen convention as above)
            info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in info:
                info['last_commit_stamp'] = stamp
            info['first_commit_stamp'] = stamp
            info['commits'] = info.get('commits', 0) + 1

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            by_author = self.author_of_month.setdefault(yymm, {})
            by_author[author] = by_author.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            by_author = self.author_of_year.setdefault(yy, {})
            by_author[author] = by_author.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

    def getActivityByDayOfWeek(self):
        """Return {weekday (0-6, from datetime.weekday()): commits}."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return {hour (0-23): commits}."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return a summary dictionary for one author.

        Keys: 'commits', 'commits_frac' (percentage of getTotalCommits()),
        'date_first' and 'date_last' (both 'YYYY-MM-DD').
        Raises KeyError for an author that was not seen by collect().
        """
        a = self.authors[author]

        commits = a['commits']
        commits_frac = (100 * float(commits)) / self.getTotalCommits()
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        res = { 'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last }
        return res

    def getAuthors(self):
        """Return the names of all authors seen by collect()."""
        return self.authors.keys()

    def getFirstCommitDate(self):
        """Return the first commit's timestamp as a datetime."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the last commit's timestamp as a datetime."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the list of tag names by querying git again.

        Names are derived the same way collect() does (strip 'refs/tags/'),
        so tags containing '/' are kept whole, and a repository without
        tags yields [] rather than [''].
        """
        names = []
        for line in getoutput('git-show-ref --tags').split('\n'):
            if not line:
                continue
            ref = line.split(' ', 1)[-1]
            names.append(ref.replace('refs/tags/', ''))
        return names

    def getTagDate(self, tag):
        """Return the 'YYYY-MM-DD' date of the commit `tag` points at."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count computed by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count computed by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the tracked-file count computed by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count computed by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the 'YYYY-MM-DD' author date of revision `rev`.

        Raises ValueError if git does not print a timestamp for `rev`.
        """
        stamp = int(getoutput('git-log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only records what to report on."""

    def __init__(self):
        # No state until create() is called.
        pass

    def create(self, data, path):
        """Store the data source and the output path on the instance."""
        self.data, self.path = data, path
class HTMLReportCreator(ReportCreator):
    """Writes the collected statistics as static HTML pages plus graphs.

    create() produces index.html, activity.html, authors.html, files.html
    and tags.html together with the .dat/.plot inputs for gnuplot, all
    inside the output directory (which is not created here).
    """

    def create(self, data, path):
        """Write the complete report for `data` into directory `path`.

        data -- collector providing the getters and dictionaries read by
                the page writers below
        path -- output directory; open() fails if it does not exist
        """
        ReportCreator.create(self, data, path)

        self._createIndex(data, path)
        self._createActivity(data, path)
        self._createAuthors(data, path)
        self._createFiles(data, path)
        self._createTags(data, path)

        self.createGraphs(path)

    def _percent(self, part, total):
        """Return `part` as a float percentage of `total` (0.0 if total is 0)."""
        if not total:
            return 0.0
        return (100.0 * part) / total

    def _writeLines(self, filename, lines):
        """Write the given text lines to `filename`, always closing the file."""
        fp = open(filename, 'w')
        try:
            fp.writelines(lines)
        finally:
            fp.close()

    def _createIndex(self, data, path):
        """Write index.html: generation time, report period and totals."""
        f = open(path + "/index.html", 'w')
        try:
            # %M is minutes; %m (month) here would print the month twice.
            format = '%Y-%m-%d %H:%M:%S'
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('</body>\n</html>')
        finally:
            f.close()

    def _createActivity(self, data, path):
        """Write activity.html and the .dat files for the activity graphs."""
        totalcommits = data.getTotalCommits()
        f = open(path + '/activity.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            for i in range(1, 25):
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            for i in range(0, 24):
                f.write('<td>%d</td>' % hour_of_day.get(i, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for i in range(0, 24):
                f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(i, 0), totalcommits))
            f.write('</tr></table>')
            f.write('<img src="hour_of_day.png" />')
            # The plot uses xrange [0.5:24.5], so hours are stored 1-based.
            # The file is written once; a 0-based version used to be written
            # first and then immediately overwritten.
            self._writeLines(path + '/hour_of_day.dat',
                ['%d %d\n' % (i + 1, hour_of_day.get(i, 0)) for i in range(0, 24)])

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            for d in range(0, 7):
                f.write('<tr>')
                f.write('<th>%d</th>' % (d + 1))
                if d in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], self._percent(day_of_week[d], totalcommits)))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table></div>')
            f.write('<img src="day_of_week.png" />')
            # .get(): a weekday without any commit has no entry at all.
            self._writeLines(path + '/day_of_week.dat',
                ['%d %d\n' % (d + 1, day_of_week.get(d, 0)) for d in range(0, 7)])

            # Month of Year
            f.write('\n<h2>Month of Year</h2>\n\n')
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
            for mm in range(1, 13):
                commits = data.activity_by_month_of_year.get(mm, 0)
                f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, self._percent(commits, totalcommits)))
            f.write('</table></div>')

            # Commits by year/month
            months = sorted(data.commits_by_month.keys())
            f.write('<h2>Commits by year/month</h2>')
            f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
            for yymm in reversed(months):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table></div>')
            f.write('<img src="commits_by_year_month.png" />')
            self._writeLines(path + '/commits_by_year_month.dat',
                ['%s %s\n' % (yymm, data.commits_by_month[yymm]) for yymm in months])

            # Commits by year
            years = sorted(data.commits_by_year.keys())
            f.write('<h2>Commits by year</h2>')
            f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
            for yy in reversed(years):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], self._percent(data.commits_by_year[yy], totalcommits)))
            f.write('</table></div>')
            f.write('<img src="commits_by_year.png" />')
            self._writeLines(path + '/commits_by_year.dat',
                ['%d %d\n' % (yy, data.commits_by_year[yy]) for yy in years])

            f.write('</body></html>')
        finally:
            f.close()

    def _writeTopAuthors(self, f, by_period, totals):
        """Write one table row per period naming its most active author.

        by_period -- period -> {author: commits}
        totals    -- period -> total commits in that period
        """
        for period in reversed(sorted(by_period.keys())):
            authordict = by_period[period]
            # Keys come back ordered by ascending commit count.
            top = getkeyssortedbyvalues(authordict)[-1]
            commits = authordict[top]
            # Float percentage: integer division would truncate it.
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (period, top, commits, self._percent(commits, totals[period]), totals[period]))

    def _createAuthors(self, data, path):
        """Write authors.html: author list, author of month, author of year."""
        f = open(path + '/authors.html', 'w')
        try:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            self._writeTopAuthors(f, data.author_of_month, data.commits_by_month)
            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            self._writeTopAuthors(f, data.author_of_year, data.commits_by_year)
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def _createFiles(self, data, path):
        """Write files.html: file and line totals."""
        total_files = data.getTotalFiles()
        total_lines = data.getTotalLOC()
        f = open(path + '/files.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Files</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total files</dt><dd>%d</dd>' % total_files)
            f.write('<dt>Total lines</dt><dd>%d</dd>' % total_lines)
            # Only line counts are available, so the average is lines per
            # file. It used to be multiplied by 100 and labelled "bytes".
            if total_files > 0:
                average = float(total_lines) / total_files
            else:
                average = 0.0
            f.write('<dt>Average file size</dt><dd>%.2f lines</dd>' % average)
            f.write('</dl>\n')

            f.write('<h2>File count by date</h2>')

            f.write('<h2>Average file size by date</h2>')

            f.write('</body></html>')
        finally:
            f.close()

    def _createTags(self, data, path):
        """Write tags.html: tag totals and a table of tags, newest first."""
        f = open(path + '/tags.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            if len(data.tags) > 0:
                # float(): keep the fractional part shown by %.2f.
                f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (float(data.getTotalCommits()) / len(data.tags)))
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc (ties broken by name, also desc)
            tags_sorted_by_date_desc = sorted(data.tags.keys(), key=lambda name: (data.tags[name]['date'], name), reverse=True)
            for tag in tags_sorted_by_date_desc:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def createGraphs(self, path):
        """Write the gnuplot scripts into `path` and run gnuplot on them.

        Side effect: changes the process' working directory to `path`,
        because the scripts refer to their data files by relative name.
        """
        print('Generating graphs...')

        # (graph name, settings specific to that graph)
        graphs = (
            ('hour_of_day', ['set xrange [0.5:24.5]', 'set xtics 4']),
            ('day_of_week', ['set xrange [0.5:7.5]', 'set xtics 1']),
            # TODO rotate xtic labels by 90 degrees
            ('commits_by_year_month', [
                'set xdata time',
                'set timefmt "%Y-%m"',
                'set format x "%Y-%m"',
                'set xtics 15768000',
            ]),
            ('commits_by_year', ['set xtics 1']),
        )

        for (name, settings) in graphs:
            script = ["set output '%s.png'" % name, 'unset key']
            script.extend(settings)
            script.append('set ylabel "Commits"')
            script.append("plot '%s.dat' using 1:2:(0.5) w boxes fs solid" % name)
            f = open(path + '/%s.plot' % name, 'w')
            try:
                f.write(GNUPLOT_COMMON)
                f.write('\n' + '\n'.join(script) + '\n')
            finally:
                f.close()

        os.chdir(path)
        for (name, settings) in graphs:
            os.system('gnuplot %s.plot' % name)

    def printHeader(self, f):
        """Write the opening HTML (head, stylesheet link, <body>) to `f`."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list linking the report pages to `f`."""
        # The list items are wrapped in <ul>; the opening tag was missing
        # although the closing one was emitted.
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
# Script entry point: statgit [options] <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

# Both positional arguments are required.
if len(sys.argv) < 3:
    print(usage)
    # Non-zero status: the report was not generated, so do not signal success.
    sys.exit(1)

gitpath = sys.argv[1]
# Made absolute before the chdir below so a relative output path still
# refers to the directory the user meant.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector's git commands run in the current working directory.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)