Fixed for git repository & cleaned up todo.
[gitstats.git] / statgit
blob42d2d6a8c6e82f4d23d9e46c283fa6a474259a01
1 #!/usr/bin/python
2 # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
3 # GPLv2
4 import commands
5 import datetime
6 import glob
7 import os
8 import re
9 import sys
10 import time
# Lines written at the top of every generated gnuplot script
GNUPLOT_COMMON = (
    'set terminal png transparent\n'
    'set size 0.5,0.5\n'
)
def getoutput(cmd):
    """Echo the command line *cmd* to stdout, run it, and return its output.

    The echo is prefixed with '>> ' so the commands being run are visible
    while data is collected.
    """
    print('>> %s' % cmd)
    return commands.getoutput(cmd)
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value.

    Keys whose values compare equal are ordered by the keys themselves,
    because the sort is done on (value, key) pairs.
    """
    pairs = [(value, key) for (key, value) in dict.items()]
    pairs.sort()
    return [key for (value, key) in pairs]
22 # TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')
class DataCollector:
    """Base class for the statistics source a report reads from.

    Every getter here returns a placeholder value (empty container, -1,
    None or the current time); a subclass overrides them with real data.
    """

    def __init__(self):
        # Creation time, later returned by getStampCreated()
        self.stamp_created = time.time()

    def collect(self, dir):
        """Main entry point for extracting data from the repository.

        The base implementation only remembers *dir* as self.dir.
        """
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author* (None in the base class)."""
        return None

    def getActivityByDayOfWeek(self):
        """Return the day-of-week activity mapping (empty in the base class)."""
        return {}

    def getActivityByHourOfDay(self):
        """Return the hour-of-day activity mapping (empty in the base class)."""
        return {}

    def getAuthors(self):
        """Return a list of authors (empty in the base class)."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit date (the current time in the base class)."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit date (the current time in the base class)."""
        return datetime.datetime.now()

    def getStampCreated(self):
        """Return the time.time() value recorded when this object was built."""
        return self.stamp_created

    def getTags(self):
        """Return a list of tags (empty in the base class)."""
        return []

    def getTotalAuthors(self):
        """Return the number of authors (-1 in the base class)."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits (-1 in the base class)."""
        return -1

    def getTotalFiles(self):
        """Return the number of files (-1 in the base class)."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code (-1 in the base class)."""
        return -1
class GitDataCollector(DataCollector):
    """Collects statistics by running git commands through getoutput().

    None of the git commands name a repository, so they act on whichever
    repository the current working directory belongs to.
    """

    def collect(self, dir):
        """Run every git query and store the results on the instance.

        dir -- repository path; it is only recorded (by the base class), the
               git commands themselves rely on the current working directory.
        """
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git-log |git-shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git-rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git-ls-files |wc -l'))
        self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}

        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self._collect_tags()
        self._collect_commits()
        self._collect_files_by_stamp()

    def _increment(self, counter, key):
        """Add one to counter[key], starting from zero for a new key."""
        counter[key] = counter.get(key, 0) + 1

    def _collect_tags(self):
        """Fill self.tags: tag name -> {'stamp', 'hash', 'date'}."""
        self.tags = {}
        for line in getoutput('git-show-ref --tags').split('\n'):
            if len(line) == 0:
                continue
            (sha, tag) = line.split(' ')
            tag = tag.replace('refs/tags/', '')
            output = getoutput('git-log "%s" --pretty=format:"%%at %%an" -n 1' % sha)
            if len(output) == 0:
                # nothing reported for this ref: leave the tag out
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': sha,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _collect_commits(self):
        """Walk the "<stamp> <author>" line of every commit and count it."""
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            if len(line) == 0:
                # Empty output splits into a single '' entry; counting it
                # would invent a commit by author '' dated at the epoch.
                continue

            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                # linux-2.6 says "<unknown>" for one line O_o
                stamp = 0
            # everything after the stamp is the author name ('' if absent)
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp: the first line read supplies the
            # last commit and every line overwrites the first commit, which
            # presumes the output is ordered newest first.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity
            self._increment(self.activity_by_hour_of_day, date.hour)
            self._increment(self.activity_by_day_of_week, date.weekday())
            self._increment(self.activity_by_month_of_year, date.month)

            # author stats (same ordering assumption as above)
            info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in info:
                info['last_commit_stamp'] = stamp
            info['first_commit_stamp'] = stamp
            self._increment(info, 'commits')

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            self._increment(self.author_of_month.setdefault(yymm, {}), author)
            self._increment(self.commits_by_month, yymm)

            yy = date.year
            self._increment(self.author_of_year.setdefault(yy, {}), author)
            self._increment(self.commits_by_year, yy)

    def _collect_files_by_stamp(self):
        """Fill self.files_by_stamp: commit stamp -> number of files."""
        self.files_by_stamp = {} # stamp -> files
        # outputs "<stamp> <files>" for each revision
        lines = getoutput('git-rev-list --pretty=format:"%at %H" HEAD |grep -v ^commit |while read line; do set $line; echo "$1 $(git-ls-tree -r "$2" |wc -l)"; done').split('\n')
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts
            self.files_by_stamp[int(stamp)] = int(files)

    def getActivityByDayOfWeek(self):
        """Return the mapping weekday (datetime.weekday() value) -> commits."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the mapping hour -> commits."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share of all commits and date range of *author*.

        The result has the keys 'commits', 'commits_frac' (a percentage),
        'date_first' and 'date_last' (both formatted as YYYY-MM-DD).
        Raises KeyError when *author* was not seen by collect().
        """
        a = self.authors[author]

        commits = a['commits']
        commits_frac = (100 * float(commits)) / self.getTotalCommits()
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        return {
            'commits': commits,
            'commits_frac': commits_frac,
            'date_first': date_first,
            'date_last': date_last,
        }

    def getAuthors(self):
        """Return the author names seen by collect()."""
        return self.authors.keys()

    def getFirstCommitDate(self):
        """Return first_commit_stamp as a datetime."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return last_commit_stamp as a datetime."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names reported by git (an empty list if none).

        The names come from the third '/'-separated field of each ref, as
        produced by the `cut` in the command.
        """
        lines = getoutput('git-show-ref --tags |cut -d/ -f3')
        # no tags means empty output, which would otherwise yield ['']
        return [name for name in lines.split('\n') if name]

    def getTagDate(self, tag):
        """Return the date (YYYY-MM-DD) of the commit *tag* points at."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count gathered by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count gathered by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the file count gathered by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count gathered by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date of revision *rev* formatted as YYYY-MM-DD."""
        stamp = int(getoutput('git-log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only remembers what it was given."""

    def __init__(self):
        pass

    def create(self, data, path):
        """Store the data source and the output path on the instance."""
        self.data, self.path = data, path
class HTMLReportCreator(ReportCreator):
    """Writes the report as static HTML pages plus gnuplot data and scripts."""

    def create(self, data, path):
        """Write every report page into directory *path*, then render graphs.

        data -- a collected GitDataCollector (its getters and the attributes
                activity_by_month_of_year, commits_by_month, commits_by_year,
                author_of_month, author_of_year, files_by_stamp and tags are
                read)
        path -- existing output directory
        """
        ReportCreator.create(self, data, path)

        self._write_index(data, path)
        self._write_activity(data, path)
        self._write_authors(data, path)
        self._write_files(data, path)
        self._write_tags(data, path)

        self.createGraphs(path)

    def _percent(self, part, total):
        """Return *part* as a percentage of *total* (0.0 if total is zero)."""
        if not total:
            return 0.0
        return (100.0 * part) / total

    def _escape(self, text):
        """Escape &, < and > so repository-supplied names are safe in HTML."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def _write_index(self, data, path):
        """Write index.html: the general summary."""
        f = open(path + "/index.html", 'w')
        # %M is minutes; %m would print the month a second time
        stamp_format = '%Y-%m-%d %H:%M:%S'
        self.printHeader(f)

        f.write('<h1>StatGit</h1>')

        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(stamp_format), time.time() - data.getStampCreated()))
        f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
        f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
        f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
        f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
        f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
        f.write('</dl>')

        f.write('</body>\n</html>')
        f.close()

    def _write_activity(self, data, path):
        """Write activity.html and the .dat files its graphs are drawn from."""
        f = open(path + '/activity.html', 'w')
        self.printHeader(f)
        f.write('<h1>Activity</h1>')
        self.printNav(f)

        f.write('<h2>Last 30 days</h2>')

        f.write('<h2>Last 12 months</h2>')

        totalcommits = data.getTotalCommits()

        # Hour of Day
        f.write('\n<h2>Hour of Day</h2>\n\n')
        hour_of_day = data.getActivityByHourOfDay()
        f.write('<table><tr><th>Hour</th>')
        for i in range(1, 25):
            f.write('<th>%d</th>' % i)
        f.write('</tr>\n<tr><th>Commits</th>')
        for i in range(0, 24):
            f.write('<td>%d</td>' % hour_of_day.get(i, 0))
        f.write('</tr>\n<tr><th>%</th>')
        for i in range(0, 24):
            f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(i, 0), totalcommits))
        f.write('</tr></table>')
        f.write('<img src="hour_of_day.png" />')
        # x values are shifted to 1..24 to match the xrange of the plot script
        fg = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            fg.write('%d %d\n' % (i + 1, hour_of_day.get(i, 0)))
        fg.close()

        # Day of Week
        # TODO show also by hour of weekday?
        f.write('\n<h2>Day of Week</h2>\n\n')
        day_of_week = data.getActivityByDayOfWeek()
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
        fp = open(path + '/day_of_week.dat', 'w')
        for d in range(0, 7):
            # a weekday without commits has no entry in the mapping
            commits = day_of_week.get(d, 0)
            fp.write('%d %d\n' % (d + 1, commits))
            f.write('<tr>')
            f.write('<th>%d</th>' % (d + 1))
            if d in day_of_week:
                f.write('<td>%d (%.2f%%)</td>' % (commits, self._percent(commits, totalcommits)))
            else:
                f.write('<td>0</td>')
            f.write('</tr>')
        f.write('</table></div>')
        f.write('<img src="day_of_week.png" />')
        fp.close()

        # Month of Year
        f.write('\n<h2>Month of Year</h2>\n\n')
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
        fp = open(path + '/month_of_year.dat', 'w')
        for mm in range(1, 13):
            commits = data.activity_by_month_of_year.get(mm, 0)
            f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, self._percent(commits, totalcommits)))
            fp.write('%d %d\n' % (mm, commits))
        fp.close()
        f.write('</table></div>')
        f.write('<img src="month_of_year.png" />')

        # Commits by year/month
        f.write('<h2>Commits by year/month</h2>')
        f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
        months = sorted(data.commits_by_month.keys())
        for yymm in reversed(months):
            f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
        f.write('</table></div>')
        f.write('<img src="commits_by_year_month.png" />')
        fg = open(path + '/commits_by_year_month.dat', 'w')
        for yymm in months:
            fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
        fg.close()

        # Commits by year
        f.write('<h2>Commits by year</h2>')
        f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
        years = sorted(data.commits_by_year.keys())
        for yy in reversed(years):
            commits = data.commits_by_year[yy]
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, commits, self._percent(commits, totalcommits)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year.png" />')
        fg = open(path + '/commits_by_year.dat', 'w')
        for yy in years:
            fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
        fg.close()

        f.write('</body></html>')
        f.close()

    def _write_top_authors(self, f, by_period, totals):
        """Write one table row per period naming its busiest author.

        by_period -- period -> {author -> commits}
        totals    -- period -> commits by everybody in that period
        """
        for period in reversed(sorted(by_period.keys())):
            authordict = by_period[period]
            # keys come back ordered by ascending commits; the last one wins
            top = getkeyssortedbyvalues(authordict)[-1]
            commits = authordict[top]
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (period, self._escape(top), commits, self._percent(commits, totals[period]), totals[period]))

    def _write_authors(self, data, path):
        """Write authors.html: the author list and the top author per period."""
        f = open(path + '/authors.html', 'w')
        self.printHeader(f)

        f.write('<h1>Authors</h1>')
        self.printNav(f)

        f.write('\n<h2>List of authors</h2>\n\n')

        f.write('<table class="authors">')
        f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
        for author in sorted(data.getAuthors()):
            info = data.getAuthorInfo(author)
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (self._escape(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
        f.write('</table>')

        f.write('\n<h2>Author of Month</h2>\n\n')
        f.write('<table>')
        f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
        self._write_top_authors(f, data.author_of_month, data.commits_by_month)
        f.write('</table>')

        f.write('\n<h2>Author of Year</h2>\n\n')
        f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
        self._write_top_authors(f, data.author_of_year, data.commits_by_year)
        f.write('</table>')

        f.write('</body></html>')
        f.close()

    def _write_files(self, data, path):
        """Write files.html and files_by_date.dat."""
        f = open(path + '/files.html', 'w')
        self.printHeader(f)
        f.write('<h1>Files</h1>')
        self.printNav(f)

        total_files = data.getTotalFiles()
        # Only a line count is available, so the average is lines per file
        if total_files:
            average = float(data.getTotalLOC()) / total_files
        else:
            average = 0.0

        f.write('<dl>\n')
        f.write('<dt>Total files</dt><dd>%d</dd>' % total_files)
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        f.write('<dt>Average file size</dt><dd>%.2f lines</dd>' % average)
        f.write('</dl>\n')

        f.write('<h2>File count by date</h2>')

        fg = open(path + '/files_by_date.dat', 'w')
        for stamp in sorted(data.files_by_stamp.keys()):
            fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        fg.close()

        f.write('<img src="files_by_date.png" />')

        f.write('<h2>Average file size by date</h2>')

        f.write('</body></html>')
        f.close()

    def _write_tags(self, data, path):
        """Write tags.html: tag totals and the tags listed newest first."""
        f = open(path + '/tags.html', 'w')
        self.printHeader(f)
        f.write('<h1>Tags</h1>')
        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
        if len(data.tags) > 0:
            # float() keeps the fractional part that %.2f is meant to show
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (float(data.getTotalCommits()) / len(data.tags)))
        f.write('</dl>')

        f.write('<table>')
        f.write('<tr><th>Name</th><th>Date</th></tr>')
        # sort the tags by date desc
        dated = sorted([(info['date'], name) for (name, info) in data.tags.items()])
        for (date, name) in reversed(dated):
            f.write('<tr><td>%s</td><td>%s</td></tr>' % (self._escape(name), date))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

    def createGraphs(self, path):
        """Write the gnuplot scripts into *path* and run gnuplot on them.

        The process working directory is changed to *path* and left there;
        the scripts name their .dat and .png files relative to it. Every
        *.plot file found in that directory is rendered.
        """
        print('Generating graphs...')

        # (script name without extension, script body after GNUPLOT_COMMON)
        plots = [
            # hour of day
            ('hour_of_day',
                "\n"
                "set output 'hour_of_day.png'\n"
                "unset key\n"
                "set xrange [0.5:24.5]\n"
                "set xtics 4\n"
                'set ylabel "Commits"\n'
                "plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid\n"),
            # day of week
            ('day_of_week',
                "\n"
                "set output 'day_of_week.png'\n"
                "unset key\n"
                "set xrange [0.5:7.5]\n"
                "set xtics 1\n"
                'set ylabel "Commits"\n'
                "plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid\n"),
            # Month of Year
            ('month_of_year',
                "\n"
                "set output 'month_of_year.png'\n"
                "unset key\n"
                "set xrange [0.5:12.5]\n"
                "set xtics 1\n"
                'set ylabel "Commits"\n'
                "plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid\n"),
            # commits_by_year_month
            # TODO rotate xtic labels by 90 degrees
            ('commits_by_year_month',
                "\n"
                "set output 'commits_by_year_month.png'\n"
                "unset key\n"
                "set xdata time\n"
                'set timefmt "%Y-%m"\n'
                'set format x "%Y-%m"\n'
                "set xtics 15768000\n"
                'set ylabel "Commits"\n'
                "plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid\n"),
            # commits_by_year
            ('commits_by_year',
                "\n"
                "set output 'commits_by_year.png'\n"
                "unset key\n"
                "set xtics 1\n"
                'set ylabel "Commits"\n'
                "plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid\n"),
            # Files by date
            ('files_by_date',
                "\n"
                "set output 'files_by_date.png'\n"
                "unset key\n"
                "set xdata time\n"
                'set timefmt "%Y-%m-%d"\n'
                'set format x "%Y-%m-%d"\n'
                'set ylabel "Files"\n'
                "set xtics rotate by 90\n"
                "plot 'files_by_date.dat' using 1:2 smooth csplines\n"),
        ]
        for (name, script) in plots:
            f = open(path + '/' + name + '.plot', 'w')
            f.write(GNUPLOT_COMMON)
            f.write(script)
            f.close()

        os.chdir(path)
        # Glob relative to the new working directory: prefixing *path* again
        # would point at the wrong place whenever *path* is relative.
        for plotfile in glob.glob('*.plot'):
            print('>> gnuplot %s' % plotfile)
            # quoted so a file name containing spaces stays one argument
            os.system('gnuplot "%s"' % plotfile)

    def printHeader(self, f):
        """Write the opening HTML (head section and <body> tag) to *f*."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list linking the report pages to *f*.

        NOTE: lines.html is linked here but no method of this class writes it.
        """
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
# Command-line entry point: statgit <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
# Made absolute now because the working directory changes below
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The report writer opens files inside outputpath without creating it,
# so make sure the directory exists before any work is done.
if not os.path.isdir(outputpath):
    os.makedirs(outputpath)

# The collector's git commands act on the current directory
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)