Graph: Day of Week.
[gitstats.git] / statgit
blob313f8c57fee6000c25a9139507fdd8c6d9900750
1 #!/usr/bin/python
2 # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
3 # GPLv2
4 import commands
5 import datetime
6 import os
7 import re
8 import sys
# Preamble written at the top of every generated gnuplot script
# (see HTMLReportCreator.createGraphs): PNG terminal with a transparent
# background, plots scaled to half size.
GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
def getoutput(cmd):
    """Run *cmd* through the shell and return everything it printed.

    The command line is echoed first (prefixed with '>> ') so the user can
    follow what is being executed.  Standard error is merged into standard
    output and a single trailing newline is removed, which matches what
    ``commands.getoutput`` used to return.

    cmd -- shell command line (pipes and redirections are allowed).
    Returns the combined output as a string.
    """
    # The 'commands' module is deprecated and no longer exists in Python 3;
    # subprocess is available everywhere.  Imported locally so this function
    # does not depend on the file's import block.
    import subprocess

    print('>> %s' % cmd)
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    output = proc.communicate()[0]
    # Python 3 hands back bytes; Python 2 already gives a str.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    # Drop exactly one trailing newline, like commands.getoutput did.
    if output.endswith('\n'):
        output = output[:-1]
    return output
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* as a list ordered by ascending value.

    Keys whose values compare equal are ordered by the keys themselves,
    because the sort runs over (value, key) pairs.
    """
    # Decorate each key with its value, sort, then strip the value again.
    decorated = [(value, key) for key, value in dict.items()]
    decorated.sort()
    return [key for _value, key in decorated]
20 # TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')
class DataCollector:
    """Base interface for repository statistics collectors.

    Every getter returns a neutral placeholder value; GitDataCollector
    overrides them with data gathered from a real repository.
    """

    def __init__(self):
        """Nothing to set up; state is created by collect()."""

    def collect(self, dir):
        """Main entry point for extracting data from the repository.

        The base implementation only remembers the directory it was given.
        """
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author* (None in the base class)."""
        return None

    def getActivityByDayOfWeek(self):
        """Return the commits-per-weekday mapping (empty in the base class)."""
        return dict()

    def getActivityByHourOfDay(self):
        """Return the commits-per-hour mapping (empty in the base class)."""
        return dict()

    def getAuthors(self):
        """Return the list of authors (empty in the base class)."""
        return list()

    def getFirstCommitDate(self):
        """Return the first commit date; the base class answers "now"."""
        now = datetime.datetime.now()
        return now

    def getLastCommitDate(self):
        """Return the last commit date; the base class answers "now"."""
        now = datetime.datetime.now()
        return now

    def getTags(self):
        """Return the list of tags (empty in the base class)."""
        return list()

    def getTotalAuthors(self):
        """Return the number of authors; -1 means "not collected"."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits; -1 means "not collected"."""
        return -1

    def getTotalFiles(self):
        """Return the number of files; -1 means "not collected"."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code; -1 means "not collected"."""
        return -1
class GitDataCollector(DataCollector):
    """Collect commit, author and tag statistics by shelling out to git.

    All git commands run in the current working directory, so the caller
    must chdir into the repository before calling collect().  The getters
    that return cached values are only valid after collect() has run.
    """

    def collect(self, dir):
        """Gather all statistics for the repository in the current directory.

        dir -- repository path; stored on the instance by the base class.

        Raises ValueError when one of the counting commands does not print
        a number (for example when git reports an error instead).
        """
        DataCollector.collect(self, dir)

        # Use the "git <command>" spelling: the dashed "git-<command>"
        # programs are not installed on PATH by git 1.6 and later.
        self.total_authors = int(getoutput('git log |git shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git ls-files |wc -l'))
        # NUL-separated names keep paths containing whitespace intact.
        self.total_lines = int(getoutput('git ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits

        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp}

        # author of the month
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self.tags = {}  # name -> {stamp, hash, date}

        self._collect_tags()
        self._collect_commits()

    def _list_tag_refs(self):
        """Return (hash, tag name) pairs parsed from 'git show-ref --tags'."""
        prefix = 'refs/tags/'
        refs = []
        for line in getoutput('git show-ref --tags').split('\n'):
            parts = line.split(' ', 1)
            if len(parts) != 2:
                # Blank line (no tags at all) or otherwise unexpected output.
                continue
            sha, ref = parts
            if ref.startswith(prefix):
                ref = ref[len(prefix):]
            refs.append((sha, ref))
        return refs

    def _collect_tags(self):
        """Fill self.tags with the timestamp, hash and date of every tag."""
        for sha, tag in self._list_tag_refs():
            output = getoutput('git log "%s" --pretty=format:"%%at %%an" -n 1' % sha)
            if len(output) == 0:
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': sha,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _count(self, counter, key):
        """Increment counter[key], starting from zero for unseen keys."""
        counter[key] = counter.get(key, 0) + 1

    def _extend_range(self, record, stamp):
        """Widen record's first/last commit stamps so they include *stamp*.

        Stamps that could not be parsed (recorded as 0) are ignored so they
        do not drag the range back to 1970.
        """
        if stamp <= 0:
            return
        first = record['first_commit_stamp']
        if first == 0 or stamp < first:
            record['first_commit_stamp'] = stamp
        if stamp > record['last_commit_stamp']:
            record['last_commit_stamp'] = stamp

    def _collect_commits(self):
        """Walk every commit reachable from HEAD and update all counters."""
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        overall = {'first_commit_stamp': 0, 'last_commit_stamp': 0}
        for line in lines:
            if not line:
                # Empty output (e.g. no commits) must not count as a commit.
                continue
            # linux-2.6 says "<unknown>" for one line O_o
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp.  Author timestamps are not
            # guaranteed to follow the order of rev-list's output, so track
            # the real minimum and maximum instead of relying on position.
            self._extend_range(overall, stamp)

            # activity
            self._count(self.activity_by_hour_of_day, date.hour)
            self._count(self.activity_by_day_of_week, date.weekday())

            # author stats
            info = self.authors.setdefault(
                author,
                {'commits': 0, 'first_commit_stamp': 0, 'last_commit_stamp': 0})
            info['commits'] += 1
            self._extend_range(info, stamp)

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            self._count(self.author_of_month.setdefault(yymm, {}), author)
            self._count(self.commits_by_month, yymm)

            yy = date.year
            self._count(self.author_of_year.setdefault(yy, {}), author)
            self._count(self.commits_by_year, yy)

        self.first_commit_stamp = overall['first_commit_stamp']
        self.last_commit_stamp = overall['last_commit_stamp']

    def getActivityByDayOfWeek(self):
        """Return the weekday (0 = Monday) -> commit count mapping."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the hour (0-23) -> commit count mapping."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share of all commits and active dates.

        The result has the keys 'commits', 'commits_frac' (percentage of
        the total commit count), 'date_first' and 'date_last' (YYYY-MM-DD).
        Raises KeyError for an author that was not seen by collect().
        """
        a = self.authors[author]

        commits = a['commits']
        total = self.getTotalCommits()
        if total:
            commits_frac = (100 * float(commits)) / total
        else:
            # Avoid dividing by zero when no commits were counted.
            commits_frac = 0.0
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        res = {'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last}
        return res

    def getAuthors(self):
        """Return the names of all authors seen by collect()."""
        return list(self.authors.keys())

    def getFirstCommitDate(self):
        """Return the earliest commit time as a datetime."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the latest commit time as a datetime."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the names of all tags, queried from git on each call.

        An empty list is returned when the repository has no tags, and tag
        names containing '/' are returned in full.
        """
        return [name for _sha, name in self._list_tag_refs()]

    def getTagDate(self, tag):
        """Return the date (YYYY-MM-DD) of the commit *tag* points at."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count computed by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count computed by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the tracked-file count computed by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count computed by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date (YYYY-MM-DD) of revision *rev*.

        Raises ValueError when git does not print a timestamp for *rev*.
        """
        stamp = int(getoutput('git log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only records its inputs."""

    def __init__(self):
        """Nothing to set up; create() stores the state."""

    def create(self, data, path):
        """Remember the collected *data* and the output *path*."""
        self.data, self.path = data, path
class HTMLReportCreator(ReportCreator):
    """Write the collected statistics as static HTML pages and graphs.

    create() produces index.html, activity.html, authors.html, files.html
    and tags.html in the output directory, together with the gnuplot data
    and script files, and then runs gnuplot to render the PNG graphs.
    """

    def create(self, data, path):
        """Generate the complete report for *data* inside directory *path*.

        data -- a collector that has already gathered its statistics.
        path -- existing output directory.

        Side effect: createGraphs() changes the current working directory
        to *path*.
        """
        ReportCreator.create(self, data, path)

        self._write_index(data, path)
        self._write_activity(data, path)
        self._write_authors(data, path)
        self._write_files(path)
        self._write_tags(data, path)

        self.createGraphs(path)

    def _escape(self, text):
        """Escape text taken from the repository for inclusion in HTML."""
        # '&' first, so the entities produced below are not escaped again.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
                    .replace('"', '&quot;'))

    def _percent(self, part, whole):
        """Return part/whole as a float percentage (0.0 when whole is 0)."""
        if not whole:
            return 0.0
        return (100.0 * part) / whole

    def _write_index(self, data, path):
        """Write index.html: generation time, report period and totals."""
        # %M is minutes; '%H:%m:%S' would print the month in that position.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        with open(path + '/index.html', 'w') as f:
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('</body>\n</html>')

    def _write_activity(self, data, path):
        """Write activity.html and the .dat files its graphs are drawn from."""
        totalcommits = data.getTotalCommits()
        with open(path + '/activity.html', 'w') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            for i in range(1, 25):
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            for i in range(0, 24):
                f.write('<td>%d</td>' % hour_of_day.get(i, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for i in range(0, 24):
                f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(i, 0), totalcommits))
            f.write('</tr></table>')
            f.write('<img src="hour_of_day.png" />')
            # The data file is 1-based to match the table header and the
            # xrange used by the gnuplot script.  It is written only once.
            with open(path + '/hour_of_day.dat', 'w') as fg:
                for i in range(0, 24):
                    fg.write('%d %d\n' % (i + 1, hour_of_day.get(i, 0)))

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            with open(path + '/day_of_week.dat', 'w') as fp:
                for d in range(0, 7):
                    # A weekday without commits has no entry in the mapping.
                    fp.write('%d %d\n' % (d + 1, day_of_week.get(d, 0)))
                    f.write('<tr>')
                    f.write('<th>%d</th>' % (d + 1))
                    if d in day_of_week:
                        f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], self._percent(day_of_week[d], totalcommits)))
                    else:
                        f.write('<td>0</td>')
                    f.write('</tr>')
            f.write('</table>')
            f.write('<img src="day_of_week.png" />')

            # Commits by year/month
            f.write('<h2>Commits by year/month</h2>')
            f.write('<table><tr><th>Month</th><th>Commits</th></tr>')
            for yymm in sorted(data.commits_by_month.keys(), reverse=True):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table>')
            f.write('<img src="commits_by_year_month.png" />')
            with open(path + '/commits_by_year_month.dat', 'w') as fg:
                for yymm in sorted(data.commits_by_month.keys()):
                    fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))

            # Commits by year
            f.write('<h2>Commits by year</h2>')
            f.write('<table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
            for yy in sorted(data.commits_by_year.keys(), reverse=True):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], self._percent(data.commits_by_year[yy], totalcommits)))
            f.write('</table>')
            f.write('<img src="commits_by_year.png" />')
            with open(path + '/commits_by_year.dat', 'w') as fg:
                for yy in sorted(data.commits_by_year.keys()):
                    fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))

            f.write('</body></html>')

    def _write_top_authors(self, f, per_period, totals):
        """Write one table row per period naming its most active author.

        per_period -- period -> {author -> commits}
        totals     -- period -> total commits in that period
        """
        for period in sorted(per_period.keys(), reverse=True):
            authordict = per_period[period]
            # Highest commit count last; ties are resolved by author name.
            top = list(getkeyssortedbyvalues(authordict))[-1]
            commits = authordict[top]
            # Float percentage: integer division would truncate it.
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (period, self._escape(top), commits, self._percent(commits, totals[period]), totals[period]))

    def _write_authors(self, data, path):
        """Write authors.html: the author list and author of month/year."""
        with open(path + '/authors.html', 'w') as f:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (self._escape(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            self._write_top_authors(f, data.author_of_month, data.commits_by_month)
            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            self._write_top_authors(f, data.author_of_year, data.commits_by_year)
            f.write('</table>')

            f.write('</body></html>')

    def _write_files(self, path):
        """Write files.html (currently only headings)."""
        with open(path + '/files.html', 'w') as f:
            self.printHeader(f)
            f.write('<h1>Files</h1>')
            self.printNav(f)

            f.write('<h2>File count by date</h2>')

            f.write('</body></html>')

    def _write_tags(self, data, path):
        """Write tags.html: tag count, commits per tag and tags by date."""
        with open(path + '/tags.html', 'w') as f:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            if len(data.tags) > 0:
                # float() keeps the fractional part shown by '%.2f'.
                f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (float(data.getTotalCommits()) / len(data.tags)))
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc
            dated = sorted(((info['date'], name) for name, info in data.tags.items()), reverse=True)
            for _date, tag in dated:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (self._escape(tag), data.tags[tag]['date']))
            f.write('</table>')

            f.write('</body></html>')

    def createGraphs(self, path):
        """Write the gnuplot scripts into *path* and render them to PNG.

        Changes the current working directory to *path*, because the
        scripts refer to their data and output files by bare name.
        """
        print('Generating graphs...')

        # (base name, script body); GNUPLOT_COMMON is prepended to each.
        plots = (
            # hour of day
            ('hour_of_day', """
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set xtics 4
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # day of week
            ('day_of_week', """
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set xtics 1
set ylabel "Commits"
plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # commits_by_year_month
            # TODO rotate xtic labels by 90 degrees
            ('commits_by_year_month', """
set output 'commits_by_year_month.png'
unset key
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics 15768000
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # commits_by_year
            ('commits_by_year', """
set output 'commits_by_year.png'
unset key
set xtics 1
set ylabel "Commits"
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
"""),
        )

        for name, script in plots:
            with open(path + '/' + name + '.plot', 'w') as f:
                f.write(GNUPLOT_COMMON)
                f.write(script)

        os.chdir(path)
        for name, _script in plots:
            # Report a failed or missing gnuplot instead of ignoring it.
            if os.system('gnuplot %s.plot' % name) != 0:
                print('Warning: gnuplot failed for %s.plot' % name)

    def printHeader(self, f):
        """Write the opening HTML (head section and <body> tag) to *f*."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list linking the report pages to *f*."""
        # The <ul> opening tag pairs with the closing </ul> below.
        # NOTE(review): lines.html is linked here but create() never
        # writes such a page.
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
# Help text shown when the command line is incomplete.
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

# Both positional arguments are required.
if not len(sys.argv) >= 3:
    print(usage)
    sys.exit(0)

# Resolve the output path before changing directory, so a relative
# <outputpath> is interpreted from where the user started the script.
gitpath, outputpath = sys.argv[1], os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector runs its git commands in the current directory.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)