Activity: close body/html.
[gitstats.git] / statgit
blobca10f4b3d3002446fc02ccdfa3cd34efac7e6248
1 #!/usr/bin/python
2 # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
3 # GPLv2
4 import commands
5 import datetime
6 import os
7 import re
8 import sys
def getoutput(cmd):
    """Echo *cmd* to stdout, then run it and return what it printed.

    The command string is handed to ``commands.getoutput`` unchanged.
    """
    print('>> %s' % cmd)
    return commands.getoutput(cmd)
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value.

    The sort runs over (value, key) pairs, so keys whose values compare
    equal are ordered by the keys themselves.
    """
    pairs = [(value, key) for key, value in dict.items()]
    pairs.sort()
    return [key for _value, key in pairs]
18 # TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')
class DataCollector:
    """Base class for repository data collectors.

    Every accessor here returns a placeholder value (empty container,
    ``None``, ``-1`` or the current time); subclasses override them.
    """

    def __init__(self):
        pass

    def collect(self, dir):
        """Main entry point for extracting data from the repository.

        The base implementation only remembers *dir* on the instance.
        """
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author*; the base class has none."""
        return None

    def getActivityByDayOfWeek(self):
        """Return commit activity per day of week (empty in the base class)."""
        return {}

    def getActivityByHourOfDay(self):
        """Return commit activity per hour of day (empty in the base class)."""
        return {}

    def getAuthors(self):
        """Return a list of authors (empty in the base class)."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit date; the base class answers with now()."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit date; the base class answers with now()."""
        return datetime.datetime.now()

    def getTags(self):
        """Return a list of tags (empty in the base class)."""
        return []

    def getTotalAuthors(self):
        """Return the number of authors; -1 in the base class."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits; -1 in the base class."""
        return -1

    def getTotalFiles(self):
        """Return the number of files; -1 in the base class."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code; -1 in the base class."""
        return -1
class GitDataCollector(DataCollector):
    """Collect repository statistics by running git commands via getoutput().

    The commands are issued without a path argument, so they act on the
    process's current working directory.
    """

    def collect(self, dir):
        """Gather totals, tag information and per-commit statistics.

        *dir* is recorded by the base class; the git commands themselves run
        in the current working directory.
        """
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git-log |git-shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git-rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git-ls-files |wc -l'))
        self.total_lines = int(getoutput('git-ls-files |xargs cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits

        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp}

        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self.tags = {}  # tag name -> {stamp, hash, date}

        self._collect_tags()
        self._collect_commits()

    def _collect_tags(self):
        """Fill self.tags with the stamp, hash and date of every tag."""
        for line in getoutput('git-show-ref --tags').split('\n'):
            # No tags means the output is empty and split() yields one empty
            # string; unpacking that would raise ValueError.
            if not line:
                continue
            (rev_hash, tag) = line.split(' ')
            tag = tag.replace('refs/tags/', '')
            output = getoutput('git-log "%s" --pretty=format:"%%at %%an" -n 1' % rev_hash)
            if not output:
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': rev_hash,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _collect_commits(self):
        """Walk every commit reachable from HEAD and update the counters."""
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            # An empty line is not a commit; counting it would add a bogus
            # commit at epoch 0 by an author named ''.
            if not line:
                continue

            # linux-2.6 says "<unknown>" for one line O_o
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp: the first line seen becomes the
            # last commit and every later line overwrites the first commit,
            # which relies on git-rev-list listing newest commits first.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity by hour of day and by day of week
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # author stats
            info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in info:
                info['last_commit_stamp'] = stamp
            info['first_commit_stamp'] = stamp
            info['commits'] = info.get('commits', 0) + 1

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            by_author = self.author_of_month.setdefault(yymm, {})
            by_author[author] = by_author.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            by_author = self.author_of_year.setdefault(yy, {})
            by_author[author] = by_author.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

    def getActivityByDayOfWeek(self):
        """Return the weekday -> commit count mapping built by collect()."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the hour -> commit count mapping built by collect()."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share of all commits and first/last dates.

        Raises KeyError when *author* was not seen by collect().
        """
        a = self.authors[author]

        commits = a['commits']
        total = self.getTotalCommits()
        # Guard the division: the total comes from a separate git command.
        if total:
            commits_frac = (100 * float(commits)) / total
        else:
            commits_frac = 0.0
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        res = {'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last}
        return res

    def getAuthors(self):
        """Return the names of all authors seen by collect()."""
        return self.authors.keys()

    def getFirstCommitDate(self):
        """Return the datetime of the first commit stamp."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the datetime of the last commit stamp."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names reported by git-show-ref (no empty entries)."""
        lines = getoutput('git-show-ref --tags |cut -d/ -f3')
        # Drop the empty string produced when there are no tags.
        return [name for name in lines.split('\n') if name]

    def getTagDate(self, tag):
        """Return the 'YYYY-MM-DD' date of the revision 'tags/<tag>'."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count gathered by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count gathered by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the file count gathered by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the line count gathered by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the 'YYYY-MM-DD' date of revision *rev*.

        Raises ValueError when git-log does not print an integer stamp.
        """
        stamp = int(getoutput('git-log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only remembers its inputs."""

    def __init__(self):
        pass

    def create(self, data, path):
        """Store the collected *data* and the output *path* on the instance."""
        self.path = path
        self.data = data
class HTMLReportCreator(ReportCreator):
    """Write the collected statistics as HTML pages plus two .dat files.

    NOTE(review): author and tag names are written into the HTML unescaped;
    consider escaping them if repositories are not trusted.
    """

    def create(self, data, path):
        """Write index, activity, authors and tags pages into *path*.

        *data* is a collector whose collect() has already run; *path* must be
        an existing directory.
        """
        ReportCreator.create(self, data, path)

        self._write_index(data, path)
        self._write_activity(data, path)
        self._write_authors(data, path)
        self._write_tags(data, path)

    def _percent(self, part, total):
        """Return *part* as a float percentage of *total* (0.0 if total is 0)."""
        if not total:
            return 0.0
        return (100.0 * part) / total

    def _write_index(self, data, path):
        """Write index.html with the general summary."""
        # %M is minutes; %m would repeat the month in the time part.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        with open(path + "/index.html", 'w') as f:
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('</body>\n</html>')

    def _write_activity(self, data, path):
        """Write activity.html, hour_of_day.dat and day_of_week.dat."""
        totalcommits = data.getTotalCommits()
        with open(path + '/activity.html', 'w') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            # Header labels must match the 0..23 hours used for the cells.
            for i in range(0, 24):
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            with open(path + '/hour_of_day.dat', 'w') as fp:
                for i in range(0, 24):
                    count = hour_of_day.get(i, 0)
                    f.write('<td>%d</td>' % count)
                    fp.write('%d %d\n' % (i, count))
            f.write('</tr>\n<tr><th>%</th>')
            for i in range(0, 24):
                f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(i, 0), totalcommits))
            f.write('</tr></table>')

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            with open(path + '/day_of_week.dat', 'w') as fp:
                for d in range(0, 7):
                    # A weekday without commits has no key; write 0 for it.
                    fp.write('%d %d\n' % (d + 1, day_of_week.get(d, 0)))
                    f.write('<tr>')
                    f.write('<th>%d</th>' % (d + 1))
                    if d in day_of_week:
                        f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], self._percent(day_of_week[d], totalcommits)))
                    else:
                        f.write('<td>0</td>')
                    f.write('</tr>')
            f.write('</table>')

            # Commits by year/month
            f.write('<h2>Commits by year/month</h2>')
            f.write('<table><tr><th>Month</th><th>Commits</th></tr>')
            for yymm in sorted(data.commits_by_month.keys(), reverse=True):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table>')

            # Commits by year
            f.write('<h2>Commits by year</h2>')
            f.write('<table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
            for yy in sorted(data.commits_by_year.keys(), reverse=True):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], self._percent(data.commits_by_year[yy], totalcommits)))
            f.write('</table>')

            f.write('</body></html>')

    def _write_authors(self, data, path):
        """Write authors.html: author list, author of month, author of year."""
        with open(path + '/authors.html', 'w') as f:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            for yymm in sorted(data.author_of_month.keys(), reverse=True):
                authordict = data.author_of_month[yymm]
                # Keys come back in ascending commit order; the last one leads.
                top = list(getkeyssortedbyvalues(authordict))[-1]
                commits = authordict[top]
                total = data.commits_by_month[yymm]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yymm, top, commits, self._percent(commits, total), total))

            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            for yy in sorted(data.author_of_year.keys(), reverse=True):
                authordict = data.author_of_year[yy]
                top = list(getkeyssortedbyvalues(authordict))[-1]
                commits = authordict[top]
                total = data.commits_by_year[yy]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yy, top, commits, self._percent(commits, total), total))
            f.write('</table>')

            f.write('</body></html>')

    def _write_tags(self, data, path):
        """Write tags.html: tag totals and the tags sorted by date, newest first."""
        with open(path + '/tags.html', 'w') as f:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            # Float division, and no division at all for a tagless repository.
            if data.tags:
                commits_per_tag = float(data.getTotalCommits()) / len(data.tags)
            else:
                commits_per_tag = 0.0

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % commits_per_tag)
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc (ties broken by tag name, also desc)
            tags_sorted_by_date_desc = sorted(data.tags, key=lambda name: (data.tags[name]['date'], name), reverse=True)
            for tag in tags_sorted_by_date_desc:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
            f.write('</table>')

            f.write('</body></html>')

    def printHeader(self, f):
        """Write the opening HTML, head section and <body> tag to *f*."""
        f.write("""<html>
<head>
    <title>StatGit</title>
    <link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list linking the report pages to *f*."""
        # The opening <ul> pairs with the closing </ul> below.
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
# Command-line entry: statgit <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

# NOTE(review): the options listed in the usage text are not parsed here;
# the first two arguments are taken as the paths directly.
if len(sys.argv) < 3:
    print(usage)
    # Missing arguments are an error, so report a non-zero exit status.
    sys.exit(1)

gitpath = sys.argv[1]
# Resolve the output path before chdir so a relative path stays valid.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector runs git commands in the current working directory.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)