gitstats: Teach 'stats.py author -f' to sort output by commit count
[git-stats.git] / src / git_stats / author.py
blob457ab0bb5d7b159b597cc7e0c6b3c7fb0f09e396
1 #!/usr/bin/env python
3 import operator
4 import os
5 import sys
7 from optparse import OptionParser, OptionValueError
8 from git import Repo
10 from git_stats import parse
class Activity:
    """Plain record with the statistics gathered for one file.

    A fresh record starts with all counters (count, added, deleted) at
    zero and with an empty list of ids.
    """

    def __init__(self):
        # Each record owns its list, so ids are never shared by accident.
        self.id = []
        self.count = self.added = self.deleted = 0

    def __str__(self):
        """Renders the record as '<count>: <added>+ <deleted>- <net>~'."""
        net = self.added - self.deleted
        columns = (self.count, self.added, self.deleted, net)
        return "%4d: %5d+ %5d- %5d~" % columns
def _recordActivities(activityByAuthor, activities):
    """Merges the activities of one log record into the per-author totals.

    Params:
      activityByAuthor: The dictionary (author -> path -> Activity) to update.
      activities: The Activity objects parsed from one log record; each is
                  credited to every id in its 'id' list.
    """

    for activity in activities:
        for author in activity.id:
            activityByPath = activityByAuthor.setdefault(author, {})

            # If we have not encountered this path, create an entry
            known = activityByPath.get(activity.path)
            if known is None:
                known = activityByPath[activity.path] = Activity()

            known.added += activity.added
            known.deleted += activity.deleted
            known.count += 1


def activityInArea(log):
    r"""Parses the specified lines of a commit log.

    The log is expected to consist of records in the format described below,
    with an empty line after each record (optional after the last one):

    [<id>\n]+
    [<lines added>\t<lines deleted>\t<path>]+

    Params:
      log: An iterable with the lines of a log formatted as described above.

    Returns:
      A dictionary containing activity per author is returned.
      Each author contains a dictionary with activity per path.
      Each path contains one Activity object with the aggregated results.
    """

    # The aggregated result: author -> path -> Activity
    activityByAuthor = {}

    # The ids and activities of the record that is currently being read
    activities = []
    ids = []

    for i, line in enumerate(log, 1):
        splitline = line.split('\t')
        size = len(splitline)
        blank = not line.strip()

        if size == 1 and not blank:
            # No separator on this line, so it is an id. Strip only the line
            # terminator: the last line of a log may not have one, and
            # chopping a fixed character would truncate the id.
            ids.append(line.rstrip('\r\n'))
        elif size == 3:
            activity = Activity()
            activity.id = ids

            try:
                addpart, deletepart = splitline[0], splitline[1]

                # A '-' stands in for a count that is not available
                # (git prints it for binary files); treat it as zero.
                activity.added = 0 if addpart == '-' else int(addpart)
                activity.deleted = 0 if deletepart == '-' else int(deletepart)
            except ValueError as e:
                print("On line '%d', could not convert number: %s" % (i,str(e)))

            activity.path = splitline[2].rstrip('\r\n')
            activities.append(activity)
        elif blank:
            # An empty line ends the record; store it and start a fresh one
            _recordActivities(activityByAuthor, activities)
            activities = []
            ids = []
        else:
            print("Cannot parse line %d." % i)

    # The log does not have to end with an empty line, so make sure that the
    # last record is stored as well.
    _recordActivities(activityByAuthor, activities)

    return activityByAuthor
def activityInFile(path, id, start_from, relative):
    """Shows the activity for the file in the current repo.

    Params:
      path: The path to filter on.
      id: The pretty-format placeholder (without the leading '%') that
          identifies the developer in the result.
      start_from: The commit to start logging from.
      relative: Treat path as relative to the current working directory.

    Returns:
      A dictionary mapping each id found in the log to the number of log
      lines that carry it. The dictionary is empty when the log is empty.
    """

    git = Repo(".").git

    log = git.log(start_from, "--", path, pretty="format:%" + id,
                  with_keep_cwd=relative)

    result = {}

    # Splitting an empty log yields one empty string, which would be counted
    # as a nameless developer with one commit.
    if not log:
        return result

    for line in log.split('\n'):
        result[line] = result.get(line, 0) + 1

    return result
def activity(id, field, start_from):
    """Shows the activity for the specified developer in the current repo.

    Params:
      id: The id of the developer, as specified by field. When empty, the
          activity of every developer is listed, grouped per developer.
      field: The field to filter on.
      start_from: The commit to start logging from.

    Returns:
      A list with the lines to show: the activity per path, or the list of
      known developers when the specified id is not among them.
    """

    git = Repo(".").git
    log = git.log("--numstat", start_from, "--", pretty="format:%" + field)

    allActivity = activityInArea(log.splitlines(True))

    result = []

    if not id:
        for author in sorted(allActivity):
            activityByPath = allActivity[author]
            result.append(author + ":")

            for path in sorted(activityByPath):
                value = activityByPath[path]
                result.append("\t%s = %s" % (str(value), str(path)))

            # Separate the developers with an empty line
            result.append("")

        return result

    if id not in allActivity:
        result.append("Unknown author " + id)
        result.append("Known authors:")
        # Sorted, like every other listing, so the output is deterministic
        result.extend(sorted(allActivity))

        return result

    activityByPath = allActivity[id]

    for path in sorted(activityByPath):
        value = activityByPath[path]
        result.append("%s = %s" % (str(value), str(path)))

    return result
def aggregateActivity(id_filter, field, start_from):
    """Aggregates the activity per path over the developers

    Args:
      id_filter: The id to filter on, if None all developers will be shown.
      field: The field to filter on.
      start_from: The commit to start logging from.

    Returns:
      A list with one line per path, sorted by path, showing the summed
      activity of the selected developers.
    """

    git = Repo(".").git
    log = git.log("--numstat", start_from, "--", pretty="format:%" + field)

    allActivity = activityInArea(log.splitlines(True))

    aggregatedActivity = {}

    for author, activityByPath in allActivity.items():
        # Honour the filter; without one every developer is included
        if id_filter and author != id_filter:
            continue

        for path, activity in activityByPath.items():
            total = aggregatedActivity.get(path)
            if total is None:
                total = aggregatedActivity[path] = Activity()

            total.added += activity.added
            total.deleted += activity.deleted
            total.count += activity.count

    result = []

    for path in sorted(aggregatedActivity):
        value = aggregatedActivity[path]
        result.append("%s = %s" % (str(value), str(path)))

    return result
def _checkOptions(parser, options):
    """Checks the specified options and uses the parser to indicate errors

    Args:
      parser: The parser to use to signal when the options are bad.
      options: The options to check.
    """

    # The mutually exclusive modes of operation
    # NOTE(review): parse.isUnique presumably accepts the list only when
    # exactly one entry is set -- confirm against git_stats.parse.
    modes = [options.aggregate, options.developer, options.file, options.everyone]

    if not parse.isUnique(modes, at_least_one=True):
        parser.error("Please choose exactly one mode")

    if options.file:
        try:
            parse.checkFile(value=options.file, relative=options.relative)
        except OptionValueError as e:
            # parser.error expects the message text, not the exception
            parser.error(str(e))
def dispatch(*args):
    """Dispatches author related commands

    Args:
      args: The command line arguments, handed to the option parser as-is.
    """

    progname = os.path.basename(sys.argv[0]) + " author"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-a", "--aggregate",
        action="store_true",
        help="aggregate the results")

    parser.add_option(
        "-e", "--everyone",
        action="store_true",
        help="show the activity of all developers")

    parser.add_option(
        "-d", "--developer",
        help="the id to filter on")

    parser.add_option(
        "-f", "--file",
        help="the file to filter on")

    parser.add_option(
        "-i", "--id",
        help="the one/two letter identifier specifying which field to use as id")

    parser.add_option(
        "-s", "--start-from",
        type="commit",
        metavar="COMMIT",
        help="the commit to start logging from")

    parser.add_option(
        "-r", "--relative",
        action="store_true",
        help="paths are relative to the current directory")

    parser.set_defaults(id="ae", start_from="HEAD", relative=False)

    (options, args) = parser.parse_args(list(args))

    _checkOptions(parser, options)

    result = []

    if options.aggregate:
        result = aggregateActivity(options.developer, options.id, options.start_from)
    elif options.developer or options.everyone:
        result = activity(options.developer, options.id, options.start_from)
    elif options.file:
        activity_for_file = activityInFile(options.file,
                                           options.id,
                                           options.start_from,
                                           options.relative)

        # Order by id first; the sort by commit count below is stable, so
        # ids with the same count are always shown in the same order.
        by_id = sorted(activity_for_file.items())

        for key, value in sorted(by_id, reverse=True,
                                 key=operator.itemgetter(1)):
            result.append("%s: %s" % (key, str(value)))

    for line in result:
        print(line)