gitstats: Check for empty diffs
[git-stats.git] / src / git_stats / author.py
blobc7da388d3004377a6d2940100d5fdfeb3b6fa8a9
1 #!/usr/bin/env python
3 import os
4 import sys
6 from optparse import OptionParser, OptionValueError
7 from git import Repo
9 from git_stats import parse
class Activity(object):
    """Simple storage class containing stats on the activity in one file."""

    def __init__(self):
        # Number of changes that were folded into this object.
        self.count = 0
        # Total number of lines added and deleted.
        self.added = 0
        self.deleted = 0
        # The ids (one per id line of the commit) this activity belongs to.
        self.id = []
        # Path of the file the stats refer to. Declared here so every
        # instance has the attribute; parsers overwrite it with the real path.
        self.path = None

    def __str__(self):
        # Fixed-width columns so that a list of activities lines up.
        return "%4d: %5d+ %5d-" % (self.count, self.added, self.deleted)
def _flushActivities(activities, activityByAuthor):
    """Folds the parsed activities of one commit into the per-author totals."""
    for activity in activities:
        for author in activity.id:
            activityByPath = activityByAuthor.setdefault(author, {})

            # If we have not encountered this path, create an entry
            total = activityByPath.get(activity.path)
            if total is None:
                total = activityByPath[activity.path] = Activity()

            total.added += activity.added
            total.deleted += activity.deleted
            total.count += 1


def activityInArea(log):
    """Parses the specified commit log lines.

    The input is expected to consist of one group per commit, the groups
    being separated by an empty line. Each group looks like this:

      one or more lines containing an id, followed by
      one or more lines of the form
      <lines added> TAB <lines deleted> TAB <path>

    A '-' in one of the number columns is counted as 0.

    Params:
      log: An iterable of lines formatted as described above. The lines may
           still carry their line terminator.

    Returns:
      A dictionary containing activity per author is returned.
      Each author contains a dictionary with activity per path.
      Each path contains one Activity object with the aggregated results.
    """

    # Create a dict to store the activity stats per author
    activityByAuthor = {}

    # The activities and ids of the commit that is currently being parsed
    activities = []
    ids = []

    for lineNumber, line in enumerate(log, 1):
        # Split the line at the tab and store the data
        splitline = line.split('\t')
        size = len(splitline)
        isBlank = not line.lstrip()

        if size == 1 and not isBlank:
            # There is something on this line, but it contains no separator,
            # so it is an id. Only strip the line terminator: the last line
            # of the log may not have one, and slicing would eat a character.
            ids.append(line.rstrip('\r\n'))
        elif size == 3:
            activity = Activity()
            activity.id = ids

            try:
                addpart = splitline[0]
                deletepart = splitline[1]

                if addpart == '-':
                    addpart = 0

                if deletepart == '-':
                    deletepart = 0

                activity.added = int(addpart)
                activity.deleted = int(deletepart)
            except ValueError as e:
                # Best effort: report the problem and keep what was parsed
                print("On line '" + str(lineNumber) +
                      "', could not convert number: " + str(e))

            activity.path = splitline[2].rstrip('\r\n')
            activities.append(activity)
        elif isBlank:
            # An empty line ends the current commit
            _flushActivities(activities, activityByAuthor)

            # Start with a clean slate for the next commit
            activities = []
            ids = []
        else:
            print("Cannot parse line " + str(lineNumber) + ".")

    # The log does not have to end with an empty line; make sure the last
    # commit is not silently dropped.
    _flushActivities(activities, activityByAuthor)

    # Return the result
    return activityByAuthor
def activityInFile(path, id, start_from, relative):
    """Shows the activity for the file in the current repo.

    Params:
      path: The path to filter on.
      id: The pretty-format placeholder (without the leading '%') that is
          printed for each commit, and thus used to group the commits.
      start_from: The commit to start logging from.
      relative: Treat path as relative to the current working directory
                (passed to the git call as with_keep_cwd).

    Returns:
      A dictionary mapping each distinct line of the log output to the
      number of times it occurred.
    """

    git = Repo(".").git

    output = git.log(start_from, "--", path,
                     pretty="format:%" + id, with_keep_cwd=relative)

    # ''.split('\n') yields [''], which would be counted as one commit by an
    # empty id; treat empty output as "no commits" instead.
    lines = output.split('\n') if output else []

    result = {}

    for line in lines:
        # Start at zero if there was no entry for this id yet
        result[line] = result.get(line, 0) + 1

    return result
def activity(id, field, startFrom):
    """Shows the activity for the specified developer in the current repo.

    Params:
      id: The id of the developer, as specified by field. When empty or
          None the activity of every developer is listed.
      field: The pretty-format placeholder (without the leading '%') to use
             as id.
      startFrom: The commit to start logging from.

    Returns:
      A list of lines describing the activity per path. If id is not a
      known author, the list contains a message and the known authors.
    """

    git = Repo(".").git
    output = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    # Keep the line terminators, activityInArea strips them itself
    allActivity = activityInArea(output.splitlines(True))

    result = []

    if not id:
        # No developer specified, list all of them
        for author, activityByPath in allActivity.items():
            result.append(author + ":")

            for path in sorted(activityByPath):
                value = activityByPath[path]
                result.append("\t" + str(value) + " = " + str(path))

            result.append("")

        return result

    if id not in allActivity:
        result.append("Unknown author " + id)
        result.append("Known authors:")
        result.extend(allActivity.keys())

        return result

    activityByPath = allActivity[id]

    for path in sorted(activityByPath):
        value = activityByPath[path]
        result.append(str(value) + " = " + str(path))

    return result
def aggregateActivity(idFilter, field, startFrom):
    """Aggregates the activity for all developers

    Args:
      idFilter: The id to filter on, if None all developers will be shown.
      field: The pretty-format placeholder (without the leading '%') to use
             as id.
      startFrom: The commit to start logging from.

    Returns:
      A list of lines, sorted by path, each showing the summed activity of
      the selected developers for one path.
    """

    git = Repo(".").git
    output = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    # Keep the line terminators, activityInArea strips them itself
    allActivity = activityInArea(output.splitlines(True))

    aggregatedActivity = {}

    for author, activityByPath in allActivity.items():
        # Honour the documented filter; without one everybody is included
        if idFilter and author != idFilter:
            continue

        for path, activity in activityByPath.items():
            total = aggregatedActivity.get(path)
            if total is None:
                total = aggregatedActivity[path] = Activity()

            total.added += activity.added
            total.deleted += activity.deleted
            total.count += activity.count

    result = []

    for path in sorted(aggregatedActivity):
        value = aggregatedActivity[path]
        result.append(str(value) + " = " + str(path))

    return result
def _checkOptions(parser, options):
    """Checks the specified options and uses the parser to indicate errors

    Args:
      parser: The parser to use to signal when the options are bad.
      options: The options to check.
    """

    # The modes are mutually exclusive
    opts = [options.aggregate, options.developer, options.file, options.everyone]

    if not parse.isUnique(opts):
        parser.error("Please choose exactly one mode")

    if options.file:
        try:
            parse.check_file(value=options.file, relative=options.relative)
        except OptionValueError as e:
            # Hand the parser the message rather than the exception object
            parser.error(str(e))
def dispatch(*args):
    """Dispatches author related commands

    Args:
      args: The command line arguments to parse.
    """

    progname = os.path.basename(sys.argv[0]) + " author"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-a", "--aggregate",
        action="store_true",
        help="aggregate the results")

    parser.add_option(
        "-e", "--everyone",
        action="store_true",
        help="show the activity of all developers")

    parser.add_option(
        "-d", "--developer",
        help="the id to filter on")

    parser.add_option(
        "-f", "--file",
        help="the file to filter on")

    parser.add_option(
        "-i", "--id",
        help="the one/two letter identifier specifying which field to use as id")

    parser.add_option(
        "-s", "--start-from",
        type="commit",
        metavar="COMMIT",
        help="the commit to start logging from")

    parser.add_option(
        "-r", "--relative",
        action="store_true",
        help="paths are relative to the current directory")

    parser.set_default("id", "ae")
    parser.set_default("start_from", "HEAD")
    parser.set_default("relative", False)

    (options, args) = parser.parse_args(list(args))

    _checkOptions(parser, options)

    # Defensive default so that printing below never hits an unbound name,
    # e.g. when a mode option was given with an empty value.
    result = []

    if options.aggregate:
        result = aggregateActivity(options.developer, options.id, options.start_from)
    elif options.developer or options.everyone:
        # With --everyone the developer is None, which lists all developers
        result = activity(options.developer, options.id, options.start_from)
    elif options.file:
        activity_for_file = activityInFile(options.file,
                                           options.id,
                                           options.start_from,
                                           options.relative)

        for key, value in activity_for_file.items():
            result.append(key + ": " + str(value))

    for line in result:
        print(line)