gitstats: Refactoring, cleanups and documentation
[git-stats.git] / src / git_stats / author.py
blobae838d320823ae9881fee531deeca90c12b8320f
1 #!/usr/bin/env python
3 import os
4 import sys
6 from optparse import OptionParser, OptionValueError
7 from git import Repo
9 from git_stats import parse
class Activity(object):
    """Simple storage class containing stats on the activity in one file.

    Attributes:
      count: Number of log entries that were aggregated into this record.
      added: Total number of lines added.
      deleted: Total number of lines deleted.
      id: List of ids this record is attributed to.
      path: The path this record describes, None if it was never set.
    """

    def __init__(self):
        self.count = 0
        self.added = 0
        self.deleted = 0
        self.id = []

        # Declared up front so that every instance has the attribute, also
        # the ones that are only used to accumulate totals.
        self.path = None

    def __str__(self):
        return "%4d: %5d+ %5d-" % (self.count, self.added, self.deleted)
def activityInArea(log):
    r"""Parses the specified commit log into activity per author and path.

    The log is expected to consist of groups in the format described below,
    where groups are separated from each other by an empty line:

      [<id>\n]+
      [<lines added>\t<lines deleted>\t<path>]+

    A '-' in either of the numeric columns is counted as 0.

    Params:
      log: An iterable of lines formatted as described above. The lines
           may, but do not have to, include their line terminator.

    Returns:
      A dictionary containing activity per author is returned.
      Each author contains a dictionary with activity per path.
      Each path contains one Activity object with the aggregated results.
    """

    # The aggregated result, keyed on author and then on path
    activityByAuthor = {}

    # The ids and numstat entries of the group that is currently being read
    activities = []
    ids = []

    for lineNumber, rawLine in enumerate(log, 1):
        # Only drop the line terminator; the last line of a log usually has
        # none, and blindly removing the last character would damage it.
        line = rawLine.rstrip('\r\n')
        fields = line.split('\t')
        isBlank = not line.strip()

        if len(fields) == 1 and not isBlank:
            # There is something on this line, but it contains no separator
            ids.append(line)
        elif len(fields) == 3:
            entry = Activity()
            entry.id = ids

            try:
                # The numeric columns contain '-' when there is no line count
                entry.added = 0 if fields[0] == '-' else int(fields[0])
                entry.deleted = 0 if fields[1] == '-' else int(fields[1])
            except ValueError as e:
                # Keep the entry, with whatever was converted so far
                print("On line '" + str(lineNumber) + "', could not convert number: " + str(e))

            entry.path = fields[2]
            activities.append(entry)
        elif isBlank:
            # An empty line ends the group, store it and start a fresh one
            _foldActivities(activityByAuthor, activities)
            activities = []
            ids = []
        else:
            print("Cannot parse line " + str(lineNumber) + ".")

    # The log does not have to end with an empty line, make sure that the
    # last group is not lost in that case.
    if activities:
        _foldActivities(activityByAuthor, activities)

    return activityByAuthor


def _foldActivities(activityByAuthor, activities):
    """Adds the specified activities to the per author, per path totals.

    Params:
      activityByAuthor: The dictionary to update in place.
      activities: The Activity objects of one group, each is attributed to
                  every id listed in its id attribute.
    """

    for entry in activities:
        for author in entry.id:
            activityByPath = activityByAuthor.setdefault(author, {})

            # If we have not encountered this path, create an entry
            total = activityByPath.get(entry.path)
            if total is None:
                total = Activity()
                activityByPath[entry.path] = total

            total.added += entry.added
            total.deleted += entry.deleted
            total.count += 1
def activityInFile(path, id, start_from, relative):
    """Shows the activity for the file in the current repo.

    Params:
      path: The path to filter on.
      id: The name of the pretty-format field (without the leading '%')
          that is used to identify the developers in the result.
      start_from: The commit to start logging from.
      relative: Treat path as relative to the current working directory.

    Returns:
      A dictionary mapping each value of the id field that occurs in the
      log to the number of log lines it occurs on. The dictionary is empty
      when the log is empty.
    """

    git = Repo(".").git

    output = git.log(start_from, "--", path, pretty="format:%" + id, with_keep_cwd=relative)

    result = {}

    # Splitting an empty log would still yield one (empty) line, which
    # would show up as a bogus developer with one commit.
    if not output:
        return result

    for line in output.split('\n'):
        result[line] = result.get(line, 0) + 1

    return result
def activity(id, field, startFrom):
    """Shows the activity for the specified developer in the current repo.

    Params:
      id: The id of the developer, as specified by field. When empty or
          None the activity of all developers is shown.
      field: The field to filter on.
      startFrom: The commit to start logging from.

    Returns:
      A list of lines describing the activity per path. When id is not
      known the list contains a message and the known ids instead.
    """

    git = Repo(".").git
    output = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    log = output.splitlines(True)
    allActivity = activityInArea(log)

    result = []

    if not id:
        # No developer specified, list all of them
        for author in sorted(allActivity):
            activityByPath = allActivity[author]
            result.append(author + ":")

            for path in sorted(activityByPath):
                result.append("\t" + str(activityByPath[path]) + " = " + str(path))

            result.append("")

        return result

    if id not in allActivity:
        result.append("Unknown author " + id)
        result.append("Known authors:")
        # Sorted, so that the listing is the same on every run
        result.extend(sorted(allActivity))

        return result

    activityByPath = allActivity[id]

    for path in sorted(activityByPath):
        result.append(str(activityByPath[path]) + " = " + str(path))

    return result
def aggregateActivity(idFilter, field, startFrom):
    """Aggregates the activity for all developers

    Args:
      idFilter: The id to filter on, if None all developers will be shown.
      field: The field to filter on.
      startFrom: The commit to start logging from.

    Returns:
      A list of lines, sorted by path, each describing the activity in
      one path summed over the selected developers.
    """

    git = Repo(".").git
    output = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    log = output.splitlines(True)
    allActivity = activityInArea(log)

    aggregatedActivity = {}

    for author, activityByPath in allActivity.items():
        # Only take the requested developer into account, if there is one
        if idFilter and author != idFilter:
            continue

        for path, activity in activityByPath.items():
            total = aggregatedActivity.get(path)
            if total is None:
                total = Activity()
                aggregatedActivity[path] = total

            total.added += activity.added
            total.deleted += activity.deleted
            total.count += activity.count

    result = []

    for path in sorted(aggregatedActivity):
        result.append(str(aggregatedActivity[path]) + " = " + str(path))

    return result
def _checkOptions(parser, options):
    """Checks the specified options and uses the parser to indicate errors

    Args:
      parser: The parser to use to signal when the options are bad.
      options: The options to check.
    """

    # The modes of operation, exactly one of them should be selected
    opts = [options.aggregate, options.developer, options.file, options.everyone]

    if not parse.isUnique(opts, atLeastOne=True):
        parser.error("Please choose exactly one mode")

    if options.file:
        try:
            parse.check_file(value=options.file, relative=options.relative)
        except OptionValueError as e:
            # Report the problem the same way as any other option error
            parser.error(str(e))
def dispatch(*args):
    """Dispatches author related commands

    Args:
      args: The command line arguments to parse, without the program name.
    """

    progname = os.path.basename(sys.argv[0]) + " author"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-a", "--aggregate",
        action="store_true",
        help="aggregate the results")

    parser.add_option(
        "-e", "--everyone",
        action="store_true",
        help="show the activity of all developers")

    parser.add_option(
        "-d", "--developer",
        help="the id to filter on")

    parser.add_option(
        "-f", "--file",
        help="the file to filter on")

    parser.add_option(
        "-i", "--id",
        help="the one/two letter identifier specifying which field to use as id")

    parser.add_option(
        "-s", "--start-from",
        type="commit",
        metavar="COMMIT",
        help="the commit to start logging from")

    parser.add_option(
        "-r", "--relative",
        action="store_true",
        help="paths are relative to the current directory")

    parser.set_default("id", "ae")
    parser.set_default("start_from", "HEAD")
    parser.set_default("relative", False)

    (options, args) = parser.parse_args(list(args))

    _checkOptions(parser, options)

    # Nothing is printed should none of the modes below apply
    result = []

    if options.aggregate:
        result = aggregateActivity(options.developer, options.id, options.start_from)
    elif options.developer or options.everyone:
        result = activity(options.developer, options.id, options.start_from)
    elif options.file:
        activity_for_file = activityInFile(options.file,
                                           options.id,
                                           options.start_from,
                                           options.relative)

        # Sorted, like the output of the other modes
        for key in sorted(activity_for_file):
            result.append(key + ": " + str(activity_for_file[key]))

    for line in result:
        print(line)