gitstats: Added a 'net loc' to 'author -a'
[git-stats.git] / src / git_stats / author.py
blob0690196be676c1663e8f5d1df90e03669462e878
1 #!/usr/bin/env python
3 import os
4 import sys
6 from optparse import OptionParser, OptionValueError
7 from git import Repo
9 from git_stats import parse
class Activity:
    """Plain record holding the activity statistics gathered for one file."""

    def __init__(self):
        # Nothing has been recorded yet, so every tally starts out at zero
        self.count = self.added = self.deleted = 0

        # The ids this activity is attributed to
        self.id = []

    def __str__(self):
        # Rendered as "<count>: <added>+ <deleted>- <net>~", where the net
        # value is the number of added lines minus the number of deleted ones
        net = self.added - self.deleted
        fields = (self.count, self.added, self.deleted, net)

        return "%4d: %5d+ %5d- %5d~" % fields
def _recordActivities(activityByAuthor, activities):
    """Adds the activities of one commit group to the per-author totals.

    Params:
        activityByAuthor: The dictionary to update in place. It maps each
                          author to a dictionary of Activity totals per path.
        activities: The Activity entries parsed from one commit group.
    """

    for activity in activities:
        for author in activity.id:
            activityByPath = activityByAuthor.setdefault(author, {})

            total = activityByPath.get(activity.path)

            # If we have not encountered this path, create an entry
            if total is None:
                total = activityByPath[activity.path] = Activity()

            total.added += activity.added
            total.deleted += activity.deleted
            total.count += 1


def activityInArea(log):
    """Parses the specified commit log lines.

    The log is expected to consist of groups in the format described below,
    separated from each other by a blank line:

    [<id>]+
    [<lines added><TAB><lines deleted><TAB><path>]+

    The last group does not have to be followed by a blank line, and the
    last line does not have to end in a newline.

    Params:
        log: An iterable with the lines of a log formatted as described above.

    Returns:
        A dictionary containing activity per author is returned.
        Each author contains a dictionary with activity per path.
        Each path contains one Activity object with the aggregated results.
    """

    # Create a dict to store the activity stats per author
    activityByAuthor = {}

    # The entries of the commit group that is currently being read
    activities = []
    ids = []

    # Parse all the lines in the log, counting them from 1 for the messages
    for i, line in enumerate(log, 1):
        # Split the line at the tab and store the data
        splitline = line.split('\t')
        size = len(splitline)
        blank = not line.strip()

        # There is something on this line, but it contains no separator
        if size == 1 and not blank:
            # Store the id minus its line terminator (if it has one at all)
            ids.append(line.rstrip('\r\n'))
        elif size == 3:
            activity = Activity()

            # This is the group's id list itself, not a copy, so ids that
            # are read later in the same group apply to this entry as well
            activity.id = ids

            try:
                addpart = splitline[0]
                deletepart = splitline[1]

                # A dash instead of a number counts as no lines at all
                # (git's --numstat prints dashes for binary files)
                if addpart == '-':
                    addpart = 0

                if deletepart == '-':
                    deletepart = 0

                activity.added = int(addpart)
                activity.deleted = int(deletepart)
            except ValueError as e:
                # Report it, but keep the entry with whatever was converted
                print("On line '" + str(i) + "', could not convert number: " + str(e))

            # Get the path minus its line terminator (if it has one at all)
            activity.path = splitline[2].rstrip('\r\n')
            activities.append(activity)
        elif blank:
            # A blank line ends the group, add it to the totals
            _recordActivities(activityByAuthor, activities)

            # Create a fresh group to store the next round in
            activities = []
            ids = []
        else:
            print("Cannot parse line " + str(i) + ".")

    # The log need not end in a blank line, so the last group is still pending
    _recordActivities(activityByAuthor, activities)

    # Return the result
    return activityByAuthor
def activityInFile(path, id, start_from, relative):
    """Shows the activity for the file in the current repo.

    Params:
        path: The path to filter on.
        id: The 'git log' format placeholder (without the leading '%') that
            identifies a developer in the result.
        start_from: The commit to start logging from.
        relative: Treat path as relative to the current working directory.

    Returns:
        A dictionary mapping each id to the number of lines of the log
        output that consist of that id.
    """

    git = Repo(".").git

    output = git.log(start_from, "--", path, pretty="format:%" + id, with_keep_cwd=relative)

    result = {}

    # Nothing was logged at all. Splitting the empty output would yield one
    # empty line, which would be counted as a commit by a developer named ''
    if not output:
        return result

    for line in output.split('\n'):
        # Start counting from zero if there was no entry for this id yet
        result[line] = result.get(line, 0) + 1

    return result
def activity(id, field, startFrom):
    """Shows the activity for the specified developer in the current repo.

    Params:
        id: The id of the developer, as specified by field.
            When empty or None the activity of all developers is shown.
        field: The 'git log' format placeholder (without the leading '%')
               that is used as id.
        startFrom: The commit to start logging from.

    Returns:
        A list with the lines to show. Paths are listed in sorted order,
        each preceded by the activity in that path. When the developer is
        not known the list names the known developers instead.
    """

    git = Repo(".").git
    log = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    # Keep the line endings, the parser treats blank lines as separators
    allActivity = activityInArea(log.splitlines(True))

    result = []

    # No developer was specified, show all of them
    if not id:
        for author in sorted(allActivity):
            activityByPath = allActivity[author]
            result.append(author + ":")

            for path in sorted(activityByPath):
                result.append("\t" + str(activityByPath[path]) + " = " + str(path))

            result.append("")

        return result

    if id not in allActivity:
        result.append("Unknown author " + id)
        result.append("Known authors:")

        # Sorted, so that the list is shown in a predictable order
        result.extend(sorted(allActivity))

        return result

    activityByPath = allActivity[id]

    for path in sorted(activityByPath):
        result.append(str(activityByPath[path]) + " = " + str(path))

    return result
def aggregateActivity(idFilter, field, startFrom):
    """Aggregates the activity for all developers

    Args:
        idFilter: The id to filter on, if None (or empty) the activity of
                  all developers is aggregated.
        field: The 'git log' format placeholder (without the leading '%')
               that is used as id.
        startFrom: The commit to start logging from.

    Returns:
        A list with the lines to show: one line per path, in sorted order,
        each preceded by the activity aggregated for that path.
    """

    git = Repo(".").git
    log = git.log("--numstat", startFrom, "--", pretty="format:%" + field)

    # Keep the line endings, the parser treats blank lines as separators
    allActivity = activityInArea(log.splitlines(True))

    aggregatedActivity = {}

    for author, activityByPath in allActivity.items():
        # Leave out the developers that were not asked for
        if idFilter and author != idFilter:
            continue

        for path, stats in activityByPath.items():
            total = aggregatedActivity.get(path)

            # If we have not encountered this path, create an entry
            if total is None:
                total = aggregatedActivity[path] = Activity()

            total.added += stats.added
            total.deleted += stats.deleted
            total.count += stats.count

    result = []

    for path in sorted(aggregatedActivity):
        result.append(str(aggregatedActivity[path]) + " = " + str(path))

    return result
def _checkOptions(parser, options):
    """Checks the specified options and uses the parser to indicate errors

    Args:
        parser: The parser to use to signal when the options are bad.
        options: The options to check.
    """

    # The modes, of which only one may be chosen per invocation
    opts = [options.aggregate, options.developer, options.file, options.everyone]

    if not parse.isUnique(opts, atLeastOne=True):
        parser.error("Please choose exactly one mode")

    if options.file:
        try:
            parse.check_file(value=options.file, relative=options.relative)
        except OptionValueError as e:
            # Report a bad file the same way as any other bad option
            parser.error(e)
def dispatch(*args):
    """Dispatches author related commands

    Args:
        args: The command line arguments, these are handed to the option
              parser as they are.
    """

    progname = os.path.basename(sys.argv[0]) + " author"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-a", "--aggregate",
        action="store_true",
        help="aggregate the results")

    parser.add_option(
        "-e", "--everyone",
        action="store_true",
        help="show the activity of all developers")

    parser.add_option(
        "-d", "--developer",
        help="the id to filter on")

    parser.add_option(
        "-f", "--file",
        help="the file to filter on")

    parser.add_option(
        "-i", "--id",
        help="the one/two letter identifier specifying which field to use as id")

    parser.add_option(
        "-s", "--start-from",
        type="commit",
        metavar="COMMIT",
        help="the commit to start logging from")

    parser.add_option(
        "-r", "--relative",
        action="store_true",
        help="paths are relative to the current directory")

    parser.set_default("id", "ae")
    parser.set_default("start_from", "HEAD")
    parser.set_default("relative", False)

    (options, args) = parser.parse_args(list(args))

    _checkOptions(parser, options)

    # Nothing is shown should none of the modes below apply
    result = []

    if options.aggregate:
        result = aggregateActivity(options.developer, options.id, options.start_from)
    elif options.developer or options.everyone:
        result = activity(options.developer, options.id, options.start_from)
    elif options.file:
        activity_for_file = activityInFile(options.file,
                                           options.id,
                                           options.start_from,
                                           options.relative)

        # Sorted, so that the developers are shown in a predictable order
        for key in sorted(activity_for_file):
            result.append(key + ": " + str(activity_for_file[key]))

    for line in result:
        print(line)