3c863cdb0c1f44849d24f83e240a4e08b10fd13a
[git-stats.git] / src / git_stats / author.py
blob3c863cdb0c1f44849d24f83e240a4e08b10fd13a
1 #!/usr/bin/env python
3 import os
4 import sys
6 from optparse import OptionParser, OptionValueError
7 from git import Repo
9 from git_stats import parse
class Activity:
    """Plain record holding the accumulated statistics for one file.

    The attributes are public and are read and written directly by the
    code that aggregates the commit log.
    """

    def __init__(self):
        # Counters all start out at zero.
        self.added = 0
        self.deleted = 0
        self.count = 0

        # Ids associated with this activity; a fresh list per instance.
        self.id = []

    def __str__(self):
        # The last column is the net change: lines added minus lines deleted.
        net = self.added - self.deleted
        columns = (self.count, self.added, self.deleted, net)

        return "%4d: %5d+ %5d- %5d~" % columns
def _mergeActivities(activityByAuthor, activities):
    """Folds the entries of one commit group into the per-author totals.

    Params:
        activityByAuthor: The dictionary (author -> path -> Activity) to
            update in place.
        activities: The Activity entries of the group; each one is counted
            once for every id stored on it.
    """

    for entry in activities:
        for author in entry.id:
            by_path = activityByAuthor.setdefault(author, {})

            # If we have not encountered this path, create an entry
            total = by_path.get(entry.path)
            if total is None:
                total = by_path[entry.path] = Activity()

            total.added += entry.added
            total.deleted += entry.deleted
            total.count += 1


def activityInArea(log):
    """Parses the specified lines of a commit log.

    The log is expected to consist of groups separated by a blank line.
    Each group is made up of one or more lines holding an id, followed by
    one or more lines of the form: lines added, TAB, lines deleted, TAB,
    path.

    The last group is aggregated even when the log does not end with a
    blank line, and a final line without a line terminator is kept intact.

    Params:
        log: An iterable with the lines of the log, formatted as
            described above.

    Returns:
        A dictionary containing activity per author is returned.
        Each author contains a dictionary with activity per path.
        Each path contains one Activity object with the aggregated results.
    """

    # Create a dict to store the activity stats per author
    activityByAuthor = {}

    # The entries and ids of the group that is currently being read
    activities = []
    ids = []

    # Parse all the lines in the log, counting from one for the messages
    for lineno, raw in enumerate(log, 1):
        # Remove only the line terminator; slicing off the last character
        # would damage a final line that has no terminator.
        line = raw.rstrip('\r\n')

        # Split the line at the tab and store the data
        fields = line.split('\t')
        size = len(fields)
        blank = not line.strip()

        # There is something on this line, but it contains no separator
        if size == 1 and not blank:
            ids.append(line)
        elif size == 3:
            entry = Activity()

            # Deliberately the same list object: ids that are read later in
            # this group are seen by the entry as well.
            entry.id = ids

            try:
                # A '-' in place of a count is treated as zero
                # (git log --numstat prints '-' for binary files).
                added, deleted = fields[0], fields[1]
                entry.added = 0 if added == '-' else int(added)
                entry.deleted = 0 if deleted == '-' else int(deleted)
            except ValueError as e:
                print("On line '%d', could not convert number: %s" % (lineno, str(e)))

            entry.path = fields[2]
            activities.append(entry)
        elif blank:
            # A blank line ends the group: aggregate it and start afresh
            _mergeActivities(activityByAuthor, activities)
            activities = []
            ids = []
        else:
            print("Cannot parse line %d." % lineno)

    # The log does not have to end with a blank line, so aggregate whatever
    # is still pending; this is a no-op when nothing is.
    _mergeActivities(activityByAuthor, activities)

    # Return the result
    return activityByAuthor
def activityInFile(path, id, start_from, relative):
    """Shows the activity for the file in the current repo.

    Runs 'git log' restricted to the path and counts how often each
    distinct output line occurs.

    Params:
        path: The path to filter on.
        id: The format placeholder, without the leading '%', that is
            printed for every commit (it is appended to "format:%").
        start_from: The commit to start logging from.
        relative: Treat path as relative to the current working directory.

    Returns:
        A dictionary mapping each distinct line of the log output to the
        number of times it occurs. It is empty when the log is empty.
    """

    git = Repo(".").git

    output = git.log(start_from, "--", path, pretty="format:%" + id,
                     with_keep_cwd=relative)

    counts = {}

    # Splitting an empty string yields [''], which would be reported as one
    # commit by an author without a name, so bail out early instead.
    if not output:
        return counts

    for line in output.split('\n'):
        # Start at zero if there was no entry for this author yet
        counts[line] = counts.get(line, 0) + 1

    return counts
def activity(id, field, start_from):
    """Lists the activity per path of one developer, or of all of them.

    Params:
        id: The id of the developer, as specified by field. When it is
            empty the activity of every developer is listed.
        field: The format placeholder, without the leading '%', that is
            used to identify a developer.
        start_from: The commit to start logging from.

    Returns:
        A list with the lines to show. If the id is not known the list
        names the unknown id followed by all the known ones.
    """

    numstat = Repo(".").git.log("--numstat", start_from, "--", pretty="format:%" + field)
    per_author = activityInArea(numstat.splitlines(True))

    def describe(by_path, template):
        # One line per path, ordered by path
        return [template % (str(by_path[path]), str(path))
                for path in sorted(by_path)]

    # No developer specified: show everyone, one indented section each
    if not id:
        lines = []

        for author in sorted(per_author):
            lines.append(author + ":")
            lines.extend(describe(per_author[author], "\t%s = %s"))
            lines.append("")

        return lines

    if id not in per_author:
        return ["Unknown author " + id, "Known authors:"] + list(per_author.keys())

    return describe(per_author[id], "%s = %s")
def aggregateActivity(id_filter, field, start_from):
    """Aggregates the activity for all developers

    The activity of the developers is summed per path.

    Args:
        id_filter: The id to filter on, if None all developers will be shown.
        field: The format placeholder, without the leading '%', that is
            used to identify a developer.
        start_from: The commit to start logging from.

    Returns:
        A list with one line per path, sorted by path.
    """

    git = Repo(".").git
    output = git.log("--numstat", start_from, "--", pretty="format:%" + field)

    allActivity = activityInArea(output.splitlines(True))

    aggregatedActivity = {}

    for author, activityByPath in allActivity.items():
        # Honour the documented filter; without one everybody is included
        if id_filter and author != id_filter:
            continue

        for path, entry in activityByPath.items():
            total = aggregatedActivity.get(path)
            if total is None:
                total = aggregatedActivity[path] = Activity()

            total.added += entry.added
            total.deleted += entry.deleted

            # The entry is already a total itself, so add its count
            total.count += entry.count

    return ["%s = %s" % (str(aggregatedActivity[path]), str(path))
            for path in sorted(aggregatedActivity)]
def _checkOptions(parser, options):
    """Checks the specified options and uses the parser to indicate errors

    Args:
        parser: The parser to use to signal when the options are bad.
        options: The options to check.
    """

    # The modes of operation; the message below says exactly one is wanted.
    # NOTE(review): the precise rule is up to parse.isUnique - confirm there.
    modes = [options.aggregate, options.developer, options.file, options.everyone]

    if not parse.isUnique(modes, at_least_one=True):
        parser.error("Please choose exactly one mode")

    if options.file:
        try:
            parse.checkFile(value=options.file, relative=options.relative)
        except OptionValueError as e:
            # 'as' is understood by Python 2.6 and later as well as by
            # Python 3; hand the parser the message as a plain string.
            parser.error(str(e))
def dispatch(*args):
    """Dispatches author related commands

    Parses the arguments, runs the mode that was selected and prints the
    resulting lines.

    Params:
        args: The arguments to parse; they are handed to the option parser
            as a list.
    """

    progname = os.path.basename(sys.argv[0]) + " author"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-a", "--aggregate",
        action="store_true",
        help="aggregate the results")

    parser.add_option(
        "-e", "--everyone",
        action="store_true",
        help="show the activity of all developers")

    parser.add_option(
        "-d", "--developer",
        help="the id to filter on")

    parser.add_option(
        "-f", "--file",
        help="the file to filter on")

    parser.add_option(
        "-i", "--id",
        help="the one/two letter identifier specifying which field to use as id")

    parser.add_option(
        "-s", "--start-from",
        type="commit",
        metavar="COMMIT",
        help="the commit to start logging from")

    parser.add_option(
        "-r", "--relative",
        action="store_true",
        help="paths are relative to the current directory")

    parser.set_default("id", "ae")
    parser.set_default("start_from", "HEAD")
    parser.set_default("relative", False)

    (options, args) = parser.parse_args(list(args))

    _checkOptions(parser, options)

    # Nothing is printed should none of the modes below apply
    result = []

    if options.aggregate:
        result = aggregateActivity(options.developer, options.id, options.start_from)
    elif options.developer or options.everyone:
        result = activity(options.developer, options.id, options.start_from)
    elif options.file:
        activity_for_file = activityInFile(options.file,
                                           options.id,
                                           options.start_from,
                                           options.relative)

        # Sorted like the other modes, so the output order is stable
        for key in sorted(activity_for_file):
            result.append("%s: %s" % (key, str(activity_for_file[key])))

    for line in result:
        print(line)