gitstats: Bugfix to _parseFileDiff so that it handles diffs with mode changes only
[git-stats.git] / src / git_stats / diff.py
blobf960c2cabdfa23cf94209d3a49c13caa30848143
1 #!/usr/bin/env python
3 import collections
4 import os
5 import sys
7 from optparse import OptionParser
8 from git import Repo
10 from git_stats import parse
class fileDiff:
    """A class to store the information of a file diff in.

    Fields:
      afile: The file used as the left side of the diff.
      bfile: The file used as the right side of the diff.
      context: The context of this diff.
      astart: Where the left side of the diff starts.
      bstart: Where the right side of the diff starts.
      linesAdded: Which lines were added.
      linesDeleted: Which lines were deleted.
    """

    def __init__(self, diffHeader):
        """Creates an empty diff and reads the file names from the header

        Params:
          diffHeader: The header lines of a file diff; the lines starting
                      with '--- ' and '+++ ' supply afile and bfile.
        """

        self.afile = ""
        self.bfile = ""
        self.context = ""

        # Always give the positions a value. A diff without any hunk (for
        # example one that only changes the file mode) never has them
        # assigned afterwards, and __str__ used to raise an AttributeError
        # for such a diff.
        self.astart = 0
        self.bstart = 0

        self.linesAdded = []
        self.linesDeleted = []

        for line in diffHeader:
            if line.startswith("--- "):
                self.afile = line[4:]
            elif line.startswith("+++ "):
                self.bfile = line[4:]

    def __str__(self):
        """Returns a description of the diff followed by the changed lines"""

        a = "Diff for '" + self.afile + "' (" + str(self.astart) + ") against '"
        b = self.bfile + "' (" + str(self.bstart) + ")" + self.context + "."

        return a + b + '\n' + str(self.linesAdded) + '\n' + str(self.linesDeleted)
def getCommitDiff(commit, ignoreWhitespace=True, noContext=False):
    """Returns the commit diff for the specified commit

    The diff is taken between the commit's parent (commit + "^") and the
    commit itself, using 'git diff-tree -p' in the current directory.

    Params:
      commit: The commit to get the diff for.
      ignoreWhitespace: Whether to pass '-w' along.
      noContext: Whether to pass '-U0' along.

    Returns: The commit diff.
    """

    gitCommand = Repo(".").git

    # Optional switches come first, the two revisions last
    arguments = ["-p"]
    arguments += ["-w"] if ignoreWhitespace else []
    arguments += ["-U0"] if noContext else []
    arguments += [commit + "^", commit]

    return gitCommand.diff_tree(*arguments)
def splitDiff(diff):
    """Splits off the diff in chunks, one for each file

    A new chunk starts at every line that begins with "diff".

    Params:
      diff: The diff to split up, as an iterable of lines.

    Returns: A list containing a chunk (a list of lines) per file.
    """

    perFile = []
    current = []

    for line in diff:
        # Close the running chunk when the next file begins, unless there
        # is nothing collected yet
        if current and line.startswith("diff"):
            perFile.append(current)
            current = []

        current.append(line)

    # The last chunk is always stored, even when it is empty
    perFile.append(current)

    return perFile
def _splitFileDiff(diff):
    """Splits a file diff into chunks, one per area.

    Every line up to the first one starting with "@@" is part of the header,
    each "@@" line after that starts a new chunk.

    Params:
      diff: The diff to split up, as a list of lines.

    Returns: The diff header and a list with all the chunks.
    """

    # Positions of all the lines that open a chunk
    hunkStarts = [index for index, line in enumerate(diff)
                  if line.startswith("@@")]

    total = len(diff)
    headerEnd = hunkStarts[0] if hunkStarts else total
    header = list(diff[:headerEnd])

    # Each chunk runs from its own '@@' line up to the next one (or the end)
    bounds = hunkStarts + [total]
    chunks = [list(diff[lo:hi]) for lo, hi in zip(bounds, bounds[1:])]

    # Without any '@@' line a single empty chunk is returned
    if not chunks:
        chunks = [[]]

    return header, chunks
def _parseFileDiff(header, chunk):
    """Takes a file diff and returns the parsed result

    Params:
      header: The diff header.
      chunk: The chunk to parse, its first line being the '@@' line.
             May be empty for a diff that has no chunks at all.

    Returns: A fileDiff containing the parsed diff.
    """

    result = fileDiff(header)

    # Nothing to parse (for example a diff with only a mode change)
    if not chunk:
        return result

    deleted = []
    added = []

    # Find out where the context line ends, skipping the first '@@'
    to = chunk[0].find("@@", 2)

    # Get the context, skipping the first and last '@@'
    context = chunk[0][3:to]

    # Split it at the spaces and store the positions, ignoring '-' and '+'
    split = context.split(' ')
    a = split[0][1:]
    b = split[1][1:]

    apos = int(a.split(',')[0])
    bpos = int(b.split(',')[0])

    result.astart = apos
    result.bstart = bpos

    # Start at the first line (skip the context line)
    for line in chunk[1:]:
        if line.startswith("-"):
            deleted.append((apos, line[1:]))
            apos += 1
        elif line.startswith("+"):
            added.append((bpos, line[1:]))
            bpos += 1
        elif line.startswith(" "):
            # An unchanged line is present on both sides, so it moves both
            # positions along; without this every change that follows a
            # context line would be stored with a too low line number.
            apos += 1
            bpos += 1

    result.linesDeleted = deleted
    result.linesAdded = added

    return result
def parseCommitDiff(diff):
    """Takes a commit diff and returns the parsed result

    Params:
      diff: The diff to parse, as an iterable of lines.

    Returns: A list with a parsed fileDiff for every chunk of every file.
    """

    parsed = []

    # Handle the diff one file at a time
    for fileLines in splitDiff(diff):
        header, hunks = _splitFileDiff(fileLines)

        # Parse every chunk of this file, all of them share the same header
        parsed.extend(_parseFileDiff(header, hunk) for hunk in hunks)

    return parsed
def _compareFileDiffs(adiff, bdiff, invert=False):
    """Compares two fileDiffs and returns whether they are equal

    Args:
      adiff: The first fileDiff.
      bdiff: The second fileDiff.
      invert: Whether to compare linesAdded with linesDeleted.

    Returns: Whether the two diffs are equal.
    """

    # Pick what the lines of adiff should be held against
    if invert:
        expectedAdded = bdiff.linesDeleted
        expectedDeleted = bdiff.linesAdded
    else:
        expectedAdded = bdiff.linesAdded
        expectedDeleted = bdiff.linesDeleted

    # Both the additions and the deletions have to agree
    sameAdded = adiff.linesAdded == expectedAdded
    sameDeleted = adiff.linesDeleted == expectedDeleted

    return bool(sameAdded and sameDeleted)
def _compareDiffs(adiffs, bdiffs, compareChanges=False, invert=False):
    """Compares the two diffs and returns whether they are equal

    Args:
      adiffs: The first set of diffs.
      bdiffs: The second set of diffs.
      compareChanges: Whether to compare not only which lines changed.
      invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Whether the diffs are equal.
    """

    def isMatch(ours, theirs):
        """Returns whether the start positions make these two a pair"""

        if theirs.astart <= ours.astart and theirs.bstart >= ours.bstart:
            return True

        # When inverting the sides may also be matched crosswise
        return bool(invert and
                    theirs.astart <= ours.bstart and
                    theirs.bstart >= ours.astart)

    for ours in adiffs:
        # Only the first candidate in bdiffs that matches is considered
        partner = None
        found = False

        for candidate in bdiffs:
            if isMatch(ours, candidate):
                partner = candidate
                found = True
                break

        # Nothing in bdiffs pairs up with this one, reject
        if not found:
            return False

        # When comparing changes the pair has to have equal content too
        if compareChanges and not _compareFileDiffs(ours, partner, invert):
            return False

    # Every item in adiffs has an acceptable partner
    return True
def _difference(adiffs, bdiffs, compareChanges=False, invert=False):
    """Calculates the difference between two diffs and returns it

    The diffs are grouped by their (afile, bfile) pair, and the groups with
    the same pair are compared with each other.

    Params:
      adiffs: The first set of diffs.
      bdiffs: The second set of diffs.
      compareChanges: Whether to compare not only which lines changed.
      invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Which keys are missing and the difference between both diffs.
             The missing keys are the (afile, bfile) pairs only found in
             adiffs, the difference is a list of (ours, theirs) pairs of
             groups that did not compare equal.
    """

    afiles = collections.defaultdict(list)
    bfiles = collections.defaultdict(list)

    missing = []
    difference = []

    for fd in adiffs:
        afiles[(fd.afile, fd.bfile)].append(fd)

    for fd in bdiffs:
        bfiles[(fd.afile, fd.bfile)].append(fd)

    # items() and 'in' are used instead of iteritems() and has_key(), the
    # latter two are not available on Python 3. Testing with 'in' does not
    # add the key to the defaultdict either.
    for key, fds in afiles.items():
        if key not in bfiles:
            missing.append(key)
            continue

        theirs = bfiles[key]

        if not _compareDiffs(fds, theirs, compareChanges, invert):
            difference.append((fds, theirs))

    return missing, difference
def commitdiffEqual(original, potentialMatch, threshold=0,
                    compareChanges=True, invert=False, verbose=True):
    """Tests whether a commit matches another by a specified threshold.

    Params:
      original: The original commit that is to be checked.
      potentialMatch: The commit that might match original.
      threshold: The threshold for how close they have to match.
                 It is accepted but not used by the comparison at the moment.
      compareChanges: Whether to compare the changes made or just changed lines.
      invert: Whether to compare deletions with insertions instead.
      verbose: Whether to print the missing keys and the differences found.

    Returns: Whether the commit diffs are equal.
    """

    # Get both diffs without context lines; whitespace is ignored as that
    # is the default of getCommitDiff. No git handle is created here, since
    # getCommitDiff takes care of that itself.
    diffOriginal = getCommitDiff(original, noContext=True).split('\n')
    diffPotentialMatch = getCommitDiff(potentialMatch, noContext=True).split('\n')

    parsedOriginal = parseCommitDiff(diffOriginal)
    parsedPotentialMatch = parseCommitDiff(diffPotentialMatch)

    missing, diff = _difference(parsedOriginal, parsedPotentialMatch,
                                compareChanges=compareChanges, invert=invert)

    if verbose:
        if missing:
            print("Missing the following keys:")
            for key in missing:
                print(key)

        if diff:
            print("Found the following differences:")
            for ours, theirs in diff:
                print("---")
                for fd in ours:
                    print(fd)
                print("\nDoes not match:\n")
                for fd in theirs:
                    print(fd)
                print("----")

    # Unequal if something missing, or there is a difference
    return not (missing or diff)
def isReverted(commit, potentialRevert):
    """Returns whether the specified commit is reverted by another one

    The two commit diffs are compared with additions and deletions swapped,
    without printing anything.

    Args:
      commit: The commit that might be reverted.
      potentialRevert: The commit that might be a revert.
    """

    reverted = commitdiffEqual(commit,
                               potentialRevert,
                               invert=True,
                               verbose=False)

    return reverted
367 def dispatch(*args):
368 """Dispatches diff related commands
371 progname = os.path.basename(sys.argv[0]) + " diff"
373 parser = OptionParser(option_class=parse.GitOption, prog=progname)
375 parser.add_option(
376 "-e", "--equals",
377 type="commit",
378 nargs=2,
379 help="show whether the two diffs for the specified commits match",
380 metavar="COMMIT COMMIT")
382 parser.add_option(
383 "-t", "--threshold",
384 type="int",
385 help="the threshold for comparison")
387 parser.add_option(
388 "-n", "--no-compare",
389 action="store_false",
390 dest="compare",
391 help="do not compare the diff content, just look at which lines were touched")
393 parser.add_option(
394 "-i", "--invert",
395 action="store_true",
396 help="compare additions with deletions instead of with additions, and vise versa")
398 parser.set_default("threshold", 0)
399 parser.set_default("compare", True)
400 parser.set_default("invert", False)
402 (options, args) = parser.parse_args(list(args))
404 if options.equals:
405 result = commitdiffEqual( threshold=options.threshold,
406 compareChanges=options.compare,
407 invert=options.invert,
408 *options.equals)
410 if result:
411 print("Equal")