gitstats: Check for empty diffs
[git-stats.git] / src / git_stats / diff.py
blobe0a230a6a863ed6c122f5a178a3d7578896c7e01
1 #!/usr/bin/env python
3 import collections
4 import os
5 import sys
7 from optparse import OptionParser
8 from git import Repo
10 from git_stats import commit
11 from git_stats import parse
class fileDiff:
    """A class to store the information of a file diff in.

    Fields:
        afile: The file used as the left side of the diff.
        bfile: The file used as the right side of the diff.
        context: The context of this diff.
        astart: Where the left side of the diff starts.
        bstart: Where the right side of the diff starts.
        linesAdded: Which lines were added.
        linesDeleted: Which lines were deleted.
    """

    def __init__(self, diffHeader):
        """Initializes the diff from the lines of a diff header

        Params:
            diffHeader: The header lines; the '--- ' and '+++ ' lines
                        are used to fill in afile and bfile.
        """

        self.afile = ""
        self.bfile = ""
        self.context = ""

        # Give the start positions a default, so that a diff for which no
        # hunk was parsed can still be printed with __str__.
        self.astart = 0
        self.bstart = 0

        self.linesAdded = []
        self.linesDeleted = []

        for line in diffHeader:
            if line.startswith("--- "):
                self.afile = line[4:]
            elif line.startswith("+++ "):
                self.bfile = line[4:]

    def __str__(self):
        """Returns a description of the diff and its added/deleted lines"""

        a = "Diff for '" + self.afile + "' (" + str(self.astart) + ") against '"
        b = self.bfile + "' (" + str(self.bstart) + ")" + self.context + "."

        return a + b + '\n' + str(self.linesAdded) + '\n' + str(self.linesDeleted)
def splitDiff(diff):
    """Cuts a diff into pieces, one piece per file

    Params:
        diff: The lines of the diff to cut up.

    Returns: A list with a list of lines for each file. The piece that was
             being collected when the input ran out is always included,
             even when it is empty.
    """

    pieces = []
    current = []

    for entry in diff:
        # A line starting with 'diff' opens the next file, so anything
        # collected so far belongs to the previous one.
        if entry.startswith("diff") and current:
            pieces.append(current)
            current = []

        current.append(entry)

    pieces.append(current)

    return pieces
def _splitFileDiff(diff):
    """Separates the diff of one file into its header and its hunks

    Params:
        diff: The lines of the file diff to separate.

    Returns: The header lines (everything before the first line starting
             with '@@') and a list with the lines of each hunk. The hunk
             that was being collected when the input ran out is always
             included, even when it is empty.
    """

    header = []
    hunks = []
    current = []

    # Becomes true as soon as the first '@@' line has been seen
    pastHeader = False

    for entry in diff:
        if entry.startswith("@@"):
            pastHeader = True

            # Close the hunk collected so far, if there is one
            if current:
                hunks.append(current)
                current = []

        if pastHeader:
            current.append(entry)
        else:
            header.append(entry)

    hunks.append(current)

    return header, hunks
def _parseFileDiff(header, chunk):
    """Takes a file diff and returns the parsed result

    Params:
        header: The diff header.
        chunk: The chunk to parse, starting with its '@@ -a,b +c,d @@' line.

    Returns: A fileDiff containing the parsed diff. When the chunk is empty
             only the information from the header is filled in.
    """

    result = fileDiff(header)

    if not chunk:
        return result

    deleted = []
    added = []

    # Find out where the context line ends, skipping the first '@@'
    to = chunk[0].find("@@", 2)

    # Get the context, skipping the first and last '@@'
    context = chunk[0][3:to]

    # Split it at the spaces and store the positions, ignoring '-' and '+'
    split = context.split(' ')
    a = split[0][1:]
    b = split[1][1:]

    # Only the start line is needed, drop the optional ',count' part
    apos = int(a.split(',')[0])
    bpos = int(b.split(',')[0])

    result.astart = apos
    result.bstart = bpos

    # Start at the first line (skip the context line)
    for line in chunk[1:]:
        if line.startswith("-"):
            deleted.append((apos, line[1:]))
            apos += 1
        elif line.startswith("+"):
            added.append((bpos, line[1:]))
            bpos += 1
        elif line.startswith(" "):
            # An unchanged line is present on both sides, so both positions
            # move on; otherwise later changes get the wrong line number.
            apos += 1
            bpos += 1

    result.linesDeleted = deleted
    result.linesAdded = added

    return result
def parseCommitDiff(diff):
    """Parses all the hunks found in the diff of a commit

    Params:
        diff: The lines of the commit diff to parse.

    Returns: A list with one fileDiff for each hunk of each file.
    """

    parsed = []

    # First cut the diff up per file, then cut each file up per hunk
    for perFile in splitDiff(diff):
        header, hunks = _splitFileDiff(perFile)

        # Every hunk is parsed against the header of its own file
        parsed.extend([_parseFileDiff(header, hunk) for hunk in hunks])

    return parsed
def _compareFileDiffs(adiff, bdiff, invert=False):
    """Tells whether two fileDiffs contain the same changes

    Args:
        adiff: The first fileDiff.
        bdiff: The second fileDiff.
        invert: Whether to hold the added lines of adiff against the deleted
                lines of bdiff (and the other way around).

    Returns: Whether the two diffs are equal.
    """

    # Pick what the lines of adiff have to be equal to
    if invert:
        wantedAdded, wantedDeleted = bdiff.linesDeleted, bdiff.linesAdded
    else:
        wantedAdded, wantedDeleted = bdiff.linesAdded, bdiff.linesDeleted

    return adiff.linesAdded == wantedAdded and adiff.linesDeleted == wantedDeleted
def _compareDiffs(adiffs, bdiffs, compareChanges=False, invert=False):
    """Compares the two diffs and returns whether they are equal

    Args:
        adiffs: The first set of diffs.
        bdiffs: The second set of diffs.
        compareChanges: Whether to compare not only which lines changed.
        invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Whether the diffs are equal.
    """

    for fd in adiffs:
        fdEmpty = not (fd.linesAdded or fd.linesDeleted)

        # Look for a match in the bdiffs
        for theirs in bdiffs:
            theirsEmpty = not (theirs.linesAdded or theirs.linesDeleted)

            # An empty diff can never match one that has changes. This has
            # to be checked in both directions before looking at the start
            # positions, as those are only set when a hunk was parsed.
            if fdEmpty != theirsEmpty:
                return False

            # Check if both are empty diffs
            if fdEmpty:
                return True

            # Looks like we have a match
            if (theirs.astart <= fd.astart and theirs.bstart >= fd.bstart) or \
               (invert and theirs.astart <= fd.bstart and theirs.bstart >= fd.astart):

                # If we want to compare changes, do they match
                if compareChanges:
                    # Reject if they are inequal
                    if not _compareFileDiffs(fd, theirs, invert):
                        return False

                # It was indeed a match, stop searching through bdiffs
                break

        else:
            # Went through all items in bdiffs and couldn't find a matching pair
            return False

    # All items in adiffs checked, all had a matching pair, accept.
    return True
def _difference(adiffs, bdiffs, compareChanges=False, invert=False):
    """Calculates the difference between two diffs and returns it

    Params:
        adiffs: The first set of diffs.
        bdiffs: The second set of diffs.
        compareChanges: Whether to compare not only which lines changed.
        invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Which keys are missing and the difference between both diffs.
             A key is an (afile, bfile) pair that occurs in adiffs but not in
             bdiffs, the difference is a list of (ours, theirs) pairs with
             the diffs of the files that did not compare equal.
    """

    # Group the diffs by the files they belong to
    afiles = collections.defaultdict(list)
    bfiles = collections.defaultdict(list)

    missing = []
    difference = []

    for fd in adiffs:
        afiles[(fd.afile, fd.bfile)].append(fd)

    for fd in bdiffs:
        bfiles[(fd.afile, fd.bfile)].append(fd)

    # Use items() and 'in' rather than iteritems() and has_key(), the
    # latter two no longer exist in Python 3.
    for key, fds in afiles.items():
        if key not in bfiles:
            missing.append(key)
            continue

        theirs = bfiles[key]

        if not _compareDiffs(fds, theirs, compareChanges, invert):
            difference.append((fds, theirs))

    return missing, difference
def commitdiffEqual(original, potentialMatch, threshold=0,
                    compareChanges=True, invert=False, verbose=True):
    """Checks whether the diffs of two commits are the same

    Params:
        original: The original commit that is to be checked.
        potentialMatch: The commit that might match original.
        threshold: The threshold for how close they have to match, it is
                   accepted but not used by this function.
        compareChanges: Whether to compare the changes made or just the
                        changed lines.
        invert: Whether to compare deletions with insertions instead.
        verbose: Whether to print the missing files and the differences.

    Returns: Whether the commit diffs are equal.
    """

    # NOTE(review): this handle is not used below, it is kept so that the
    # repository in the current directory is still opened as before.
    git = Repo(".").git

    # Fetch both diffs (asking for no context lines) before parsing either
    originalLines = commit.getDiff(original, noContext=True).split('\n')
    matchLines = commit.getDiff(potentialMatch, noContext=True).split('\n')

    missing, diff = _difference(parseCommitDiff(originalLines),
                                parseCommitDiff(matchLines),
                                compareChanges=compareChanges,
                                invert=invert)

    if verbose and missing:
        print("Missing the following keys:")
        for key in missing:
            print(key)

    if verbose and diff:
        print("Found the following differences:")
        for ours, theirs in diff:
            print("---")
            for fd in ours:
                print(fd)
            print("\nDoes not match:\n")
            for fd in theirs:
                print(fd)
            print("----")

    # Equal only if nothing is missing and there is no difference
    return not missing and not diff
def isReverted(commit, potentialRevert):
    """Tells whether one commit undoes the changes of another commit

    Args:
        commit: The commit that might be reverted.
        potentialRevert: The commit that might be a revert.

    Returns: Whether the diffs of both commits are equal when the additions
             of the one are compared with the deletions of the other.
    """

    # Compare quietly, with additions and deletions swapped
    return commitdiffEqual(original=commit,
                           potentialMatch=potentialRevert,
                           invert=True,
                           verbose=False)
def findReverts(potentialRevert):
    """Collects the commits that are undone by the specified commit

    Params:
        potentialRevert: The commit that might be a revert.

    Returns: A list with the commits that potentialRevert reverts, which is
             empty when potentialRevert did not touch any paths.
    """

    touched = commit.pathsTouchedBy(potentialRevert)

    # Without touched paths there is nothing that could have been reverted
    if not touched:
        return []

    # Only commits that touched the same paths are candidates, and a commit
    # is never regarded as a revert of itself.
    return [candidate
            for candidate in commit.commitsThatTouched(touched)
            if not (candidate == potentialRevert)
            and isReverted(candidate, potentialRevert)]
def dispatch(*args):
    """Parses the arguments of the diff command and runs what was asked

    Params:
        args: The command line arguments to parse.

    With --equals "Equal" is printed when the diffs of both commits match,
    with --reverts the commits reverted by the specified commit are printed.
    """

    progname = os.path.basename(sys.argv[0]) + " diff"

    parser = OptionParser(option_class=parse.GitOption, prog=progname)

    parser.add_option(
        "-e", "--equals",
        type="commit",
        nargs=2,
        help="show whether the two diffs for the specified commits match",
        metavar="COMMIT COMMIT")

    parser.add_option(
        "-t", "--threshold",
        type="int",
        help="the threshold for comparison")

    parser.add_option(
        "-n", "--no-compare",
        action="store_false",
        dest="compare",
        help="do not compare the diff content, just look at which lines were touched")

    parser.add_option(
        "-i", "--invert",
        action="store_true",
        help="compare additions with deletions instead of with additions, and vise versa")

    parser.add_option(
        "-r", "--reverts",
        type="commit",
        help="show only commits that are reverted by the specified commit")

    # The values used for the options that were not specified
    parser.set_defaults(threshold=0, compare=True, invert=False)

    options, _ = parser.parse_args(list(args))

    if options.equals:
        # The option takes exactly two commits
        first, second = options.equals

        isEqual = commitdiffEqual(first, second,
                                  threshold=options.threshold,
                                  compareChanges=options.compare,
                                  invert=options.invert)

        if isEqual:
            print("Equal")

    if options.reverts:
        reverted = findReverts(options.reverts)
        commit.prettyPrint(reverted)